SumOfSquaredDeviations.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.statistics.descriptive;

  18. /**
  19.  * Computes the sum of squared deviations from the sample mean. This
  20.  * statistic is related to the second moment.
  21.  *
  22.  * <p>The following recursive updating formula is used:
  23.  * <p>Let
  24.  * <ul>
  25.  *  <li> dev = (current obs - previous mean) </li>
  26.  *  <li> n = number of observations (including current obs) </li>
  27.  * </ul>
  28.  * <p>Then
  29.  * <p>new value = old value + dev^2 * (n - 1) / n
  30.  * <p>returns the sum of squared deviations of all values seen so far.
  31.  *
  32.  * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
  33.  * This implementation does not check for overflow of the count.
  34.  *
  35.  * <p><strong>Note that this implementation is not synchronized.</strong> If
  36.  * multiple threads access an instance of this class concurrently, and at least
  37.  * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or
  38.  * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally.
  39.  *
  40.  * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept}
  41.  * and {@link StatisticAccumulator#combine(StatisticResult) combine}
  42.  * as {@code accumulator} and {@code combiner} functions of
  43.  * {@link java.util.stream.Collector Collector} on a parallel stream,
  44.  * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()}
  45.  * provides the necessary partitioning, isolation, and merging of results for
  46.  * safe and efficient parallel execution.
  47.  *
  48.  * <p>References:
  49.  * <ul>
  50.  *   <li>Chan, Golub and Levesque (1983)
  51.  *       Algorithms for Computing the Sample Variance: Analysis and Recommendations.
  52.  *       American Statistician, 37, 242-247.
  53.  *       <a href="https://doi.org/10.2307/2683386">doi: 10.2307/2683386</a>
  54.  * </ul>
  55.  *
  56.  * @since 1.1
  57.  */
  58. class SumOfSquaredDeviations extends FirstMoment {
  59.     /** Sum of squared deviations of the values that have been added. */
  60.     protected double sumSquaredDev;

  61.     /**
  62.      * Create an instance.
  63.      */
  64.     SumOfSquaredDeviations() {
  65.         // No-op
  66.     }

  67.     /**
  68.      * Copy constructor.
  69.      *
  70.      * @param source Source to copy.
  71.      */
  72.     SumOfSquaredDeviations(SumOfSquaredDeviations source) {
  73.         super(source);
  74.         sumSquaredDev = source.sumSquaredDev;
  75.     }

  76.     /**
  77.      * Create an instance with the given sum of squared deviations and first moment.
  78.      *
  79.      * @param sumSquaredDev Sum of squared deviations.
  80.      * @param m1 First moment.
  81.      */
  82.     private SumOfSquaredDeviations(double sumSquaredDev, FirstMoment m1) {
  83.         super(m1);
  84.         this.sumSquaredDev = sumSquaredDev;
  85.     }

  86.     /**
  87.      * Create an instance with the given sum of squared deviations and first moment.
  88.      *
  89.      * <p>This constructor is used when creating the moment from integer values.
  90.      *
  91.      * @param sumSquaredDev Sum of squared deviations.
  92.      * @param m1 First moment.
  93.      * @param n Count of values.
  94.      */
  95.     SumOfSquaredDeviations(double sumSquaredDev, double m1, long n) {
  96.         super(m1, n);
  97.         this.sumSquaredDev = sumSquaredDev;
  98.     }

  99.     /**
  100.      * Returns an instance populated using the input {@code values}.
  101.      *
  102.      * <p>Note: {@code SumOfSquaredDeviations} computed using {@link #accept accept} may be
  103.      * different from this instance.
  104.      *
  105.      * @param values Values.
  106.      * @return {@code SumOfSquaredDeviations} instance.
  107.      */
  108.     static SumOfSquaredDeviations of(double... values) {
  109.         if (values.length == 0) {
  110.             return new SumOfSquaredDeviations();
  111.         }
  112.         return create(FirstMoment.of(values), values);
  113.     }

  114.     /**
  115.      * Creates the sum of squared deviations.
  116.      *
  117.      * <p>Uses the provided {@code sum} to create the first moment.
  118.      * This method is used by {@link DoubleStatistics} using a sum that can be reused
  119.      * for the {@link Sum} statistic.
  120.      *
  121.      * @param sum Sum of the values.
  122.      * @param values Values.
  123.      * @return {@code SumOfSquaredDeviations} instance.
  124.      */
  125.     static SumOfSquaredDeviations create(org.apache.commons.numbers.core.Sum sum, double[] values) {
  126.         if (values.length == 0) {
  127.             return new SumOfSquaredDeviations();
  128.         }
  129.         return create(FirstMoment.create(sum, values), values);
  130.     }

  131.     /**
  132.      * Creates the sum of squared deviations.
  133.      *
  134.      * @param m1 First moment.
  135.      * @param values Values.
  136.      * @return {@code SumOfSquaredDeviations} instance.
  137.      */
  138.     private static SumOfSquaredDeviations create(FirstMoment m1, double[] values) {
  139.         // "Corrected two-pass algorithm"
  140.         // See: Chan et al (1983) Equation 1.7

  141.         final double xbar = m1.getFirstMoment();
  142.         if (!Double.isFinite(xbar)) {
  143.             return new SumOfSquaredDeviations(Double.NaN, m1);
  144.         }
  145.         double s = 0;
  146.         double ss = 0;
  147.         for (final double x : values) {
  148.             final double dx = x - xbar;
  149.             s += dx;
  150.             ss += dx * dx;
  151.         }
  152.         // The sum of squared deviations is ss - (s * s / n).
  153.         // The second term ideally should be zero; in practice it is a good approximation
  154.         // of the error in the first term.
  155.         // To prevent sumSquaredDev from spuriously attaining a NaN value
  156.         // when ss is infinite, assign it an infinite value which is its intended value.
  157.         final double sumSquaredDev = ss == Double.POSITIVE_INFINITY ?
  158.             Double.POSITIVE_INFINITY :
  159.             ss - (s * s / values.length);
  160.         return new SumOfSquaredDeviations(sumSquaredDev, m1);
  161.     }

  162.     /**
  163.      * Updates the state of the statistic to reflect the addition of {@code value}.
  164.      *
  165.      * @param value Value.
  166.      */
  167.     @Override
  168.     public void accept(double value) {
  169.         // "Updating one-pass algorithm"
  170.         // See: Chan et al (1983) Equation 1.3b
  171.         super.accept(value);
  172.         // Note: account for the half-deviation representation by scaling by 4=2^2
  173.         sumSquaredDev += (n - 1) * dev * nDev * 4;
  174.     }

  175.     /**
  176.      * Gets the sum of squared deviations of all input values.
  177.      *
  178.      * @return sum of squared deviations of all values.
  179.      */
  180.     double getSumOfSquaredDeviations() {
  181.         return Double.isFinite(getFirstMoment()) ? sumSquaredDev : Double.NaN;
  182.     }

  183.     /**
  184.      * Combines the state of another {@code SumOfSquaredDeviations} into this one.
  185.      *
  186.      * @param other Another {@code SumOfSquaredDeviations} to be combined.
  187.      * @return {@code this} instance after combining {@code other}.
  188.      */
  189.     SumOfSquaredDeviations combine(SumOfSquaredDeviations other) {
  190.         final long m = other.n;
  191.         if (n == 0) {
  192.             sumSquaredDev = other.sumSquaredDev;
  193.         } else if (m != 0) {
  194.             // "Updating one-pass algorithm"
  195.             // See: Chan et al (1983) Equation 1.5b (modified for the mean)
  196.             final double diffOfMean = getFirstMomentDifference(other);
  197.             final double sqDiffOfMean = diffOfMean * diffOfMean;
  198.             // Enforce symmetry
  199.             sumSquaredDev = (sumSquaredDev + other.sumSquaredDev) +
  200.                 sqDiffOfMean * (((double) n * m) / ((double) n + m));
  201.         }
  202.         super.combine(other);
  203.         return this;
  204.     }
  205. }