SumOfSquaredDeviations.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.statistics.descriptive;

  18. /**
  19.  * Computes the sum of squared deviations from the sample mean. This
  20.  * statistic is related to the second moment.
  21.  *
  22.  * <p>The following recursive updating formula is used:
  23.  * <p>Let
  24.  * <ul>
  25.  *  <li> dev = (current obs - previous mean) </li>
  26.  *  <li> n = number of observations (including current obs) </li>
  27.  * </ul>
  28.  * <p>Then
  29.  * <p>new value = old value + dev^2 * (n - 1) / n
  30.  * <p>returns the sum of squared deviations of all values seen so far.
  31.  *
  32.  * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
  33.  * This implementation does not check for overflow of the count.
  34.  *
  35.  * <p><strong>Note that this implementation is not synchronized.</strong> If
  36.  * multiple threads access an instance of this class concurrently, and at least
  37.  * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or
  38.  * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally.
  39.  *
  40.  * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept}
  41.  * and {@link StatisticAccumulator#combine(StatisticResult) combine}
  42.  * as {@code accumulator} and {@code combiner} functions of
  43.  * {@link java.util.stream.Collector Collector} on a parallel stream,
  44.  * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()}
  45.  * provides the necessary partitioning, isolation, and merging of results for
  46.  * safe and efficient parallel execution.
  47.  *
  48.  * <p>References:
  49.  * <ul>
  50.  *   <li>Chan, Golub and Levesque (1983)
  51.  *       Algorithms for Computing the Sample Variance: Analysis and Recommendations.
  52.  *       American Statistician, 37, 242-247.
  53.  *       <a href="https://doi.org/10.2307/2683386">doi: 10.2307/2683386</a>
  54.  * </ul>
  55.  *
  56.  * @since 1.1
  57.  */
  58. class SumOfSquaredDeviations extends FirstMoment {
  59.     /** Sum of squared deviations of the values that have been added. */
  60.     protected double sumSquaredDev;

  61.     /**
  62.      * Create an instance.
  63.      */
  64.     SumOfSquaredDeviations() {
  65.         // No-op
  66.     }

  67.     /**
  68.      * Copy constructor.
  69.      *
  70.      * @param source Source to copy.
  71.      */
  72.     SumOfSquaredDeviations(SumOfSquaredDeviations source) {
  73.         super(source);
  74.         sumSquaredDev = source.sumSquaredDev;
  75.     }

  76.     /**
  77.      * Create an instance with the given sum of squared deviations and first moment.
  78.      *
  79.      * @param sumSquaredDev Sum of squared deviations.
  80.      * @param m1 First moment.
  81.      */
  82.     private SumOfSquaredDeviations(double sumSquaredDev, FirstMoment m1) {
  83.         super(m1);
  84.         this.sumSquaredDev = sumSquaredDev;
  85.     }

  86.     /**
  87.      * Create an instance with the given sum of squared deviations and first moment.
  88.      *
  89.      * <p>This constructor is used when creating the moment from integer values.
  90.      *
  91.      * @param sumSquaredDev Sum of squared deviations.
  92.      * @param m1 First moment.
  93.      * @param n Count of values.
  94.      */
  95.     SumOfSquaredDeviations(double sumSquaredDev, double m1, long n) {
  96.         super(m1, n);
  97.         this.sumSquaredDev = sumSquaredDev;
  98.     }

  99.     /**
  100.      * Returns an instance populated using the input {@code values}.
  101.      *
  102.      * <p>Note: {@code SumOfSquaredDeviations} computed using {@link #accept accept} may be
  103.      * different from this instance.
  104.      *
  105.      * @param values Values.
  106.      * @return {@code SumOfSquaredDeviations} instance.
  107.      */
  108.     static SumOfSquaredDeviations of(double... values) {
  109.         if (values.length == 0) {
  110.             return new SumOfSquaredDeviations();
  111.         }
  112.         return create(FirstMoment.of(values), values, 0, values.length);
  113.     }

  114.     /**
  115.      * Returns an instance populated using the specified range of {@code values}.
  116.      *
  117.      * <p>Note: {@code SumOfSquaredDeviations} computed using {@link #accept accept} may be
  118.      * different from this instance.
  119.      *
  120.      * <p>Warning: No range checks are performed.
  121.      *
  122.      * @param values Values.
  123.      * @param from Inclusive start of the range.
  124.      * @param to Exclusive end of the range.
  125.      * @return {@code SumOfSquaredDeviations} instance.
  126.      */
  127.     static SumOfSquaredDeviations ofRange(double[] values, int from, int to) {
  128.         if (from == to) {
  129.             return new SumOfSquaredDeviations();
  130.         }
  131.         return create(FirstMoment.ofRange(values, from, to), values, from, to);
  132.     }

  133.     /**
  134.      * Creates the sum of squared deviations.
  135.      *
  136.      * <p>Uses the provided {@code sum} to create the first moment.
  137.      * This method is used by {@link DoubleStatistics} using a sum that can be reused
  138.      * for the {@link Sum} statistic.
  139.      *
  140.      * <p>Warning: No range checks are performed.
  141.      *
  142.      * @param sum Sum of the values.
  143.      * @param values Values.
  144.      * @param from Inclusive start of the range.
  145.      * @param to Exclusive end of the range.
  146.      * @return {@code SumOfSquaredDeviations} instance.
  147.      */
  148.     static SumOfSquaredDeviations createFromRange(org.apache.commons.numbers.core.Sum sum,
  149.                                                   double[] values, int from, int to) {
  150.         if (from == to) {
  151.             return new SumOfSquaredDeviations();
  152.         }
  153.         return create(FirstMoment.createFromRange(sum, values, from, to), values, from, to);
  154.     }

  155.     /**
  156.      * Creates the sum of squared deviations.
  157.      *
  158.      * @param m1 First moment.
  159.      * @param values Values.
  160.      * @param from Inclusive start of the range.
  161.      * @param to Exclusive end of the range.
  162.      * @return {@code SumOfSquaredDeviations} instance.
  163.      */
  164.     private static SumOfSquaredDeviations create(FirstMoment m1, double[] values, int from, int to) {
  165.         // "Corrected two-pass algorithm"
  166.         // See: Chan et al (1983) Equation 1.7

  167.         final double xbar = m1.getFirstMoment();
  168.         if (!Double.isFinite(xbar)) {
  169.             return new SumOfSquaredDeviations(Double.NaN, m1);
  170.         }
  171.         double s = 0;
  172.         double ss = 0;
  173.         for (int i = from; i < to; i++) {
  174.             final double dx = values[i] - xbar;
  175.             s += dx;
  176.             ss += dx * dx;
  177.         }
  178.         // The sum of squared deviations is ss - (s * s / n).
  179.         // The second term ideally should be zero; in practice it is a good approximation
  180.         // of the error in the first term.
  181.         // To prevent sumSquaredDev from spuriously attaining a NaN value
  182.         // when ss is infinite, assign it an infinite value which is its intended value.
  183.         final double sumSquaredDev = ss == Double.POSITIVE_INFINITY ?
  184.             Double.POSITIVE_INFINITY :
  185.             ss - (s * s / (to - from));
  186.         return new SumOfSquaredDeviations(sumSquaredDev, m1);
  187.     }

  188.     /**
  189.      * Updates the state of the statistic to reflect the addition of {@code value}.
  190.      *
  191.      * @param value Value.
  192.      */
  193.     @Override
  194.     public void accept(double value) {
  195.         // "Updating one-pass algorithm"
  196.         // See: Chan et al (1983) Equation 1.3b
  197.         super.accept(value);
  198.         // Note: account for the half-deviation representation by scaling by 4=2^2
  199.         sumSquaredDev += (n - 1) * dev * nDev * 4;
  200.     }

  201.     /**
  202.      * Gets the sum of squared deviations of all input values.
  203.      *
  204.      * @return sum of squared deviations of all values.
  205.      */
  206.     double getSumOfSquaredDeviations() {
  207.         return Double.isFinite(getFirstMoment()) ? sumSquaredDev : Double.NaN;
  208.     }

  209.     /**
  210.      * Combines the state of another {@code SumOfSquaredDeviations} into this one.
  211.      *
  212.      * @param other Another {@code SumOfSquaredDeviations} to be combined.
  213.      * @return {@code this} instance after combining {@code other}.
  214.      */
  215.     SumOfSquaredDeviations combine(SumOfSquaredDeviations other) {
  216.         final long m = other.n;
  217.         if (n == 0) {
  218.             sumSquaredDev = other.sumSquaredDev;
  219.         } else if (m != 0) {
  220.             // "Updating one-pass algorithm"
  221.             // See: Chan et al (1983) Equation 1.5b (modified for the mean)
  222.             final double diffOfMean = getFirstMomentDifference(other);
  223.             final double sqDiffOfMean = diffOfMean * diffOfMean;
  224.             // Enforce symmetry
  225.             sumSquaredDev = (sumSquaredDev + other.sumSquaredDev) +
  226.                 sqDiffOfMean * (((double) n * m) / ((double) n + m));
  227.         }
  228.         super.combine(other);
  229.         return this;
  230.     }
  231. }