View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.statistics.descriptive;
18  
19  /**
20   * Computes the sum of squared deviations from the sample mean. This
21   * statistic is related to the second moment.
22   *
23   * <p>The following recursive updating formula is used:
24   * <p>Let
25   * <ul>
26   *  <li> dev = (current obs - previous mean) </li>
27   *  <li> n = number of observations (including current obs) </li>
28   * </ul>
29   * <p>Then
30   * <p>new value = old value + dev^2 * (n - 1) / n
31   * <p>returns the sum of squared deviations of all values seen so far.
32   *
33   * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
34   * This implementation does not check for overflow of the count.
35   *
36   * <p><strong>Note that this implementation is not synchronized.</strong> If
37   * multiple threads access an instance of this class concurrently, and at least
38   * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or
39   * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally.
40   *
41   * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept}
42   * and {@link StatisticAccumulator#combine(StatisticResult) combine}
43   * as {@code accumulator} and {@code combiner} functions of
44   * {@link java.util.stream.Collector Collector} on a parallel stream,
45   * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()}
46   * provides the necessary partitioning, isolation, and merging of results for
47   * safe and efficient parallel execution.
48   *
49   * <p>References:
50   * <ul>
51   *   <li>Chan, Golub and Levesque (1983)
52   *       Algorithms for Computing the Sample Variance: Analysis and Recommendations.
53   *       American Statistician, 37, 242-247.
54   *       <a href="https://doi.org/10.2307/2683386">doi: 10.2307/2683386</a>
55   * </ul>
56   *
57   * @since 1.1
58   */
59  class SumOfSquaredDeviations extends FirstMoment {
60      /** Sum of squared deviations of the values that have been added. */
61      protected double sumSquaredDev;
62  
63      /**
64       * Create an instance.
65       */
66      SumOfSquaredDeviations() {
67          // No-op
68      }
69  
70      /**
71       * Copy constructor.
72       *
73       * @param source Source to copy.
74       */
75      SumOfSquaredDeviations(SumOfSquaredDeviations source) {
76          super(source);
77          sumSquaredDev = source.sumSquaredDev;
78      }
79  
80      /**
81       * Create an instance with the given sum of squared deviations and first moment.
82       *
83       * @param sumSquaredDev Sum of squared deviations.
84       * @param m1 First moment.
85       */
86      private SumOfSquaredDeviations(double sumSquaredDev, FirstMoment m1) {
87          super(m1);
88          this.sumSquaredDev = sumSquaredDev;
89      }
90  
91      /**
92       * Create an instance with the given sum of squared deviations and first moment.
93       *
94       * <p>This constructor is used when creating the moment from integer values.
95       *
96       * @param sumSquaredDev Sum of squared deviations.
97       * @param m1 First moment.
98       * @param n Count of values.
99       */
100     SumOfSquaredDeviations(double sumSquaredDev, double m1, long n) {
101         super(m1, n);
102         this.sumSquaredDev = sumSquaredDev;
103     }
104 
105     /**
106      * Returns an instance populated using the input {@code values}.
107      *
108      * <p>Note: {@code SumOfSquaredDeviations} computed using {@link #accept accept} may be
109      * different from this instance.
110      *
111      * @param values Values.
112      * @return {@code SumOfSquaredDeviations} instance.
113      */
114     static SumOfSquaredDeviations of(double... values) {
115         if (values.length == 0) {
116             return new SumOfSquaredDeviations();
117         }
118         return create(FirstMoment.of(values), values, 0, values.length);
119     }
120 
121     /**
122      * Returns an instance populated using the specified range of {@code values}.
123      *
124      * <p>Note: {@code SumOfSquaredDeviations} computed using {@link #accept accept} may be
125      * different from this instance.
126      *
127      * <p>Warning: No range checks are performed.
128      *
129      * @param values Values.
130      * @param from Inclusive start of the range.
131      * @param to Exclusive end of the range.
132      * @return {@code SumOfSquaredDeviations} instance.
133      */
134     static SumOfSquaredDeviations ofRange(double[] values, int from, int to) {
135         if (from == to) {
136             return new SumOfSquaredDeviations();
137         }
138         return create(FirstMoment.ofRange(values, from, to), values, from, to);
139     }
140 
141     /**
142      * Creates the sum of squared deviations.
143      *
144      * <p>Uses the provided {@code sum} to create the first moment.
145      * This method is used by {@link DoubleStatistics} using a sum that can be reused
146      * for the {@link Sum} statistic.
147      *
148      * <p>Warning: No range checks are performed.
149      *
150      * @param sum Sum of the values.
151      * @param values Values.
152      * @param from Inclusive start of the range.
153      * @param to Exclusive end of the range.
154      * @return {@code SumOfSquaredDeviations} instance.
155      */
156     static SumOfSquaredDeviations createFromRange(org.apache.commons.numbers.core.Sum sum,
157                                                   double[] values, int from, int to) {
158         if (from == to) {
159             return new SumOfSquaredDeviations();
160         }
161         return create(FirstMoment.createFromRange(sum, values, from, to), values, from, to);
162     }
163 
164     /**
165      * Creates the sum of squared deviations.
166      *
167      * @param m1 First moment.
168      * @param values Values.
169      * @param from Inclusive start of the range.
170      * @param to Exclusive end of the range.
171      * @return {@code SumOfSquaredDeviations} instance.
172      */
173     private static SumOfSquaredDeviations create(FirstMoment m1, double[] values, int from, int to) {
174         // "Corrected two-pass algorithm"
175         // See: Chan et al (1983) Equation 1.7
176 
177         final double xbar = m1.getFirstMoment();
178         if (!Double.isFinite(xbar)) {
179             return new SumOfSquaredDeviations(Double.NaN, m1);
180         }
181         double s = 0;
182         double ss = 0;
183         for (int i = from; i < to; i++) {
184             final double dx = values[i] - xbar;
185             s += dx;
186             ss += dx * dx;
187         }
188         // The sum of squared deviations is ss - (s * s / n).
189         // The second term ideally should be zero; in practice it is a good approximation
190         // of the error in the first term.
191         // To prevent sumSquaredDev from spuriously attaining a NaN value
192         // when ss is infinite, assign it an infinite value which is its intended value.
193         final double sumSquaredDev = ss == Double.POSITIVE_INFINITY ?
194             Double.POSITIVE_INFINITY :
195             ss - (s * s / (to - from));
196         return new SumOfSquaredDeviations(sumSquaredDev, m1);
197     }
198 
199     /**
200      * Updates the state of the statistic to reflect the addition of {@code value}.
201      *
202      * @param value Value.
203      */
204     @Override
205     public void accept(double value) {
206         // "Updating one-pass algorithm"
207         // See: Chan et al (1983) Equation 1.3b
208         super.accept(value);
209         // Note: account for the half-deviation representation by scaling by 4=2^2
210         sumSquaredDev += (n - 1) * dev * nDev * 4;
211     }
212 
213     /**
214      * Gets the sum of squared deviations of all input values.
215      *
216      * @return sum of squared deviations of all values.
217      */
218     double getSumOfSquaredDeviations() {
219         return Double.isFinite(getFirstMoment()) ? sumSquaredDev : Double.NaN;
220     }
221 
222     /**
223      * Combines the state of another {@code SumOfSquaredDeviations} into this one.
224      *
225      * @param other Another {@code SumOfSquaredDeviations} to be combined.
226      * @return {@code this} instance after combining {@code other}.
227      */
228     SumOfSquaredDeviations combine(SumOfSquaredDeviations other) {
229         final long m = other.n;
230         if (n == 0) {
231             sumSquaredDev = other.sumSquaredDev;
232         } else if (m != 0) {
233             // "Updating one-pass algorithm"
234             // See: Chan et al (1983) Equation 1.5b (modified for the mean)
235             final double diffOfMean = getFirstMomentDifference(other);
236             final double sqDiffOfMean = diffOfMean * diffOfMean;
237             // Enforce symmetry
238             sumSquaredDev = (sumSquaredDev + other.sumSquaredDev) +
239                 sqDiffOfMean * (((double) n * m) / ((double) n + m));
240         }
241         super.combine(other);
242         return this;
243     }
244 }