1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.statistics.descriptive;
18
19 /**
20 * Computes the sum of squared deviations from the sample mean. This
21 * statistic is related to the second moment.
22 *
23 * <p>The following recursive updating formula is used:
24 * <p>Let
25 * <ul>
26 * <li> dev = (current obs - previous mean) </li>
27 * <li> n = number of observations (including current obs) </li>
28 * </ul>
29 * <p>Then
30 * <p>new value = old value + dev^2 * (n - 1) / n
 * <p>The new value is the sum of squared deviations of all values seen so far.
32 *
33 * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
34 * This implementation does not check for overflow of the count.
35 *
36 * <p><strong>Note that this implementation is not synchronized.</strong> If
37 * multiple threads access an instance of this class concurrently, and at least
38 * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or
39 * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally.
40 *
41 * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept}
42 * and {@link StatisticAccumulator#combine(StatisticResult) combine}
43 * as {@code accumulator} and {@code combiner} functions of
44 * {@link java.util.stream.Collector Collector} on a parallel stream,
45 * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()}
46 * provides the necessary partitioning, isolation, and merging of results for
47 * safe and efficient parallel execution.
48 *
49 * <p>References:
50 * <ul>
 * <li>Chan, Golub and LeVeque (1983)
52 * Algorithms for Computing the Sample Variance: Analysis and Recommendations.
53 * American Statistician, 37, 242-247.
54 * <a href="https://doi.org/10.2307/2683386">doi: 10.2307/2683386</a>
55 * </ul>
56 *
57 * @since 1.1
58 */
59 class SumOfSquaredDeviations extends FirstMoment {
60 /** Sum of squared deviations of the values that have been added. */
61 protected double sumSquaredDev;
62
63 /**
64 * Create an instance.
65 */
66 SumOfSquaredDeviations() {
67 // No-op
68 }
69
70 /**
71 * Copy constructor.
72 *
73 * @param source Source to copy.
74 */
75 SumOfSquaredDeviations(SumOfSquaredDeviations source) {
76 super(source);
77 sumSquaredDev = source.sumSquaredDev;
78 }
79
80 /**
81 * Create an instance with the given sum of squared deviations and first moment.
82 *
83 * @param sumSquaredDev Sum of squared deviations.
84 * @param m1 First moment.
85 */
86 private SumOfSquaredDeviations(double sumSquaredDev, FirstMoment m1) {
87 super(m1);
88 this.sumSquaredDev = sumSquaredDev;
89 }
90
91 /**
92 * Create an instance with the given sum of squared deviations and first moment.
93 *
94 * <p>This constructor is used when creating the moment from integer values.
95 *
96 * @param sumSquaredDev Sum of squared deviations.
97 * @param m1 First moment.
98 * @param n Count of values.
99 */
100 SumOfSquaredDeviations(double sumSquaredDev, double m1, long n) {
101 super(m1, n);
102 this.sumSquaredDev = sumSquaredDev;
103 }
104
105 /**
106 * Returns an instance populated using the input {@code values}.
107 *
108 * <p>Note: {@code SumOfSquaredDeviations} computed using {@link #accept accept} may be
109 * different from this instance.
110 *
111 * @param values Values.
112 * @return {@code SumOfSquaredDeviations} instance.
113 */
114 static SumOfSquaredDeviations of(double... values) {
115 if (values.length == 0) {
116 return new SumOfSquaredDeviations();
117 }
118 return create(FirstMoment.of(values), values, 0, values.length);
119 }
120
121 /**
122 * Returns an instance populated using the specified range of {@code values}.
123 *
124 * <p>Note: {@code SumOfSquaredDeviations} computed using {@link #accept accept} may be
125 * different from this instance.
126 *
127 * <p>Warning: No range checks are performed.
128 *
129 * @param values Values.
130 * @param from Inclusive start of the range.
131 * @param to Exclusive end of the range.
132 * @return {@code SumOfSquaredDeviations} instance.
133 */
134 static SumOfSquaredDeviations ofRange(double[] values, int from, int to) {
135 if (from == to) {
136 return new SumOfSquaredDeviations();
137 }
138 return create(FirstMoment.ofRange(values, from, to), values, from, to);
139 }
140
141 /**
142 * Creates the sum of squared deviations.
143 *
144 * <p>Uses the provided {@code sum} to create the first moment.
145 * This method is used by {@link DoubleStatistics} using a sum that can be reused
146 * for the {@link Sum} statistic.
147 *
148 * <p>Warning: No range checks are performed.
149 *
150 * @param sum Sum of the values.
151 * @param values Values.
152 * @param from Inclusive start of the range.
153 * @param to Exclusive end of the range.
154 * @return {@code SumOfSquaredDeviations} instance.
155 */
156 static SumOfSquaredDeviations createFromRange(org.apache.commons.numbers.core.Sum sum,
157 double[] values, int from, int to) {
158 if (from == to) {
159 return new SumOfSquaredDeviations();
160 }
161 return create(FirstMoment.createFromRange(sum, values, from, to), values, from, to);
162 }
163
164 /**
165 * Creates the sum of squared deviations.
166 *
167 * @param m1 First moment.
168 * @param values Values.
169 * @param from Inclusive start of the range.
170 * @param to Exclusive end of the range.
171 * @return {@code SumOfSquaredDeviations} instance.
172 */
173 private static SumOfSquaredDeviations create(FirstMoment m1, double[] values, int from, int to) {
174 // "Corrected two-pass algorithm"
175 // See: Chan et al (1983) Equation 1.7
176
177 final double xbar = m1.getFirstMoment();
178 if (!Double.isFinite(xbar)) {
179 return new SumOfSquaredDeviations(Double.NaN, m1);
180 }
181 double s = 0;
182 double ss = 0;
183 for (int i = from; i < to; i++) {
184 final double dx = values[i] - xbar;
185 s += dx;
186 ss += dx * dx;
187 }
188 // The sum of squared deviations is ss - (s * s / n).
189 // The second term ideally should be zero; in practice it is a good approximation
190 // of the error in the first term.
191 // To prevent sumSquaredDev from spuriously attaining a NaN value
192 // when ss is infinite, assign it an infinite value which is its intended value.
193 final double sumSquaredDev = ss == Double.POSITIVE_INFINITY ?
194 Double.POSITIVE_INFINITY :
195 ss - (s * s / (to - from));
196 return new SumOfSquaredDeviations(sumSquaredDev, m1);
197 }
198
199 /**
200 * Updates the state of the statistic to reflect the addition of {@code value}.
201 *
202 * @param value Value.
203 */
204 @Override
205 public void accept(double value) {
206 // "Updating one-pass algorithm"
207 // See: Chan et al (1983) Equation 1.3b
208 super.accept(value);
209 // Note: account for the half-deviation representation by scaling by 4=2^2
210 sumSquaredDev += (n - 1) * dev * nDev * 4;
211 }
212
213 /**
214 * Gets the sum of squared deviations of all input values.
215 *
216 * @return sum of squared deviations of all values.
217 */
218 double getSumOfSquaredDeviations() {
219 return Double.isFinite(getFirstMoment()) ? sumSquaredDev : Double.NaN;
220 }
221
222 /**
223 * Combines the state of another {@code SumOfSquaredDeviations} into this one.
224 *
225 * @param other Another {@code SumOfSquaredDeviations} to be combined.
226 * @return {@code this} instance after combining {@code other}.
227 */
228 SumOfSquaredDeviations combine(SumOfSquaredDeviations other) {
229 final long m = other.n;
230 if (n == 0) {
231 sumSquaredDev = other.sumSquaredDev;
232 } else if (m != 0) {
233 // "Updating one-pass algorithm"
234 // See: Chan et al (1983) Equation 1.5b (modified for the mean)
235 final double diffOfMean = getFirstMomentDifference(other);
236 final double sqDiffOfMean = diffOfMean * diffOfMean;
237 // Enforce symmetry
238 sumSquaredDev = (sumSquaredDev + other.sumSquaredDev) +
239 sqDiffOfMean * (((double) n * m) / ((double) n + m));
240 }
241 super.combine(other);
242 return this;
243 }
244 }