1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.statistics.descriptive;
18
19 import java.util.function.DoubleConsumer;
20
21 /**
22 * Computes the first moment (arithmetic mean) using the definitional formula:
23 *
24 * <pre>mean = sum(x_i) / n</pre>
25 *
26 * <p> To limit numeric errors, the value of the statistic is computed using the
27 * following recursive updating algorithm:
28 * <ol>
29 * <li>Initialize {@code m = } the first value</li>
30 * <li>For each additional value, update using <br>
31 * {@code m = m + (new value - m) / (number of observations)}</li>
32 * </ol>
33 *
34 * <p>Returns {@code NaN} if the dataset is empty. Note that
35 * {@code NaN} may also be returned if the input includes {@code NaN} and / or infinite
36 * values of opposite sign.
37 *
38 * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
39 * This implementation does not check for overflow of the count.
40 *
41 * <p><strong>Note that this implementation is not synchronized.</strong> If
42 * multiple threads access an instance of this class concurrently, and at least
43 * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or
44 * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally.
45 *
46 * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept}
47 * and {@link StatisticAccumulator#combine(StatisticResult) combine}
48 * as {@code accumulator} and {@code combiner} functions of
49 * {@link java.util.stream.Collector Collector} on a parallel stream,
50 * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()}
51 * provides the necessary partitioning, isolation, and merging of results for
52 * safe and efficient parallel execution.
53 *
54 * <p>References:
55 * <ul>
56 * <li>Chan, Golub, Levesque (1983)
57 * Algorithms for Computing the Sample Variance.
58 * American Statistician, vol. 37, no. 3, pp. 242-247.
59 * <li>Ling (1974)
60 * Comparison of Several Algorithms for Computing Sample Means and Variances.
61 * Journal of the American Statistical Association, Vol. 69, No. 348, pp. 859-866.
62 * </ul>
63 *
64 * @since 1.1
65 */
66 class FirstMoment implements DoubleConsumer {
67 /** The downscale constant. Used to avoid overflow for all finite input. */
68 private static final double DOWNSCALE = 0.5;
69 /** The rescale constant. */
70 private static final double RESCALE = 2;
71
72 /** Count of values that have been added. */
73 protected long n;
74
75 /**
76 * Half the deviation of most recently added value from the previous first moment.
77 * Retained to prevent repeated computation in higher order moments.
78 *
79 * <p>Note: This is (x - m1) / 2. It is computed as a half value to prevent overflow
80 * when computing for any finite value x and m.
81 *
82 * <p>This value is not used in the {@link #combine(FirstMoment)} method.
83 */
84 protected double dev;
85
86 /**
87 * Half the deviation of most recently added value from the previous first moment,
88 * normalized by current sample size. Retained to prevent repeated
89 * computation in higher order moments.
90 *
91 * <p>Note: This is (x - m1) / 2n. It is computed as a half value to prevent overflow
92 * when computing for any finite value x and m.
93 *
94 * Note: This value is not used in the {@link #combine(FirstMoment)} method.
95 */
96 protected double nDev;
97
98 /** First moment of values that have been added.
99 * This is stored as a half value to prevent overflow for any finite input.
100 * Benchmarks show this has negligible performance impact. */
101 private double m1;
102
103 /**
104 * Running sum of values seen so far.
105 * This is not used in the computation of mean. Used as a return value for first moment when
106 * it is non-finite.
107 */
108 private double nonFiniteValue;
109
110 /**
111 * Create an instance.
112 */
113 FirstMoment() {
114 // No-op
115 }
116
117 /**
118 * Copy constructor.
119 *
120 * @param source Source to copy.
121 */
122 FirstMoment(FirstMoment source) {
123 m1 = source.m1;
124 n = source.n;
125 nonFiniteValue = source.nonFiniteValue;
126 }
127
128 /**
129 * Create an instance with the given first moment.
130 *
131 * <p>This constructor is used when creating the moment from a finite sum of values.
132 *
133 * @param m1 First moment.
134 * @param n Count of values.
135 */
136 FirstMoment(double m1, long n) {
137 this.m1 = m1 * DOWNSCALE;
138 this.n = n;
139 }
140
141 /**
142 * Returns an instance populated using the input {@code values}.
143 *
144 * <p>Note: {@code FirstMoment} computed using {@link #accept} may be different from
145 * this instance.
146 *
147 * @param values Values.
148 * @return {@code FirstMoment} instance.
149 */
150 static FirstMoment of(double... values) {
151 if (values.length == 0) {
152 return new FirstMoment();
153 }
154 // In the typical use-case a sum of values will not overflow and
155 // is faster than the rolling algorithm
156 return createFromRange(org.apache.commons.numbers.core.Sum.of(values), values, 0, values.length);
157 }
158
159 /**
160 * Returns an instance populated using the specified range of {@code values}.
161 *
162 * <p>Note: {@code FirstMoment} computed using {@link #accept} may be different from
163 * this instance.
164 *
165 * <p>Warning: No range checks are performed.
166 *
167 * @param values Values.
168 * @param from Inclusive start of the range.
169 * @param to Exclusive end of the range.
170 * @return {@code FirstMoment} instance.
171 * @throws IndexOutOfBoundsException if the sub-range is out of bounds
172 * @since 1.2
173 */
174 static FirstMoment ofRange(double[] values, int from, int to) {
175 if (from == to) {
176 return new FirstMoment();
177 }
178 return createFromRange(Statistics.sum(values, from, to), values, from, to);
179 }
180
181 /**
182 * Creates the first moment.
183 *
184 * <p>Uses the provided {@code sum} if finite; otherwise reverts to using the rolling moment
185 * to protect from overflow and adds a second pass correction term.
186 *
187 * <p>This method is used by {@link DoubleStatistics} using a sum that can be reused
188 * for the {@link Sum} statistic.
189 *
190 * <p>Warning: No range checks are performed.
191 *
192 * @param sum Sum of the values.
193 * @param values Values.
194 * @param from Inclusive start of the range.
195 * @param to Exclusive end of the range.
196 * @return {@code FirstMoment} instance.
197 */
198 static FirstMoment createFromRange(org.apache.commons.numbers.core.Sum sum,
199 double[] values, int from, int to) {
200 // Protect against empty values
201 if (from == to) {
202 return new FirstMoment();
203 }
204
205 final double s = sum.getAsDouble();
206 if (Double.isFinite(s)) {
207 return new FirstMoment(s / (to - from), to - from);
208 }
209
210 // "Corrected two-pass algorithm"
211
212 // First pass
213 final FirstMoment m1 = create(values, from, to);
214 final double xbar = m1.getFirstMoment();
215 if (!Double.isFinite(xbar)) {
216 return m1;
217 }
218 // Second pass
219 double correction = 0;
220 for (int i = from; i < to; i++) {
221 correction += values[i] - xbar;
222 }
223 // Note: Correction may be infinite
224 if (Double.isFinite(correction)) {
225 // Down scale the correction to the half representation
226 m1.m1 += DOWNSCALE * correction / m1.n;
227 }
228 return m1;
229 }
230
231 /**
232 * Creates the first moment using a rolling algorithm.
233 *
234 * <p>This duplicates the algorithm in the {@link #accept(double)} method
235 * with optimisations due to the processing of an entire array:
236 * <ul>
237 * <li>Avoid updating (unused) class level working variables.
238 * <li>Only computing the non-finite value if required.
239 * </ul>
240 *
241 * @param values Values.
242 * @param from Inclusive start of the range.
243 * @param to Exclusive end of the range.
244 * @return the first moment
245 */
246 private static FirstMoment create(double[] values, int from, int to) {
247 double m1 = 0;
248 int n = 0;
249 for (int i = from; i < to; i++) {
250 // Downscale to avoid overflow for all finite input
251 m1 += (values[i] * DOWNSCALE - m1) / ++n;
252 }
253 final FirstMoment m = new FirstMoment();
254 m.n = n;
255 // Note: m1 is already downscaled here
256 m.m1 = m1;
257 // The non-finite value is only relevant if the data contains inf/nan
258 if (!Double.isFinite(m1 * RESCALE)) {
259 m.nonFiniteValue = computeNonFiniteValue(values, from, to);
260 }
261 return m;
262 }
263
264 /**
265 * Compute the result in the event of non-finite values.
266 *
267 * @param values Values.
268 * @param from Inclusive start of the range.
269 * @param to Exclusive end of the range.
270 * @return the non-finite result
271 */
272 private static double computeNonFiniteValue(double[] values, int from, int to) {
273 double sum = 0;
274 for (int i = from; i < to; i++) {
275 // Scaling down values prevents overflow of finites.
276 sum += values[i] * Double.MIN_NORMAL;
277 }
278 return sum;
279 }
280
281 /**
282 * Updates the state of the statistic to reflect the addition of {@code value}.
283 *
284 * @param value Value.
285 */
286 @Override
287 public void accept(double value) {
288 // "Updating one-pass algorithm"
289 // See: Chan et al (1983) Equation 1.3a
290 // m_{i+1} = m_i + (x - m_i) / (i + 1)
291 // This is modified with scaling to avoid overflow for all finite input.
292 // Scaling the input down by a factor of two ensures that the scaling is lossless.
293 // Sub-classes must alter their scaling factors when using the computed deviations.
294
295 // Note: Maintain the correct non-finite result.
296 // Scaling down values prevents overflow of finites.
297 nonFiniteValue += value * Double.MIN_NORMAL;
298 // Scale down the input
299 dev = value * DOWNSCALE - m1;
300 nDev = dev / ++n;
301 m1 += nDev;
302 }
303
304 /**
305 * Gets the first moment of all input values.
306 *
307 * <p>When no values have been added, the result is {@code NaN}.
308 *
309 * @return {@code First moment} of all values, if it is finite;
310 * {@code +/-Infinity}, if infinities of the same sign have been encountered;
311 * {@code NaN} otherwise.
312 */
313 double getFirstMoment() {
314 // Scale back to the original magnitude
315 final double m = m1 * RESCALE;
316 if (Double.isFinite(m)) {
317 return n == 0 ? Double.NaN : m;
318 }
319 // A non-finite value must have been encountered, return nonFiniteValue which represents m1.
320 return nonFiniteValue;
321 }
322
323 /**
324 * Combines the state of another {@code FirstMoment} into this one.
325 *
326 * @param other Another {@code FirstMoment} to be combined.
327 * @return {@code this} instance after combining {@code other}.
328 */
329 FirstMoment combine(FirstMoment other) {
330 nonFiniteValue += other.nonFiniteValue;
331 final double mu1 = this.m1;
332 final double mu2 = other.m1;
333 final long n1 = n;
334 final long n2 = other.n;
335 n = n1 + n2;
336 // Adjust the mean with the weighted difference:
337 // m1 = m1 + (m2 - m1) * n2 / (n1 + n2)
338 // The half-representation ensures the difference of means is at most MAX_VALUE
339 // so the combine can avoid scaling.
340 if (n1 == n2) {
341 // Optimisation for equal sizes: m1 = (m1 + m2) / 2
342 m1 = (mu1 + mu2) * 0.5;
343 } else {
344 m1 = combine(mu1, mu2, n1, n2);
345 }
346 return this;
347 }
348
349 /**
350 * Combine the moments. This method is used to enforce symmetry. It assumes that
351 * the two sizes are not identical, and at least one size is non-zero.
352 *
353 * @param m1 Moment 1.
354 * @param m2 Moment 2.
355 * @param n1 Size of sample 1.
356 * @param n2 Size of sample 2.
357 * @return the combined first moment
358 */
359 private static double combine(double m1, double m2, long n1, long n2) {
360 // Note: If either size is zero the weighted difference is zero and
361 // the other moment is unchanged.
362 return n2 < n1 ?
363 m1 + (m2 - m1) * ((double) n2 / (n1 + n2)) :
364 m2 + (m1 - m2) * ((double) n1 / (n1 + n2));
365 }
366
367 /**
368 * Gets the difference of the first moment between {@code this} moment and the
369 * {@code other} moment. This is provided for sub-classes.
370 *
371 * @param other Other moment.
372 * @return the difference
373 */
374 double getFirstMomentDifference(FirstMoment other) {
375 // Scale back to the original magnitude
376 return (m1 - other.m1) * RESCALE;
377 }
378
379 /**
380 * Gets the half the difference of the first moment between {@code this} moment and
381 * the {@code other} moment. This is provided for sub-classes.
382 *
383 * @param other Other moment.
384 * @return the difference
385 */
386 double getFirstMomentHalfDifference(FirstMoment other) {
387 return m1 - other.m1;
388 }
389 }