1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.statistics.descriptive; 18 19 import java.util.function.DoubleConsumer; 20 21 /** 22 * Computes the first moment (arithmetic mean) using the definitional formula: 23 * 24 * <pre>mean = sum(x_i) / n</pre> 25 * 26 * <p> To limit numeric errors, the value of the statistic is computed using the 27 * following recursive updating algorithm: 28 * <ol> 29 * <li>Initialize {@code m = } the first value</li> 30 * <li>For each additional value, update using <br> 31 * {@code m = m + (new value - m) / (number of observations)}</li> 32 * </ol> 33 * 34 * <p>Returns {@code NaN} if the dataset is empty. Note that 35 * {@code NaN} may also be returned if the input includes {@code NaN} and / or infinite 36 * values of opposite sign. 37 * 38 * <p>Supports up to 2<sup>63</sup> (exclusive) observations. 39 * This implementation does not check for overflow of the count. 40 * 41 * <p><strong>Note that this implementation is not synchronized.</strong> If 42 * multiple threads access an instance of this class concurrently, and at least 43 * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or 44 * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally. 45 * 46 * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept} 47 * and {@link StatisticAccumulator#combine(StatisticResult) combine} 48 * as {@code accumulator} and {@code combiner} functions of 49 * {@link java.util.stream.Collector Collector} on a parallel stream, 50 * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()} 51 * provides the necessary partitioning, isolation, and merging of results for 52 * safe and efficient parallel execution. 53 * 54 * <p>References: 55 * <ul> 56 * <li>Chan, Golub, Levesque (1983) 57 * Algorithms for Computing the Sample Variance. 58 * American Statistician, vol. 37, no. 3, pp. 242-247. 59 * <li>Ling (1974) 60 * Comparison of Several Algorithms for Computing Sample Means and Variances. 61 * Journal of the American Statistical Association, Vol. 69, No. 348, pp. 859-866. 62 * </ul> 63 * 64 * @since 1.1 65 */ 66 class FirstMoment implements DoubleConsumer { 67 /** The downscale constant. Used to avoid overflow for all finite input. */ 68 private static final double DOWNSCALE = 0.5; 69 /** The rescale constant. */ 70 private static final double RESCALE = 2; 71 72 /** Count of values that have been added. */ 73 protected long n; 74 75 /** 76 * Half the deviation of most recently added value from the previous first moment. 77 * Retained to prevent repeated computation in higher order moments. 78 * 79 * <p>Note: This is (x - m1) / 2. It is computed as a half value to prevent overflow 80 * when computing for any finite value x and m. 81 * 82 * <p>This value is not used in the {@link #combine(FirstMoment)} method. 83 */ 84 protected double dev; 85 86 /** 87 * Half the deviation of most recently added value from the previous first moment, 88 * normalized by current sample size. Retained to prevent repeated 89 * computation in higher order moments. 90 * 91 * <p>Note: This is (x - m1) / 2n. It is computed as a half value to prevent overflow 92 * when computing for any finite value x and m. 93 * 94 * Note: This value is not used in the {@link #combine(FirstMoment)} method. 95 */ 96 protected double nDev; 97 98 /** First moment of values that have been added. 99 * This is stored as a half value to prevent overflow for any finite input. 100 * Benchmarks show this has negligible performance impact. */ 101 private double m1; 102 103 /** 104 * Running sum of values seen so far. 105 * This is not used in the computation of mean. Used as a return value for first moment when 106 * it is non-finite. 107 */ 108 private double nonFiniteValue; 109 110 /** 111 * Create an instance. 112 */ 113 FirstMoment() { 114 // No-op 115 } 116 117 /** 118 * Copy constructor. 119 * 120 * @param source Source to copy. 121 */ 122 FirstMoment(FirstMoment source) { 123 m1 = source.m1; 124 n = source.n; 125 nonFiniteValue = source.nonFiniteValue; 126 } 127 128 /** 129 * Create an instance with the given first moment. 130 * 131 * <p>This constructor is used when creating the moment from a finite sum of values. 132 * 133 * @param m1 First moment. 134 * @param n Count of values. 135 */ 136 FirstMoment(double m1, long n) { 137 this.m1 = m1 * DOWNSCALE; 138 this.n = n; 139 } 140 141 /** 142 * Returns an instance populated using the input {@code values}. 143 * 144 * <p>Note: {@code FirstMoment} computed using {@link #accept} may be different from 145 * this instance. 146 * 147 * @param values Values. 148 * @return {@code FirstMoment} instance. 149 */ 150 static FirstMoment of(double... values) { 151 if (values.length == 0) { 152 return new FirstMoment(); 153 } 154 // In the typical use-case a sum of values will not overflow and 155 // is faster than the rolling algorithm 156 return createFromRange(org.apache.commons.numbers.core.Sum.of(values), values, 0, values.length); 157 } 158 159 /** 160 * Returns an instance populated using the specified range of {@code values}. 161 * 162 * <p>Note: {@code FirstMoment} computed using {@link #accept} may be different from 163 * this instance. 164 * 165 * <p>Warning: No range checks are performed. 166 * 167 * @param values Values. 168 * @param from Inclusive start of the range. 169 * @param to Exclusive end of the range. 170 * @return {@code FirstMoment} instance. 171 * @throws IndexOutOfBoundsException if the sub-range is out of bounds 172 * @since 1.2 173 */ 174 static FirstMoment ofRange(double[] values, int from, int to) { 175 if (from == to) { 176 return new FirstMoment(); 177 } 178 return createFromRange(Statistics.sum(values, from, to), values, from, to); 179 } 180 181 /** 182 * Creates the first moment. 183 * 184 * <p>Uses the provided {@code sum} if finite; otherwise reverts to using the rolling moment 185 * to protect from overflow and adds a second pass correction term. 186 * 187 * <p>This method is used by {@link DoubleStatistics} using a sum that can be reused 188 * for the {@link Sum} statistic. 189 * 190 * <p>Warning: No range checks are performed. 191 * 192 * @param sum Sum of the values. 193 * @param values Values. 194 * @param from Inclusive start of the range. 195 * @param to Exclusive end of the range. 196 * @return {@code FirstMoment} instance. 197 */ 198 static FirstMoment createFromRange(org.apache.commons.numbers.core.Sum sum, 199 double[] values, int from, int to) { 200 // Protect against empty values 201 if (from == to) { 202 return new FirstMoment(); 203 } 204 205 final double s = sum.getAsDouble(); 206 if (Double.isFinite(s)) { 207 return new FirstMoment(s / (to - from), to - from); 208 } 209 210 // "Corrected two-pass algorithm" 211 212 // First pass 213 final FirstMoment m1 = create(values, from, to); 214 final double xbar = m1.getFirstMoment(); 215 if (!Double.isFinite(xbar)) { 216 return m1; 217 } 218 // Second pass 219 double correction = 0; 220 for (int i = from; i < to; i++) { 221 correction += values[i] - xbar; 222 } 223 // Note: Correction may be infinite 224 if (Double.isFinite(correction)) { 225 // Down scale the correction to the half representation 226 m1.m1 += DOWNSCALE * correction / m1.n; 227 } 228 return m1; 229 } 230 231 /** 232 * Creates the first moment using a rolling algorithm. 233 * 234 * <p>This duplicates the algorithm in the {@link #accept(double)} method 235 * with optimisations due to the processing of an entire array: 236 * <ul> 237 * <li>Avoid updating (unused) class level working variables. 238 * <li>Only computing the non-finite value if required. 239 * </ul> 240 * 241 * @param values Values. 242 * @param from Inclusive start of the range. 243 * @param to Exclusive end of the range. 244 * @return the first moment 245 */ 246 private static FirstMoment create(double[] values, int from, int to) { 247 double m1 = 0; 248 int n = 0; 249 for (int i = from; i < to; i++) { 250 // Downscale to avoid overflow for all finite input 251 m1 += (values[i] * DOWNSCALE - m1) / ++n; 252 } 253 final FirstMoment m = new FirstMoment(); 254 m.n = n; 255 // Note: m1 is already downscaled here 256 m.m1 = m1; 257 // The non-finite value is only relevant if the data contains inf/nan 258 if (!Double.isFinite(m1 * RESCALE)) { 259 m.nonFiniteValue = computeNonFiniteValue(values, from, to); 260 } 261 return m; 262 } 263 264 /** 265 * Compute the result in the event of non-finite values. 266 * 267 * @param values Values. 268 * @param from Inclusive start of the range. 269 * @param to Exclusive end of the range. 270 * @return the non-finite result 271 */ 272 private static double computeNonFiniteValue(double[] values, int from, int to) { 273 double sum = 0; 274 for (int i = from; i < to; i++) { 275 // Scaling down values prevents overflow of finites. 276 sum += values[i] * Double.MIN_NORMAL; 277 } 278 return sum; 279 } 280 281 /** 282 * Updates the state of the statistic to reflect the addition of {@code value}. 283 * 284 * @param value Value. 285 */ 286 @Override 287 public void accept(double value) { 288 // "Updating one-pass algorithm" 289 // See: Chan et al (1983) Equation 1.3a 290 // m_{i+1} = m_i + (x - m_i) / (i + 1) 291 // This is modified with scaling to avoid overflow for all finite input. 292 // Scaling the input down by a factor of two ensures that the scaling is lossless. 293 // Sub-classes must alter their scaling factors when using the computed deviations. 294 295 // Note: Maintain the correct non-finite result. 296 // Scaling down values prevents overflow of finites. 297 nonFiniteValue += value * Double.MIN_NORMAL; 298 // Scale down the input 299 dev = value * DOWNSCALE - m1; 300 nDev = dev / ++n; 301 m1 += nDev; 302 } 303 304 /** 305 * Gets the first moment of all input values. 306 * 307 * <p>When no values have been added, the result is {@code NaN}. 308 * 309 * @return {@code First moment} of all values, if it is finite; 310 * {@code +/-Infinity}, if infinities of the same sign have been encountered; 311 * {@code NaN} otherwise. 312 */ 313 double getFirstMoment() { 314 // Scale back to the original magnitude 315 final double m = m1 * RESCALE; 316 if (Double.isFinite(m)) { 317 return n == 0 ? Double.NaN : m; 318 } 319 // A non-finite value must have been encountered, return nonFiniteValue which represents m1. 320 return nonFiniteValue; 321 } 322 323 /** 324 * Combines the state of another {@code FirstMoment} into this one. 325 * 326 * @param other Another {@code FirstMoment} to be combined. 327 * @return {@code this} instance after combining {@code other}. 328 */ 329 FirstMoment combine(FirstMoment other) { 330 nonFiniteValue += other.nonFiniteValue; 331 final double mu1 = this.m1; 332 final double mu2 = other.m1; 333 final long n1 = n; 334 final long n2 = other.n; 335 n = n1 + n2; 336 // Adjust the mean with the weighted difference: 337 // m1 = m1 + (m2 - m1) * n2 / (n1 + n2) 338 // The half-representation ensures the difference of means is at most MAX_VALUE 339 // so the combine can avoid scaling. 340 if (n1 == n2) { 341 // Optimisation for equal sizes: m1 = (m1 + m2) / 2 342 m1 = (mu1 + mu2) * 0.5; 343 } else { 344 m1 = combine(mu1, mu2, n1, n2); 345 } 346 return this; 347 } 348 349 /** 350 * Combine the moments. This method is used to enforce symmetry. It assumes that 351 * the two sizes are not identical, and at least one size is non-zero. 352 * 353 * @param m1 Moment 1. 354 * @param m2 Moment 2. 355 * @param n1 Size of sample 1. 356 * @param n2 Size of sample 2. 357 * @return the combined first moment 358 */ 359 private static double combine(double m1, double m2, long n1, long n2) { 360 // Note: If either size is zero the weighted difference is zero and 361 // the other moment is unchanged. 362 return n2 < n1 ? 363 m1 + (m2 - m1) * ((double) n2 / (n1 + n2)) : 364 m2 + (m1 - m2) * ((double) n1 / (n1 + n2)); 365 } 366 367 /** 368 * Gets the difference of the first moment between {@code this} moment and the 369 * {@code other} moment. This is provided for sub-classes. 370 * 371 * @param other Other moment. 372 * @return the difference 373 */ 374 double getFirstMomentDifference(FirstMoment other) { 375 // Scale back to the original magnitude 376 return (m1 - other.m1) * RESCALE; 377 } 378 379 /** 380 * Gets the half the difference of the first moment between {@code this} moment and 381 * the {@code other} moment. This is provided for sub-classes. 382 * 383 * @param other Other moment. 384 * @return the difference 385 */ 386 double getFirstMomentHalfDifference(FirstMoment other) { 387 return m1 - other.m1; 388 } 389 }