1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.statistics.descriptive; 18 19 /** 20 * Computes the sum of squared deviations from the sample mean. This 21 * statistic is related to the second moment. 22 * 23 * <p>The following recursive updating formula is used: 24 * <p>Let 25 * <ul> 26 * <li> dev = (current obs - previous mean) </li> 27 * <li> n = number of observations (including current obs) </li> 28 * </ul> 29 * <p>Then 30 * <p>new value = old value + dev^2 * (n - 1) / n 31 * <p>returns the sum of squared deviations of all values seen so far. 32 * 33 * <p>Supports up to 2<sup>63</sup> (exclusive) observations. 34 * This implementation does not check for overflow of the count. 35 * 36 * <p><strong>Note that this implementation is not synchronized.</strong> If 37 * multiple threads access an instance of this class concurrently, and at least 38 * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or 39 * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally. 40 * 41 * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept} 42 * and {@link StatisticAccumulator#combine(StatisticResult) combine} 43 * as {@code accumulator} and {@code combiner} functions of 44 * {@link java.util.stream.Collector Collector} on a parallel stream, 45 * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()} 46 * provides the necessary partitioning, isolation, and merging of results for 47 * safe and efficient parallel execution. 48 * 49 * <p>References: 50 * <ul> 51 * <li>Chan, Golub and Levesque (1983) 52 * Algorithms for Computing the Sample Variance: Analysis and Recommendations. 53 * American Statistician, 37, 242-247. 54 * <a href="https://doi.org/10.2307/2683386">doi: 10.2307/2683386</a> 55 * </ul> 56 * 57 * @since 1.1 58 */ 59 class SumOfSquaredDeviations extends FirstMoment { 60 /** Sum of squared deviations of the values that have been added. */ 61 protected double sumSquaredDev; 62 63 /** 64 * Create an instance. 65 */ 66 SumOfSquaredDeviations() { 67 // No-op 68 } 69 70 /** 71 * Copy constructor. 72 * 73 * @param source Source to copy. 74 */ 75 SumOfSquaredDeviations(SumOfSquaredDeviations source) { 76 super(source); 77 sumSquaredDev = source.sumSquaredDev; 78 } 79 80 /** 81 * Create an instance with the given sum of squared deviations and first moment. 82 * 83 * @param sumSquaredDev Sum of squared deviations. 84 * @param m1 First moment. 85 */ 86 private SumOfSquaredDeviations(double sumSquaredDev, FirstMoment m1) { 87 super(m1); 88 this.sumSquaredDev = sumSquaredDev; 89 } 90 91 /** 92 * Create an instance with the given sum of squared deviations and first moment. 93 * 94 * <p>This constructor is used when creating the moment from integer values. 95 * 96 * @param sumSquaredDev Sum of squared deviations. 97 * @param m1 First moment. 98 * @param n Count of values. 99 */ 100 SumOfSquaredDeviations(double sumSquaredDev, double m1, long n) { 101 super(m1, n); 102 this.sumSquaredDev = sumSquaredDev; 103 } 104 105 /** 106 * Returns an instance populated using the input {@code values}. 107 * 108 * <p>Note: {@code SumOfSquaredDeviations} computed using {@link #accept accept} may be 109 * different from this instance. 110 * 111 * @param values Values. 112 * @return {@code SumOfSquaredDeviations} instance. 113 */ 114 static SumOfSquaredDeviations of(double... values) { 115 if (values.length == 0) { 116 return new SumOfSquaredDeviations(); 117 } 118 return create(FirstMoment.of(values), values, 0, values.length); 119 } 120 121 /** 122 * Returns an instance populated using the specified range of {@code values}. 123 * 124 * <p>Note: {@code SumOfSquaredDeviations} computed using {@link #accept accept} may be 125 * different from this instance. 126 * 127 * <p>Warning: No range checks are performed. 128 * 129 * @param values Values. 130 * @param from Inclusive start of the range. 131 * @param to Exclusive end of the range. 132 * @return {@code SumOfSquaredDeviations} instance. 133 */ 134 static SumOfSquaredDeviations ofRange(double[] values, int from, int to) { 135 if (from == to) { 136 return new SumOfSquaredDeviations(); 137 } 138 return create(FirstMoment.ofRange(values, from, to), values, from, to); 139 } 140 141 /** 142 * Creates the sum of squared deviations. 143 * 144 * <p>Uses the provided {@code sum} to create the first moment. 145 * This method is used by {@link DoubleStatistics} using a sum that can be reused 146 * for the {@link Sum} statistic. 147 * 148 * <p>Warning: No range checks are performed. 149 * 150 * @param sum Sum of the values. 151 * @param values Values. 152 * @param from Inclusive start of the range. 153 * @param to Exclusive end of the range. 154 * @return {@code SumOfSquaredDeviations} instance. 155 */ 156 static SumOfSquaredDeviations createFromRange(org.apache.commons.numbers.core.Sum sum, 157 double[] values, int from, int to) { 158 if (from == to) { 159 return new SumOfSquaredDeviations(); 160 } 161 return create(FirstMoment.createFromRange(sum, values, from, to), values, from, to); 162 } 163 164 /** 165 * Creates the sum of squared deviations. 166 * 167 * @param m1 First moment. 168 * @param values Values. 169 * @param from Inclusive start of the range. 170 * @param to Exclusive end of the range. 171 * @return {@code SumOfSquaredDeviations} instance. 172 */ 173 private static SumOfSquaredDeviations create(FirstMoment m1, double[] values, int from, int to) { 174 // "Corrected two-pass algorithm" 175 // See: Chan et al (1983) Equation 1.7 176 177 final double xbar = m1.getFirstMoment(); 178 if (!Double.isFinite(xbar)) { 179 return new SumOfSquaredDeviations(Double.NaN, m1); 180 } 181 double s = 0; 182 double ss = 0; 183 for (int i = from; i < to; i++) { 184 final double dx = values[i] - xbar; 185 s += dx; 186 ss += dx * dx; 187 } 188 // The sum of squared deviations is ss - (s * s / n). 189 // The second term ideally should be zero; in practice it is a good approximation 190 // of the error in the first term. 191 // To prevent sumSquaredDev from spuriously attaining a NaN value 192 // when ss is infinite, assign it an infinite value which is its intended value. 193 final double sumSquaredDev = ss == Double.POSITIVE_INFINITY ? 194 Double.POSITIVE_INFINITY : 195 ss - (s * s / (to - from)); 196 return new SumOfSquaredDeviations(sumSquaredDev, m1); 197 } 198 199 /** 200 * Updates the state of the statistic to reflect the addition of {@code value}. 201 * 202 * @param value Value. 203 */ 204 @Override 205 public void accept(double value) { 206 // "Updating one-pass algorithm" 207 // See: Chan et al (1983) Equation 1.3b 208 super.accept(value); 209 // Note: account for the half-deviation representation by scaling by 4=2^2 210 sumSquaredDev += (n - 1) * dev * nDev * 4; 211 } 212 213 /** 214 * Gets the sum of squared deviations of all input values. 215 * 216 * @return sum of squared deviations of all values. 217 */ 218 double getSumOfSquaredDeviations() { 219 return Double.isFinite(getFirstMoment()) ? sumSquaredDev : Double.NaN; 220 } 221 222 /** 223 * Combines the state of another {@code SumOfSquaredDeviations} into this one. 224 * 225 * @param other Another {@code SumOfSquaredDeviations} to be combined. 226 * @return {@code this} instance after combining {@code other}. 227 */ 228 SumOfSquaredDeviations combine(SumOfSquaredDeviations other) { 229 final long m = other.n; 230 if (n == 0) { 231 sumSquaredDev = other.sumSquaredDev; 232 } else if (m != 0) { 233 // "Updating one-pass algorithm" 234 // See: Chan et al (1983) Equation 1.5b (modified for the mean) 235 final double diffOfMean = getFirstMomentDifference(other); 236 final double sqDiffOfMean = diffOfMean * diffOfMean; 237 // Enforce symmetry 238 sumSquaredDev = (sumSquaredDev + other.sumSquaredDev) + 239 sqDiffOfMean * (((double) n * m) / ((double) n + m)); 240 } 241 super.combine(other); 242 return this; 243 } 244 }