1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.statistics.descriptive; 18 19 /** 20 * Computes the sum of cubed deviations from the sample mean. This 21 * statistic is related to the third moment. 22 * 23 * <p>Uses a recursive updating formula as defined in Manca and Marin (2010), equation 10. 24 * Note that the denominator in the third term in that equation has been corrected to 25 * \( (N_1 + N_2)^2 \). Two sum of cubed deviations (SC) can be combined using: 26 * 27 * <p>\[ SC(X) = {SC}_1 + {SC}_2 + \frac{3(m_1 - m_2)({s_1}^2 - {s_2}^2) N_1 N_2}{N_1 + N_2} 28 * + \frac{(m_1 - m_2)^3((N_2 - N_1) N_1 N_2}{(N_1 + N_2)^2} \] 29 * 30 * <p>where \( N \) is the group size, \( m \) is the mean, and \( s^2 \) is the biased variance 31 * such that \( s^2 * N \) is the sum of squared deviations from the mean. Note the term 32 * \( ({s_1}^2 - {s_2}^2) N_1 N_2 == (ss_1 * N_2 - ss_2 * N_1 \) where \( ss \) is the sum 33 * of square deviations. 34 * 35 * <p>If \( N_1 \) is size 1 this reduces to: 36 * 37 * <p>\[ SC_{N+1} = {SC}_N + \frac{3(x - m) -s^2 N}{N + 1} 38 * + \frac{(x - m)^3((N - 1) N}{(N + 1)^2} \] 39 * 40 * <p>where \( s^2 N \) is the sum of squared deviations. 41 * This updating formula is identical to that used in 42 * {@code org.apache.commons.math3.stat.descriptive.moment.ThirdMoment}. 43 * 44 * <p>Supports up to 2<sup>63</sup> (exclusive) observations. 45 * This implementation does not check for overflow of the count. 46 * 47 * <p><strong>Note that this implementation is not synchronized.</strong> If 48 * multiple threads access an instance of this class concurrently, and at least 49 * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or 50 * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally. 51 * 52 * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept} 53 * and {@link StatisticAccumulator#combine(StatisticResult) combine} 54 * as {@code accumulator} and {@code combiner} functions of 55 * {@link java.util.stream.Collector Collector} on a parallel stream, 56 * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()} 57 * provides the necessary partitioning, isolation, and merging of results for 58 * safe and efficient parallel execution. 59 * 60 * <p>References: 61 * <ul> 62 * <li>Manca and Marin (2020) 63 * Decomposition of the Sum of Cubes, the Sum Raised to the 64 * Power of Four and Codeviance. 65 * Applied Mathematics, 11, 1013-1020. 66 * <a href="https://doi.org/10.4236/am.2020.1110067">doi: 10.4236/am.2020.1110067</a> 67 * </ul> 68 * 69 * @since 1.1 70 */ 71 class SumOfCubedDeviations extends SumOfSquaredDeviations { 72 /** 2, the length limit where the sum-of-cubed deviations is zero. */ 73 static final int LENGTH_TWO = 2; 74 75 /** Sum of cubed deviations of the values that have been added. */ 76 protected double sumCubedDev; 77 78 /** 79 * Create an instance. 80 */ 81 SumOfCubedDeviations() { 82 // No-op 83 } 84 85 /** 86 * Copy constructor. 87 * 88 * @param source Source to copy. 89 */ 90 SumOfCubedDeviations(SumOfCubedDeviations source) { 91 super(source); 92 sumCubedDev = source.sumCubedDev; 93 } 94 95 /** 96 * Create an instance with the given sum of cubed and squared deviations. 97 * 98 * @param sc Sum of cubed deviations. 99 * @param ss Sum of squared deviations. 100 */ 101 SumOfCubedDeviations(double sc, SumOfSquaredDeviations ss) { 102 super(ss); 103 this.sumCubedDev = sc; 104 } 105 106 /** 107 * Create an instance with the given sum of cubed and squared deviations, 108 * and first moment. 109 * 110 * @param sc Sum of cubed deviations. 111 * @param ss Sum of squared deviations. 112 * @param m1 First moment. 113 * @param n Count of values. 114 */ 115 SumOfCubedDeviations(double sc, double ss, double m1, long n) { 116 super(ss, m1, n); 117 this.sumCubedDev = sc; 118 } 119 120 /** 121 * Returns an instance populated using the input {@code values}. 122 * 123 * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be 124 * different from this instance. 125 * 126 * @param values Values. 127 * @return {@code SumOfCubedDeviations} instance. 128 */ 129 static SumOfCubedDeviations of(double... values) { 130 if (values.length == 0) { 131 return new SumOfCubedDeviations(); 132 } 133 return create(SumOfSquaredDeviations.of(values), values, 0, values.length); 134 } 135 136 /** 137 * Returns an instance populated using the specified range of {@code values}. 138 * 139 * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be 140 * different from this instance. 141 * 142 * <p>Warning: No range checks are performed. 143 * 144 * @param values Values. 145 * @param from Inclusive start of the range. 146 * @param to Exclusive end of the range. 147 * @return {@code SumOfCubedDeviations} instance. 148 */ 149 static SumOfCubedDeviations ofRange(double[] values, int from, int to) { 150 if (from == to) { 151 return new SumOfCubedDeviations(); 152 } 153 return create(SumOfSquaredDeviations.ofRange(values, from, to), values, from, to); 154 } 155 156 /** 157 * Creates the sum of cubed deviations. 158 * 159 * <p>Uses the provided {@code sum} to create the first moment. 160 * This method is used by {@link DoubleStatistics} using a sum that can be reused 161 * for the {@link Sum} statistic. 162 * 163 * <p>Warning: No range checks are performed. 164 * 165 * @param sum Sum of the values. 166 * @param values Values. 167 * @param from Inclusive start of the range. 168 * @param to Exclusive end of the range. 169 * @return {@code SumOfCubedDeviations} instance. 170 */ 171 static SumOfCubedDeviations createFromRange(org.apache.commons.numbers.core.Sum sum, 172 double[] values, int from, int to) { 173 if (from == to) { 174 return new SumOfCubedDeviations(); 175 } 176 return create(SumOfSquaredDeviations.createFromRange(sum, values, from, to), values, from, to); 177 } 178 179 /** 180 * Creates the sum of cubed deviations. 181 * 182 * @param ss Sum of squared deviations. 183 * @param values Values. 184 * @param from Inclusive start of the range. 185 * @param to Exclusive end of the range. 186 * @return {@code SumOfCubedDeviations} instance. 187 */ 188 private static SumOfCubedDeviations create(SumOfSquaredDeviations ss, double[] values, int from, int to) { 189 // Edge cases 190 final double xbar = ss.getFirstMoment(); 191 if (!Double.isFinite(xbar)) { 192 return new SumOfCubedDeviations(Double.NaN, ss); 193 } 194 if (!Double.isFinite(ss.sumSquaredDev)) { 195 // Note: If the sum-of-squared (SS) overflows then the same deviations when cubed 196 // will overflow. The *smallest* deviation to overflow SS is a full-length array of 197 // +/- values around a mean of zero, or approximately sqrt(MAX_VALUE / 2^31) = 2.89e149. 198 // In this case the sum cubed could be finite due to cancellation 199 // but this cannot be computed. Only a small array can be known to be zero. 200 return new SumOfCubedDeviations(ss.n <= LENGTH_TWO ? 0 : Double.NaN, ss); 201 } 202 // Compute the sum of cubed deviations. 203 double s = 0; 204 // n=1: no deviation 205 // n=2: the two deviations from the mean are equal magnitude 206 // and opposite sign. So the sum-of-cubed deviations is zero. 207 if (ss.n > LENGTH_TWO) { 208 for (int i = from; i < to; i++) { 209 s += pow3(values[i] - xbar); 210 } 211 } 212 return new SumOfCubedDeviations(s, ss); 213 } 214 215 /** 216 * Returns an instance populated using the input {@code values}. 217 * 218 * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be 219 * different from this instance. 220 * 221 * @param values Values. 222 * @return {@code SumOfCubedDeviations} instance. 223 */ 224 static SumOfCubedDeviations of(int... values) { 225 return ofRange(values, 0, values.length); 226 } 227 228 /** 229 * Returns an instance populated using the specified range of {@code values}. 230 * 231 * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be 232 * different from this instance. 233 * 234 * <p>Warning: No range checks are performed. 235 * 236 * @param values Values. 237 * @param from Inclusive start of the range. 238 * @param to Exclusive end of the range. 239 * @return {@code SumOfCubedDeviations} instance. 240 */ 241 static SumOfCubedDeviations ofRange(int[] values, int from, int to) { 242 // Logic shared with the double[] version with int[] lower order moments 243 if (from == to) { 244 return new SumOfCubedDeviations(); 245 } 246 final IntVariance variance = IntVariance.createFromRange(values, from, to); 247 final double xbar = variance.computeMean(); 248 final double ss = variance.computeSumOfSquaredDeviations(); 249 250 double sc = 0; 251 if (to - from > LENGTH_TWO) { 252 for (int i = from; i < to; i++) { 253 sc += pow3(values[i] - xbar); 254 } 255 } 256 return new SumOfCubedDeviations(sc, ss, xbar, to - from); 257 } 258 259 /** 260 * Returns an instance populated using the input {@code values}. 261 * 262 * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be 263 * different from this instance. 264 * 265 * @param values Values. 266 * @return {@code SumOfCubedDeviations} instance. 267 */ 268 static SumOfCubedDeviations of(long... values) { 269 return ofRange(values, 0, values.length); 270 } 271 272 /** 273 * Returns an instance populated using the specified range of {@code values}. 274 * 275 * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be 276 * different from this instance. 277 * 278 * <p>Warning: No range checks are performed. 279 * 280 * @param values Values. 281 * @param from Inclusive start of the range. 282 * @param to Exclusive end of the range. 283 * @return {@code SumOfCubedDeviations} instance. 284 * @throws IndexOutOfBoundsException if the sub-range is out of bounds 285 */ 286 static SumOfCubedDeviations ofRange(long[] values, int from, int to) { 287 // Logic shared with the double[] version with int[] lower order moments 288 if (from == to) { 289 return new SumOfCubedDeviations(); 290 } 291 final LongVariance variance = LongVariance.createFromRange(values, from, to); 292 final double xbar = variance.computeMean(); 293 final double ss = variance.computeSumOfSquaredDeviations(); 294 295 double sc = 0; 296 if (to - from > LENGTH_TWO) { 297 for (int i = from; i < to; i++) { 298 sc += pow3(values[i] - xbar); 299 } 300 } 301 return new SumOfCubedDeviations(sc, ss, xbar, to - from); 302 } 303 304 /** 305 * Compute {@code x^3}. 306 * Uses compound multiplication. 307 * 308 * @param x Value. 309 * @return x^3 310 */ 311 private static double pow3(double x) { 312 return x * x * x; 313 } 314 315 /** 316 * Updates the state of the statistic to reflect the addition of {@code value}. 317 * 318 * @param value Value. 319 */ 320 @Override 321 public void accept(double value) { 322 // Require current s^2 * N == sum-of-square deviations 323 final double ss = sumSquaredDev; 324 final double np = n; 325 super.accept(value); 326 // Terms are arranged so that values that may be zero 327 // (np, ss) are first. This will cancel any overflow in 328 // multiplication of later terms (nDev * 3 and nDev^2). 329 // This handles initialisation when np in {0, 1) to zero 330 // for any deviation (e.g. series MAX_VALUE, -MAX_VALUE). 331 // Note: account for the half-deviation representation by scaling by 6=3*2; 8=2^3 332 sumCubedDev = sumCubedDev - 333 ss * nDev * 6 + 334 (np - 1.0) * np * nDev * nDev * dev * 8; 335 } 336 337 /** 338 * Gets the sum of cubed deviations of all input values. 339 * 340 * <p>Note that the sum is subject to cancellation of potentially large 341 * positive and negative terms. A non-finite result may be returned 342 * due to intermediate overflow when the exact result may be a representable 343 * {@code double}. 344 * 345 * <p>Note: Any non-finite result should be considered a failed computation. 346 * The result is returned as computed and not consolidated to a single NaN. 347 * This is done for testing purposes to allow the result to be reported. 348 * In particular the sign of an infinity may not indicate the direction 349 * of the asymmetry (if any), only the direction of the first overflow in the 350 * computation. In the event of further overflow of a term to an opposite signed 351 * infinity the sum will be {@code NaN}. 352 * 353 * @return sum of cubed deviations of all values. 354 */ 355 double getSumOfCubedDeviations() { 356 return Double.isFinite(getFirstMoment()) ? sumCubedDev : Double.NaN; 357 } 358 359 /** 360 * Combines the state of another {@code SumOfCubedDeviations} into this one. 361 * 362 * @param other Another {@code SumOfCubedDeviations} to be combined. 363 * @return {@code this} instance after combining {@code other}. 364 */ 365 SumOfCubedDeviations combine(SumOfCubedDeviations other) { 366 if (n == 0) { 367 sumCubedDev = other.sumCubedDev; 368 } else if (other.n != 0) { 369 // Avoid overflow to compute the difference. 370 // This allows any samples of size n=1 to be combined as their SS=0. 371 // The result is a SC=0 for the combined n=2. 372 final double halfDiffOfMean = getFirstMomentHalfDifference(other); 373 sumCubedDev += other.sumCubedDev; 374 // Add additional terms that do not cancel to zero 375 if (halfDiffOfMean != 0) { 376 final double n1 = n; 377 final double n2 = other.n; 378 if (n1 == n2) { 379 // Optimisation where sizes are equal in double-precision. 380 // This is of use in JDK streams as spliterators use a divide by two 381 // strategy for parallel streams. 382 sumCubedDev += (sumSquaredDev - other.sumSquaredDev) * halfDiffOfMean * 3; 383 } else { 384 final double n1n2 = n1 + n2; 385 final double dm = 2 * (halfDiffOfMean / n1n2); 386 sumCubedDev += (sumSquaredDev * n2 - other.sumSquaredDev * n1) * dm * 3 + 387 (n2 - n1) * (n1 * n2) * pow3(dm) * n1n2; 388 } 389 } 390 } 391 super.combine(other); 392 return this; 393 } 394 }