1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.statistics.descriptive;
18
19 /**
20 * Computes the sum of cubed deviations from the sample mean. This
21 * statistic is related to the third moment.
22 *
23 * <p>Uses a recursive updating formula as defined in Manca and Marin (2010), equation 10.
24 * Note that the denominator in the third term in that equation has been corrected to
25 * \( (N_1 + N_2)^2 \). Two sum of cubed deviations (SC) can be combined using:
26 *
27 * <p>\[ SC(X) = {SC}_1 + {SC}_2 + \frac{3(m_1 - m_2)({s_1}^2 - {s_2}^2) N_1 N_2}{N_1 + N_2}
28 * + \frac{(m_1 - m_2)^3((N_2 - N_1) N_1 N_2}{(N_1 + N_2)^2} \]
29 *
30 * <p>where \( N \) is the group size, \( m \) is the mean, and \( s^2 \) is the biased variance
31 * such that \( s^2 * N \) is the sum of squared deviations from the mean. Note the term
32 * \( ({s_1}^2 - {s_2}^2) N_1 N_2 == (ss_1 * N_2 - ss_2 * N_1 \) where \( ss \) is the sum
33 * of square deviations.
34 *
35 * <p>If \( N_1 \) is size 1 this reduces to:
36 *
37 * <p>\[ SC_{N+1} = {SC}_N + \frac{3(x - m) -s^2 N}{N + 1}
38 * + \frac{(x - m)^3((N - 1) N}{(N + 1)^2} \]
39 *
40 * <p>where \( s^2 N \) is the sum of squared deviations.
41 * This updating formula is identical to that used in
42 * {@code org.apache.commons.math3.stat.descriptive.moment.ThirdMoment}.
43 *
44 * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
45 * This implementation does not check for overflow of the count.
46 *
47 * <p><strong>Note that this implementation is not synchronized.</strong> If
48 * multiple threads access an instance of this class concurrently, and at least
49 * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or
50 * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally.
51 *
52 * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept}
53 * and {@link StatisticAccumulator#combine(StatisticResult) combine}
54 * as {@code accumulator} and {@code combiner} functions of
55 * {@link java.util.stream.Collector Collector} on a parallel stream,
56 * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()}
57 * provides the necessary partitioning, isolation, and merging of results for
58 * safe and efficient parallel execution.
59 *
60 * <p>References:
61 * <ul>
62 * <li>Manca and Marin (2020)
63 * Decomposition of the Sum of Cubes, the Sum Raised to the
64 * Power of Four and Codeviance.
65 * Applied Mathematics, 11, 1013-1020.
66 * <a href="https://doi.org/10.4236/am.2020.1110067">doi: 10.4236/am.2020.1110067</a>
67 * </ul>
68 *
69 * @since 1.1
70 */
71 class SumOfCubedDeviations extends SumOfSquaredDeviations {
72 /** 2, the length limit where the sum-of-cubed deviations is zero. */
73 static final int LENGTH_TWO = 2;
74
75 /** Sum of cubed deviations of the values that have been added. */
76 protected double sumCubedDev;
77
78 /**
79 * Create an instance.
80 */
81 SumOfCubedDeviations() {
82 // No-op
83 }
84
85 /**
86 * Copy constructor.
87 *
88 * @param source Source to copy.
89 */
90 SumOfCubedDeviations(SumOfCubedDeviations source) {
91 super(source);
92 sumCubedDev = source.sumCubedDev;
93 }
94
95 /**
96 * Create an instance with the given sum of cubed and squared deviations.
97 *
98 * @param sc Sum of cubed deviations.
99 * @param ss Sum of squared deviations.
100 */
101 SumOfCubedDeviations(double sc, SumOfSquaredDeviations ss) {
102 super(ss);
103 this.sumCubedDev = sc;
104 }
105
106 /**
107 * Create an instance with the given sum of cubed and squared deviations,
108 * and first moment.
109 *
110 * @param sc Sum of cubed deviations.
111 * @param ss Sum of squared deviations.
112 * @param m1 First moment.
113 * @param n Count of values.
114 */
115 SumOfCubedDeviations(double sc, double ss, double m1, long n) {
116 super(ss, m1, n);
117 this.sumCubedDev = sc;
118 }
119
120 /**
121 * Returns an instance populated using the input {@code values}.
122 *
123 * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
124 * different from this instance.
125 *
126 * @param values Values.
127 * @return {@code SumOfCubedDeviations} instance.
128 */
129 static SumOfCubedDeviations of(double... values) {
130 if (values.length == 0) {
131 return new SumOfCubedDeviations();
132 }
133 return create(SumOfSquaredDeviations.of(values), values, 0, values.length);
134 }
135
136 /**
137 * Returns an instance populated using the specified range of {@code values}.
138 *
139 * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
140 * different from this instance.
141 *
142 * <p>Warning: No range checks are performed.
143 *
144 * @param values Values.
145 * @param from Inclusive start of the range.
146 * @param to Exclusive end of the range.
147 * @return {@code SumOfCubedDeviations} instance.
148 */
149 static SumOfCubedDeviations ofRange(double[] values, int from, int to) {
150 if (from == to) {
151 return new SumOfCubedDeviations();
152 }
153 return create(SumOfSquaredDeviations.ofRange(values, from, to), values, from, to);
154 }
155
156 /**
157 * Creates the sum of cubed deviations.
158 *
159 * <p>Uses the provided {@code sum} to create the first moment.
160 * This method is used by {@link DoubleStatistics} using a sum that can be reused
161 * for the {@link Sum} statistic.
162 *
163 * <p>Warning: No range checks are performed.
164 *
165 * @param sum Sum of the values.
166 * @param values Values.
167 * @param from Inclusive start of the range.
168 * @param to Exclusive end of the range.
169 * @return {@code SumOfCubedDeviations} instance.
170 */
171 static SumOfCubedDeviations createFromRange(org.apache.commons.numbers.core.Sum sum,
172 double[] values, int from, int to) {
173 if (from == to) {
174 return new SumOfCubedDeviations();
175 }
176 return create(SumOfSquaredDeviations.createFromRange(sum, values, from, to), values, from, to);
177 }
178
179 /**
180 * Creates the sum of cubed deviations.
181 *
182 * @param ss Sum of squared deviations.
183 * @param values Values.
184 * @param from Inclusive start of the range.
185 * @param to Exclusive end of the range.
186 * @return {@code SumOfCubedDeviations} instance.
187 */
188 private static SumOfCubedDeviations create(SumOfSquaredDeviations ss, double[] values, int from, int to) {
189 // Edge cases
190 final double xbar = ss.getFirstMoment();
191 if (!Double.isFinite(xbar)) {
192 return new SumOfCubedDeviations(Double.NaN, ss);
193 }
194 if (!Double.isFinite(ss.sumSquaredDev)) {
195 // Note: If the sum-of-squared (SS) overflows then the same deviations when cubed
196 // will overflow. The *smallest* deviation to overflow SS is a full-length array of
197 // +/- values around a mean of zero, or approximately sqrt(MAX_VALUE / 2^31) = 2.89e149.
198 // In this case the sum cubed could be finite due to cancellation
199 // but this cannot be computed. Only a small array can be known to be zero.
200 return new SumOfCubedDeviations(ss.n <= LENGTH_TWO ? 0 : Double.NaN, ss);
201 }
202 // Compute the sum of cubed deviations.
203 double s = 0;
204 // n=1: no deviation
205 // n=2: the two deviations from the mean are equal magnitude
206 // and opposite sign. So the sum-of-cubed deviations is zero.
207 if (ss.n > LENGTH_TWO) {
208 for (int i = from; i < to; i++) {
209 s += pow3(values[i] - xbar);
210 }
211 }
212 return new SumOfCubedDeviations(s, ss);
213 }
214
215 /**
216 * Returns an instance populated using the input {@code values}.
217 *
218 * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
219 * different from this instance.
220 *
221 * @param values Values.
222 * @return {@code SumOfCubedDeviations} instance.
223 */
224 static SumOfCubedDeviations of(int... values) {
225 return ofRange(values, 0, values.length);
226 }
227
228 /**
229 * Returns an instance populated using the specified range of {@code values}.
230 *
231 * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
232 * different from this instance.
233 *
234 * <p>Warning: No range checks are performed.
235 *
236 * @param values Values.
237 * @param from Inclusive start of the range.
238 * @param to Exclusive end of the range.
239 * @return {@code SumOfCubedDeviations} instance.
240 */
241 static SumOfCubedDeviations ofRange(int[] values, int from, int to) {
242 // Logic shared with the double[] version with int[] lower order moments
243 if (from == to) {
244 return new SumOfCubedDeviations();
245 }
246 final IntVariance variance = IntVariance.createFromRange(values, from, to);
247 final double xbar = variance.computeMean();
248 final double ss = variance.computeSumOfSquaredDeviations();
249
250 double sc = 0;
251 if (to - from > LENGTH_TWO) {
252 for (int i = from; i < to; i++) {
253 sc += pow3(values[i] - xbar);
254 }
255 }
256 return new SumOfCubedDeviations(sc, ss, xbar, to - from);
257 }
258
259 /**
260 * Returns an instance populated using the input {@code values}.
261 *
262 * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
263 * different from this instance.
264 *
265 * @param values Values.
266 * @return {@code SumOfCubedDeviations} instance.
267 */
268 static SumOfCubedDeviations of(long... values) {
269 return ofRange(values, 0, values.length);
270 }
271
272 /**
273 * Returns an instance populated using the specified range of {@code values}.
274 *
275 * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
276 * different from this instance.
277 *
278 * <p>Warning: No range checks are performed.
279 *
280 * @param values Values.
281 * @param from Inclusive start of the range.
282 * @param to Exclusive end of the range.
283 * @return {@code SumOfCubedDeviations} instance.
284 * @throws IndexOutOfBoundsException if the sub-range is out of bounds
285 */
286 static SumOfCubedDeviations ofRange(long[] values, int from, int to) {
287 // Logic shared with the double[] version with int[] lower order moments
288 if (from == to) {
289 return new SumOfCubedDeviations();
290 }
291 final LongVariance variance = LongVariance.createFromRange(values, from, to);
292 final double xbar = variance.computeMean();
293 final double ss = variance.computeSumOfSquaredDeviations();
294
295 double sc = 0;
296 if (to - from > LENGTH_TWO) {
297 for (int i = from; i < to; i++) {
298 sc += pow3(values[i] - xbar);
299 }
300 }
301 return new SumOfCubedDeviations(sc, ss, xbar, to - from);
302 }
303
304 /**
305 * Compute {@code x^3}.
306 * Uses compound multiplication.
307 *
308 * @param x Value.
309 * @return x^3
310 */
311 private static double pow3(double x) {
312 return x * x * x;
313 }
314
315 /**
316 * Updates the state of the statistic to reflect the addition of {@code value}.
317 *
318 * @param value Value.
319 */
320 @Override
321 public void accept(double value) {
322 // Require current s^2 * N == sum-of-square deviations
323 final double ss = sumSquaredDev;
324 final double np = n;
325 super.accept(value);
326 // Terms are arranged so that values that may be zero
327 // (np, ss) are first. This will cancel any overflow in
328 // multiplication of later terms (nDev * 3 and nDev^2).
329 // This handles initialisation when np in {0, 1) to zero
330 // for any deviation (e.g. series MAX_VALUE, -MAX_VALUE).
331 // Note: account for the half-deviation representation by scaling by 6=3*2; 8=2^3
332 sumCubedDev = sumCubedDev -
333 ss * nDev * 6 +
334 (np - 1.0) * np * nDev * nDev * dev * 8;
335 }
336
337 /**
338 * Gets the sum of cubed deviations of all input values.
339 *
340 * <p>Note that the sum is subject to cancellation of potentially large
341 * positive and negative terms. A non-finite result may be returned
342 * due to intermediate overflow when the exact result may be a representable
343 * {@code double}.
344 *
345 * <p>Note: Any non-finite result should be considered a failed computation.
346 * The result is returned as computed and not consolidated to a single NaN.
347 * This is done for testing purposes to allow the result to be reported.
348 * In particular the sign of an infinity may not indicate the direction
349 * of the asymmetry (if any), only the direction of the first overflow in the
350 * computation. In the event of further overflow of a term to an opposite signed
351 * infinity the sum will be {@code NaN}.
352 *
353 * @return sum of cubed deviations of all values.
354 */
355 double getSumOfCubedDeviations() {
356 return Double.isFinite(getFirstMoment()) ? sumCubedDev : Double.NaN;
357 }
358
359 /**
360 * Combines the state of another {@code SumOfCubedDeviations} into this one.
361 *
362 * @param other Another {@code SumOfCubedDeviations} to be combined.
363 * @return {@code this} instance after combining {@code other}.
364 */
365 SumOfCubedDeviations combine(SumOfCubedDeviations other) {
366 if (n == 0) {
367 sumCubedDev = other.sumCubedDev;
368 } else if (other.n != 0) {
369 // Avoid overflow to compute the difference.
370 // This allows any samples of size n=1 to be combined as their SS=0.
371 // The result is a SC=0 for the combined n=2.
372 final double halfDiffOfMean = getFirstMomentHalfDifference(other);
373 sumCubedDev += other.sumCubedDev;
374 // Add additional terms that do not cancel to zero
375 if (halfDiffOfMean != 0) {
376 final double n1 = n;
377 final double n2 = other.n;
378 if (n1 == n2) {
379 // Optimisation where sizes are equal in double-precision.
380 // This is of use in JDK streams as spliterators use a divide by two
381 // strategy for parallel streams.
382 sumCubedDev += (sumSquaredDev - other.sumSquaredDev) * halfDiffOfMean * 3;
383 } else {
384 final double n1n2 = n1 + n2;
385 final double dm = 2 * (halfDiffOfMean / n1n2);
386 sumCubedDev += (sumSquaredDev * n2 - other.sumSquaredDev * n1) * dm * 3 +
387 (n2 - n1) * (n1 * n2) * pow3(dm) * n1n2;
388 }
389 }
390 }
391 super.combine(other);
392 return this;
393 }
394 }