1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.statistics.descriptive;
18
19 /**
20 * Computes the sum of fourth deviations from the sample mean. This
21 * statistic is related to the fourth moment.
22 *
23 * <p>Uses a recursive updating formula as defined in Manca and Marin (2010), equation 16.
24 * Note that third term in that equation has been corrected by expansion of the same term
25 * from equation 15. Two sum of fourth (quad) deviations (Sq) can be combined using:
26 *
27 * <p>\[ Sq(X) = {Sq}_1 + {Sq}_2 + \frac{4(m_1 - m_2)(g_1 - g_2) N_1 N_2}{N_1 + N_2}
28 * + \frac{6(m_1 - m_2)^2(N_2^2 ss_1 + N_1^2 ss_2)}{(N_1 + N_2)^2}
29 * + \frac{(m_1 - m_2)^4((N_1^2 - N_1 N_2 + N_2^2) N_1 N_2}{(N_1 + N_2)^3} \]
30 *
31 * <p>where \( N \) is the group size, \( m \) is the mean, \( ss \) is
32 * the sum of squared deviations from the mean, and \( g \)
33 * is the asymmetrical index where \( g * N \) is the sum of fourth deviations from the mean.
34 * Note the term \( ({g_1} - {g_2}) N_1 N_2 == (sc_1 * N_2 - sc_2 * N_1 \)
35 * where \( sc \) is the sum of fourth deviations.
36 *
37 * <p>If \( N_1 \) is size 1 this reduces to:
38 *
39 * <p>\[ SC_{N+1} = {SC}_N + \frac{4(x - m) -sc}{N + 1}
40 * + \frac{6(x - m)^2 ss}{(N + 1)^2}
41 * + \frac{(x - m)^4((1 - N + N^2) N}{(N + 1)^3} \]
42 *
43 * <p>where \( ss \) is the sum of squared deviations, and \( sc \) is the sum of
44 * fourth deviations. This updating formula is identical to that used in
45 * {@code org.apache.commons.math3.stat.descriptive.moment.FourthMoment}. The final term
46 * uses a rearrangement \( (1 - N + N^2) = (N+1)^2 - 3N \).
47 *
48 * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
49 * This implementation does not check for overflow of the count.
50 *
51 * <p><strong>Note that this implementation is not synchronized.</strong> If
52 * multiple threads access an instance of this class concurrently, and at least
53 * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or
54 * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally.
55 *
56 * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept}
57 * and {@link StatisticAccumulator#combine(StatisticResult) combine}
58 * as {@code accumulator} and {@code combiner} functions of
59 * {@link java.util.stream.Collector Collector} on a parallel stream,
60 * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()}
61 * provides the necessary partitioning, isolation, and merging of results for
62 * safe and efficient parallel execution.
63 *
64 * <p>References:
65 * <ul>
66 * <li>Manca and Marin (2020)
67 * Decomposition of the Sum of Cubes, the Sum Raised to the
68 * Power of Four and Codeviance.
69 * Applied Mathematics, 11, 1013-1020.
70 * <a href="https://doi.org/10.4236/am.2020.1110067">doi: 10.4236/am.2020.1110067</a>
71 * </ul>
72 *
73 * @since 1.1
74 */
75 class SumOfFourthDeviations extends SumOfCubedDeviations {
76 /** Sum of forth deviations of the values that have been added. */
77 private double sumFourthDev;
78
79 /**
80 * Create an instance.
81 */
82 SumOfFourthDeviations() {
83 // No-op
84 }
85
86 /**
87 * Create an instance with the given sum of fourth and squared deviations.
88 *
89 * @param sq Sum of fourth (quad) deviations.
90 * @param sc Sum of fourth deviations.
91 */
92 private SumOfFourthDeviations(double sq, SumOfCubedDeviations sc) {
93 super(sc);
94 this.sumFourthDev = sq;
95 }
96
97 /**
98 * Create an instance with the given sum of cubed and squared deviations,
99 * and first moment.
100 *
101 * @param sq Sum of fouth deviations.
102 * @param sc Sum of cubed deviations.
103 * @param ss Sum of squared deviations.
104 * @param m1 First moment.
105 * @param n Count of values.
106 */
107 private SumOfFourthDeviations(double sq, double sc, double ss, double m1, long n) {
108 super(sc, ss, m1, n);
109 this.sumFourthDev = sq;
110 }
111
112 /**
113 * Returns an instance populated using the input {@code values}.
114 *
115 * <p>Note: {@code SumOfFourthDeviations} computed using {@link #accept accept} may be
116 * different from this instance.
117 *
118 * @param values Values.
119 * @return {@code SumOfFourthDeviations} instance.
120 */
121 static SumOfFourthDeviations of(double... values) {
122 if (values.length == 0) {
123 return new SumOfFourthDeviations();
124 }
125 return create(SumOfCubedDeviations.of(values), values, 0, values.length);
126 }
127
128 /**
129 * Returns an instance populated using the specified range of {@code values}.
130 *
131 * <p>Note: {@code SumOfFourthDeviations} computed using {@link #accept accept} may be
132 * different from this instance.
133 *
134 * <p>Warning: No range checks are performed.
135 *
136 * @param values Values.
137 * @param from Inclusive start of the range.
138 * @param to Exclusive end of the range.
139 * @return {@code SumOfFourthDeviations} instance.
140 * @since 1.2
141 */
142 static SumOfFourthDeviations ofRange(double[] values, int from, int to) {
143 if (from == to) {
144 return new SumOfFourthDeviations();
145 }
146 return create(SumOfCubedDeviations.ofRange(values, from, to), values, from, to);
147 }
148
149 /**
150 * Creates the sum of fourth deviations.
151 *
152 * <p>Uses the provided {@code sum} to create the first moment.
153 * This method is used by {@link DoubleStatistics} using a sum that can be reused
154 * for the {@link Sum} statistic.
155 *
156 * <p>Warning: No range checks are performed.
157 *
158 * @param sum Sum of the values.
159 * @param values Values.
160 * @param from Inclusive start of the range.
161 * @param to Exclusive end of the range.
162 * @return {@code SumOfFourthDeviations} instance.
163 */
164 static SumOfFourthDeviations createFromRange(org.apache.commons.numbers.core.Sum sum,
165 double[] values, int from, int to) {
166 if (from == to) {
167 return new SumOfFourthDeviations();
168 }
169 return create(SumOfCubedDeviations.createFromRange(sum, values, from, to), values, from, to);
170 }
171
172 /**
173 * Creates the sum of fourth deviations.
174 *
175 * @param sc Sum of cubed deviations.
176 * @param values Values.
177 * @param from Inclusive start of the range.
178 * @param to Exclusive end of the range.
179 * @return {@code SumOfFourthDeviations} instance.
180 */
181 private static SumOfFourthDeviations create(SumOfCubedDeviations sc, double[] values, int from, int to) {
182 // Edge cases
183 final double xbar = sc.getFirstMoment();
184 if (!Double.isFinite(xbar) ||
185 !Double.isFinite(sc.sumSquaredDev) ||
186 !Double.isFinite(sc.sumCubedDev)) {
187 // Overflow computing lower order deviations will overflow
188 return new SumOfFourthDeviations(Double.NaN, sc);
189 }
190 // Compute the sum of fourth (quad) deviations.
191 // Note: This handles n=1.
192 double s = 0;
193 for (int i = from; i < to; i++) {
194 s += pow4(values[i] - xbar);
195 }
196 return new SumOfFourthDeviations(s, sc);
197 }
198
199 /**
200 * Compute {@code x^4}.
201 * Uses compound multiplication.
202 *
203 * @param x Value.
204 * @return x^4
205 */
206 private static double pow4(double x) {
207 final double x2 = x * x;
208 return x2 * x2;
209 }
210
211 /**
212 * Returns an instance populated using the input {@code values}.
213 *
214 * <p>Note: {@code SumOfFourthDeviations} computed using {@link #accept(double) accept} may be
215 * different from this instance.
216 *
217 * @param values Values.
218 * @return {@code SumOfCubedDeviations} instance.
219 */
220 static SumOfFourthDeviations of(int... values) {
221 return ofRange(values, 0, values.length);
222 }
223
224 /**
225 * Returns an instance populated using the specified range of {@code values}.
226 *
227 * <p>Note: {@code SumOfFourthDeviations} computed using {@link #accept(double) accept} may be
228 * different from this instance.
229 *
230 * <p>Warning: No range checks are performed.
231 *
232 * @param values Values.
233 * @param from Inclusive start of the range.
234 * @param to Exclusive end of the range.
235 * @return {@code SumOfFourthDeviations} instance.
236 */
237 static SumOfFourthDeviations ofRange(int[] values, int from, int to) {
238 // Logic shared with the double[] version with int[] lower order moments
239 if (from == to) {
240 return new SumOfFourthDeviations();
241 }
242 final IntVariance variance = IntVariance.createFromRange(values, from, to);
243 final double xbar = variance.computeMean();
244 final double ss = variance.computeSumOfSquaredDeviations();
245 // Unlike the double[] case, overflow/NaN is not possible:
246 // (max value)^4 times max array length ~ (2^31)^4 * 2^31 ~ 2^155.
247 // Compute sum of cubed and fourth deviations together.
248 double sc = 0;
249 double sq = 0;
250 for (int i = from; i < to; i++) {
251 final double x = values[i] - xbar;
252 final double x2 = x * x;
253 sc += x2 * x;
254 sq += x2 * x2;
255 }
256 // Edge case to avoid floating-point error for zero
257 if (to - from <= LENGTH_TWO) {
258 sc = 0;
259 }
260 return new SumOfFourthDeviations(sq, sc, ss, xbar, to - from);
261 }
262
263 /**
264 * Returns an instance populated using the input {@code values}.
265 *
266 * <p>Note: {@code SumOfFourthDeviations} computed using {@link #accept(double) accept} may be
267 * different from this instance.
268 *
269 * @param values Values.
270 * @return {@code SumOfFourthDeviations} instance.
271 */
272 static SumOfFourthDeviations of(long... values) {
273 return ofRange(values, 0, values.length);
274 }
275
276 /**
277 * Returns an instance populated using the specified range of {@code values}.
278 *
279 * <p>Note: {@code SumOfFourthDeviations} computed using {@link #accept(double) accept} may be
280 * different from this instance.
281 *
282 * <p>Warning: No range checks are performed.
283 *
284 * @param values Values.
285 * @param from Inclusive start of the range.
286 * @param to Exclusive end of the range.
287 * @return {@code SumOfFourthDeviations} instance.
288 */
289 static SumOfFourthDeviations ofRange(long[] values, int from, int to) {
290 // Logic shared with the double[] version with long[] lower order moments
291 if (from == to) {
292 return new SumOfFourthDeviations();
293 }
294 final LongVariance variance = LongVariance.createFromRange(values, from, to);
295 final double xbar = variance.computeMean();
296 final double ss = variance.computeSumOfSquaredDeviations();
297 // Unlike the double[] case, overflow/NaN is not possible:
298 // (max value)^4 times max array length ~ (2^31)^4 * 2^31 ~ 2^155.
299 // Compute sum of cubed and fourth deviations together.
300 double sc = 0;
301 double sq = 0;
302 for (int i = from; i < to; i++) {
303 final double x = values[i] - xbar;
304 final double x2 = x * x;
305 sc += x2 * x;
306 sq += x2 * x2;
307 }
308 // Edge case to avoid floating-point error for zero
309 if (to - from <= LENGTH_TWO) {
310 sc = 0;
311 }
312 return new SumOfFourthDeviations(sq, sc, ss, xbar, to - from);
313 }
314
315 /**
316 * Updates the state of the statistic to reflect the addition of {@code value}.
317 *
318 * @param value Value.
319 */
320 @Override
321 public void accept(double value) {
322 // Require current s^2 * N == sum-of-square deviations
323 // Require current g * N == sum-of-fourth deviations
324 final double ss = sumSquaredDev;
325 final double sc = sumCubedDev;
326 final double np = n;
327 super.accept(value);
328 // Terms are arranged so that values that may be zero
329 // (np, ss, sc) are first. This will cancel any overflow in
330 // multiplication of later terms (nDev * 4, nDev^2, nDev^4).
331 // This handles initialisation when np in {0, 1) to zero
332 // for any deviation (e.g. series MAX_VALUE, -MAX_VALUE).
333 // Note: (np1 * np1 - 3 * np) = (np+1)^2 - 3np = np^2 - np + 1
334 // Note: account for the half-deviation representation by scaling by 8=4*2; 24=6*2^2; 16=2^4
335 final double np1 = n;
336 sumFourthDev = sumFourthDev -
337 sc * nDev * 8 +
338 ss * nDev * nDev * 24 +
339 np * (np1 * np1 - 3 * np) * nDev * nDev * nDev * dev * 16;
340 }
341
342 /**
343 * Gets the sum of fourth deviations of all input values.
344 *
345 * <p>Note that the result should be positive. However the updating sum is subject to
346 * cancellation of potentially large positive and negative terms. Overflow of these
347 * terms can result in a sum of opposite signed infinities and a {@code NaN} result
348 * for finite input values where the correct result is positive infinity.
349 *
350 * <p>Note: Any non-finite result should be considered a failed computation. The
351 * result is returned as computed and not consolidated to a single NaN. This is done
352 * for testing purposes to allow the result to be reported. It is possible to track
353 * input values to finite/non-finite (e.g. using bit mask manipulation of the exponent
354 * field). However this statistic in currently used in the kurtosis and in the case
355 * of failed computation distinguishing a non-finite result is not useful.
356 *
357 * @return sum of fourth deviations of all values.
358 */
359 double getSumOfFourthDeviations() {
360 return Double.isFinite(getFirstMoment()) ? sumFourthDev : Double.NaN;
361 }
362
363 /**
364 * Combines the state of another {@code SumOfFourthDeviations} into this one.
365 *
366 * @param other Another {@code SumOfFourthDeviations} to be combined.
367 * @return {@code this} instance after combining {@code other}.
368 */
369 SumOfFourthDeviations combine(SumOfFourthDeviations other) {
370 if (n == 0) {
371 sumFourthDev = other.sumFourthDev;
372 } else if (other.n != 0) {
373 // Avoid overflow to compute the difference.
374 final double halfDiffOfMean = getFirstMomentHalfDifference(other);
375 sumFourthDev += other.sumFourthDev;
376 // Add additional terms that do not cancel to zero
377 if (halfDiffOfMean != 0) {
378 final double n1 = n;
379 final double n2 = other.n;
380 if (n1 == n2) {
381 // Optimisation where sizes are equal in double-precision.
382 // This is of use in JDK streams as spliterators use a divide by two
383 // strategy for parallel streams.
384 // Note: (n1 * n2) * ((n1+n2)^2 - 3 * (n1 * n2)) == n^4
385 sumFourthDev +=
386 (sumCubedDev - other.sumCubedDev) * halfDiffOfMean * 4 +
387 (sumSquaredDev + other.sumSquaredDev) * (halfDiffOfMean * halfDiffOfMean) * 6 +
388 pow4(halfDiffOfMean) * n1 * 2;
389 } else {
390 final double n1n2 = n1 + n2;
391 final double dm = 2 * (halfDiffOfMean / n1n2);
392 // Use the rearrangement for parity with the accept method
393 // n1*n1 - n1*n2 + n2*n2 == (n1+n2)^2 - 3*n1*n2
394 sumFourthDev +=
395 (sumCubedDev * n2 - other.sumCubedDev * n1) * dm * 4 +
396 (n2 * n2 * sumSquaredDev + n1 * n1 * other.sumSquaredDev) * (dm * dm) * 6 +
397 (n1 * n2) * (n1n2 * n1n2 - 3 * (n1 * n2)) * pow4(dm) * n1n2;
398 }
399 }
400 }
401 super.combine(other);
402 return this;
403 }
404 }