View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.statistics.descriptive;
18  
19  /**
20   * Computes the sum of cubed deviations from the sample mean. This
21   * statistic is related to the third moment.
22   *
23   * <p>Uses a recursive updating formula as defined in Manca and Marin (2010), equation 10.
24   * Note that the denominator in the third term in that equation has been corrected to
25   * \( (N_1 + N_2)^2 \). Two sum of cubed deviations (SC) can be combined using:
26   *
27   * <p>\[ SC(X) = {SC}_1 + {SC}_2 + \frac{3(m_1 - m_2)({s_1}^2 - {s_2}^2) N_1 N_2}{N_1 + N_2}
28   *                               + \frac{(m_1 - m_2)^3((N_2 - N_1) N_1 N_2}{(N_1 + N_2)^2} \]
29   *
30   * <p>where \( N \) is the group size, \( m \) is the mean, and \( s^2 \) is the biased variance
31   * such that \( s^2 * N \) is the sum of squared deviations from the mean. Note the term
32   * \( ({s_1}^2 - {s_2}^2) N_1 N_2 == (ss_1 * N_2 - ss_2 * N_1 \) where \( ss \) is the sum
33   * of square deviations.
34   *
35   * <p>If \( N_1 \) is size 1 this reduces to:
36   *
37   * <p>\[ SC_{N+1} = {SC}_N + \frac{3(x - m) -s^2 N}{N + 1}
38   *                         + \frac{(x - m)^3((N - 1) N}{(N + 1)^2} \]
39   *
40   * <p>where \( s^2 N \) is the sum of squared deviations.
41   * This updating formula is identical to that used in
42   * {@code org.apache.commons.math3.stat.descriptive.moment.ThirdMoment}.
43   *
44   * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
45   * This implementation does not check for overflow of the count.
46   *
47   * <p><strong>Note that this implementation is not synchronized.</strong> If
48   * multiple threads access an instance of this class concurrently, and at least
49   * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or
50   * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally.
51   *
52   * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept}
53   * and {@link StatisticAccumulator#combine(StatisticResult) combine}
54   * as {@code accumulator} and {@code combiner} functions of
55   * {@link java.util.stream.Collector Collector} on a parallel stream,
56   * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()}
57   * provides the necessary partitioning, isolation, and merging of results for
58   * safe and efficient parallel execution.
59   *
60   * <p>References:
61   * <ul>
62   *   <li>Manca and Marin (2020)
63   *       Decomposition of the Sum of Cubes, the Sum Raised to the
64   *       Power of Four and Codeviance.
65   *       Applied Mathematics, 11, 1013-1020.
66   *       <a href="https://doi.org/10.4236/am.2020.1110067">doi: 10.4236/am.2020.1110067</a>
67   * </ul>
68   *
69   * @since 1.1
70   */
71  class SumOfCubedDeviations extends SumOfSquaredDeviations {
72      /** 2, the length limit where the sum-of-cubed deviations is zero. */
73      static final int LENGTH_TWO = 2;
74  
75      /** Sum of cubed deviations of the values that have been added. */
76      protected double sumCubedDev;
77  
78      /**
79       * Create an instance.
80       */
81      SumOfCubedDeviations() {
82          // No-op
83      }
84  
85      /**
86       * Copy constructor.
87       *
88       * @param source Source to copy.
89       */
90      SumOfCubedDeviations(SumOfCubedDeviations source) {
91          super(source);
92          sumCubedDev = source.sumCubedDev;
93      }
94  
95      /**
96       * Create an instance with the given sum of cubed and squared deviations.
97       *
98       * @param sc Sum of cubed deviations.
99       * @param ss Sum of squared deviations.
100      */
101     SumOfCubedDeviations(double sc, SumOfSquaredDeviations ss) {
102         super(ss);
103         this.sumCubedDev = sc;
104     }
105 
106     /**
107      * Create an instance with the given sum of cubed and squared deviations,
108      * and first moment.
109      *
110      * @param sc Sum of cubed deviations.
111      * @param ss Sum of squared deviations.
112      * @param m1 First moment.
113      * @param n Count of values.
114      */
115     SumOfCubedDeviations(double sc, double ss, double m1, long n) {
116         super(ss, m1, n);
117         this.sumCubedDev = sc;
118     }
119 
120     /**
121      * Returns an instance populated using the input {@code values}.
122      *
123      * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
124      * different from this instance.
125      *
126      * @param values Values.
127      * @return {@code SumOfCubedDeviations} instance.
128      */
129     static SumOfCubedDeviations of(double... values) {
130         if (values.length == 0) {
131             return new SumOfCubedDeviations();
132         }
133         return create(SumOfSquaredDeviations.of(values), values, 0, values.length);
134     }
135 
136     /**
137      * Returns an instance populated using the specified range of {@code values}.
138      *
139      * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
140      * different from this instance.
141      *
142      * <p>Warning: No range checks are performed.
143      *
144      * @param values Values.
145      * @param from Inclusive start of the range.
146      * @param to Exclusive end of the range.
147      * @return {@code SumOfCubedDeviations} instance.
148      */
149     static SumOfCubedDeviations ofRange(double[] values, int from, int to) {
150         if (from == to) {
151             return new SumOfCubedDeviations();
152         }
153         return create(SumOfSquaredDeviations.ofRange(values, from, to), values, from, to);
154     }
155 
156     /**
157      * Creates the sum of cubed deviations.
158      *
159      * <p>Uses the provided {@code sum} to create the first moment.
160      * This method is used by {@link DoubleStatistics} using a sum that can be reused
161      * for the {@link Sum} statistic.
162      *
163      * <p>Warning: No range checks are performed.
164      *
165      * @param sum Sum of the values.
166      * @param values Values.
167      * @param from Inclusive start of the range.
168      * @param to Exclusive end of the range.
169      * @return {@code SumOfCubedDeviations} instance.
170      */
171     static SumOfCubedDeviations createFromRange(org.apache.commons.numbers.core.Sum sum,
172                                                 double[] values, int from, int to) {
173         if (from == to) {
174             return new SumOfCubedDeviations();
175         }
176         return create(SumOfSquaredDeviations.createFromRange(sum, values, from, to), values, from, to);
177     }
178 
179     /**
180      * Creates the sum of cubed deviations.
181      *
182      * @param ss Sum of squared deviations.
183      * @param values Values.
184      * @param from Inclusive start of the range.
185      * @param to Exclusive end of the range.
186      * @return {@code SumOfCubedDeviations} instance.
187      */
188     private static SumOfCubedDeviations create(SumOfSquaredDeviations ss, double[] values, int from, int to) {
189         // Edge cases
190         final double xbar = ss.getFirstMoment();
191         if (!Double.isFinite(xbar)) {
192             return new SumOfCubedDeviations(Double.NaN, ss);
193         }
194         if (!Double.isFinite(ss.sumSquaredDev)) {
195             // Note: If the sum-of-squared (SS) overflows then the same deviations when cubed
196             // will overflow. The *smallest* deviation to overflow SS is a full-length array of
197             // +/- values around a mean of zero, or approximately sqrt(MAX_VALUE / 2^31) = 2.89e149.
198             // In this case the sum cubed could be finite due to cancellation
199             // but this cannot be computed. Only a small array can be known to be zero.
200             return new SumOfCubedDeviations(ss.n <= LENGTH_TWO ? 0 : Double.NaN, ss);
201         }
202         // Compute the sum of cubed deviations.
203         double s = 0;
204         // n=1: no deviation
205         // n=2: the two deviations from the mean are equal magnitude
206         // and opposite sign. So the sum-of-cubed deviations is zero.
207         if (ss.n > LENGTH_TWO) {
208             for (int i = from; i < to; i++) {
209                 s += pow3(values[i] - xbar);
210             }
211         }
212         return new SumOfCubedDeviations(s, ss);
213     }
214 
215     /**
216      * Returns an instance populated using the input {@code values}.
217      *
218      * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
219      * different from this instance.
220      *
221      * @param values Values.
222      * @return {@code SumOfCubedDeviations} instance.
223      */
224     static SumOfCubedDeviations of(int... values) {
225         return ofRange(values, 0, values.length);
226     }
227 
228     /**
229      * Returns an instance populated using the specified range of {@code values}.
230      *
231      * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
232      * different from this instance.
233      *
234      * <p>Warning: No range checks are performed.
235      *
236      * @param values Values.
237      * @param from Inclusive start of the range.
238      * @param to Exclusive end of the range.
239      * @return {@code SumOfCubedDeviations} instance.
240      */
241     static SumOfCubedDeviations ofRange(int[] values, int from, int to) {
242         // Logic shared with the double[] version with int[] lower order moments
243         if (from == to) {
244             return new SumOfCubedDeviations();
245         }
246         final IntVariance variance = IntVariance.createFromRange(values, from, to);
247         final double xbar = variance.computeMean();
248         final double ss = variance.computeSumOfSquaredDeviations();
249 
250         double sc = 0;
251         if (to - from > LENGTH_TWO) {
252             for (int i = from; i < to; i++) {
253                 sc += pow3(values[i] - xbar);
254             }
255         }
256         return new SumOfCubedDeviations(sc, ss, xbar, to - from);
257     }
258 
259     /**
260      * Returns an instance populated using the input {@code values}.
261      *
262      * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
263      * different from this instance.
264      *
265      * @param values Values.
266      * @return {@code SumOfCubedDeviations} instance.
267      */
268     static SumOfCubedDeviations of(long... values) {
269         return ofRange(values, 0, values.length);
270     }
271 
272     /**
273      * Returns an instance populated using the specified range of {@code values}.
274      *
275      * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
276      * different from this instance.
277      *
278      * <p>Warning: No range checks are performed.
279      *
280      * @param values Values.
281      * @param from Inclusive start of the range.
282      * @param to Exclusive end of the range.
283      * @return {@code SumOfCubedDeviations} instance.
284      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
285      */
286     static SumOfCubedDeviations ofRange(long[] values, int from, int to) {
287         // Logic shared with the double[] version with int[] lower order moments
288         if (from == to) {
289             return new SumOfCubedDeviations();
290         }
291         final LongVariance variance = LongVariance.createFromRange(values, from, to);
292         final double xbar = variance.computeMean();
293         final double ss = variance.computeSumOfSquaredDeviations();
294 
295         double sc = 0;
296         if (to - from > LENGTH_TWO) {
297             for (int i = from; i < to; i++) {
298                 sc += pow3(values[i] - xbar);
299             }
300         }
301         return new SumOfCubedDeviations(sc, ss, xbar, to - from);
302     }
303 
304     /**
305      * Compute {@code x^3}.
306      * Uses compound multiplication.
307      *
308      * @param x Value.
309      * @return x^3
310      */
311     private static double pow3(double x) {
312         return x * x * x;
313     }
314 
315     /**
316      * Updates the state of the statistic to reflect the addition of {@code value}.
317      *
318      * @param value Value.
319      */
320     @Override
321     public void accept(double value) {
322         // Require current s^2 * N == sum-of-square deviations
323         final double ss = sumSquaredDev;
324         final double np = n;
325         super.accept(value);
326         // Terms are arranged so that values that may be zero
327         // (np, ss) are first. This will cancel any overflow in
328         // multiplication of later terms (nDev * 3 and nDev^2).
329         // This handles initialisation when np in {0, 1) to zero
330         // for any deviation (e.g. series MAX_VALUE, -MAX_VALUE).
331         // Note: account for the half-deviation representation by scaling by 6=3*2; 8=2^3
332         sumCubedDev = sumCubedDev -
333             ss * nDev * 6 +
334             (np - 1.0) * np * nDev * nDev * dev * 8;
335     }
336 
337     /**
338      * Gets the sum of cubed deviations of all input values.
339      *
340      * <p>Note that the sum is subject to cancellation of potentially large
341      * positive and negative terms. A non-finite result may be returned
342      * due to intermediate overflow when the exact result may be a representable
343      * {@code double}.
344      *
345      * <p>Note: Any non-finite result should be considered a failed computation.
346      * The result is returned as computed and not consolidated to a single NaN.
347      * This is done for testing purposes to allow the result to be reported.
348      * In particular the sign of an infinity may not indicate the direction
349      * of the asymmetry (if any), only the direction of the first overflow in the
350      * computation. In the event of further overflow of a term to an opposite signed
351      * infinity the sum will be {@code NaN}.
352      *
353      * @return sum of cubed deviations of all values.
354      */
355     double getSumOfCubedDeviations() {
356         return Double.isFinite(getFirstMoment()) ? sumCubedDev : Double.NaN;
357     }
358 
359     /**
360      * Combines the state of another {@code SumOfCubedDeviations} into this one.
361      *
362      * @param other Another {@code SumOfCubedDeviations} to be combined.
363      * @return {@code this} instance after combining {@code other}.
364      */
365     SumOfCubedDeviations combine(SumOfCubedDeviations other) {
366         if (n == 0) {
367             sumCubedDev = other.sumCubedDev;
368         } else if (other.n != 0) {
369             // Avoid overflow to compute the difference.
370             // This allows any samples of size n=1 to be combined as their SS=0.
371             // The result is a SC=0 for the combined n=2.
372             final double halfDiffOfMean = getFirstMomentHalfDifference(other);
373             sumCubedDev += other.sumCubedDev;
374             // Add additional terms that do not cancel to zero
375             if (halfDiffOfMean != 0) {
376                 final double n1 = n;
377                 final double n2 = other.n;
378                 if (n1 == n2) {
379                     // Optimisation where sizes are equal in double-precision.
380                     // This is of use in JDK streams as spliterators use a divide by two
381                     // strategy for parallel streams.
382                     sumCubedDev += (sumSquaredDev - other.sumSquaredDev) * halfDiffOfMean * 3;
383                 } else {
384                     final double n1n2 = n1 + n2;
385                     final double dm = 2 * (halfDiffOfMean / n1n2);
386                     sumCubedDev += (sumSquaredDev * n2 - other.sumSquaredDev * n1) * dm * 3 +
387                                    (n2 - n1) * (n1 * n2) * pow3(dm) * n1n2;
388                 }
389             }
390         }
391         super.combine(other);
392         return this;
393     }
394 }