View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.statistics.descriptive;
18  
19  /**
20   * Computes the sum of fourth deviations from the sample mean. This
21   * statistic is related to the fourth moment.
22   *
23   * <p>Uses a recursive updating formula as defined in Manca and Marin (2010), equation 16.
24   * Note that third term in that equation has been corrected by expansion of the same term
25   * from equation 15. Two sum of fourth (quad) deviations (Sq) can be combined using:
26   *
27   * <p>\[ Sq(X) = {Sq}_1 + {Sq}_2 + \frac{4(m_1 - m_2)(g_1 - g_2) N_1 N_2}{N_1 + N_2}
28   *                               + \frac{6(m_1 - m_2)^2(N_2^2 ss_1 + N_1^2 ss_2)}{(N_1 + N_2)^2}
29   *                               + \frac{(m_1 - m_2)^4((N_1^2 - N_1 N_2 + N_2^2) N_1 N_2}{(N_1 + N_2)^3} \]
30   *
31   * <p>where \( N \) is the group size, \( m \) is the mean, \( ss \) is
32   * the sum of squared deviations from the mean, and \( g \)
33   * is the asymmetrical index where \( g * N \) is the sum of fourth deviations from the mean.
34   * Note the term \( ({g_1} - {g_2}) N_1 N_2 == (sc_1 * N_2 - sc_2 * N_1 \)
35   * where \( sc \) is the sum of fourth deviations.
36   *
37   * <p>If \( N_1 \) is size 1 this reduces to:
38   *
39   * <p>\[ SC_{N+1} = {SC}_N + \frac{4(x - m) -sc}{N + 1}
40   *                         + \frac{6(x - m)^2 ss}{(N + 1)^2}
41   *                         + \frac{(x - m)^4((1 - N + N^2) N}{(N + 1)^3} \]
42   *
43   * <p>where \( ss \) is the sum of squared deviations, and \( sc \) is the sum of
44   * fourth deviations. This updating formula is identical to that used in
45   * {@code org.apache.commons.math3.stat.descriptive.moment.FourthMoment}. The final term
46   * uses a rearrangement \( (1 - N + N^2) = (N+1)^2 - 3N \).
47   *
48   * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
49   * This implementation does not check for overflow of the count.
50   *
51   * <p><strong>Note that this implementation is not synchronized.</strong> If
52   * multiple threads access an instance of this class concurrently, and at least
53   * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or
54   * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally.
55   *
56   * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept}
57   * and {@link StatisticAccumulator#combine(StatisticResult) combine}
58   * as {@code accumulator} and {@code combiner} functions of
59   * {@link java.util.stream.Collector Collector} on a parallel stream,
60   * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()}
61   * provides the necessary partitioning, isolation, and merging of results for
62   * safe and efficient parallel execution.
63   *
64   * <p>References:
65   * <ul>
66   *   <li>Manca and Marin (2020)
67   *       Decomposition of the Sum of Cubes, the Sum Raised to the
68   *       Power of Four and Codeviance.
69   *       Applied Mathematics, 11, 1013-1020.
70   *       <a href="https://doi.org/10.4236/am.2020.1110067">doi: 10.4236/am.2020.1110067</a>
71   * </ul>
72   *
73   * @since 1.1
74   */
75  class SumOfFourthDeviations extends SumOfCubedDeviations {
76      /** Sum of forth deviations of the values that have been added. */
77      private double sumFourthDev;
78  
79      /**
80       * Create an instance.
81       */
82      SumOfFourthDeviations() {
83          // No-op
84      }
85  
86      /**
87       * Create an instance with the given sum of fourth and squared deviations.
88       *
89       * @param sq Sum of fourth (quad) deviations.
90       * @param sc Sum of fourth deviations.
91       */
92      private SumOfFourthDeviations(double sq, SumOfCubedDeviations sc) {
93          super(sc);
94          this.sumFourthDev = sq;
95      }
96  
97      /**
98       * Create an instance with the given sum of cubed and squared deviations,
99       * and first moment.
100      *
101      * @param sq Sum of fouth deviations.
102      * @param sc Sum of cubed deviations.
103      * @param ss Sum of squared deviations.
104      * @param m1 First moment.
105      * @param n Count of values.
106      */
107     private SumOfFourthDeviations(double sq, double sc, double ss, double m1, long n) {
108         super(sc, ss, m1, n);
109         this.sumFourthDev = sq;
110     }
111 
112     /**
113      * Returns an instance populated using the input {@code values}.
114      *
115      * <p>Note: {@code SumOfFourthDeviations} computed using {@link #accept accept} may be
116      * different from this instance.
117      *
118      * @param values Values.
119      * @return {@code SumOfFourthDeviations} instance.
120      */
121     static SumOfFourthDeviations of(double... values) {
122         if (values.length == 0) {
123             return new SumOfFourthDeviations();
124         }
125         return create(SumOfCubedDeviations.of(values), values);
126     }
127 
128     /**
129      * Creates the sum of fourth deviations.
130      *
131      * <p>Uses the provided {@code sum} to create the first moment.
132      * This method is used by {@link DoubleStatistics} using a sum that can be reused
133      * for the {@link Sum} statistic.
134      *
135      * @param sum Sum of the values.
136      * @param values Values.
137      * @return {@code SumOfFourthDeviations} instance.
138      */
139     static SumOfFourthDeviations create(org.apache.commons.numbers.core.Sum sum, double[] values) {
140         if (values.length == 0) {
141             return new SumOfFourthDeviations();
142         }
143         return create(SumOfCubedDeviations.create(sum, values), values);
144     }
145 
146     /**
147      * Creates the sum of fourth deviations.
148      *
149      * @param sc Sum of cubed deviations.
150      * @param values Values.
151      * @return {@code SumOfFourthDeviations} instance.
152      */
153     private static SumOfFourthDeviations create(SumOfCubedDeviations sc, double[] values) {
154         // Edge cases
155         final double xbar = sc.getFirstMoment();
156         if (!Double.isFinite(xbar) ||
157             !Double.isFinite(sc.sumSquaredDev) ||
158             !Double.isFinite(sc.sumCubedDev)) {
159             // Overflow computing lower order deviations will overflow
160             return new SumOfFourthDeviations(Double.NaN, sc);
161         }
162         // Compute the sum of fourth (quad) deviations.
163         // Note: This handles n=1.
164         double s = 0;
165         for (final double x : values) {
166             s += pow4(x - xbar);
167         }
168         return new SumOfFourthDeviations(s, sc);
169     }
170 
171     /**
172      * Compute {@code x^4}.
173      * Uses compound multiplication.
174      *
175      * @param x Value.
176      * @return x^4
177      */
178     private static double pow4(double x) {
179         final double x2 = x * x;
180         return x2 * x2;
181     }
182 
183     /**
184      * Returns an instance populated using the input {@code values}.
185      *
186      * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
187      * different from this instance.
188      *
189      * @param values Values.
190      * @return {@code SumOfCubedDeviations} instance.
191      */
192     static SumOfFourthDeviations of(int... values) {
193         // Logic shared with the double[] version with int[] lower order moments
194         if (values.length == 0) {
195             return new SumOfFourthDeviations();
196         }
197         final IntVariance variance = IntVariance.of(values);
198         final double xbar = variance.computeMean();
199         final double ss = variance.computeSumOfSquaredDeviations();
200         // Unlike the double[] case, overflow/NaN is not possible:
201         // (max value)^4 times max array length ~ (2^31)^4 * 2^31 ~ 2^155.
202         // Compute sum of cubed and fourth deviations together.
203         double sc = 0;
204         double sq = 0;
205         for (final double y : values) {
206             final double x = y - xbar;
207             final double x2 = x * x;
208             sc += x2 * x;
209             sq += x2 * x2;
210         }
211         // Edge case to avoid floating-point error for zero
212         if (values.length <= LENGTH_TWO) {
213             sc = 0;
214         }
215         return new SumOfFourthDeviations(sq, sc, ss, xbar, values.length);
216     }
217 
218     /**
219      * Returns an instance populated using the input {@code values}.
220      *
221      * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
222      * different from this instance.
223      *
224      * @param values Values.
225      * @return {@code SumOfCubedDeviations} instance.
226      */
227     static SumOfFourthDeviations of(long... values) {
228         // Logic shared with the double[] version with long[] lower order moments
229         if (values.length == 0) {
230             return new SumOfFourthDeviations();
231         }
232         final LongVariance variance = LongVariance.of(values);
233         final double xbar = variance.computeMean();
234         final double ss = variance.computeSumOfSquaredDeviations();
235         // Unlike the double[] case, overflow/NaN is not possible:
236         // (max value)^4 times max array length ~ (2^63)^4 * 2^31 ~ 2^283.
237         // Compute sum of cubed and fourth deviations together.
238         double sc = 0;
239         double sq = 0;
240         for (final double y : values) {
241             final double x = y - xbar;
242             final double x2 = x * x;
243             sc += x2 * x;
244             sq += x2 * x2;
245         }
246         // Edge case to avoid floating-point error for zero
247         if (values.length <= LENGTH_TWO) {
248             sc = 0;
249         }
250         return new SumOfFourthDeviations(sq, sc, ss, xbar, values.length);
251     }
252 
253     /**
254      * Updates the state of the statistic to reflect the addition of {@code value}.
255      *
256      * @param value Value.
257      */
258     @Override
259     public void accept(double value) {
260         // Require current s^2 * N == sum-of-square deviations
261         // Require current g * N == sum-of-fourth deviations
262         final double ss = sumSquaredDev;
263         final double sc = sumCubedDev;
264         final double np = n;
265         super.accept(value);
266         // Terms are arranged so that values that may be zero
267         // (np, ss, sc) are first. This will cancel any overflow in
268         // multiplication of later terms (nDev * 4, nDev^2, nDev^4).
269         // This handles initialisation when np in {0, 1) to zero
270         // for any deviation (e.g. series MAX_VALUE, -MAX_VALUE).
271         // Note: (np1 * np1 - 3 * np) = (np+1)^2 - 3np = np^2 - np + 1
272         // Note: account for the half-deviation representation by scaling by 8=4*2; 24=6*2^2; 16=2^4
273         final double np1 = n;
274         sumFourthDev = sumFourthDev -
275             sc * nDev * 8 +
276             ss * nDev * nDev * 24 +
277             np * (np1 * np1 - 3 * np) * nDev * nDev * nDev * dev * 16;
278     }
279 
280     /**
281      * Gets the sum of fourth deviations of all input values.
282      *
283      * <p>Note that the result should be positive. However the updating sum is subject to
284      * cancellation of potentially large positive and negative terms. Overflow of these
285      * terms can result in a sum of opposite signed infinities and a {@code NaN} result
286      * for finite input values where the correct result is positive infinity.
287      *
288      * <p>Note: Any non-finite result should be considered a failed computation. The
289      * result is returned as computed and not consolidated to a single NaN. This is done
290      * for testing purposes to allow the result to be reported. It is possible to track
291      * input values to finite/non-finite (e.g. using bit mask manipulation of the exponent
292      * field). However this statistic in currently used in the kurtosis and in the case
293      * of failed computation distinguishing a non-finite result is not useful.
294      *
295      * @return sum of fourth deviations of all values.
296      */
297     double getSumOfFourthDeviations() {
298         return Double.isFinite(getFirstMoment()) ? sumFourthDev : Double.NaN;
299     }
300 
301     /**
302      * Combines the state of another {@code SumOfFourthDeviations} into this one.
303      *
304      * @param other Another {@code SumOfFourthDeviations} to be combined.
305      * @return {@code this} instance after combining {@code other}.
306      */
307     SumOfFourthDeviations combine(SumOfFourthDeviations other) {
308         if (n == 0) {
309             sumFourthDev = other.sumFourthDev;
310         } else if (other.n != 0) {
311             // Avoid overflow to compute the difference.
312             final double halfDiffOfMean = getFirstMomentHalfDifference(other);
313             sumFourthDev += other.sumFourthDev;
314             // Add additional terms that do not cancel to zero
315             if (halfDiffOfMean != 0) {
316                 final double n1 = n;
317                 final double n2 = other.n;
318                 if (n1 == n2) {
319                     // Optimisation where sizes are equal in double-precision.
320                     // This is of use in JDK streams as spliterators use a divide by two
321                     // strategy for parallel streams.
322                     // Note: (n1 * n2) * ((n1+n2)^2 - 3 * (n1 * n2)) == n^4
323                     sumFourthDev +=
324                         (sumCubedDev - other.sumCubedDev) * halfDiffOfMean * 4 +
325                         (sumSquaredDev + other.sumSquaredDev) * (halfDiffOfMean * halfDiffOfMean) * 6 +
326                         pow4(halfDiffOfMean) * n1 * 2;
327                 } else {
328                     final double n1n2 = n1 + n2;
329                     final double dm = 2 * (halfDiffOfMean / n1n2);
330                     // Use the rearrangement for parity with the accept method
331                     // n1*n1 - n1*n2 + n2*n2 == (n1+n2)^2 - 3*n1*n2
332                     sumFourthDev +=
333                         (sumCubedDev * n2 - other.sumCubedDev * n1) * dm * 4 +
334                         (n2 * n2 * sumSquaredDev + n1 * n1 * other.sumSquaredDev) * (dm * dm) * 6 +
335                         (n1 * n2) * (n1n2 * n1n2 - 3 * (n1 * n2)) * pow4(dm) * n1n2;
336                 }
337             }
338         }
339         super.combine(other);
340         return this;
341     }
342 }