View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.statistics.examples.jmh.descriptive;
18  
19  import java.util.function.DoubleSupplier;
20  import java.util.function.IntConsumer;
21  import java.util.function.LongConsumer;
22  
23  /**
24   * Computes the sum of cubed deviations from the sample mean. This
25   * statistic is related to the third moment.
26   *
27   * <p>This is a specialized version of
28   * {@code o.a.c.statistics.descriptive.SumOfCubedDeviations}
29   * for integer ({@code int/long})
30   * arguments. See that class for details of the updating algorithm.
31   *
32   * <p>When the input are integer values bounded to 2<sup>63</sup> some functionality
33   * can be dropped from the {@code double} argument version:
34   *
35   * <ul>
36   *  <li>No overflow is possible given the maximum values of intermediate terms;
37   *      allows removing the scaling of input.</li>
38   *  <li>No handling of infinite or NaN is required; allows removing computation
39   *      of the non-finite value.</li>
40   * </ul>
41   *
42   * <p>This class does not copy the moment hierarchy from the {@code double}
43   * argument version. The lower moments (mean and variance) are computed
44   * using exact integer arithmetic and not using the updating algorithm.
45   * This allows some reuse of factors within the update methods to compute
46   * first, second and third order moments together.
47   *
48   * <p>This class is used for benchmarking. Performance is only a small
49   * percentage faster than using the {@code double} consuming version within
50   * {@link org.apache.commons.statistics.descriptive.Skewness}. It combines logic
51   * from {@code SumOfCubedDeviations} and {@code Skewness}.
52   *
53   * @since 1.1
54   */
55  class IntegerSumOfCubedDeviations implements IntConsumer, LongConsumer, DoubleSupplier {
56      /** 2, the length limit where the biased skewness is undefined.
57       * This limit effectively imposes the result m3 / m2^1.5 = 0 / 0 = NaN when 1 value
58       * has been added. Note that when more samples are added and the variance
59       * approaches zero the result is also returned as NaN. */
60      private static final int LENGTH_TWO = 2;
61      /** 3, the length limit where the unbiased skewness is undefined. */
62      private static final int LENGTH_THREE = 3;
63  
64      /** Count of values that have been added. */
65      protected long n;
66  
67      /**
68       * Deviation of most recently added value from the previous first moment:
69       * (x - m1).
70       * Retained to prevent repeated computation in higher order moments.
71       */
72      protected double dev;
73  
74      /**
75       * Deviation of most recently added value from the previous first moment,
76       * normalized by current sample size: (x - m1) / n.
77       * Retained to prevent repeated computation in higher order moments.
78       */
79      protected double nDev;
80  
81      /**
82       * Deviation of most recently added value from the previous first moment,
83       * squared, multiplied by previsou sample size and normalised by current
84       * sample size: (n-1) * (x - m1)^2 / n.
85       * Retained to prevent repeated computation in higher order moments.
86       */
87      protected double term1;
88  
89      /** First moment of values that have been added. */
90      protected double m1;
91  
92      /** Sum of squared deviations of the values that have been added. */
93      protected double sumSquaredDev;
94  
95      /** Sum of cubed deviations of the values that have been added. */
96      protected double sumCubedDev;
97  
98      /** Flag to control if the statistic is biased, or should use a bias correction. */
99      private boolean biased;
100 
101     /**
102      * Create an instance.
103      */
104     IntegerSumOfCubedDeviations() {
105         // No-op
106     }
107 
108     /**
109      * Updates the state of the statistic to reflect the addition of {@code value}.
110      *
111      * @param value Value.
112      */
113     @Override
114     public void accept(int value) {
115         update(value);
116     }
117 
118     /**
119      * Updates the state of the statistic to reflect the addition of {@code value}.
120      *
121      * @param value Value.
122      */
123     @Override
124     public void accept(long value) {
125         update(value);
126     }
127 
128     /**
129      * Updates the state of the statistic to reflect the addition of {@code value}.
130      *
131      * @param value Value.
132      */
133     void update(double value) {
134         final long np = n;
135         dev = value - m1;
136         nDev = dev / ++n;
137         term1 = np * dev * nDev;
138 
139         sumCubedDev = sumCubedDev -
140             sumSquaredDev * nDev * 3 +
141             (np - 1) * term1 * nDev;
142         sumSquaredDev += term1;
143         m1 += nDev;
144     }
145 
146     /**
147      * Gets the skewness of all input values.
148      *
149      * <p>When fewer than 3 values have been added, the result is {@code NaN}.
150      *
151      * @return skewness of all values.
152      */
153     @Override
154     public double getAsDouble() {
155         // Adapted from o.a.c.statistics.descriptive.Skewness
156         if (n < (biased ? LENGTH_TWO : LENGTH_THREE)) {
157             return Double.NaN;
158         }
159         final double x2 = sumSquaredDev;
160         // Avoid a divide by zero; for a negligible variance return NaN.
161         // Note: Commons Math returns zero if variance is < 1e-19.
162         final double m2 = x2 / n;
163         // Simple check for zero variance
164         if (m2 == 0) {
165             return Double.NaN;
166         }
167         // denom = pow(m2, 1.5)
168         final double denom = Math.sqrt(m2) * m2;
169         final double x3 = sumCubedDev;
170         final double m3 = x3 / n;
171         double g1 = m3 / denom;
172         if (!biased) {
173             final double n0 = n;
174             g1 *= Math.sqrt(n0 * (n0 - 1)) / (n0 - 2);
175         }
176         return g1;
177     }
178 
179     /**
180      * Sets the value of the biased flag. The default value is {@code false}.
181      * See {@link org.apache.commons.statistics.descriptive.Skewness} for details on the computing algorithm.
182      *
183      * <p>This flag only controls the final computation of the statistic.
184      *
185      * @param v Value.
186      */
187     public void setBiased(boolean v) {
188         biased = v;
189     }
190 }