1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.statistics.examples.jmh.descriptive;
18
19 import java.util.function.DoubleSupplier;
20 import java.util.function.IntConsumer;
21 import java.util.function.LongConsumer;
22
23 /**
24 * Computes the sum of cubed deviations from the sample mean. This
25 * statistic is related to the third moment.
26 *
27 * <p>This is a specialized version of
28 * {@code o.a.c.statistics.descriptive.SumOfCubedDeviations}
29 * for integer ({@code int/long})
30 * arguments. See that class for details of the updating algorithm.
31 *
32 * <p>When the input are integer values bounded to 2<sup>63</sup> some functionality
33 * can be dropped from the {@code double} argument version:
34 *
35 * <ul>
36 * <li>No overflow is possible given the maximum values of intermediate terms;
37 * allows removing the scaling of input.</li>
38 * <li>No handling of infinite or NaN is required; allows removing computation
39 * of the non-finite value.</li>
40 * </ul>
41 *
42 * <p>This class does not copy the moment hierarchy from the {@code double}
43 * argument version. The lower moments (mean and variance) are computed
44 * using exact integer arithmetic and not using the updating algorithm.
45 * This allows some reuse of factors within the update methods to compute
46 * first, second and third order moments together.
47 *
48 * <p>This class is used for benchmarking. Performance is only a small
49 * percentage faster than using the {@code double} consuming version within
50 * {@link org.apache.commons.statistics.descriptive.Skewness}. It combines logic
51 * from {@code SumOfCubedDeviations} and {@code Skewness}.
52 *
53 * @since 1.1
54 */
55 class IntegerSumOfCubedDeviations implements IntConsumer, LongConsumer, DoubleSupplier {
56 /** 2, the length limit where the biased skewness is undefined.
57 * This limit effectively imposes the result m3 / m2^1.5 = 0 / 0 = NaN when 1 value
58 * has been added. Note that when more samples are added and the variance
59 * approaches zero the result is also returned as NaN. */
60 private static final int LENGTH_TWO = 2;
61 /** 3, the length limit where the unbiased skewness is undefined. */
62 private static final int LENGTH_THREE = 3;
63
64 /** Count of values that have been added. */
65 protected long n;
66
67 /**
68 * Deviation of most recently added value from the previous first moment:
69 * (x - m1).
70 * Retained to prevent repeated computation in higher order moments.
71 */
72 protected double dev;
73
74 /**
75 * Deviation of most recently added value from the previous first moment,
76 * normalized by current sample size: (x - m1) / n.
77 * Retained to prevent repeated computation in higher order moments.
78 */
79 protected double nDev;
80
81 /**
82 * Deviation of most recently added value from the previous first moment,
83 * squared, multiplied by previsou sample size and normalised by current
84 * sample size: (n-1) * (x - m1)^2 / n.
85 * Retained to prevent repeated computation in higher order moments.
86 */
87 protected double term1;
88
89 /** First moment of values that have been added. */
90 protected double m1;
91
92 /** Sum of squared deviations of the values that have been added. */
93 protected double sumSquaredDev;
94
95 /** Sum of cubed deviations of the values that have been added. */
96 protected double sumCubedDev;
97
98 /** Flag to control if the statistic is biased, or should use a bias correction. */
99 private boolean biased;
100
101 /**
102 * Create an instance.
103 */
104 IntegerSumOfCubedDeviations() {
105 // No-op
106 }
107
108 /**
109 * Updates the state of the statistic to reflect the addition of {@code value}.
110 *
111 * @param value Value.
112 */
113 @Override
114 public void accept(int value) {
115 update(value);
116 }
117
118 /**
119 * Updates the state of the statistic to reflect the addition of {@code value}.
120 *
121 * @param value Value.
122 */
123 @Override
124 public void accept(long value) {
125 update(value);
126 }
127
128 /**
129 * Updates the state of the statistic to reflect the addition of {@code value}.
130 *
131 * @param value Value.
132 */
133 void update(double value) {
134 final long np = n;
135 dev = value - m1;
136 nDev = dev / ++n;
137 term1 = np * dev * nDev;
138
139 sumCubedDev = sumCubedDev -
140 sumSquaredDev * nDev * 3 +
141 (np - 1) * term1 * nDev;
142 sumSquaredDev += term1;
143 m1 += nDev;
144 }
145
146 /**
147 * Gets the skewness of all input values.
148 *
149 * <p>When fewer than 3 values have been added, the result is {@code NaN}.
150 *
151 * @return skewness of all values.
152 */
153 @Override
154 public double getAsDouble() {
155 // Adapted from o.a.c.statistics.descriptive.Skewness
156 if (n < (biased ? LENGTH_TWO : LENGTH_THREE)) {
157 return Double.NaN;
158 }
159 final double x2 = sumSquaredDev;
160 // Avoid a divide by zero; for a negligible variance return NaN.
161 // Note: Commons Math returns zero if variance is < 1e-19.
162 final double m2 = x2 / n;
163 // Simple check for zero variance
164 if (m2 == 0) {
165 return Double.NaN;
166 }
167 // denom = pow(m2, 1.5)
168 final double denom = Math.sqrt(m2) * m2;
169 final double x3 = sumCubedDev;
170 final double m3 = x3 / n;
171 double g1 = m3 / denom;
172 if (!biased) {
173 final double n0 = n;
174 g1 *= Math.sqrt(n0 * (n0 - 1)) / (n0 - 2);
175 }
176 return g1;
177 }
178
179 /**
180 * Sets the value of the biased flag. The default value is {@code false}.
181 * See {@link org.apache.commons.statistics.descriptive.Skewness} for details on the computing algorithm.
182 *
183 * <p>This flag only controls the final computation of the statistic.
184 *
185 * @param v Value.
186 */
187 public void setBiased(boolean v) {
188 biased = v;
189 }
190 }