1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.statistics.descriptive;
18
19 /**
20 * Computes the kurtosis of the available values. The kurtosis is defined as:
21 *
22 * <p>\[ \operatorname{Kurt} = \operatorname{E}\left[ \left(\frac{X-\mu}{\sigma}\right)^4 \right] = \frac{\mu_4}{\sigma^4} \]
23 *
24 * <p>where \( \mu \) is the mean of \( X \), \( \sigma \) is the standard deviation of \( X \),
25 * \( \operatorname{E} \) represents the expectation operator, and \( \mu_4 \) is the fourth
26 * central moment.
27 *
28 * <p>The default implementation uses the following definition of the <em>sample kurtosis</em>:
29 *
30 * <p>\[ G_2 = \frac{k_4}{k_2^2} = \;
31 * \frac{n-1}{(n-2)\,(n-3)} \left[(n+1)\,\frac{m_4}{m_{2}^2} - 3\,(n-1) \right] \]
32 *
33 * <p>where \( k_4 \) is the unique symmetric unbiased estimator of the fourth cumulant,
34 * \( k_2 \) is the symmetric unbiased estimator of the second cumulant (i.e. the <em>sample variance</em>),
35 * \( m_4 \) is the fourth sample moment about the mean,
36 * \( m_2 \) is the second sample moment about the mean,
37 * \( \overline{x} \) is the sample mean,
38 * and \( n \) is the number of samples.
39 *
40 * <ul>
41 * <li>The result is {@code NaN} if less than 4 values are added.
42 * <li>The result is {@code NaN} if any of the values is {@code NaN} or infinite.
43 * <li>The result is {@code NaN} if the sum of the fourth deviations from the mean is infinite.
44 * </ul>
45 *
46 * <p>The default computation is for the adjusted Fisher–Pearson standardized moment coefficient
47 * \( G_2 \). If the {@link #setBiased(boolean) biased} option is enabled the following equation
48 * applies:
49 *
50 * <p>\[ g_2 = \frac{m_4}{m_2^2} - 3 = \frac{\tfrac{1}{n} \sum_{i=1}^n (x_i-\overline{x})^4}
51 * {\left[\tfrac{1}{n} \sum_{i=1}^n (x_i-\overline{x})^2 \right]^2} - 3 \]
52 *
53 * <p>In this case the computation only requires 2 values are added (i.e. the result is
54 * {@code NaN} if less than 2 values are added).
55 *
56 * <p>Note that the computation requires division by the second central moment \( m_2 \).
57 * If this is effectively zero then the result is {@code NaN}. This occurs when the value
58 * \( m_2 \) approaches the machine precision of the mean: \( m_2 \le (m_1 \times 10^{-15})^2 \).
59 *
60 * <p>The {@link #accept(double)} method uses a recursive updating algorithm.
61 *
62 * <p>The {@link #of(double...)} method uses a two-pass algorithm, starting with computation
63 * of the mean, and then computing the sum of deviations in a second pass.
64 *
65 * <p>Note that adding values using {@link #accept(double) accept} and then executing
66 * {@link #getAsDouble() getAsDouble} will
67 * sometimes give a different result than executing
68 * {@link #of(double...) of} with the full array of values. The former approach
69 * should only be used when the full array of values is not available.
70 *
71 * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
72 * This implementation does not check for overflow of the count.
73 *
74 * <p>This class is designed to work with (though does not require)
75 * {@linkplain java.util.stream streams}.
76 *
77 * <p><strong>Note that this instance is not synchronized.</strong> If
78 * multiple threads access an instance of this class concurrently, and at least
79 * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or
80 * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally.
81 *
82 * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept}
83 * and {@link StatisticAccumulator#combine(StatisticResult) combine}
84 * as {@code accumulator} and {@code combiner} functions of
85 * {@link java.util.stream.Collector Collector} on a parallel stream,
86 * because the parallel instance of {@link java.util.stream.Stream#collect Stream.collect()}
87 * provides the necessary partitioning, isolation, and merging of results for
88 * safe and efficient parallel execution.
89 *
90 * @see <a href="https://en.wikipedia.org/wiki/Kurtosis">Kurtosis (Wikipedia)</a>
91 * @since 1.1
92 */
93 public final class Kurtosis implements DoubleStatistic, StatisticAccumulator<Kurtosis> {
94 /** 2, the length limit where the biased skewness is undefined.
95 * This limit effectively imposes the result m4 / m2^2 = 0 / 0 = NaN when 1 value
96 * has been added. Note that when more samples are added and the variance
97 * approaches zero the result is also returned as NaN. */
98 private static final int LENGTH_TWO = 2;
99 /** 4, the length limit where the kurtosis is undefined. */
100 private static final int LENGTH_FOUR = 4;
101
102 /**
103 * An instance of {@link SumOfFourthDeviations}, which is used to
104 * compute the kurtosis.
105 */
106 private final SumOfFourthDeviations sq;
107
108 /** Flag to control if the statistic is biased, or should use a bias correction. */
109 private boolean biased;
110
111 /**
112 * Create an instance.
113 */
114 private Kurtosis() {
115 this(new SumOfFourthDeviations());
116 }
117
118 /**
119 * Creates an instance with the sum of fourth deviations from the mean.
120 *
121 * @param sq Sum of fourth deviations.
122 */
123 Kurtosis(SumOfFourthDeviations sq) {
124 this.sq = sq;
125 }
126
127 /**
128 * Creates an instance.
129 *
130 * <p>The initial result is {@code NaN}.
131 *
132 * @return {@code Kurtosis} instance.
133 */
134 public static Kurtosis create() {
135 return new Kurtosis();
136 }
137
138 /**
139 * Returns an instance populated using the input {@code values}.
140 *
141 * <p>Note: {@code Kurtosis} computed using {@link #accept(double) accept} may be
142 * different from this instance.
143 *
144 * @param values Values.
145 * @return {@code Kurtosis} instance.
146 */
147 public static Kurtosis of(double... values) {
148 return new Kurtosis(SumOfFourthDeviations.of(values));
149 }
150
151 /**
152 * Returns an instance populated using the specified range of {@code values}.
153 *
154 * <p>Note: {@code Kurtosis} computed using {@link #accept(double) accept} may be
155 * different from this instance.
156 *
157 * @param values Values.
158 * @param from Inclusive start of the range.
159 * @param to Exclusive end of the range.
160 * @return {@code Kurtosis} instance.
161 * @throws IndexOutOfBoundsException if the sub-range is out of bounds
162 * @since 1.2
163 */
164 public static Kurtosis ofRange(double[] values, int from, int to) {
165 Statistics.checkFromToIndex(from, to, values.length);
166 return new Kurtosis(SumOfFourthDeviations.ofRange(values, from, to));
167 }
168
169 /**
170 * Returns an instance populated using the input {@code values}.
171 *
172 * <p>Note: {@code Kurtosis} computed using {@link #accept(double) accept} may be
173 * different from this instance.
174 *
175 * @param values Values.
176 * @return {@code Kurtosis} instance.
177 */
178 public static Kurtosis of(int... values) {
179 return new Kurtosis(SumOfFourthDeviations.of(values));
180 }
181
182 /**
183 * Returns an instance populated using the specified range of {@code values}.
184 *
185 * <p>Note: {@code Kurtosis} computed using {@link #accept(double) accept} may be
186 * different from this instance.
187 *
188 * @param values Values.
189 * @param from Inclusive start of the range.
190 * @param to Exclusive end of the range.
191 * @return {@code Kurtosis} instance.
192 * @throws IndexOutOfBoundsException if the sub-range is out of bounds
193 * @since 1.2
194 */
195 public static Kurtosis ofRange(int[] values, int from, int to) {
196 Statistics.checkFromToIndex(from, to, values.length);
197 return new Kurtosis(SumOfFourthDeviations.ofRange(values, from, to));
198 }
199
200 /**
201 * Returns an instance populated using the input {@code values}.
202 *
203 * <p>Note: {@code Kurtosis} computed using {@link #accept(double) accept} may be
204 * different from this instance.
205 *
206 * @param values Values.
207 * @return {@code Kurtosis} instance.
208 */
209 public static Kurtosis of(long... values) {
210 return new Kurtosis(SumOfFourthDeviations.of(values));
211 }
212
213 /**
214 * Returns an instance populated using the specified range of {@code values}.
215 *
216 * <p>Note: {@code Kurtosis} computed using {@link #accept(double) accept} may be
217 * different from this instance.
218 *
219 * @param values Values.
220 * @param from Inclusive start of the range.
221 * @param to Exclusive end of the range.
222 * @return {@code Kurtosis} instance.
223 * @throws IndexOutOfBoundsException if the sub-range is out of bounds
224 * @since 1.2
225 */
226 public static Kurtosis ofRange(long[] values, int from, int to) {
227 Statistics.checkFromToIndex(from, to, values.length);
228 return new Kurtosis(SumOfFourthDeviations.ofRange(values, from, to));
229 }
230
231 /**
232 * Updates the state of the statistic to reflect the addition of {@code value}.
233 *
234 * @param value Value.
235 */
236 @Override
237 public void accept(double value) {
238 sq.accept(value);
239 }
240
241 /**
242 * Gets the kurtosis of all input values.
243 *
244 * <p>When fewer than 4 values have been added, the result is {@code NaN}.
245 *
246 * @return kurtosis of all values.
247 */
248 @Override
249 public double getAsDouble() {
250 // This method checks the sum of squared or fourth deviations is finite
251 // to provide a consistent NaN when the computation is not possible.
252
253 if (sq.n < (biased ? LENGTH_TWO : LENGTH_FOUR)) {
254 return Double.NaN;
255 }
256 final double x2 = sq.getSumOfSquaredDeviations();
257 if (!Double.isFinite(x2)) {
258 return Double.NaN;
259 }
260 final double x4 = sq.getSumOfFourthDeviations();
261 if (!Double.isFinite(x4)) {
262 return Double.NaN;
263 }
264 // Avoid a divide by zero; for a negligible variance return NaN.
265 // Note: Commons Math returns zero if variance is < 1e-19.
266 final double m2 = x2 / sq.n;
267 if (Statistics.zeroVariance(sq.getFirstMoment(), m2)) {
268 return Double.NaN;
269 }
270 final double m4 = x4 / sq.n;
271 if (biased) {
272 return m4 / (m2 * m2) - 3;
273 }
274 final double n = sq.n;
275 return ((n * n - 1) * m4 / (m2 * m2) - 3 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3));
276 }
277
278 @Override
279 public Kurtosis combine(Kurtosis other) {
280 sq.combine(other.sq);
281 return this;
282 }
283
284 /**
285 * Sets the value of the biased flag. The default value is {@code false}.
286 * See {@link Kurtosis} for details on the computing algorithm.
287 *
288 * <p>This flag only controls the final computation of the statistic. The value of this flag
289 * will not affect compatibility between instances during a {@link #combine(Kurtosis) combine}
290 * operation.
291 *
292 * @param v Value.
293 * @return {@code this} instance
294 */
295 public Kurtosis setBiased(boolean v) {
296 biased = v;
297 return this;
298 }
299 }