1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math4.legacy.stat;
18
19 import java.util.List;
20
21 import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
22 import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
23 import org.apache.commons.math4.legacy.exception.NoDataException;
24 import org.apache.commons.math4.legacy.exception.NotPositiveException;
25 import org.apache.commons.math4.legacy.exception.NullArgumentException;
26 import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
27 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
28 import org.apache.commons.math4.legacy.stat.descriptive.DescriptiveStatistics;
29 import org.apache.commons.math4.legacy.stat.descriptive.UnivariateStatistic;
30 import org.apache.commons.math4.legacy.stat.descriptive.moment.GeometricMean;
31 import org.apache.commons.math4.legacy.stat.descriptive.moment.Mean;
32 import org.apache.commons.math4.legacy.stat.descriptive.moment.Variance;
33 import org.apache.commons.math4.legacy.stat.descriptive.rank.Max;
34 import org.apache.commons.math4.legacy.stat.descriptive.rank.Min;
35 import org.apache.commons.math4.legacy.stat.descriptive.rank.Percentile;
36 import org.apache.commons.math4.legacy.stat.descriptive.summary.Product;
37 import org.apache.commons.math4.legacy.stat.descriptive.summary.Sum;
38 import org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfLogs;
39 import org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfSquares;
40
41 /**
42 * StatUtils provides static methods for computing statistics based on data
43 * stored in double[] arrays.
44 */
45 public final class StatUtils {
46
47 /** Shared {@link Sum} evaluator that both {@code sum} overloads delegate to. */
48 private static final UnivariateStatistic SUM = new Sum();
49
50 /** Shared {@link SumOfSquares} evaluator that both {@code sumSq} overloads delegate to. */
51 private static final UnivariateStatistic SUM_OF_SQUARES = new SumOfSquares();
52
53 /** Shared {@link Product} evaluator that both {@code product} overloads delegate to. */
54 private static final UnivariateStatistic PRODUCT = new Product();
55
56 /** Shared {@link SumOfLogs} evaluator that both {@code sumLog} overloads delegate to. */
57 private static final UnivariateStatistic SUM_OF_LOGS = new SumOfLogs();
58
59 /** Shared {@link Min} evaluator that both {@code min} overloads delegate to. */
60 private static final UnivariateStatistic MIN = new Min();
61
62 /** Shared {@link Max} evaluator that both {@code max} overloads delegate to. */
63 private static final UnivariateStatistic MAX = new Max();
64
65 /** Shared {@link Mean} evaluator that both {@code mean} overloads delegate to. */
66 private static final UnivariateStatistic MEAN = new Mean();
67
68 /** Shared default-constructed {@link Variance} used by the {@code variance} overloads; typed concretely because the precomputed-mean overloads are called on it. */
69 private static final Variance VARIANCE = new Variance();
70
71 /** Shared default-constructed {@link Percentile} used by the {@code percentile} overloads, which pass the quantile {@code p} on each call. */
72 private static final Percentile PERCENTILE = new Percentile();
73
74 /** Shared {@link GeometricMean} evaluator that both {@code geometricMean} overloads delegate to. */
75 private static final GeometricMean GEOMETRIC_MEAN = new GeometricMean();
76
77 /**
78 * Private constructor: prevents instantiation from outside this class.
79 */
80 private StatUtils() {
81 }
82
83 /**
84 * Returns the sum of the values in the input array, or
85 * <code>Double.NaN</code> if the array is empty.
86 * <p>
87 * Throws <code>IllegalArgumentException</code> if the input array is null.
88 *
89 * @param values array of values to sum
90 * @return the sum of the values or <code>Double.NaN</code> if the array is empty
91 * @throws MathIllegalArgumentException if the array is null
92 */
93 public static double sum(final double[] values) throws MathIllegalArgumentException {
94 return SUM.evaluate(values);
95 }
96
97 /**
98 * Returns the sum of the entries in the specified portion of
99 * the input array, or <code>Double.NaN</code> if the designated subarray is empty.
100 * <p>
101 * Throws <code>IllegalArgumentException</code> if the array is null.
102 *
103 * @param values the input array
104 * @param begin index of the first array element to include
105 * @param length the number of elements to include
106 * @return the sum of the values or Double.NaN if length = 0
107 * @throws MathIllegalArgumentException if the array is null or the array index
108 * parameters are not valid
109 */
110 public static double sum(final double[] values, final int begin, final int length)
111 throws MathIllegalArgumentException {
112 return SUM.evaluate(values, begin, length);
113 }
114
115 /**
116 * Returns the sum of the squares of the entries in the input array, or
117 * <code>Double.NaN</code> if the array is empty.
118 * <p>
119 * Throws <code>IllegalArgumentException</code> if the array is null.
120 *
121 * @param values input array
122 * @return the sum of the squared values or <code>Double.NaN</code> if the array is empty
123 * @throws MathIllegalArgumentException if the array is null
124 */
125 public static double sumSq(final double[] values) throws MathIllegalArgumentException {
126 return SUM_OF_SQUARES.evaluate(values);
127 }
128
129 /**
130 * Returns the sum of the squares of the entries in the specified portion of
131 * the input array, or <code>Double.NaN</code> if the designated subarray
132 * is empty.
133 * <p>
134 * Throws <code>IllegalArgumentException</code> if the array is null.
135 *
136 * @param values the input array
137 * @param begin index of the first array element to include
138 * @param length the number of elements to include
139 * @return the sum of the squares of the values or Double.NaN if length = 0
140 * @throws MathIllegalArgumentException if the array is null or the array index
141 * parameters are not valid
142 */
143 public static double sumSq(final double[] values, final int begin, final int length)
144 throws MathIllegalArgumentException {
145 return SUM_OF_SQUARES.evaluate(values, begin, length);
146 }
147
148 /**
149 * Returns the product of the entries in the input array, or
150 * <code>Double.NaN</code> if the array is empty.
151 * <p>
152 * Throws <code>IllegalArgumentException</code> if the array is null.
153 *
154 * @param values the input array
155 * @return the product of the values or Double.NaN if the array is empty
156 * @throws MathIllegalArgumentException if the array is null
157 */
158 public static double product(final double[] values) throws MathIllegalArgumentException {
159 return PRODUCT.evaluate(values);
160 }
161
162 /**
163 * Returns the product of the entries in the specified portion of
164 * the input array, or <code>Double.NaN</code> if the designated subarray
165 * is empty.
166 * <p>
167 * Throws <code>IllegalArgumentException</code> if the array is null.
168 *
169 * @param values the input array
170 * @param begin index of the first array element to include
171 * @param length the number of elements to include
172 * @return the product of the values or Double.NaN if length = 0
173 * @throws MathIllegalArgumentException if the array is null or the array index
174 * parameters are not valid
175 */
176 public static double product(final double[] values, final int begin, final int length)
177 throws MathIllegalArgumentException {
178 return PRODUCT.evaluate(values, begin, length);
179 }
180
181 /**
182 * Returns the sum of the natural logs of the entries in the input array, or
183 * <code>Double.NaN</code> if the array is empty.
184 * <p>
185 * Throws <code>IllegalArgumentException</code> if the array is null.
186 * <p>
187 * See {@link org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfLogs}.
188 *
189 * @param values the input array
190 * @return the sum of the natural logs of the values or Double.NaN if the array is empty
191 * @throws MathIllegalArgumentException if the array is null
192 */
193 public static double sumLog(final double[] values) throws MathIllegalArgumentException {
194 return SUM_OF_LOGS.evaluate(values);
195 }
196
197 /**
198 * Returns the sum of the natural logs of the entries in the specified portion of
199 * the input array, or <code>Double.NaN</code> if the designated subarray is empty.
200 * <p>
201 * Throws <code>IllegalArgumentException</code> if the array is null.
202 * <p>
203 * See {@link org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfLogs}.
204 *
205 * @param values the input array
206 * @param begin index of the first array element to include
207 * @param length the number of elements to include
208 * @return the sum of the natural logs of the values or Double.NaN if
209 * length = 0
210 * @throws MathIllegalArgumentException if the array is null or the array index
211 * parameters are not valid
212 */
213 public static double sumLog(final double[] values, final int begin, final int length)
214 throws MathIllegalArgumentException {
215 return SUM_OF_LOGS.evaluate(values, begin, length);
216 }
217
218 /**
219 * Returns the arithmetic mean of the entries in the input array, or
220 * <code>Double.NaN</code> if the array is empty.
221 * <p>
222 * Throws <code>IllegalArgumentException</code> if the array is null.
223 * <p>
224 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Mean} for
225 * details on the computing algorithm.
226 *
227 * @param values the input array
228 * @return the mean of the values or Double.NaN if the array is empty
229 * @throws MathIllegalArgumentException if the array is null
230 */
231 public static double mean(final double[] values) throws MathIllegalArgumentException {
232 return MEAN.evaluate(values);
233 }
234
235 /**
236 * Returns the arithmetic mean of the entries in the specified portion of
237 * the input array, or <code>Double.NaN</code> if the designated subarray
238 * is empty.
239 * <p>
240 * Throws <code>IllegalArgumentException</code> if the array is null.
241 * <p>
242 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Mean Mean} for
243 * details on the computing algorithm.
244 *
245 * @param values the input array
246 * @param begin index of the first array element to include
247 * @param length the number of elements to include
248 * @return the mean of the values or Double.NaN if length = 0
249 * @throws MathIllegalArgumentException if the array is null or the array index
250 * parameters are not valid
251 */
252 public static double mean(final double[] values, final int begin, final int length)
253 throws MathIllegalArgumentException {
254 return MEAN.evaluate(values, begin, length);
255 }
256
257 /**
258 * Returns the geometric mean of the entries in the input array, or
259 * <code>Double.NaN</code> if the array is empty.
260 * <p>
261 * Throws <code>IllegalArgumentException</code> if the array is null.
262 * <p>
263 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.GeometricMean GeometricMean}
264 * for details on the computing algorithm.
265 *
266 * @param values the input array
267 * @return the geometric mean of the values or Double.NaN if the array is empty
268 * @throws MathIllegalArgumentException if the array is null
269 */
270 public static double geometricMean(final double[] values) throws MathIllegalArgumentException {
271 return GEOMETRIC_MEAN.evaluate(values);
272 }
273
274 /**
275 * Returns the geometric mean of the entries in the specified portion of
276 * the input array, or <code>Double.NaN</code> if the designated subarray
277 * is empty.
278 * <p>
279 * Throws <code>IllegalArgumentException</code> if the array is null.
280 * <p>
281 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.GeometricMean GeometricMean}
282 * for details on the computing algorithm.
283 *
284 * @param values the input array
285 * @param begin index of the first array element to include
286 * @param length the number of elements to include
287 * @return the geometric mean of the values or Double.NaN if length = 0
288 * @throws MathIllegalArgumentException if the array is null or the array index
289 * parameters are not valid
290 */
291 public static double geometricMean(final double[] values, final int begin, final int length)
292 throws MathIllegalArgumentException {
293 return GEOMETRIC_MEAN.evaluate(values, begin, length);
294 }
295
296 /**
297 * Returns the variance of the entries in the input array, or
298 * <code>Double.NaN</code> if the array is empty.
299 * <p>
300 * This method returns the bias-corrected sample variance (using {@code n - 1} in
301 * the denominator). Use {@link #populationVariance(double[])} for the non-bias-corrected
302 * population variance.
303 * <p>
304 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Variance Variance} for
305 * details on the computing algorithm.
306 * <p>
307 * Returns 0 for a single-value (i.e. length = 1) sample.
308 * <p>
309 * Throws <code>MathIllegalArgumentException</code> if the array is null.
310 *
311 * @param values the input array
312 * @return the variance of the values or Double.NaN if the array is empty
313 * @throws MathIllegalArgumentException if the array is null
314 */
315 public static double variance(final double[] values) throws MathIllegalArgumentException {
316 return VARIANCE.evaluate(values);
317 }
318
319 /**
320 * Returns the variance of the entries in the specified portion of
321 * the input array, or <code>Double.NaN</code> if the designated subarray
322 * is empty.
323 * <p>
324 * This method returns the bias-corrected sample variance (using {@code n - 1} in
325 * the denominator). Use {@link #populationVariance(double[], int, int)} for the non-bias-corrected
326 * population variance.
327 * <p>
328 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Variance Variance} for
329 * details on the computing algorithm.
330 * <p>
331 * Returns 0 for a single-value (i.e. length = 1) sample.
332 * <p>
333 * Throws <code>MathIllegalArgumentException</code> if the array is null or the
334 * array index parameters are not valid.
335 *
336 * @param values the input array
337 * @param begin index of the first array element to include
338 * @param length the number of elements to include
339 * @return the variance of the values or Double.NaN if length = 0
340 * @throws MathIllegalArgumentException if the array is null or the array index
341 * parameters are not valid
342 */
343 public static double variance(final double[] values, final int begin, final int length)
344 throws MathIllegalArgumentException {
345 return VARIANCE.evaluate(values, begin, length);
346 }
347
348 /**
349 * Returns the variance of the entries in the specified portion of
350 * the input array, using the precomputed mean value. Returns
351 * <code>Double.NaN</code> if the designated subarray is empty.
352 * <p>
353 * This method returns the bias-corrected sample variance (using {@code n - 1} in
354 * the denominator). Use {@link #populationVariance(double[], double, int, int)} for
355 * the non-bias-corrected population variance.
356 * <p>
357 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Variance Variance} for
358 * details on the computing algorithm.
359 * <p>
360 * The formula used assumes that the supplied mean value is the arithmetic
361 * mean of the sample data, not a known population parameter. This method
362 * is supplied only to save computation when the mean has already been
363 * computed.
364 * <p>
365 * Returns 0 for a single-value (i.e. length = 1) sample.
366 * <p>
367 * Throws <code>MathIllegalArgumentException</code> if the array is null or the
368 * array index parameters are not valid.
369 *
370 * @param values the input array
371 * @param mean the precomputed mean value
372 * @param begin index of the first array element to include
373 * @param length the number of elements to include
374 * @return the variance of the values or Double.NaN if length = 0
375 * @throws MathIllegalArgumentException if the array is null or the array index
376 * parameters are not valid
377 */
378 public static double variance(final double[] values, final double mean, final int begin, final int length)
379 throws MathIllegalArgumentException {
380 return VARIANCE.evaluate(values, mean, begin, length);
381 }
382
383 /**
384 * Returns the variance of the entries in the input array, using the
385 * precomputed mean value. Returns <code>Double.NaN</code> if the array
386 * is empty.
387 * <p>
388 * This method returns the bias-corrected sample variance (using {@code n - 1} in
389 * the denominator). Use {@link #populationVariance(double[], double)} for the
390 * non-bias-corrected population variance.
391 * <p>
392 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Variance Variance} for
393 * details on the computing algorithm.
394 * <p>
395 * The formula used assumes that the supplied mean value is the arithmetic
396 * mean of the sample data, not a known population parameter. This method
397 * is supplied only to save computation when the mean has already been
398 * computed.
399 * <p>
400 * Returns 0 for a single-value (i.e. length = 1) sample.
401 * <p>
402 * Throws <code>MathIllegalArgumentException</code> if the array is null.
403 *
404 * @param values the input array
405 * @param mean the precomputed mean value
406 * @return the variance of the values or Double.NaN if the array is empty
407 * @throws MathIllegalArgumentException if the array is null
408 */
409 public static double variance(final double[] values, final double mean) throws MathIllegalArgumentException {
410 return VARIANCE.evaluate(values, mean);
411 }
412
413 /**
414 * Returns the <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance">
415 * population variance</a> of the entries in the input array, or
416 * <code>Double.NaN</code> if the array is empty.
417 * <p>
418 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Variance Variance} for
419 * details on the formula and computing algorithm.
420 * <p>
421 * Returns 0 for a single-value (i.e. length = 1) sample.
422 * <p>
423 * Throws <code>MathIllegalArgumentException</code> if the array is null.
424 *
425 * @param values the input array
426 * @return the population variance of the values or Double.NaN if the array is empty
427 * @throws MathIllegalArgumentException if the array is null
428 */
429 public static double populationVariance(final double[] values) throws MathIllegalArgumentException {
430 return new Variance(false).evaluate(values);
431 }
432
433 /**
434 * Returns the <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance">
435 * population variance</a> of the entries in the specified portion of
436 * the input array, or <code>Double.NaN</code> if the designated subarray
437 * is empty.
438 * <p>
439 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Variance Variance} for
440 * details on the computing algorithm.
441 * <p>
442 * Returns 0 for a single-value (i.e. length = 1) sample.
443 * <p>
444 * Throws <code>MathIllegalArgumentException</code> if the array is null or the
445 * array index parameters are not valid.
446 *
447 * @param values the input array
448 * @param begin index of the first array element to include
449 * @param length the number of elements to include
450 * @return the population variance of the values or Double.NaN if length = 0
451 * @throws MathIllegalArgumentException if the array is null or the array index
452 * parameters are not valid
453 */
454 public static double populationVariance(final double[] values, final int begin, final int length)
455 throws MathIllegalArgumentException {
456 return new Variance(false).evaluate(values, begin, length);
457 }
458
459 /**
460 * Returns the <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance">
461 * population variance</a> of the entries in the specified portion of
462 * the input array, using the precomputed mean value. Returns
463 * <code>Double.NaN</code> if the designated subarray is empty.
464 * <p>
465 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Variance Variance} for
466 * details on the computing algorithm.
467 * <p>
468 * The formula used assumes that the supplied mean value is the arithmetic
469 * mean of the sample data, not a known population parameter. This method
470 * is supplied only to save computation when the mean has already been
471 * computed.
472 * <p>
473 * Returns 0 for a single-value (i.e. length = 1) sample.
474 * <p>
475 * Throws <code>MathIllegalArgumentException</code> if the array is null or the
476 * array index parameters are not valid.
477 *
478 * @param values the input array
479 * @param mean the precomputed mean value
480 * @param begin index of the first array element to include
481 * @param length the number of elements to include
482 * @return the population variance of the values or Double.NaN if length = 0
483 * @throws MathIllegalArgumentException if the array is null or the array index
484 * parameters are not valid
485 */
486 public static double populationVariance(final double[] values, final double mean,
487 final int begin, final int length)
488 throws MathIllegalArgumentException {
489 return new Variance(false).evaluate(values, mean, begin, length);
490 }
491
492 /**
493 * Returns the <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance">
494 * population variance</a> of the entries in the input array, using the precomputed
495 * mean value. Returns <code>Double.NaN</code> if the array is empty.
496 * <p>
497 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Variance Variance} for
498 * details on the computing algorithm.
499 * <p>
500 * The formula used assumes that the supplied mean value is the arithmetic
501 * mean of the sample data, not a known population parameter. This method is
502 * supplied only to save computation when the mean has already been computed.
503 * <p>
504 * Returns 0 for a single-value (i.e. length = 1) sample.
505 * <p>
506 * Throws <code>MathIllegalArgumentException</code> if the array is null.
507 *
508 * @param values the input array
509 * @param mean the precomputed mean value
510 * @return the population variance of the values or Double.NaN if the array is empty
511 * @throws MathIllegalArgumentException if the array is null
512 */
513 public static double populationVariance(final double[] values, final double mean)
514 throws MathIllegalArgumentException {
515 return new Variance(false).evaluate(values, mean);
516 }
517
518 /**
519 * Returns the maximum of the entries in the input array, or
520 * <code>Double.NaN</code> if the array is empty.
521 * <p>
522 * Throws <code>MathIllegalArgumentException</code> if the array is null.
523 * <ul>
524 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
525 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
526 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
527 * the result is <code>Double.POSITIVE_INFINITY.</code></li>
528 * </ul>
529 *
530 * @param values the input array
531 * @return the maximum of the values or Double.NaN if the array is empty
532 * @throws MathIllegalArgumentException if the array is null
533 */
534 public static double max(final double[] values) throws MathIllegalArgumentException {
535 return MAX.evaluate(values);
536 }
537
538 /**
539 * Returns the maximum of the entries in the specified portion of the input array,
540 * or <code>Double.NaN</code> if the designated subarray is empty.
541 * <p>
542 * Throws <code>MathIllegalArgumentException</code> if the array is null or
543 * the array index parameters are not valid.
544 * <ul>
545 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
546 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
547 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
548 * the result is <code>Double.POSITIVE_INFINITY.</code></li>
549 * </ul>
550 *
551 * @param values the input array
552 * @param begin index of the first array element to include
553 * @param length the number of elements to include
554 * @return the maximum of the values or Double.NaN if length = 0
555 * @throws MathIllegalArgumentException if the array is null or the array index
556 * parameters are not valid
557 */
558 public static double max(final double[] values, final int begin, final int length)
559 throws MathIllegalArgumentException {
560 return MAX.evaluate(values, begin, length);
561 }
562
563 /**
564 * Returns the minimum of the entries in the input array, or
565 * <code>Double.NaN</code> if the array is empty.
566 * <p>
567 * Throws <code>MathIllegalArgumentException</code> if the array is null.
568 * <ul>
569 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
570 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
571 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
572 * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
573 * </ul>
574 *
575 * @param values the input array
576 * @return the minimum of the values or Double.NaN if the array is empty
577 * @throws MathIllegalArgumentException if the array is null
578 */
579 public static double min(final double[] values) throws MathIllegalArgumentException {
580 return MIN.evaluate(values);
581 }
582
583 /**
584 * Returns the minimum of the entries in the specified portion of the input array,
585 * or <code>Double.NaN</code> if the designated subarray is empty.
586 * <p>
587 * Throws <code>MathIllegalArgumentException</code> if the array is null or
588 * the array index parameters are not valid.
589 * <ul>
590 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
591 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
592 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
593 * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
594 * </ul>
595 *
596 * @param values the input array
597 * @param begin index of the first array element to include
598 * @param length the number of elements to include
599 * @return the minimum of the values or Double.NaN if length = 0
600 * @throws MathIllegalArgumentException if the array is null or the array index
601 * parameters are not valid
602 */
603 public static double min(final double[] values, final int begin, final int length)
604 throws MathIllegalArgumentException {
605 return MIN.evaluate(values, begin, length);
606 }
607
608 /**
609 * Returns an estimate of the <code>p</code>th percentile of the values
610 * in the <code>values</code> array.
611 * <ul>
612 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
613 * <code>0</code></li>
614 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
615 * if <code>values</code> has length <code>1</code></li>
616 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
617 * is null or p is not a valid quantile value (p must be greater than 0
618 * and less than or equal to 100)</li>
619 * </ul>
620 * <p>
621 * See {@link org.apache.commons.math4.legacy.stat.descriptive.rank.Percentile Percentile}
622 * for a description of the percentile estimation algorithm used.
623 *
624 * @param values input array of values
625 * @param p the percentile value to compute
626 * @return the percentile value or Double.NaN if the array is empty
627 * @throws MathIllegalArgumentException if <code>values</code> is null or p is invalid
628 */
629 public static double percentile(final double[] values, final double p) throws MathIllegalArgumentException {
630 return PERCENTILE.evaluate(values,p);
631 }
632
633 /**
634 * Returns an estimate of the <code>p</code>th percentile of the values
635 * in the <code>values</code> array, starting with the element in (0-based)
636 * position <code>begin</code> in the array and including <code>length</code>
637 * values.
638 * <ul>
639 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
640 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
641 * if <code>length = 1 </code></li>
642 * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
643 * is null, <code>begin</code> or <code>length</code> is invalid, or
644 * <code>p</code> is not a valid quantile value (p must be greater than 0
645 * and less than or equal to 100)</li>
646 * </ul>
647 * <p>
648 * See {@link org.apache.commons.math4.legacy.stat.descriptive.rank.Percentile Percentile}
649 * for a description of the percentile estimation algorithm used.
650 *
651 * @param values array of input values
652 * @param p the percentile to compute
653 * @param begin the first (0-based) element to include in the computation
654 * @param length the number of array elements to include
655 * @return the percentile value
656 * @throws MathIllegalArgumentException if the parameters are not valid or the input array is null
657 */
658 public static double percentile(final double[] values, final int begin, final int length, final double p)
659 throws MathIllegalArgumentException {
660 return PERCENTILE.evaluate(values, begin, length, p);
661 }
662
663 /**
664 * Returns the sum of the (signed) differences between corresponding elements of the
665 * input arrays -- i.e., sum(sample1[i] - sample2[i]).
666 *
667 * @param sample1 the first array
668 * @param sample2 the second array
669 * @return sum of paired differences
670 * @throws DimensionMismatchException if the arrays do not have the same (positive) length.
671 * @throws NoDataException if the sample arrays are empty.
672 */
673 public static double sumDifference(final double[] sample1, final double[] sample2)
674 throws DimensionMismatchException, NoDataException {
675
676 int n = sample1.length;
677 if (n != sample2.length) {
678 throw new DimensionMismatchException(n, sample2.length);
679 }
680 if (n <= 0) {
681 throw new NoDataException(LocalizedFormats.INSUFFICIENT_DIMENSION);
682 }
683 double result = 0;
684 for (int i = 0; i < n; i++) {
685 result += sample1[i] - sample2[i];
686 }
687 return result;
688 }
689
690 /**
691 * Returns the mean of the (signed) differences between corresponding elements of the
692 * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length.
693 *
694 * @param sample1 the first array
695 * @param sample2 the second array
696 * @return mean of paired differences
697 * @throws DimensionMismatchException if the arrays do not have the same (positive) length.
698 * @throws NoDataException if the sample arrays are empty.
699 */
700 public static double meanDifference(final double[] sample1, final double[] sample2)
701 throws DimensionMismatchException, NoDataException {
702 return sumDifference(sample1, sample2) / sample1.length;
703 }
704
705 /**
706 * Returns the variance of the (signed) differences between corresponding elements of the
707 * input arrays -- i.e., var(sample1[i] - sample2[i]).
708 *
709 * @param sample1 the first array
710 * @param sample2 the second array
711 * @param meanDifference the mean difference between corresponding entries
712 * @return variance of paired differences
713 * @throws DimensionMismatchException if the arrays do not have the same length.
714 * @throws NumberIsTooSmallException if the arrays length is less than 2.
715 * @see #meanDifference(double[],double[])
716 */
717 public static double varianceDifference(final double[] sample1, final double[] sample2, double meanDifference)
718 throws DimensionMismatchException, NumberIsTooSmallException {
719
720 double sum1 = 0d;
721 double sum2 = 0d;
722 double diff = 0d;
723 int n = sample1.length;
724 if (n != sample2.length) {
725 throw new DimensionMismatchException(n, sample2.length);
726 }
727 if (n < 2) {
728 throw new NumberIsTooSmallException(n, 2, true);
729 }
730 for (int i = 0; i < n; i++) {
731 diff = sample1[i] - sample2[i];
732 sum1 += (diff - meanDifference) *(diff - meanDifference);
733 sum2 += diff - meanDifference;
734 }
735 return (sum1 - (sum2 * sum2 / n)) / (n - 1);
736 }
737
738 /**
739 * Normalize (standardize) the sample, so it is has a mean of 0 and a standard deviation of 1.
740 *
741 * @param sample Sample to normalize.
742 * @return normalized (standardized) sample.
743 * @since 2.2
744 */
745 public static double[] normalize(final double[] sample) {
746 DescriptiveStatistics stats = new DescriptiveStatistics();
747
748 // Add the data from the series to stats
749 for (int i = 0; i < sample.length; i++) {
750 stats.addValue(sample[i]);
751 }
752
753 // Compute mean and standard deviation
754 double mean = stats.getMean();
755 double standardDeviation = stats.getStandardDeviation();
756
757 // initialize the standardizedSample, which has the same length as the sample
758 double[] standardizedSample = new double[sample.length];
759
760 for (int i = 0; i < sample.length; i++) {
761 // z = (x- mean)/standardDeviation
762 standardizedSample[i] = (sample[i] - mean) / standardDeviation;
763 }
764 return standardizedSample;
765 }
766
767 /**
768 * Returns the sample mode(s).
769 * <p>
770 * The mode is the most frequently occurring value in the sample.
771 * If there is a unique value with maximum frequency, this value is returned
772 * as the only element of the output array. Otherwise, the returned array
773 * contains the maximum frequency elements in increasing order.
774 * <p>
775 * For example, if {@code sample} is {0, 12, 5, 6, 0, 13, 5, 17},
776 * the returned array will have length two, with 0 in the first element and
777 * 5 in the second.
778 * <p>
779 * NaN values are ignored when computing the mode - i.e., NaNs will never
780 * appear in the output array. If the sample includes only NaNs or has
781 * length 0, an empty array is returned.
782 *
783 * @param sample input data
784 * @return array of array of the most frequently occurring element(s) sorted in ascending order.
785 * @throws MathIllegalArgumentException if the indices are invalid or the array is null
786 * @since 3.3
787 */
788 public static double[] mode(double[] sample) throws MathIllegalArgumentException {
789 if (sample == null) {
790 throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY);
791 }
792 return getMode(sample, 0, sample.length);
793 }
794
795 /**
796 * Returns the sample mode(s).
797 * <p>
798 * The mode is the most frequently occurring value in the sample.
799 * If there is a unique value with maximum frequency, this value is returned
800 * as the only element of the output array. Otherwise, the returned array
801 * contains the maximum frequency elements in increasing order.
802 * <p>
803 * For example, if {@code sample} is {0, 12, 5, 6, 0, 13, 5, 17},
804 * the returned array will have length two, with 0 in the first element and
805 * 5 in the second.
806 * <p>
807 * NaN values are ignored when computing the mode - i.e., NaNs will never
808 * appear in the output array. If the sample includes only NaNs or has
809 * length 0, an empty array is returned.
810 *
811 * @param sample input data
812 * @param begin index (0-based) of the first array element to include
813 * @param length the number of elements to include
814 * @return array of array of the most frequently occurring element(s) sorted in ascending order.
815 * @throws MathIllegalArgumentException if the indices are invalid or the array is null
816 * @since 3.3
817 */
818 public static double[] mode(double[] sample, final int begin, final int length) {
819 if (sample == null) {
820 throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY);
821 }
822
823 if (begin < 0) {
824 throw new NotPositiveException(LocalizedFormats.START_POSITION, Integer.valueOf(begin));
825 }
826
827 if (length < 0) {
828 throw new NotPositiveException(LocalizedFormats.LENGTH, Integer.valueOf(length));
829 }
830
831 return getMode(sample, begin, length);
832 }
833
834 /**
835 * Private helper method.
836 * Assumes parameters have been validated.
837 * @param values input data
838 * @param begin index (0-based) of the first array element to include
839 * @param length the number of elements to include
840 * @return array of array of the most frequently occurring element(s) sorted in ascending order.
841 */
842 private static double[] getMode(double[] values, final int begin, final int length) {
843 // Add the values to the frequency table
844 Frequency<Double> freq = new Frequency<>();
845 for (int i = begin; i < begin + length; i++) {
846 final double value = values[i];
847 if (!Double.isNaN(value)) {
848 freq.addValue(Double.valueOf(value));
849 }
850 }
851 List<Double> list = freq.getMode();
852 // Convert the list to an array of primitive double
853 return list.stream().mapToDouble(Double::doubleValue).toArray();
854 }
855 }