001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat;
018
019 import org.apache.commons.math.exception.NumberIsTooSmallException;
020 import org.apache.commons.math.exception.DimensionMismatchException;
021 import org.apache.commons.math.exception.NoDataException;
022 import org.apache.commons.math.exception.util.LocalizedFormats;
023 import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
024 import org.apache.commons.math.stat.descriptive.UnivariateStatistic;
025 import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
026 import org.apache.commons.math.stat.descriptive.moment.Mean;
027 import org.apache.commons.math.stat.descriptive.moment.Variance;
028 import org.apache.commons.math.stat.descriptive.rank.Max;
029 import org.apache.commons.math.stat.descriptive.rank.Min;
030 import org.apache.commons.math.stat.descriptive.rank.Percentile;
031 import org.apache.commons.math.stat.descriptive.summary.Product;
032 import org.apache.commons.math.stat.descriptive.summary.Sum;
033 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
034 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
035
036 /**
037 * StatUtils provides static methods for computing statistics based on data
038 * stored in double[] arrays.
039 *
040 * @version $Id: StatUtils.java 1131229 2011-06-03 20:49:25Z luc $
041 */
042 public final class StatUtils {
043
044 /** sum */
045 private static final UnivariateStatistic SUM = new Sum();
046
047 /** sumSq */
048 private static final UnivariateStatistic SUM_OF_SQUARES = new SumOfSquares();
049
050 /** prod */
051 private static final UnivariateStatistic PRODUCT = new Product();
052
053 /** sumLog */
054 private static final UnivariateStatistic SUM_OF_LOGS = new SumOfLogs();
055
056 /** min */
057 private static final UnivariateStatistic MIN = new Min();
058
059 /** max */
060 private static final UnivariateStatistic MAX = new Max();
061
062 /** mean */
063 private static final UnivariateStatistic MEAN = new Mean();
064
065 /** variance */
066 private static final Variance VARIANCE = new Variance();
067
068 /** percentile */
069 private static final Percentile PERCENTILE = new Percentile();
070
071 /** geometric mean */
072 private static final GeometricMean GEOMETRIC_MEAN = new GeometricMean();
073
074 /**
075 * Private Constructor
076 */
077 private StatUtils() {
078 }
079
080 /**
081 * Returns the sum of the values in the input array, or
082 * <code>Double.NaN</code> if the array is empty.
083 * <p>
084 * Throws <code>IllegalArgumentException</code> if the input array
085 * is null.</p>
086 *
087 * @param values array of values to sum
088 * @return the sum of the values or <code>Double.NaN</code> if the array
089 * is empty
090 * @throws IllegalArgumentException if the array is null
091 */
092 public static double sum(final double[] values) {
093 return SUM.evaluate(values);
094 }
095
096 /**
097 * Returns the sum of the entries in the specified portion of
098 * the input array, or <code>Double.NaN</code> if the designated subarray
099 * is empty.
100 * <p>
101 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
102 *
103 * @param values the input array
104 * @param begin index of the first array element to include
105 * @param length the number of elements to include
106 * @return the sum of the values or Double.NaN if length = 0
107 * @throws IllegalArgumentException if the array is null or the array index
108 * parameters are not valid
109 */
110 public static double sum(final double[] values, final int begin,
111 final int length) {
112 return SUM.evaluate(values, begin, length);
113 }
114
115 /**
116 * Returns the sum of the squares of the entries in the input array, or
117 * <code>Double.NaN</code> if the array is empty.
118 * <p>
119 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
120 *
121 * @param values input array
122 * @return the sum of the squared values or <code>Double.NaN</code> if the
123 * array is empty
124 * @throws IllegalArgumentException if the array is null
125 */
126 public static double sumSq(final double[] values) {
127 return SUM_OF_SQUARES.evaluate(values);
128 }
129
130 /**
131 * Returns the sum of the squares of the entries in the specified portion of
132 * the input array, or <code>Double.NaN</code> if the designated subarray
133 * is empty.
134 * <p>
135 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
136 *
137 * @param values the input array
138 * @param begin index of the first array element to include
139 * @param length the number of elements to include
140 * @return the sum of the squares of the values or Double.NaN if length = 0
141 * @throws IllegalArgumentException if the array is null or the array index
142 * parameters are not valid
143 */
144 public static double sumSq(final double[] values, final int begin,
145 final int length) {
146 return SUM_OF_SQUARES.evaluate(values, begin, length);
147 }
148
149 /**
150 * Returns the product of the entries in the input array, or
151 * <code>Double.NaN</code> if the array is empty.
152 * <p>
153 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
154 *
155 * @param values the input array
156 * @return the product of the values or Double.NaN if the array is empty
157 * @throws IllegalArgumentException if the array is null
158 */
159 public static double product(final double[] values) {
160 return PRODUCT.evaluate(values);
161 }
162
163 /**
164 * Returns the product of the entries in the specified portion of
165 * the input array, or <code>Double.NaN</code> if the designated subarray
166 * is empty.
167 * <p>
168 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
169 *
170 * @param values the input array
171 * @param begin index of the first array element to include
172 * @param length the number of elements to include
173 * @return the product of the values or Double.NaN if length = 0
174 * @throws IllegalArgumentException if the array is null or the array index
175 * parameters are not valid
176 */
177 public static double product(final double[] values, final int begin,
178 final int length) {
179 return PRODUCT.evaluate(values, begin, length);
180 }
181
182 /**
183 * Returns the sum of the natural logs of the entries in the input array, or
184 * <code>Double.NaN</code> if the array is empty.
185 * <p>
186 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
187 * <p>
188 * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
189 * </p>
190 *
191 * @param values the input array
192 * @return the sum of the natural logs of the values or Double.NaN if
193 * the array is empty
194 * @throws IllegalArgumentException if the array is null
195 */
196 public static double sumLog(final double[] values) {
197 return SUM_OF_LOGS.evaluate(values);
198 }
199
200 /**
201 * Returns the sum of the natural logs of the entries in the specified portion of
202 * the input array, or <code>Double.NaN</code> if the designated subarray
203 * is empty.
204 * <p>
205 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
206 * <p>
207 * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
208 * </p>
209 *
210 * @param values the input array
211 * @param begin index of the first array element to include
212 * @param length the number of elements to include
213 * @return the sum of the natural logs of the values or Double.NaN if
214 * length = 0
215 * @throws IllegalArgumentException if the array is null or the array index
216 * parameters are not valid
217 */
218 public static double sumLog(final double[] values, final int begin,
219 final int length) {
220 return SUM_OF_LOGS.evaluate(values, begin, length);
221 }
222
223 /**
224 * Returns the arithmetic mean of the entries in the input array, or
225 * <code>Double.NaN</code> if the array is empty.
226 * <p>
227 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
228 * <p>
229 * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
230 * details on the computing algorithm.</p>
231 *
232 * @param values the input array
233 * @return the mean of the values or Double.NaN if the array is empty
234 * @throws IllegalArgumentException if the array is null
235 */
236 public static double mean(final double[] values) {
237 return MEAN.evaluate(values);
238 }
239
240 /**
241 * Returns the arithmetic mean of the entries in the specified portion of
242 * the input array, or <code>Double.NaN</code> if the designated subarray
243 * is empty.
244 * <p>
245 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
246 * <p>
247 * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
248 * details on the computing algorithm.</p>
249 *
250 * @param values the input array
251 * @param begin index of the first array element to include
252 * @param length the number of elements to include
253 * @return the mean of the values or Double.NaN if length = 0
254 * @throws IllegalArgumentException if the array is null or the array index
255 * parameters are not valid
256 */
257 public static double mean(final double[] values, final int begin,
258 final int length) {
259 return MEAN.evaluate(values, begin, length);
260 }
261
262 /**
263 * Returns the geometric mean of the entries in the input array, or
264 * <code>Double.NaN</code> if the array is empty.
265 * <p>
266 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
267 * <p>
268 * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
269 * for details on the computing algorithm.</p>
270 *
271 * @param values the input array
272 * @return the geometric mean of the values or Double.NaN if the array is empty
273 * @throws IllegalArgumentException if the array is null
274 */
275 public static double geometricMean(final double[] values) {
276 return GEOMETRIC_MEAN.evaluate(values);
277 }
278
279 /**
280 * Returns the geometric mean of the entries in the specified portion of
281 * the input array, or <code>Double.NaN</code> if the designated subarray
282 * is empty.
283 * <p>
284 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
285 * <p>
286 * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
287 * for details on the computing algorithm.</p>
288 *
289 * @param values the input array
290 * @param begin index of the first array element to include
291 * @param length the number of elements to include
292 * @return the geometric mean of the values or Double.NaN if length = 0
293 * @throws IllegalArgumentException if the array is null or the array index
294 * parameters are not valid
295 */
296 public static double geometricMean(final double[] values, final int begin,
297 final int length) {
298 return GEOMETRIC_MEAN.evaluate(values, begin, length);
299 }
300
301
302 /**
303 * Returns the variance of the entries in the input array, or
304 * <code>Double.NaN</code> if the array is empty.
305 * <p>
306 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
307 * details on the computing algorithm.</p>
308 * <p>
309 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
310 * <p>
311 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
312 *
313 * @param values the input array
314 * @return the variance of the values or Double.NaN if the array is empty
315 * @throws IllegalArgumentException if the array is null
316 */
317 public static double variance(final double[] values) {
318 return VARIANCE.evaluate(values);
319 }
320
321 /**
322 * Returns the variance of the entries in the specified portion of
323 * the input array, or <code>Double.NaN</code> if the designated subarray
324 * is empty.
325 * <p>
326 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
327 * details on the computing algorithm.</p>
328 * <p>
329 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
330 * <p>
331 * Throws <code>IllegalArgumentException</code> if the array is null or the
332 * array index parameters are not valid.</p>
333 *
334 * @param values the input array
335 * @param begin index of the first array element to include
336 * @param length the number of elements to include
337 * @return the variance of the values or Double.NaN if length = 0
338 * @throws IllegalArgumentException if the array is null or the array index
339 * parameters are not valid
340 */
341 public static double variance(final double[] values, final int begin,
342 final int length) {
343 return VARIANCE.evaluate(values, begin, length);
344 }
345
346 /**
347 * Returns the variance of the entries in the specified portion of
348 * the input array, using the precomputed mean value. Returns
349 * <code>Double.NaN</code> if the designated subarray is empty.
350 * <p>
351 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
352 * details on the computing algorithm.</p>
353 * <p>
354 * The formula used assumes that the supplied mean value is the arithmetic
355 * mean of the sample data, not a known population parameter. This method
356 * is supplied only to save computation when the mean has already been
357 * computed.</p>
358 * <p>
359 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
360 * <p>
361 * Throws <code>IllegalArgumentException</code> if the array is null or the
362 * array index parameters are not valid.</p>
363 *
364 * @param values the input array
365 * @param mean the precomputed mean value
366 * @param begin index of the first array element to include
367 * @param length the number of elements to include
368 * @return the variance of the values or Double.NaN if length = 0
369 * @throws IllegalArgumentException if the array is null or the array index
370 * parameters are not valid
371 */
372 public static double variance(final double[] values, final double mean,
373 final int begin, final int length) {
374 return VARIANCE.evaluate(values, mean, begin, length);
375 }
376
377 /**
378 * Returns the variance of the entries in the input array, using the
379 * precomputed mean value. Returns <code>Double.NaN</code> if the array
380 * is empty.
381 * <p>
382 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
383 * details on the computing algorithm.</p>
384 * <p>
385 * The formula used assumes that the supplied mean value is the arithmetic
386 * mean of the sample data, not a known population parameter. This method
387 * is supplied only to save computation when the mean has already been
388 * computed.</p>
389 * <p>
390 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
391 * <p>
392 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
393 *
394 * @param values the input array
395 * @param mean the precomputed mean value
396 * @return the variance of the values or Double.NaN if the array is empty
397 * @throws IllegalArgumentException if the array is null
398 */
399 public static double variance(final double[] values, final double mean) {
400 return VARIANCE.evaluate(values, mean);
401 }
402
403 /**
404 * Returns the maximum of the entries in the input array, or
405 * <code>Double.NaN</code> if the array is empty.
406 * <p>
407 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
408 * <p>
409 * <ul>
410 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
411 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
412 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
413 * the result is <code>Double.POSITIVE_INFINITY.</code></li>
414 * </ul></p>
415 *
416 * @param values the input array
417 * @return the maximum of the values or Double.NaN if the array is empty
418 * @throws IllegalArgumentException if the array is null
419 */
420 public static double max(final double[] values) {
421 return MAX.evaluate(values);
422 }
423
424 /**
425 * Returns the maximum of the entries in the specified portion of
426 * the input array, or <code>Double.NaN</code> if the designated subarray
427 * is empty.
428 * <p>
429 * Throws <code>IllegalArgumentException</code> if the array is null or
430 * the array index parameters are not valid.</p>
431 * <p>
432 * <ul>
433 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
434 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
435 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
436 * the result is <code>Double.POSITIVE_INFINITY.</code></li>
437 * </ul></p>
438 *
439 * @param values the input array
440 * @param begin index of the first array element to include
441 * @param length the number of elements to include
442 * @return the maximum of the values or Double.NaN if length = 0
443 * @throws IllegalArgumentException if the array is null or the array index
444 * parameters are not valid
445 */
446 public static double max(final double[] values, final int begin,
447 final int length) {
448 return MAX.evaluate(values, begin, length);
449 }
450
451 /**
452 * Returns the minimum of the entries in the input array, or
453 * <code>Double.NaN</code> if the array is empty.
454 * <p>
455 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
456 * <p>
457 * <ul>
458 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
459 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
460 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
461 * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
462 * </ul> </p>
463 *
464 * @param values the input array
465 * @return the minimum of the values or Double.NaN if the array is empty
466 * @throws IllegalArgumentException if the array is null
467 */
468 public static double min(final double[] values) {
469 return MIN.evaluate(values);
470 }
471
472 /**
473 * Returns the minimum of the entries in the specified portion of
474 * the input array, or <code>Double.NaN</code> if the designated subarray
475 * is empty.
476 * <p>
477 * Throws <code>IllegalArgumentException</code> if the array is null or
478 * the array index parameters are not valid.</p>
479 * <p>
480 * <ul>
481 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
482 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
483 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
484 * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
485 * </ul></p>
486 *
487 * @param values the input array
488 * @param begin index of the first array element to include
489 * @param length the number of elements to include
490 * @return the minimum of the values or Double.NaN if length = 0
491 * @throws IllegalArgumentException if the array is null or the array index
492 * parameters are not valid
493 */
494 public static double min(final double[] values, final int begin,
495 final int length) {
496 return MIN.evaluate(values, begin, length);
497 }
498
499 /**
500 * Returns an estimate of the <code>p</code>th percentile of the values
501 * in the <code>values</code> array.
502 * <p>
503 * <ul>
504 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
505 * <code>0</code></li></p>
506 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
507 * if <code>values</code> has length <code>1</code></li>
508 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
509 * is null or p is not a valid quantile value (p must be greater than 0
510 * and less than or equal to 100)</li>
511 * </ul></p>
512 * <p>
513 * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
514 * a description of the percentile estimation algorithm used.</p>
515 *
516 * @param values input array of values
517 * @param p the percentile value to compute
518 * @return the percentile value or Double.NaN if the array is empty
519 * @throws IllegalArgumentException if <code>values</code> is null
520 * or p is invalid
521 */
522 public static double percentile(final double[] values, final double p) {
523 return PERCENTILE.evaluate(values,p);
524 }
525
526 /**
527 * Returns an estimate of the <code>p</code>th percentile of the values
528 * in the <code>values</code> array, starting with the element in (0-based)
529 * position <code>begin</code> in the array and including <code>length</code>
530 * values.
531 * <p>
532 * <ul>
533 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
534 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
535 * if <code>length = 1 </code></li>
536 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
537 * is null , <code>begin</code> or <code>length</code> is invalid, or
538 * <code>p</code> is not a valid quantile value (p must be greater than 0
539 * and less than or equal to 100)</li>
540 * </ul></p>
541 * <p>
542 * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
543 * a description of the percentile estimation algorithm used.</p>
544 *
545 * @param values array of input values
546 * @param p the percentile to compute
547 * @param begin the first (0-based) element to include in the computation
548 * @param length the number of array elements to include
549 * @return the percentile value
550 * @throws IllegalArgumentException if the parameters are not valid or the
551 * input array is null
552 */
553 public static double percentile(final double[] values, final int begin,
554 final int length, final double p) {
555 return PERCENTILE.evaluate(values, begin, length, p);
556 }
557
558 /**
559 * Returns the sum of the (signed) differences between corresponding elements of the
560 * input arrays -- i.e., sum(sample1[i] - sample2[i]).
561 *
562 * @param sample1 the first array
563 * @param sample2 the second array
564 * @return sum of paired differences
565 * @throws DimensionMismatchException if the arrays do not have the same
566 * (positive) length.
567 * @throws NoDataException if the sample arrays are empty.
568 */
569 public static double sumDifference(final double[] sample1, final double[] sample2) {
570 int n = sample1.length;
571 if (n != sample2.length) {
572 throw new DimensionMismatchException(n, sample2.length);
573 }
574 if (n <= 0) {
575 throw new NoDataException(LocalizedFormats.INSUFFICIENT_DIMENSION);
576 }
577 double result = 0;
578 for (int i = 0; i < n; i++) {
579 result += sample1[i] - sample2[i];
580 }
581 return result;
582 }
583
584 /**
585 * Returns the mean of the (signed) differences between corresponding elements of the
586 * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length.
587 *
588 * @param sample1 the first array
589 * @param sample2 the second array
590 * @return mean of paired differences
591 * @throws DimensionMismatchException if the arrays do not have the same
592 * (positive) length.
593 * @throws NoDataException if the sample arrays are empty.
594 */
595 public static double meanDifference(final double[] sample1, final double[] sample2) {
596 return sumDifference(sample1, sample2) / sample1.length;
597 }
598
599 /**
600 * Returns the variance of the (signed) differences between corresponding elements of the
601 * input arrays -- i.e., var(sample1[i] - sample2[i]).
602 *
603 * @param sample1 the first array
604 * @param sample2 the second array
605 * @param meanDifference the mean difference between corresponding entries
606 * @see #meanDifference(double[],double[])
607 * @return variance of paired differences
608 * @throws DimensionMismatchException if the arrays do not have the same
609 * length.
610 * @throws NumberIsTooSmallException if the arrays length is less than 2.
611 */
612 public static double varianceDifference(final double[] sample1,
613 final double[] sample2,
614 double meanDifference) {
615 double sum1 = 0d;
616 double sum2 = 0d;
617 double diff = 0d;
618 int n = sample1.length;
619 if (n != sample2.length) {
620 throw new DimensionMismatchException(n, sample2.length);
621 }
622 if (n < 2) {
623 throw new NumberIsTooSmallException(n, 2, true);
624 }
625 for (int i = 0; i < n; i++) {
626 diff = sample1[i] - sample2[i];
627 sum1 += (diff - meanDifference) *(diff - meanDifference);
628 sum2 += diff - meanDifference;
629 }
630 return (sum1 - (sum2 * sum2 / n)) / (n - 1);
631 }
632
633 /**
634 * Normalize (standardize) the series, so in the end it is having a mean of 0 and a standard deviation of 1.
635 *
636 * @param sample Sample to normalize.
637 * @return normalized (standardized) sample.
638 * @since 2.2
639 */
640 public static double[] normalize(final double[] sample) {
641 DescriptiveStatistics stats = new DescriptiveStatistics();
642
643 // Add the data from the series to stats
644 for (int i = 0; i < sample.length; i++) {
645 stats.addValue(sample[i]);
646 }
647
648 // Compute mean and standard deviation
649 double mean = stats.getMean();
650 double standardDeviation = stats.getStandardDeviation();
651
652 // initialize the standardizedSample, which has the same length as the sample
653 double[] standardizedSample = new double[sample.length];
654
655 for (int i = 0; i < sample.length; i++) {
656 // z = (x- mean)/standardDeviation
657 standardizedSample[i] = (sample[i] - mean) / standardDeviation;
658 }
659 return standardizedSample;
660 }
661 }