001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math4.stat;
018
019import java.util.List;
020
021import org.apache.commons.math4.exception.DimensionMismatchException;
022import org.apache.commons.math4.exception.MathIllegalArgumentException;
023import org.apache.commons.math4.exception.NoDataException;
024import org.apache.commons.math4.exception.NotPositiveException;
025import org.apache.commons.math4.exception.NullArgumentException;
026import org.apache.commons.math4.exception.NumberIsTooSmallException;
027import org.apache.commons.math4.exception.util.LocalizedFormats;
028import org.apache.commons.math4.stat.descriptive.DescriptiveStatistics;
029import org.apache.commons.math4.stat.descriptive.UnivariateStatistic;
030import org.apache.commons.math4.stat.descriptive.moment.GeometricMean;
031import org.apache.commons.math4.stat.descriptive.moment.Mean;
032import org.apache.commons.math4.stat.descriptive.moment.Variance;
033import org.apache.commons.math4.stat.descriptive.rank.Max;
034import org.apache.commons.math4.stat.descriptive.rank.Min;
035import org.apache.commons.math4.stat.descriptive.rank.Percentile;
036import org.apache.commons.math4.stat.descriptive.summary.Product;
037import org.apache.commons.math4.stat.descriptive.summary.Sum;
038import org.apache.commons.math4.stat.descriptive.summary.SumOfLogs;
039import org.apache.commons.math4.stat.descriptive.summary.SumOfSquares;
040
041/**
042 * StatUtils provides static methods for computing statistics based on data
043 * stored in double[] arrays.
044 */
045public final class StatUtils {
046
047    /** sum */
048    private static final UnivariateStatistic SUM = new Sum();
049
050    /** sumSq */
051    private static final UnivariateStatistic SUM_OF_SQUARES = new SumOfSquares();
052
053    /** prod */
054    private static final UnivariateStatistic PRODUCT = new Product();
055
056    /** sumLog */
057    private static final UnivariateStatistic SUM_OF_LOGS = new SumOfLogs();
058
059    /** min */
060    private static final UnivariateStatistic MIN = new Min();
061
062    /** max */
063    private static final UnivariateStatistic MAX = new Max();
064
065    /** mean */
066    private static final UnivariateStatistic MEAN = new Mean();
067
068    /** variance */
069    private static final Variance VARIANCE = new Variance();
070
071    /** percentile */
072    private static final Percentile PERCENTILE = new Percentile();
073
074    /** geometric mean */
075    private static final GeometricMean GEOMETRIC_MEAN = new GeometricMean();
076
077    /**
078     * Private Constructor
079     */
080    private StatUtils() {
081    }
082
083    /**
084     * Returns the sum of the values in the input array, or
085     * <code>Double.NaN</code> if the array is empty.
086     * <p>
087     * Throws <code>IllegalArgumentException</code> if the input array is null.
088     *
089     * @param values  array of values to sum
090     * @return the sum of the values or <code>Double.NaN</code> if the array is empty
091     * @throws MathIllegalArgumentException if the array is null
092     */
093    public static double sum(final double[] values) throws MathIllegalArgumentException {
094        return SUM.evaluate(values);
095    }
096
097    /**
098     * Returns the sum of the entries in the specified portion of
099     * the input array, or <code>Double.NaN</code> if the designated subarray is empty.
100     * <p>
101     * Throws <code>IllegalArgumentException</code> if the array is null.
102     *
103     * @param values the input array
104     * @param begin index of the first array element to include
105     * @param length the number of elements to include
106     * @return the sum of the values or Double.NaN if length = 0
107     * @throws MathIllegalArgumentException if the array is null or the array index
108     *  parameters are not valid
109     */
110    public static double sum(final double[] values, final int begin, final int length)
111        throws MathIllegalArgumentException {
112        return SUM.evaluate(values, begin, length);
113    }
114
115    /**
116     * Returns the sum of the squares of the entries in the input array, or
117     * <code>Double.NaN</code> if the array is empty.
118     * <p>
119     * Throws <code>IllegalArgumentException</code> if the array is null.
120     *
121     * @param values  input array
122     * @return the sum of the squared values or <code>Double.NaN</code> if the array is empty
123     * @throws MathIllegalArgumentException if the array is null
124     */
125    public static double sumSq(final double[] values) throws MathIllegalArgumentException {
126        return SUM_OF_SQUARES.evaluate(values);
127    }
128
129    /**
130     * Returns the sum of the squares of the entries in the specified portion of
131     * the input array, or <code>Double.NaN</code> if the designated subarray
132     * is empty.
133     * <p>
134     * Throws <code>IllegalArgumentException</code> if the array is null.
135     *
136     * @param values the input array
137     * @param begin index of the first array element to include
138     * @param length the number of elements to include
139     * @return the sum of the squares of the values or Double.NaN if length = 0
140     * @throws MathIllegalArgumentException if the array is null or the array index
141     *  parameters are not valid
142     */
143    public static double sumSq(final double[] values, final int begin, final int length)
144        throws MathIllegalArgumentException {
145        return SUM_OF_SQUARES.evaluate(values, begin, length);
146    }
147
148    /**
149     * Returns the product of the entries in the input array, or
150     * <code>Double.NaN</code> if the array is empty.
151     * <p>
152     * Throws <code>IllegalArgumentException</code> if the array is null.
153     *
154     * @param values the input array
155     * @return the product of the values or Double.NaN if the array is empty
156     * @throws MathIllegalArgumentException if the array is null
157     */
158    public static double product(final double[] values) throws MathIllegalArgumentException {
159        return PRODUCT.evaluate(values);
160    }
161
162    /**
163     * Returns the product of the entries in the specified portion of
164     * the input array, or <code>Double.NaN</code> if the designated subarray
165     * is empty.
166     * <p>
167     * Throws <code>IllegalArgumentException</code> if the array is null.
168     *
169     * @param values the input array
170     * @param begin index of the first array element to include
171     * @param length the number of elements to include
172     * @return the product of the values or Double.NaN if length = 0
173     * @throws MathIllegalArgumentException if the array is null or the array index
174     *  parameters are not valid
175     */
176    public static double product(final double[] values, final int begin, final int length)
177        throws MathIllegalArgumentException {
178        return PRODUCT.evaluate(values, begin, length);
179    }
180
181    /**
182     * Returns the sum of the natural logs of the entries in the input array, or
183     * <code>Double.NaN</code> if the array is empty.
184     * <p>
185     * Throws <code>IllegalArgumentException</code> if the array is null.
186     * <p>
187     * See {@link org.apache.commons.math4.stat.descriptive.summary.SumOfLogs}.
188     *
189     * @param values the input array
190     * @return the sum of the natural logs of the values or Double.NaN if the array is empty
191     * @throws MathIllegalArgumentException if the array is null
192     */
193    public static double sumLog(final double[] values) throws MathIllegalArgumentException {
194        return SUM_OF_LOGS.evaluate(values);
195    }
196
197    /**
198     * Returns the sum of the natural logs of the entries in the specified portion of
199     * the input array, or <code>Double.NaN</code> if the designated subarray is empty.
200     * <p>
201     * Throws <code>IllegalArgumentException</code> if the array is null.
202     * <p>
203     * See {@link org.apache.commons.math4.stat.descriptive.summary.SumOfLogs}.
204     *
205     * @param values the input array
206     * @param begin index of the first array element to include
207     * @param length the number of elements to include
208     * @return the sum of the natural logs of the values or Double.NaN if
209     * length = 0
210     * @throws MathIllegalArgumentException if the array is null or the array index
211     * parameters are not valid
212     */
213    public static double sumLog(final double[] values, final int begin, final int length)
214        throws MathIllegalArgumentException {
215        return SUM_OF_LOGS.evaluate(values, begin, length);
216    }
217
218    /**
219     * Returns the arithmetic mean of the entries in the input array, or
220     * <code>Double.NaN</code> if the array is empty.
221     * <p>
222     * Throws <code>IllegalArgumentException</code> if the array is null.
223     * <p>
224     * See {@link org.apache.commons.math4.stat.descriptive.moment.Mean} for
225     * details on the computing algorithm.
226     *
227     * @param values the input array
228     * @return the mean of the values or Double.NaN if the array is empty
229     * @throws MathIllegalArgumentException if the array is null
230     */
231    public static double mean(final double[] values) throws MathIllegalArgumentException {
232        return MEAN.evaluate(values);
233    }
234
235    /**
236     * Returns the arithmetic mean of the entries in the specified portion of
237     * the input array, or <code>Double.NaN</code> if the designated subarray
238     * is empty.
239     * <p>
240     * Throws <code>IllegalArgumentException</code> if the array is null.
241     * <p>
242     * See {@link org.apache.commons.math4.stat.descriptive.moment.Mean Mean} for
243     * details on the computing algorithm.
244     *
245     * @param values the input array
246     * @param begin index of the first array element to include
247     * @param length the number of elements to include
248     * @return the mean of the values or Double.NaN if length = 0
249     * @throws MathIllegalArgumentException if the array is null or the array index
250     * parameters are not valid
251     */
252    public static double mean(final double[] values, final int begin, final int length)
253        throws MathIllegalArgumentException {
254        return MEAN.evaluate(values, begin, length);
255    }
256
257    /**
258     * Returns the geometric mean of the entries in the input array, or
259     * <code>Double.NaN</code> if the array is empty.
260     * <p>
261     * Throws <code>IllegalArgumentException</code> if the array is null.
262     * <p>
263     * See {@link org.apache.commons.math4.stat.descriptive.moment.GeometricMean GeometricMean}
264     * for details on the computing algorithm.
265     *
266     * @param values the input array
267     * @return the geometric mean of the values or Double.NaN if the array is empty
268     * @throws MathIllegalArgumentException if the array is null
269     */
270    public static double geometricMean(final double[] values) throws MathIllegalArgumentException {
271        return GEOMETRIC_MEAN.evaluate(values);
272    }
273
274    /**
275     * Returns the geometric mean of the entries in the specified portion of
276     * the input array, or <code>Double.NaN</code> if the designated subarray
277     * is empty.
278     * <p>
279     * Throws <code>IllegalArgumentException</code> if the array is null.
280     * <p>
281     * See {@link org.apache.commons.math4.stat.descriptive.moment.GeometricMean GeometricMean}
282     * for details on the computing algorithm.
283     *
284     * @param values the input array
285     * @param begin index of the first array element to include
286     * @param length the number of elements to include
287     * @return the geometric mean of the values or Double.NaN if length = 0
288     * @throws MathIllegalArgumentException if the array is null or the array index
289     *  parameters are not valid
290     */
291    public static double geometricMean(final double[] values, final int begin, final int length)
292        throws MathIllegalArgumentException {
293        return GEOMETRIC_MEAN.evaluate(values, begin, length);
294    }
295
296    /**
297     * Returns the variance of the entries in the input array, or
298     * <code>Double.NaN</code> if the array is empty.
299     * <p>
300     * This method returns the bias-corrected sample variance (using {@code n - 1} in
301     * the denominator). Use {@link #populationVariance(double[])} for the non-bias-corrected
302     * population variance.
303     * <p>
304     * See {@link org.apache.commons.math4.stat.descriptive.moment.Variance Variance} for
305     * details on the computing algorithm.
306     * <p>
307     * Returns 0 for a single-value (i.e. length = 1) sample.
308     * <p>
309     * Throws <code>MathIllegalArgumentException</code> if the array is null.
310     *
311     * @param values the input array
312     * @return the variance of the values or Double.NaN if the array is empty
313     * @throws MathIllegalArgumentException if the array is null
314     */
315    public static double variance(final double[] values) throws MathIllegalArgumentException {
316        return VARIANCE.evaluate(values);
317    }
318
319    /**
320     * Returns the variance of the entries in the specified portion of
321     * the input array, or <code>Double.NaN</code> if the designated subarray
322     * is empty.
323     * <p>
324     * This method returns the bias-corrected sample variance (using {@code n - 1} in
325     * the denominator). Use {@link #populationVariance(double[], int, int)} for the non-bias-corrected
326     * population variance.
327     * <p>
328     * See {@link org.apache.commons.math4.stat.descriptive.moment.Variance Variance} for
329     * details on the computing algorithm.
330     * <p>
331     * Returns 0 for a single-value (i.e. length = 1) sample.
332     * <p>
333     * Throws <code>MathIllegalArgumentException</code> if the array is null or the
334     * array index parameters are not valid.
335     *
336     * @param values the input array
337     * @param begin index of the first array element to include
338     * @param length the number of elements to include
339     * @return the variance of the values or Double.NaN if length = 0
340     * @throws MathIllegalArgumentException if the array is null or the array index
341     *  parameters are not valid
342     */
343    public static double variance(final double[] values, final int begin, final int length)
344        throws MathIllegalArgumentException {
345        return VARIANCE.evaluate(values, begin, length);
346    }
347
348    /**
349     * Returns the variance of the entries in the specified portion of
350     * the input array, using the precomputed mean value.  Returns
351     * <code>Double.NaN</code> if the designated subarray is empty.
352     * <p>
353     * This method returns the bias-corrected sample variance (using {@code n - 1} in
354     * the denominator). Use {@link #populationVariance(double[], double, int, int)} for
355     * the non-bias-corrected population variance.
356     * <p>
357     * See {@link org.apache.commons.math4.stat.descriptive.moment.Variance Variance} for
358     * details on the computing algorithm.
359     * <p>
360     * The formula used assumes that the supplied mean value is the arithmetic
361     * mean of the sample data, not a known population parameter.  This method
362     * is supplied only to save computation when the mean has already been
363     * computed.
364     * <p>
365     * Returns 0 for a single-value (i.e. length = 1) sample.
366     * <p>
367     * Throws <code>MathIllegalArgumentException</code> if the array is null or the
368     * array index parameters are not valid.
369     *
370     * @param values the input array
371     * @param mean the precomputed mean value
372     * @param begin index of the first array element to include
373     * @param length the number of elements to include
374     * @return the variance of the values or Double.NaN if length = 0
375     * @throws MathIllegalArgumentException if the array is null or the array index
376     *  parameters are not valid
377     */
378    public static double variance(final double[] values, final double mean, final int begin, final int length)
379        throws MathIllegalArgumentException {
380        return VARIANCE.evaluate(values, mean, begin, length);
381    }
382
383    /**
384     * Returns the variance of the entries in the input array, using the
385     * precomputed mean value.  Returns <code>Double.NaN</code> if the array
386     * is empty.
387     * <p>
388     * This method returns the bias-corrected sample variance (using {@code n - 1} in
389     * the denominator).  Use {@link #populationVariance(double[], double)} for the
390     * non-bias-corrected population variance.
391     * <p>
392     * See {@link org.apache.commons.math4.stat.descriptive.moment.Variance Variance} for
393     * details on the computing algorithm.
394     * <p>
395     * The formula used assumes that the supplied mean value is the arithmetic
396     * mean of the sample data, not a known population parameter.  This method
397     * is supplied only to save computation when the mean has already been
398     * computed.
399     * <p>
400     * Returns 0 for a single-value (i.e. length = 1) sample.
401     * <p>
402     * Throws <code>MathIllegalArgumentException</code> if the array is null.
403     *
404     * @param values the input array
405     * @param mean the precomputed mean value
406     * @return the variance of the values or Double.NaN if the array is empty
407     * @throws MathIllegalArgumentException if the array is null
408     */
409    public static double variance(final double[] values, final double mean) throws MathIllegalArgumentException {
410        return VARIANCE.evaluate(values, mean);
411    }
412
413    /**
414     * Returns the <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance">
415     * population variance</a> of the entries in the input array, or
416     * <code>Double.NaN</code> if the array is empty.
417     * <p>
418     * See {@link org.apache.commons.math4.stat.descriptive.moment.Variance Variance} for
419     * details on the formula and computing algorithm.
420     * <p>
421     * Returns 0 for a single-value (i.e. length = 1) sample.
422     * <p>
423     * Throws <code>MathIllegalArgumentException</code> if the array is null.
424     *
425     * @param values the input array
426     * @return the population variance of the values or Double.NaN if the array is empty
427     * @throws MathIllegalArgumentException if the array is null
428     */
429    public static double populationVariance(final double[] values) throws MathIllegalArgumentException {
430        return new Variance(false).evaluate(values);
431    }
432
433    /**
434     * Returns the <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance">
435     * population variance</a> of the entries in the specified portion of
436     * the input array, or <code>Double.NaN</code> if the designated subarray
437     * is empty.
438     * <p>
439     * See {@link org.apache.commons.math4.stat.descriptive.moment.Variance Variance} for
440     * details on the computing algorithm.
441     * <p>
442     * Returns 0 for a single-value (i.e. length = 1) sample.
443     * <p>
444     * Throws <code>MathIllegalArgumentException</code> if the array is null or the
445     * array index parameters are not valid.
446     *
447     * @param values the input array
448     * @param begin index of the first array element to include
449     * @param length the number of elements to include
450     * @return the population variance of the values or Double.NaN if length = 0
451     * @throws MathIllegalArgumentException if the array is null or the array index
452     *  parameters are not valid
453     */
454    public static double populationVariance(final double[] values, final int begin, final int length)
455        throws MathIllegalArgumentException {
456        return new Variance(false).evaluate(values, begin, length);
457    }
458
459    /**
460     * Returns the <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance">
461     * population variance</a> of the entries in the specified portion of
462     * the input array, using the precomputed mean value.  Returns
463     * <code>Double.NaN</code> if the designated subarray is empty.
464     * <p>
465     * See {@link org.apache.commons.math4.stat.descriptive.moment.Variance Variance} for
466     * details on the computing algorithm.
467     * <p>
468     * The formula used assumes that the supplied mean value is the arithmetic
469     * mean of the sample data, not a known population parameter.  This method
470     * is supplied only to save computation when the mean has already been
471     * computed.
472     * <p>
473     * Returns 0 for a single-value (i.e. length = 1) sample.
474     * <p>
475     * Throws <code>MathIllegalArgumentException</code> if the array is null or the
476     * array index parameters are not valid.
477     *
478     * @param values the input array
479     * @param mean the precomputed mean value
480     * @param begin index of the first array element to include
481     * @param length the number of elements to include
482     * @return the population variance of the values or Double.NaN if length = 0
483     * @throws MathIllegalArgumentException if the array is null or the array index
484     *  parameters are not valid
485     */
486    public static double populationVariance(final double[] values, final double mean,
487                                            final int begin, final int length)
488        throws MathIllegalArgumentException {
489        return new Variance(false).evaluate(values, mean, begin, length);
490    }
491
492    /**
493     * Returns the <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance">
494     * population variance</a> of the entries in the input array, using the precomputed
495     * mean value. Returns <code>Double.NaN</code> if the array is empty.
496     * <p>
497     * See {@link org.apache.commons.math4.stat.descriptive.moment.Variance Variance} for
498     * details on the computing algorithm.
499     * <p>
500     * The formula used assumes that the supplied mean value is the arithmetic
501     * mean of the sample data, not a known population parameter. This method is
502     * supplied only to save computation when the mean has already been computed.
503     * <p>
504     * Returns 0 for a single-value (i.e. length = 1) sample.
505     * <p>
506     * Throws <code>MathIllegalArgumentException</code> if the array is null.
507     *
508     * @param values the input array
509     * @param mean the precomputed mean value
510     * @return the population variance of the values or Double.NaN if the array is empty
511     * @throws MathIllegalArgumentException if the array is null
512     */
513    public static double populationVariance(final double[] values, final double mean)
514        throws MathIllegalArgumentException {
515        return new Variance(false).evaluate(values, mean);
516    }
517
518    /**
519     * Returns the maximum of the entries in the input array, or
520     * <code>Double.NaN</code> if the array is empty.
521     * <p>
522     * Throws <code>MathIllegalArgumentException</code> if the array is null.
523     * <ul>
524     * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
525     * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
526     * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
527     * the result is <code>Double.POSITIVE_INFINITY.</code></li>
528     * </ul>
529     *
530     * @param values the input array
531     * @return the maximum of the values or Double.NaN if the array is empty
532     * @throws MathIllegalArgumentException if the array is null
533     */
534    public static double max(final double[] values) throws MathIllegalArgumentException {
535        return MAX.evaluate(values);
536    }
537
538    /**
539     * Returns the maximum of the entries in the specified portion of the input array,
540     * or <code>Double.NaN</code> if the designated subarray is empty.
541     * <p>
542     * Throws <code>MathIllegalArgumentException</code> if the array is null or
543     * the array index parameters are not valid.
544     * <ul>
545     * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
546     * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
547     * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
548     * the result is <code>Double.POSITIVE_INFINITY.</code></li>
549     * </ul>
550     *
551     * @param values the input array
552     * @param begin index of the first array element to include
553     * @param length the number of elements to include
554     * @return the maximum of the values or Double.NaN if length = 0
555     * @throws MathIllegalArgumentException if the array is null or the array index
556     *  parameters are not valid
557     */
558    public static double max(final double[] values, final int begin, final int length)
559        throws MathIllegalArgumentException {
560        return MAX.evaluate(values, begin, length);
561    }
562
563    /**
564     * Returns the minimum of the entries in the input array, or
565     * <code>Double.NaN</code> if the array is empty.
566     * <p>
567     * Throws <code>MathIllegalArgumentException</code> if the array is null.
568     * <ul>
569     * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
570     * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
571     * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
572     * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
573     * </ul>
574     *
575     * @param values the input array
576     * @return the minimum of the values or Double.NaN if the array is empty
577     * @throws MathIllegalArgumentException if the array is null
578     */
579    public static double min(final double[] values) throws MathIllegalArgumentException {
580        return MIN.evaluate(values);
581    }
582
583    /**
584     * Returns the minimum of the entries in the specified portion of the input array,
585     * or <code>Double.NaN</code> if the designated subarray is empty.
586     * <p>
587     * Throws <code>MathIllegalArgumentException</code> if the array is null or
588     * the array index parameters are not valid.
589     * <ul>
590     * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
591     * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
592     * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
593     * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
594     * </ul>
595     *
596     * @param values the input array
597     * @param begin index of the first array element to include
598     * @param length the number of elements to include
599     * @return the minimum of the values or Double.NaN if length = 0
600     * @throws MathIllegalArgumentException if the array is null or the array index
601     *  parameters are not valid
602     */
603    public static double min(final double[] values, final int begin, final int length)
604        throws MathIllegalArgumentException {
605        return MIN.evaluate(values, begin, length);
606    }
607
608    /**
609     * Returns an estimate of the <code>p</code>th percentile of the values
610     * in the <code>values</code> array.
611     * <ul>
612     * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
613     *  <code>0</code></li>
614     * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
615     *  if <code>values</code> has length <code>1</code></li>
616     * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
617     *  is null  or p is not a valid quantile value (p must be greater than 0
618     *  and less than or equal to 100)</li>
619     * </ul>
620     * <p>
621     * See {@link org.apache.commons.math4.stat.descriptive.rank.Percentile Percentile}
622     * for a description of the percentile estimation algorithm used.
623     *
624     * @param values input array of values
625     * @param p the percentile value to compute
626     * @return the percentile value or Double.NaN if the array is empty
627     * @throws MathIllegalArgumentException if <code>values</code> is null or p is invalid
628     */
629    public static double percentile(final double[] values, final double p) throws MathIllegalArgumentException {
630        return PERCENTILE.evaluate(values,p);
631    }
632
633    /**
634     * Returns an estimate of the <code>p</code>th percentile of the values
635     * in the <code>values</code> array, starting with the element in (0-based)
636     * position <code>begin</code> in the array and including <code>length</code>
637     * values.
638     * <ul>
639     * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
640     * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
641     *  if <code>length = 1 </code></li>
642     * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
643     *  is null, <code>begin</code> or <code>length</code> is invalid, or
644     *  <code>p</code> is not a valid quantile value (p must be greater than 0
645     *  and less than or equal to 100)</li>
646     * </ul>
647     * <p>
648     * See {@link org.apache.commons.math4.stat.descriptive.rank.Percentile Percentile}
649     * for a description of the percentile estimation algorithm used.
650     *
651     * @param values array of input values
652     * @param p the percentile to compute
653     * @param begin the first (0-based) element to include in the computation
654     * @param length the number of array elements to include
655     * @return the percentile value
656     * @throws MathIllegalArgumentException if the parameters are not valid or the input array is null
657     */
658    public static double percentile(final double[] values, final int begin, final int length, final double p)
659        throws MathIllegalArgumentException {
660        return PERCENTILE.evaluate(values, begin, length, p);
661    }
662
663    /**
664     * Returns the sum of the (signed) differences between corresponding elements of the
665     * input arrays -- i.e., sum(sample1[i] - sample2[i]).
666     *
667     * @param sample1  the first array
668     * @param sample2  the second array
669     * @return sum of paired differences
670     * @throws DimensionMismatchException if the arrays do not have the same (positive) length.
671     * @throws NoDataException if the sample arrays are empty.
672     */
673    public static double sumDifference(final double[] sample1, final double[] sample2)
674        throws DimensionMismatchException, NoDataException {
675
676        int n = sample1.length;
677        if (n != sample2.length) {
678            throw new DimensionMismatchException(n, sample2.length);
679        }
680        if (n <= 0) {
681            throw new NoDataException(LocalizedFormats.INSUFFICIENT_DIMENSION);
682        }
683        double result = 0;
684        for (int i = 0; i < n; i++) {
685            result += sample1[i] - sample2[i];
686        }
687        return result;
688    }
689
690    /**
691     * Returns the mean of the (signed) differences between corresponding elements of the
692     * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length.
693     *
694     * @param sample1  the first array
695     * @param sample2  the second array
696     * @return mean of paired differences
697     * @throws DimensionMismatchException if the arrays do not have the same (positive) length.
698     * @throws NoDataException if the sample arrays are empty.
699     */
700    public static double meanDifference(final double[] sample1, final double[] sample2)
701        throws DimensionMismatchException, NoDataException {
702        return sumDifference(sample1, sample2) / sample1.length;
703    }
704
705    /**
706     * Returns the variance of the (signed) differences between corresponding elements of the
707     * input arrays -- i.e., var(sample1[i] - sample2[i]).
708     *
709     * @param sample1  the first array
710     * @param sample2  the second array
711     * @param meanDifference   the mean difference between corresponding entries
712     * @return variance of paired differences
713     * @throws DimensionMismatchException if the arrays do not have the same length.
714     * @throws NumberIsTooSmallException if the arrays length is less than 2.
715     * @see #meanDifference(double[],double[])
716     */
717    public static double varianceDifference(final double[] sample1, final double[] sample2, double meanDifference)
718        throws DimensionMismatchException, NumberIsTooSmallException {
719
720        double sum1 = 0d;
721        double sum2 = 0d;
722        double diff = 0d;
723        int n = sample1.length;
724        if (n != sample2.length) {
725            throw new DimensionMismatchException(n, sample2.length);
726        }
727        if (n < 2) {
728            throw new NumberIsTooSmallException(n, 2, true);
729        }
730        for (int i = 0; i < n; i++) {
731            diff = sample1[i] - sample2[i];
732            sum1 += (diff - meanDifference) *(diff - meanDifference);
733            sum2 += diff - meanDifference;
734        }
735        return (sum1 - (sum2 * sum2 / n)) / (n - 1);
736    }
737
738    /**
739     * Normalize (standardize) the sample, so it is has a mean of 0 and a standard deviation of 1.
740     *
741     * @param sample Sample to normalize.
742     * @return normalized (standardized) sample.
743     * @since 2.2
744     */
745    public static double[] normalize(final double[] sample) {
746        DescriptiveStatistics stats = new DescriptiveStatistics();
747
748        // Add the data from the series to stats
749        for (int i = 0; i < sample.length; i++) {
750            stats.addValue(sample[i]);
751        }
752
753        // Compute mean and standard deviation
754        double mean = stats.getMean();
755        double standardDeviation = stats.getStandardDeviation();
756
757        // initialize the standardizedSample, which has the same length as the sample
758        double[] standardizedSample = new double[sample.length];
759
760        for (int i = 0; i < sample.length; i++) {
761            // z = (x- mean)/standardDeviation
762            standardizedSample[i] = (sample[i] - mean) / standardDeviation;
763        }
764        return standardizedSample;
765    }
766
767    /**
768     * Returns the sample mode(s).
769     * <p>
770     * The mode is the most frequently occurring value in the sample.
771     * If there is a unique value with maximum frequency, this value is returned
772     * as the only element of the output array. Otherwise, the returned array
773     * contains the maximum frequency elements in increasing order.
774     * <p>
775     * For example, if {@code sample} is {0, 12, 5, 6, 0, 13, 5, 17},
776     * the returned array will have length two, with 0 in the first element and
777     * 5 in the second.
778     * <p>
779     * NaN values are ignored when computing the mode - i.e., NaNs will never
780     * appear in the output array.  If the sample includes only NaNs or has
781     * length 0, an empty array is returned.
782     *
783     * @param sample input data
784     * @return array of array of the most frequently occurring element(s) sorted in ascending order.
785     * @throws MathIllegalArgumentException if the indices are invalid or the array is null
786     * @since 3.3
787     */
788    public static double[] mode(double[] sample) throws MathIllegalArgumentException {
789        if (sample == null) {
790            throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY);
791        }
792        return getMode(sample, 0, sample.length);
793    }
794
795    /**
796     * Returns the sample mode(s).
797     * <p>
798     * The mode is the most frequently occurring value in the sample.
799     * If there is a unique value with maximum frequency, this value is returned
800     * as the only element of the output array. Otherwise, the returned array
801     * contains the maximum frequency elements in increasing order.
802     * <p>
803     * For example, if {@code sample} is {0, 12, 5, 6, 0, 13, 5, 17},
804     * the returned array will have length two, with 0 in the first element and
805     * 5 in the second.
806     * <p>
807     * NaN values are ignored when computing the mode - i.e., NaNs will never
808     * appear in the output array.  If the sample includes only NaNs or has
809     * length 0, an empty array is returned.
810     *
811     * @param sample input data
812     * @param begin index (0-based) of the first array element to include
813     * @param length the number of elements to include
814     * @return array of array of the most frequently occurring element(s) sorted in ascending order.
815     * @throws MathIllegalArgumentException if the indices are invalid or the array is null
816     * @since 3.3
817     */
818    public static double[] mode(double[] sample, final int begin, final int length) {
819        if (sample == null) {
820            throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY);
821        }
822
823        if (begin < 0) {
824            throw new NotPositiveException(LocalizedFormats.START_POSITION, Integer.valueOf(begin));
825        }
826
827        if (length < 0) {
828            throw new NotPositiveException(LocalizedFormats.LENGTH, Integer.valueOf(length));
829        }
830
831        return getMode(sample, begin, length);
832    }
833
834    /**
835     * Private helper method.
836     * Assumes parameters have been validated.
837     * @param values input data
838     * @param begin index (0-based) of the first array element to include
839     * @param length the number of elements to include
840     * @return array of array of the most frequently occurring element(s) sorted in ascending order.
841     */
842    private static double[] getMode(double[] values, final int begin, final int length) {
843        // Add the values to the frequency table
844        Frequency<Double> freq = new Frequency<>();
845        for (int i = begin; i < begin + length; i++) {
846            final double value = values[i];
847            if (!Double.isNaN(value)) {
848                freq.addValue(Double.valueOf(value));
849            }
850        }
851        List<Double> list = freq.getMode();
852        // Convert the list to an array of primitive double
853        return list.stream().mapToDouble(Double::doubleValue).toArray();
854    }
855
856}