Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.statistics.descriptive;
018
019import java.util.Arrays;
020import java.util.Objects;
021import java.util.function.IntToDoubleFunction;
022import java.util.function.IntToLongFunction;
023import org.apache.commons.numbers.arrays.Selection;
024
025/**
026 * Provides quantile computation.
027 *
028 * <p>For values of length {@code n}:
029 * <ul>
030 * <li>The result is {@code NaN} if {@code n = 0}.</li>
031 * <li>The result is {@code values[0]} if {@code n = 1}.</li>
032 * <li>Otherwise the result is computed using the {@link EstimationMethod}.</li>
033 * </ul>
034 *
035 * <p>Computation of multiple quantiles will handle duplicate and unordered
036 * probabilities. Passing ordered probabilities is recommended if the order is already
037 * known as this can improve efficiency; for example using uniform spacing through the
038 * array data, or to identify extreme values from the data such as {@code [0.001, 0.999]}.
039 *
040 * <p>This implementation respects the ordering imposed by
041 * {@link Double#compare(double, double)} for {@code NaN} values. If a {@code NaN} occurs
042 * in the selected positions in the fully sorted values then the result is {@code NaN}.
043 *
044 * <p>The {@link NaNPolicy} can be used to change the behaviour on {@code NaN} values.
045 *
046 * <p>Instances of this class are immutable and thread-safe.
047 *
048 * <p><strong>Support for {@code long} arrays</strong>
049 *
050 * <p>The result on {@code long} values can be returned as a {@code double} or
051 * a {@code long} using a {@link StatisticResult}.
052 *
053 * <p>The {@code double} result is computed within 1 ULP of the exact result. In some
054 * cases this may be outside the range defined by the minimum and maximum of the input
055 * array following rounding to a 53-bit floating point representation. For example a
056 * quantile of an array containing only {@link Long#MAX_VALUE} as a {@code double} is
057 * 2<sup>63</sup>, which is the closest representation of 2<sup>63</sup> - 1.
058 *
059 * <p>The {@code long} result is returned using the nearest whole number.
060 * In the event of ties the result is rounded towards positive infinity.
061 * This value will always be within the range defined by the minimum and maximum
062 * of the input array. Due to interpolation it may be a value not observed in
063 * the input values.
064 *
065 * <p>Interpolation between two {@code long} values requires extended precision
066 * floating-point arithmetic. This can be avoided using a discontinuous {@link EstimationMethod}.
067 * In this case the {@code long} quantile will be a value observed in the input values.
068 *
069 * <p>If the array length {@code n} is zero the result as a {@code double} is
070 * {@code NaN} and the result as a {@code long} will raise an {@link ArithmeticException}.
071 *
072 * <p>Multiple quantile results required as only one of the primitive values can be converted
073 * to a primitive array using a stream, for example:
074 *
075 * <pre>{@code
076 * long[] values = ...
077 * double[] p = Quantile.probabilities(10);
078 * Quantile q = Quantile.withDefaults();
079 * long[] result = Arrays.stream(q.evaluate(values, p))
080 *                       .mapToLong(StatisticResult::getAsLong)
081 *                       .toArray();
082 * }</pre>
083 *
084 * @see #with(NaNPolicy)
085 * @see <a href="https://en.wikipedia.org/wiki/Quantile">Quantile (Wikipedia)</a>
086 * @since 1.1
087 */
088public final class Quantile {
089    /** Message when the probability is not in the range {@code [0, 1]}. */
090    private static final String INVALID_PROBABILITY = "Invalid probability: ";
091    /** Message when no probabilities are provided for the varargs method. */
092    private static final String NO_PROBABILITIES_SPECIFIED = "No probabilities specified";
093    /** Message when the size is not valid. */
094    private static final String INVALID_SIZE = "Invalid size: ";
095    /** Message when the number of probabilities in a range is not valid. */
096    private static final String INVALID_NUMBER_OF_PROBABILITIES = "Invalid number of probabilities: ";
097
098    /** Default instance. Method 8 is recommended by Hyndman and Fan. */
099    private static final Quantile DEFAULT = new Quantile(false, NaNPolicy.INCLUDE, EstimationMethod.HF8);
100
101    /** Flag to indicate if the data should be copied. */
102    private final boolean copy;
103    /** NaN policy for floating point data. */
104    private final NaNPolicy nanPolicy;
105    /** Transformer for NaN data. */
106    private final NaNTransformer nanTransformer;
107    /** Estimation type used to determine the value from the quantile. */
108    private final EstimationMethod estimationType;
109
110    /**
111     * @param copy Flag to indicate if the data should be copied.
112     * @param nanPolicy NaN policy.
113     * @param estimationType Estimation type used to determine the value from the quantile.
114     */
115    private Quantile(boolean copy, NaNPolicy nanPolicy, EstimationMethod estimationType) {
116        this.copy = copy;
117        this.nanPolicy = nanPolicy;
118        this.estimationType = estimationType;
119        nanTransformer = NaNTransformers.createNaNTransformer(nanPolicy, copy);
120    }
121
122    /**
123     * Return a new instance with the default options.
124     *
125     * <ul>
126     * <li>{@linkplain #withCopy(boolean) Copy = false}</li>
127     * <li>{@linkplain #with(NaNPolicy) NaN policy = include}</li>
128     * <li>{@linkplain #with(EstimationMethod) Estimation method = HF8}</li>
129     * </ul>
130     *
131     * <p>Note: The default options configure for processing in-place and including
132     * {@code NaN} values in the data. This is the most efficient mode and has the
133     * smallest memory consumption.
134     *
135     * @return the quantile implementation
136     * @see #withCopy(boolean)
137     * @see #with(NaNPolicy)
138     * @see #with(EstimationMethod)
139     */
140    public static Quantile withDefaults() {
141        return DEFAULT;
142    }
143
144    /**
145     * Return an instance with the configured copy behaviour. If {@code false} then
146     * the input array will be modified by the call to evaluate the quantiles; otherwise
147     * the computation uses a copy of the data.
148     *
149     * @param v Value.
150     * @return an instance
151     */
152    public Quantile withCopy(boolean v) {
153        return new Quantile(v, nanPolicy, estimationType);
154    }
155
156    /**
157     * Return an instance with the configured {@link NaNPolicy}.
158     *
159     * <p>Note: This implementation respects the ordering imposed by
160     * {@link Double#compare(double, double)} for {@code NaN} values: {@code NaN} is
161     * considered greater than all other values, and all {@code NaN} values are equal. The
162     * {@link NaNPolicy} changes the computation of the statistic in the presence of
163     * {@code NaN} values.
164     *
165     * <ul>
166     * <li>{@link NaNPolicy#INCLUDE}: {@code NaN} values are moved to the end of the data;
167     * the size of the data <em>includes</em> the {@code NaN} values and the quantile will be
168     * {@code NaN} if any value used for quantile interpolation is {@code NaN}.</li>
169     * <li>{@link NaNPolicy#EXCLUDE}: {@code NaN} values are moved to the end of the data;
170     * the size of the data <em>excludes</em> the {@code NaN} values and the quantile will
171     * never be {@code NaN} for non-zero size. If all data are {@code NaN} then the size is zero
172     * and the result is {@code NaN}.</li>
173     * <li>{@link NaNPolicy#ERROR}: An exception is raised if the data contains {@code NaN}
174     * values.</li>
175     * </ul>
176     *
177     * <p>Note that the result is identical for all policies if no {@code NaN} values are present.
178     *
179     * @param v Value.
180     * @return an instance
181     */
182    public Quantile with(NaNPolicy v) {
183        return new Quantile(copy, Objects.requireNonNull(v), estimationType);
184    }
185
186    /**
187     * Return an instance with the configured {@link EstimationMethod}.
188     *
189     * @param v Value.
190     * @return an instance
191     */
192    public Quantile with(EstimationMethod v) {
193        return new Quantile(copy, nanPolicy, Objects.requireNonNull(v));
194    }
195
196    /**
197     * Generate {@code n} evenly spaced probabilities in the range {@code [0, 1]}.
198     *
199     * <pre>
200     * 1/(n + 1), 2/(n + 1), ..., n/(n + 1)
201     * </pre>
202     *
203     * @param n Number of probabilities.
204     * @return the probabilities
205     * @throws IllegalArgumentException if {@code n < 1}
206     */
207    public static double[] probabilities(int n) {
208        checkNumberOfProbabilities(n);
209        final double c1 = n + 1.0;
210        final double[] p = new double[n];
211        for (int i = 0; i < n; i++) {
212            p[i] = (i + 1.0) / c1;
213        }
214        return p;
215    }
216
217    /**
218     * Generate {@code n} evenly spaced probabilities in the range {@code [p1, p2]}.
219     *
220     * <pre>
221     * w = p2 - p1
222     * p1 + w/(n + 1), p1 + 2w/(n + 1), ..., p1 + nw/(n + 1)
223     * </pre>
224     *
225     * @param n Number of probabilities.
226     * @param p1 Lower probability.
227     * @param p2 Upper probability.
228     * @return the probabilities
229     * @throws IllegalArgumentException if {@code n < 1}; if the probabilities are not in the
230     * range {@code [0, 1]}; or {@code p2 <= p1}.
231     */
232    public static double[] probabilities(int n, double p1, double p2) {
233        checkProbability(p1);
234        checkProbability(p2);
235        if (p2 <= p1) {
236            throw new IllegalArgumentException("Invalid range: [" + p1 + ", " + p2 + "]");
237        }
238        final double[] p = probabilities(n);
239        for (int i = 0; i < n; i++) {
240            p[i] = (1 - p[i]) * p1 + p[i] * p2;
241        }
242        return p;
243    }
244
245    /**
246     * Evaluate the {@code p}-th quantile of the values.
247     *
248     * <p>Note: This method may partially sort the input values if not configured to
249     * {@link #withCopy(boolean) copy} the input data.
250     *
251     * <p><strong>Performance</strong>
252     *
253     * <p>It is not recommended to use this method for repeat calls for different quantiles
254     * within the same values. The {@link #evaluate(double[], double...)} method should be used
255     * which provides better performance.
256     *
257     * @param values Values.
258     * @param p Probability for the quantile to compute.
259     * @return the quantile
260     * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]};
261     * or if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
262     * @see #evaluate(double[], double...)
263     * @see #with(NaNPolicy)
264     */
265    public double evaluate(double[] values, double p) {
266        return compute(values, 0, values.length, p);
267    }
268
269    /**
270     * Evaluate the {@code p}-th quantile of the specified range of values.
271     *
272     * <p>Note: This method may partially sort the input values if not configured to
273     * {@link #withCopy(boolean) copy} the input data.
274     *
275     * <p><strong>Performance</strong>
276     *
277     * <p>It is not recommended to use this method for repeat calls for different quantiles
278     * within the same values. The {@link #evaluateRange(double[], int, int, double...)} method should be used
279     * which provides better performance.
280     *
281     * @param values Values.
282     * @param from Inclusive start of the range.
283     * @param to Exclusive end of the range.
284     * @param p Probability for the quantile to compute.
285     * @return the quantile
286     * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]};
287     * or if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
288     * @throws IndexOutOfBoundsException if the sub-range is out of bounds
289     * @see #evaluateRange(double[], int, int, double...)
290     * @see #with(NaNPolicy)
291     * @since 1.2
292     */
293    public double evaluateRange(double[] values, int from, int to, double p) {
294        Statistics.checkFromToIndex(from, to, values.length);
295        return compute(values, from, to, p);
296    }
297
298    /**
299     * Compute the {@code p}-th quantile of the specified range of values.
300     *
301     * @param values Values.
302     * @param from Inclusive start of the range.
303     * @param to Exclusive end of the range.
304     * @param p Probability for the quantile to compute.
305     * @return the quantile
306     * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
307     */
308    private double compute(double[] values, int from, int to, double p) {
309        checkProbability(p);
310        // Floating-point data handling
311        final int[] bounds = new int[2];
312        final double[] x = nanTransformer.apply(values, from, to, bounds);
313        final int start = bounds[0];
314        final int end = bounds[1];
315        final int n = end - start;
316        // Special cases
317        if (n <= 1) {
318            return n == 0 ? Double.NaN : x[start];
319        }
320
321        final double pos = estimationType.index(p, n);
322        final int ip = (int) pos;
323        final int i = start + ip;
324
325        // Partition and compute
326        if (pos > ip) {
327            Selection.select(x, start, end, new int[] {i, i + 1});
328            return Interpolation.interpolate(x[i], x[i + 1], pos - ip);
329        }
330        Selection.select(x, start, end, i);
331        return x[i];
332    }
333
334    /**
335     * Evaluate the {@code p}-th quantiles of the values.
336     *
337     * <p>Note: This method may partially sort the input values if not configured to
338     * {@link #withCopy(boolean) copy} the input data.
339     *
340     * @param values Values.
341     * @param p Probabilities for the quantiles to compute.
342     * @return the quantiles
343     * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
344     * no probabilities are specified; or if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
345     * @see #with(NaNPolicy)
346     */
347    public double[] evaluate(double[] values, double... p) {
348        return compute(values, 0, values.length, p);
349    }
350
351    /**
352     * Evaluate the {@code p}-th quantiles of the specified range of values.
353     *
354     * <p>Note: This method may partially sort the input values if not configured to
355     * {@link #withCopy(boolean) copy} the input data.
356     *
357     * @param values Values.
358     * @param from Inclusive start of the range.
359     * @param to Exclusive end of the range.
360     * @param p Probabilities for the quantiles to compute.
361     * @return the quantiles
362     * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
363     * no probabilities are specified; or if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
364     * @throws IndexOutOfBoundsException if the sub-range is out of bounds
365     * @see #with(NaNPolicy)
366     * @since 1.2
367     */
368    public double[] evaluateRange(double[] values, int from, int to, double... p) {
369        Statistics.checkFromToIndex(from, to, values.length);
370        return compute(values, from, to, p);
371    }
372
373    /**
374     * Compute the {@code p}-th quantiles of the specified range of values.
375     *
376     * @param values Values.
377     * @param from Inclusive start of the range.
378     * @param to Exclusive end of the range.
379     * @param p Probabilities for the quantiles to compute.
380     * @return the quantiles
381     * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
382     * or no probabilities are specified.
383     */
384    private double[] compute(double[] values, int from, int to, double... p) {
385        checkProbabilities(p);
386        // Floating-point data handling
387        final int[] bounds = new int[2];
388        final double[] x = nanTransformer.apply(values, from, to, bounds);
389        final int start = bounds[0];
390        final int end = bounds[1];
391        final int n = end - start;
392        // Special cases
393        final double[] q = new double[p.length];
394        if (n <= 1) {
395            Arrays.fill(q, n == 0 ? Double.NaN : x[start]);
396            return q;
397        }
398
399        // Collect interpolation positions. We use the output q as storage.
400        final int[] indices = computeIndices(n, p, q, start);
401
402        // Partition
403        Selection.select(x, start, end, indices);
404
405        // Compute
406        for (int k = 0; k < p.length; k++) {
407            // ip in [0, n); i in [start, end)
408            final int ip = (int) q[k];
409            final int i = start + ip;
410            if (q[k] > ip) {
411                q[k] = Interpolation.interpolate(x[i], x[i + 1], q[k] - ip);
412            } else {
413                q[k] = x[i];
414            }
415        }
416        return q;
417    }
418
419    /**
420     * Evaluate the {@code p}-th quantile of the values.
421     *
422     * <p>Note: This method may partially sort the input values if not configured to
423     * {@link #withCopy(boolean) copy} the input data.
424     *
425     * <p><strong>Performance</strong>
426     *
427     * <p>It is not recommended to use this method for repeat calls for different quantiles
428     * within the same values. The {@link #evaluate(int[], double...)} method should be used
429     * which provides better performance.
430     *
431     * @param values Values.
432     * @param p Probability for the quantile to compute.
433     * @return the quantile
434     * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
435     * @see #evaluate(int[], double...)
436     */
437    public double evaluate(int[] values, double p) {
438        return compute(values, 0, values.length, p);
439    }
440
441    /**
442     * Evaluate the {@code p}-th quantile of the specified range of values.
443     *
444     * <p>Note: This method may partially sort the input values if not configured to
445     * {@link #withCopy(boolean) copy} the input data.
446     *
447     * <p><strong>Performance</strong>
448     *
449     * <p>It is not recommended to use this method for repeat calls for different quantiles
450     * within the same values. The {@link #evaluateRange(int[], int, int, double...)} method should be used
451     * which provides better performance.
452     *
453     * @param values Values.
454     * @param from Inclusive start of the range.
455     * @param to Exclusive end of the range.
456     * @param p Probability for the quantile to compute.
457     * @return the quantile
458     * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
459     * @throws IndexOutOfBoundsException if the sub-range is out of bounds
460     * @see #evaluateRange(int[], int, int, double...)
461     * @since 1.2
462     */
463    public double evaluateRange(int[] values, int from, int to, double p) {
464        Statistics.checkFromToIndex(from, to, values.length);
465        return compute(values, from, to, p);
466    }
467
468    /**
469     * Compute the {@code p}-th quantile of the specified range of values.
470     *
471     * @param values Values.
472     * @param from Inclusive start of the range.
473     * @param to Exclusive end of the range.
474     * @param p Probability for the quantile to compute.
475     * @return the quantile
476     * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
477     */
478    private double compute(int[] values, int from, int to, double p) {
479        checkProbability(p);
480        final int n = to - from;
481        // Special cases
482        if (n <= 1) {
483            return n == 0 ? Double.NaN : values[from];
484        }
485
486        // Create the range
487        final int[] x;
488        final int start;
489        final int end;
490        if (copy) {
491            x = Statistics.copy(values, from, to);
492            start = 0;
493            end = n;
494        } else {
495            x = values;
496            start = from;
497            end = to;
498        }
499
500        final double pos = estimationType.index(p, n);
501        final int ip = (int) pos;
502        final int i = start + ip;
503
504        // Partition and compute
505        if (pos > ip) {
506            Selection.select(x, start, end, new int[] {i, i + 1});
507            return Interpolation.interpolate((double) x[i], (double) x[i + 1], pos - ip);
508        }
509        Selection.select(x, start, end, i);
510        return x[i];
511    }
512
513    /**
514     * Evaluate the {@code p}-th quantiles of the values.
515     *
516     * <p>Note: This method may partially sort the input values if not configured to
517     * {@link #withCopy(boolean) copy} the input data.
518     *
519     * @param values Values.
520     * @param p Probabilities for the quantiles to compute.
521     * @return the quantiles
522     * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
523     * or no probabilities are specified.
524     */
525    public double[] evaluate(int[] values, double... p) {
526        return compute(values, 0, values.length, p);
527    }
528
529    /**
530     * Evaluate the {@code p}-th quantiles of the specified range of values..
531     *
532     * <p>Note: This method may partially sort the input values if not configured to
533     * {@link #withCopy(boolean) copy} the input data.
534     *
535     * @param values Values.
536     * @param from Inclusive start of the range.
537     * @param to Exclusive end of the range.
538     * @param p Probabilities for the quantiles to compute.
539     * @return the quantiles
540     * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
541     * or no probabilities are specified.
542     * @throws IndexOutOfBoundsException if the sub-range is out of bounds
543     * @since 1.2
544     */
545    public double[] evaluateRange(int[] values, int from, int to, double... p) {
546        Statistics.checkFromToIndex(from, to, values.length);
547        return compute(values, from, to, p);
548    }
549
550    /**
551     * Evaluate the {@code p}-th quantiles of the specified range of values..
552     *
553     * <p>Note: This method may partially sort the input values if not configured to
554     * {@link #withCopy(boolean) copy} the input data.
555     *
556     * @param values Values.
557     * @param from Inclusive start of the range.
558     * @param to Exclusive end of the range.
559     * @param p Probabilities for the quantiles to compute.
560     * @return the quantiles
561     * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
562     * or no probabilities are specified.
563     */
564    private double[] compute(int[] values, int from, int to, double... p) {
565        checkProbabilities(p);
566        final int n = to - from;
567        // Special cases
568        final double[] q = new double[p.length];
569        if (n <= 1) {
570            Arrays.fill(q, n == 0 ? Double.NaN : values[from]);
571            return q;
572        }
573
574        // Create the range
575        final int[] x;
576        final int start;
577        final int end;
578        if (copy) {
579            x = Statistics.copy(values, from, to);
580            start = 0;
581            end = n;
582        } else {
583            x = values;
584            start = from;
585            end = to;
586        }
587
588        // Collect interpolation positions. We use the output q as storage.
589        final int[] indices = computeIndices(n, p, q, start);
590
591        // Partition
592        Selection.select(x, start, end, indices);
593
594        // Compute
595        for (int k = 0; k < p.length; k++) {
596            // ip in [0, n); i in [start, end)
597            final int ip = (int) q[k];
598            final int i = start + ip;
599            if (q[k] > ip) {
600                q[k] = Interpolation.interpolate((double) x[i], (double) x[i + 1], q[k] - ip);
601            } else {
602                q[k] = x[i];
603            }
604        }
605        return q;
606    }
607
608    /**
609     * Evaluate the {@code p}-th quantile of the values.
610     *
611     * <p>Note: This method may partially sort the input values if not configured to
612     * {@link #withCopy(boolean) copy} the input data.
613     *
614     * <p><strong>Performance</strong>
615     *
616     * <p>It is not recommended to use this method for repeat calls for different
617     * quantiles within the same values. The {@link #evaluate(long[], double...)} method
618     * should be used which provides better performance.
619     *
620     * @param values Values.
621     * @param p Probability for the quantile to compute.
622     * @return the quantile
623     * @throws IllegalArgumentException if the probability {@code p} is not in the range
624     * {@code [0, 1]}
625     * @see #evaluate(long[], double...)
626     * @since 1.3
627     */
628    public StatisticResult evaluate(long[] values, double p) {
629        return compute(values, 0, values.length, p);
630    }
631
632    /**
633     * Evaluate the {@code p}-th quantile of the specified range of values.
634     *
635     * <p>Note: This method may partially sort the input values if not configured to
636     * {@link #withCopy(boolean) copy} the input data.
637     *
638     * <p><strong>Performance</strong>
639     *
640     * <p>It is not recommended to use this method for repeat calls for different quantiles
641     * within the same values. The {@link #evaluateRange(long[], int, int, double...)} method should be used
642     * which provides better performance.
643     *
644     * @param values Values.
645     * @param from Inclusive start of the range.
646     * @param to Exclusive end of the range.
647     * @param p Probability for the quantile to compute.
648     * @return the quantile
649     * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
650     * @throws IndexOutOfBoundsException if the sub-range is out of bounds
651     * @see #evaluateRange(long[], int, int, double...)
652     * @since 1.3
653     */
654    public StatisticResult evaluateRange(long[] values, int from, int to, double p) {
655        Statistics.checkFromToIndex(from, to, values.length);
656        return compute(values, from, to, p);
657    }
658
659    /**
660     * Compute the {@code p}-th quantile of the specified range of values.
661     *
662     * @param values Values.
663     * @param from Inclusive start of the range.
664     * @param to Exclusive end of the range.
665     * @param p Probability for the quantile to compute.
666     * @return the quantile
667     * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
668     * @since 1.3
669     */
670    private StatisticResult compute(long[] values, int from, int to, double p) {
671        checkProbability(p);
672        final int n = to - from;
673        // Special cases
674        if (n <= 1) {
675            return n == 0 ?
676                () -> Double.NaN :
677                Statistics.createStatisticResult(values[from]);
678        }
679
680        // Create the range
681        final long[] x;
682        final int start;
683        final int end;
684        if (copy) {
685            x = Statistics.copy(values, from, to);
686            start = 0;
687            end = n;
688        } else {
689            x = values;
690            start = from;
691            end = to;
692        }
693
694        final double pos = estimationType.index(p, n);
695        final int ip = (int) pos;
696        final int i = start + ip;
697
698        // Partition and compute
699        if (pos > ip) {
700            Selection.select(x, start, end, new int[] {i, i + 1});
701            return Interpolation.interpolate(x[i], x[i + 1], pos - ip);
702        }
703        Selection.select(x, start, end, i);
704        return Statistics.createStatisticResult(x[i]);
705    }
706
707    /**
708     * Evaluate the {@code p}-th quantiles of the values.
709     *
710     * <p>Note: This method may partially sort the input values if not configured to
711     * {@link #withCopy(boolean) copy} the input data.
712     *
713     * @param values Values.
714     * @param p Probabilities for the quantiles to compute.
715     * @return the quantiles
716     * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
717     * or no probabilities are specified.
718     * @since 1.3
719     */
720    public StatisticResult[] evaluate(long[] values, double... p) {
721        return compute(values, 0, values.length, p);
722    }
723
724    /**
725     * Evaluate the {@code p}-th quantiles of the specified range of values..
726     *
727     * <p>Note: This method may partially sort the input values if not configured to
728     * {@link #withCopy(boolean) copy} the input data.
729     *
730     * @param values Values.
731     * @param from Inclusive start of the range.
732     * @param to Exclusive end of the range.
733     * @param p Probabilities for the quantiles to compute.
734     * @return the quantiles
735     * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
736     * or no probabilities are specified.
737     * @throws IndexOutOfBoundsException if the sub-range is out of bounds
738     * @since 1.3
739     */
740    public StatisticResult[] evaluateRange(long[] values, int from, int to, double... p) {
741        Statistics.checkFromToIndex(from, to, values.length);
742        return compute(values, from, to, p);
743    }
744
745    /**
746     * Evaluate the {@code p}-th quantiles of the specified range of values..
747     *
748     * <p>Note: This method may partially sort the input values if not configured to
749     * {@link #withCopy(boolean) copy} the input data.
750     *
751     * @param values Values.
752     * @param from Inclusive start of the range.
753     * @param to Exclusive end of the range.
754     * @param p Probabilities for the quantiles to compute.
755     * @return the quantiles
756     * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
757     * or no probabilities are specified.
758     */
759    private StatisticResult[] compute(long[] values, int from, int to, double... p) {
760        checkProbabilities(p);
761        final int n = to - from;
762        // Special cases
763        final StatisticResult[] result = new StatisticResult[p.length];
764        if (n <= 1) {
765            final StatisticResult r = n == 0 ?
766                () -> Double.NaN :
767                Statistics.createStatisticResult(values[from]);
768            Arrays.fill(result, r);
769            return result;
770        }
771
772        // Create the range
773        final long[] x;
774        final int start;
775        final int end;
776        if (copy) {
777            x = Statistics.copy(values, from, to);
778            start = 0;
779            end = n;
780        } else {
781            x = values;
782            start = from;
783            end = to;
784        }
785
786        // Collect interpolation positions
787        final double[] q = new double[p.length];
788        final int[] indices = computeIndices(n, p, q, start);
789
790        // Partition
791        Selection.select(x, start, end, indices);
792
793        // Compute
794        for (int k = 0; k < p.length; k++) {
795            // ip in [0, n); i in [start, end)
796            final int ip = (int) q[k];
797            final int i = start + ip;
798            if (q[k] > ip) {
799                result[k] = Interpolation.interpolate(x[i], x[i + 1], q[k] - ip);
800            } else {
801                result[k] = Statistics.createStatisticResult(x[i]);
802            }
803        }
804        return result;
805    }
806
807    /**
808     * Evaluate the {@code p}-th quantile of the sorted values provided as a {@code double}.
809     *
810     * <p>This method can be used when the values of known size are already sorted.
811     * It can be used for primitive types not supported by other evaluation methods.
812     * Numeric types {@code byte}, {@code short} and {@code float} can be converted to
813     * type {@code double} without loss of precision.
814     *
815     * <pre>{@code
816     * short[] x = ...
817     * Arrays.sort(x);
818     * double q = Quantile.withDefaults().evaluate(x.length, i -> x[i], 0.05);
819     * }</pre>
820     *
821     * <p>If the sorted array is a {@code long} datatype this method can lose information
822     * about the precision of the quantiles due to primitive type conversion. Use
823     * the method {@link #evaluateAsLong(int, IntToLongFunction, double)} to compute
824     * the {@code long} quantile result.
825     *
826     * @param n Size of the values.
827     * @param values Values function.
828     * @param p Probability for the quantile to compute.
829     * @return the quantile
830     * @throws IllegalArgumentException if {@code size < 0}; or if the probability {@code p} is
831     * not in the range {@code [0, 1]}.
832     * @see #evaluateAsLong(int, IntToLongFunction, double)
833     */
834    public double evaluate(int n, IntToDoubleFunction values, double p) {
835        checkSize(n);
836        checkProbability(p);
837        // Special case
838        if (n <= 1) {
839            return n == 0 ? Double.NaN : values.applyAsDouble(0);
840        }
841        final double pos = estimationType.index(p, n);
842        final int i = (int) pos;
843        final double v1 = values.applyAsDouble(i);
844        if (pos > i) {
845            final double v2 = values.applyAsDouble(i + 1);
846            return Interpolation.interpolate(v1, v2, pos - i);
847        }
848        return v1;
849    }
850
851    /**
852     * Evaluate the {@code p}-th quantiles of the sorted values provided as a {@code double}.
853     *
854     * <p>This method can be used when the values of known size are already sorted.
855     * It can be used for primitive types not supported by other evaluation methods.
856     * Numeric types {@code byte}, {@code short} and {@code float} can be converted to
857     * type {@code double} without loss of precision.
858     *
859     * <pre>{@code
860     * short[] x = ...
861     * Arrays.sort(x);
862     * double[] q = Quantile.withDefaults().evaluate(x.length, i -> x[i], 0.25, 0.5, 0.75);
863     * }</pre>
864     *
865     * <p>If the sorted array is a {@code long} datatype this method can lose information
866     * about the precision of the quantiles due to primitive type conversion. Use
867     * the method {@link #evaluateAsLong(int, IntToLongFunction, double...)} to compute
868     * the {@code long} quantile result.
869     *
870     * @param n Size of the values.
871     * @param values Values function.
872     * @param p Probabilities for the quantiles to compute.
873     * @return the quantiles
874     * @throws IllegalArgumentException if {@code size < 0}; if any probability {@code p} is
875     * not in the range {@code [0, 1]}; or no probabilities are specified.
876     * @see #evaluateAsLong(int, IntToLongFunction, double...)
877     */
878    public double[] evaluate(int n, IntToDoubleFunction values, double... p) {
879        checkSize(n);
880        checkProbabilities(p);
881        // Special case
882        final double[] q = new double[p.length];
883        if (n <= 1) {
884            Arrays.fill(q, n == 0 ? Double.NaN : values.applyAsDouble(0));
885            return q;
886        }
887        for (int k = 0; k < p.length; k++) {
888            final double pos = estimationType.index(p[k], n);
889            final int i = (int) pos;
890            final double v1 = values.applyAsDouble(i);
891            if (pos > i) {
892                final double v2 = values.applyAsDouble(i + 1);
893                q[k] = Interpolation.interpolate(v1, v2, pos - i);
894            } else {
895                q[k] = v1;
896            }
897        }
898        return q;
899    }
900
901    /**
902     * Evaluate the {@code p}-th quantile of the sorted values provided as a {@code long}.
903     *
904     * <p>This method can be used when the values of known size are already sorted.
905     *
906     * <pre>{@code
907     * long[] x = ...
908     * Arrays.sort(x);
909     * StatisticResult q = Quantile.withDefaults()
910     *                             .evaluateAsLong(x.length, i -> x[i], 0.05);
911     * }</pre>
912     *
913     * <p>Note: It is not recommended to sort data for use only in the quantile computation.
914     * The {@link #evaluate(long[], double)} method will partially sort the data as required
915     * and in most cases will be more efficient.
916     *
917     * @param n Size of the values.
918     * @param values Values function.
919     * @param p Probability for the quantile to compute.
920     * @return the quantile
921     * @throws IllegalArgumentException if {@code size < 0}; or if the probability {@code p} is
922     * not in the range {@code [0, 1]}.
923     * @since 1.3
924     */
925    public StatisticResult evaluateAsLong(int n, IntToLongFunction values, double p) {
926        checkSize(n);
927        checkProbability(p);
928        // Special case
929        if (n <= 1) {
930            return n == 0 ?
931                () -> Double.NaN :
932                Statistics.createStatisticResult(values.applyAsLong(0));
933        }
934        final double pos = estimationType.index(p, n);
935        final int i = (int) pos;
936        final long v1 = values.applyAsLong(i);
937        if (pos > i) {
938            final long v2 = values.applyAsLong(i + 1);
939            return Interpolation.interpolate(v1, v2, pos - i);
940        }
941        return Statistics.createStatisticResult(v1);
942    }
943
944    /**
945     * Evaluate the {@code p}-th quantiles of the sorted values provided as a {@code long}.
946     *
947     * <p>This method can be used when the values of known size are already sorted.
948     *
949     * <pre>{@code
950     * long[] x = ...
951     * Arrays.sort(x);
952     * StatisticResult[] q = Quantile.withDefaults()
953     *                               .evaluateAsLong(x.length, i -> x[i], 0.25, 0.5, 0.75);
954     * }</pre>
955     *
956     * <p>Note: It is not recommended to sort data for use only in the quantile computation.
957     * The {@link #evaluate(long[], double...)} method will partially sort the data as required
958     * and in most cases will be more efficient.
959     *
960     * @param n Size of the values.
961     * @param values Values function.
962     * @param p Probabilities for the quantiles to compute.
963     * @return the quantiles
964     * @throws IllegalArgumentException if {@code size < 0}; if any probability {@code p} is
965     * not in the range {@code [0, 1]}; or no probabilities are specified.
966     * @since 1.3
967     */
968    public StatisticResult[] evaluateAsLong(int n, IntToLongFunction values, double... p) {
969        checkSize(n);
970        checkProbabilities(p);
971        // Special case
972        final StatisticResult[] result = new StatisticResult[p.length];
973        if (n <= 1) {
974            final StatisticResult r = n == 0 ?
975                () -> Double.NaN :
976                Statistics.createStatisticResult(values.applyAsLong(0));
977            Arrays.fill(result, r);
978            return result;
979        }
980        for (int k = 0; k < p.length; k++) {
981            final double pos = estimationType.index(p[k], n);
982            final int i = (int) pos;
983            final long v1 = values.applyAsLong(i);
984            if (pos > i) {
985                final long v2 = values.applyAsLong(i + 1);
986                result[k] = Interpolation.interpolate(v1, v2, pos - i);
987            } else {
988                result[k] = Statistics.createStatisticResult(v1);
989            }
990        }
991        return result;
992    }
993
994    /**
995     * Check the probability {@code p} is in the range {@code [0, 1]}.
996     *
997     * @param p Probability for the quantile to compute.
998     * @throws IllegalArgumentException if the probability is not in the range {@code [0, 1]}
999     */
1000    private static void checkProbability(double p) {
1001        // Logic negation will detect NaN
1002        if (!(p >= 0 && p <= 1)) {
1003            throw new IllegalArgumentException(INVALID_PROBABILITY + p);
1004        }
1005    }
1006
1007    /**
1008     * Check the probabilities {@code p} are in the range {@code [0, 1]}.
1009     *
1010     * @param p Probabilities for the quantiles to compute.
1011     * @throws IllegalArgumentException if any probabilities {@code p} is not in the range {@code [0, 1]};
1012     * or no probabilities are specified.
1013     */
1014    private static void checkProbabilities(double... p) {
1015        if (p.length == 0) {
1016            throw new IllegalArgumentException(NO_PROBABILITIES_SPECIFIED);
1017        }
1018        for (final double pp : p) {
1019            checkProbability(pp);
1020        }
1021    }
1022
1023    /**
1024     * Check the {@code size} is positive.
1025     *
1026     * @param n Size of the values.
1027     * @throws IllegalArgumentException if {@code size < 0}
1028     */
1029    private static void checkSize(int n) {
1030        if (n < 0) {
1031            throw new IllegalArgumentException(INVALID_SIZE + n);
1032        }
1033    }
1034
1035    /**
1036     * Check the number of probabilities {@code n} is strictly positive.
1037     *
1038     * @param n Number of probabilities.
1039     * @throws IllegalArgumentException if {@code c < 1}
1040     */
1041    private static void checkNumberOfProbabilities(int n) {
1042        if (n < 1) {
1043            throw new IllegalArgumentException(INVALID_NUMBER_OF_PROBABILITIES + n);
1044        }
1045    }
1046
1047    /**
1048     * Compute the indices required for quantile interpolation.
1049     *
1050     * <p>The zero-based interpolation index in {@code [0, n)} is
1051     * saved into the working array {@code q} for each {@code p}.
1052     *
1053     * <p>The indices are incremented by the provided {@code offset} to allow
1054     * addressing sub-ranges of a larger array.
1055     *
1056     * @param n Size of the data.
1057     * @param p Probabilities for the quantiles to compute.
1058     * @param q Working array for quantiles in {@code [0, n)}.
1059     * @param offset Array offset.
1060     * @return the indices in {@code [offset, offset + n)}
1061     */
1062    private int[] computeIndices(int n, double[] p, double[] q, int offset) {
1063        final int[] indices = new int[p.length << 1];
1064        int count = 0;
1065        for (int k = 0; k < p.length; k++) {
1066            final double pos = estimationType.index(p[k], n);
1067            q[k] = pos;
1068            final int i = (int) pos;
1069            indices[count++] = offset + i;
1070            if (pos > i) {
1071                // Require the next index for interpolation
1072                indices[count++] = offset + i + 1;
1073            }
1074        }
1075        if (count < indices.length) {
1076            return Arrays.copyOf(indices, count);
1077        }
1078        return indices;
1079    }
1080
1081    /**
1082     * Enumerates estimation methods for a quantile. Provides the nine quantile algorithms
1083     * defined in Hyndman and Fan (1996)[1] as {@code HF1 - HF9}.
1084     *
1085     * <p>Samples quantiles are defined by:
1086     *
1087     * <p>\[ Q(p) = (1 - \gamma) x_j + \gamma x_{j+1} \]
1088     *
1089     * <p>where \( \frac{j-m}{n} \leq p \le \frac{j-m+1}{n} \), \( x_j \) is the \( j \)th
1090     * order statistic, \( n \) is the sample size, the value of \( \gamma \) is a function
1091     * of \( j = \lfloor np+m \rfloor \) and \( g = np + m - j \), and \( m \) is a constant
1092     * determined by the sample quantile type.
1093     *
1094     * <p>Note that the real-valued position \( np + m \) is a 1-based index and
1095     * \( j \in [1, n] \). If the real valued position is computed as beyond the lowest or
1096     * highest values in the sample, this implementation will return the minimum or maximum
1097     * observation respectively.
1098     *
1099     * <p>Types 1, 2, and 3 are discontinuous functions of \( p \); types 4 to 9 are continuous
1100     * functions of \( p \).
1101     *
1102     * <p>For the continuous functions, the probability \( p_k \) is provided for the \( k \)-th order
1103     * statistic in size \( n \). Samples quantiles are equivalently obtained to \( Q(p) \) by
1104     * linear interpolation between points \( (p_k, x_k) \) and \( (p_{k+1}, x_{k+1}) \) for
1105     * any \( p_k \leq p \leq p_{k+1} \).
1106     *
1107     * <ol>
1108     * <li>Hyndman and Fan (1996)
1109     *     <i>Sample Quantiles in Statistical Packages.</i>
1110     *     The American Statistician, 50, 361-365.
1111     *     <a href="https://www.jstor.org/stable/2684934">doi.org/10.2307/2684934</a></li>
1112     * <li><a href="https://en.wikipedia.org/wiki/Quantile">Quantile (Wikipedia)</a></li>
1113     * </ol>
1114     */
1115    public enum EstimationMethod {
1116        /**
1117         * Inverse of the empirical distribution function.
1118         *
1119         * <p>\( m = 0 \). \( \gamma = 0 \) if \( g = 0 \), and 1 otherwise.
1120         */
1121        HF1 {
1122            @Override
1123            double position0(double p, int n) {
1124                // position = np + 0. This is 1-based so adjust to 0-based.
1125                return Math.ceil(n * p) - 1;
1126            }
1127        },
1128        /**
1129         * Similar to {@link #HF1} with averaging at discontinuities.
1130         *
1131         * <p>\( m = 0 \). \( \gamma = 0.5 \) if \( g = 0 \), and 1 otherwise.
1132         */
1133        HF2 {
1134            @Override
1135            double position0(double p, int n) {
1136                final double pos = n * p;
1137                // Average at discontinuities
1138                final int j = (int) pos;
1139                final double g = pos - j;
1140                if (g == 0) {
1141                    return j - 0.5;
1142                }
1143                // As HF1 : ceil(j + g) - 1
1144                return j;
1145            }
1146        },
1147        /**
1148         * The observation closest to \( np \). Ties are resolved to the nearest even order statistic.
1149         *
1150         * <p>\( m = -1/2 \). \( \gamma = 0 \) if \( g = 0 \) and \( j \) is even, and 1 otherwise.
1151         */
1152        HF3 {
1153            @Override
1154            double position0(double p, int n) {
1155                // Let rint do the work for ties to even
1156                return Math.rint(n * p) - 1;
1157            }
1158        },
1159        /**
1160         * Linear interpolation of the inverse of the empirical CDF.
1161         *
1162         * <p>\( m = 0 \). \( p_k = \frac{k}{n} \).
1163         */
1164        HF4 {
1165            @Override
1166            double position0(double p, int n) {
1167                // np + 0 - 1
1168                return n * p - 1;
1169            }
1170        },
1171        /**
1172         * A piecewise linear function where the knots are the values midway through the steps of
1173         * the empirical CDF. Proposed by Hazen (1914) and popular amongst hydrologists.
1174         *
1175         * <p>\( m = 1/2 \). \( p_k = \frac{k - 1/2}{n} \).
1176         */
1177        HF5 {
1178            @Override
1179            double position0(double p, int n) {
1180                // np + 0.5 - 1
1181                return n * p - 0.5;
1182            }
1183        },
1184        /**
1185         * Linear interpolation of the expectations for the order statistics for the uniform
1186         * distribution on [0,1]. Proposed by Weibull (1939).
1187         *
1188         * <p>\( m = p \). \( p_k = \frac{k}{n + 1} \).
1189         *
1190         * <p>This method computes the quantile as per the Apache Commons Math Percentile
1191         * legacy implementation.
1192         */
1193        HF6 {
1194            @Override
1195            double position0(double p, int n) {
1196                // np + p - 1
1197                return (n + 1) * p - 1;
1198            }
1199        },
1200        /**
1201         * Linear interpolation of the modes for the order statistics for the uniform
1202         * distribution on [0,1]. Proposed by Gumbull (1939).
1203         *
1204         * <p>\( m = 1 - p \). \( p_k = \frac{k - 1}{n - 1} \).
1205         */
1206        HF7 {
1207            @Override
1208            double position0(double p, int n) {
1209                // np + 1-p - 1
1210                return (n - 1) * p;
1211            }
1212        },
1213        /**
1214         * Linear interpolation of the approximate medians for order statistics.
1215         *
1216         * <p>\( m = (p + 1)/3 \). \( p_k = \frac{k - 1/3}{n + 1/3} \).
1217         *
1218         * <p>As per Hyndman and Fan (1996) this approach is most recommended as it provides
1219         * an approximate median-unbiased estimate regardless of distribution.
1220         */
1221        HF8 {
1222            @Override
1223            double position0(double p, int n) {
1224                return n * p + (p + 1) / 3 - 1;
1225            }
1226        },
1227        /**
1228         * Quantile estimates are approximately unbiased for the expected order statistics if
1229         * \( x \) is normally distributed.
1230         *
1231         * <p>\( m = p/4 + 3/8 \). \( p_k = \frac{k - 3/8}{n + 1/4} \).
1232         */
1233        HF9 {
1234            @Override
1235            double position0(double p, int n) {
1236                // np + p/4 + 3/8 - 1
1237                return (n + 0.25) * p - 0.625;
1238            }
1239        };
1240
1241        /**
1242         * Finds the real-valued position for calculation of the quantile.
1243         *
1244         * <p>Return {@code i + g} such that the quantile value from sorted data is:
1245         *
1246         * <p>value = data[i] + g * (data[i+1] - data[i])
1247         *
1248         * <p>Warning: Interpolation should not use {@code data[i+1]} unless {@code g != 0}.
1249         *
1250         * <p>Note: In contrast to the definition of Hyndman and Fan in the class header
1251         * which uses a 1-based position, this is a zero based index. This change is for
1252         * convenience when addressing array positions.
1253         *
1254         * @param p p<sup>th</sup> quantile.
1255         * @param n Size.
1256         * @return a real-valued position (0-based) into the range {@code [0, n)}
1257         */
1258        abstract double position0(double p, int n);
1259
1260        /**
1261         * Finds the index {@code i} and fractional part {@code g} of a real-valued position
1262         * to interpolate the quantile.
1263         *
1264         * <p>Return {@code i + g} such that the quantile value from sorted data is:
1265         *
1266         * <p>value = data[i] + g * (data[i+1] - data[i])
1267         *
1268         * <p>Note: Interpolation should not use {@code data[i+1]} unless {@code g != 0}.
1269         *
1270         * @param p p<sup>th</sup> quantile.
1271         * @param n Size.
1272         * @return index (in [0, n-1])
1273         */
1274        final double index(double p, int n) {
1275            final double pos = position0(p, n);
1276            // Bounds check in [0, n-1]
1277            if (pos < 0) {
1278                return 0;
1279            }
1280            if (pos > n - 1.0) {
1281                return n - 1.0;
1282            }
1283            return pos;
1284        }
1285    }
1286}