Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.statistics.descriptive;
018
019import java.util.Arrays;
020import java.util.Objects;
021import java.util.function.IntToDoubleFunction;
022import org.apache.commons.numbers.arrays.Selection;
023
024/**
025 * Provides quantile computation.
026 *
027 * <p>For values of length {@code n}:
028 * <ul>
029 * <li>The result is {@code NaN} if {@code n = 0}.
030 * <li>The result is {@code values[0]} if {@code n = 1}.
031 * <li>Otherwise the result is computed using the {@link EstimationMethod}.
032 * </ul>
033 *
034 * <p>Computation of multiple quantiles and will handle duplicate and unordered
035 * probabilities. Passing ordered probabilities is recommended if the order is already
036 * known as this can improve efficiency; for example using uniform spacing through the
037 * array data, or to identify extreme values from the data such as {@code [0.001, 0.999]}.
038 *
039 * <p>This implementation respects the ordering imposed by
040 * {@link Double#compare(double, double)} for {@code NaN} values. If a {@code NaN} occurs
041 * in the selected positions in the fully sorted values then the result is {@code NaN}.
042 *
043 * <p>The {@link NaNPolicy} can be used to change the behaviour on {@code NaN} values.
044 *
045 * <p>Instances of this class are immutable and thread-safe.
046 *
047 * @see #with(NaNPolicy)
048 * @see <a href="http://en.wikipedia.org/wiki/Quantile">Quantile (Wikipedia)</a>
049 * @since 1.1
050 */
051public final class Quantile {
052    /** Message when the probability is not in the range {@code [0, 1]}. */
053    private static final String INVALID_PROBABILITY = "Invalid probability: ";
054    /** Message when no probabilities are provided for the varargs method. */
055    private static final String NO_PROBABILITIES_SPECIFIED = "No probabilities specified";
056    /** Message when the size is not valid. */
057    private static final String INVALID_SIZE = "Invalid size: ";
058    /** Message when the number of probabilities in a range is not valid. */
059    private static final String INVALID_NUMBER_OF_PROBABILITIES = "Invalid number of probabilities: ";
060
061    /** Default instance. Method 8 is recommended by Hyndman and Fan. */
062    private static final Quantile DEFAULT = new Quantile(false, NaNPolicy.INCLUDE, EstimationMethod.HF8);
063
064    /** Flag to indicate if the data should be copied. */
065    private final boolean copy;
066    /** NaN policy for floating point data. */
067    private final NaNPolicy nanPolicy;
068    /** Transformer for NaN data. */
069    private final NaNTransformer nanTransformer;
070    /** Estimation type used to determine the value from the quantile. */
071    private final EstimationMethod estimationType;
072
073    /**
074     * @param copy Flag to indicate if the data should be copied.
075     * @param nanPolicy NaN policy.
076     * @param estimationType Estimation type used to determine the value from the quantile.
077     */
078    private Quantile(boolean copy, NaNPolicy nanPolicy, EstimationMethod estimationType) {
079        this.copy = copy;
080        this.nanPolicy = nanPolicy;
081        this.estimationType = estimationType;
082        nanTransformer = NaNTransformers.createNaNTransformer(nanPolicy, copy);
083    }
084
085    /**
086     * Return a new instance with the default options.
087     *
088     * <ul>
089     * <li>{@linkplain #withCopy(boolean) Copy = false}
090     * <li>{@linkplain #with(NaNPolicy) NaN policy = include}
091     * <li>{@linkplain #with(EstimationMethod) Estimation method = HF8}
092     * </ul>
093     *
094     * <p>Note: The default options configure for processing in-place and including
095     * {@code NaN} values in the data. This is the most efficient mode and has the
096     * smallest memory consumption.
097     *
098     * @return the quantile implementation
099     * @see #withCopy(boolean)
100     * @see #with(NaNPolicy)
101     * @see #with(EstimationMethod)
102     */
103    public static Quantile withDefaults() {
104        return DEFAULT;
105    }
106
107    /**
108     * Return an instance with the configured copy behaviour. If {@code false} then
109     * the input array will be modified by the call to evaluate the quantiles; otherwise
110     * the computation uses a copy of the data.
111     *
112     * @param v Value.
113     * @return an instance
114     */
115    public Quantile withCopy(boolean v) {
116        return new Quantile(v, nanPolicy, estimationType);
117    }
118
119    /**
120     * Return an instance with the configured {@link NaNPolicy}.
121     *
122     * <p>Note: This implementation respects the ordering imposed by
123     * {@link Double#compare(double, double)} for {@code NaN} values: {@code NaN} is
124     * considered greater than all other values, and all {@code NaN} values are equal. The
125     * {@link NaNPolicy} changes the computation of the statistic in the presence of
126     * {@code NaN} values.
127     *
128     * <ul>
129     * <li>{@link NaNPolicy#INCLUDE}: {@code NaN} values are moved to the end of the data;
130     * the size of the data <em>includes</em> the {@code NaN} values and the quantile will be
131     * {@code NaN} if any value used for quantile interpolation is {@code NaN}.
132     * <li>{@link NaNPolicy#EXCLUDE}: {@code NaN} values are moved to the end of the data;
133     * the size of the data <em>excludes</em> the {@code NaN} values and the quantile will
134     * never be {@code NaN} for non-zero size. If all data are {@code NaN} then the size is zero
135     * and the result is {@code NaN}.
136     * <li>{@link NaNPolicy#ERROR}: An exception is raised if the data contains {@code NaN}
137     * values.
138     * </ul>
139     *
140     * <p>Note that the result is identical for all policies if no {@code NaN} values are present.
141     *
142     * @param v Value.
143     * @return an instance
144     */
145    public Quantile with(NaNPolicy v) {
146        return new Quantile(copy, Objects.requireNonNull(v), estimationType);
147    }
148
149    /**
150     * Return an instance with the configured {@link EstimationMethod}.
151     *
152     * @param v Value.
153     * @return an instance
154     */
155    public Quantile with(EstimationMethod v) {
156        return new Quantile(copy, nanPolicy, Objects.requireNonNull(v));
157    }
158
159    /**
160     * Generate {@code n} evenly spaced probabilities in the range {@code [0, 1]}.
161     *
162     * <pre>
163     * 1/(n + 1), 2/(n + 1), ..., n/(n + 1)
164     * </pre>
165     *
166     * @param n Number of probabilities.
167     * @return the probabilities
168     * @throws IllegalArgumentException if {@code n < 1}
169     */
170    public static double[] probabilities(int n) {
171        checkNumberOfProbabilities(n);
172        final double c1 = n + 1.0;
173        final double[] p = new double[n];
174        for (int i = 0; i < n; i++) {
175            p[i] = (i + 1.0) / c1;
176        }
177        return p;
178    }
179
180    /**
181     * Generate {@code n} evenly spaced probabilities in the range {@code [p1, p2]}.
182     *
183     * <pre>
184     * w = p2 - p1
185     * p1 + w/(n + 1), p1 + 2w/(n + 1), ..., p1 + nw/(n + 1)
186     * </pre>
187     *
188     * @param n Number of probabilities.
189     * @param p1 Lower probability.
190     * @param p2 Upper probability.
191     * @return the probabilities
192     * @throws IllegalArgumentException if {@code n < 1}; if the probabilities are not in the
193     * range {@code [0, 1]}; or {@code p2 <= p1}.
194     */
195    public static double[] probabilities(int n, double p1, double p2) {
196        checkProbability(p1);
197        checkProbability(p2);
198        if (p2 <= p1) {
199            throw new IllegalArgumentException("Invalid range: [" + p1 + ", " + p2 + "]");
200        }
201        final double[] p = probabilities(n);
202        for (int i = 0; i < n; i++) {
203            p[i] = (1 - p[i]) * p1 + p[i] * p2;
204        }
205        return p;
206    }
207
208    /**
209     * Evaluate the {@code p}-th quantile of the values.
210     *
211     * <p>Note: This method may partially sort the input values if not configured to
212     * {@link #withCopy(boolean) copy} the input data.
213     *
214     * <p><strong>Performance</strong>
215     *
216     * <p>It is not recommended to use this method for repeat calls for different quantiles
217     * within the same values. The {@link #evaluate(double[], double...)} method should be used
218     * which provides better performance.
219     *
220     * @param values Values.
221     * @param p Probability for the quantile to compute.
222     * @return the quantile
223     * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]};
224     * or if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
225     * @see #evaluate(double[], double...)
226     * @see #with(NaNPolicy)
227     */
228    public double evaluate(double[] values, double p) {
229        return compute(values, 0, values.length, p);
230    }
231
232    /**
233     * Evaluate the {@code p}-th quantile of the specified range of values.
234     *
235     * <p>Note: This method may partially sort the input values if not configured to
236     * {@link #withCopy(boolean) copy} the input data.
237     *
238     * <p><strong>Performance</strong>
239     *
240     * <p>It is not recommended to use this method for repeat calls for different quantiles
241     * within the same values. The {@link #evaluateRange(double[], int, int, double...)} method should be used
242     * which provides better performance.
243     *
244     * @param values Values.
245     * @param from Inclusive start of the range.
246     * @param to Exclusive end of the range.
247     * @param p Probability for the quantile to compute.
248     * @return the quantile
249     * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]};
250     * or if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
251     * @throws IndexOutOfBoundsException if the sub-range is out of bounds
252     * @see #evaluateRange(double[], int, int, double...)
253     * @see #with(NaNPolicy)
254     * @since 1.2
255     */
256    public double evaluateRange(double[] values, int from, int to, double p) {
257        Statistics.checkFromToIndex(from, to, values.length);
258        return compute(values, from, to, p);
259    }
260
261    /**
262     * Compute the {@code p}-th quantile of the specified range of values.
263     *
264     * @param values Values.
265     * @param from Inclusive start of the range.
266     * @param to Exclusive end of the range.
267     * @param p Probability for the quantile to compute.
268     * @return the quantile
269     * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
270     */
271    private double compute(double[] values, int from, int to, double p) {
272        checkProbability(p);
273        // Floating-point data handling
274        final int[] bounds = new int[2];
275        final double[] x = nanTransformer.apply(values, from, to, bounds);
276        final int start = bounds[0];
277        final int end = bounds[1];
278        final int n = end - start;
279        // Special cases
280        if (n <= 1) {
281            return n == 0 ? Double.NaN : x[start];
282        }
283
284        final double pos = estimationType.index(p, n);
285        final int ip = (int) pos;
286        final int i = start + ip;
287
288        // Partition and compute
289        if (pos > ip) {
290            Selection.select(x, start, end, new int[] {i, i + 1});
291            return Interpolation.interpolate(x[i], x[i + 1], pos - ip);
292        }
293        Selection.select(x, start, end, i);
294        return x[i];
295    }
296
297    /**
298     * Evaluate the {@code p}-th quantiles of the values.
299     *
300     * <p>Note: This method may partially sort the input values if not configured to
301     * {@link #withCopy(boolean) copy} the input data.
302     *
303     * @param values Values.
304     * @param p Probabilities for the quantiles to compute.
305     * @return the quantiles
306     * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
307     * no probabilities are specified; or if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
308     * @see #with(NaNPolicy)
309     */
310    public double[] evaluate(double[] values, double... p) {
311        return compute(values, 0, values.length, p);
312    }
313
314    /**
315     * Evaluate the {@code p}-th quantiles of the specified range of values.
316     *
317     * <p>Note: This method may partially sort the input values if not configured to
318     * {@link #withCopy(boolean) copy} the input data.
319     *
320     * @param values Values.
321     * @param from Inclusive start of the range.
322     * @param to Exclusive end of the range.
323     * @param p Probabilities for the quantiles to compute.
324     * @return the quantiles
325     * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
326     * no probabilities are specified; or if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
327     * @throws IndexOutOfBoundsException if the sub-range is out of bounds
328     * @see #with(NaNPolicy)
329     * @since 1.2
330     */
331    public double[] evaluateRange(double[] values, int from, int to, double... p) {
332        Statistics.checkFromToIndex(from, to, values.length);
333        return compute(values, from, to, p);
334    }
335
336    /**
337     * Compute the {@code p}-th quantiles of the specified range of values.
338     *
339     * @param values Values.
340     * @param from Inclusive start of the range.
341     * @param to Exclusive end of the range.
342     * @param p Probabilities for the quantiles to compute.
343     * @return the quantiles
344     * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
345     * or no probabilities are specified.
346     */
347    private double[] compute(double[] values, int from, int to, double... p) {
348        checkProbabilities(p);
349        // Floating-point data handling
350        final int[] bounds = new int[2];
351        final double[] x = nanTransformer.apply(values, from, to, bounds);
352        final int start = bounds[0];
353        final int end = bounds[1];
354        final int n = end - start;
355        // Special cases
356        final double[] q = new double[p.length];
357        if (n <= 1) {
358            Arrays.fill(q, n == 0 ? Double.NaN : x[start]);
359            return q;
360        }
361
362        // Collect interpolation positions. We use the output q as storage.
363        final int[] indices = computeIndices(n, p, q, start);
364
365        // Partition
366        Selection.select(x, start, end, indices);
367
368        // Compute
369        for (int k = 0; k < p.length; k++) {
370            // ip in [0, n); i in [start, end)
371            final int ip = (int) q[k];
372            final int i = start + ip;
373            if (q[k] > ip) {
374                q[k] = Interpolation.interpolate(x[i], x[i + 1], q[k] - ip);
375            } else {
376                q[k] = x[i];
377            }
378        }
379        return q;
380    }
381
382    /**
383     * Evaluate the {@code p}-th quantile of the values.
384     *
385     * <p>Note: This method may partially sort the input values if not configured to
386     * {@link #withCopy(boolean) copy} the input data.
387     *
388     * <p><strong>Performance</strong>
389     *
390     * <p>It is not recommended to use this method for repeat calls for different quantiles
391     * within the same values. The {@link #evaluate(int[], double...)} method should be used
392     * which provides better performance.
393     *
394     * @param values Values.
395     * @param p Probability for the quantile to compute.
396     * @return the quantile
397     * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
398     * @see #evaluate(int[], double...)
399     */
400    public double evaluate(int[] values, double p) {
401        return compute(values, 0, values.length, p);
402    }
403
404    /**
405     * Evaluate the {@code p}-th quantile of the specified range of values.
406     *
407     * <p>Note: This method may partially sort the input values if not configured to
408     * {@link #withCopy(boolean) copy} the input data.
409     *
410     * <p><strong>Performance</strong>
411     *
412     * <p>It is not recommended to use this method for repeat calls for different quantiles
413     * within the same values. The {@link #evaluateRange(int[], int, int, double...)} method should be used
414     * which provides better performance.
415     *
416     * @param values Values.
417     * @param from Inclusive start of the range.
418     * @param to Exclusive end of the range.
419     * @param p Probability for the quantile to compute.
420     * @return the quantile
421     * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
422     * @throws IndexOutOfBoundsException if the sub-range is out of bounds
423     * @see #evaluateRange(int[], int, int, double...)
424     * @since 1.2
425     */
426    public double evaluateRange(int[] values, int from, int to, double p) {
427        Statistics.checkFromToIndex(from, to, values.length);
428        return compute(values, from, to, p);
429    }
430
431    /**
432     * Compute the {@code p}-th quantile of the specified range of values.
433     *
434     * @param values Values.
435     * @param from Inclusive start of the range.
436     * @param to Exclusive end of the range.
437     * @param p Probability for the quantile to compute.
438     * @return the quantile
439     * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
440     */
441    private double compute(int[] values, int from, int to, double p) {
442        checkProbability(p);
443        final int n = to - from;
444        // Special cases
445        if (n <= 1) {
446            return n == 0 ? Double.NaN : values[from];
447        }
448
449        // Create the range
450        final int[] x;
451        final int start;
452        final int end;
453        if (copy) {
454            x = Statistics.copy(values, from, to);
455            start = 0;
456            end = n;
457        } else {
458            x = values;
459            start = from;
460            end = to;
461        }
462
463        final double pos = estimationType.index(p, n);
464        final int ip = (int) pos;
465        final int i = start + ip;
466
467        // Partition and compute
468        if (pos > ip) {
469            Selection.select(x, start, end, new int[] {i, i + 1});
470            return Interpolation.interpolate(x[i], x[i + 1], pos - ip);
471        }
472        Selection.select(x, start, end, i);
473        return x[i];
474    }
475
476    /**
477     * Evaluate the {@code p}-th quantiles of the values.
478     *
479     * <p>Note: This method may partially sort the input values if not configured to
480     * {@link #withCopy(boolean) copy} the input data.
481     *
482     * @param values Values.
483     * @param p Probabilities for the quantiles to compute.
484     * @return the quantiles
485     * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
486     * or no probabilities are specified.
487     */
488    public double[] evaluate(int[] values, double... p) {
489        return compute(values, 0, values.length, p);
490    }
491
492    /**
493     * Evaluate the {@code p}-th quantiles of the specified range of values..
494     *
495     * <p>Note: This method may partially sort the input values if not configured to
496     * {@link #withCopy(boolean) copy} the input data.
497     *
498     * @param values Values.
499     * @param from Inclusive start of the range.
500     * @param to Exclusive end of the range.
501     * @param p Probabilities for the quantiles to compute.
502     * @return the quantiles
503     * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
504     * or no probabilities are specified.
505     * @throws IndexOutOfBoundsException if the sub-range is out of bounds
506     * @since 1.2
507     */
508    public double[] evaluateRange(int[] values, int from, int to, double... p) {
509        Statistics.checkFromToIndex(from, to, values.length);
510        return compute(values, from, to, p);
511    }
512
513    /**
514     * Evaluate the {@code p}-th quantiles of the specified range of values..
515     *
516     * <p>Note: This method may partially sort the input values if not configured to
517     * {@link #withCopy(boolean) copy} the input data.
518     *
519     * @param values Values.
520     * @param from Inclusive start of the range.
521     * @param to Exclusive end of the range.
522     * @param p Probabilities for the quantiles to compute.
523     * @return the quantiles
524     * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
525     * or no probabilities are specified.
526     */
527    private double[] compute(int[] values, int from, int to, double... p) {
528        checkProbabilities(p);
529        final int n = to - from;
530        // Special cases
531        final double[] q = new double[p.length];
532        if (n <= 1) {
533            Arrays.fill(q, n == 0 ? Double.NaN : values[from]);
534            return q;
535        }
536
537        // Create the range
538        final int[] x;
539        final int start;
540        final int end;
541        if (copy) {
542            x = Statistics.copy(values, from, to);
543            start = 0;
544            end = n;
545        } else {
546            x = values;
547            start = from;
548            end = to;
549        }
550
551        // Collect interpolation positions. We use the output q as storage.
552        final int[] indices = computeIndices(n, p, q, start);
553
554        // Partition
555        Selection.select(x, start, end, indices);
556
557        // Compute
558        for (int k = 0; k < p.length; k++) {
559            // ip in [0, n); i in [start, end)
560            final int ip = (int) q[k];
561            final int i = start + ip;
562            if (q[k] > ip) {
563                q[k] = Interpolation.interpolate(x[i], x[i + 1], q[k] - ip);
564            } else {
565                q[k] = x[i];
566            }
567        }
568        return q;
569    }
570
571    /**
572     * Evaluate the {@code p}-th quantile of the values.
573     *
574     * <p>This method can be used when the values of known size are already sorted.
575     *
576     * <pre>{@code
577     * short[] x = ...
578     * Arrays.sort(x);
579     * double q = Quantile.withDefaults().evaluate(x.length, i -> x[i], 0.05);
580     * }</pre>
581     *
582     * @param n Size of the values.
583     * @param values Values function.
584     * @param p Probability for the quantile to compute.
585     * @return the quantile
586     * @throws IllegalArgumentException if {@code size < 0}; or if the probability {@code p} is
587     * not in the range {@code [0, 1]}.
588     */
589    public double evaluate(int n, IntToDoubleFunction values, double p) {
590        checkSize(n);
591        checkProbability(p);
592        // Special case
593        if (n <= 1) {
594            return n == 0 ? Double.NaN : values.applyAsDouble(0);
595        }
596        final double pos = estimationType.index(p, n);
597        final int i = (int) pos;
598        final double v1 = values.applyAsDouble(i);
599        if (pos > i) {
600            final double v2 = values.applyAsDouble(i + 1);
601            return Interpolation.interpolate(v1, v2, pos - i);
602        }
603        return v1;
604    }
605
606    /**
607     * Evaluate the {@code p}-th quantiles of the values.
608     *
609     * <p>This method can be used when the values of known size are already sorted.
610     *
611     * <pre>{@code
612     * short[] x = ...
613     * Arrays.sort(x);
614     * double[] q = Quantile.withDefaults().evaluate(x.length, i -> x[i], 0.25, 0.5, 0.75);
615     * }</pre>
616     *
617     * @param n Size of the values.
618     * @param values Values function.
619     * @param p Probabilities for the quantiles to compute.
620     * @return the quantiles
621     * @throws IllegalArgumentException if {@code size < 0}; if any probability {@code p} is
622     * not in the range {@code [0, 1]}; or no probabilities are specified.
623     */
624    public double[] evaluate(int n, IntToDoubleFunction values, double... p) {
625        checkSize(n);
626        checkProbabilities(p);
627        // Special case
628        final double[] q = new double[p.length];
629        if (n <= 1) {
630            Arrays.fill(q, n == 0 ? Double.NaN : values.applyAsDouble(0));
631            return q;
632        }
633        for (int k = 0; k < p.length; k++) {
634            final double pos = estimationType.index(p[k], n);
635            final int i = (int) pos;
636            final double v1 = values.applyAsDouble(i);
637            if (pos > i) {
638                final double v2 = values.applyAsDouble(i + 1);
639                q[k] = Interpolation.interpolate(v1, v2, pos - i);
640            } else {
641                q[k] = v1;
642            }
643        }
644        return q;
645    }
646
647    /**
648     * Check the probability {@code p} is in the range {@code [0, 1]}.
649     *
650     * @param p Probability for the quantile to compute.
651     * @throws IllegalArgumentException if the probability is not in the range {@code [0, 1]}
652     */
653    private static void checkProbability(double p) {
654        // Logic negation will detect NaN
655        if (!(p >= 0 && p <= 1)) {
656            throw new IllegalArgumentException(INVALID_PROBABILITY + p);
657        }
658    }
659
660    /**
661     * Check the probabilities {@code p} are in the range {@code [0, 1]}.
662     *
663     * @param p Probabilities for the quantiles to compute.
664     * @throws IllegalArgumentException if any probabilities {@code p} is not in the range {@code [0, 1]};
665     * or no probabilities are specified.
666     */
667    private static void checkProbabilities(double... p) {
668        if (p.length == 0) {
669            throw new IllegalArgumentException(NO_PROBABILITIES_SPECIFIED);
670        }
671        for (final double pp : p) {
672            checkProbability(pp);
673        }
674    }
675
676    /**
677     * Check the {@code size} is positive.
678     *
679     * @param n Size of the values.
680     * @throws IllegalArgumentException if {@code size < 0}
681     */
682    private static void checkSize(int n) {
683        if (n < 0) {
684            throw new IllegalArgumentException(INVALID_SIZE + n);
685        }
686    }
687
688    /**
689     * Check the number of probabilities {@code n} is strictly positive.
690     *
691     * @param n Number of probabilities.
692     * @throws IllegalArgumentException if {@code c < 1}
693     */
694    private static void checkNumberOfProbabilities(int n) {
695        if (n < 1) {
696            throw new IllegalArgumentException(INVALID_NUMBER_OF_PROBABILITIES + n);
697        }
698    }
699
700    /**
701     * Compute the indices required for quantile interpolation.
702     *
703     * <p>The zero-based interpolation index in {@code [0, n)} is
704     * saved into the working array {@code q} for each {@code p}.
705     *
706     * <p>The indices are incremented by the provided {@code offset} to allow
707     * addressing sub-ranges of a larger array.
708     *
709     * @param n Size of the data.
710     * @param p Probabilities for the quantiles to compute.
711     * @param q Working array for quantiles in {@code [0, n)}.
712     * @param offset Array offset.
713     * @return the indices in {@code [offset, offset + n)}
714     */
715    private int[] computeIndices(int n, double[] p, double[] q, int offset) {
716        final int[] indices = new int[p.length << 1];
717        int count = 0;
718        for (int k = 0; k < p.length; k++) {
719            final double pos = estimationType.index(p[k], n);
720            q[k] = pos;
721            final int i = (int) pos;
722            indices[count++] = offset + i;
723            if (pos > i) {
724                // Require the next index for interpolation
725                indices[count++] = offset + i + 1;
726            }
727        }
728        if (count < indices.length) {
729            return Arrays.copyOf(indices, count);
730        }
731        return indices;
732    }
733
734    /**
735     * Estimation methods for a quantile. Provides the nine quantile algorithms
736     * defined in Hyndman and Fan (1996)[1] as {@code HF1 - HF9}.
737     *
738     * <p>Samples quantiles are defined by:
739     *
740     * <p>\[ Q(p) = (1 - \gamma) x_j + \gamma x_{j+1} \]
741     *
742     * <p>where \( \frac{j-m}{n} \leq p \le \frac{j-m+1}{n} \), \( x_j \) is the \( j \)th
743     * order statistic, \( n \) is the sample size, the value of \( \gamma \) is a function
744     * of \( j = \lfloor np+m \rfloor \) and \( g = np + m - j \), and \( m \) is a constant
745     * determined by the sample quantile type.
746     *
747     * <p>Note that the real-valued position \( np + m \) is a 1-based index and
748     * \( j \in [1, n] \). If the real valued position is computed as beyond the lowest or
749     * highest values in the sample, this implementation will return the minimum or maximum
750     * observation respectively.
751     *
752     * <p>Types 1, 2, and 3 are discontinuous functions of \( p \); types 4 to 9 are continuous
753     * functions of \( p \).
754     *
755     * <p>For the continuous functions, the probability \( p_k \) is provided for the \( k \)-th order
756     * statistic in size \( n \). Samples quantiles are equivalently obtained to \( Q(p) \) by
757     * linear interpolation between points \( (p_k, x_k) \) and \( (p_{k+1}, x_{k+1}) \) for
758     * any \( p_k \leq p \leq p_{k+1} \).
759     *
760     * <ol>
761     * <li>Hyndman and Fan (1996)
762     *     <i>Sample Quantiles in Statistical Packages.</i>
763     *     The American Statistician, 50, 361-365.
764     *     <a href="https://www.jstor.org/stable/2684934">doi.org/10.2307/2684934</a>
765     * <li><a href="http://en.wikipedia.org/wiki/Quantile">Quantile (Wikipedia)</a>
766     * </ol>
767     */
768    public enum EstimationMethod {
769        /**
770         * Inverse of the empirical distribution function.
771         *
772         * <p>\( m = 0 \). \( \gamma = 0 \) if \( g = 0 \), and 1 otherwise.
773         */
774        HF1 {
775            @Override
776            double position0(double p, int n) {
777                // position = np + 0. This is 1-based so adjust to 0-based.
778                return Math.ceil(n * p) - 1;
779            }
780        },
781        /**
782         * Similar to {@link #HF1} with averaging at discontinuities.
783         *
784         * <p>\( m = 0 \). \( \gamma = 0.5 \) if \( g = 0 \), and 1 otherwise.
785         */
786        HF2 {
787            @Override
788            double position0(double p, int n) {
789                final double pos = n * p;
790                // Average at discontinuities
791                final int j = (int) pos;
792                final double g = pos - j;
793                if (g == 0) {
794                    return j - 0.5;
795                }
796                // As HF1 : ceil(j + g) - 1
797                return j;
798            }
799        },
800        /**
801         * The observation closest to \( np \). Ties are resolved to the nearest even order statistic.
802         *
803         * <p>\( m = -1/2 \). \( \gamma = 0 \) if \( g = 0 \) and \( j \) is even, and 1 otherwise.
804         */
805        HF3 {
806            @Override
807            double position0(double p, int n) {
808                // Let rint do the work for ties to even
809                return Math.rint(n * p) - 1;
810            }
811        },
812        /**
813         * Linear interpolation of the inverse of the empirical CDF.
814         *
815         * <p>\( m = 0 \). \( p_k = \frac{k}{n} \).
816         */
817        HF4 {
818            @Override
819            double position0(double p, int n) {
820                // np + 0 - 1
821                return n * p - 1;
822            }
823        },
824        /**
825         * A piecewise linear function where the knots are the values midway through the steps of
826         * the empirical CDF. Proposed by Hazen (1914) and popular amongst hydrologists.
827         *
828         * <p>\( m = 1/2 \). \( p_k = \frac{k - 1/2}{n} \).
829         */
830        HF5 {
831            @Override
832            double position0(double p, int n) {
833                // np + 0.5 - 1
834                return n * p - 0.5;
835            }
836        },
837        /**
838         * Linear interpolation of the expectations for the order statistics for the uniform
839         * distribution on [0,1]. Proposed by Weibull (1939).
840         *
841         * <p>\( m = p \). \( p_k = \frac{k}{n + 1} \).
842         *
843         * <p>This method computes the quantile as per the Apache Commons Math Percentile
844         * legacy implementation.
845         */
846        HF6 {
847            @Override
848            double position0(double p, int n) {
849                // np + p - 1
850                return (n + 1) * p - 1;
851            }
852        },
853        /**
854         * Linear interpolation of the modes for the order statistics for the uniform
855         * distribution on [0,1]. Proposed by Gumbull (1939).
856         *
857         * <p>\( m = 1 - p \). \( p_k = \frac{k - 1}{n - 1} \).
858         */
859        HF7 {
860            @Override
861            double position0(double p, int n) {
862                // np + 1-p - 1
863                return (n - 1) * p;
864            }
865        },
866        /**
867         * Linear interpolation of the approximate medians for order statistics.
868         *
869         * <p>\( m = (p + 1)/3 \). \( p_k = \frac{k - 1/3}{n + 1/3} \).
870         *
871         * <p>As per Hyndman and Fan (1996) this approach is most recommended as it provides
872         * an approximate median-unbiased estimate regardless of distribution.
873         */
874        HF8 {
875            @Override
876            double position0(double p, int n) {
877                return n * p + (p + 1) / 3 - 1;
878            }
879        },
880        /**
881         * Quantile estimates are approximately unbiased for the expected order statistics if
882         * \( x \) is normally distributed.
883         *
884         * <p>\( m = p/4 + 3/8 \). \( p_k = \frac{k - 3/8}{n + 1/4} \).
885         */
886        HF9 {
887            @Override
888            double position0(double p, int n) {
889                // np + p/4 + 3/8 - 1
890                return (n + 0.25) * p - 0.625;
891            }
892        };
893
894        /**
895         * Finds the real-valued position for calculation of the quantile.
896         *
897         * <p>Return {@code i + g} such that the quantile value from sorted data is:
898         *
899         * <p>value = data[i] + g * (data[i+1] - data[i])
900         *
901         * <p>Warning: Interpolation should not use {@code data[i+1]} unless {@code g != 0}.
902         *
903         * <p>Note: In contrast to the definition of Hyndman and Fan in the class header
904         * which uses a 1-based position, this is a zero based index. This change is for
905         * convenience when addressing array positions.
906         *
907         * @param p p<sup>th</sup> quantile.
908         * @param n Size.
909         * @return a real-valued position (0-based) into the range {@code [0, n)}
910         */
911        abstract double position0(double p, int n);
912
913        /**
914         * Finds the index {@code i} and fractional part {@code g} of a real-valued position
915         * to interpolate the quantile.
916         *
917         * <p>Return {@code i + g} such that the quantile value from sorted data is:
918         *
919         * <p>value = data[i] + g * (data[i+1] - data[i])
920         *
921         * <p>Note: Interpolation should not use {@code data[i+1]} unless {@code g != 0}.
922         *
923         * @param p p<sup>th</sup> quantile.
924         * @param n Size.
925         * @return index (in [0, n-1])
926         */
927        final double index(double p, int n) {
928            final double pos = position0(p, n);
929            // Bounds check in [0, n-1]
930            if (pos < 0) {
931                return 0;
932            }
933            if (pos > n - 1.0) {
934                return n - 1.0;
935            }
936            return pos;
937        }
938    }
939}