View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.statistics.descriptive;
18  
19  import java.util.Arrays;
20  import java.util.Objects;
21  import java.util.function.IntToDoubleFunction;
22  import org.apache.commons.numbers.arrays.Selection;
23  
24  /**
25   * Provides quantile computation.
26   *
27   * <p>For values of length {@code n}:
28   * <ul>
29   * <li>The result is {@code NaN} if {@code n = 0}.
30   * <li>The result is {@code values[0]} if {@code n = 1}.
31   * <li>Otherwise the result is computed using the {@link EstimationMethod}.
32   * </ul>
33   *
34   * <p>Computation of multiple quantiles and will handle duplicate and unordered
35   * probabilities. Passing ordered probabilities is recommended if the order is already
36   * known as this can improve efficiency; for example using uniform spacing through the
37   * array data, or to identify extreme values from the data such as {@code [0.001, 0.999]}.
38   *
39   * <p>This implementation respects the ordering imposed by
40   * {@link Double#compare(double, double)} for {@code NaN} values. If a {@code NaN} occurs
41   * in the selected positions in the fully sorted values then the result is {@code NaN}.
42   *
43   * <p>The {@link NaNPolicy} can be used to change the behaviour on {@code NaN} values.
44   *
45   * <p>Instances of this class are immutable and thread-safe.
46   *
47   * @see #with(NaNPolicy)
48   * @see <a href="http://en.wikipedia.org/wiki/Quantile">Quantile (Wikipedia)</a>
49   * @since 1.1
50   */
51  public final class Quantile {
52      /** Message when the probability is not in the range {@code [0, 1]}. */
53      private static final String INVALID_PROBABILITY = "Invalid probability: ";
54      /** Message when no probabilities are provided for the varargs method. */
55      private static final String NO_PROBABILITIES_SPECIFIED = "No probabilities specified";
56      /** Message when the size is not valid. */
57      private static final String INVALID_SIZE = "Invalid size: ";
58      /** Message when the number of probabilities in a range is not valid. */
59      private static final String INVALID_NUMBER_OF_PROBABILITIES = "Invalid number of probabilities: ";
60  
61      /** Default instance. Method 8 is recommended by Hyndman and Fan. */
62      private static final Quantile DEFAULT = new Quantile(false, NaNPolicy.INCLUDE, EstimationMethod.HF8);
63  
64      /** Flag to indicate if the data should be copied. */
65      private final boolean copy;
66      /** NaN policy for floating point data. */
67      private final NaNPolicy nanPolicy;
68      /** Transformer for NaN data. */
69      private final NaNTransformer nanTransformer;
70      /** Estimation type used to determine the value from the quantile. */
71      private final EstimationMethod estimationType;
72  
73      /**
74       * @param copy Flag to indicate if the data should be copied.
75       * @param nanPolicy NaN policy.
76       * @param estimationType Estimation type used to determine the value from the quantile.
77       */
78      private Quantile(boolean copy, NaNPolicy nanPolicy, EstimationMethod estimationType) {
79          this.copy = copy;
80          this.nanPolicy = nanPolicy;
81          this.estimationType = estimationType;
82          nanTransformer = NaNTransformers.createNaNTransformer(nanPolicy, copy);
83      }
84  
85      /**
86       * Return a new instance with the default options.
87       *
88       * <ul>
89       * <li>{@linkplain #withCopy(boolean) Copy = false}
90       * <li>{@linkplain #with(NaNPolicy) NaN policy = include}
91       * <li>{@linkplain #with(EstimationMethod) Estimation method = HF8}
92       * </ul>
93       *
94       * <p>Note: The default options configure for processing in-place and including
95       * {@code NaN} values in the data. This is the most efficient mode and has the
96       * smallest memory consumption.
97       *
98       * @return the quantile implementation
99       * @see #withCopy(boolean)
100      * @see #with(NaNPolicy)
101      * @see #with(EstimationMethod)
102      */
103     public static Quantile withDefaults() {
104         return DEFAULT;
105     }
106 
107     /**
108      * Return an instance with the configured copy behaviour. If {@code false} then
109      * the input array will be modified by the call to evaluate the quantiles; otherwise
110      * the computation uses a copy of the data.
111      *
112      * @param v Value.
113      * @return an instance
114      */
115     public Quantile withCopy(boolean v) {
116         return new Quantile(v, nanPolicy, estimationType);
117     }
118 
119     /**
120      * Return an instance with the configured {@link NaNPolicy}.
121      *
122      * <p>Note: This implementation respects the ordering imposed by
123      * {@link Double#compare(double, double)} for {@code NaN} values: {@code NaN} is
124      * considered greater than all other values, and all {@code NaN} values are equal. The
125      * {@link NaNPolicy} changes the computation of the statistic in the presence of
126      * {@code NaN} values.
127      *
128      * <ul>
129      * <li>{@link NaNPolicy#INCLUDE}: {@code NaN} values are moved to the end of the data;
130      * the size of the data <em>includes</em> the {@code NaN} values and the quantile will be
131      * {@code NaN} if any value used for quantile interpolation is {@code NaN}.
132      * <li>{@link NaNPolicy#EXCLUDE}: {@code NaN} values are moved to the end of the data;
133      * the size of the data <em>excludes</em> the {@code NaN} values and the quantile will
134      * never be {@code NaN} for non-zero size. If all data are {@code NaN} then the size is zero
135      * and the result is {@code NaN}.
136      * <li>{@link NaNPolicy#ERROR}: An exception is raised if the data contains {@code NaN}
137      * values.
138      * </ul>
139      *
140      * <p>Note that the result is identical for all policies if no {@code NaN} values are present.
141      *
142      * @param v Value.
143      * @return an instance
144      */
145     public Quantile with(NaNPolicy v) {
146         return new Quantile(copy, Objects.requireNonNull(v), estimationType);
147     }
148 
149     /**
150      * Return an instance with the configured {@link EstimationMethod}.
151      *
152      * @param v Value.
153      * @return an instance
154      */
155     public Quantile with(EstimationMethod v) {
156         return new Quantile(copy, nanPolicy, Objects.requireNonNull(v));
157     }
158 
159     /**
160      * Generate {@code n} evenly spaced probabilities in the range {@code [0, 1]}.
161      *
162      * <pre>
163      * 1/(n + 1), 2/(n + 1), ..., n/(n + 1)
164      * </pre>
165      *
166      * @param n Number of probabilities.
167      * @return the probabilities
168      * @throws IllegalArgumentException if {@code n < 1}
169      */
170     public static double[] probabilities(int n) {
171         checkNumberOfProbabilities(n);
172         final double c1 = n + 1.0;
173         final double[] p = new double[n];
174         for (int i = 0; i < n; i++) {
175             p[i] = (i + 1.0) / c1;
176         }
177         return p;
178     }
179 
180     /**
181      * Generate {@code n} evenly spaced probabilities in the range {@code [p1, p2]}.
182      *
183      * <pre>
184      * w = p2 - p1
185      * p1 + w/(n + 1), p1 + 2w/(n + 1), ..., p1 + nw/(n + 1)
186      * </pre>
187      *
188      * @param n Number of probabilities.
189      * @param p1 Lower probability.
190      * @param p2 Upper probability.
191      * @return the probabilities
192      * @throws IllegalArgumentException if {@code n < 1}; if the probabilities are not in the
193      * range {@code [0, 1]}; or {@code p2 <= p1}.
194      */
195     public static double[] probabilities(int n, double p1, double p2) {
196         checkProbability(p1);
197         checkProbability(p2);
198         if (p2 <= p1) {
199             throw new IllegalArgumentException("Invalid range: [" + p1 + ", " + p2 + "]");
200         }
201         final double[] p = probabilities(n);
202         for (int i = 0; i < n; i++) {
203             p[i] = (1 - p[i]) * p1 + p[i] * p2;
204         }
205         return p;
206     }
207 
208     /**
209      * Evaluate the {@code p}-th quantile of the values.
210      *
211      * <p>Note: This method may partially sort the input values if not configured to
212      * {@link #withCopy(boolean) copy} the input data.
213      *
214      * <p><strong>Performance</strong>
215      *
216      * <p>It is not recommended to use this method for repeat calls for different quantiles
217      * within the same values. The {@link #evaluate(double[], double...)} method should be used
218      * which provides better performance.
219      *
220      * @param values Values.
221      * @param p Probability for the quantile to compute.
222      * @return the quantile
223      * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]};
224      * or if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
225      * @see #evaluate(double[], double...)
226      * @see #with(NaNPolicy)
227      */
228     public double evaluate(double[] values, double p) {
229         return compute(values, 0, values.length, p);
230     }
231 
232     /**
233      * Evaluate the {@code p}-th quantile of the specified range of values.
234      *
235      * <p>Note: This method may partially sort the input values if not configured to
236      * {@link #withCopy(boolean) copy} the input data.
237      *
238      * <p><strong>Performance</strong>
239      *
240      * <p>It is not recommended to use this method for repeat calls for different quantiles
241      * within the same values. The {@link #evaluateRange(double[], int, int, double...)} method should be used
242      * which provides better performance.
243      *
244      * @param values Values.
245      * @param from Inclusive start of the range.
246      * @param to Exclusive end of the range.
247      * @param p Probability for the quantile to compute.
248      * @return the quantile
249      * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]};
250      * or if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
251      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
252      * @see #evaluateRange(double[], int, int, double...)
253      * @see #with(NaNPolicy)
254      * @since 1.2
255      */
256     public double evaluateRange(double[] values, int from, int to, double p) {
257         Statistics.checkFromToIndex(from, to, values.length);
258         return compute(values, from, to, p);
259     }
260 
261     /**
262      * Compute the {@code p}-th quantile of the specified range of values.
263      *
264      * @param values Values.
265      * @param from Inclusive start of the range.
266      * @param to Exclusive end of the range.
267      * @param p Probability for the quantile to compute.
268      * @return the quantile
269      * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
270      */
271     private double compute(double[] values, int from, int to, double p) {
272         checkProbability(p);
273         // Floating-point data handling
274         final int[] bounds = new int[2];
275         final double[] x = nanTransformer.apply(values, from, to, bounds);
276         final int start = bounds[0];
277         final int end = bounds[1];
278         final int n = end - start;
279         // Special cases
280         if (n <= 1) {
281             return n == 0 ? Double.NaN : x[start];
282         }
283 
284         final double pos = estimationType.index(p, n);
285         final int ip = (int) pos;
286         final int i = start + ip;
287 
288         // Partition and compute
289         if (pos > ip) {
290             Selection.select(x, start, end, new int[] {i, i + 1});
291             return Interpolation.interpolate(x[i], x[i + 1], pos - ip);
292         }
293         Selection.select(x, start, end, i);
294         return x[i];
295     }
296 
297     /**
298      * Evaluate the {@code p}-th quantiles of the values.
299      *
300      * <p>Note: This method may partially sort the input values if not configured to
301      * {@link #withCopy(boolean) copy} the input data.
302      *
303      * @param values Values.
304      * @param p Probabilities for the quantiles to compute.
305      * @return the quantiles
306      * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
307      * no probabilities are specified; or if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
308      * @see #with(NaNPolicy)
309      */
310     public double[] evaluate(double[] values, double... p) {
311         return compute(values, 0, values.length, p);
312     }
313 
314     /**
315      * Evaluate the {@code p}-th quantiles of the specified range of values.
316      *
317      * <p>Note: This method may partially sort the input values if not configured to
318      * {@link #withCopy(boolean) copy} the input data.
319      *
320      * @param values Values.
321      * @param from Inclusive start of the range.
322      * @param to Exclusive end of the range.
323      * @param p Probabilities for the quantiles to compute.
324      * @return the quantiles
325      * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
326      * no probabilities are specified; or if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
327      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
328      * @see #with(NaNPolicy)
329      * @since 1.2
330      */
331     public double[] evaluateRange(double[] values, int from, int to, double... p) {
332         Statistics.checkFromToIndex(from, to, values.length);
333         return compute(values, from, to, p);
334     }
335 
336     /**
337      * Compute the {@code p}-th quantiles of the specified range of values.
338      *
339      * @param values Values.
340      * @param from Inclusive start of the range.
341      * @param to Exclusive end of the range.
342      * @param p Probabilities for the quantiles to compute.
343      * @return the quantiles
344      * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
345      * or no probabilities are specified.
346      */
347     private double[] compute(double[] values, int from, int to, double... p) {
348         checkProbabilities(p);
349         // Floating-point data handling
350         final int[] bounds = new int[2];
351         final double[] x = nanTransformer.apply(values, from, to, bounds);
352         final int start = bounds[0];
353         final int end = bounds[1];
354         final int n = end - start;
355         // Special cases
356         final double[] q = new double[p.length];
357         if (n <= 1) {
358             Arrays.fill(q, n == 0 ? Double.NaN : x[start]);
359             return q;
360         }
361 
362         // Collect interpolation positions. We use the output q as storage.
363         final int[] indices = computeIndices(n, p, q, start);
364 
365         // Partition
366         Selection.select(x, start, end, indices);
367 
368         // Compute
369         for (int k = 0; k < p.length; k++) {
370             // ip in [0, n); i in [start, end)
371             final int ip = (int) q[k];
372             final int i = start + ip;
373             if (q[k] > ip) {
374                 q[k] = Interpolation.interpolate(x[i], x[i + 1], q[k] - ip);
375             } else {
376                 q[k] = x[i];
377             }
378         }
379         return q;
380     }
381 
382     /**
383      * Evaluate the {@code p}-th quantile of the values.
384      *
385      * <p>Note: This method may partially sort the input values if not configured to
386      * {@link #withCopy(boolean) copy} the input data.
387      *
388      * <p><strong>Performance</strong>
389      *
390      * <p>It is not recommended to use this method for repeat calls for different quantiles
391      * within the same values. The {@link #evaluate(int[], double...)} method should be used
392      * which provides better performance.
393      *
394      * @param values Values.
395      * @param p Probability for the quantile to compute.
396      * @return the quantile
397      * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
398      * @see #evaluate(int[], double...)
399      */
400     public double evaluate(int[] values, double p) {
401         return compute(values, 0, values.length, p);
402     }
403 
404     /**
405      * Evaluate the {@code p}-th quantile of the specified range of values.
406      *
407      * <p>Note: This method may partially sort the input values if not configured to
408      * {@link #withCopy(boolean) copy} the input data.
409      *
410      * <p><strong>Performance</strong>
411      *
412      * <p>It is not recommended to use this method for repeat calls for different quantiles
413      * within the same values. The {@link #evaluateRange(int[], int, int, double...)} method should be used
414      * which provides better performance.
415      *
416      * @param values Values.
417      * @param from Inclusive start of the range.
418      * @param to Exclusive end of the range.
419      * @param p Probability for the quantile to compute.
420      * @return the quantile
421      * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
422      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
423      * @see #evaluateRange(int[], int, int, double...)
424      * @since 1.2
425      */
426     public double evaluateRange(int[] values, int from, int to, double p) {
427         Statistics.checkFromToIndex(from, to, values.length);
428         return compute(values, from, to, p);
429     }
430 
431     /**
432      * Compute the {@code p}-th quantile of the specified range of values.
433      *
434      * @param values Values.
435      * @param from Inclusive start of the range.
436      * @param to Exclusive end of the range.
437      * @param p Probability for the quantile to compute.
438      * @return the quantile
439      * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
440      */
441     private double compute(int[] values, int from, int to, double p) {
442         checkProbability(p);
443         final int n = to - from;
444         // Special cases
445         if (n <= 1) {
446             return n == 0 ? Double.NaN : values[from];
447         }
448 
449         // Create the range
450         final int[] x;
451         final int start;
452         final int end;
453         if (copy) {
454             x = Statistics.copy(values, from, to);
455             start = 0;
456             end = n;
457         } else {
458             x = values;
459             start = from;
460             end = to;
461         }
462 
463         final double pos = estimationType.index(p, n);
464         final int ip = (int) pos;
465         final int i = start + ip;
466 
467         // Partition and compute
468         if (pos > ip) {
469             Selection.select(x, start, end, new int[] {i, i + 1});
470             return Interpolation.interpolate(x[i], x[i + 1], pos - ip);
471         }
472         Selection.select(x, start, end, i);
473         return x[i];
474     }
475 
476     /**
477      * Evaluate the {@code p}-th quantiles of the values.
478      *
479      * <p>Note: This method may partially sort the input values if not configured to
480      * {@link #withCopy(boolean) copy} the input data.
481      *
482      * @param values Values.
483      * @param p Probabilities for the quantiles to compute.
484      * @return the quantiles
485      * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
486      * or no probabilities are specified.
487      */
488     public double[] evaluate(int[] values, double... p) {
489         return compute(values, 0, values.length, p);
490     }
491 
492     /**
493      * Evaluate the {@code p}-th quantiles of the specified range of values..
494      *
495      * <p>Note: This method may partially sort the input values if not configured to
496      * {@link #withCopy(boolean) copy} the input data.
497      *
498      * @param values Values.
499      * @param from Inclusive start of the range.
500      * @param to Exclusive end of the range.
501      * @param p Probabilities for the quantiles to compute.
502      * @return the quantiles
503      * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
504      * or no probabilities are specified.
505      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
506      * @since 1.2
507      */
508     public double[] evaluateRange(int[] values, int from, int to, double... p) {
509         Statistics.checkFromToIndex(from, to, values.length);
510         return compute(values, from, to, p);
511     }
512 
513     /**
514      * Evaluate the {@code p}-th quantiles of the specified range of values..
515      *
516      * <p>Note: This method may partially sort the input values if not configured to
517      * {@link #withCopy(boolean) copy} the input data.
518      *
519      * @param values Values.
520      * @param from Inclusive start of the range.
521      * @param to Exclusive end of the range.
522      * @param p Probabilities for the quantiles to compute.
523      * @return the quantiles
524      * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
525      * or no probabilities are specified.
526      */
527     private double[] compute(int[] values, int from, int to, double... p) {
528         checkProbabilities(p);
529         final int n = to - from;
530         // Special cases
531         final double[] q = new double[p.length];
532         if (n <= 1) {
533             Arrays.fill(q, n == 0 ? Double.NaN : values[from]);
534             return q;
535         }
536 
537         // Create the range
538         final int[] x;
539         final int start;
540         final int end;
541         if (copy) {
542             x = Statistics.copy(values, from, to);
543             start = 0;
544             end = n;
545         } else {
546             x = values;
547             start = from;
548             end = to;
549         }
550 
551         // Collect interpolation positions. We use the output q as storage.
552         final int[] indices = computeIndices(n, p, q, start);
553 
554         // Partition
555         Selection.select(x, start, end, indices);
556 
557         // Compute
558         for (int k = 0; k < p.length; k++) {
559             // ip in [0, n); i in [start, end)
560             final int ip = (int) q[k];
561             final int i = start + ip;
562             if (q[k] > ip) {
563                 q[k] = Interpolation.interpolate(x[i], x[i + 1], q[k] - ip);
564             } else {
565                 q[k] = x[i];
566             }
567         }
568         return q;
569     }
570 
571     /**
572      * Evaluate the {@code p}-th quantile of the values.
573      *
574      * <p>This method can be used when the values of known size are already sorted.
575      *
576      * <pre>{@code
577      * short[] x = ...
578      * Arrays.sort(x);
579      * double q = Quantile.withDefaults().evaluate(x.length, i -> x[i], 0.05);
580      * }</pre>
581      *
582      * @param n Size of the values.
583      * @param values Values function.
584      * @param p Probability for the quantile to compute.
585      * @return the quantile
586      * @throws IllegalArgumentException if {@code size < 0}; or if the probability {@code p} is
587      * not in the range {@code [0, 1]}.
588      */
589     public double evaluate(int n, IntToDoubleFunction values, double p) {
590         checkSize(n);
591         checkProbability(p);
592         // Special case
593         if (n <= 1) {
594             return n == 0 ? Double.NaN : values.applyAsDouble(0);
595         }
596         final double pos = estimationType.index(p, n);
597         final int i = (int) pos;
598         final double v1 = values.applyAsDouble(i);
599         if (pos > i) {
600             final double v2 = values.applyAsDouble(i + 1);
601             return Interpolation.interpolate(v1, v2, pos - i);
602         }
603         return v1;
604     }
605 
606     /**
607      * Evaluate the {@code p}-th quantiles of the values.
608      *
609      * <p>This method can be used when the values of known size are already sorted.
610      *
611      * <pre>{@code
612      * short[] x = ...
613      * Arrays.sort(x);
614      * double[] q = Quantile.withDefaults().evaluate(x.length, i -> x[i], 0.25, 0.5, 0.75);
615      * }</pre>
616      *
617      * @param n Size of the values.
618      * @param values Values function.
619      * @param p Probabilities for the quantiles to compute.
620      * @return the quantiles
621      * @throws IllegalArgumentException if {@code size < 0}; if any probability {@code p} is
622      * not in the range {@code [0, 1]}; or no probabilities are specified.
623      */
624     public double[] evaluate(int n, IntToDoubleFunction values, double... p) {
625         checkSize(n);
626         checkProbabilities(p);
627         // Special case
628         final double[] q = new double[p.length];
629         if (n <= 1) {
630             Arrays.fill(q, n == 0 ? Double.NaN : values.applyAsDouble(0));
631             return q;
632         }
633         for (int k = 0; k < p.length; k++) {
634             final double pos = estimationType.index(p[k], n);
635             final int i = (int) pos;
636             final double v1 = values.applyAsDouble(i);
637             if (pos > i) {
638                 final double v2 = values.applyAsDouble(i + 1);
639                 q[k] = Interpolation.interpolate(v1, v2, pos - i);
640             } else {
641                 q[k] = v1;
642             }
643         }
644         return q;
645     }
646 
647     /**
648      * Check the probability {@code p} is in the range {@code [0, 1]}.
649      *
650      * @param p Probability for the quantile to compute.
651      * @throws IllegalArgumentException if the probability is not in the range {@code [0, 1]}
652      */
653     private static void checkProbability(double p) {
654         // Logic negation will detect NaN
655         if (!(p >= 0 && p <= 1)) {
656             throw new IllegalArgumentException(INVALID_PROBABILITY + p);
657         }
658     }
659 
660     /**
661      * Check the probabilities {@code p} are in the range {@code [0, 1]}.
662      *
663      * @param p Probabilities for the quantiles to compute.
664      * @throws IllegalArgumentException if any probabilities {@code p} is not in the range {@code [0, 1]};
665      * or no probabilities are specified.
666      */
667     private static void checkProbabilities(double... p) {
668         if (p.length == 0) {
669             throw new IllegalArgumentException(NO_PROBABILITIES_SPECIFIED);
670         }
671         for (final double pp : p) {
672             checkProbability(pp);
673         }
674     }
675 
676     /**
677      * Check the {@code size} is positive.
678      *
679      * @param n Size of the values.
680      * @throws IllegalArgumentException if {@code size < 0}
681      */
682     private static void checkSize(int n) {
683         if (n < 0) {
684             throw new IllegalArgumentException(INVALID_SIZE + n);
685         }
686     }
687 
688     /**
689      * Check the number of probabilities {@code n} is strictly positive.
690      *
691      * @param n Number of probabilities.
692      * @throws IllegalArgumentException if {@code c < 1}
693      */
694     private static void checkNumberOfProbabilities(int n) {
695         if (n < 1) {
696             throw new IllegalArgumentException(INVALID_NUMBER_OF_PROBABILITIES + n);
697         }
698     }
699 
700     /**
701      * Compute the indices required for quantile interpolation.
702      *
703      * <p>The zero-based interpolation index in {@code [0, n)} is
704      * saved into the working array {@code q} for each {@code p}.
705      *
706      * <p>The indices are incremented by the provided {@code offset} to allow
707      * addressing sub-ranges of a larger array.
708      *
709      * @param n Size of the data.
710      * @param p Probabilities for the quantiles to compute.
711      * @param q Working array for quantiles in {@code [0, n)}.
712      * @param offset Array offset.
713      * @return the indices in {@code [offset, offset + n)}
714      */
715     private int[] computeIndices(int n, double[] p, double[] q, int offset) {
716         final int[] indices = new int[p.length << 1];
717         int count = 0;
718         for (int k = 0; k < p.length; k++) {
719             final double pos = estimationType.index(p[k], n);
720             q[k] = pos;
721             final int i = (int) pos;
722             indices[count++] = offset + i;
723             if (pos > i) {
724                 // Require the next index for interpolation
725                 indices[count++] = offset + i + 1;
726             }
727         }
728         if (count < indices.length) {
729             return Arrays.copyOf(indices, count);
730         }
731         return indices;
732     }
733 
734     /**
735      * Estimation methods for a quantile. Provides the nine quantile algorithms
736      * defined in Hyndman and Fan (1996)[1] as {@code HF1 - HF9}.
737      *
738      * <p>Samples quantiles are defined by:
739      *
740      * <p>\[ Q(p) = (1 - \gamma) x_j + \gamma x_{j+1} \]
741      *
742      * <p>where \( \frac{j-m}{n} \leq p \le \frac{j-m+1}{n} \), \( x_j \) is the \( j \)th
743      * order statistic, \( n \) is the sample size, the value of \( \gamma \) is a function
744      * of \( j = \lfloor np+m \rfloor \) and \( g = np + m - j \), and \( m \) is a constant
745      * determined by the sample quantile type.
746      *
747      * <p>Note that the real-valued position \( np + m \) is a 1-based index and
748      * \( j \in [1, n] \). If the real valued position is computed as beyond the lowest or
749      * highest values in the sample, this implementation will return the minimum or maximum
750      * observation respectively.
751      *
752      * <p>Types 1, 2, and 3 are discontinuous functions of \( p \); types 4 to 9 are continuous
753      * functions of \( p \).
754      *
755      * <p>For the continuous functions, the probability \( p_k \) is provided for the \( k \)-th order
756      * statistic in size \( n \). Samples quantiles are equivalently obtained to \( Q(p) \) by
757      * linear interpolation between points \( (p_k, x_k) \) and \( (p_{k+1}, x_{k+1}) \) for
758      * any \( p_k \leq p \leq p_{k+1} \).
759      *
760      * <ol>
761      * <li>Hyndman and Fan (1996)
762      *     <i>Sample Quantiles in Statistical Packages.</i>
763      *     The American Statistician, 50, 361-365.
764      *     <a href="https://www.jstor.org/stable/2684934">doi.org/10.2307/2684934</a>
765      * <li><a href="http://en.wikipedia.org/wiki/Quantile">Quantile (Wikipedia)</a>
766      * </ol>
767      */
768     public enum EstimationMethod {
769         /**
770          * Inverse of the empirical distribution function.
771          *
772          * <p>\( m = 0 \). \( \gamma = 0 \) if \( g = 0 \), and 1 otherwise.
773          */
774         HF1 {
775             @Override
776             double position0(double p, int n) {
777                 // position = np + 0. This is 1-based so adjust to 0-based.
778                 return Math.ceil(n * p) - 1;
779             }
780         },
781         /**
782          * Similar to {@link #HF1} with averaging at discontinuities.
783          *
784          * <p>\( m = 0 \). \( \gamma = 0.5 \) if \( g = 0 \), and 1 otherwise.
785          */
786         HF2 {
787             @Override
788             double position0(double p, int n) {
789                 final double pos = n * p;
790                 // Average at discontinuities
791                 final int j = (int) pos;
792                 final double g = pos - j;
793                 if (g == 0) {
794                     return j - 0.5;
795                 }
796                 // As HF1 : ceil(j + g) - 1
797                 return j;
798             }
799         },
800         /**
801          * The observation closest to \( np \). Ties are resolved to the nearest even order statistic.
802          *
803          * <p>\( m = -1/2 \). \( \gamma = 0 \) if \( g = 0 \) and \( j \) is even, and 1 otherwise.
804          */
805         HF3 {
806             @Override
807             double position0(double p, int n) {
808                 // Let rint do the work for ties to even
809                 return Math.rint(n * p) - 1;
810             }
811         },
812         /**
813          * Linear interpolation of the inverse of the empirical CDF.
814          *
815          * <p>\( m = 0 \). \( p_k = \frac{k}{n} \).
816          */
817         HF4 {
818             @Override
819             double position0(double p, int n) {
820                 // np + 0 - 1
821                 return n * p - 1;
822             }
823         },
824         /**
825          * A piecewise linear function where the knots are the values midway through the steps of
826          * the empirical CDF. Proposed by Hazen (1914) and popular amongst hydrologists.
827          *
828          * <p>\( m = 1/2 \). \( p_k = \frac{k - 1/2}{n} \).
829          */
830         HF5 {
831             @Override
832             double position0(double p, int n) {
833                 // np + 0.5 - 1
834                 return n * p - 0.5;
835             }
836         },
837         /**
838          * Linear interpolation of the expectations for the order statistics for the uniform
839          * distribution on [0,1]. Proposed by Weibull (1939).
840          *
841          * <p>\( m = p \). \( p_k = \frac{k}{n + 1} \).
842          *
843          * <p>This method computes the quantile as per the Apache Commons Math Percentile
844          * legacy implementation.
845          */
846         HF6 {
847             @Override
848             double position0(double p, int n) {
849                 // np + p - 1
850                 return (n + 1) * p - 1;
851             }
852         },
853         /**
854          * Linear interpolation of the modes for the order statistics for the uniform
855          * distribution on [0,1]. Proposed by Gumbull (1939).
856          *
857          * <p>\( m = 1 - p \). \( p_k = \frac{k - 1}{n - 1} \).
858          */
859         HF7 {
860             @Override
861             double position0(double p, int n) {
862                 // np + 1-p - 1
863                 return (n - 1) * p;
864             }
865         },
866         /**
867          * Linear interpolation of the approximate medians for order statistics.
868          *
869          * <p>\( m = (p + 1)/3 \). \( p_k = \frac{k - 1/3}{n + 1/3} \).
870          *
871          * <p>As per Hyndman and Fan (1996) this approach is most recommended as it provides
872          * an approximate median-unbiased estimate regardless of distribution.
873          */
874         HF8 {
875             @Override
876             double position0(double p, int n) {
877                 return n * p + (p + 1) / 3 - 1;
878             }
879         },
880         /**
881          * Quantile estimates are approximately unbiased for the expected order statistics if
882          * \( x \) is normally distributed.
883          *
884          * <p>\( m = p/4 + 3/8 \). \( p_k = \frac{k - 3/8}{n + 1/4} \).
885          */
886         HF9 {
887             @Override
888             double position0(double p, int n) {
889                 // np + p/4 + 3/8 - 1
890                 return (n + 0.25) * p - 0.625;
891             }
892         };
893 
894         /**
895          * Finds the real-valued position for calculation of the quantile.
896          *
897          * <p>Return {@code i + g} such that the quantile value from sorted data is:
898          *
899          * <p>value = data[i] + g * (data[i+1] - data[i])
900          *
901          * <p>Warning: Interpolation should not use {@code data[i+1]} unless {@code g != 0}.
902          *
903          * <p>Note: In contrast to the definition of Hyndman and Fan in the class header
904          * which uses a 1-based position, this is a zero based index. This change is for
905          * convenience when addressing array positions.
906          *
907          * @param p p<sup>th</sup> quantile.
908          * @param n Size.
909          * @return a real-valued position (0-based) into the range {@code [0, n)}
910          */
911         abstract double position0(double p, int n);
912 
913         /**
914          * Finds the index {@code i} and fractional part {@code g} of a real-valued position
915          * to interpolate the quantile.
916          *
917          * <p>Return {@code i + g} such that the quantile value from sorted data is:
918          *
919          * <p>value = data[i] + g * (data[i+1] - data[i])
920          *
921          * <p>Note: Interpolation should not use {@code data[i+1]} unless {@code g != 0}.
922          *
923          * @param p p<sup>th</sup> quantile.
924          * @param n Size.
925          * @return index (in [0, n-1])
926          */
927         final double index(double p, int n) {
928             final double pos = position0(p, n);
929             // Bounds check in [0, n-1]
930             if (pos < 0) {
931                 return 0;
932             }
933             if (pos > n - 1.0) {
934                 return n - 1.0;
935             }
936             return pos;
937         }
938     }
939 }