View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math3.stat.descriptive.rank;
18  
19  import java.io.Serializable;
20  import java.util.Arrays;
21  import java.util.BitSet;
22  
23  import org.apache.commons.math3.exception.MathIllegalArgumentException;
24  import org.apache.commons.math3.exception.MathUnsupportedOperationException;
25  import org.apache.commons.math3.exception.NullArgumentException;
26  import org.apache.commons.math3.exception.OutOfRangeException;
27  import org.apache.commons.math3.exception.util.LocalizedFormats;
28  import org.apache.commons.math3.stat.descriptive.AbstractUnivariateStatistic;
29  import org.apache.commons.math3.stat.ranking.NaNStrategy;
30  import org.apache.commons.math3.util.FastMath;
31  import org.apache.commons.math3.util.KthSelector;
32  import org.apache.commons.math3.util.MathArrays;
33  import org.apache.commons.math3.util.MathUtils;
34  import org.apache.commons.math3.util.MedianOf3PivotingStrategy;
35  import org.apache.commons.math3.util.PivotingStrategyInterface;
36  import org.apache.commons.math3.util.Precision;
37  
38  /**
39   * Provides percentile computation.
40   * <p>
41   * There are several commonly used methods for estimating percentiles (a.k.a.
42   * quantiles) based on sample data.  For large samples, the different methods
43   * agree closely, but when sample sizes are small, different methods will give
44   * significantly different results.  The algorithm implemented here works as follows:
45   * <ol>
46   * <li>Let <code>n</code> be the length of the (sorted) array and
47   * <code>0 < p <= 100</code> be the desired percentile.</li>
48   * <li>If <code> n = 1 </code> return the unique array element (regardless of
49   * the value of <code>p</code>); otherwise </li>
50   * <li>Compute the estimated percentile position
51   * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
52   * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
53   * part of <code>pos</code>).</li>
54   * <li> If <code>pos < 1</code> return the smallest element in the array.</li>
55   * <li> Else if <code>pos >= n</code> return the largest element in the array.</li>
56   * <li> Else let <code>lower</code> be the element in position
57   * <code>floor(pos)</code> in the array and let <code>upper</code> be the
58   * next element in the array.  Return <code>lower + d * (upper - lower)</code>
59   * </li>
60   * </ol></p>
61   * <p>
62   * To compute percentiles, the data must be at least partially ordered.  Input
63   * arrays are copied and recursively partitioned using an ordering definition.
64   * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
65   * by {@link java.lang.Double#compareTo(Double)}.  This ordering makes
66   * <code>Double.NaN</code> larger than any other value (including
67   * <code>Double.POSITIVE_INFINITY</code>).  Therefore, for example, the median
68   * (50th percentile) of
69   * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code></p>
70   * <p>
71   * Since percentile estimation usually involves interpolation between array
72   * elements, arrays containing  <code>NaN</code> or infinite values will often
73   * result in <code>NaN</code> or infinite values returned.</p>
74   * <p>
75   * Further, to include different estimation types such as R1, R2 as mentioned in
76   * <a href="http://en.wikipedia.org/wiki/Quantile">Quantile page(wikipedia)</a>,
77   * a type specific NaN handling strategy is used to closely match with the
78   * typically observed results from popular tools like R(R1-R9), Excel(R7).</p>
79   * <p>
80   * Since 2.2, Percentile uses only selection instead of complete sorting
81   * and caches selection algorithm state between calls to the various
82   * {@code evaluate} methods. This greatly improves efficiency, both for a single
83   * percentile and multiple percentile computations. To maximize performance when
84   * multiple percentiles are computed based on the same data, users should set the
85   * data array once using either one of the {@link #evaluate(double[], double)} or
86   * {@link #setData(double[])} methods and thereafter {@link #evaluate(double)}
87   * with just the percentile provided.
88   * </p>
89   * <p>
90   * <strong>Note that this implementation is not synchronized.</strong> If
91   * multiple threads access an instance of this class concurrently, and at least
92   * one of the threads invokes the <code>increment()</code> or
93   * <code>clear()</code> method, it must be synchronized externally.</p>
94   *
95   */
96  public class Percentile extends AbstractUnivariateStatistic implements Serializable {
97  
98      /** Serializable version identifier */
99      private static final long serialVersionUID = -8091216485095130416L;
100 
101     /** Maximum number of partitioning pivots cached (each level double the number of pivots). */
102     private static final int MAX_CACHED_LEVELS = 10;
103 
104     /** Maximum number of cached pivots in the pivots cached array */
105     private static final int PIVOTS_HEAP_LENGTH = 0x1 << MAX_CACHED_LEVELS - 1;
106 
107     /** Default KthSelector used with default pivoting strategy */
108     private final KthSelector kthSelector;
109 
110     /** Any of the {@link EstimationType}s such as {@link EstimationType#LEGACY CM} can be used. */
111     private final EstimationType estimationType;
112 
113     /** NaN Handling of the input as defined by {@link NaNStrategy} */
114     private final NaNStrategy nanStrategy;
115 
116     /** Determines what percentile is computed when evaluate() is activated
117      * with no quantile argument */
118     private double quantile;
119 
120     /** Cached pivots. */
121     private int[] cachedPivots;
122 
123     /**
124      * Constructs a Percentile with the following defaults.
125      * <ul>
126      *   <li>default quantile: 50.0, can be reset with {@link #setQuantile(double)}</li>
127      *   <li>default estimation type: {@link EstimationType#LEGACY},
128      *   can be reset with {@link #withEstimationType(EstimationType)}</li>
129      *   <li>default NaN strategy: {@link NaNStrategy#REMOVED},
130      *   can be reset with {@link #withNaNStrategy(NaNStrategy)}</li>
131      *   <li>a KthSelector that makes use of {@link MedianOf3PivotingStrategy},
132      *   can be reset with {@link #withKthSelector(KthSelector)}</li>
133      * </ul>
134      */
135     public Percentile() {
136         // No try-catch or advertised exception here - arg is valid
137         this(50.0);
138     }
139 
140     /**
141      * Constructs a Percentile with the specific quantile value and the following
142      * <ul>
143      *   <li>default method type: {@link EstimationType#LEGACY}</li>
144      *   <li>default NaN strategy: {@link NaNStrategy#REMOVED}</li>
145      *   <li>a Kth Selector : {@link KthSelector}</li>
146      * </ul>
147      * @param quantile the quantile
148      * @throws MathIllegalArgumentException  if p is not greater than 0 and less
149      * than or equal to 100
150      */
151     public Percentile(final double quantile) throws MathIllegalArgumentException {
152         this(quantile, EstimationType.LEGACY, NaNStrategy.REMOVED,
153              new KthSelector(new MedianOf3PivotingStrategy()));
154     }
155 
156     /**
157      * Copy constructor, creates a new {@code Percentile} identical
158      * to the {@code original}
159      *
160      * @param original the {@code Percentile} instance to copy
161      * @throws NullArgumentException if original is null
162      */
163     public Percentile(final Percentile original) throws NullArgumentException {
164 
165         MathUtils.checkNotNull(original);
166         estimationType   = original.getEstimationType();
167         nanStrategy      = original.getNaNStrategy();
168         kthSelector      = original.getKthSelector();
169 
170         setData(original.getDataRef());
171         if (original.cachedPivots != null) {
172             System.arraycopy(original.cachedPivots, 0, cachedPivots, 0, original.cachedPivots.length);
173         }
174         setQuantile(original.quantile);
175 
176     }
177 
178     /**
179      * Constructs a Percentile with the specific quantile value,
180      * {@link EstimationType}, {@link NaNStrategy} and {@link KthSelector}.
181      *
182      * @param quantile the quantile to be computed
183      * @param estimationType one of the percentile {@link EstimationType  estimation types}
184      * @param nanStrategy one of {@link NaNStrategy} to handle with NaNs
185      * @param kthSelector a {@link KthSelector} to use for pivoting during search
186      * @throws MathIllegalArgumentException if p is not within (0,100]
187      * @throws NullArgumentException if type or NaNStrategy passed is null
188      */
189     protected Percentile(final double quantile,
190                          final EstimationType estimationType,
191                          final NaNStrategy nanStrategy,
192                          final KthSelector kthSelector)
193         throws MathIllegalArgumentException {
194         setQuantile(quantile);
195         cachedPivots = null;
196         MathUtils.checkNotNull(estimationType);
197         MathUtils.checkNotNull(nanStrategy);
198         MathUtils.checkNotNull(kthSelector);
199         this.estimationType = estimationType;
200         this.nanStrategy = nanStrategy;
201         this.kthSelector = kthSelector;
202     }
203 
204     /** {@inheritDoc} */
205     @Override
206     public void setData(final double[] values) {
207         if (values == null) {
208             cachedPivots = null;
209         } else {
210             cachedPivots = new int[PIVOTS_HEAP_LENGTH];
211             Arrays.fill(cachedPivots, -1);
212         }
213         super.setData(values);
214     }
215 
216     /** {@inheritDoc} */
217     @Override
218     public void setData(final double[] values, final int begin, final int length)
219     throws MathIllegalArgumentException {
220         if (values == null) {
221             cachedPivots = null;
222         } else {
223             cachedPivots = new int[PIVOTS_HEAP_LENGTH];
224             Arrays.fill(cachedPivots, -1);
225         }
226         super.setData(values, begin, length);
227     }
228 
229     /**
230      * Returns the result of evaluating the statistic over the stored data.
231      * <p>
232      * The stored array is the one which was set by previous calls to
233      * {@link #setData(double[])}
234      * </p>
235      * @param p the percentile value to compute
236      * @return the value of the statistic applied to the stored data
237      * @throws MathIllegalArgumentException if p is not a valid quantile value
238      * (p must be greater than 0 and less than or equal to 100)
239      */
240     public double evaluate(final double p) throws MathIllegalArgumentException {
241         return evaluate(getDataRef(), p);
242     }
243 
244     /**
245      * Returns an estimate of the <code>p</code>th percentile of the values
246      * in the <code>values</code> array.
247      * <p>
248      * Calls to this method do not modify the internal <code>quantile</code>
249      * state of this statistic.</p>
250      * <p>
251      * <ul>
252      * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
253      * <code>0</code></li>
254      * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
255      *  if <code>values</code> has length <code>1</code></li>
256      * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
257      * is null or p is not a valid quantile value (p must be greater than 0
258      * and less than or equal to 100) </li>
259      * </ul></p>
260      * <p>
261      * See {@link Percentile} for a description of the percentile estimation
262      * algorithm used.</p>
263      *
264      * @param values input array of values
265      * @param p the percentile value to compute
266      * @return the percentile value or Double.NaN if the array is empty
267      * @throws MathIllegalArgumentException if <code>values</code> is null
268      *     or p is invalid
269      */
270     public double evaluate(final double[] values, final double p)
271     throws MathIllegalArgumentException {
272         test(values, 0, 0);
273         return evaluate(values, 0, values.length, p);
274     }
275 
276     /**
277      * Returns an estimate of the <code>quantile</code>th percentile of the
278      * designated values in the <code>values</code> array.  The quantile
279      * estimated is determined by the <code>quantile</code> property.
280      * <p>
281      * <ul>
282      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
283      * <li>Returns (for any value of <code>quantile</code>)
284      * <code>values[begin]</code> if <code>length = 1 </code></li>
285      * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
286      * is null, or <code>start</code> or <code>length</code> is invalid</li>
287      * </ul></p>
288      * <p>
289      * See {@link Percentile} for a description of the percentile estimation
290      * algorithm used.</p>
291      *
292      * @param values the input array
293      * @param start index of the first array element to include
294      * @param length the number of elements to include
295      * @return the percentile value
296      * @throws MathIllegalArgumentException if the parameters are not valid
297      *
298      */
299     @Override
300     public double evaluate(final double[] values, final int start, final int length)
301     throws MathIllegalArgumentException {
302         return evaluate(values, start, length, quantile);
303     }
304 
305      /**
306      * Returns an estimate of the <code>p</code>th percentile of the values
307      * in the <code>values</code> array, starting with the element in (0-based)
308      * position <code>begin</code> in the array and including <code>length</code>
309      * values.
310      * <p>
311      * Calls to this method do not modify the internal <code>quantile</code>
312      * state of this statistic.</p>
313      * <p>
314      * <ul>
315      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
316      * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
317      *  if <code>length = 1 </code></li>
318      * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
319      *  is null , <code>begin</code> or <code>length</code> is invalid, or
320      * <code>p</code> is not a valid quantile value (p must be greater than 0
321      * and less than or equal to 100)</li>
322      * </ul></p>
323      * <p>
324      * See {@link Percentile} for a description of the percentile estimation
325      * algorithm used.</p>
326      *
327      * @param values array of input values
328      * @param p  the percentile to compute
329      * @param begin  the first (0-based) element to include in the computation
330      * @param length  the number of array elements to include
331      * @return  the percentile value
332      * @throws MathIllegalArgumentException if the parameters are not valid or the
333      * input array is null
334      */
335     public double evaluate(final double[] values, final int begin,
336                            final int length, final double p)
337         throws MathIllegalArgumentException {
338 
339         test(values, begin, length);
340         if (p > 100 || p <= 0) {
341             throw new OutOfRangeException(
342                     LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, p, 0, 100);
343         }
344         if (length == 0) {
345             return Double.NaN;
346         }
347         if (length == 1) {
348             return values[begin]; // always return single value for n = 1
349         }
350 
351         final double[] work = getWorkArray(values, begin, length);
352         final int[] pivotsHeap = getPivots(values);
353         return work.length == 0 ? Double.NaN :
354                     estimationType.evaluate(work, pivotsHeap, p, kthSelector);
355     }
356 
357     /** Select a pivot index as the median of three
358      * <p>
359      * <b>Note:</b> With the effect of allowing {@link KthSelector} to be set on
360      * {@link Percentile} instances(thus indirectly {@link PivotingStrategy})
361      * this method wont take effect any more and hence is unsupported.
362      * @param work data array
363      * @param begin index of the first element of the slice
364      * @param end index after the last element of the slice
365      * @return the index of the median element chosen between the
366      * first, the middle and the last element of the array slice
367      * @deprecated Please refrain from using this method (as it wont take effect)
368      * and instead use {@link Percentile#withKthSelector(newKthSelector)} if
369      * required.
370      *
371      */
372     @Deprecated
373     int medianOf3(final double[] work, final int begin, final int end) {
374         return new MedianOf3PivotingStrategy().pivotIndex(work, begin, end);
375         //throw new MathUnsupportedOperationException();
376     }
377 
378     /**
379      * Returns the value of the quantile field (determines what percentile is
380      * computed when evaluate() is called with no quantile argument).
381      *
382      * @return quantile set while construction or {@link #setQuantile(double)}
383      */
384     public double getQuantile() {
385         return quantile;
386     }
387 
388     /**
389      * Sets the value of the quantile field (determines what percentile is
390      * computed when evaluate() is called with no quantile argument).
391      *
392      * @param p a value between 0 < p <= 100
393      * @throws MathIllegalArgumentException  if p is not greater than 0 and less
394      * than or equal to 100
395      */
396     public void setQuantile(final double p) throws MathIllegalArgumentException {
397         if (p <= 0 || p > 100) {
398             throw new OutOfRangeException(
399                     LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, p, 0, 100);
400         }
401         quantile = p;
402     }
403 
404     /**
405      * {@inheritDoc}
406      */
407     @Override
408     public Percentile copy() {
409         return new Percentile(this);
410     }
411 
412     /**
413      * Copies source to dest.
414      * @param source Percentile to copy
415      * @param dest Percentile to copy to
416      * @exception MathUnsupportedOperationException always thrown since 3.4
417      * @deprecated as of 3.4 this method does not work anymore, as it fails to
418      * copy internal states between instances configured with different
419      * {@link EstimationType estimation type}, {@link NaNStrategy NaN handling strategies}
420      * and {@link KthSelector kthSelector}, it therefore always
421      * throw {@link MathUnsupportedOperationException}
422       */
423     @Deprecated
424     public static void copy(final Percentile source, final Percentile dest)
425         throws MathUnsupportedOperationException {
426         throw new MathUnsupportedOperationException();
427     }
428 
429     /**
430      * Get the work array to operate. Makes use of prior {@code storedData} if
431      * it exists or else do a check on NaNs and copy a subset of the array
432      * defined by begin and length parameters. The set {@link #nanStrategy} will
433      * be used to either retain/remove/replace any NaNs present before returning
434      * the resultant array.
435      *
436      * @param values the array of numbers
437      * @param begin index to start reading the array
438      * @param length the length of array to be read from the begin index
439      * @return work array sliced from values in the range [begin,begin+length)
440      * @throws MathIllegalArgumentException if values or indices are invalid
441      */
442     protected double[] getWorkArray(final double[] values, final int begin, final int length) {
443             final double[] work;
444             if (values == getDataRef()) {
445                 work = getDataRef();
446             } else {
447                 switch (nanStrategy) {
448                 case MAXIMAL:// Replace NaNs with +INFs
449                     work = replaceAndSlice(values, begin, length, Double.NaN, Double.POSITIVE_INFINITY);
450                     break;
451                 case MINIMAL:// Replace NaNs with -INFs
452                     work = replaceAndSlice(values, begin, length, Double.NaN, Double.NEGATIVE_INFINITY);
453                     break;
454                 case REMOVED:// Drop NaNs from data
455                     work = removeAndSlice(values, begin, length, Double.NaN);
456                     break;
457                 case FAILED:// just throw exception as NaN is un-acceptable
458                     work = copyOf(values, begin, length);
459                     MathArrays.checkNotNaN(work);
460                     break;
461                 default: //FIXED
462                     work = copyOf(values,begin,length);
463                     break;
464                 }
465             }
466             return work;
467     }
468 
469     /**
470      * Make a copy of the array for the slice defined by array part from
471      * [begin, begin+length)
472      * @param values the input array
473      * @param begin start index of the array to include
474      * @param length number of elements to include from begin
475      * @return copy of a slice of the original array
476      */
477     private static double[] copyOf(final double[] values, final int begin, final int length) {
478         MathArrays.verifyValues(values, begin, length);
479         return MathArrays.copyOfRange(values, begin, begin + length);
480     }
481 
482     /**
483      * Replace every occurrence of a given value with a replacement value in a
484      * copied slice of array defined by array part from [begin, begin+length).
485      * @param values the input array
486      * @param begin start index of the array to include
487      * @param length number of elements to include from begin
488      * @param original the value to be replaced with
489      * @param replacement the value to be used for replacement
490      * @return the copy of sliced array with replaced values
491      */
492     private static double[] replaceAndSlice(final double[] values,
493                                             final int begin, final int length,
494                                             final double original,
495                                             final double replacement) {
496         final double[] temp = copyOf(values, begin, length);
497         for(int i = 0; i < length; i++) {
498             temp[i] = Precision.equalsIncludingNaN(original, temp[i]) ?
499                       replacement : temp[i];
500         }
501         return temp;
502     }
503 
504     /**
505      * Remove the occurrence of a given value in a copied slice of array
506      * defined by the array part from [begin, begin+length).
507      * @param values the input array
508      * @param begin start index of the array to include
509      * @param length number of elements to include from begin
510      * @param removedValue the value to be removed from the sliced array
511      * @return the copy of the sliced array after removing the removedValue
512      */
513     private static double[] removeAndSlice(final double[] values,
514                                            final int begin, final int length,
515                                            final double removedValue) {
516         MathArrays.verifyValues(values, begin, length);
517         final double[] temp;
518         //BitSet(length) to indicate where the removedValue is located
519         final BitSet bits = new BitSet(length);
520         for (int i = begin; i < begin+length; i++) {
521             if (Precision.equalsIncludingNaN(removedValue, values[i])) {
522                 bits.set(i - begin);
523             }
524         }
525         //Check if empty then create a new copy
526         if (bits.isEmpty()) {
527             temp = copyOf(values, begin, length); // Nothing removed, just copy
528         } else if(bits.cardinality() == length){
529             temp = new double[0];                 // All removed, just empty
530         }else {                                   // Some removable, so new
531             temp = new double[length - bits.cardinality()];
532             int start = begin;  //start index from source array (i.e values)
533             int dest = 0;       //dest index in destination array(i.e temp)
534             int nextOne = -1;   //nextOne is the index of bit set of next one
535             int bitSetPtr = 0;  //bitSetPtr is start index pointer of bitset
536             while ((nextOne = bits.nextSetBit(bitSetPtr)) != -1) {
537                 final int lengthToCopy = nextOne - bitSetPtr;
538                 System.arraycopy(values, start, temp, dest, lengthToCopy);
539                 dest += lengthToCopy;
540                 start = begin + (bitSetPtr = bits.nextClearBit(nextOne));
541             }
542             //Copy any residue past start index till begin+length
543             if (start < begin + length) {
544                 System.arraycopy(values,start,temp,dest,begin + length - start);
545             }
546         }
547         return temp;
548     }
549 
550     /**
551      * Get pivots which is either cached or a newly created one
552      *
553      * @param values array containing the input numbers
554      * @return cached pivots or a newly created one
555      */
556     private int[] getPivots(final double[] values) {
557         final int[] pivotsHeap;
558         if (values == getDataRef()) {
559             pivotsHeap = cachedPivots;
560         } else {
561             pivotsHeap = new int[PIVOTS_HEAP_LENGTH];
562             Arrays.fill(pivotsHeap, -1);
563         }
564         return pivotsHeap;
565     }
566 
567     /**
568      * Get the estimation {@link EstimationType type} used for computation.
569      *
570      * @return the {@code estimationType} set
571      */
572     public EstimationType getEstimationType() {
573         return estimationType;
574     }
575 
576     /**
577      * Build a new instance similar to the current one except for the
578      * {@link EstimationType estimation type}.
579      * <p>
580      * This method is intended to be used as part of a fluent-type builder
581      * pattern. Building finely tune instances should be done as follows:
582      * </p>
583      * <pre>
584      *   Percentile customized = new Percentile(quantile).
585      *                           withEstimationType(estimationType).
586      *                           withNaNStrategy(nanStrategy).
587      *                           withKthSelector(kthSelector);
588      * </pre>
589      * <p>
590      * If any of the {@code withXxx} method is omitted, the default value for
591      * the corresponding customization parameter will be used.
592      * </p>
593      * @param newEstimationType estimation type for the new instance
594      * @return a new instance, with changed estimation type
595      * @throws NullArgumentException when newEstimationType is null
596      */
597     public Percentile withEstimationType(final EstimationType newEstimationType) {
598         return new Percentile(quantile, newEstimationType, nanStrategy, kthSelector);
599     }
600 
601     /**
602      * Get the {@link NaNStrategy NaN Handling} strategy used for computation.
603      * @return {@code NaN Handling} strategy set during construction
604      */
605     public NaNStrategy getNaNStrategy() {
606         return nanStrategy;
607     }
608 
609     /**
610      * Build a new instance similar to the current one except for the
611      * {@link NaNStrategy NaN handling} strategy.
612      * <p>
613      * This method is intended to be used as part of a fluent-type builder
614      * pattern. Building finely tune instances should be done as follows:
615      * </p>
616      * <pre>
617      *   Percentile customized = new Percentile(quantile).
618      *                           withEstimationType(estimationType).
619      *                           withNaNStrategy(nanStrategy).
620      *                           withKthSelector(kthSelector);
621      * </pre>
622      * <p>
623      * If any of the {@code withXxx} method is omitted, the default value for
624      * the corresponding customization parameter will be used.
625      * </p>
626      * @param newNaNStrategy NaN strategy for the new instance
627      * @return a new instance, with changed NaN handling strategy
628      * @throws NullArgumentException when newNaNStrategy is null
629      */
630     public Percentile withNaNStrategy(final NaNStrategy newNaNStrategy) {
631         return new Percentile(quantile, estimationType, newNaNStrategy, kthSelector);
632     }
633 
634     /**
635      * Get the {@link KthSelector kthSelector} used for computation.
636      * @return the {@code kthSelector} set
637      */
638     public KthSelector getKthSelector() {
639         return kthSelector;
640     }
641 
642     /**
643      * Get the {@link PivotingStrategyInterface} used in KthSelector for computation.
644      * @return the pivoting strategy set
645      */
646     public PivotingStrategyInterface getPivotingStrategy() {
647         return kthSelector.getPivotingStrategy();
648     }
649 
650     /**
651      * Build a new instance similar to the current one except for the
652      * {@link KthSelector kthSelector} instance specifically set.
653      * <p>
654      * This method is intended to be used as part of a fluent-type builder
655      * pattern. Building finely tune instances should be done as follows:
656      * </p>
657      * <pre>
658      *   Percentile customized = new Percentile(quantile).
659      *                           withEstimationType(estimationType).
660      *                           withNaNStrategy(nanStrategy).
661      *                           withKthSelector(newKthSelector);
662      * </pre>
663      * <p>
664      * If any of the {@code withXxx} method is omitted, the default value for
665      * the corresponding customization parameter will be used.
666      * </p>
667      * @param newKthSelector KthSelector for the new instance
668      * @return a new instance, with changed KthSelector
669      * @throws NullArgumentException when newKthSelector is null
670      */
671     public Percentile withKthSelector(final KthSelector newKthSelector) {
672         return new Percentile(quantile, estimationType, nanStrategy,
673                                 newKthSelector);
674     }
675 
676     /**
677      * An enum for various estimation strategies of a percentile referred in
678      * <a href="http://en.wikipedia.org/wiki/Quantile">wikipedia on quantile</a>
679      * with the names of enum matching those of types mentioned in
680      * wikipedia.
681      * <p>
682      * Each enum corresponding to the specific type of estimation in wikipedia
683      * implements  the respective formulae that specializes in the below aspects
684      * <ul>
685      * <li>An <b>index method</b> to calculate approximate index of the
686      * estimate</li>
687      * <li>An <b>estimate method</b> to estimate a value found at the earlier
688      * computed index</li>
689      * <li>A <b> minLimit</b> on the quantile for which first element of sorted
690      * input is returned as an estimate </li>
691      * <li>A <b> maxLimit</b> on the quantile for which last element of sorted
692      * input is returned as an estimate </li>
693      * </ul>
694      * <p>
695      * Users can now create {@link Percentile} by explicitly passing this enum;
696      * such as by invoking {@link Percentile#withEstimationType(EstimationType)}
697      * <p>
698      * References:
699      * <ol>
700      * <li>
701      * <a href="http://en.wikipedia.org/wiki/Quantile">Wikipedia on quantile</a>
702      * </li>
703      * <li>
704      * <a href="https://www.amherst.edu/media/view/129116/.../Sample+Quantiles.pdf">
705      * Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical
706      * packages, American Statistician 50, 361–365</a> </li>
707      * <li>
708      * <a href="http://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html">
709      * R-Manual </a></li>
710      * </ol>
711      *
712      */
713     public static enum EstimationType {
714         /**
715          * This is the default type used in the {@link Percentile}.This method
716          * has the following formulae for index and estimates<br>
717          * \( \begin{align}
718          * &amp;index    = (N+1)p\ \\
719          * &amp;estimate = x_{\lceil h\,-\,1/2 \rceil} \\
720          * &amp;minLimit = 0 \\
721          * &amp;maxLimit = 1 \\
722          * \end{align}\)
723          */
724         LEGACY("Legacy Apache Commons Math") {
725             /**
726              * {@inheritDoc}.This method in particular makes use of existing
727              * Apache Commons Math style of picking up the index.
728              */
729             @Override
730             protected double index(final double p, final int length) {
731                 final double minLimit = 0d;
732                 final double maxLimit = 1d;
733                 return Double.compare(p, minLimit) == 0 ? 0 :
734                        Double.compare(p, maxLimit) == 0 ?
735                                length : p * (length + 1);
736             }
737         },
738         /**
739          * The method R_1 has the following formulae for index and estimates<br>
740          * \( \begin{align}
741          * &amp;index= Np + 1/2\,  \\
742          * &amp;estimate= x_{\lceil h\,-\,1/2 \rceil} \\
743          * &amp;minLimit = 0 \\
744          * \end{align}\)
745          */
746         R_1("R-1") {
747 
748             @Override
749             protected double index(final double p, final int length) {
750                 final double minLimit = 0d;
751                 return Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5;
752             }
753 
754             /**
755              * {@inheritDoc}This method in particular for R_1 uses ceil(pos-0.5)
756              */
757             @Override
758             protected double estimate(final double[] values,
759                                       final int[] pivotsHeap, final double pos,
760                                       final int length, final KthSelector kthSelector) {
761                 return super.estimate(values, pivotsHeap, FastMath.ceil(pos - 0.5), length, kthSelector);
762             }
763 
764         },
765         /**
766          * The method R_2 has the following formulae for index and estimates<br>
767          * \( \begin{align}
768          * &amp;index= Np + 1/2\, \\
769          * &amp;estimate=\frac{x_{\lceil h\,-\,1/2 \rceil} +
770          * x_{\lfloor h\,+\,1/2 \rfloor}}{2} \\
771          * &amp;minLimit = 0 \\
772          * &amp;maxLimit = 1 \\
773          * \end{align}\)
774          */
775         R_2("R-2") {
776 
777             @Override
778             protected double index(final double p, final int length) {
779                 final double minLimit = 0d;
780                 final double maxLimit = 1d;
781                 return Double.compare(p, maxLimit) == 0 ? length :
782                        Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5;
783             }
784 
785             /**
786              * {@inheritDoc}This method in particular for R_2 averages the
787              * values at ceil(p+0.5) and floor(p-0.5).
788              */
789             @Override
790             protected double estimate(final double[] values,
791                                       final int[] pivotsHeap, final double pos,
792                                       final int length, final KthSelector kthSelector) {
793                 final double low =
794                         super.estimate(values, pivotsHeap, FastMath.ceil(pos - 0.5), length, kthSelector);
795                 final double high =
796                         super.estimate(values, pivotsHeap,FastMath.floor(pos + 0.5), length, kthSelector);
797                 return (low + high) / 2;
798             }
799 
800         },
801         /**
802          * The method R_3 has the following formulae for index and estimates<br>
803          * \( \begin{align}
804          * &amp;index= Np \\
805          * &amp;estimate= x_{\lfloor h \rceil}\, \\
806          * &amp;minLimit = 0.5/N \\
807          * \end{align}\)
808          */
809         R_3("R-3") {
810             @Override
811             protected double index(final double p, final int length) {
812                 final double minLimit = 1d/2 / length;
813                 return Double.compare(p, minLimit) <= 0 ?
814                         0 : FastMath.rint(length * p);
815             }
816 
817         },
818         /**
819          * The method R_4 has the following formulae for index and estimates<br>
820          * \( \begin{align}
821          * &amp;index= Np\, \\
822          * &amp;estimate= x_{\lfloor h \rfloor} + (h -
823          * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
824          * \rfloor}) \\
825          * &amp;minLimit = 1/N \\
826          * &amp;maxLimit = 1 \\
827          * \end{align}\)
828          */
829         R_4("R-4") {
830             @Override
831             protected double index(final double p, final int length) {
832                 final double minLimit = 1d / length;
833                 final double maxLimit = 1d;
834                 return Double.compare(p, minLimit) < 0 ? 0 :
835                        Double.compare(p, maxLimit) == 0 ? length : length * p;
836             }
837 
838         },
839         /**
840          * The method R_5 has the following formulae for index and estimates<br>
841          * \( \begin{align}
842          * &amp;index= Np + 1/2\\
843          * &amp;estimate= x_{\lfloor h \rfloor} + (h -
844          * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
845          * \rfloor}) \\
846          * &amp;minLimit = 0.5/N \\
847          * &amp;maxLimit = (N-0.5)/N
848          * \end{align}\)
849          */
850         R_5("R-5"){
851 
852             @Override
853             protected double index(final double p, final int length) {
854                 final double minLimit = 1d/2 / length;
855                 final double maxLimit = (length - 0.5) / length;
856                 return Double.compare(p, minLimit) < 0 ? 0 :
857                        Double.compare(p, maxLimit) >= 0 ?
858                                length : length * p + 0.5;
859             }
860         },
861         /**
862          * The method R_6 has the following formulae for index and estimates<br>
863          * \( \begin{align}
864          * &amp;index= (N + 1)p \\
865          * &amp;estimate= x_{\lfloor h \rfloor} + (h -
866          * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
867          * \rfloor}) \\
868          * &amp;minLimit = 1/(N+1) \\
869          * &amp;maxLimit = N/(N+1) \\
870          * \end{align}\)
871          * <p>
872          * <b>Note:</b> This method computes the index in a manner very close to
873          * the default Commons Math Percentile existing implementation. However
874          * the difference to be noted is in picking up the limits with which
875          * first element (p&lt;1(N+1)) and last elements (p&gt;N/(N+1))are done.
876          * While in default case; these are done with p=0 and p=1 respectively.
877          */
878         R_6("R-6"){
879 
880             @Override
881             protected double index(final double p, final int length) {
882                 final double minLimit = 1d / (length + 1);
883                 final double maxLimit = 1d * length / (length + 1);
884                 return Double.compare(p, minLimit) < 0 ? 0 :
885                        Double.compare(p, maxLimit) >= 0 ?
886                                length : (length + 1) * p;
887             }
888         },
889 
890         /**
891          * The method R_7 implements Microsoft Excel style computation has the
892          * following formulae for index and estimates.<br>
893          * \( \begin{align}
894          * &amp;index = (N-1)p + 1 \\
895          * &amp;estimate = x_{\lfloor h \rfloor} + (h -
896          * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
897          * \rfloor}) \\
898          * &amp;minLimit = 0 \\
899          * &amp;maxLimit = 1 \\
900          * \end{align}\)
901          */
902         R_7("R-7") {
903             @Override
904             protected double index(final double p, final int length) {
905                 final double minLimit = 0d;
906                 final double maxLimit = 1d;
907                 return Double.compare(p, minLimit) == 0 ? 0 :
908                        Double.compare(p, maxLimit) == 0 ?
909                                length : 1 + (length - 1) * p;
910             }
911 
912         },
913 
914         /**
915          * The method R_8 has the following formulae for index and estimates<br>
916          * \( \begin{align}
917          * &amp;index = (N + 1/3)p + 1/3  \\
918          * &amp;estimate = x_{\lfloor h \rfloor} + (h -
919            \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
920          * \rfloor}) \\
921          * &amp;minLimit = (2/3)/(N+1/3) \\
922          * &amp;maxLimit = (N-1/3)/(N+1/3) \\
923          * \end{align}\)
924          * <p>
925          * As per Ref [2,3] this approach is most recommended as it provides
926          * an approximate median-unbiased estimate regardless of distribution.
927          */
928         R_8("R-8") {
929             @Override
930             protected double index(final double p, final int length) {
931                 final double minLimit = 2 * (1d / 3) / (length + 1d / 3);
932                 final double maxLimit =
933                         (length - 1d / 3) / (length + 1d / 3);
934                 return Double.compare(p, minLimit) < 0 ? 0 :
935                        Double.compare(p, maxLimit) >= 0 ? length :
936                            (length + 1d / 3) * p + 1d / 3;
937             }
938         },
939 
940         /**
941          * The method R_9 has the following formulae for index and estimates<br>
942          * \( \begin{align}
943          * &amp;index = (N + 1/4)p + 3/8\\
944          * &amp;estimate = x_{\lfloor h \rfloor} + (h -
945            \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
946          * \rfloor}) \\
947          * &amp;minLimit = (5/8)/(N+1/4) \\
948          * &amp;maxLimit = (N-3/8)/(N+1/4) \\
949          * \end{align}\)
950          */
951         R_9("R-9") {
952             @Override
953             protected double index(final double p, final int length) {
954                 final double minLimit = 5d/8 / (length + 0.25);
955                 final double maxLimit = (length - 3d/8) / (length + 0.25);
956                 return Double.compare(p, minLimit) < 0 ? 0 :
957                        Double.compare(p, maxLimit) >= 0 ? length :
958                                (length + 0.25) * p + 3d/8;
959             }
960 
961         },
962         ;
963 
964         /** Simple name such as R-1, R-2 corresponding to those in wikipedia. */
965         private final String name;
966 
967         /**
968          * Constructor
969          *
970          * @param type name of estimation type as per wikipedia
971          */
972         private EstimationType(final String type) {
973             this.name = type;
974         }
975 
976         /**
977          * Finds the index of array that can be used as starting index to
978          * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
979          * percentile. The calculation of index calculation is specific to each
980          * {@link EstimationType}.
981          *
982          * @param p the p<sup>th</sup> quantile
983          * @param length the total number of array elements in the work array
984          * @return a computed real valued index as explained in the wikipedia
985          */
986         protected abstract double index(final double p, final int length);
987 
988         /**
989          * Estimation based on K<sup>th</sup> selection. This may be overridden
990          * in specific enums to compute slightly different estimations.
991          *
992          * @param work array of numbers to be used for finding the percentile
993          * @param pos indicated positional index prior computed from calling
994          *            {@link #index(double, int)}
995          * @param pivotsHeap an earlier populated cache if exists; will be used
996          * @param length size of array considered
997          * @param kthSelector a {@link KthSelector} used for pivoting during search
998          * @return estimated percentile
999          */
1000         protected double estimate(final double[] work, final int[] pivotsHeap,
1001                                   final double pos, final int length,
1002                                   final KthSelector kthSelector) {
1003 
1004             final double fpos = FastMath.floor(pos);
1005             final int intPos = (int) fpos;
1006             final double dif = pos - fpos;
1007 
1008             if (pos < 1) {
1009                 return kthSelector.select(work, pivotsHeap, 0);
1010             }
1011             if (pos >= length) {
1012                 return kthSelector.select(work, pivotsHeap, length - 1);
1013             }
1014 
1015             final double lower = kthSelector.select(work, pivotsHeap, intPos - 1);
1016             final double upper = kthSelector.select(work, pivotsHeap, intPos);
1017             return lower + dif * (upper - lower);
1018         }
1019 
1020         /**
1021          * Evaluate method to compute the percentile for a given bounded array
1022          * using earlier computed pivots heap.<br>
1023          * This basically calls the {@link #index(double, int) index} and then
1024          * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
1025          * functions to return the estimated percentile value.
1026          *
1027          * @param work array of numbers to be used for finding the percentile
1028          * @param pivotsHeap a prior cached heap which can speed up estimation
1029          * @param p the p<sup>th</sup> quantile to be computed
1030          * @param kthSelector a {@link KthSelector} used for pivoting during search
1031          * @return estimated percentile
1032          * @throws OutOfRangeException if p is out of range
1033          * @throws NullArgumentException if work array is null
1034          */
1035         protected double evaluate(final double[] work, final int[] pivotsHeap, final double p,
1036                                   final KthSelector kthSelector) {
1037             MathUtils.checkNotNull(work);
1038             if (p > 100 || p <= 0) {
1039                 throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE,
1040                                               p, 0, 100);
1041             }
1042             return estimate(work, pivotsHeap, index(p/100d, work.length), work.length, kthSelector);
1043         }
1044 
1045         /**
1046          * Evaluate method to compute the percentile for a given bounded array.
1047          * This basically calls the {@link #index(double, int) index} and then
1048          * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
1049          * functions to return the estimated percentile value. Please
1050          * note that this method does not make use of cached pivots.
1051          *
1052          * @param work array of numbers to be used for finding the percentile
1053          * @param p the p<sup>th</sup> quantile to be computed
1054          * @return estimated percentile
1055          * @param kthSelector a {@link KthSelector} used for pivoting during search
1056          * @throws OutOfRangeException if length or p is out of range
1057          * @throws NullArgumentException if work array is null
1058          */
1059         public double evaluate(final double[] work, final double p, final KthSelector kthSelector) {
1060             return this.evaluate(work, null, p, kthSelector);
1061         }
1062 
1063         /**
1064          * Gets the name of the enum
1065          *
1066          * @return the name
1067          */
1068         String getName() {
1069             return name;
1070         }
1071     }
1072 }