Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math3.stat.descriptive.rank;
018
019import java.io.Serializable;
020import java.util.Arrays;
021import java.util.BitSet;
022
023import org.apache.commons.math3.exception.MathIllegalArgumentException;
024import org.apache.commons.math3.exception.MathUnsupportedOperationException;
025import org.apache.commons.math3.exception.NullArgumentException;
026import org.apache.commons.math3.exception.OutOfRangeException;
027import org.apache.commons.math3.exception.util.LocalizedFormats;
028import org.apache.commons.math3.stat.descriptive.AbstractUnivariateStatistic;
029import org.apache.commons.math3.stat.ranking.NaNStrategy;
030import org.apache.commons.math3.util.FastMath;
031import org.apache.commons.math3.util.KthSelector;
032import org.apache.commons.math3.util.MathArrays;
033import org.apache.commons.math3.util.MathUtils;
034import org.apache.commons.math3.util.MedianOf3PivotingStrategy;
035import org.apache.commons.math3.util.PivotingStrategyInterface;
036import org.apache.commons.math3.util.Precision;
037
038/**
039 * Provides percentile computation.
040 * <p>
041 * There are several commonly used methods for estimating percentiles (a.k.a.
042 * quantiles) based on sample data.  For large samples, the different methods
043 * agree closely, but when sample sizes are small, different methods will give
044 * significantly different results.  The algorithm implemented here works as follows:
045 * <ol>
046 * <li>Let <code>n</code> be the length of the (sorted) array and
047 * <code>0 < p <= 100</code> be the desired percentile.</li>
048 * <li>If <code> n = 1 </code> return the unique array element (regardless of
049 * the value of <code>p</code>); otherwise </li>
050 * <li>Compute the estimated percentile position
051 * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
052 * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
053 * part of <code>pos</code>).</li>
054 * <li> If <code>pos < 1</code> return the smallest element in the array.</li>
055 * <li> Else if <code>pos >= n</code> return the largest element in the array.</li>
056 * <li> Else let <code>lower</code> be the element in position
057 * <code>floor(pos)</code> in the array and let <code>upper</code> be the
058 * next element in the array.  Return <code>lower + d * (upper - lower)</code>
059 * </li>
060 * </ol></p>
061 * <p>
062 * To compute percentiles, the data must be at least partially ordered.  Input
063 * arrays are copied and recursively partitioned using an ordering definition.
064 * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
065 * by {@link java.lang.Double#compareTo(Double)}.  This ordering makes
066 * <code>Double.NaN</code> larger than any other value (including
067 * <code>Double.POSITIVE_INFINITY</code>).  Therefore, for example, the median
068 * (50th percentile) of
069 * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code></p>
070 * <p>
071 * Since percentile estimation usually involves interpolation between array
072 * elements, arrays containing  <code>NaN</code> or infinite values will often
073 * result in <code>NaN</code> or infinite values returned.</p>
074 * <p>
075 * Further, to include different estimation types such as R1, R2 as mentioned in
076 * <a href="http://en.wikipedia.org/wiki/Quantile">Quantile page(wikipedia)</a>,
077 * a type specific NaN handling strategy is used to closely match with the
078 * typically observed results from popular tools like R(R1-R9), Excel(R7).</p>
079 * <p>
080 * Since 2.2, Percentile uses only selection instead of complete sorting
081 * and caches selection algorithm state between calls to the various
082 * {@code evaluate} methods. This greatly improves efficiency, both for a single
083 * percentile and multiple percentile computations. To maximize performance when
084 * multiple percentiles are computed based on the same data, users should set the
085 * data array once using either one of the {@link #evaluate(double[], double)} or
086 * {@link #setData(double[])} methods and thereafter {@link #evaluate(double)}
087 * with just the percentile provided.
088 * </p>
089 * <p>
090 * <strong>Note that this implementation is not synchronized.</strong> If
091 * multiple threads access an instance of this class concurrently, and at least
092 * one of the threads invokes the <code>increment()</code> or
093 * <code>clear()</code> method, it must be synchronized externally.</p>
094 *
095 */
096public class Percentile extends AbstractUnivariateStatistic implements Serializable {
097
098    /** Serializable version identifier */
099    private static final long serialVersionUID = -8091216485095130416L;
100
101    /** Maximum number of partitioning pivots cached (each level double the number of pivots). */
102    private static final int MAX_CACHED_LEVELS = 10;
103
104    /** Maximum number of cached pivots in the pivots cached array */
105    private static final int PIVOTS_HEAP_LENGTH = 0x1 << MAX_CACHED_LEVELS - 1;
106
107    /** Default KthSelector used with default pivoting strategy */
108    private final KthSelector kthSelector;
109
110    /** Any of the {@link EstimationType}s such as {@link EstimationType#LEGACY CM} can be used. */
111    private final EstimationType estimationType;
112
113    /** NaN Handling of the input as defined by {@link NaNStrategy} */
114    private final NaNStrategy nanStrategy;
115
116    /** Determines what percentile is computed when evaluate() is activated
117     * with no quantile argument */
118    private double quantile;
119
120    /** Cached pivots. */
121    private int[] cachedPivots;
122
123    /**
124     * Constructs a Percentile with the following defaults.
125     * <ul>
126     *   <li>default quantile: 50.0, can be reset with {@link #setQuantile(double)}</li>
127     *   <li>default estimation type: {@link EstimationType#LEGACY},
128     *   can be reset with {@link #withEstimationType(EstimationType)}</li>
129     *   <li>default NaN strategy: {@link NaNStrategy#REMOVED},
130     *   can be reset with {@link #withNaNStrategy(NaNStrategy)}</li>
131     *   <li>a KthSelector that makes use of {@link MedianOf3PivotingStrategy},
132     *   can be reset with {@link #withKthSelector(KthSelector)}</li>
133     * </ul>
134     */
135    public Percentile() {
136        // No try-catch or advertised exception here - arg is valid
137        this(50.0);
138    }
139
140    /**
141     * Constructs a Percentile with the specific quantile value and the following
142     * <ul>
143     *   <li>default method type: {@link EstimationType#LEGACY}</li>
144     *   <li>default NaN strategy: {@link NaNStrategy#REMOVED}</li>
145     *   <li>a Kth Selector : {@link KthSelector}</li>
146     * </ul>
147     * @param quantile the quantile
148     * @throws MathIllegalArgumentException  if p is not greater than 0 and less
149     * than or equal to 100
150     */
151    public Percentile(final double quantile) throws MathIllegalArgumentException {
152        this(quantile, EstimationType.LEGACY, NaNStrategy.REMOVED,
153             new KthSelector(new MedianOf3PivotingStrategy()));
154    }
155
156    /**
157     * Copy constructor, creates a new {@code Percentile} identical
158     * to the {@code original}
159     *
160     * @param original the {@code Percentile} instance to copy
161     * @throws NullArgumentException if original is null
162     */
163    public Percentile(final Percentile original) throws NullArgumentException {
164
165        MathUtils.checkNotNull(original);
166        estimationType   = original.getEstimationType();
167        nanStrategy      = original.getNaNStrategy();
168        kthSelector      = original.getKthSelector();
169
170        setData(original.getDataRef());
171        if (original.cachedPivots != null) {
172            System.arraycopy(original.cachedPivots, 0, cachedPivots, 0, original.cachedPivots.length);
173        }
174        setQuantile(original.quantile);
175
176    }
177
178    /**
179     * Constructs a Percentile with the specific quantile value,
180     * {@link EstimationType}, {@link NaNStrategy} and {@link KthSelector}.
181     *
182     * @param quantile the quantile to be computed
183     * @param estimationType one of the percentile {@link EstimationType  estimation types}
184     * @param nanStrategy one of {@link NaNStrategy} to handle with NaNs
185     * @param kthSelector a {@link KthSelector} to use for pivoting during search
186     * @throws MathIllegalArgumentException if p is not within (0,100]
187     * @throws NullArgumentException if type or NaNStrategy passed is null
188     */
189    protected Percentile(final double quantile,
190                         final EstimationType estimationType,
191                         final NaNStrategy nanStrategy,
192                         final KthSelector kthSelector)
193        throws MathIllegalArgumentException {
194        setQuantile(quantile);
195        cachedPivots = null;
196        MathUtils.checkNotNull(estimationType);
197        MathUtils.checkNotNull(nanStrategy);
198        MathUtils.checkNotNull(kthSelector);
199        this.estimationType = estimationType;
200        this.nanStrategy = nanStrategy;
201        this.kthSelector = kthSelector;
202    }
203
204    /** {@inheritDoc} */
205    @Override
206    public void setData(final double[] values) {
207        if (values == null) {
208            cachedPivots = null;
209        } else {
210            cachedPivots = new int[PIVOTS_HEAP_LENGTH];
211            Arrays.fill(cachedPivots, -1);
212        }
213        super.setData(values);
214    }
215
216    /** {@inheritDoc} */
217    @Override
218    public void setData(final double[] values, final int begin, final int length)
219    throws MathIllegalArgumentException {
220        if (values == null) {
221            cachedPivots = null;
222        } else {
223            cachedPivots = new int[PIVOTS_HEAP_LENGTH];
224            Arrays.fill(cachedPivots, -1);
225        }
226        super.setData(values, begin, length);
227    }
228
229    /**
230     * Returns the result of evaluating the statistic over the stored data.
231     * <p>
232     * The stored array is the one which was set by previous calls to
233     * {@link #setData(double[])}
234     * </p>
235     * @param p the percentile value to compute
236     * @return the value of the statistic applied to the stored data
237     * @throws MathIllegalArgumentException if p is not a valid quantile value
238     * (p must be greater than 0 and less than or equal to 100)
239     */
240    public double evaluate(final double p) throws MathIllegalArgumentException {
241        return evaluate(getDataRef(), p);
242    }
243
244    /**
245     * Returns an estimate of the <code>p</code>th percentile of the values
246     * in the <code>values</code> array.
247     * <p>
248     * Calls to this method do not modify the internal <code>quantile</code>
249     * state of this statistic.</p>
250     * <p>
251     * <ul>
252     * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
253     * <code>0</code></li>
254     * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
255     *  if <code>values</code> has length <code>1</code></li>
256     * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
257     * is null or p is not a valid quantile value (p must be greater than 0
258     * and less than or equal to 100) </li>
259     * </ul></p>
260     * <p>
261     * See {@link Percentile} for a description of the percentile estimation
262     * algorithm used.</p>
263     *
264     * @param values input array of values
265     * @param p the percentile value to compute
266     * @return the percentile value or Double.NaN if the array is empty
267     * @throws MathIllegalArgumentException if <code>values</code> is null
268     *     or p is invalid
269     */
270    public double evaluate(final double[] values, final double p)
271    throws MathIllegalArgumentException {
272        test(values, 0, 0);
273        return evaluate(values, 0, values.length, p);
274    }
275
276    /**
277     * Returns an estimate of the <code>quantile</code>th percentile of the
278     * designated values in the <code>values</code> array.  The quantile
279     * estimated is determined by the <code>quantile</code> property.
280     * <p>
281     * <ul>
282     * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
283     * <li>Returns (for any value of <code>quantile</code>)
284     * <code>values[begin]</code> if <code>length = 1 </code></li>
285     * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
286     * is null, or <code>start</code> or <code>length</code> is invalid</li>
287     * </ul></p>
288     * <p>
289     * See {@link Percentile} for a description of the percentile estimation
290     * algorithm used.</p>
291     *
292     * @param values the input array
293     * @param start index of the first array element to include
294     * @param length the number of elements to include
295     * @return the percentile value
296     * @throws MathIllegalArgumentException if the parameters are not valid
297     *
298     */
299    @Override
300    public double evaluate(final double[] values, final int start, final int length)
301    throws MathIllegalArgumentException {
302        return evaluate(values, start, length, quantile);
303    }
304
305     /**
306     * Returns an estimate of the <code>p</code>th percentile of the values
307     * in the <code>values</code> array, starting with the element in (0-based)
308     * position <code>begin</code> in the array and including <code>length</code>
309     * values.
310     * <p>
311     * Calls to this method do not modify the internal <code>quantile</code>
312     * state of this statistic.</p>
313     * <p>
314     * <ul>
315     * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
316     * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
317     *  if <code>length = 1 </code></li>
318     * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
319     *  is null , <code>begin</code> or <code>length</code> is invalid, or
320     * <code>p</code> is not a valid quantile value (p must be greater than 0
321     * and less than or equal to 100)</li>
322     * </ul></p>
323     * <p>
324     * See {@link Percentile} for a description of the percentile estimation
325     * algorithm used.</p>
326     *
327     * @param values array of input values
328     * @param p  the percentile to compute
329     * @param begin  the first (0-based) element to include in the computation
330     * @param length  the number of array elements to include
331     * @return  the percentile value
332     * @throws MathIllegalArgumentException if the parameters are not valid or the
333     * input array is null
334     */
335    public double evaluate(final double[] values, final int begin,
336                           final int length, final double p)
337        throws MathIllegalArgumentException {
338
339        test(values, begin, length);
340        if (p > 100 || p <= 0) {
341            throw new OutOfRangeException(
342                    LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, p, 0, 100);
343        }
344        if (length == 0) {
345            return Double.NaN;
346        }
347        if (length == 1) {
348            return values[begin]; // always return single value for n = 1
349        }
350
351        final double[] work = getWorkArray(values, begin, length);
352        final int[] pivotsHeap = getPivots(values);
353        return work.length == 0 ? Double.NaN :
354                    estimationType.evaluate(work, pivotsHeap, p, kthSelector);
355    }
356
357    /** Select a pivot index as the median of three
358     * <p>
359     * <b>Note:</b> With the effect of allowing {@link KthSelector} to be set on
360     * {@link Percentile} instances(thus indirectly {@link PivotingStrategy})
361     * this method wont take effect any more and hence is unsupported.
362     * @param work data array
363     * @param begin index of the first element of the slice
364     * @param end index after the last element of the slice
365     * @return the index of the median element chosen between the
366     * first, the middle and the last element of the array slice
367     * @deprecated Please refrain from using this method (as it wont take effect)
368     * and instead use {@link Percentile#withKthSelector(newKthSelector)} if
369     * required.
370     *
371     */
372    @Deprecated
373    int medianOf3(final double[] work, final int begin, final int end) {
374        return new MedianOf3PivotingStrategy().pivotIndex(work, begin, end);
375        //throw new MathUnsupportedOperationException();
376    }
377
378    /**
379     * Returns the value of the quantile field (determines what percentile is
380     * computed when evaluate() is called with no quantile argument).
381     *
382     * @return quantile set while construction or {@link #setQuantile(double)}
383     */
384    public double getQuantile() {
385        return quantile;
386    }
387
388    /**
389     * Sets the value of the quantile field (determines what percentile is
390     * computed when evaluate() is called with no quantile argument).
391     *
392     * @param p a value between 0 < p <= 100
393     * @throws MathIllegalArgumentException  if p is not greater than 0 and less
394     * than or equal to 100
395     */
396    public void setQuantile(final double p) throws MathIllegalArgumentException {
397        if (p <= 0 || p > 100) {
398            throw new OutOfRangeException(
399                    LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, p, 0, 100);
400        }
401        quantile = p;
402    }
403
404    /**
405     * {@inheritDoc}
406     */
407    @Override
408    public Percentile copy() {
409        return new Percentile(this);
410    }
411
412    /**
413     * Copies source to dest.
414     * @param source Percentile to copy
415     * @param dest Percentile to copy to
416     * @exception MathUnsupportedOperationException always thrown since 3.4
417     * @deprecated as of 3.4 this method does not work anymore, as it fails to
418     * copy internal states between instances configured with different
419     * {@link EstimationType estimation type}, {@link NaNStrategy NaN handling strategies}
420     * and {@link KthSelector kthSelector}, it therefore always
421     * throw {@link MathUnsupportedOperationException}
422      */
423    @Deprecated
424    public static void copy(final Percentile source, final Percentile dest)
425        throws MathUnsupportedOperationException {
426        throw new MathUnsupportedOperationException();
427    }
428
429    /**
430     * Get the work array to operate. Makes use of prior {@code storedData} if
431     * it exists or else do a check on NaNs and copy a subset of the array
432     * defined by begin and length parameters. The set {@link #nanStrategy} will
433     * be used to either retain/remove/replace any NaNs present before returning
434     * the resultant array.
435     *
436     * @param values the array of numbers
437     * @param begin index to start reading the array
438     * @param length the length of array to be read from the begin index
439     * @return work array sliced from values in the range [begin,begin+length)
440     * @throws MathIllegalArgumentException if values or indices are invalid
441     */
442    protected double[] getWorkArray(final double[] values, final int begin, final int length) {
443            final double[] work;
444            if (values == getDataRef()) {
445                work = getDataRef();
446            } else {
447                switch (nanStrategy) {
448                case MAXIMAL:// Replace NaNs with +INFs
449                    work = replaceAndSlice(values, begin, length, Double.NaN, Double.POSITIVE_INFINITY);
450                    break;
451                case MINIMAL:// Replace NaNs with -INFs
452                    work = replaceAndSlice(values, begin, length, Double.NaN, Double.NEGATIVE_INFINITY);
453                    break;
454                case REMOVED:// Drop NaNs from data
455                    work = removeAndSlice(values, begin, length, Double.NaN);
456                    break;
457                case FAILED:// just throw exception as NaN is un-acceptable
458                    work = copyOf(values, begin, length);
459                    MathArrays.checkNotNaN(work);
460                    break;
461                default: //FIXED
462                    work = copyOf(values,begin,length);
463                    break;
464                }
465            }
466            return work;
467    }
468
469    /**
470     * Make a copy of the array for the slice defined by array part from
471     * [begin, begin+length)
472     * @param values the input array
473     * @param begin start index of the array to include
474     * @param length number of elements to include from begin
475     * @return copy of a slice of the original array
476     */
477    private static double[] copyOf(final double[] values, final int begin, final int length) {
478        MathArrays.verifyValues(values, begin, length);
479        return MathArrays.copyOfRange(values, begin, begin + length);
480    }
481
482    /**
483     * Replace every occurrence of a given value with a replacement value in a
484     * copied slice of array defined by array part from [begin, begin+length).
485     * @param values the input array
486     * @param begin start index of the array to include
487     * @param length number of elements to include from begin
488     * @param original the value to be replaced with
489     * @param replacement the value to be used for replacement
490     * @return the copy of sliced array with replaced values
491     */
492    private static double[] replaceAndSlice(final double[] values,
493                                            final int begin, final int length,
494                                            final double original,
495                                            final double replacement) {
496        final double[] temp = copyOf(values, begin, length);
497        for(int i = 0; i < length; i++) {
498            temp[i] = Precision.equalsIncludingNaN(original, temp[i]) ?
499                      replacement : temp[i];
500        }
501        return temp;
502    }
503
504    /**
505     * Remove the occurrence of a given value in a copied slice of array
506     * defined by the array part from [begin, begin+length).
507     * @param values the input array
508     * @param begin start index of the array to include
509     * @param length number of elements to include from begin
510     * @param removedValue the value to be removed from the sliced array
511     * @return the copy of the sliced array after removing the removedValue
512     */
513    private static double[] removeAndSlice(final double[] values,
514                                           final int begin, final int length,
515                                           final double removedValue) {
516        MathArrays.verifyValues(values, begin, length);
517        final double[] temp;
518        //BitSet(length) to indicate where the removedValue is located
519        final BitSet bits = new BitSet(length);
520        for (int i = begin; i < begin+length; i++) {
521            if (Precision.equalsIncludingNaN(removedValue, values[i])) {
522                bits.set(i - begin);
523            }
524        }
525        //Check if empty then create a new copy
526        if (bits.isEmpty()) {
527            temp = copyOf(values, begin, length); // Nothing removed, just copy
528        } else if(bits.cardinality() == length){
529            temp = new double[0];                 // All removed, just empty
530        }else {                                   // Some removable, so new
531            temp = new double[length - bits.cardinality()];
532            int start = begin;  //start index from source array (i.e values)
533            int dest = 0;       //dest index in destination array(i.e temp)
534            int nextOne = -1;   //nextOne is the index of bit set of next one
535            int bitSetPtr = 0;  //bitSetPtr is start index pointer of bitset
536            while ((nextOne = bits.nextSetBit(bitSetPtr)) != -1) {
537                final int lengthToCopy = nextOne - bitSetPtr;
538                System.arraycopy(values, start, temp, dest, lengthToCopy);
539                dest += lengthToCopy;
540                start = begin + (bitSetPtr = bits.nextClearBit(nextOne));
541            }
542            //Copy any residue past start index till begin+length
543            if (start < begin + length) {
544                System.arraycopy(values,start,temp,dest,begin + length - start);
545            }
546        }
547        return temp;
548    }
549
550    /**
551     * Get pivots which is either cached or a newly created one
552     *
553     * @param values array containing the input numbers
554     * @return cached pivots or a newly created one
555     */
556    private int[] getPivots(final double[] values) {
557        final int[] pivotsHeap;
558        if (values == getDataRef()) {
559            pivotsHeap = cachedPivots;
560        } else {
561            pivotsHeap = new int[PIVOTS_HEAP_LENGTH];
562            Arrays.fill(pivotsHeap, -1);
563        }
564        return pivotsHeap;
565    }
566
567    /**
568     * Get the estimation {@link EstimationType type} used for computation.
569     *
570     * @return the {@code estimationType} set
571     */
572    public EstimationType getEstimationType() {
573        return estimationType;
574    }
575
576    /**
577     * Build a new instance similar to the current one except for the
578     * {@link EstimationType estimation type}.
579     * <p>
580     * This method is intended to be used as part of a fluent-type builder
581     * pattern. Building finely tune instances should be done as follows:
582     * </p>
583     * <pre>
584     *   Percentile customized = new Percentile(quantile).
585     *                           withEstimationType(estimationType).
586     *                           withNaNStrategy(nanStrategy).
587     *                           withKthSelector(kthSelector);
588     * </pre>
589     * <p>
590     * If any of the {@code withXxx} method is omitted, the default value for
591     * the corresponding customization parameter will be used.
592     * </p>
593     * @param newEstimationType estimation type for the new instance
594     * @return a new instance, with changed estimation type
595     * @throws NullArgumentException when newEstimationType is null
596     */
597    public Percentile withEstimationType(final EstimationType newEstimationType) {
598        return new Percentile(quantile, newEstimationType, nanStrategy, kthSelector);
599    }
600
601    /**
602     * Get the {@link NaNStrategy NaN Handling} strategy used for computation.
603     * @return {@code NaN Handling} strategy set during construction
604     */
605    public NaNStrategy getNaNStrategy() {
606        return nanStrategy;
607    }
608
609    /**
610     * Build a new instance similar to the current one except for the
611     * {@link NaNStrategy NaN handling} strategy.
612     * <p>
613     * This method is intended to be used as part of a fluent-type builder
614     * pattern. Building finely tune instances should be done as follows:
615     * </p>
616     * <pre>
617     *   Percentile customized = new Percentile(quantile).
618     *                           withEstimationType(estimationType).
619     *                           withNaNStrategy(nanStrategy).
620     *                           withKthSelector(kthSelector);
621     * </pre>
622     * <p>
623     * If any of the {@code withXxx} method is omitted, the default value for
624     * the corresponding customization parameter will be used.
625     * </p>
626     * @param newNaNStrategy NaN strategy for the new instance
627     * @return a new instance, with changed NaN handling strategy
628     * @throws NullArgumentException when newNaNStrategy is null
629     */
630    public Percentile withNaNStrategy(final NaNStrategy newNaNStrategy) {
631        return new Percentile(quantile, estimationType, newNaNStrategy, kthSelector);
632    }
633
634    /**
635     * Get the {@link KthSelector kthSelector} used for computation.
636     * @return the {@code kthSelector} set
637     */
638    public KthSelector getKthSelector() {
639        return kthSelector;
640    }
641
642    /**
643     * Get the {@link PivotingStrategyInterface} used in KthSelector for computation.
644     * @return the pivoting strategy set
645     */
646    public PivotingStrategyInterface getPivotingStrategy() {
647        return kthSelector.getPivotingStrategy();
648    }
649
650    /**
651     * Build a new instance similar to the current one except for the
652     * {@link KthSelector kthSelector} instance specifically set.
653     * <p>
654     * This method is intended to be used as part of a fluent-type builder
655     * pattern. Building finely tune instances should be done as follows:
656     * </p>
657     * <pre>
658     *   Percentile customized = new Percentile(quantile).
659     *                           withEstimationType(estimationType).
660     *                           withNaNStrategy(nanStrategy).
661     *                           withKthSelector(newKthSelector);
662     * </pre>
663     * <p>
664     * If any of the {@code withXxx} method is omitted, the default value for
665     * the corresponding customization parameter will be used.
666     * </p>
667     * @param newKthSelector KthSelector for the new instance
668     * @return a new instance, with changed KthSelector
669     * @throws NullArgumentException when newKthSelector is null
670     */
671    public Percentile withKthSelector(final KthSelector newKthSelector) {
672        return new Percentile(quantile, estimationType, nanStrategy,
673                                newKthSelector);
674    }
675
676    /**
677     * An enum for various estimation strategies of a percentile referred in
678     * <a href="http://en.wikipedia.org/wiki/Quantile">wikipedia on quantile</a>
679     * with the names of enum matching those of types mentioned in
680     * wikipedia.
681     * <p>
682     * Each enum corresponding to the specific type of estimation in wikipedia
683     * implements  the respective formulae that specializes in the below aspects
684     * <ul>
685     * <li>An <b>index method</b> to calculate approximate index of the
686     * estimate</li>
687     * <li>An <b>estimate method</b> to estimate a value found at the earlier
688     * computed index</li>
689     * <li>A <b> minLimit</b> on the quantile for which first element of sorted
690     * input is returned as an estimate </li>
691     * <li>A <b> maxLimit</b> on the quantile for which last element of sorted
692     * input is returned as an estimate </li>
693     * </ul>
694     * <p>
695     * Users can now create {@link Percentile} by explicitly passing this enum;
696     * such as by invoking {@link Percentile#withEstimationType(EstimationType)}
697     * <p>
698     * References:
699     * <ol>
700     * <li>
701     * <a href="http://en.wikipedia.org/wiki/Quantile">Wikipedia on quantile</a>
702     * </li>
703     * <li>
704     * <a href="https://www.amherst.edu/media/view/129116/.../Sample+Quantiles.pdf">
705     * Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical
706     * packages, American Statistician 50, 361–365</a> </li>
707     * <li>
708     * <a href="http://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html">
709     * R-Manual </a></li>
710     * </ol>
711     *
712     */
713    public enum EstimationType {
714        /**
715         * This is the default type used in the {@link Percentile}.This method
716         * has the following formulae for index and estimates<br>
717         * \( \begin{align}
718         * &amp;index    = (N+1)p\ \\
719         * &amp;estimate = x_{\lceil h\,-\,1/2 \rceil} \\
720         * &amp;minLimit = 0 \\
721         * &amp;maxLimit = 1 \\
722         * \end{align}\)
723         */
724        LEGACY("Legacy Apache Commons Math") {
725            /**
726             * {@inheritDoc}.This method in particular makes use of existing
727             * Apache Commons Math style of picking up the index.
728             */
729            @Override
730            protected double index(final double p, final int length) {
731                final double minLimit = 0d;
732                final double maxLimit = 1d;
733                return Double.compare(p, minLimit) == 0 ? 0 :
734                       Double.compare(p, maxLimit) == 0 ?
735                               length : p * (length + 1);
736            }
737        },
738        /**
739         * The method R_1 has the following formulae for index and estimates<br>
740         * \( \begin{align}
741         * &amp;index= Np + 1/2\,  \\
742         * &amp;estimate= x_{\lceil h\,-\,1/2 \rceil} \\
743         * &amp;minLimit = 0 \\
744         * \end{align}\)
745         */
746        R_1("R-1") {
747
748            @Override
749            protected double index(final double p, final int length) {
750                final double minLimit = 0d;
751                return Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5;
752            }
753
754            /**
755             * {@inheritDoc}This method in particular for R_1 uses ceil(pos-0.5)
756             */
757            @Override
758            protected double estimate(final double[] values,
759                                      final int[] pivotsHeap, final double pos,
760                                      final int length, final KthSelector selector) {
761                return super.estimate(values, pivotsHeap, FastMath.ceil(pos - 0.5), length, selector);
762            }
763
764        },
765        /**
766         * The method R_2 has the following formulae for index and estimates<br>
767         * \( \begin{align}
768         * &amp;index= Np + 1/2\, \\
769         * &amp;estimate=\frac{x_{\lceil h\,-\,1/2 \rceil} +
770         * x_{\lfloor h\,+\,1/2 \rfloor}}{2} \\
771         * &amp;minLimit = 0 \\
772         * &amp;maxLimit = 1 \\
773         * \end{align}\)
774         */
775        R_2("R-2") {
776
777            @Override
778            protected double index(final double p, final int length) {
779                final double minLimit = 0d;
780                final double maxLimit = 1d;
781                return Double.compare(p, maxLimit) == 0 ? length :
782                       Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5;
783            }
784
785            /**
786             * {@inheritDoc}This method in particular for R_2 averages the
787             * values at ceil(p+0.5) and floor(p-0.5).
788             */
789            @Override
790            protected double estimate(final double[] values,
791                                      final int[] pivotsHeap, final double pos,
792                                      final int length, final KthSelector selector) {
793                final double low =
794                        super.estimate(values, pivotsHeap, FastMath.ceil(pos - 0.5), length, selector);
795                final double high =
796                        super.estimate(values, pivotsHeap,FastMath.floor(pos + 0.5), length, selector);
797                return (low + high) / 2;
798            }
799
800        },
801        /**
802         * The method R_3 has the following formulae for index and estimates<br>
803         * \( \begin{align}
804         * &amp;index= Np \\
805         * &amp;estimate= x_{\lfloor h \rceil}\, \\
806         * &amp;minLimit = 0.5/N \\
807         * \end{align}\)
808         */
809        R_3("R-3") {
810            @Override
811            protected double index(final double p, final int length) {
812                final double minLimit = 1d/2 / length;
813                return Double.compare(p, minLimit) <= 0 ?
814                        0 : FastMath.rint(length * p);
815            }
816
817        },
818        /**
819         * The method R_4 has the following formulae for index and estimates<br>
820         * \( \begin{align}
821         * &amp;index= Np\, \\
822         * &amp;estimate= x_{\lfloor h \rfloor} + (h -
823         * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
824         * \rfloor}) \\
825         * &amp;minLimit = 1/N \\
826         * &amp;maxLimit = 1 \\
827         * \end{align}\)
828         */
829        R_4("R-4") {
830            @Override
831            protected double index(final double p, final int length) {
832                final double minLimit = 1d / length;
833                final double maxLimit = 1d;
834                return Double.compare(p, minLimit) < 0 ? 0 :
835                       Double.compare(p, maxLimit) == 0 ? length : length * p;
836            }
837
838        },
839        /**
840         * The method R_5 has the following formulae for index and estimates<br>
841         * \( \begin{align}
842         * &amp;index= Np + 1/2\\
843         * &amp;estimate= x_{\lfloor h \rfloor} + (h -
844         * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
845         * \rfloor}) \\
846         * &amp;minLimit = 0.5/N \\
847         * &amp;maxLimit = (N-0.5)/N
848         * \end{align}\)
849         */
850        R_5("R-5"){
851
852            @Override
853            protected double index(final double p, final int length) {
854                final double minLimit = 1d/2 / length;
855                final double maxLimit = (length - 0.5) / length;
856                return Double.compare(p, minLimit) < 0 ? 0 :
857                       Double.compare(p, maxLimit) >= 0 ?
858                               length : length * p + 0.5;
859            }
860        },
861        /**
862         * The method R_6 has the following formulae for index and estimates<br>
863         * \( \begin{align}
864         * &amp;index= (N + 1)p \\
865         * &amp;estimate= x_{\lfloor h \rfloor} + (h -
866         * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
867         * \rfloor}) \\
868         * &amp;minLimit = 1/(N+1) \\
869         * &amp;maxLimit = N/(N+1) \\
870         * \end{align}\)
871         * <p>
872         * <b>Note:</b> This method computes the index in a manner very close to
873         * the default Commons Math Percentile existing implementation. However
874         * the difference to be noted is in picking up the limits with which
875         * first element (p&lt;1(N+1)) and last elements (p&gt;N/(N+1))are done.
876         * While in default case; these are done with p=0 and p=1 respectively.
877         */
878        R_6("R-6"){
879
880            @Override
881            protected double index(final double p, final int length) {
882                final double minLimit = 1d / (length + 1);
883                final double maxLimit = 1d * length / (length + 1);
884                return Double.compare(p, minLimit) < 0 ? 0 :
885                       Double.compare(p, maxLimit) >= 0 ?
886                               length : (length + 1) * p;
887            }
888        },
889
890        /**
891         * The method R_7 implements Microsoft Excel style computation has the
892         * following formulae for index and estimates.<br>
893         * \( \begin{align}
894         * &amp;index = (N-1)p + 1 \\
895         * &amp;estimate = x_{\lfloor h \rfloor} + (h -
896         * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
897         * \rfloor}) \\
898         * &amp;minLimit = 0 \\
899         * &amp;maxLimit = 1 \\
900         * \end{align}\)
901         */
902        R_7("R-7") {
903            @Override
904            protected double index(final double p, final int length) {
905                final double minLimit = 0d;
906                final double maxLimit = 1d;
907                return Double.compare(p, minLimit) == 0 ? 0 :
908                       Double.compare(p, maxLimit) == 0 ?
909                               length : 1 + (length - 1) * p;
910            }
911
912        },
913
914        /**
915         * The method R_8 has the following formulae for index and estimates<br>
916         * \( \begin{align}
917         * &amp;index = (N + 1/3)p + 1/3  \\
918         * &amp;estimate = x_{\lfloor h \rfloor} + (h -
919           \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
920         * \rfloor}) \\
921         * &amp;minLimit = (2/3)/(N+1/3) \\
922         * &amp;maxLimit = (N-1/3)/(N+1/3) \\
923         * \end{align}\)
924         * <p>
925         * As per Ref [2,3] this approach is most recommended as it provides
926         * an approximate median-unbiased estimate regardless of distribution.
927         */
928        R_8("R-8") {
929            @Override
930            protected double index(final double p, final int length) {
931                final double minLimit = 2 * (1d / 3) / (length + 1d / 3);
932                final double maxLimit =
933                        (length - 1d / 3) / (length + 1d / 3);
934                return Double.compare(p, minLimit) < 0 ? 0 :
935                       Double.compare(p, maxLimit) >= 0 ? length :
936                           (length + 1d / 3) * p + 1d / 3;
937            }
938        },
939
940        /**
941         * The method R_9 has the following formulae for index and estimates<br>
942         * \( \begin{align}
943         * &amp;index = (N + 1/4)p + 3/8\\
944         * &amp;estimate = x_{\lfloor h \rfloor} + (h -
945           \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
946         * \rfloor}) \\
947         * &amp;minLimit = (5/8)/(N+1/4) \\
948         * &amp;maxLimit = (N-3/8)/(N+1/4) \\
949         * \end{align}\)
950         */
951        R_9("R-9") {
952            @Override
953            protected double index(final double p, final int length) {
954                final double minLimit = 5d/8 / (length + 0.25);
955                final double maxLimit = (length - 3d/8) / (length + 0.25);
956                return Double.compare(p, minLimit) < 0 ? 0 :
957                       Double.compare(p, maxLimit) >= 0 ? length :
958                               (length + 0.25) * p + 3d/8;
959            }
960
961        },
962        ;
963
964        /** Simple name such as R-1, R-2 corresponding to those in wikipedia. */
965        private final String name;
966
967        /**
968         * Constructor
969         *
970         * @param type name of estimation type as per wikipedia
971         */
972        EstimationType(final String type) {
973            this.name = type;
974        }
975
976        /**
977         * Finds the index of array that can be used as starting index to
978         * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
979         * percentile. The calculation of index calculation is specific to each
980         * {@link EstimationType}.
981         *
982         * @param p the p<sup>th</sup> quantile
983         * @param length the total number of array elements in the work array
984         * @return a computed real valued index as explained in the wikipedia
985         */
986        protected abstract double index(final double p, final int length);
987
988        /**
989         * Estimation based on K<sup>th</sup> selection. This may be overridden
990         * in specific enums to compute slightly different estimations.
991         *
992         * @param work array of numbers to be used for finding the percentile
993         * @param pos indicated positional index prior computed from calling
994         *            {@link #index(double, int)}
995         * @param pivotsHeap an earlier populated cache if exists; will be used
996         * @param length size of array considered
997         * @param selector a {@link KthSelector} used for pivoting during search
998         * @return estimated percentile
999         */
1000        protected double estimate(final double[] work, final int[] pivotsHeap,
1001                                  final double pos, final int length,
1002                                  final KthSelector selector) {
1003
1004            final double fpos = FastMath.floor(pos);
1005            final int intPos = (int) fpos;
1006            final double dif = pos - fpos;
1007
1008            if (pos < 1) {
1009                return selector.select(work, pivotsHeap, 0);
1010            }
1011            if (pos >= length) {
1012                return selector.select(work, pivotsHeap, length - 1);
1013            }
1014
1015            final double lower = selector.select(work, pivotsHeap, intPos - 1);
1016            final double upper = selector.select(work, pivotsHeap, intPos);
1017            return lower + dif * (upper - lower);
1018        }
1019
1020        /**
1021         * Evaluate method to compute the percentile for a given bounded array
1022         * using earlier computed pivots heap.<br>
1023         * This basically calls the {@link #index(double, int) index} and then
1024         * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
1025         * functions to return the estimated percentile value.
1026         *
1027         * @param work array of numbers to be used for finding the percentile
1028         * @param pivotsHeap a prior cached heap which can speed up estimation
1029         * @param p the p<sup>th</sup> quantile to be computed
1030         * @param selector a {@link KthSelector} used for pivoting during search
1031         * @return estimated percentile
1032         * @throws OutOfRangeException if p is out of range
1033         * @throws NullArgumentException if work array is null
1034         */
1035        protected double evaluate(final double[] work, final int[] pivotsHeap, final double p,
1036                                  final KthSelector selector) {
1037            MathUtils.checkNotNull(work);
1038            if (p > 100 || p <= 0) {
1039                throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE,
1040                                              p, 0, 100);
1041            }
1042            return estimate(work, pivotsHeap, index(p/100d, work.length), work.length, selector);
1043        }
1044
1045        /**
1046         * Evaluate method to compute the percentile for a given bounded array.
1047         * This basically calls the {@link #index(double, int) index} and then
1048         * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
1049         * functions to return the estimated percentile value. Please
1050         * note that this method does not make use of cached pivots.
1051         *
1052         * @param work array of numbers to be used for finding the percentile
1053         * @param p the p<sup>th</sup> quantile to be computed
1054         * @return estimated percentile
1055         * @param selector a {@link KthSelector} used for pivoting during search
1056         * @throws OutOfRangeException if length or p is out of range
1057         * @throws NullArgumentException if work array is null
1058         */
1059        public double evaluate(final double[] work, final double p, final KthSelector selector) {
1060            return this.evaluate(work, null, p, selector);
1061        }
1062
1063        /**
1064         * Gets the name of the enum
1065         *
1066         * @return the name
1067         */
1068        String getName() {
1069            return name;
1070        }
1071    }
1072}