Percentile.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.math4.legacy.stat.descriptive.rank;

  18. import java.util.Arrays;
  19. import java.util.BitSet;

  20. import org.apache.commons.numbers.core.Precision;
  21. import org.apache.commons.numbers.arrays.SortInPlace;
  22. import org.apache.commons.math4.legacy.exception.NullArgumentException;
  23. import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
  24. import org.apache.commons.math4.legacy.exception.NotPositiveException;
  25. import org.apache.commons.math4.legacy.exception.NumberIsTooLargeException;
  26. import org.apache.commons.math4.legacy.exception.OutOfRangeException;
  27. import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
  28. import org.apache.commons.math4.legacy.stat.descriptive.AbstractUnivariateStatistic;
  29. import org.apache.commons.math4.legacy.stat.ranking.NaNStrategy;
  30. import org.apache.commons.math4.core.jdkmath.JdkMath;
  31. import org.apache.commons.math4.legacy.core.MathArrays;

  32. /**
  33.  * Provides percentile computation.
  34.  * <p>
  35.  * There are several commonly used methods for estimating percentiles (a.k.a.
  36.  * quantiles) based on sample data.  For large samples, the different methods
  37.  * agree closely, but when sample sizes are small, different methods will give
  38.  * significantly different results.  The algorithm implemented here works as follows:
  39.  * <ol>
  40.  * <li>Let <code>n</code> be the length of the (sorted) array and
  41.  * <code>0 &lt; p &lt;= 100</code> be the desired percentile.</li>
  42.  * <li>If <code> n = 1 </code> return the unique array element (regardless of
  43.  * the value of <code>p</code>); otherwise </li>
  44.  * <li>Compute the estimated percentile position
  45.  * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
  46.  * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
  47.  * part of <code>pos</code>).</li>
  48.  * <li> If <code>pos &lt; 1</code> return the smallest element in the array.</li>
  49.  * <li> Else if <code>pos &gt;= n</code> return the largest element in the array.</li>
  50.  * <li> Else let <code>lower</code> be the element in position
  51.  * <code>floor(pos)</code> in the array and let <code>upper</code> be the
  52.  * next element in the array.  Return <code>lower + d * (upper - lower)</code>
  53.  * </li>
  54.  * </ol>
  55.  * <p>
  56.  * To compute percentiles, the data must be at least partially ordered.  Input
  57.  * arrays are copied and recursively partitioned using an ordering definition.
  58.  * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
  59.  * by {@link java.lang.Double#compareTo(Double)}.  This ordering makes
  60.  * <code>Double.NaN</code> larger than any other value (including
  61.  * <code>Double.POSITIVE_INFINITY</code>).  Therefore, for example, the median
  62.  * (50th percentile) of
  63.  * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code></p>
  64.  * <p>
  65.  * Since percentile estimation usually involves interpolation between array
  66.  * elements, arrays containing  <code>NaN</code> or infinite values will often
  67.  * result in <code>NaN</code> or infinite values returned.</p>
  68.  * <p>
  69.  * Further, to include different estimation types such as R1, R2 as mentioned in
  70.  * <a href="http://en.wikipedia.org/wiki/Quantile">Quantile page(wikipedia)</a>,
  71.  * a type specific NaN handling strategy is used to closely match with the
  72.  * typically observed results from popular tools like R(R1-R9), Excel(R7).</p>
  73.  * <p>
  74.  * Since 2.2, Percentile uses only selection instead of complete sorting
  75.  * and caches selection algorithm state between calls to the various
  76.  * {@code evaluate} methods. This greatly improves efficiency, both for a single
  77.  * percentile and multiple percentile computations. To maximize performance when
  78.  * multiple percentiles are computed based on the same data, users should set the
  79.  * data array once using either one of the {@link #evaluate(double[], double)} or
  80.  * {@link #setData(double[])} methods and thereafter {@link #evaluate(double)}
  81.  * with just the percentile provided.
  82.  * </p>
  83.  * <p>
  84.  * <strong>Note that this implementation is not synchronized.</strong> If
  85.  * multiple threads access an instance of this class concurrently, and at least
  86.  * one of the threads invokes the <code>increment()</code> or
  87.  * <code>clear()</code> method, it must be synchronized externally.</p>
  88.  */
  89. public class Percentile extends AbstractUnivariateStatistic {
  90.     /** Maximum number of partitioning pivots cached (each level double the number of pivots). */
  91.     private static final int MAX_CACHED_LEVELS = 10;

  92.     /** Maximum number of cached pivots in the pivots cached array. */
  93.     private static final int PIVOTS_HEAP_LENGTH = 0x1 << MAX_CACHED_LEVELS - 1;

  94.     /** Default KthSelector used with default pivoting strategy. */
  95.     private final KthSelector kthSelector;

  96.     /** Any of the {@link EstimationType}s such as {@link EstimationType#LEGACY CM} can be used. */
  97.     private final EstimationType estimationType;

  98.     /** NaN Handling of the input as defined by {@link NaNStrategy}. */
  99.     private final NaNStrategy nanStrategy;

  100.     /**
  101.      * Determines what percentile is computed when evaluate() is activated
  102.      * with no quantile argument.
  103.      */
  104.     private double quantile;

  105.     /** Cached pivots. */
  106.     private int[] cachedPivots;

  107.     /** Weight. */
  108.     private double[] weights;
  109.     /**
  110.      * Constructs a Percentile with the following defaults.
  111.      * <ul>
  112.      *   <li>default quantile: 50.0, can be reset with {@link #setQuantile(double)}</li>
  113.      *   <li>default estimation type: {@link EstimationType#LEGACY},
  114.      *   can be reset with {@link #withEstimationType(EstimationType)}</li>
  115.      *   <li>default NaN strategy: {@link NaNStrategy#REMOVED},
  116.      *   can be reset with {@link #withNaNStrategy(NaNStrategy)}</li>
  117.      *   <li>a KthSelector that makes use of {@link MedianOf3PivotingStrategy},
  118.      *   can be reset with {@link #withKthSelector(KthSelector)}</li>
  119.      * </ul>
  120.      */
  121.     public Percentile() {
  122.         // No try-catch or advertised exception here - arg is valid
  123.         this(50.0);
  124.     }

  125.     /**
  126.      * Constructs a Percentile with the specific quantile value and the following.
  127.      * <ul>
  128.      *   <li>default method type: {@link EstimationType#LEGACY}</li>
  129.      *   <li>default NaN strategy: {@link NaNStrategy#REMOVED}</li>
  130.      *   <li>a Kth Selector : {@link KthSelector}</li>
  131.      * </ul>
  132.      * @param quantile the quantile
  133.      * @throws MathIllegalArgumentException  if p is not greater than 0 and less
  134.      * than or equal to 100
  135.      */
  136.     public Percentile(final double quantile) {
  137.         this(quantile, EstimationType.LEGACY, NaNStrategy.REMOVED,
  138.              new KthSelector(new MedianOf3PivotingStrategy()));
  139.     }

  140.     /**
  141.      * Copy constructor, creates a new {@code Percentile} identical.
  142.      * to the {@code original}
  143.      *
  144.      * @param original the {@code Percentile} instance to copy.
  145.      * Cannot be {@code null}.
  146.      */
  147.     public Percentile(final Percentile original) {
  148.         NullArgumentException.check(original);
  149.         estimationType   = original.getEstimationType();
  150.         nanStrategy      = original.getNaNStrategy();
  151.         kthSelector      = original.getKthSelector();

  152.         setData(original.getDataRef());
  153.         if (original.cachedPivots != null) {
  154.             System.arraycopy(original.cachedPivots, 0, cachedPivots, 0, original.cachedPivots.length);
  155.         }
  156.         setQuantile(original.quantile);
  157.     }

  158.     /**
  159.      * Constructs a Percentile with the specific quantile value,
  160.      * {@link EstimationType}, {@link NaNStrategy} and {@link KthSelector}.
  161.      *
  162.      * @param quantile the quantile to be computed
  163.      * @param estimationType one of the percentile {@link EstimationType  estimation types}
  164.      * @param nanStrategy one of {@link NaNStrategy} to handle with NaNs.
  165.      * Cannot be {@code null}.
  166.      * @param kthSelector a {@link KthSelector} to use for pivoting during search
  167.      * @throws MathIllegalArgumentException if p is not within (0,100]
  168.      */
  169.     protected Percentile(final double quantile,
  170.                          final EstimationType estimationType,
  171.                          final NaNStrategy nanStrategy,
  172.                          final KthSelector kthSelector) {
  173.         setQuantile(quantile);
  174.         cachedPivots = null;
  175.         NullArgumentException.check(estimationType);
  176.         NullArgumentException.check(nanStrategy);
  177.         NullArgumentException.check(kthSelector);
  178.         this.estimationType = estimationType;
  179.         this.nanStrategy = nanStrategy;
  180.         this.kthSelector = kthSelector;
  181.     }

  182.     /** {@inheritDoc} */
  183.     @Override
  184.     public void setData(final double[] values) {
  185.         if (values == null) {
  186.             cachedPivots = null;
  187.         } else {
  188.             cachedPivots = new int[PIVOTS_HEAP_LENGTH];
  189.             Arrays.fill(cachedPivots, -1);
  190.         }
  191.         super.setData(values);
  192.     }
  193.     /**
  194.      * Set the data array.
  195.      * @param values Data array.
  196.      * Cannot be {@code null}.
  197.      * @param sampleWeights corresponding positive and non-NaN weights.
  198.      * Cannot be {@code null}.
  199.      * @throws MathIllegalArgumentException if lengths of values and weights are not equal.
  200.      * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN
  201.      * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive
  202.      */
  203.     public void setData(final double[] values,
  204.                         final double[] sampleWeights) {
  205.         setData(values, sampleWeights, 0, values.length);
  206.     }

  207.     /** {@inheritDoc} */
  208.     @Override
  209.     public void setData(final double[] values, final int begin, final int length) {
  210.         if (values == null) {
  211.             cachedPivots = null;
  212.         } else {
  213.             cachedPivots = new int[PIVOTS_HEAP_LENGTH];
  214.             Arrays.fill(cachedPivots, -1);
  215.         }
  216.         super.setData(values, begin, length);
  217.     }
  218.     /**
  219.      * Set the data and weights arrays.  The input array is copied, not referenced.
  220.      * @param values Data array.
  221.      * Cannot be {@code null}.
  222.      * @param sampleWeights corresponding positive and non-NaN weights.
  223.      * Cannot be {@code null}.
  224.      * @param begin the index of the first element to include
  225.      * @param length the number of elements to include
  226.      * @throws MathIllegalArgumentException if lengths of values and weights are not equal or values or weights is null
  227.      * @throws NotPositiveException if begin or length is not positive
  228.      * @throws NumberIsTooLargeException if begin + length is greater than values.length
  229.      * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN
  230.      * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive
  231.      */
  232.     public void setData(final double[] values,
  233.                         final double[] sampleWeights,
  234.                         final int begin,
  235.                         final int length) {
  236.         if (begin < 0) {
  237.             throw new NotPositiveException(LocalizedFormats.START_POSITION, begin);
  238.         }

  239.         if (length < 0) {
  240.             throw new NotPositiveException(LocalizedFormats.LENGTH, length);
  241.         }

  242.         if (begin + length > values.length) {
  243.             throw new NumberIsTooLargeException(LocalizedFormats.SUBARRAY_ENDS_AFTER_ARRAY_END,
  244.                                                 begin + length, values.length, true);
  245.         }

  246.         if (sampleWeights == null) {
  247.             throw new MathIllegalArgumentException(LocalizedFormats.NULL_NOT_ALLOWED);
  248.         }
  249.         cachedPivots = new int[PIVOTS_HEAP_LENGTH];
  250.         Arrays.fill(cachedPivots, -1);

  251.         // Check length
  252.         if (values.length != sampleWeights.length) {
  253.             throw new MathIllegalArgumentException(LocalizedFormats.LENGTH,
  254.                                                    values, sampleWeights);
  255.         }
  256.         // Check weights > 0
  257.         MathArrays.checkPositive(sampleWeights);
  258.         MathArrays.checkNotNaN(sampleWeights);

  259.         super.setData(values, begin, length);
  260.         weights = new double[length];
  261.         System.arraycopy(sampleWeights, begin, weights, 0, length);
  262.     }
  263.     /**
  264.      * Returns the result of evaluating the statistic over the stored data.
  265.      * If weights have been set, it will compute weighted percentile.
  266.      * <p>
  267.      * The stored array is the one which was set by previous calls to
  268.      * {@link #setData(double[])} or {@link #setData(double[], double[], int, int)}
  269.      * </p>
  270.      * @param p the percentile value to compute
  271.      * @return the value of the statistic applied to the stored data
  272.      * @throws MathIllegalArgumentException if lengths of values and weights are not equal or values or weights is null
  273.      * @throws NotPositiveException if begin, length is negative
  274.      * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive
  275.      * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN
  276.      * @throws OutOfRangeException if p is invalid
  277.      * @throws NumberIsTooLargeException if begin + length is greater than values.length
  278.      * (p must be greater than 0 and less than or equal to 100)
  279.      */
  280.     public double evaluate(final double p) {
  281.         if (weights == null) {
  282.             return evaluate(getDataRef(), p);
  283.         } else {
  284.             return evaluate(getDataRef(), weights, p);
  285.         }
  286.     }

  287.     /**
  288.      * Returns an estimate of the <code>p</code>th percentile of the values
  289.      * in the <code>values</code> array.
  290.      * <p>
  291.      * Calls to this method do not modify the internal <code>quantile</code>
  292.      * state of this statistic.</p>
  293.      * <ul>
  294.      * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
  295.      * <code>0</code></li>
  296.      * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
  297.      *  if <code>values</code> has length <code>1</code></li>
  298.      * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
  299.      * is null or p is not a valid quantile value (p must be greater than 0
  300.      * and less than or equal to 100) </li>
  301.      * </ul>
  302.      * <p>
  303.      * See {@link Percentile} for a description of the percentile estimation
  304.      * algorithm used.</p>
  305.      *
  306.      * @param values input array of values
  307.      * @param p the percentile value to compute
  308.      * @return the percentile value or Double.NaN if the array is empty
  309.      * @throws MathIllegalArgumentException if <code>values</code> is null or p is invalid
  310.      */
  311.     public double evaluate(final double[] values, final double p) {
  312.         MathArrays.verifyValues(values, 0, 0);
  313.         return evaluate(values, 0, values.length, p);
  314.     }
  315.     /**
  316.      * Returns an estimate of the <code>p</code>th percentile of the values
  317.      * in the <code>values</code> array with their weights.
  318.      * <p>
  319.      * See {@link Percentile} for a description of the percentile estimation
  320.      * algorithm used.</p>
  321.      * @param values input array of values
  322.      * @param sampleWeights weights of values
  323.      * @param p the percentile value to compute
  324.      * @return the weighted percentile value or Double.NaN if the array is empty
  325.      * @throws MathIllegalArgumentException if lengths of values and weights are not equal or values or weights is null
  326.      * @throws NotPositiveException if begin, length is negative
  327.      * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive
  328.      * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN
  329.      * @throws OutOfRangeException if p is invalid
  330.      * @throws NumberIsTooLargeException if begin + length is greater than values.length
  331.      */
  332.     public double evaluate(final double[] values, final double[] sampleWeights, final double p) {
  333.         MathArrays.verifyValues(values, 0, 0);
  334.         MathArrays.verifyValues(sampleWeights, 0, 0);
  335.         return evaluate(values, sampleWeights, 0, values.length, p);
  336.     }

  337.     /**
  338.      * Returns an estimate of the <code>quantile</code>th percentile of the
  339.      * designated values in the <code>values</code> array.  The quantile
  340.      * estimated is determined by the <code>quantile</code> property.
  341.      * <ul>
  342.      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
  343.      * <li>Returns (for any value of <code>quantile</code>)
  344.      * <code>values[begin]</code> if <code>length = 1 </code></li>
  345.      * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
  346.      * is null, or <code>start</code> or <code>length</code> is invalid</li>
  347.      * </ul>
  348.      * <p>
  349.      * See {@link Percentile} for a description of the percentile estimation
  350.      * algorithm used.</p>
  351.      *
  352.      * @param values the input array
  353.      * @param start index of the first array element to include
  354.      * @param length the number of elements to include
  355.      * @return the percentile value
  356.      * @throws MathIllegalArgumentException if the parameters are not valid
  357.      *
  358.      */
  359.     @Override
  360.     public double evaluate(final double[] values, final int start, final int length) {
  361.         return evaluate(values, start, length, quantile);
  362.     }
  363.     /**
  364.      * Returns an estimate of the weighted <code>quantile</code>th percentile of the
  365.      * designated values in the <code>values</code> array.  The quantile
  366.      * estimated is determined by the <code>quantile</code> property.
  367.      * <p>
  368.      * See {@link Percentile} for a description of the percentile estimation
  369.      * algorithm used.</p>
  370.      *
  371.      * @param values the input array
  372.      * @param sampleWeights the weights of values
  373.      * @param start index of the first array element to include
  374.      * @param length the number of elements to include
  375.      * @return the percentile value
  376.      * @throws MathIllegalArgumentException if lengths of values and weights are not equal or values or weights is null
  377.      * @throws NotPositiveException if begin, length is negative
  378.      * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive
  379.      * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN
  380.      * @throws OutOfRangeException if p is invalid
  381.      * @throws NumberIsTooLargeException if begin + length is greater than values.length
  382.      */
  383.     public double evaluate(final double[] values, final double[] sampleWeights,
  384.                            final int start, final int length) {
  385.         return evaluate(values, sampleWeights, start, length, quantile);
  386.     }

  387.      /**
  388.      * Returns an estimate of the <code>p</code>th percentile of the values
  389.      * in the <code>values</code> array, starting with the element in (0-based)
  390.      * position <code>begin</code> in the array and including <code>length</code>
  391.      * values.
  392.      * <p>
  393.      * Calls to this method do not modify the internal <code>quantile</code>
  394.      * state of this statistic.</p>
  395.      * <ul>
  396.      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
  397.      * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
  398.      *  if <code>length = 1 </code></li>
  399.      * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
  400.      *  is null , <code>begin</code> or <code>length</code> is invalid, or
  401.      * <code>p</code> is not a valid quantile value (p must be greater than 0
  402.      * and less than or equal to 100)</li>
  403.      * </ul>
  404.      * <p>
  405.      * See {@link Percentile} for a description of the percentile estimation
  406.      * algorithm used.</p>
  407.      *
  408.      * @param values array of input values
  409.      * @param p the percentile to compute
  410.      * @param begin the first (0-based) element to include in the computation
  411.      * @param length the number of array elements to include
  412.      * @return the percentile value.
  413.      * @throws MathIllegalArgumentException if the parameters are not valid.
  414.      */
  415.     public double evaluate(final double[] values, final int begin,
  416.                            final int length, final double p) {
  417.         MathArrays.verifyValues(values, begin, length);
  418.         if (p > 100 || p <= 0) {
  419.             throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE,
  420.                                           p, 0, 100);
  421.         }
  422.         if (length == 0) {
  423.             return Double.NaN;
  424.         }
  425.         if (length == 1) {
  426.             return values[begin]; // always return single value for n = 1
  427.         }

  428.         final double[] work = getWorkArray(values, begin, length);
  429.         final int[] pivotsHeap = getPivots(values);
  430.         return work.length == 0 ?
  431.             Double.NaN :
  432.             estimationType.evaluate(work, pivotsHeap, p, kthSelector);
  433.     }
  434.      /**
  435.      * Returns an estimate of the <code>p</code>th percentile of the values
  436.      * in the <code>values</code> array with <code>sampleWeights</code>, starting with the element in (0-based)
  437.      * position <code>begin</code> in the array and including <code>length</code>
  438.      * values.
  439.      * <p>
  440.      * See {@link Percentile} for a description of the percentile estimation
  441.      * algorithm used.</p>
  442.      *
  443.      * @param values array of input values
  444.      * @param sampleWeights positive and non-NaN weights of values
  445.      * @param begin  the first (0-based) element to include in the computation
  446.      * @param length  the number of array elements to include
  447.      * @param p  percentile to compute
  448.      * @return the weighted percentile value
  449.      * @throws MathIllegalArgumentException if lengths of values and weights are not equal or values or weights is null
  450.      * @throws NotPositiveException if begin, length is negative
  451.      * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive
  452.      * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN
  453.      * @throws OutOfRangeException if p is invalid
  454.      * @throws NumberIsTooLargeException if begin + length is greater than values.length
  455.      */
  456.     public double evaluate(final double[] values, final double[] sampleWeights, final int begin,
  457.                            final int length, final double p) {
  458.         if (values == null || sampleWeights == null) {
  459.             throw new MathIllegalArgumentException(LocalizedFormats.NULL_NOT_ALLOWED);
  460.         }
  461.         // Check length
  462.         if (values.length != sampleWeights.length) {
  463.             throw new MathIllegalArgumentException(LocalizedFormats.LENGTH,
  464.                                                    values, sampleWeights);
  465.         }
  466.         MathArrays.verifyValues(values, begin, length);
  467.         MathArrays.verifyValues(sampleWeights, begin, length);
  468.         MathArrays.checkPositive(sampleWeights);
  469.         MathArrays.checkNotNaN(sampleWeights);

  470.         if (p > 100 || p <= 0) {
  471.             throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE,
  472.                                           p, 0, 100);
  473.         }
  474.         if (length == 0) {
  475.             return Double.NaN;
  476.         }
  477.         if (length == 1) {
  478.             // Always return single value for n = 1
  479.             return values[begin];
  480.         }

  481.         final double[] work = getWorkArray(values, begin, length);
  482.         final double[] workWeights = getWorkArray(values, sampleWeights, begin, length);
  483.         return work.length == 0 ? Double.NaN :
  484.                     estimationType.evaluate(work, workWeights, p);
  485.     }
  486.     /**
  487.      * Returns the value of the quantile field (determines what percentile is
  488.      * computed when evaluate() is called with no quantile argument).
  489.      *
  490.      * @return quantile set while construction or {@link #setQuantile(double)}
  491.      */
  492.     public double getQuantile() {
  493.         return quantile;
  494.     }

  495.     /**
  496.      * Sets the value of the quantile field (determines what percentile is
  497.      * computed when evaluate() is called with no quantile argument).
  498.      *
  499.      * @param p a value between 0 &lt; p &lt;= 100
  500.      * @throws MathIllegalArgumentException  if p is not greater than 0 and less
  501.      * than or equal to 100
  502.      */
  503.     public void setQuantile(final double p) {
  504.         if (p <= 0 || p > 100) {
  505.             throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE,
  506.                                           p, 0, 100);
  507.         }
  508.         quantile = p;
  509.     }

  510.     /**
  511.      * {@inheritDoc}
  512.      */
  513.     @Override
  514.     public Percentile copy() {
  515.         return new Percentile(this);
  516.     }

  517.     /**
  518.      * Get the work array to operate. Makes use of prior {@code storedData} if
  519.      * it exists or else do a check on NaNs and copy a subset of the array
  520.      * defined by begin and length parameters. The set {@link #nanStrategy} will
  521.      * be used to either retain/remove/replace any NaNs present before returning
  522.      * the resultant array.
  523.      *
  524.      * @param values the array of numbers
  525.      * @param begin index to start reading the array
  526.      * @param length the length of array to be read from the begin index
  527.      * @return work array sliced from values in the range [begin,begin+length)
  528.      * @throws MathIllegalArgumentException if values or indices are invalid
  529.      */
  530.     private double[] getWorkArray(final double[] values, final int begin, final int length) {
  531.         final double[] work;
  532.         if (values == getDataRef()) {
  533.             work = getDataRef();
  534.         } else {
  535.             switch (nanStrategy) {
  536.                 case MAXIMAL: // Replace NaNs with +INFs
  537.                     work = replaceAndSlice(values, begin, length, Double.NaN, Double.POSITIVE_INFINITY);
  538.                     break;
  539.                 case MINIMAL: // Replace NaNs with -INFs
  540.                     work = replaceAndSlice(values, begin, length, Double.NaN, Double.NEGATIVE_INFINITY);
  541.                     break;
  542.                 case REMOVED: // Drop NaNs from data
  543.                     work = removeAndSlice(values, begin, length, Double.NaN);
  544.                     break;
  545.                 case FAILED: // NaN is not acceptable
  546.                     work = copyOf(values, begin, length);
  547.                     MathArrays.checkNotNaN(work);
  548.                     break;
  549.                 default: // FIXED
  550.                     work = copyOf(values,begin,length);
  551.                     break;
  552.             }
  553.         }
  554.         return work;
  555.     }
  556.     /**
  557.      * Get the work arrays of weights to operate.
  558.      *
  559.      * @param values the array of numbers
  560.      * @param sampleWeights the array of weights
  561.      * @param begin index to start reading the array
  562.      * @param length the length of array to be read from the begin index
  563.      * @return work array sliced from values in the range [begin,begin+length)
  564.      */
  565.     protected double[] getWorkArray(final double[] values, final double[] sampleWeights,
  566.                                     final int begin, final int length) {
  567.         final double[] work;
  568.         if (values == getDataRef()) {
  569.             work = this.weights;
  570.         } else {
  571.             switch (nanStrategy) {
  572.                 case REMOVED: // Drop weight if the data is NaN
  573.                     work = removeAndSliceByRef(values, sampleWeights, begin, length, Double.NaN);
  574.                     break;
  575.                 default: // FIXED
  576.                     work = copyOf(sampleWeights, begin, length);
  577.                     break;
  578.             }
  579.         }
  580.         return work;
  581.     }
  582.     /**
  583.      * Make a copy of the array for the slice defined by array part from.
  584.      * [begin, begin+length)
  585.      * @param values the input array
  586.      * @param begin start index of the array to include
  587.      * @param length number of elements to include from begin
  588.      * @return copy of a slice of the original array
  589.      */
  590.     private static double[] copyOf(final double[] values, final int begin, final int length) {
  591.         MathArrays.verifyValues(values, begin, length);
  592.         return Arrays.copyOfRange(values, begin, begin + length);
  593.     }

  594.     /**
  595.      * Replace every occurrence of a given value with a replacement value in a
  596.      * copied slice of array defined by array part from [begin, begin+length).
  597.      *
  598.      * @param values the input array
  599.      * @param begin start index of the array to include
  600.      * @param length number of elements to include from begin
  601.      * @param original the value to be replaced with
  602.      * @param replacement the value to be used for replacement
  603.      * @return the copy of sliced array with replaced values
  604.      */
  605.     private static double[] replaceAndSlice(final double[] values,
  606.                                             final int begin, final int length,
  607.                                             final double original,
  608.                                             final double replacement) {
  609.         final double[] temp = copyOf(values, begin, length);
  610.         for(int i = 0; i < length; i++) {
  611.             temp[i] = Precision.equalsIncludingNaN(original, temp[i]) ?
  612.                 replacement :
  613.                 temp[i];
  614.         }

  615.         return temp;
  616.     }
  617.     /**
  618.      * Remove the occurrence of a given value in a copied slice of array
  619.      * defined by the array part from [begin, begin+length).
  620.      * @param values the input array
  621.      * @param begin start index of the array to include
  622.      * @param length number of elements to include from begin
  623.      * @param removedValue the value to be removed from the sliced array
  624.      * @return the copy of the sliced array after removing the removedValue
  625.      */
  626.     private static double[] removeAndSlice(final double[] values,
  627.                                            final int begin, final int length,
  628.                                            final double removedValue) {
  629.         MathArrays.verifyValues(values, begin, length);
  630.         final double[] temp;
  631.         // Indicates where the removedValue is located
  632.         final BitSet bits = new BitSet(length);
  633.         for (int i = begin; i < begin+length; i++) {
  634.             if (Precision.equalsIncludingNaN(removedValue, values[i])) {
  635.                 bits.set(i - begin);
  636.             }
  637.         }
  638.         // Check if empty then create a new copy
  639.         if (bits.isEmpty()) {
  640.             // Nothing removed, just copy
  641.             temp = copyOf(values, begin, length);
  642.         } else if(bits.cardinality() == length) {
  643.             // All removed, just empty
  644.             temp = new double[0];
  645.         } else {
  646.             // Some removable, so new
  647.             temp = new double[length - bits.cardinality()];
  648.             // Index from source array (i.e values)
  649.             int start = begin;
  650.             // Index in destination array(i.e temp)
  651.             int dest = 0;
  652.             // Index of bit set of next one
  653.             int nextOne = -1;
  654.             // Start index pointer of bitset
  655.             int bitSetPtr = 0;
  656.             while ((nextOne = bits.nextSetBit(bitSetPtr)) != -1) {
  657.                 final int lengthToCopy = nextOne - bitSetPtr;
  658.                 System.arraycopy(values, start, temp, dest, lengthToCopy);
  659.                 dest += lengthToCopy;
  660.                 start = begin + (bitSetPtr = bits.nextClearBit(nextOne));
  661.             }
  662.             // Copy any residue past start index till begin+length
  663.             if (start < begin + length) {
  664.                 System.arraycopy(values,start,temp,dest,begin + length - start);
  665.             }
  666.         }
  667.         return temp;
  668.     }
  669.     /**
  670.      * Remove weights element if the corresponding data is equal to the given value.
  671.      * in [begin, begin+length)
  672.      *
  673.      * @param values the input array
  674.      * @param sampleWeights weights of the input array
  675.      * @param begin start index of the array to include
  676.      * @param length number of elements to include from begin
  677.      * @param removedValue the value to be removed from the sliced array
  678.      * @return the copy of the sliced array after removing weights
  679.      */
  680.     private static double[] removeAndSliceByRef(final double[] values,
  681.                                                final double[] sampleWeights,
  682.                                                final int begin, final int length,
  683.                                                final double removedValue) {
  684.         MathArrays.verifyValues(values, begin, length);
  685.         final double[] temp;
  686.         //BitSet(length) to indicate where the removedValue is located
  687.         final BitSet bits = new BitSet(length);
  688.         for (int i = begin; i < begin+length; i++) {
  689.             if (Precision.equalsIncludingNaN(removedValue, values[i])) {
  690.                 bits.set(i - begin);
  691.             }
  692.         }
  693.         //Check if empty then create a new copy
  694.         if (bits.isEmpty()) {
  695.             temp = copyOf(sampleWeights, begin, length); // Nothing removed, just copy
  696.         } else if(bits.cardinality() == length) {
  697.             temp = new double[0];                 // All removed, just empty
  698.         }else {                                   // Some removable, so new
  699.             temp = new double[length - bits.cardinality()];
  700.             int start = begin;  //start index from source array (i.e sampleWeights)
  701.             int dest = 0;       //dest index in destination array(i.e temp)
  702.             int nextOne = -1;   //nextOne is the index of bit set of next one
  703.             int bitSetPtr = 0;  //bitSetPtr is start index pointer of bitset
  704.             while ((nextOne = bits.nextSetBit(bitSetPtr)) != -1) {
  705.                 final int lengthToCopy = nextOne - bitSetPtr;
  706.                 System.arraycopy(sampleWeights, start, temp, dest, lengthToCopy);
  707.                 dest += lengthToCopy;
  708.                 start = begin + (bitSetPtr = bits.nextClearBit(nextOne));
  709.             }
  710.             //Copy any residue past start index till begin+length
  711.             if (start < begin + length) {
  712.                 System.arraycopy(sampleWeights,start,temp,dest,begin + length - start);
  713.             }
  714.         }
  715.         return temp;
  716.     }
  717.     /**
  718.      * Get pivots which is either cached or a newly created one.
  719.      *
  720.      * @param values array containing the input numbers
  721.      * @return cached pivots or a newly created one
  722.      */
  723.     private int[] getPivots(final double[] values) {
  724.         final int[] pivotsHeap;
  725.         if (values == getDataRef()) {
  726.             pivotsHeap = cachedPivots;
  727.         } else {
  728.             pivotsHeap = new int[PIVOTS_HEAP_LENGTH];
  729.             Arrays.fill(pivotsHeap, -1);
  730.         }
  731.         return pivotsHeap;
  732.     }

  733.     /**
  734.      * Get the estimation {@link EstimationType type} used for computation.
  735.      *
  736.      * @return the {@code estimationType} set
  737.      */
  738.     public EstimationType getEstimationType() {
  739.         return estimationType;
  740.     }

  741.     /**
  742.      * Build a new instance similar to the current one except for the
  743.      * {@link EstimationType estimation type}.
  744.      * <p>
  745.      * This method is intended to be used as part of a fluent-type builder
  746.      * pattern. Building finely tune instances should be done as follows:
  747.      * </p>
  748.      * <pre>
  749.      *   Percentile customized = new Percentile(quantile).
  750.      *                           withEstimationType(estimationType).
  751.      *                           withNaNStrategy(nanStrategy).
  752.      *                           withKthSelector(kthSelector);
  753.      * </pre>
  754.      * <p>
  755.      * If any of the {@code withXxx} method is omitted, the default value for
  756.      * the corresponding customization parameter will be used.
  757.      * </p>
  758.      * @param newEstimationType estimation type for the new instance.
  759.      * Cannot be {@code null}.
  760.      * @return a new instance, with changed estimation type
  761.      */
  762.     public Percentile withEstimationType(final EstimationType newEstimationType) {
  763.         return new Percentile(quantile, newEstimationType, nanStrategy, kthSelector);
  764.     }

  765.     /**
  766.      * Get the {@link NaNStrategy NaN Handling} strategy used for computation.
  767.      * @return {@code NaN Handling} strategy set during construction
  768.      */
  769.     public NaNStrategy getNaNStrategy() {
  770.         return nanStrategy;
  771.     }

  772.     /**
  773.      * Build a new instance similar to the current one except for the
  774.      * {@link NaNStrategy NaN handling} strategy.
  775.      * <p>
  776.      * This method is intended to be used as part of a fluent-type builder
  777.      * pattern. Building finely tune instances should be done as follows:
  778.      * </p>
  779.      * <pre>
  780.      *   Percentile customized = new Percentile(quantile).
  781.      *                           withEstimationType(estimationType).
  782.      *                           withNaNStrategy(nanStrategy).
  783.      *                           withKthSelector(kthSelector);
  784.      * </pre>
  785.      * <p>
  786.      * If any of the {@code withXxx} method is omitted, the default value for
  787.      * the corresponding customization parameter will be used.
  788.      * </p>
  789.      * @param newNaNStrategy NaN strategy for the new instance.
  790.      * Cannot be {@code null}.
  791.      * @return a new instance, with changed NaN handling strategy
  792.      */
  793.     public Percentile withNaNStrategy(final NaNStrategy newNaNStrategy) {
  794.         return new Percentile(quantile, estimationType, newNaNStrategy, kthSelector);
  795.     }

  796.     /**
  797.      * Get the {@link KthSelector kthSelector} used for computation.
  798.      * @return the {@code kthSelector} set
  799.      */
  800.     public KthSelector getKthSelector() {
  801.         return kthSelector;
  802.     }

  803.     /**
  804.      * Get the {@link PivotingStrategy} used in KthSelector for computation.
  805.      * @return the pivoting strategy set
  806.      */
  807.     public PivotingStrategy getPivotingStrategy() {
  808.         return kthSelector.getPivotingStrategy();
  809.     }

  810.     /**
  811.      * Build a new instance similar to the current one except for the
  812.      * {@link KthSelector kthSelector} instance specifically set.
  813.      * <p>
  814.      * This method is intended to be used as part of a fluent-type builder
  815.      * pattern. Building finely tune instances should be done as follows:
  816.      * </p>
  817.      * <pre>
  818.      *   Percentile customized = new Percentile(quantile).
  819.      *                           withEstimationType(estimationType).
  820.      *                           withNaNStrategy(nanStrategy).
  821.      *                           withKthSelector(newKthSelector);
  822.      * </pre>
  823.      * <p>
  824.      * If any of the {@code withXxx} method is omitted, the default value for
  825.      * the corresponding customization parameter will be used.
  826.      * </p>
  827.      * @param newKthSelector KthSelector for the new instance.
  828.      * Cannot be {@code null}.
  829.      * @return a new instance, with changed KthSelector
  830.      */
  831.     public Percentile withKthSelector(final KthSelector newKthSelector) {
  832.         return new Percentile(quantile, estimationType, nanStrategy, newKthSelector);
  833.     }

  834.     /**
  835.      * An enum for various estimation strategies of a percentile referred in
  836.      * <a href="http://en.wikipedia.org/wiki/Quantile">wikipedia on quantile</a>
  837.      * with the names of enum matching those of types mentioned in
  838.      * wikipedia.
  839.      * <p>
  840.      * Each enum corresponding to the specific type of estimation in wikipedia
  841.      * implements  the respective formulae that specializes in the below aspects
  842.      * <ul>
  843.      * <li>An <b>index method</b> to calculate approximate index of the
  844.      * estimate</li>
  845.      * <li>An <b>estimate method</b> to estimate a value found at the earlier
  846.      * computed index</li>
  847.      * <li>A <b> minLimit</b> on the quantile for which first element of sorted
  848.      * input is returned as an estimate </li>
  849.      * <li>A <b> maxLimit</b> on the quantile for which last element of sorted
  850.      * input is returned as an estimate </li>
  851.      * </ul>
  852.      * <p>
  853.      * Users can now create {@link Percentile} by explicitly passing this enum;
  854.      * such as by invoking {@link Percentile#withEstimationType(EstimationType)}
  855.      * <p>
  856.      * References:
  857.      * <ol>
  858.      * <li>
  859.      * <a href="http://en.wikipedia.org/wiki/Quantile">Wikipedia on quantile</a>
  860.      * </li>
  861.      * <li>
  862.      * <a href="https://www.amherst.edu/media/view/129116/.../Sample+Quantiles.pdf">
  863.      * Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical
  864.      * packages, American Statistician 50, 361–365</a> </li>
  865.      * <li>
  866.      * <a href="http://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html">
  867.      * R-Manual </a></li>
  868.      * </ol>
  869.      */
  870.     public enum EstimationType {
  871.         /**
  872.          * This is the default type used in the {@link Percentile}.This method.
  873.          * has the following formulae for index and estimates<br>
  874.          * \( \begin{align}
  875.          * &amp;index    = (N+1)p\ \\
  876.          * &amp;estimate = x_{\lceil h\,-\,1/2 \rceil} \\
  877.          * &amp;minLimit = 0 \\
  878.          * &amp;maxLimit = 1 \\
  879.          * \end{align}\)
  880.          */
  881.         LEGACY("Legacy Apache Commons Math") {
  882.             /**
  883.              * {@inheritDoc}.This method in particular makes use of existing
  884.              * Apache Commons Math style of picking up the index.
  885.              */
  886.             @Override
  887.             protected double index(final double p, final int length) {
  888.                 final double minLimit = 0d;
  889.                 final double maxLimit = 1d;
  890.                 return Double.compare(p, minLimit) == 0 ? 0 :
  891.                        Double.compare(p, maxLimit) == 0 ?
  892.                                length : p * (length + 1);
  893.             }
  894.             @Override
  895.             public double evaluate(final double[] work, final double[] sampleWeights,
  896.                                    final double p) {
  897.                 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
  898.             }
  899.         },
  900.         /**
  901.          * The method R_1 has the following formulae for index and estimates.<br>
  902.          * \( \begin{align}
  903.          * &amp;index= Np + 1/2\,  \\
  904.          * &amp;estimate= x_{\lceil h\,-\,1/2 \rceil} \\
  905.          * &amp;minLimit = 0 \\
  906.          * \end{align}\)
  907.          */
  908.         R_1("R-1") {

  909.             @Override
  910.             protected double index(final double p, final int length) {
  911.                 final double minLimit = 0d;
  912.                 return Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5;
  913.             }

  914.             /**
  915.              * {@inheritDoc}This method in particular for R_1 uses ceil(pos-0.5)
  916.              */
  917.             @Override
  918.             protected double estimate(final double[] values,
  919.                                       final int[] pivotsHeap, final double pos,
  920.                                       final int length, final KthSelector selector) {
  921.                 return super.estimate(values, pivotsHeap, JdkMath.ceil(pos - 0.5), length, selector);
  922.             }
  923.             @Override
  924.             public double evaluate(final double[] work, final double[] sampleWeights,
  925.                                    final double p) {
  926.                 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
  927.             }
  928.         },
  929.         /**
  930.          * The method R_2 has the following formulae for index and estimates.<br>
  931.          * \( \begin{align}
  932.          * &amp;index= Np + 1/2\, \\
  933.          * &amp;estimate=\frac{x_{\lceil h\,-\,1/2 \rceil} +
  934.          * x_{\lfloor h\,+\,1/2 \rfloor}}{2} \\
  935.          * &amp;minLimit = 0 \\
  936.          * &amp;maxLimit = 1 \\
  937.          * \end{align}\)
  938.          */
  939.         R_2("R-2") {

  940.             @Override
  941.             protected double index(final double p, final int length) {
  942.                 final double minLimit = 0d;
  943.                 final double maxLimit = 1d;
  944.                 return Double.compare(p, maxLimit) == 0 ? length :
  945.                        Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5;
  946.             }

  947.             /**
  948.              * {@inheritDoc}This method in particular for R_2 averages the
  949.              * values at ceil(p+0.5) and floor(p-0.5).
  950.              */
  951.             @Override
  952.             protected double estimate(final double[] values,
  953.                                       final int[] pivotsHeap, final double pos,
  954.                                       final int length, final KthSelector selector) {
  955.                 final double low =
  956.                         super.estimate(values, pivotsHeap, JdkMath.ceil(pos - 0.5), length, selector);
  957.                 final double high =
  958.                         super.estimate(values, pivotsHeap,JdkMath.floor(pos + 0.5), length, selector);
  959.                 return (low + high) / 2;
  960.             }
  961.             @Override
  962.             public double evaluate(final double[] work, final double[] sampleWeights,
  963.                                    final double p) {
  964.                 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
  965.             }
  966.         },
  967.         /**
  968.          * The method R_3 has the following formulae for index and estimates.<br>
  969.          * \( \begin{align}
  970.          * &amp;index= Np \\
  971.          * &amp;estimate= x_{\lfloor h \rceil}\, \\
  972.          * &amp;minLimit = 0.5/N \\
  973.          * \end{align}\)
  974.          */
  975.         R_3("R-3") {
  976.             @Override
  977.             protected double index(final double p, final int length) {
  978.                 final double minLimit = 1d/2 / length;
  979.                 return Double.compare(p, minLimit) <= 0 ?
  980.                         0 : JdkMath.rint(length * p);
  981.             }
  982.             @Override
  983.             public double evaluate(final double[] work, final double[] sampleWeights,
  984.                                    final double p) {
  985.                 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
  986.             }
  987.         },
  988.         /**
  989.          * The method R_4 has the following formulae for index and estimates.<br>
  990.          * \( \begin{align}
  991.          * &amp;index= Np\, \\
  992.          * &amp;estimate= x_{\lfloor h \rfloor} + (h -
  993.          * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
  994.          * \rfloor}) \\
  995.          * &amp;minLimit = 1/N \\
  996.          * &amp;maxLimit = 1 \\
  997.          * \end{align}\)
  998.          */
  999.         R_4("R-4") {
  1000.             @Override
  1001.             protected double index(final double p, final int length) {
  1002.                 final double minLimit = 1d / length;
  1003.                 final double maxLimit = 1d;
  1004.                 return Double.compare(p, minLimit) < 0 ? 0 :
  1005.                        Double.compare(p, maxLimit) == 0 ? length : length * p;
  1006.             }
  1007.             @Override
  1008.             public double evaluate(final double[] work, final double[] sampleWeights,
  1009.                                    final double p) {
  1010.                 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
  1011.             }
  1012.         },
  1013.         /**
  1014.          * The method R_5 has the following formulae for index and estimates.<br>
  1015.          * \( \begin{align}
  1016.          * &amp;index= Np + 1/2\\
  1017.          * &amp;estimate= x_{\lfloor h \rfloor} + (h -
  1018.          * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
  1019.          * \rfloor}) \\
  1020.          * &amp;minLimit = 0.5/N \\
  1021.          * &amp;maxLimit = (N-0.5)/N
  1022.          * \end{align}\)
  1023.          */
  1024.         R_5("R-5") {

  1025.             @Override
  1026.             protected double index(final double p, final int length) {
  1027.                 final double minLimit = 1d/2 / length;
  1028.                 final double maxLimit = (length - 0.5) / length;
  1029.                 return Double.compare(p, minLimit) < 0 ? 0 :
  1030.                        Double.compare(p, maxLimit) >= 0 ?
  1031.                                length : length * p + 0.5;
  1032.             }
  1033.             @Override
  1034.             public double evaluate(final double[] work, final double[] sampleWeights,
  1035.                                    final double p) {
  1036.                 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
  1037.             }
  1038.         },
  1039.         /**
  1040.          * The method R_6 has the following formulae for index and estimates.<br>
  1041.          * \( \begin{align}
  1042.          * &amp;index= (N + 1)p \\
  1043.          * &amp;estimate= x_{\lfloor h \rfloor} + (h -
  1044.          * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
  1045.          * \rfloor}) \\
  1046.          * &amp;minLimit = 1/(N+1) \\
  1047.          * &amp;maxLimit = N/(N+1) \\
  1048.          * \end{align}\)
  1049.          * <p>
  1050.          * <b>Note:</b> This method computes the index in a manner very close to
  1051.          * the default Commons Math Percentile existing implementation. However
  1052.          * the difference to be noted is in picking up the limits with which
  1053.          * first element (p&lt;1(N+1)) and last elements (p&gt;N/(N+1))are done.
  1054.          * While in default case; these are done with p=0 and p=1 respectively.
  1055.          */
  1056.         R_6("R-6") {

  1057.             @Override
  1058.             protected double index(final double p, final int length) {
  1059.                 final double minLimit = 1d / (length + 1);
  1060.                 final double maxLimit = 1d * length / (length + 1);
  1061.                 return Double.compare(p, minLimit) < 0 ? 0 :
  1062.                        Double.compare(p, maxLimit) >= 0 ?
  1063.                                length : (length + 1) * p;
  1064.             }
  1065.             @Override
  1066.             public double evaluate(final double[] work, final double[] sampleWeights,
  1067.                                    final double p) {
  1068.                 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
  1069.             }
  1070.         },

  1071.         /**
  1072.          * The method R_7 implements Microsoft Excel style computation has the
  1073.          * following formulae for index and estimates.<br>
  1074.          * \( \begin{align}
  1075.          * &amp;index = (N-1)p + 1 \\
  1076.          * &amp;estimate = x_{\lfloor h \rfloor} + (h -
  1077.          * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
  1078.          * \rfloor}) \\
  1079.          * &amp;minLimit = 0 \\
  1080.          * &amp;maxLimit = 1 \\
  1081.          * \end{align}\)
  1082.          * The formula to evaluate weighted percentiles is as following.<br>
  1083.          * \( \begin{align}
  1084.          * &amp;S_k = (k-1)w_k + (n-1)\sum_{i=1}^{k-1}w_i
  1085.          * &amp;Then find k s.t. \frac{S_k}{S_n}\leq p \leq \frac{S_{k+1}}{S_n}
  1086.          * \end{align}\)
  1087.          */
  1088.         R_7("R-7") {
  1089.             @Override
  1090.             protected double index(final double p, final int length) {
  1091.                 final double minLimit = 0d;
  1092.                 final double maxLimit = 1d;
  1093.                 return Double.compare(p, minLimit) == 0 ? 0 :
  1094.                        Double.compare(p, maxLimit) == 0 ?
  1095.                                length : 1 + (length - 1) * p;
  1096.             }

  1097.             @Override
  1098.             public double evaluate(final double[] work, final double[] sampleWeights,
  1099.                                    final double p) {
  1100.                 SortInPlace.ASCENDING.apply(work, sampleWeights);
  1101.                 double[] sk = new double[work.length];
  1102.                 for(int k = 0; k < work.length; k++) {
  1103.                     sk[k] = 0;
  1104.                     for (int j = 0; j < k; j++) {
  1105.                         sk[k] += sampleWeights[j];
  1106.                     }
  1107.                     sk[k] = k * sampleWeights[k] + (work.length - 1) * sk[k];
  1108.                 }

  1109.                 double qsn = (p / 100) * sk[sk.length-1];
  1110.                 int k = searchSk(qsn, sk, 0, work.length - 1);

  1111.                 double ret;
  1112.                 if (qsn == sk[k] && k == work.length - 1) {
  1113.                     ret = work[k] - (work[k] - work[k-1]) * (1 - (qsn - sk[k]) / (sk[k] - sk[k-1]));
  1114.                 } else {
  1115.                     ret = work[k] + (work[k+1] - work[k]) * (qsn - sk[k]) / (sk[k+1] - sk[k]);
  1116.                 }
  1117.                 return ret;
  1118.             }
  1119.         },

  1120.         /**
  1121.          * The method R_8 has the following formulae for index and estimates.<br>
  1122.          * \( \begin{align}
  1123.          * &amp;index = (N + 1/3)p + 1/3  \\
  1124.          * &amp;estimate = x_{\lfloor h \rfloor} + (h -
  1125.            \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
  1126.          * \rfloor}) \\
  1127.          * &amp;minLimit = (2/3)/(N+1/3) \\
  1128.          * &amp;maxLimit = (N-1/3)/(N+1/3) \\
  1129.          * \end{align}\)
  1130.          * <p>
  1131.          * As per Ref [2,3] this approach is most recommended as it provides
  1132.          * an approximate median-unbiased estimate regardless of distribution.
  1133.          */
  1134.         R_8("R-8") {
  1135.             @Override
  1136.             protected double index(final double p, final int length) {
  1137.                 final double minLimit = 2 * (1d / 3) / (length + 1d / 3);
  1138.                 final double maxLimit =
  1139.                         (length - 1d / 3) / (length + 1d / 3);
  1140.                 return Double.compare(p, minLimit) < 0 ? 0 :
  1141.                        Double.compare(p, maxLimit) >= 0 ? length :
  1142.                            (length + 1d / 3) * p + 1d / 3;
  1143.             }
  1144.             @Override
  1145.             public double evaluate(final double[] work, final double[] sampleWeights,
  1146.                                    final double p) {
  1147.                 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
  1148.             }
  1149.         },

  1150.         /**
  1151.          * The method R_9 has the following formulae for index and estimates.<br>
  1152.          * \( \begin{align}
  1153.          * &amp;index = (N + 1/4)p + 3/8\\
  1154.          * &amp;estimate = x_{\lfloor h \rfloor} + (h -
  1155.            \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
  1156.          * \rfloor}) \\
  1157.          * &amp;minLimit = (5/8)/(N+1/4) \\
  1158.          * &amp;maxLimit = (N-3/8)/(N+1/4) \\
  1159.          * \end{align}\)
  1160.          */
  1161.         R_9("R-9") {
  1162.             @Override
  1163.             protected double index(final double p, final int length) {
  1164.                 final double minLimit = 5d/8 / (length + 0.25);
  1165.                 final double maxLimit = (length - 3d/8) / (length + 0.25);
  1166.                 return Double.compare(p, minLimit) < 0 ? 0 :
  1167.                        Double.compare(p, maxLimit) >= 0 ? length :
  1168.                                (length + 0.25) * p + 3d/8;
  1169.             }
  1170.             @Override
  1171.             public double evaluate(final double[] work, final double[] sampleWeights,
  1172.                                    final double p) {
  1173.                 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
  1174.             }
  1175.         },
  1176.         ;

  1177.         /** Simple name such as R-1, R-2 corresponding to those in wikipedia. */
  1178.         private final String name;

  1179.         /**
  1180.          * Constructor.
  1181.          *
  1182.          * @param type name of estimation type as per wikipedia
  1183.          */
  1184.         EstimationType(final String type) {
  1185.             this.name = type;
  1186.         }

  1187.         /**
  1188.          * Finds the index of array that can be used as starting index to
  1189.          * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
  1190.          * percentile. The calculation of index calculation is specific to each
  1191.          * {@link EstimationType}.
  1192.          *
  1193.          * @param p the p<sup>th</sup> quantile
  1194.          * @param length the total number of array elements in the work array
  1195.          * @return a computed real valued index as explained in the wikipedia
  1196.          */
  1197.         protected abstract double index(double p, int length);

  1198.         /**
  1199.          * Estimation based on K<sup>th</sup> selection. This may be overridden
  1200.          * in specific enums to compute slightly different estimations.
  1201.          *
  1202.          * @param work array of numbers to be used for finding the percentile
  1203.          * @param pos indicated positional index prior computed from calling
  1204.          *            {@link #index(double, int)}
  1205.          * @param pivotsHeap an earlier populated cache if exists; will be used
  1206.          * @param length size of array considered
  1207.          * @param selector a {@link KthSelector} used for pivoting during search
  1208.          * @return estimated percentile
  1209.          */
  1210.         protected double estimate(final double[] work, final int[] pivotsHeap,
  1211.                                   final double pos, final int length,
  1212.                                   final KthSelector selector) {

  1213.             final double fpos = JdkMath.floor(pos);
  1214.             final int intPos = (int) fpos;
  1215.             final double dif = pos - fpos;

  1216.             if (pos < 1) {
  1217.                 return selector.select(work, pivotsHeap, 0);
  1218.             }
  1219.             if (pos >= length) {
  1220.                 return selector.select(work, pivotsHeap, length - 1);
  1221.             }

  1222.             final double lower = selector.select(work, pivotsHeap, intPos - 1);
  1223.             final double upper = selector.select(work, pivotsHeap, intPos);
  1224.             return lower + dif * (upper - lower);
  1225.         }

  1226.         /**
  1227.          * Evaluate method to compute the percentile for a given bounded array
  1228.          * using earlier computed pivots heap.<br>
  1229.          * This basically calls the {@link #index(double, int) index} and then
  1230.          * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
  1231.          * functions to return the estimated percentile value.
  1232.          *
  1233.          * @param work array of numbers to be used for finding the percentile.
  1234.          * Cannot be {@code null}.
  1235.          * @param pivotsHeap a prior cached heap which can speed up estimation
  1236.          * @param p the p<sup>th</sup> quantile to be computed
  1237.          * @param selector a {@link KthSelector} used for pivoting during search
  1238.          * @return estimated percentile
  1239.          * @throws OutOfRangeException if p is out of range
  1240.          */
  1241.         protected double evaluate(final double[] work, final int[] pivotsHeap, final double p,
  1242.                                   final KthSelector selector) {
  1243.             NullArgumentException.check(work);
  1244.             if (p > 100 || p <= 0) {
  1245.                 throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE,
  1246.                                               p, 0, 100);
  1247.             }
  1248.             return estimate(work, pivotsHeap, index(p/100d, work.length), work.length, selector);
  1249.         }

  1250.         /**
  1251.          * Evaluate method to compute the percentile for a given bounded array.
  1252.          * This basically calls the {@link #index(double, int) index} and then
  1253.          * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
  1254.          * functions to return the estimated percentile value. Please
  1255.          * note that this method does not make use of cached pivots.
  1256.          *
  1257.          * @param work array of numbers to be used for finding the percentile.
  1258.          * Cannot be {@code null}.
  1259.          * @param p the p<sup>th</sup> quantile to be computed
  1260.          * @return estimated percentile
  1261.          * @param selector a {@link KthSelector} used for pivoting during search
  1262.          * @throws OutOfRangeException if length or p is out of range
  1263.          */
  1264.         public double evaluate(final double[] work, final double p, final KthSelector selector) {
  1265.             return this.evaluate(work, null, p, selector);
  1266.         }
  1267.         /**
  1268.          * Evaluate weighted percentile by estimation rule specified in {@link EstimationType}.
  1269.          * @param work array of numbers to be used for finding the percentile
  1270.          * @param sampleWeights the corresponding weights of data in work
  1271.          * @param p the p<sup>th</sup> quantile to be computed
  1272.          * @return estimated weighted percentile
  1273.          * @throws MathIllegalArgumentException if weighted percentile is not supported by the current estimationType
  1274.          */
  1275.         public abstract double evaluate(double[] work, double[] sampleWeights,
  1276.                                         double p);
  1277.         /**
  1278.          * Search the interval q*sn locates in.
  1279.          * @param qsn q*sn, where n refers to the data size
  1280.          * @param sk the cumulative weights array
  1281.          * @param lo start position to search qsn
  1282.          * @param hi end position to search qsn
  1283.          * @return the index of lower bound qsn locates in
  1284.          */
  1285.         private static int searchSk(double qsn, double[] sk, int lo, int hi) {
  1286.             if (sk.length == 1) {
  1287.                 return 0;
  1288.             }
  1289.             if (hi - lo == 1) {
  1290.                 if (qsn == sk[hi]) {
  1291.                     return hi;
  1292.                 }
  1293.                 return lo;
  1294.             } else {
  1295.                 int mid = (lo + hi) >>> 1;
  1296.                 if (qsn == sk[mid]) {
  1297.                   return mid;
  1298.                 } else if (qsn > sk[mid]) {
  1299.                     return searchSk(qsn, sk, mid, hi);
  1300.                 } else {
  1301.                     return searchSk(qsn, sk, lo, mid);
  1302.                 }
  1303.             }
  1304.         }
  1305.         /**
  1306.          * Gets the name of the enum.
  1307.          *
  1308.          * @return the name
  1309.          */
  1310.         String getName() {
  1311.             return name;
  1312.         }
  1313.     }
  1314. }