1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math4.legacy.stat.descriptive.rank;
18
19 import java.util.Arrays;
20 import java.util.BitSet;
21
22 import org.apache.commons.numbers.core.Precision;
23 import org.apache.commons.numbers.arrays.SortInPlace;
24 import org.apache.commons.math4.legacy.exception.NullArgumentException;
25 import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
26 import org.apache.commons.math4.legacy.exception.NotPositiveException;
27 import org.apache.commons.math4.legacy.exception.NumberIsTooLargeException;
28 import org.apache.commons.math4.legacy.exception.OutOfRangeException;
29 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
30 import org.apache.commons.math4.legacy.stat.descriptive.AbstractUnivariateStatistic;
31 import org.apache.commons.math4.legacy.stat.ranking.NaNStrategy;
32 import org.apache.commons.math4.core.jdkmath.JdkMath;
33 import org.apache.commons.math4.legacy.core.MathArrays;
34
35 /**
36 * Provides percentile computation.
37 * <p>
38 * There are several commonly used methods for estimating percentiles (a.k.a.
39 * quantiles) based on sample data. For large samples, the different methods
40 * agree closely, but when sample sizes are small, different methods will give
41 * significantly different results. The algorithm implemented here works as follows:
42 * <ol>
43 * <li>Let <code>n</code> be the length of the (sorted) array and
44 * <code>0 < p <= 100</code> be the desired percentile.</li>
45 * <li>If <code> n = 1 </code> return the unique array element (regardless of
46 * the value of <code>p</code>); otherwise </li>
47 * <li>Compute the estimated percentile position
48 * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
49 * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
50 * part of <code>pos</code>).</li>
51 * <li> If <code>pos < 1</code> return the smallest element in the array.</li>
52 * <li> Else if <code>pos >= n</code> return the largest element in the array.</li>
53 * <li> Else let <code>lower</code> be the element in position
54 * <code>floor(pos)</code> in the array and let <code>upper</code> be the
55 * next element in the array. Return <code>lower + d * (upper - lower)</code>
56 * </li>
57 * </ol>
58 * <p>
59 * To compute percentiles, the data must be at least partially ordered. Input
60 * arrays are copied and recursively partitioned using an ordering definition.
61 * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
62 * by {@link java.lang.Double#compareTo(Double)}. This ordering makes
63 * <code>Double.NaN</code> larger than any other value (including
64 * <code>Double.POSITIVE_INFINITY</code>). Therefore, for example, the median
65 * (50th percentile) of
66 * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code></p>
67 * <p>
68 * Since percentile estimation usually involves interpolation between array
69 * elements, arrays containing <code>NaN</code> or infinite values will often
70 * result in <code>NaN</code> or infinite values returned.</p>
71 * <p>
72 * Further, to include different estimation types such as R1, R2 as mentioned in
73 * <a href="http://en.wikipedia.org/wiki/Quantile">Quantile page(wikipedia)</a>,
74 * a type specific NaN handling strategy is used to closely match with the
75 * typically observed results from popular tools like R(R1-R9), Excel(R7).</p>
76 * <p>
77 * Since 2.2, Percentile uses only selection instead of complete sorting
78 * and caches selection algorithm state between calls to the various
79 * {@code evaluate} methods. This greatly improves efficiency, both for a single
80 * percentile and multiple percentile computations. To maximize performance when
81 * multiple percentiles are computed based on the same data, users should set the
82 * data array once using either one of the {@link #evaluate(double[], double)} or
83 * {@link #setData(double[])} methods and thereafter {@link #evaluate(double)}
84 * with just the percentile provided.
85 * </p>
86 * <p>
87 * <strong>Note that this implementation is not synchronized.</strong> If
88 * multiple threads access an instance of this class concurrently, and at least
89 * one of the threads invokes the <code>increment()</code> or
90 * <code>clear()</code> method, it must be synchronized externally.</p>
91 */
92 public class Percentile extends AbstractUnivariateStatistic {
93 /** Maximum number of partitioning pivots cached (each level double the number of pivots). */
94 private static final int MAX_CACHED_LEVELS = 10;
95
96 /** Maximum number of cached pivots in the pivots cached array. */
97 private static final int PIVOTS_HEAP_LENGTH = 0x1 << MAX_CACHED_LEVELS - 1;
98
99 /** Default KthSelector used with default pivoting strategy. */
100 private final KthSelector kthSelector;
101
102 /** Any of the {@link EstimationType}s such as {@link EstimationType#LEGACY CM} can be used. */
103 private final EstimationType estimationType;
104
105 /** NaN Handling of the input as defined by {@link NaNStrategy}. */
106 private final NaNStrategy nanStrategy;
107
108 /**
109 * Determines what percentile is computed when evaluate() is activated
110 * with no quantile argument.
111 */
112 private double quantile;
113
114 /** Cached pivots. */
115 private int[] cachedPivots;
116
117 /** Weight. */
118 private double[] weights;
119 /**
120 * Constructs a Percentile with the following defaults.
121 * <ul>
122 * <li>default quantile: 50.0, can be reset with {@link #setQuantile(double)}</li>
123 * <li>default estimation type: {@link EstimationType#LEGACY},
124 * can be reset with {@link #withEstimationType(EstimationType)}</li>
125 * <li>default NaN strategy: {@link NaNStrategy#REMOVED},
126 * can be reset with {@link #withNaNStrategy(NaNStrategy)}</li>
127 * <li>a KthSelector that makes use of {@link MedianOf3PivotingStrategy},
128 * can be reset with {@link #withKthSelector(KthSelector)}</li>
129 * </ul>
130 */
131 public Percentile() {
132 // No try-catch or advertised exception here - arg is valid
133 this(50.0);
134 }
135
136 /**
137 * Constructs a Percentile with the specific quantile value and the following.
138 * <ul>
139 * <li>default method type: {@link EstimationType#LEGACY}</li>
140 * <li>default NaN strategy: {@link NaNStrategy#REMOVED}</li>
141 * <li>a Kth Selector : {@link KthSelector}</li>
142 * </ul>
143 * @param quantile the quantile
144 * @throws MathIllegalArgumentException if p is not greater than 0 and less
145 * than or equal to 100
146 */
147 public Percentile(final double quantile) {
148 this(quantile, EstimationType.LEGACY, NaNStrategy.REMOVED,
149 new KthSelector(new MedianOf3PivotingStrategy()));
150 }
151
152 /**
153 * Copy constructor, creates a new {@code Percentile} identical.
154 * to the {@code original}
155 *
156 * @param original the {@code Percentile} instance to copy.
157 * Cannot be {@code null}.
158 */
159 public Percentile(final Percentile original) {
160 NullArgumentException.check(original);
161 estimationType = original.getEstimationType();
162 nanStrategy = original.getNaNStrategy();
163 kthSelector = original.getKthSelector();
164
165 setData(original.getDataRef());
166 if (original.cachedPivots != null) {
167 System.arraycopy(original.cachedPivots, 0, cachedPivots, 0, original.cachedPivots.length);
168 }
169 setQuantile(original.quantile);
170 }
171
172 /**
173 * Constructs a Percentile with the specific quantile value,
174 * {@link EstimationType}, {@link NaNStrategy} and {@link KthSelector}.
175 *
176 * @param quantile the quantile to be computed
177 * @param estimationType one of the percentile {@link EstimationType estimation types}
178 * @param nanStrategy one of {@link NaNStrategy} to handle with NaNs.
179 * Cannot be {@code null}.
180 * @param kthSelector a {@link KthSelector} to use for pivoting during search
181 * @throws MathIllegalArgumentException if p is not within (0,100]
182 */
183 protected Percentile(final double quantile,
184 final EstimationType estimationType,
185 final NaNStrategy nanStrategy,
186 final KthSelector kthSelector) {
187 setQuantile(quantile);
188 cachedPivots = null;
189 NullArgumentException.check(estimationType);
190 NullArgumentException.check(nanStrategy);
191 NullArgumentException.check(kthSelector);
192 this.estimationType = estimationType;
193 this.nanStrategy = nanStrategy;
194 this.kthSelector = kthSelector;
195 }
196
197 /** {@inheritDoc} */
198 @Override
199 public void setData(final double[] values) {
200 if (values == null) {
201 cachedPivots = null;
202 } else {
203 cachedPivots = new int[PIVOTS_HEAP_LENGTH];
204 Arrays.fill(cachedPivots, -1);
205 }
206 super.setData(values);
207 }
208 /**
209 * Set the data array.
210 * @param values Data array.
211 * Cannot be {@code null}.
212 * @param sampleWeights corresponding positive and non-NaN weights.
213 * Cannot be {@code null}.
214 * @throws MathIllegalArgumentException if lengths of values and weights are not equal.
215 * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN
216 * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive
217 */
218 public void setData(final double[] values,
219 final double[] sampleWeights) {
220 setData(values, sampleWeights, 0, values.length);
221 }
222
223 /** {@inheritDoc} */
224 @Override
225 public void setData(final double[] values, final int begin, final int length) {
226 if (values == null) {
227 cachedPivots = null;
228 } else {
229 cachedPivots = new int[PIVOTS_HEAP_LENGTH];
230 Arrays.fill(cachedPivots, -1);
231 }
232 super.setData(values, begin, length);
233 }
234 /**
235 * Set the data and weights arrays. The input array is copied, not referenced.
236 * @param values Data array.
237 * Cannot be {@code null}.
238 * @param sampleWeights corresponding positive and non-NaN weights.
239 * Cannot be {@code null}.
240 * @param begin the index of the first element to include
241 * @param length the number of elements to include
242 * @throws MathIllegalArgumentException if lengths of values and weights are not equal or values or weights is null
243 * @throws NotPositiveException if begin or length is not positive
244 * @throws NumberIsTooLargeException if begin + length is greater than values.length
245 * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN
246 * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive
247 */
248 public void setData(final double[] values,
249 final double[] sampleWeights,
250 final int begin,
251 final int length) {
252 if (begin < 0) {
253 throw new NotPositiveException(LocalizedFormats.START_POSITION, begin);
254 }
255
256 if (length < 0) {
257 throw new NotPositiveException(LocalizedFormats.LENGTH, length);
258 }
259
260 if (begin + length > values.length) {
261 throw new NumberIsTooLargeException(LocalizedFormats.SUBARRAY_ENDS_AFTER_ARRAY_END,
262 begin + length, values.length, true);
263 }
264
265 if (sampleWeights == null) {
266 throw new MathIllegalArgumentException(LocalizedFormats.NULL_NOT_ALLOWED);
267 }
268 cachedPivots = new int[PIVOTS_HEAP_LENGTH];
269 Arrays.fill(cachedPivots, -1);
270
271 // Check length
272 if (values.length != sampleWeights.length) {
273 throw new MathIllegalArgumentException(LocalizedFormats.LENGTH,
274 values, sampleWeights);
275 }
276 // Check weights > 0
277 MathArrays.checkPositive(sampleWeights);
278 MathArrays.checkNotNaN(sampleWeights);
279
280 super.setData(values, begin, length);
281 weights = new double[length];
282 System.arraycopy(sampleWeights, begin, weights, 0, length);
283 }
284 /**
285 * Returns the result of evaluating the statistic over the stored data.
286 * If weights have been set, it will compute weighted percentile.
287 * <p>
288 * The stored array is the one which was set by previous calls to
289 * {@link #setData(double[])} or {@link #setData(double[], double[], int, int)}
290 * </p>
291 * @param p the percentile value to compute
292 * @return the value of the statistic applied to the stored data
293 * @throws MathIllegalArgumentException if lengths of values and weights are not equal or values or weights is null
294 * @throws NotPositiveException if begin, length is negative
295 * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive
296 * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN
297 * @throws OutOfRangeException if p is invalid
298 * @throws NumberIsTooLargeException if begin + length is greater than values.length
299 * (p must be greater than 0 and less than or equal to 100)
300 */
301 public double evaluate(final double p) {
302 if (weights == null) {
303 return evaluate(getDataRef(), p);
304 } else {
305 return evaluate(getDataRef(), weights, p);
306 }
307 }
308
309 /**
310 * Returns an estimate of the <code>p</code>th percentile of the values
311 * in the <code>values</code> array.
312 * <p>
313 * Calls to this method do not modify the internal <code>quantile</code>
314 * state of this statistic.</p>
315 * <ul>
316 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
317 * <code>0</code></li>
318 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
319 * if <code>values</code> has length <code>1</code></li>
320 * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
321 * is null or p is not a valid quantile value (p must be greater than 0
322 * and less than or equal to 100) </li>
323 * </ul>
324 * <p>
325 * See {@link Percentile} for a description of the percentile estimation
326 * algorithm used.</p>
327 *
328 * @param values input array of values
329 * @param p the percentile value to compute
330 * @return the percentile value or Double.NaN if the array is empty
331 * @throws MathIllegalArgumentException if <code>values</code> is null or p is invalid
332 */
333 public double evaluate(final double[] values, final double p) {
334 MathArrays.verifyValues(values, 0, 0);
335 return evaluate(values, 0, values.length, p);
336 }
337 /**
338 * Returns an estimate of the <code>p</code>th percentile of the values
339 * in the <code>values</code> array with their weights.
340 * <p>
341 * See {@link Percentile} for a description of the percentile estimation
342 * algorithm used.</p>
343 * @param values input array of values
344 * @param sampleWeights weights of values
345 * @param p the percentile value to compute
346 * @return the weighted percentile value or Double.NaN if the array is empty
347 * @throws MathIllegalArgumentException if lengths of values and weights are not equal or values or weights is null
348 * @throws NotPositiveException if begin, length is negative
349 * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive
350 * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN
351 * @throws OutOfRangeException if p is invalid
352 * @throws NumberIsTooLargeException if begin + length is greater than values.length
353 */
354 public double evaluate(final double[] values, final double[] sampleWeights, final double p) {
355 MathArrays.verifyValues(values, 0, 0);
356 MathArrays.verifyValues(sampleWeights, 0, 0);
357 return evaluate(values, sampleWeights, 0, values.length, p);
358 }
359
360 /**
361 * Returns an estimate of the <code>quantile</code>th percentile of the
362 * designated values in the <code>values</code> array. The quantile
363 * estimated is determined by the <code>quantile</code> property.
364 * <ul>
365 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
366 * <li>Returns (for any value of <code>quantile</code>)
367 * <code>values[begin]</code> if <code>length = 1 </code></li>
368 * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
369 * is null, or <code>start</code> or <code>length</code> is invalid</li>
370 * </ul>
371 * <p>
372 * See {@link Percentile} for a description of the percentile estimation
373 * algorithm used.</p>
374 *
375 * @param values the input array
376 * @param start index of the first array element to include
377 * @param length the number of elements to include
378 * @return the percentile value
379 * @throws MathIllegalArgumentException if the parameters are not valid
380 *
381 */
382 @Override
383 public double evaluate(final double[] values, final int start, final int length) {
384 return evaluate(values, start, length, quantile);
385 }
386 /**
387 * Returns an estimate of the weighted <code>quantile</code>th percentile of the
388 * designated values in the <code>values</code> array. The quantile
389 * estimated is determined by the <code>quantile</code> property.
390 * <p>
391 * See {@link Percentile} for a description of the percentile estimation
392 * algorithm used.</p>
393 *
394 * @param values the input array
395 * @param sampleWeights the weights of values
396 * @param start index of the first array element to include
397 * @param length the number of elements to include
398 * @return the percentile value
399 * @throws MathIllegalArgumentException if lengths of values and weights are not equal or values or weights is null
400 * @throws NotPositiveException if begin, length is negative
401 * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive
402 * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN
403 * @throws OutOfRangeException if p is invalid
404 * @throws NumberIsTooLargeException if begin + length is greater than values.length
405 */
406 public double evaluate(final double[] values, final double[] sampleWeights,
407 final int start, final int length) {
408 return evaluate(values, sampleWeights, start, length, quantile);
409 }
410
411 /**
412 * Returns an estimate of the <code>p</code>th percentile of the values
413 * in the <code>values</code> array, starting with the element in (0-based)
414 * position <code>begin</code> in the array and including <code>length</code>
415 * values.
416 * <p>
417 * Calls to this method do not modify the internal <code>quantile</code>
418 * state of this statistic.</p>
419 * <ul>
420 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
421 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
422 * if <code>length = 1 </code></li>
423 * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
424 * is null , <code>begin</code> or <code>length</code> is invalid, or
425 * <code>p</code> is not a valid quantile value (p must be greater than 0
426 * and less than or equal to 100)</li>
427 * </ul>
428 * <p>
429 * See {@link Percentile} for a description of the percentile estimation
430 * algorithm used.</p>
431 *
432 * @param values array of input values
433 * @param p the percentile to compute
434 * @param begin the first (0-based) element to include in the computation
435 * @param length the number of array elements to include
436 * @return the percentile value.
437 * @throws MathIllegalArgumentException if the parameters are not valid.
438 */
439 public double evaluate(final double[] values, final int begin,
440 final int length, final double p) {
441 MathArrays.verifyValues(values, begin, length);
442 if (p > 100 || p <= 0) {
443 throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE,
444 p, 0, 100);
445 }
446 if (length == 0) {
447 return Double.NaN;
448 }
449 if (length == 1) {
450 return values[begin]; // always return single value for n = 1
451 }
452
453 final double[] work = getWorkArray(values, begin, length);
454 final int[] pivotsHeap = getPivots(values);
455 return work.length == 0 ?
456 Double.NaN :
457 estimationType.evaluate(work, pivotsHeap, p, kthSelector);
458 }
459 /**
460 * Returns an estimate of the <code>p</code>th percentile of the values
461 * in the <code>values</code> array with <code>sampleWeights</code>, starting with the element in (0-based)
462 * position <code>begin</code> in the array and including <code>length</code>
463 * values.
464 * <p>
465 * See {@link Percentile} for a description of the percentile estimation
466 * algorithm used.</p>
467 *
468 * @param values array of input values
469 * @param sampleWeights positive and non-NaN weights of values
470 * @param begin the first (0-based) element to include in the computation
471 * @param length the number of array elements to include
472 * @param p percentile to compute
473 * @return the weighted percentile value
474 * @throws MathIllegalArgumentException if lengths of values and weights are not equal or values or weights is null
475 * @throws NotPositiveException if begin, length is negative
476 * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive
477 * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN
478 * @throws OutOfRangeException if p is invalid
479 * @throws NumberIsTooLargeException if begin + length is greater than values.length
480 */
481 public double evaluate(final double[] values, final double[] sampleWeights, final int begin,
482 final int length, final double p) {
483 if (values == null || sampleWeights == null) {
484 throw new MathIllegalArgumentException(LocalizedFormats.NULL_NOT_ALLOWED);
485 }
486 // Check length
487 if (values.length != sampleWeights.length) {
488 throw new MathIllegalArgumentException(LocalizedFormats.LENGTH,
489 values, sampleWeights);
490 }
491 MathArrays.verifyValues(values, begin, length);
492 MathArrays.verifyValues(sampleWeights, begin, length);
493 MathArrays.checkPositive(sampleWeights);
494 MathArrays.checkNotNaN(sampleWeights);
495
496 if (p > 100 || p <= 0) {
497 throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE,
498 p, 0, 100);
499 }
500 if (length == 0) {
501 return Double.NaN;
502 }
503 if (length == 1) {
504 // Always return single value for n = 1
505 return values[begin];
506 }
507
508 final double[] work = getWorkArray(values, begin, length);
509 final double[] workWeights = getWorkArray(values, sampleWeights, begin, length);
510 return work.length == 0 ? Double.NaN :
511 estimationType.evaluate(work, workWeights, p);
512 }
513 /**
514 * Returns the value of the quantile field (determines what percentile is
515 * computed when evaluate() is called with no quantile argument).
516 *
517 * @return quantile set while construction or {@link #setQuantile(double)}
518 */
519 public double getQuantile() {
520 return quantile;
521 }
522
523 /**
524 * Sets the value of the quantile field (determines what percentile is
525 * computed when evaluate() is called with no quantile argument).
526 *
527 * @param p a value between 0 < p <= 100
528 * @throws MathIllegalArgumentException if p is not greater than 0 and less
529 * than or equal to 100
530 */
531 public void setQuantile(final double p) {
532 if (p <= 0 || p > 100) {
533 throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE,
534 p, 0, 100);
535 }
536 quantile = p;
537 }
538
539 /**
540 * {@inheritDoc}
541 */
542 @Override
543 public Percentile copy() {
544 return new Percentile(this);
545 }
546
547 /**
548 * Get the work array to operate. Makes use of prior {@code storedData} if
549 * it exists or else do a check on NaNs and copy a subset of the array
550 * defined by begin and length parameters. The set {@link #nanStrategy} will
551 * be used to either retain/remove/replace any NaNs present before returning
552 * the resultant array.
553 *
554 * @param values the array of numbers
555 * @param begin index to start reading the array
556 * @param length the length of array to be read from the begin index
557 * @return work array sliced from values in the range [begin,begin+length)
558 * @throws MathIllegalArgumentException if values or indices are invalid
559 */
560 private double[] getWorkArray(final double[] values, final int begin, final int length) {
561 final double[] work;
562 if (values == getDataRef()) {
563 work = getDataRef();
564 } else {
565 switch (nanStrategy) {
566 case MAXIMAL: // Replace NaNs with +INFs
567 work = replaceAndSlice(values, begin, length, Double.NaN, Double.POSITIVE_INFINITY);
568 break;
569 case MINIMAL: // Replace NaNs with -INFs
570 work = replaceAndSlice(values, begin, length, Double.NaN, Double.NEGATIVE_INFINITY);
571 break;
572 case REMOVED: // Drop NaNs from data
573 work = removeAndSlice(values, begin, length, Double.NaN);
574 break;
575 case FAILED: // NaN is not acceptable
576 work = copyOf(values, begin, length);
577 MathArrays.checkNotNaN(work);
578 break;
579 default: // FIXED
580 work = copyOf(values,begin,length);
581 break;
582 }
583 }
584 return work;
585 }
586 /**
587 * Get the work arrays of weights to operate.
588 *
589 * @param values the array of numbers
590 * @param sampleWeights the array of weights
591 * @param begin index to start reading the array
592 * @param length the length of array to be read from the begin index
593 * @return work array sliced from values in the range [begin,begin+length)
594 */
595 protected double[] getWorkArray(final double[] values, final double[] sampleWeights,
596 final int begin, final int length) {
597 final double[] work;
598 if (values == getDataRef()) {
599 work = this.weights;
600 } else {
601 switch (nanStrategy) {
602 case REMOVED: // Drop weight if the data is NaN
603 work = removeAndSliceByRef(values, sampleWeights, begin, length, Double.NaN);
604 break;
605 default: // FIXED
606 work = copyOf(sampleWeights, begin, length);
607 break;
608 }
609 }
610 return work;
611 }
612 /**
613 * Make a copy of the array for the slice defined by array part from.
614 * [begin, begin+length)
615 * @param values the input array
616 * @param begin start index of the array to include
617 * @param length number of elements to include from begin
618 * @return copy of a slice of the original array
619 */
620 private static double[] copyOf(final double[] values, final int begin, final int length) {
621 MathArrays.verifyValues(values, begin, length);
622 return Arrays.copyOfRange(values, begin, begin + length);
623 }
624
625 /**
626 * Replace every occurrence of a given value with a replacement value in a
627 * copied slice of array defined by array part from [begin, begin+length).
628 *
629 * @param values the input array
630 * @param begin start index of the array to include
631 * @param length number of elements to include from begin
632 * @param original the value to be replaced with
633 * @param replacement the value to be used for replacement
634 * @return the copy of sliced array with replaced values
635 */
636 private static double[] replaceAndSlice(final double[] values,
637 final int begin, final int length,
638 final double original,
639 final double replacement) {
640 final double[] temp = copyOf(values, begin, length);
641 for(int i = 0; i < length; i++) {
642 temp[i] = Precision.equalsIncludingNaN(original, temp[i]) ?
643 replacement :
644 temp[i];
645 }
646
647 return temp;
648 }
649 /**
650 * Remove the occurrence of a given value in a copied slice of array
651 * defined by the array part from [begin, begin+length).
652 * @param values the input array
653 * @param begin start index of the array to include
654 * @param length number of elements to include from begin
655 * @param removedValue the value to be removed from the sliced array
656 * @return the copy of the sliced array after removing the removedValue
657 */
658 private static double[] removeAndSlice(final double[] values,
659 final int begin, final int length,
660 final double removedValue) {
661 MathArrays.verifyValues(values, begin, length);
662 final double[] temp;
663 // Indicates where the removedValue is located
664 final BitSet bits = new BitSet(length);
665 for (int i = begin; i < begin+length; i++) {
666 if (Precision.equalsIncludingNaN(removedValue, values[i])) {
667 bits.set(i - begin);
668 }
669 }
670 // Check if empty then create a new copy
671 if (bits.isEmpty()) {
672 // Nothing removed, just copy
673 temp = copyOf(values, begin, length);
674 } else if(bits.cardinality() == length) {
675 // All removed, just empty
676 temp = new double[0];
677 } else {
678 // Some removable, so new
679 temp = new double[length - bits.cardinality()];
680 // Index from source array (i.e values)
681 int start = begin;
682 // Index in destination array(i.e temp)
683 int dest = 0;
684 // Index of bit set of next one
685 int nextOne = -1;
686 // Start index pointer of bitset
687 int bitSetPtr = 0;
688 while ((nextOne = bits.nextSetBit(bitSetPtr)) != -1) {
689 final int lengthToCopy = nextOne - bitSetPtr;
690 System.arraycopy(values, start, temp, dest, lengthToCopy);
691 dest += lengthToCopy;
692 start = begin + (bitSetPtr = bits.nextClearBit(nextOne));
693 }
694 // Copy any residue past start index till begin+length
695 if (start < begin + length) {
696 System.arraycopy(values,start,temp,dest,begin + length - start);
697 }
698 }
699 return temp;
700 }
701 /**
702 * Remove weights element if the corresponding data is equal to the given value.
703 * in [begin, begin+length)
704 *
705 * @param values the input array
706 * @param sampleWeights weights of the input array
707 * @param begin start index of the array to include
708 * @param length number of elements to include from begin
709 * @param removedValue the value to be removed from the sliced array
710 * @return the copy of the sliced array after removing weights
711 */
712 private static double[] removeAndSliceByRef(final double[] values,
713 final double[] sampleWeights,
714 final int begin, final int length,
715 final double removedValue) {
716 MathArrays.verifyValues(values, begin, length);
717 final double[] temp;
718 //BitSet(length) to indicate where the removedValue is located
719 final BitSet bits = new BitSet(length);
720 for (int i = begin; i < begin+length; i++) {
721 if (Precision.equalsIncludingNaN(removedValue, values[i])) {
722 bits.set(i - begin);
723 }
724 }
725 //Check if empty then create a new copy
726 if (bits.isEmpty()) {
727 temp = copyOf(sampleWeights, begin, length); // Nothing removed, just copy
728 } else if(bits.cardinality() == length) {
729 temp = new double[0]; // All removed, just empty
730 }else { // Some removable, so new
731 temp = new double[length - bits.cardinality()];
732 int start = begin; //start index from source array (i.e sampleWeights)
733 int dest = 0; //dest index in destination array(i.e temp)
734 int nextOne = -1; //nextOne is the index of bit set of next one
735 int bitSetPtr = 0; //bitSetPtr is start index pointer of bitset
736 while ((nextOne = bits.nextSetBit(bitSetPtr)) != -1) {
737 final int lengthToCopy = nextOne - bitSetPtr;
738 System.arraycopy(sampleWeights, start, temp, dest, lengthToCopy);
739 dest += lengthToCopy;
740 start = begin + (bitSetPtr = bits.nextClearBit(nextOne));
741 }
742 //Copy any residue past start index till begin+length
743 if (start < begin + length) {
744 System.arraycopy(sampleWeights,start,temp,dest,begin + length - start);
745 }
746 }
747 return temp;
748 }
749 /**
750 * Get pivots which is either cached or a newly created one.
751 *
752 * @param values array containing the input numbers
753 * @return cached pivots or a newly created one
754 */
755 private int[] getPivots(final double[] values) {
756 final int[] pivotsHeap;
757 if (values == getDataRef()) {
758 pivotsHeap = cachedPivots;
759 } else {
760 pivotsHeap = new int[PIVOTS_HEAP_LENGTH];
761 Arrays.fill(pivotsHeap, -1);
762 }
763 return pivotsHeap;
764 }
765
766 /**
767 * Get the estimation {@link EstimationType type} used for computation.
768 *
769 * @return the {@code estimationType} set
770 */
771 public EstimationType getEstimationType() {
772 return estimationType;
773 }
774
775 /**
776 * Build a new instance similar to the current one except for the
777 * {@link EstimationType estimation type}.
778 * <p>
779 * This method is intended to be used as part of a fluent-type builder
780 * pattern. Building finely tune instances should be done as follows:
781 * </p>
782 * <pre>
783 * Percentile customized = new Percentile(quantile).
784 * withEstimationType(estimationType).
785 * withNaNStrategy(nanStrategy).
786 * withKthSelector(kthSelector);
787 * </pre>
788 * <p>
789 * If any of the {@code withXxx} method is omitted, the default value for
790 * the corresponding customization parameter will be used.
791 * </p>
792 * @param newEstimationType estimation type for the new instance.
793 * Cannot be {@code null}.
794 * @return a new instance, with changed estimation type
795 */
796 public Percentile withEstimationType(final EstimationType newEstimationType) {
797 return new Percentile(quantile, newEstimationType, nanStrategy, kthSelector);
798 }
799
800 /**
801 * Get the {@link NaNStrategy NaN Handling} strategy used for computation.
802 * @return {@code NaN Handling} strategy set during construction
803 */
804 public NaNStrategy getNaNStrategy() {
805 return nanStrategy;
806 }
807
808 /**
809 * Build a new instance similar to the current one except for the
810 * {@link NaNStrategy NaN handling} strategy.
811 * <p>
812 * This method is intended to be used as part of a fluent-type builder
813 * pattern. Building finely tune instances should be done as follows:
814 * </p>
815 * <pre>
816 * Percentile customized = new Percentile(quantile).
817 * withEstimationType(estimationType).
818 * withNaNStrategy(nanStrategy).
819 * withKthSelector(kthSelector);
820 * </pre>
821 * <p>
822 * If any of the {@code withXxx} method is omitted, the default value for
823 * the corresponding customization parameter will be used.
824 * </p>
825 * @param newNaNStrategy NaN strategy for the new instance.
826 * Cannot be {@code null}.
827 * @return a new instance, with changed NaN handling strategy
828 */
829 public Percentile withNaNStrategy(final NaNStrategy newNaNStrategy) {
830 return new Percentile(quantile, estimationType, newNaNStrategy, kthSelector);
831 }
832
833 /**
834 * Get the {@link KthSelector kthSelector} used for computation.
835 * @return the {@code kthSelector} set
836 */
837 public KthSelector getKthSelector() {
838 return kthSelector;
839 }
840
841 /**
842 * Get the {@link PivotingStrategy} used in KthSelector for computation.
843 * @return the pivoting strategy set
844 */
845 public PivotingStrategy getPivotingStrategy() {
846 return kthSelector.getPivotingStrategy();
847 }
848
849 /**
850 * Build a new instance similar to the current one except for the
851 * {@link KthSelector kthSelector} instance specifically set.
852 * <p>
853 * This method is intended to be used as part of a fluent-type builder
854 * pattern. Building finely tune instances should be done as follows:
855 * </p>
856 * <pre>
857 * Percentile customized = new Percentile(quantile).
858 * withEstimationType(estimationType).
859 * withNaNStrategy(nanStrategy).
860 * withKthSelector(newKthSelector);
861 * </pre>
862 * <p>
863 * If any of the {@code withXxx} method is omitted, the default value for
864 * the corresponding customization parameter will be used.
865 * </p>
866 * @param newKthSelector KthSelector for the new instance.
867 * Cannot be {@code null}.
868 * @return a new instance, with changed KthSelector
869 */
870 public Percentile withKthSelector(final KthSelector newKthSelector) {
871 return new Percentile(quantile, estimationType, nanStrategy, newKthSelector);
872 }
873
874 /**
875 * An enum for various estimation strategies of a percentile referred in
876 * <a href="http://en.wikipedia.org/wiki/Quantile">wikipedia on quantile</a>
877 * with the names of enum matching those of types mentioned in
878 * wikipedia.
879 * <p>
880 * Each enum corresponding to the specific type of estimation in wikipedia
881 * implements the respective formulae that specializes in the below aspects
882 * <ul>
883 * <li>An <b>index method</b> to calculate approximate index of the
884 * estimate</li>
885 * <li>An <b>estimate method</b> to estimate a value found at the earlier
886 * computed index</li>
887 * <li>A <b> minLimit</b> on the quantile for which first element of sorted
888 * input is returned as an estimate </li>
889 * <li>A <b> maxLimit</b> on the quantile for which last element of sorted
890 * input is returned as an estimate </li>
891 * </ul>
892 * <p>
893 * Users can now create {@link Percentile} by explicitly passing this enum;
894 * such as by invoking {@link Percentile#withEstimationType(EstimationType)}
895 * <p>
896 * References:
897 * <ol>
898 * <li>
899 * <a href="http://en.wikipedia.org/wiki/Quantile">Wikipedia on quantile</a>
900 * </li>
901 * <li>
902 * <a href="https://www.amherst.edu/media/view/129116/.../Sample+Quantiles.pdf">
903 * Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical
904 * packages, American Statistician 50, 361–365</a> </li>
905 * <li>
906 * <a href="http://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html">
907 * R-Manual </a></li>
908 * </ol>
909 */
910 public enum EstimationType {
911 /**
912 * This is the default type used in the {@link Percentile}.This method.
913 * has the following formulae for index and estimates<br>
914 * \( \begin{align}
915 * &index = (N+1)p\ \\
916 * &estimate = x_{\lceil h\,-\,1/2 \rceil} \\
917 * &minLimit = 0 \\
918 * &maxLimit = 1 \\
919 * \end{align}\)
920 */
921 LEGACY("Legacy Apache Commons Math") {
922 /**
923 * {@inheritDoc}.This method in particular makes use of existing
924 * Apache Commons Math style of picking up the index.
925 */
926 @Override
927 protected double index(final double p, final int length) {
928 final double minLimit = 0d;
929 final double maxLimit = 1d;
930 return Double.compare(p, minLimit) == 0 ? 0 :
931 Double.compare(p, maxLimit) == 0 ?
932 length : p * (length + 1);
933 }
934 @Override
935 public double evaluate(final double[] work, final double[] sampleWeights,
936 final double p) {
937 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
938 }
939 },
940 /**
941 * The method R_1 has the following formulae for index and estimates.<br>
942 * \( \begin{align}
943 * &index= Np + 1/2\, \\
944 * &estimate= x_{\lceil h\,-\,1/2 \rceil} \\
945 * &minLimit = 0 \\
946 * \end{align}\)
947 */
948 R_1("R-1") {
949
950 @Override
951 protected double index(final double p, final int length) {
952 final double minLimit = 0d;
953 return Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5;
954 }
955
956 /**
957 * {@inheritDoc}This method in particular for R_1 uses ceil(pos-0.5)
958 */
959 @Override
960 protected double estimate(final double[] values,
961 final int[] pivotsHeap, final double pos,
962 final int length, final KthSelector selector) {
963 return super.estimate(values, pivotsHeap, JdkMath.ceil(pos - 0.5), length, selector);
964 }
965 @Override
966 public double evaluate(final double[] work, final double[] sampleWeights,
967 final double p) {
968 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
969 }
970 },
971 /**
972 * The method R_2 has the following formulae for index and estimates.<br>
973 * \( \begin{align}
974 * &index= Np + 1/2\, \\
975 * &estimate=\frac{x_{\lceil h\,-\,1/2 \rceil} +
976 * x_{\lfloor h\,+\,1/2 \rfloor}}{2} \\
977 * &minLimit = 0 \\
978 * &maxLimit = 1 \\
979 * \end{align}\)
980 */
981 R_2("R-2") {
982
983 @Override
984 protected double index(final double p, final int length) {
985 final double minLimit = 0d;
986 final double maxLimit = 1d;
987 return Double.compare(p, maxLimit) == 0 ? length :
988 Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5;
989 }
990
991 /**
992 * {@inheritDoc}This method in particular for R_2 averages the
993 * values at ceil(p+0.5) and floor(p-0.5).
994 */
995 @Override
996 protected double estimate(final double[] values,
997 final int[] pivotsHeap, final double pos,
998 final int length, final KthSelector selector) {
999 final double low =
1000 super.estimate(values, pivotsHeap, JdkMath.ceil(pos - 0.5), length, selector);
1001 final double high =
1002 super.estimate(values, pivotsHeap,JdkMath.floor(pos + 0.5), length, selector);
1003 return (low + high) / 2;
1004 }
1005 @Override
1006 public double evaluate(final double[] work, final double[] sampleWeights,
1007 final double p) {
1008 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
1009 }
1010 },
1011 /**
1012 * The method R_3 has the following formulae for index and estimates.<br>
1013 * \( \begin{align}
1014 * &index= Np \\
1015 * &estimate= x_{\lfloor h \rceil}\, \\
1016 * &minLimit = 0.5/N \\
1017 * \end{align}\)
1018 */
1019 R_3("R-3") {
1020 @Override
1021 protected double index(final double p, final int length) {
1022 final double minLimit = 1d/2 / length;
1023 return Double.compare(p, minLimit) <= 0 ?
1024 0 : JdkMath.rint(length * p);
1025 }
1026 @Override
1027 public double evaluate(final double[] work, final double[] sampleWeights,
1028 final double p) {
1029 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
1030 }
1031 },
1032 /**
1033 * The method R_4 has the following formulae for index and estimates.<br>
1034 * \( \begin{align}
1035 * &index= Np\, \\
1036 * &estimate= x_{\lfloor h \rfloor} + (h -
1037 * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
1038 * \rfloor}) \\
1039 * &minLimit = 1/N \\
1040 * &maxLimit = 1 \\
1041 * \end{align}\)
1042 */
1043 R_4("R-4") {
1044 @Override
1045 protected double index(final double p, final int length) {
1046 final double minLimit = 1d / length;
1047 final double maxLimit = 1d;
1048 return Double.compare(p, minLimit) < 0 ? 0 :
1049 Double.compare(p, maxLimit) == 0 ? length : length * p;
1050 }
1051 @Override
1052 public double evaluate(final double[] work, final double[] sampleWeights,
1053 final double p) {
1054 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
1055 }
1056 },
1057 /**
1058 * The method R_5 has the following formulae for index and estimates.<br>
1059 * \( \begin{align}
1060 * &index= Np + 1/2\\
1061 * &estimate= x_{\lfloor h \rfloor} + (h -
1062 * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
1063 * \rfloor}) \\
1064 * &minLimit = 0.5/N \\
1065 * &maxLimit = (N-0.5)/N
1066 * \end{align}\)
1067 */
1068 R_5("R-5") {
1069
1070 @Override
1071 protected double index(final double p, final int length) {
1072 final double minLimit = 1d/2 / length;
1073 final double maxLimit = (length - 0.5) / length;
1074 return Double.compare(p, minLimit) < 0 ? 0 :
1075 Double.compare(p, maxLimit) >= 0 ?
1076 length : length * p + 0.5;
1077 }
1078 @Override
1079 public double evaluate(final double[] work, final double[] sampleWeights,
1080 final double p) {
1081 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
1082 }
1083 },
1084 /**
1085 * The method R_6 has the following formulae for index and estimates.<br>
1086 * \( \begin{align}
1087 * &index= (N + 1)p \\
1088 * &estimate= x_{\lfloor h \rfloor} + (h -
1089 * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
1090 * \rfloor}) \\
1091 * &minLimit = 1/(N+1) \\
1092 * &maxLimit = N/(N+1) \\
1093 * \end{align}\)
1094 * <p>
1095 * <b>Note:</b> This method computes the index in a manner very close to
1096 * the default Commons Math Percentile existing implementation. However
1097 * the difference to be noted is in picking up the limits with which
1098 * first element (p<1(N+1)) and last elements (p>N/(N+1))are done.
1099 * While in default case; these are done with p=0 and p=1 respectively.
1100 */
1101 R_6("R-6") {
1102
1103 @Override
1104 protected double index(final double p, final int length) {
1105 final double minLimit = 1d / (length + 1);
1106 final double maxLimit = 1d * length / (length + 1);
1107 return Double.compare(p, minLimit) < 0 ? 0 :
1108 Double.compare(p, maxLimit) >= 0 ?
1109 length : (length + 1) * p;
1110 }
1111 @Override
1112 public double evaluate(final double[] work, final double[] sampleWeights,
1113 final double p) {
1114 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
1115 }
1116 },
1117
1118 /**
1119 * The method R_7 implements Microsoft Excel style computation has the
1120 * following formulae for index and estimates.<br>
1121 * \( \begin{align}
1122 * &index = (N-1)p + 1 \\
1123 * &estimate = x_{\lfloor h \rfloor} + (h -
1124 * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
1125 * \rfloor}) \\
1126 * &minLimit = 0 \\
1127 * &maxLimit = 1 \\
1128 * \end{align}\)
1129 * The formula to evaluate weighted percentiles is as following.<br>
1130 * \( \begin{align}
1131 * &S_k = (k-1)w_k + (n-1)\sum_{i=1}^{k-1}w_i
1132 * &Then find k s.t. \frac{S_k}{S_n}\leq p \leq \frac{S_{k+1}}{S_n}
1133 * \end{align}\)
1134 */
1135 R_7("R-7") {
1136 @Override
1137 protected double index(final double p, final int length) {
1138 final double minLimit = 0d;
1139 final double maxLimit = 1d;
1140 return Double.compare(p, minLimit) == 0 ? 0 :
1141 Double.compare(p, maxLimit) == 0 ?
1142 length : 1 + (length - 1) * p;
1143 }
1144
1145 @Override
1146 public double evaluate(final double[] work, final double[] sampleWeights,
1147 final double p) {
1148 SortInPlace.ASCENDING.apply(work, sampleWeights);
1149 double[] sk = new double[work.length];
1150 for(int k = 0; k < work.length; k++) {
1151 sk[k] = 0;
1152 for (int j = 0; j < k; j++) {
1153 sk[k] += sampleWeights[j];
1154 }
1155 sk[k] = k * sampleWeights[k] + (work.length - 1) * sk[k];
1156 }
1157
1158 double qsn = (p / 100) * sk[sk.length-1];
1159 int k = searchSk(qsn, sk, 0, work.length - 1);
1160
1161 double ret;
1162 if (qsn == sk[k] && k == work.length - 1) {
1163 ret = work[k] - (work[k] - work[k-1]) * (1 - (qsn - sk[k]) / (sk[k] - sk[k-1]));
1164 } else {
1165 ret = work[k] + (work[k+1] - work[k]) * (qsn - sk[k]) / (sk[k+1] - sk[k]);
1166 }
1167 return ret;
1168 }
1169 },
1170
1171 /**
1172 * The method R_8 has the following formulae for index and estimates.<br>
1173 * \( \begin{align}
1174 * &index = (N + 1/3)p + 1/3 \\
1175 * &estimate = x_{\lfloor h \rfloor} + (h -
1176 \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
1177 * \rfloor}) \\
1178 * &minLimit = (2/3)/(N+1/3) \\
1179 * &maxLimit = (N-1/3)/(N+1/3) \\
1180 * \end{align}\)
1181 * <p>
1182 * As per Ref [2,3] this approach is most recommended as it provides
1183 * an approximate median-unbiased estimate regardless of distribution.
1184 */
1185 R_8("R-8") {
1186 @Override
1187 protected double index(final double p, final int length) {
1188 final double minLimit = 2 * (1d / 3) / (length + 1d / 3);
1189 final double maxLimit =
1190 (length - 1d / 3) / (length + 1d / 3);
1191 return Double.compare(p, minLimit) < 0 ? 0 :
1192 Double.compare(p, maxLimit) >= 0 ? length :
1193 (length + 1d / 3) * p + 1d / 3;
1194 }
1195 @Override
1196 public double evaluate(final double[] work, final double[] sampleWeights,
1197 final double p) {
1198 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
1199 }
1200 },
1201
1202 /**
1203 * The method R_9 has the following formulae for index and estimates.<br>
1204 * \( \begin{align}
1205 * &index = (N + 1/4)p + 3/8\\
1206 * &estimate = x_{\lfloor h \rfloor} + (h -
1207 \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
1208 * \rfloor}) \\
1209 * &minLimit = (5/8)/(N+1/4) \\
1210 * &maxLimit = (N-3/8)/(N+1/4) \\
1211 * \end{align}\)
1212 */
1213 R_9("R-9") {
1214 @Override
1215 protected double index(final double p, final int length) {
1216 final double minLimit = 5d/8 / (length + 0.25);
1217 final double maxLimit = (length - 3d/8) / (length + 0.25);
1218 return Double.compare(p, minLimit) < 0 ? 0 :
1219 Double.compare(p, maxLimit) >= 0 ? length :
1220 (length + 0.25) * p + 3d/8;
1221 }
1222 @Override
1223 public double evaluate(final double[] work, final double[] sampleWeights,
1224 final double p) {
1225 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION);
1226 }
1227 },
1228 ;
1229
1230 /** Simple name such as R-1, R-2 corresponding to those in wikipedia. */
1231 private final String name;
1232
1233 /**
1234 * Constructor.
1235 *
1236 * @param type name of estimation type as per wikipedia
1237 */
1238 EstimationType(final String type) {
1239 this.name = type;
1240 }
1241
1242 /**
1243 * Finds the index of array that can be used as starting index to
1244 * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
1245 * percentile. The calculation of index calculation is specific to each
1246 * {@link EstimationType}.
1247 *
1248 * @param p the p<sup>th</sup> quantile
1249 * @param length the total number of array elements in the work array
1250 * @return a computed real valued index as explained in the wikipedia
1251 */
1252 protected abstract double index(double p, int length);
1253
1254 /**
1255 * Estimation based on K<sup>th</sup> selection. This may be overridden
1256 * in specific enums to compute slightly different estimations.
1257 *
1258 * @param work array of numbers to be used for finding the percentile
1259 * @param pos indicated positional index prior computed from calling
1260 * {@link #index(double, int)}
1261 * @param pivotsHeap an earlier populated cache if exists; will be used
1262 * @param length size of array considered
1263 * @param selector a {@link KthSelector} used for pivoting during search
1264 * @return estimated percentile
1265 */
1266 protected double estimate(final double[] work, final int[] pivotsHeap,
1267 final double pos, final int length,
1268 final KthSelector selector) {
1269
1270 final double fpos = JdkMath.floor(pos);
1271 final int intPos = (int) fpos;
1272 final double dif = pos - fpos;
1273
1274 if (pos < 1) {
1275 return selector.select(work, pivotsHeap, 0);
1276 }
1277 if (pos >= length) {
1278 return selector.select(work, pivotsHeap, length - 1);
1279 }
1280
1281 final double lower = selector.select(work, pivotsHeap, intPos - 1);
1282 final double upper = selector.select(work, pivotsHeap, intPos);
1283 return lower + dif * (upper - lower);
1284 }
1285
1286 /**
1287 * Evaluate method to compute the percentile for a given bounded array
1288 * using earlier computed pivots heap.<br>
1289 * This basically calls the {@link #index(double, int) index} and then
1290 * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
1291 * functions to return the estimated percentile value.
1292 *
1293 * @param work array of numbers to be used for finding the percentile.
1294 * Cannot be {@code null}.
1295 * @param pivotsHeap a prior cached heap which can speed up estimation
1296 * @param p the p<sup>th</sup> quantile to be computed
1297 * @param selector a {@link KthSelector} used for pivoting during search
1298 * @return estimated percentile
1299 * @throws OutOfRangeException if p is out of range
1300 */
1301 protected double evaluate(final double[] work, final int[] pivotsHeap, final double p,
1302 final KthSelector selector) {
1303 NullArgumentException.check(work);
1304 if (p > 100 || p <= 0) {
1305 throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE,
1306 p, 0, 100);
1307 }
1308 return estimate(work, pivotsHeap, index(p/100d, work.length), work.length, selector);
1309 }
1310
1311 /**
1312 * Evaluate method to compute the percentile for a given bounded array.
1313 * This basically calls the {@link #index(double, int) index} and then
1314 * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
1315 * functions to return the estimated percentile value. Please
1316 * note that this method does not make use of cached pivots.
1317 *
1318 * @param work array of numbers to be used for finding the percentile.
1319 * Cannot be {@code null}.
1320 * @param p the p<sup>th</sup> quantile to be computed
1321 * @return estimated percentile
1322 * @param selector a {@link KthSelector} used for pivoting during search
1323 * @throws OutOfRangeException if length or p is out of range
1324 */
1325 public double evaluate(final double[] work, final double p, final KthSelector selector) {
1326 return this.evaluate(work, null, p, selector);
1327 }
1328 /**
1329 * Evaluate weighted percentile by estimation rule specified in {@link EstimationType}.
1330 * @param work array of numbers to be used for finding the percentile
1331 * @param sampleWeights the corresponding weights of data in work
1332 * @param p the p<sup>th</sup> quantile to be computed
1333 * @return estimated weighted percentile
1334 * @throws MathIllegalArgumentException if weighted percentile is not supported by the current estimationType
1335 */
1336 public abstract double evaluate(double[] work, double[] sampleWeights,
1337 double p);
1338 /**
1339 * Search the interval q*sn locates in.
1340 * @param qsn q*sn, where n refers to the data size
1341 * @param sk the cumulative weights array
1342 * @param lo start position to search qsn
1343 * @param hi end position to search qsn
1344 * @return the index of lower bound qsn locates in
1345 */
1346 private static int searchSk(double qsn, double[] sk, int lo, int hi) {
1347 if (sk.length == 1) {
1348 return 0;
1349 }
1350 if (hi - lo == 1) {
1351 if (qsn == sk[hi]) {
1352 return hi;
1353 }
1354 return lo;
1355 } else {
1356 int mid = (lo + hi) >>> 1;
1357 if (qsn == sk[mid]) {
1358 return mid;
1359 } else if (qsn > sk[mid]) {
1360 return searchSk(qsn, sk, mid, hi);
1361 } else {
1362 return searchSk(qsn, sk, lo, mid);
1363 }
1364 }
1365 }
1366 /**
1367 * Gets the name of the enum.
1368 *
1369 * @return the name
1370 */
1371 String getName() {
1372 return name;
1373 }
1374 }
1375 }