1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.math4.legacy.stat.ranking;
19
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Iterator;
23 import java.util.List;
24
25 import org.apache.commons.rng.UniformRandomProvider;
26 import org.apache.commons.rng.simple.RandomSource;
27 import org.apache.commons.rng.sampling.distribution.UniformLongSampler;
28 import org.apache.commons.math4.legacy.exception.MathInternalError;
29 import org.apache.commons.math4.legacy.exception.NotANumberException;
30 import org.apache.commons.math4.core.jdkmath.JdkMath;
31
32
33 /**
34 * Ranking based on the natural ordering on doubles.
35 *
36 * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
37 * are handled using the selected {@link TiesStrategy}.
38 * Configuration settings are supplied in optional constructor arguments.
39 * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE},
40 * respectively. When using {@link TiesStrategy#RANDOM}, a
41 * {@link UniformRandomProvider random generator} may be supplied as a
42 * constructor argument.</p>
43 * <p>Examples:
44 * <table border="">
45 * <caption>Examples</caption>
46 * <tr><th colspan="3">
47 * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
48 * </th></tr>
49 * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
50 * <th><code>rank(data)</code></th>
51 * <tr>
52 * <td>default (NaNs maximal)</td>
53 * <td>default (ties averaged)</td>
54 * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
55 * <tr>
56 * <td>default (NaNs maximal)</td>
57 * <td>MINIMUM</td>
58 * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
59 * <tr>
60 * <td>MINIMAL</td>
61 * <td>default (ties averaged)</td>
62 * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
63 * <tr>
64 * <td>REMOVED</td>
65 * <td>SEQUENTIAL</td>
66 * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
67 * <tr>
68 * <td>MINIMAL</td>
69 * <td>MAXIMUM</td>
70 * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table>
71 *
72 * @since 2.0
73 */
74 public class NaturalRanking implements RankingAlgorithm {
75
76 /** default NaN strategy. */
77 public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED;
78
79 /** default ties strategy. */
80 public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
81
82 /** NaN strategy - defaults to NaNs maximal. */
83 private final NaNStrategy nanStrategy;
84
85 /** Ties strategy - defaults to ties averaged. */
86 private final TiesStrategy tiesStrategy;
87
88 /** Source of random data - used only when ties strategy is RANDOM. */
89 private final UniformRandomProvider random;
90
91 /**
92 * Create a NaturalRanking with default strategies for handling ties and NaNs.
93 */
94 public NaturalRanking() {
95 this(DEFAULT_NAN_STRATEGY, DEFAULT_TIES_STRATEGY, null);
96 }
97
98 /**
99 * Create a NaturalRanking with the given TiesStrategy.
100 *
101 * @param tiesStrategy the TiesStrategy to use
102 */
103 public NaturalRanking(TiesStrategy tiesStrategy) {
104 this(DEFAULT_NAN_STRATEGY,
105 tiesStrategy,
106 RandomSource.WELL_19937_C.create());
107 }
108
109 /**
110 * Create a NaturalRanking with the given NaNStrategy.
111 *
112 * @param nanStrategy the NaNStrategy to use
113 */
114 public NaturalRanking(NaNStrategy nanStrategy) {
115 this(nanStrategy, DEFAULT_TIES_STRATEGY, null);
116 }
117
118 /**
119 * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
120 *
121 * @param nanStrategy NaNStrategy to use
122 * @param tiesStrategy TiesStrategy to use
123 */
124 public NaturalRanking(NaNStrategy nanStrategy,
125 TiesStrategy tiesStrategy) {
126 this(nanStrategy,
127 tiesStrategy,
128 RandomSource.WELL_19937_C.create());
129 }
130
131 /**
132 * Create a NaturalRanking with TiesStrategy.RANDOM and the given
133 * random generator as the source of random data.
134 *
135 * @param randomGenerator source of random data
136 */
137 public NaturalRanking(UniformRandomProvider randomGenerator) {
138 this(DEFAULT_NAN_STRATEGY, TiesStrategy.RANDOM, randomGenerator);
139 }
140
141 /**
142 * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
143 * and the given source of random data.
144 *
145 * @param nanStrategy NaNStrategy to use
146 * @param randomGenerator source of random data
147 */
148 public NaturalRanking(NaNStrategy nanStrategy,
149 UniformRandomProvider randomGenerator) {
150 this(nanStrategy, TiesStrategy.RANDOM, randomGenerator);
151 }
152
153 /**
154 * @param nanStrategy NaN strategy.
155 * @param tiesStrategy Tie strategy.
156 * @param random RNG.
157 */
158 private NaturalRanking(NaNStrategy nanStrategy,
159 TiesStrategy tiesStrategy,
160 UniformRandomProvider random) {
161 this.nanStrategy = nanStrategy;
162 this.tiesStrategy = tiesStrategy;
163 this.random = random;
164 }
165
166 /**
167 * Return the NaNStrategy.
168 *
169 * @return returns the NaNStrategy
170 */
171 public NaNStrategy getNanStrategy() {
172 return nanStrategy;
173 }
174
175 /**
176 * Return the TiesStrategy.
177 *
178 * @return the TiesStrategy
179 */
180 public TiesStrategy getTiesStrategy() {
181 return tiesStrategy;
182 }
183
184 /**
185 * Rank <code>data</code> using the natural ordering on Doubles, with
186 * NaN values handled according to <code>nanStrategy</code> and ties
187 * resolved using <code>tiesStrategy.</code>
188 *
189 * @param data array to be ranked
190 * @return array of ranks
191 * @throws NotANumberException if the selected {@link NaNStrategy} is {@code FAILED}
192 * and a {@link Double#NaN} is encountered in the input data
193 */
194 @Override
195 public double[] rank(double[] data) {
196
197 // Array recording initial positions of data to be ranked
198 IntDoublePair[] ranks = new IntDoublePair[data.length];
199 for (int i = 0; i < data.length; i++) {
200 ranks[i] = new IntDoublePair(data[i], i);
201 }
202
203 // Recode, remove or record positions of NaNs
204 List<Integer> nanPositions = null;
205 switch (nanStrategy) {
206 case MAXIMAL: // Replace NaNs with +INFs
207 recodeNaNs(ranks, Double.POSITIVE_INFINITY);
208 break;
209 case MINIMAL: // Replace NaNs with -INFs
210 recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
211 break;
212 case REMOVED: // Drop NaNs from data
213 ranks = removeNaNs(ranks);
214 break;
215 case FIXED: // Record positions of NaNs
216 nanPositions = getNanPositions(ranks);
217 break;
218 case FAILED:
219 nanPositions = getNanPositions(ranks);
220 if (nanPositions.size() > 0) {
221 throw new NotANumberException();
222 }
223 break;
224 default: // this should not happen unless NaNStrategy enum is changed
225 throw new MathInternalError();
226 }
227
228 // Sort the IntDoublePairs
229 Arrays.sort(ranks);
230
231 // Walk the sorted array, filling output array using sorted positions,
232 // resolving ties as we go
233 double[] out = new double[ranks.length];
234 int pos = 1; // position in sorted array
235 out[ranks[0].getPosition()] = pos;
236 List<Integer> tiesTrace = new ArrayList<>();
237 tiesTrace.add(ranks[0].getPosition());
238 for (int i = 1; i < ranks.length; i++) {
239 if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
240 // tie sequence has ended (or had length 1)
241 pos = i + 1;
242 if (tiesTrace.size() > 1) { // if seq is nontrivial, resolve
243 resolveTie(out, tiesTrace);
244 }
245 tiesTrace = new ArrayList<>();
246 tiesTrace.add(ranks[i].getPosition());
247 } else {
248 // tie sequence continues
249 tiesTrace.add(ranks[i].getPosition());
250 }
251 out[ranks[i].getPosition()] = pos;
252 }
253 if (tiesTrace.size() > 1) { // handle tie sequence at end
254 resolveTie(out, tiesTrace);
255 }
256 if (nanStrategy == NaNStrategy.FIXED) {
257 restoreNaNs(out, nanPositions);
258 }
259 return out;
260 }
261
262 /**
263 * Returns an array that is a copy of the input array with IntDoublePairs
264 * having NaN values removed.
265 *
266 * @param ranks input array
267 * @return array with NaN-valued entries removed
268 */
269 private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
270 if (!containsNaNs(ranks)) {
271 return ranks;
272 }
273 IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
274 int j = 0;
275 for (int i = 0; i < ranks.length; i++) {
276 if (Double.isNaN(ranks[i].getValue())) {
277 // drop, but adjust original ranks of later elements
278 for (int k = i + 1; k < ranks.length; k++) {
279 ranks[k] = new IntDoublePair(
280 ranks[k].getValue(), ranks[k].getPosition() - 1);
281 }
282 } else {
283 outRanks[j] = new IntDoublePair(
284 ranks[i].getValue(), ranks[i].getPosition());
285 j++;
286 }
287 }
288 IntDoublePair[] returnRanks = new IntDoublePair[j];
289 System.arraycopy(outRanks, 0, returnRanks, 0, j);
290 return returnRanks;
291 }
292
293 /**
294 * Recodes NaN values to the given value.
295 *
296 * @param ranks array to recode
297 * @param value the value to replace NaNs with
298 */
299 private void recodeNaNs(IntDoublePair[] ranks, double value) {
300 for (int i = 0; i < ranks.length; i++) {
301 if (Double.isNaN(ranks[i].getValue())) {
302 ranks[i] = new IntDoublePair(
303 value, ranks[i].getPosition());
304 }
305 }
306 }
307
308 /**
309 * Checks for presence of NaNs in <code>ranks.</code>
310 *
311 * @param ranks array to be searched for NaNs
312 * @return true iff ranks contains one or more NaNs
313 */
314 private boolean containsNaNs(IntDoublePair[] ranks) {
315 for (int i = 0; i < ranks.length; i++) {
316 if (Double.isNaN(ranks[i].getValue())) {
317 return true;
318 }
319 }
320 return false;
321 }
322
323 /**
324 * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
325 * The input <code>ranks</code> array is expected to take the same value
326 * for all indices in <code>tiesTrace</code>. The common value is recoded
327 * according to the tiesStrategy. For example, if ranks = [5,8,2,6,2,7,1,2],
328 * tiesTrace = [2,4,7] and tiesStrategy is MINIMUM, ranks will be unchanged.
329 * The same array and trace with tiesStrategy AVERAGE will come out
330 * [5,8,3,6,3,7,1,3].
331 *
332 * @param ranks array of ranks
333 * @param tiesTrace list of indices where <code>ranks</code> is constant
334 * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
335 * </code>
336 */
337 private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
338
339 // constant value of ranks over tiesTrace
340 final double c = ranks[tiesTrace.get(0)];
341
342 // length of sequence of tied ranks
343 final int length = tiesTrace.size();
344
345 switch (tiesStrategy) {
346 case AVERAGE: // Replace ranks with average
347 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
348 break;
349 case MAXIMUM: // Replace ranks with maximum values
350 fill(ranks, tiesTrace, c + length - 1);
351 break;
352 case MINIMUM: // Replace ties with minimum
353 fill(ranks, tiesTrace, c);
354 break;
355 case RANDOM: // Fill with random integral values in [c, c + length - 1]
356 Iterator<Integer> iterator = tiesTrace.iterator();
357 long f = JdkMath.round(c);
358 final UniformLongSampler sampler = UniformLongSampler.of(random, f, f + length - 1);
359 while (iterator.hasNext()) {
360 // No advertised exception because args are guaranteed valid
361 ranks[iterator.next()] = sampler.sample();
362 }
363 break;
364 case SEQUENTIAL: // Fill sequentially from c to c + length - 1
365 // walk and fill
366 iterator = tiesTrace.iterator();
367 f = JdkMath.round(c);
368 int i = 0;
369 while (iterator.hasNext()) {
370 ranks[iterator.next()] = f + i++;
371 }
372 break;
373 default: // this should not happen unless TiesStrategy enum is changed
374 throw new MathInternalError();
375 }
376 }
377
378 /**
379 * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
380 *
381 * @param data array to modify
382 * @param tiesTrace list of index values to set
383 * @param value value to set
384 */
385 private void fill(double[] data, List<Integer> tiesTrace, double value) {
386 Iterator<Integer> iterator = tiesTrace.iterator();
387 while (iterator.hasNext()) {
388 data[iterator.next()] = value;
389 }
390 }
391
392 /**
393 * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
394 *
395 * @param ranks array to modify
396 * @param nanPositions list of index values to set to <code>Double.NaN</code>
397 */
398 private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
399 if (nanPositions.isEmpty()) {
400 return;
401 }
402 Iterator<Integer> iterator = nanPositions.iterator();
403 while (iterator.hasNext()) {
404 ranks[iterator.next().intValue()] = Double.NaN;
405 }
406 }
407
408 /**
409 * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
410 *
411 * @param ranks array to search for <code>NaNs</code>
412 * @return list of indexes i such that <code>ranks[i] = NaN</code>
413 */
414 private List<Integer> getNanPositions(IntDoublePair[] ranks) {
415 ArrayList<Integer> out = new ArrayList<>();
416 for (int i = 0; i < ranks.length; i++) {
417 if (Double.isNaN(ranks[i].getValue())) {
418 out.add(Integer.valueOf(i));
419 }
420 }
421 return out;
422 }
423
424 /**
425 * Represents the position of a double value in an ordering.
426 * Comparable interface is implemented so Arrays.sort can be used
427 * to sort an array of IntDoublePairs by value. Note that the
428 * implicitly defined natural ordering is NOT consistent with equals.
429 */
430 private static final class IntDoublePair implements Comparable<IntDoublePair> {
431
432 /** Value of the pair. */
433 private final double value;
434
435 /** Original position of the pair. */
436 private final int position;
437
438 /**
439 * Construct an IntDoublePair with the given value and position.
440 * @param value the value of the pair
441 * @param position the original position
442 */
443 IntDoublePair(double value, int position) {
444 this.value = value;
445 this.position = position;
446 }
447
448 /**
449 * Compare this IntDoublePair to another pair.
450 * Only the <strong>values</strong> are compared.
451 *
452 * @param other the other pair to compare this to
453 * @return result of <code>Double.compare(value, other.value)</code>
454 */
455 @Override
456 public int compareTo(IntDoublePair other) {
457 return Double.compare(value, other.value);
458 }
459
460 // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion.
461
462 /**
463 * Returns the value of the pair.
464 * @return value
465 */
466 public double getValue() {
467 return value;
468 }
469
470 /**
471 * Returns the original position of the pair.
472 * @return position
473 */
474 public int getPosition() {
475 return position;
476 }
477 }
478 }