001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math3.stat.ranking;
019
020 import java.util.ArrayList;
021 import java.util.Arrays;
022 import java.util.Iterator;
023 import java.util.List;
024
025 import org.apache.commons.math3.exception.MathInternalError;
026 import org.apache.commons.math3.exception.NotANumberException;
027 import org.apache.commons.math3.random.RandomDataGenerator;
028 import org.apache.commons.math3.random.RandomGenerator;
029 import org.apache.commons.math3.util.FastMath;
030
031
032 /**
033 * <p> Ranking based on the natural ordering on doubles.</p>
034 * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
035 * are handled using the selected {@link TiesStrategy}.
036 * Configuration settings are supplied in optional constructor arguments.
037 * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE},
038 * respectively. When using {@link TiesStrategy#RANDOM}, a
039 * {@link RandomGenerator} may be supplied as a constructor argument.</p>
040 * <p>Examples:
041 * <table border="1" cellpadding="3">
042 * <tr><th colspan="3">
043 * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
044 * </th></tr>
045 * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
046 * <th><code>rank(data)</code></th>
047 * <tr>
048 * <td>default (NaNs maximal)</td>
049 * <td>default (ties averaged)</td>
050 * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
051 * <tr>
052 * <td>default (NaNs maximal)</td>
053 * <td>MINIMUM</td>
054 * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
055 * <tr>
056 * <td>MINIMAL</td>
057 * <td>default (ties averaged)</td>
058 * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
059 * <tr>
060 * <td>REMOVED</td>
061 * <td>SEQUENTIAL</td>
062 * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
063 * <tr>
064 * <td>MINIMAL</td>
065 * <td>MAXIMUM</td>
066 * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p>
067 *
068 * @since 2.0
069 * @version $Id: NaturalRanking.java 1454897 2013-03-10 19:02:54Z luc $
070 */
071 public class NaturalRanking implements RankingAlgorithm {
072
073 /** default NaN strategy */
074 public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED;
075
076 /** default ties strategy */
077 public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
078
079 /** NaN strategy - defaults to NaNs maximal */
080 private final NaNStrategy nanStrategy;
081
082 /** Ties strategy - defaults to ties averaged */
083 private final TiesStrategy tiesStrategy;
084
085 /** Source of random data - used only when ties strategy is RANDOM */
086 private final RandomDataGenerator randomData;
087
088 /**
089 * Create a NaturalRanking with default strategies for handling ties and NaNs.
090 */
091 public NaturalRanking() {
092 super();
093 tiesStrategy = DEFAULT_TIES_STRATEGY;
094 nanStrategy = DEFAULT_NAN_STRATEGY;
095 randomData = null;
096 }
097
098 /**
099 * Create a NaturalRanking with the given TiesStrategy.
100 *
101 * @param tiesStrategy the TiesStrategy to use
102 */
103 public NaturalRanking(TiesStrategy tiesStrategy) {
104 super();
105 this.tiesStrategy = tiesStrategy;
106 nanStrategy = DEFAULT_NAN_STRATEGY;
107 randomData = new RandomDataGenerator();
108 }
109
110 /**
111 * Create a NaturalRanking with the given NaNStrategy.
112 *
113 * @param nanStrategy the NaNStrategy to use
114 */
115 public NaturalRanking(NaNStrategy nanStrategy) {
116 super();
117 this.nanStrategy = nanStrategy;
118 tiesStrategy = DEFAULT_TIES_STRATEGY;
119 randomData = null;
120 }
121
122 /**
123 * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
124 *
125 * @param nanStrategy NaNStrategy to use
126 * @param tiesStrategy TiesStrategy to use
127 */
128 public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
129 super();
130 this.nanStrategy = nanStrategy;
131 this.tiesStrategy = tiesStrategy;
132 randomData = new RandomDataGenerator();
133 }
134
135 /**
136 * Create a NaturalRanking with TiesStrategy.RANDOM and the given
137 * RandomGenerator as the source of random data.
138 *
139 * @param randomGenerator source of random data
140 */
141 public NaturalRanking(RandomGenerator randomGenerator) {
142 super();
143 this.tiesStrategy = TiesStrategy.RANDOM;
144 nanStrategy = DEFAULT_NAN_STRATEGY;
145 randomData = new RandomDataGenerator(randomGenerator);
146 }
147
148
149 /**
150 * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
151 * and the given source of random data.
152 *
153 * @param nanStrategy NaNStrategy to use
154 * @param randomGenerator source of random data
155 */
156 public NaturalRanking(NaNStrategy nanStrategy,
157 RandomGenerator randomGenerator) {
158 super();
159 this.nanStrategy = nanStrategy;
160 this.tiesStrategy = TiesStrategy.RANDOM;
161 randomData = new RandomDataGenerator(randomGenerator);
162 }
163
164 /**
165 * Return the NaNStrategy
166 *
167 * @return returns the NaNStrategy
168 */
169 public NaNStrategy getNanStrategy() {
170 return nanStrategy;
171 }
172
173 /**
174 * Return the TiesStrategy
175 *
176 * @return the TiesStrategy
177 */
178 public TiesStrategy getTiesStrategy() {
179 return tiesStrategy;
180 }
181
182 /**
183 * Rank <code>data</code> using the natural ordering on Doubles, with
184 * NaN values handled according to <code>nanStrategy</code> and ties
185 * resolved using <code>tiesStrategy.</code>
186 *
187 * @param data array to be ranked
188 * @return array of ranks
189 * @throws NotANumberException if the selected {@link NaNStrategy} is {@code FAILED}
190 * and a {@link Double#NaN} is encountered in the input data
191 */
192 public double[] rank(double[] data) {
193
194 // Array recording initial positions of data to be ranked
195 IntDoublePair[] ranks = new IntDoublePair[data.length];
196 for (int i = 0; i < data.length; i++) {
197 ranks[i] = new IntDoublePair(data[i], i);
198 }
199
200 // Recode, remove or record positions of NaNs
201 List<Integer> nanPositions = null;
202 switch (nanStrategy) {
203 case MAXIMAL: // Replace NaNs with +INFs
204 recodeNaNs(ranks, Double.POSITIVE_INFINITY);
205 break;
206 case MINIMAL: // Replace NaNs with -INFs
207 recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
208 break;
209 case REMOVED: // Drop NaNs from data
210 ranks = removeNaNs(ranks);
211 break;
212 case FIXED: // Record positions of NaNs
213 nanPositions = getNanPositions(ranks);
214 break;
215 case FAILED:
216 nanPositions = getNanPositions(ranks);
217 if (nanPositions.size() > 0) {
218 throw new NotANumberException();
219 }
220 break;
221 default: // this should not happen unless NaNStrategy enum is changed
222 throw new MathInternalError();
223 }
224
225 // Sort the IntDoublePairs
226 Arrays.sort(ranks);
227
228 // Walk the sorted array, filling output array using sorted positions,
229 // resolving ties as we go
230 double[] out = new double[ranks.length];
231 int pos = 1; // position in sorted array
232 out[ranks[0].getPosition()] = pos;
233 List<Integer> tiesTrace = new ArrayList<Integer>();
234 tiesTrace.add(ranks[0].getPosition());
235 for (int i = 1; i < ranks.length; i++) {
236 if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
237 // tie sequence has ended (or had length 1)
238 pos = i + 1;
239 if (tiesTrace.size() > 1) { // if seq is nontrivial, resolve
240 resolveTie(out, tiesTrace);
241 }
242 tiesTrace = new ArrayList<Integer>();
243 tiesTrace.add(ranks[i].getPosition());
244 } else {
245 // tie sequence continues
246 tiesTrace.add(ranks[i].getPosition());
247 }
248 out[ranks[i].getPosition()] = pos;
249 }
250 if (tiesTrace.size() > 1) { // handle tie sequence at end
251 resolveTie(out, tiesTrace);
252 }
253 if (nanStrategy == NaNStrategy.FIXED) {
254 restoreNaNs(out, nanPositions);
255 }
256 return out;
257 }
258
259 /**
260 * Returns an array that is a copy of the input array with IntDoublePairs
261 * having NaN values removed.
262 *
263 * @param ranks input array
264 * @return array with NaN-valued entries removed
265 */
266 private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
267 if (!containsNaNs(ranks)) {
268 return ranks;
269 }
270 IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
271 int j = 0;
272 for (int i = 0; i < ranks.length; i++) {
273 if (Double.isNaN(ranks[i].getValue())) {
274 // drop, but adjust original ranks of later elements
275 for (int k = i + 1; k < ranks.length; k++) {
276 ranks[k] = new IntDoublePair(
277 ranks[k].getValue(), ranks[k].getPosition() - 1);
278 }
279 } else {
280 outRanks[j] = new IntDoublePair(
281 ranks[i].getValue(), ranks[i].getPosition());
282 j++;
283 }
284 }
285 IntDoublePair[] returnRanks = new IntDoublePair[j];
286 System.arraycopy(outRanks, 0, returnRanks, 0, j);
287 return returnRanks;
288 }
289
290 /**
291 * Recodes NaN values to the given value.
292 *
293 * @param ranks array to recode
294 * @param value the value to replace NaNs with
295 */
296 private void recodeNaNs(IntDoublePair[] ranks, double value) {
297 for (int i = 0; i < ranks.length; i++) {
298 if (Double.isNaN(ranks[i].getValue())) {
299 ranks[i] = new IntDoublePair(
300 value, ranks[i].getPosition());
301 }
302 }
303 }
304
305 /**
306 * Checks for presence of NaNs in <code>ranks.</code>
307 *
308 * @param ranks array to be searched for NaNs
309 * @return true iff ranks contains one or more NaNs
310 */
311 private boolean containsNaNs(IntDoublePair[] ranks) {
312 for (int i = 0; i < ranks.length; i++) {
313 if (Double.isNaN(ranks[i].getValue())) {
314 return true;
315 }
316 }
317 return false;
318 }
319
320 /**
321 * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
322 * The input <code>ranks</code> array is expected to take the same value
323 * for all indices in <code>tiesTrace</code>. The common value is recoded
324 * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
325 * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
326 * The same array and trace with tiesStrategy AVERAGE will come out
327 * <5,8,3,6,3,7,1,3>.
328 *
329 * @param ranks array of ranks
330 * @param tiesTrace list of indices where <code>ranks</code> is constant
331 * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
332 * </code>
333 */
334 private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
335
336 // constant value of ranks over tiesTrace
337 final double c = ranks[tiesTrace.get(0)];
338
339 // length of sequence of tied ranks
340 final int length = tiesTrace.size();
341
342 switch (tiesStrategy) {
343 case AVERAGE: // Replace ranks with average
344 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
345 break;
346 case MAXIMUM: // Replace ranks with maximum values
347 fill(ranks, tiesTrace, c + length - 1);
348 break;
349 case MINIMUM: // Replace ties with minimum
350 fill(ranks, tiesTrace, c);
351 break;
352 case RANDOM: // Fill with random integral values in [c, c + length - 1]
353 Iterator<Integer> iterator = tiesTrace.iterator();
354 long f = FastMath.round(c);
355 while (iterator.hasNext()) {
356 // No advertised exception because args are guaranteed valid
357 ranks[iterator.next()] =
358 randomData.nextLong(f, f + length - 1);
359 }
360 break;
361 case SEQUENTIAL: // Fill sequentially from c to c + length - 1
362 // walk and fill
363 iterator = tiesTrace.iterator();
364 f = FastMath.round(c);
365 int i = 0;
366 while (iterator.hasNext()) {
367 ranks[iterator.next()] = f + i++;
368 }
369 break;
370 default: // this should not happen unless TiesStrategy enum is changed
371 throw new MathInternalError();
372 }
373 }
374
375 /**
376 * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
377 *
378 * @param data array to modify
379 * @param tiesTrace list of index values to set
380 * @param value value to set
381 */
382 private void fill(double[] data, List<Integer> tiesTrace, double value) {
383 Iterator<Integer> iterator = tiesTrace.iterator();
384 while (iterator.hasNext()) {
385 data[iterator.next()] = value;
386 }
387 }
388
389 /**
390 * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
391 *
392 * @param ranks array to modify
393 * @param nanPositions list of index values to set to <code>Double.NaN</code>
394 */
395 private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
396 if (nanPositions.size() == 0) {
397 return;
398 }
399 Iterator<Integer> iterator = nanPositions.iterator();
400 while (iterator.hasNext()) {
401 ranks[iterator.next().intValue()] = Double.NaN;
402 }
403
404 }
405
406 /**
407 * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
408 *
409 * @param ranks array to search for <code>NaNs</code>
410 * @return list of indexes i such that <code>ranks[i] = NaN</code>
411 */
412 private List<Integer> getNanPositions(IntDoublePair[] ranks) {
413 ArrayList<Integer> out = new ArrayList<Integer>();
414 for (int i = 0; i < ranks.length; i++) {
415 if (Double.isNaN(ranks[i].getValue())) {
416 out.add(Integer.valueOf(i));
417 }
418 }
419 return out;
420 }
421
422 /**
423 * Represents the position of a double value in an ordering.
424 * Comparable interface is implemented so Arrays.sort can be used
425 * to sort an array of IntDoublePairs by value. Note that the
426 * implicitly defined natural ordering is NOT consistent with equals.
427 */
428 private static class IntDoublePair implements Comparable<IntDoublePair> {
429
430 /** Value of the pair */
431 private final double value;
432
433 /** Original position of the pair */
434 private final int position;
435
436 /**
437 * Construct an IntDoublePair with the given value and position.
438 * @param value the value of the pair
439 * @param position the original position
440 */
441 public IntDoublePair(double value, int position) {
442 this.value = value;
443 this.position = position;
444 }
445
446 /**
447 * Compare this IntDoublePair to another pair.
448 * Only the <strong>values</strong> are compared.
449 *
450 * @param other the other pair to compare this to
451 * @return result of <code>Double.compare(value, other.value)</code>
452 */
453 public int compareTo(IntDoublePair other) {
454 return Double.compare(value, other.value);
455 }
456
457 // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion.
458
459 /**
460 * Returns the value of the pair.
461 * @return value
462 */
463 public double getValue() {
464 return value;
465 }
466
467 /**
468 * Returns the original position of the pair.
469 * @return position
470 */
471 public int getPosition() {
472 return position;
473 }
474 }
475 }