001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.math3.stat.ranking; 019 020import java.util.ArrayList; 021import java.util.Arrays; 022import java.util.Iterator; 023import java.util.List; 024 025import org.apache.commons.math3.exception.MathInternalError; 026import org.apache.commons.math3.exception.NotANumberException; 027import org.apache.commons.math3.random.RandomDataGenerator; 028import org.apache.commons.math3.random.RandomGenerator; 029import org.apache.commons.math3.util.FastMath; 030 031 032/** 033 * <p> Ranking based on the natural ordering on doubles.</p> 034 * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties 035 * are handled using the selected {@link TiesStrategy}. 036 * Configuration settings are supplied in optional constructor arguments. 037 * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE}, 038 * respectively. When using {@link TiesStrategy#RANDOM}, a 039 * {@link RandomGenerator} may be supplied as a constructor argument.</p> 040 * <p>Examples: 041 * <table border="1" cellpadding="3"> 042 * <tr><th colspan="3"> 043 * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17) 044 * </th></tr> 045 * <tr><th>NaNStrategy</th><th>TiesStrategy</th> 046 * <th><code>rank(data)</code></th> 047 * <tr> 048 * <td>default (NaNs maximal)</td> 049 * <td>default (ties averaged)</td> 050 * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr> 051 * <tr> 052 * <td>default (NaNs maximal)</td> 053 * <td>MINIMUM</td> 054 * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr> 055 * <tr> 056 * <td>MINIMAL</td> 057 * <td>default (ties averaged)</td> 058 * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr> 059 * <tr> 060 * <td>REMOVED</td> 061 * <td>SEQUENTIAL</td> 062 * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr> 063 * <tr> 064 * <td>MINIMAL</td> 065 * <td>MAXIMUM</td> 066 * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p> 067 * 068 * @since 2.0 069 */ 070public class NaturalRanking implements RankingAlgorithm { 071 072 /** default NaN strategy */ 073 public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED; 074 075 /** default ties strategy */ 076 public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE; 077 078 /** NaN strategy - defaults to NaNs maximal */ 079 private final NaNStrategy nanStrategy; 080 081 /** Ties strategy - defaults to ties averaged */ 082 private final TiesStrategy tiesStrategy; 083 084 /** Source of random data - used only when ties strategy is RANDOM */ 085 private final RandomDataGenerator randomData; 086 087 /** 088 * Create a NaturalRanking with default strategies for handling ties and NaNs. 089 */ 090 public NaturalRanking() { 091 super(); 092 tiesStrategy = DEFAULT_TIES_STRATEGY; 093 nanStrategy = DEFAULT_NAN_STRATEGY; 094 randomData = null; 095 } 096 097 /** 098 * Create a NaturalRanking with the given TiesStrategy. 099 * 100 * @param tiesStrategy the TiesStrategy to use 101 */ 102 public NaturalRanking(TiesStrategy tiesStrategy) { 103 super(); 104 this.tiesStrategy = tiesStrategy; 105 nanStrategy = DEFAULT_NAN_STRATEGY; 106 randomData = new RandomDataGenerator(); 107 } 108 109 /** 110 * Create a NaturalRanking with the given NaNStrategy. 111 * 112 * @param nanStrategy the NaNStrategy to use 113 */ 114 public NaturalRanking(NaNStrategy nanStrategy) { 115 super(); 116 this.nanStrategy = nanStrategy; 117 tiesStrategy = DEFAULT_TIES_STRATEGY; 118 randomData = null; 119 } 120 121 /** 122 * Create a NaturalRanking with the given NaNStrategy and TiesStrategy. 123 * 124 * @param nanStrategy NaNStrategy to use 125 * @param tiesStrategy TiesStrategy to use 126 */ 127 public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) { 128 super(); 129 this.nanStrategy = nanStrategy; 130 this.tiesStrategy = tiesStrategy; 131 randomData = new RandomDataGenerator(); 132 } 133 134 /** 135 * Create a NaturalRanking with TiesStrategy.RANDOM and the given 136 * RandomGenerator as the source of random data. 137 * 138 * @param randomGenerator source of random data 139 */ 140 public NaturalRanking(RandomGenerator randomGenerator) { 141 super(); 142 this.tiesStrategy = TiesStrategy.RANDOM; 143 nanStrategy = DEFAULT_NAN_STRATEGY; 144 randomData = new RandomDataGenerator(randomGenerator); 145 } 146 147 148 /** 149 * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM 150 * and the given source of random data. 151 * 152 * @param nanStrategy NaNStrategy to use 153 * @param randomGenerator source of random data 154 */ 155 public NaturalRanking(NaNStrategy nanStrategy, 156 RandomGenerator randomGenerator) { 157 super(); 158 this.nanStrategy = nanStrategy; 159 this.tiesStrategy = TiesStrategy.RANDOM; 160 randomData = new RandomDataGenerator(randomGenerator); 161 } 162 163 /** 164 * Return the NaNStrategy 165 * 166 * @return returns the NaNStrategy 167 */ 168 public NaNStrategy getNanStrategy() { 169 return nanStrategy; 170 } 171 172 /** 173 * Return the TiesStrategy 174 * 175 * @return the TiesStrategy 176 */ 177 public TiesStrategy getTiesStrategy() { 178 return tiesStrategy; 179 } 180 181 /** 182 * Rank <code>data</code> using the natural ordering on Doubles, with 183 * NaN values handled according to <code>nanStrategy</code> and ties 184 * resolved using <code>tiesStrategy.</code> 185 * 186 * @param data array to be ranked 187 * @return array of ranks 188 * @throws NotANumberException if the selected {@link NaNStrategy} is {@code FAILED} 189 * and a {@link Double#NaN} is encountered in the input data 190 */ 191 public double[] rank(double[] data) { 192 193 // Array recording initial positions of data to be ranked 194 IntDoublePair[] ranks = new IntDoublePair[data.length]; 195 for (int i = 0; i < data.length; i++) { 196 ranks[i] = new IntDoublePair(data[i], i); 197 } 198 199 // Recode, remove or record positions of NaNs 200 List<Integer> nanPositions = null; 201 switch (nanStrategy) { 202 case MAXIMAL: // Replace NaNs with +INFs 203 recodeNaNs(ranks, Double.POSITIVE_INFINITY); 204 break; 205 case MINIMAL: // Replace NaNs with -INFs 206 recodeNaNs(ranks, Double.NEGATIVE_INFINITY); 207 break; 208 case REMOVED: // Drop NaNs from data 209 ranks = removeNaNs(ranks); 210 break; 211 case FIXED: // Record positions of NaNs 212 nanPositions = getNanPositions(ranks); 213 break; 214 case FAILED: 215 nanPositions = getNanPositions(ranks); 216 if (nanPositions.size() > 0) { 217 throw new NotANumberException(); 218 } 219 break; 220 default: // this should not happen unless NaNStrategy enum is changed 221 throw new MathInternalError(); 222 } 223 224 // Sort the IntDoublePairs 225 Arrays.sort(ranks); 226 227 // Walk the sorted array, filling output array using sorted positions, 228 // resolving ties as we go 229 double[] out = new double[ranks.length]; 230 int pos = 1; // position in sorted array 231 out[ranks[0].getPosition()] = pos; 232 List<Integer> tiesTrace = new ArrayList<Integer>(); 233 tiesTrace.add(ranks[0].getPosition()); 234 for (int i = 1; i < ranks.length; i++) { 235 if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) { 236 // tie sequence has ended (or had length 1) 237 pos = i + 1; 238 if (tiesTrace.size() > 1) { // if seq is nontrivial, resolve 239 resolveTie(out, tiesTrace); 240 } 241 tiesTrace = new ArrayList<Integer>(); 242 tiesTrace.add(ranks[i].getPosition()); 243 } else { 244 // tie sequence continues 245 tiesTrace.add(ranks[i].getPosition()); 246 } 247 out[ranks[i].getPosition()] = pos; 248 } 249 if (tiesTrace.size() > 1) { // handle tie sequence at end 250 resolveTie(out, tiesTrace); 251 } 252 if (nanStrategy == NaNStrategy.FIXED) { 253 restoreNaNs(out, nanPositions); 254 } 255 return out; 256 } 257 258 /** 259 * Returns an array that is a copy of the input array with IntDoublePairs 260 * having NaN values removed. 261 * 262 * @param ranks input array 263 * @return array with NaN-valued entries removed 264 */ 265 private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) { 266 if (!containsNaNs(ranks)) { 267 return ranks; 268 } 269 IntDoublePair[] outRanks = new IntDoublePair[ranks.length]; 270 int j = 0; 271 for (int i = 0; i < ranks.length; i++) { 272 if (Double.isNaN(ranks[i].getValue())) { 273 // drop, but adjust original ranks of later elements 274 for (int k = i + 1; k < ranks.length; k++) { 275 ranks[k] = new IntDoublePair( 276 ranks[k].getValue(), ranks[k].getPosition() - 1); 277 } 278 } else { 279 outRanks[j] = new IntDoublePair( 280 ranks[i].getValue(), ranks[i].getPosition()); 281 j++; 282 } 283 } 284 IntDoublePair[] returnRanks = new IntDoublePair[j]; 285 System.arraycopy(outRanks, 0, returnRanks, 0, j); 286 return returnRanks; 287 } 288 289 /** 290 * Recodes NaN values to the given value. 291 * 292 * @param ranks array to recode 293 * @param value the value to replace NaNs with 294 */ 295 private void recodeNaNs(IntDoublePair[] ranks, double value) { 296 for (int i = 0; i < ranks.length; i++) { 297 if (Double.isNaN(ranks[i].getValue())) { 298 ranks[i] = new IntDoublePair( 299 value, ranks[i].getPosition()); 300 } 301 } 302 } 303 304 /** 305 * Checks for presence of NaNs in <code>ranks.</code> 306 * 307 * @param ranks array to be searched for NaNs 308 * @return true iff ranks contains one or more NaNs 309 */ 310 private boolean containsNaNs(IntDoublePair[] ranks) { 311 for (int i = 0; i < ranks.length; i++) { 312 if (Double.isNaN(ranks[i].getValue())) { 313 return true; 314 } 315 } 316 return false; 317 } 318 319 /** 320 * Resolve a sequence of ties, using the configured {@link TiesStrategy}. 321 * The input <code>ranks</code> array is expected to take the same value 322 * for all indices in <code>tiesTrace</code>. The common value is recoded 323 * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>, 324 * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged. 325 * The same array and trace with tiesStrategy AVERAGE will come out 326 * <5,8,3,6,3,7,1,3>. 327 * 328 * @param ranks array of ranks 329 * @param tiesTrace list of indices where <code>ranks</code> is constant 330 * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j] 331 * </code> 332 */ 333 private void resolveTie(double[] ranks, List<Integer> tiesTrace) { 334 335 // constant value of ranks over tiesTrace 336 final double c = ranks[tiesTrace.get(0)]; 337 338 // length of sequence of tied ranks 339 final int length = tiesTrace.size(); 340 341 switch (tiesStrategy) { 342 case AVERAGE: // Replace ranks with average 343 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d); 344 break; 345 case MAXIMUM: // Replace ranks with maximum values 346 fill(ranks, tiesTrace, c + length - 1); 347 break; 348 case MINIMUM: // Replace ties with minimum 349 fill(ranks, tiesTrace, c); 350 break; 351 case RANDOM: // Fill with random integral values in [c, c + length - 1] 352 Iterator<Integer> iterator = tiesTrace.iterator(); 353 long f = FastMath.round(c); 354 while (iterator.hasNext()) { 355 // No advertised exception because args are guaranteed valid 356 ranks[iterator.next()] = 357 randomData.nextLong(f, f + length - 1); 358 } 359 break; 360 case SEQUENTIAL: // Fill sequentially from c to c + length - 1 361 // walk and fill 362 iterator = tiesTrace.iterator(); 363 f = FastMath.round(c); 364 int i = 0; 365 while (iterator.hasNext()) { 366 ranks[iterator.next()] = f + i++; 367 } 368 break; 369 default: // this should not happen unless TiesStrategy enum is changed 370 throw new MathInternalError(); 371 } 372 } 373 374 /** 375 * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code> 376 * 377 * @param data array to modify 378 * @param tiesTrace list of index values to set 379 * @param value value to set 380 */ 381 private void fill(double[] data, List<Integer> tiesTrace, double value) { 382 Iterator<Integer> iterator = tiesTrace.iterator(); 383 while (iterator.hasNext()) { 384 data[iterator.next()] = value; 385 } 386 } 387 388 /** 389 * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code> 390 * 391 * @param ranks array to modify 392 * @param nanPositions list of index values to set to <code>Double.NaN</code> 393 */ 394 private void restoreNaNs(double[] ranks, List<Integer> nanPositions) { 395 if (nanPositions.size() == 0) { 396 return; 397 } 398 Iterator<Integer> iterator = nanPositions.iterator(); 399 while (iterator.hasNext()) { 400 ranks[iterator.next().intValue()] = Double.NaN; 401 } 402 403 } 404 405 /** 406 * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code> 407 * 408 * @param ranks array to search for <code>NaNs</code> 409 * @return list of indexes i such that <code>ranks[i] = NaN</code> 410 */ 411 private List<Integer> getNanPositions(IntDoublePair[] ranks) { 412 ArrayList<Integer> out = new ArrayList<Integer>(); 413 for (int i = 0; i < ranks.length; i++) { 414 if (Double.isNaN(ranks[i].getValue())) { 415 out.add(Integer.valueOf(i)); 416 } 417 } 418 return out; 419 } 420 421 /** 422 * Represents the position of a double value in an ordering. 423 * Comparable interface is implemented so Arrays.sort can be used 424 * to sort an array of IntDoublePairs by value. Note that the 425 * implicitly defined natural ordering is NOT consistent with equals. 426 */ 427 private static class IntDoublePair implements Comparable<IntDoublePair> { 428 429 /** Value of the pair */ 430 private final double value; 431 432 /** Original position of the pair */ 433 private final int position; 434 435 /** 436 * Construct an IntDoublePair with the given value and position. 437 * @param value the value of the pair 438 * @param position the original position 439 */ 440 IntDoublePair(double value, int position) { 441 this.value = value; 442 this.position = position; 443 } 444 445 /** 446 * Compare this IntDoublePair to another pair. 447 * Only the <strong>values</strong> are compared. 448 * 449 * @param other the other pair to compare this to 450 * @return result of <code>Double.compare(value, other.value)</code> 451 */ 452 public int compareTo(IntDoublePair other) { 453 return Double.compare(value, other.value); 454 } 455 456 // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion. 457 458 /** 459 * Returns the value of the pair. 460 * @return value 461 */ 462 public double getValue() { 463 return value; 464 } 465 466 /** 467 * Returns the original position of the pair. 468 * @return position 469 */ 470 public int getPosition() { 471 return position; 472 } 473 } 474}