001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.math4.legacy.stat.ranking; 019 020import java.util.ArrayList; 021import java.util.Arrays; 022import java.util.Iterator; 023import java.util.List; 024 025import org.apache.commons.rng.UniformRandomProvider; 026import org.apache.commons.rng.simple.RandomSource; 027import org.apache.commons.rng.sampling.distribution.UniformLongSampler; 028import org.apache.commons.math4.legacy.exception.MathInternalError; 029import org.apache.commons.math4.legacy.exception.NotANumberException; 030import org.apache.commons.math4.core.jdkmath.JdkMath; 031 032 033/** 034 * Ranking based on the natural ordering on doubles. 035 * 036 * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties 037 * are handled using the selected {@link TiesStrategy}. 038 * Configuration settings are supplied in optional constructor arguments. 039 * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE}, 040 * respectively. When using {@link TiesStrategy#RANDOM}, a 041 * {@link UniformRandomProvider random generator} may be supplied as a 042 * constructor argument.</p> 043 * <p>Examples: 044 * <table border=""> 045 * <caption>Examples</caption> 046 * <tr><th colspan="3"> 047 * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17) 048 * </th></tr> 049 * <tr><th>NaNStrategy</th><th>TiesStrategy</th> 050 * <th><code>rank(data)</code></th> 051 * <tr> 052 * <td>default (NaNs maximal)</td> 053 * <td>default (ties averaged)</td> 054 * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr> 055 * <tr> 056 * <td>default (NaNs maximal)</td> 057 * <td>MINIMUM</td> 058 * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr> 059 * <tr> 060 * <td>MINIMAL</td> 061 * <td>default (ties averaged)</td> 062 * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr> 063 * <tr> 064 * <td>REMOVED</td> 065 * <td>SEQUENTIAL</td> 066 * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr> 067 * <tr> 068 * <td>MINIMAL</td> 069 * <td>MAXIMUM</td> 070 * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table> 071 * 072 * @since 2.0 073 */ 074public class NaturalRanking implements RankingAlgorithm { 075 076 /** default NaN strategy. */ 077 public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED; 078 079 /** default ties strategy. */ 080 public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE; 081 082 /** NaN strategy - defaults to NaNs maximal. */ 083 private final NaNStrategy nanStrategy; 084 085 /** Ties strategy - defaults to ties averaged. */ 086 private final TiesStrategy tiesStrategy; 087 088 /** Source of random data - used only when ties strategy is RANDOM. */ 089 private final UniformRandomProvider random; 090 091 /** 092 * Create a NaturalRanking with default strategies for handling ties and NaNs. 093 */ 094 public NaturalRanking() { 095 this(DEFAULT_NAN_STRATEGY, DEFAULT_TIES_STRATEGY, null); 096 } 097 098 /** 099 * Create a NaturalRanking with the given TiesStrategy. 100 * 101 * @param tiesStrategy the TiesStrategy to use 102 */ 103 public NaturalRanking(TiesStrategy tiesStrategy) { 104 this(DEFAULT_NAN_STRATEGY, 105 tiesStrategy, 106 RandomSource.WELL_19937_C.create()); 107 } 108 109 /** 110 * Create a NaturalRanking with the given NaNStrategy. 111 * 112 * @param nanStrategy the NaNStrategy to use 113 */ 114 public NaturalRanking(NaNStrategy nanStrategy) { 115 this(nanStrategy, DEFAULT_TIES_STRATEGY, null); 116 } 117 118 /** 119 * Create a NaturalRanking with the given NaNStrategy and TiesStrategy. 120 * 121 * @param nanStrategy NaNStrategy to use 122 * @param tiesStrategy TiesStrategy to use 123 */ 124 public NaturalRanking(NaNStrategy nanStrategy, 125 TiesStrategy tiesStrategy) { 126 this(nanStrategy, 127 tiesStrategy, 128 RandomSource.WELL_19937_C.create()); 129 } 130 131 /** 132 * Create a NaturalRanking with TiesStrategy.RANDOM and the given 133 * random generator as the source of random data. 134 * 135 * @param randomGenerator source of random data 136 */ 137 public NaturalRanking(UniformRandomProvider randomGenerator) { 138 this(DEFAULT_NAN_STRATEGY, TiesStrategy.RANDOM, randomGenerator); 139 } 140 141 /** 142 * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM 143 * and the given source of random data. 144 * 145 * @param nanStrategy NaNStrategy to use 146 * @param randomGenerator source of random data 147 */ 148 public NaturalRanking(NaNStrategy nanStrategy, 149 UniformRandomProvider randomGenerator) { 150 this(nanStrategy, TiesStrategy.RANDOM, randomGenerator); 151 } 152 153 /** 154 * @param nanStrategy NaN strategy. 155 * @param tiesStrategy Tie strategy. 156 * @param random RNG. 157 */ 158 private NaturalRanking(NaNStrategy nanStrategy, 159 TiesStrategy tiesStrategy, 160 UniformRandomProvider random) { 161 this.nanStrategy = nanStrategy; 162 this.tiesStrategy = tiesStrategy; 163 this.random = random; 164 } 165 166 /** 167 * Return the NaNStrategy. 168 * 169 * @return returns the NaNStrategy 170 */ 171 public NaNStrategy getNanStrategy() { 172 return nanStrategy; 173 } 174 175 /** 176 * Return the TiesStrategy. 177 * 178 * @return the TiesStrategy 179 */ 180 public TiesStrategy getTiesStrategy() { 181 return tiesStrategy; 182 } 183 184 /** 185 * Rank <code>data</code> using the natural ordering on Doubles, with 186 * NaN values handled according to <code>nanStrategy</code> and ties 187 * resolved using <code>tiesStrategy.</code> 188 * 189 * @param data array to be ranked 190 * @return array of ranks 191 * @throws NotANumberException if the selected {@link NaNStrategy} is {@code FAILED} 192 * and a {@link Double#NaN} is encountered in the input data 193 */ 194 @Override 195 public double[] rank(double[] data) { 196 197 // Array recording initial positions of data to be ranked 198 IntDoublePair[] ranks = new IntDoublePair[data.length]; 199 for (int i = 0; i < data.length; i++) { 200 ranks[i] = new IntDoublePair(data[i], i); 201 } 202 203 // Recode, remove or record positions of NaNs 204 List<Integer> nanPositions = null; 205 switch (nanStrategy) { 206 case MAXIMAL: // Replace NaNs with +INFs 207 recodeNaNs(ranks, Double.POSITIVE_INFINITY); 208 break; 209 case MINIMAL: // Replace NaNs with -INFs 210 recodeNaNs(ranks, Double.NEGATIVE_INFINITY); 211 break; 212 case REMOVED: // Drop NaNs from data 213 ranks = removeNaNs(ranks); 214 break; 215 case FIXED: // Record positions of NaNs 216 nanPositions = getNanPositions(ranks); 217 break; 218 case FAILED: 219 nanPositions = getNanPositions(ranks); 220 if (nanPositions.size() > 0) { 221 throw new NotANumberException(); 222 } 223 break; 224 default: // this should not happen unless NaNStrategy enum is changed 225 throw new MathInternalError(); 226 } 227 228 // Sort the IntDoublePairs 229 Arrays.sort(ranks); 230 231 // Walk the sorted array, filling output array using sorted positions, 232 // resolving ties as we go 233 double[] out = new double[ranks.length]; 234 int pos = 1; // position in sorted array 235 out[ranks[0].getPosition()] = pos; 236 List<Integer> tiesTrace = new ArrayList<>(); 237 tiesTrace.add(ranks[0].getPosition()); 238 for (int i = 1; i < ranks.length; i++) { 239 if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) { 240 // tie sequence has ended (or had length 1) 241 pos = i + 1; 242 if (tiesTrace.size() > 1) { // if seq is nontrivial, resolve 243 resolveTie(out, tiesTrace); 244 } 245 tiesTrace = new ArrayList<>(); 246 tiesTrace.add(ranks[i].getPosition()); 247 } else { 248 // tie sequence continues 249 tiesTrace.add(ranks[i].getPosition()); 250 } 251 out[ranks[i].getPosition()] = pos; 252 } 253 if (tiesTrace.size() > 1) { // handle tie sequence at end 254 resolveTie(out, tiesTrace); 255 } 256 if (nanStrategy == NaNStrategy.FIXED) { 257 restoreNaNs(out, nanPositions); 258 } 259 return out; 260 } 261 262 /** 263 * Returns an array that is a copy of the input array with IntDoublePairs 264 * having NaN values removed. 265 * 266 * @param ranks input array 267 * @return array with NaN-valued entries removed 268 */ 269 private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) { 270 if (!containsNaNs(ranks)) { 271 return ranks; 272 } 273 IntDoublePair[] outRanks = new IntDoublePair[ranks.length]; 274 int j = 0; 275 for (int i = 0; i < ranks.length; i++) { 276 if (Double.isNaN(ranks[i].getValue())) { 277 // drop, but adjust original ranks of later elements 278 for (int k = i + 1; k < ranks.length; k++) { 279 ranks[k] = new IntDoublePair( 280 ranks[k].getValue(), ranks[k].getPosition() - 1); 281 } 282 } else { 283 outRanks[j] = new IntDoublePair( 284 ranks[i].getValue(), ranks[i].getPosition()); 285 j++; 286 } 287 } 288 IntDoublePair[] returnRanks = new IntDoublePair[j]; 289 System.arraycopy(outRanks, 0, returnRanks, 0, j); 290 return returnRanks; 291 } 292 293 /** 294 * Recodes NaN values to the given value. 295 * 296 * @param ranks array to recode 297 * @param value the value to replace NaNs with 298 */ 299 private void recodeNaNs(IntDoublePair[] ranks, double value) { 300 for (int i = 0; i < ranks.length; i++) { 301 if (Double.isNaN(ranks[i].getValue())) { 302 ranks[i] = new IntDoublePair( 303 value, ranks[i].getPosition()); 304 } 305 } 306 } 307 308 /** 309 * Checks for presence of NaNs in <code>ranks.</code> 310 * 311 * @param ranks array to be searched for NaNs 312 * @return true iff ranks contains one or more NaNs 313 */ 314 private boolean containsNaNs(IntDoublePair[] ranks) { 315 for (int i = 0; i < ranks.length; i++) { 316 if (Double.isNaN(ranks[i].getValue())) { 317 return true; 318 } 319 } 320 return false; 321 } 322 323 /** 324 * Resolve a sequence of ties, using the configured {@link TiesStrategy}. 325 * The input <code>ranks</code> array is expected to take the same value 326 * for all indices in <code>tiesTrace</code>. The common value is recoded 327 * according to the tiesStrategy. For example, if ranks = [5,8,2,6,2,7,1,2], 328 * tiesTrace = [2,4,7] and tiesStrategy is MINIMUM, ranks will be unchanged. 329 * The same array and trace with tiesStrategy AVERAGE will come out 330 * [5,8,3,6,3,7,1,3]. 331 * 332 * @param ranks array of ranks 333 * @param tiesTrace list of indices where <code>ranks</code> is constant 334 * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j] 335 * </code> 336 */ 337 private void resolveTie(double[] ranks, List<Integer> tiesTrace) { 338 339 // constant value of ranks over tiesTrace 340 final double c = ranks[tiesTrace.get(0)]; 341 342 // length of sequence of tied ranks 343 final int length = tiesTrace.size(); 344 345 switch (tiesStrategy) { 346 case AVERAGE: // Replace ranks with average 347 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d); 348 break; 349 case MAXIMUM: // Replace ranks with maximum values 350 fill(ranks, tiesTrace, c + length - 1); 351 break; 352 case MINIMUM: // Replace ties with minimum 353 fill(ranks, tiesTrace, c); 354 break; 355 case RANDOM: // Fill with random integral values in [c, c + length - 1] 356 Iterator<Integer> iterator = tiesTrace.iterator(); 357 long f = JdkMath.round(c); 358 final UniformLongSampler sampler = UniformLongSampler.of(random, f, f + length - 1); 359 while (iterator.hasNext()) { 360 // No advertised exception because args are guaranteed valid 361 ranks[iterator.next()] = sampler.sample(); 362 } 363 break; 364 case SEQUENTIAL: // Fill sequentially from c to c + length - 1 365 // walk and fill 366 iterator = tiesTrace.iterator(); 367 f = JdkMath.round(c); 368 int i = 0; 369 while (iterator.hasNext()) { 370 ranks[iterator.next()] = f + i++; 371 } 372 break; 373 default: // this should not happen unless TiesStrategy enum is changed 374 throw new MathInternalError(); 375 } 376 } 377 378 /** 379 * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code> 380 * 381 * @param data array to modify 382 * @param tiesTrace list of index values to set 383 * @param value value to set 384 */ 385 private void fill(double[] data, List<Integer> tiesTrace, double value) { 386 Iterator<Integer> iterator = tiesTrace.iterator(); 387 while (iterator.hasNext()) { 388 data[iterator.next()] = value; 389 } 390 } 391 392 /** 393 * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code> 394 * 395 * @param ranks array to modify 396 * @param nanPositions list of index values to set to <code>Double.NaN</code> 397 */ 398 private void restoreNaNs(double[] ranks, List<Integer> nanPositions) { 399 if (nanPositions.isEmpty()) { 400 return; 401 } 402 Iterator<Integer> iterator = nanPositions.iterator(); 403 while (iterator.hasNext()) { 404 ranks[iterator.next().intValue()] = Double.NaN; 405 } 406 } 407 408 /** 409 * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code> 410 * 411 * @param ranks array to search for <code>NaNs</code> 412 * @return list of indexes i such that <code>ranks[i] = NaN</code> 413 */ 414 private List<Integer> getNanPositions(IntDoublePair[] ranks) { 415 ArrayList<Integer> out = new ArrayList<>(); 416 for (int i = 0; i < ranks.length; i++) { 417 if (Double.isNaN(ranks[i].getValue())) { 418 out.add(Integer.valueOf(i)); 419 } 420 } 421 return out; 422 } 423 424 /** 425 * Represents the position of a double value in an ordering. 426 * Comparable interface is implemented so Arrays.sort can be used 427 * to sort an array of IntDoublePairs by value. Note that the 428 * implicitly defined natural ordering is NOT consistent with equals. 429 */ 430 private static class IntDoublePair implements Comparable<IntDoublePair> { 431 432 /** Value of the pair. */ 433 private final double value; 434 435 /** Original position of the pair. */ 436 private final int position; 437 438 /** 439 * Construct an IntDoublePair with the given value and position. 440 * @param value the value of the pair 441 * @param position the original position 442 */ 443 IntDoublePair(double value, int position) { 444 this.value = value; 445 this.position = position; 446 } 447 448 /** 449 * Compare this IntDoublePair to another pair. 450 * Only the <strong>values</strong> are compared. 451 * 452 * @param other the other pair to compare this to 453 * @return result of <code>Double.compare(value, other.value)</code> 454 */ 455 @Override 456 public int compareTo(IntDoublePair other) { 457 return Double.compare(value, other.value); 458 } 459 460 // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion. 461 462 /** 463 * Returns the value of the pair. 464 * @return value 465 */ 466 public double getValue() { 467 return value; 468 } 469 470 /** 471 * Returns the original position of the pair. 472 * @return position 473 */ 474 public int getPosition() { 475 return position; 476 } 477 } 478}