View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math4.legacy.stat.ranking;
19  
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.Iterator;
23  import java.util.List;
24  
25  import org.apache.commons.rng.UniformRandomProvider;
26  import org.apache.commons.rng.simple.RandomSource;
27  import org.apache.commons.rng.sampling.distribution.UniformLongSampler;
28  import org.apache.commons.math4.legacy.exception.MathInternalError;
29  import org.apache.commons.math4.legacy.exception.NotANumberException;
30  import org.apache.commons.math4.core.jdkmath.JdkMath;
31  
32  
33  /**
34   * Ranking based on the natural ordering on doubles.
35   *
36   * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
37   * are handled using the selected {@link TiesStrategy}.
38   * Configuration settings are supplied in optional constructor arguments.
39   * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE},
40   * respectively. When using {@link TiesStrategy#RANDOM}, a
41   * {@link UniformRandomProvider random generator} may be supplied as a
42   * constructor argument.</p>
43   * <p>Examples:
44   * <table border="">
45   * <caption>Examples</caption>
46   * <tr><th colspan="3">
47   * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
48   * </th></tr>
49   * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
50   * <th><code>rank(data)</code></th>
51   * <tr>
52   * <td>default (NaNs maximal)</td>
53   * <td>default (ties averaged)</td>
54   * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
55   * <tr>
56   * <td>default (NaNs maximal)</td>
57   * <td>MINIMUM</td>
58   * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
59   * <tr>
60   * <td>MINIMAL</td>
61   * <td>default (ties averaged)</td>
62   * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
63   * <tr>
64   * <td>REMOVED</td>
65   * <td>SEQUENTIAL</td>
66   * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
67   * <tr>
68   * <td>MINIMAL</td>
69   * <td>MAXIMUM</td>
70   * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table>
71   *
72   * @since 2.0
73   */
74  public class NaturalRanking implements RankingAlgorithm {
75  
76      /** default NaN strategy. */
77      public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED;
78  
79      /** default ties strategy. */
80      public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
81  
82      /** NaN strategy - defaults to NaNs maximal. */
83      private final NaNStrategy nanStrategy;
84  
85      /** Ties strategy - defaults to ties averaged. */
86      private final TiesStrategy tiesStrategy;
87  
88      /** Source of random data - used only when ties strategy is RANDOM. */
89      private final UniformRandomProvider random;
90  
91      /**
92       * Create a NaturalRanking with default strategies for handling ties and NaNs.
93       */
94      public NaturalRanking() {
95          this(DEFAULT_NAN_STRATEGY, DEFAULT_TIES_STRATEGY, null);
96      }
97  
98      /**
99       * Create a NaturalRanking with the given TiesStrategy.
100      *
101      * @param tiesStrategy the TiesStrategy to use
102      */
103     public NaturalRanking(TiesStrategy tiesStrategy) {
104         this(DEFAULT_NAN_STRATEGY,
105              tiesStrategy,
106              RandomSource.WELL_19937_C.create());
107     }
108 
109     /**
110      * Create a NaturalRanking with the given NaNStrategy.
111      *
112      * @param nanStrategy the NaNStrategy to use
113      */
114     public NaturalRanking(NaNStrategy nanStrategy) {
115         this(nanStrategy, DEFAULT_TIES_STRATEGY, null);
116     }
117 
118     /**
119      * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
120      *
121      * @param nanStrategy NaNStrategy to use
122      * @param tiesStrategy TiesStrategy to use
123      */
124     public NaturalRanking(NaNStrategy nanStrategy,
125                           TiesStrategy tiesStrategy) {
126         this(nanStrategy,
127              tiesStrategy,
128              RandomSource.WELL_19937_C.create());
129     }
130 
131     /**
132      * Create a NaturalRanking with TiesStrategy.RANDOM and the given
133      * random generator as the source of random data.
134      *
135      * @param randomGenerator source of random data
136      */
137     public NaturalRanking(UniformRandomProvider randomGenerator) {
138         this(DEFAULT_NAN_STRATEGY, TiesStrategy.RANDOM, randomGenerator);
139     }
140 
141     /**
142      * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
143      * and the given source of random data.
144      *
145      * @param nanStrategy NaNStrategy to use
146      * @param randomGenerator source of random data
147      */
148     public NaturalRanking(NaNStrategy nanStrategy,
149                           UniformRandomProvider randomGenerator) {
150         this(nanStrategy, TiesStrategy.RANDOM, randomGenerator);
151     }
152 
153     /**
154      * @param nanStrategy NaN strategy.
155      * @param tiesStrategy Tie strategy.
156      * @param random RNG.
157      */
158     private NaturalRanking(NaNStrategy nanStrategy,
159                            TiesStrategy tiesStrategy,
160                            UniformRandomProvider random) {
161         this.nanStrategy = nanStrategy;
162         this.tiesStrategy = tiesStrategy;
163         this.random = random;
164     }
165 
166     /**
167      * Return the NaNStrategy.
168      *
169      * @return returns the NaNStrategy
170      */
171     public NaNStrategy getNanStrategy() {
172         return nanStrategy;
173     }
174 
175     /**
176      * Return the TiesStrategy.
177      *
178      * @return the TiesStrategy
179      */
180     public TiesStrategy getTiesStrategy() {
181         return tiesStrategy;
182     }
183 
184     /**
185      * Rank <code>data</code> using the natural ordering on Doubles, with
186      * NaN values handled according to <code>nanStrategy</code> and ties
187      * resolved using <code>tiesStrategy.</code>
188      *
189      * @param data array to be ranked
190      * @return array of ranks
191      * @throws NotANumberException if the selected {@link NaNStrategy} is {@code FAILED}
192      * and a {@link Double#NaN} is encountered in the input data
193      */
194     @Override
195     public double[] rank(double[] data) {
196 
197         // Array recording initial positions of data to be ranked
198         IntDoublePair[] ranks = new IntDoublePair[data.length];
199         for (int i = 0; i < data.length; i++) {
200             ranks[i] = new IntDoublePair(data[i], i);
201         }
202 
203         // Recode, remove or record positions of NaNs
204         List<Integer> nanPositions = null;
205         switch (nanStrategy) {
206             case MAXIMAL: // Replace NaNs with +INFs
207                 recodeNaNs(ranks, Double.POSITIVE_INFINITY);
208                 break;
209             case MINIMAL: // Replace NaNs with -INFs
210                 recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
211                 break;
212             case REMOVED: // Drop NaNs from data
213                 ranks = removeNaNs(ranks);
214                 break;
215             case FIXED:   // Record positions of NaNs
216                 nanPositions = getNanPositions(ranks);
217                 break;
218             case FAILED:
219                 nanPositions = getNanPositions(ranks);
220                 if (nanPositions.size() > 0) {
221                     throw new NotANumberException();
222                 }
223                 break;
224             default: // this should not happen unless NaNStrategy enum is changed
225                 throw new MathInternalError();
226         }
227 
228         // Sort the IntDoublePairs
229         Arrays.sort(ranks);
230 
231         // Walk the sorted array, filling output array using sorted positions,
232         // resolving ties as we go
233         double[] out = new double[ranks.length];
234         int pos = 1;  // position in sorted array
235         out[ranks[0].getPosition()] = pos;
236         List<Integer> tiesTrace = new ArrayList<>();
237         tiesTrace.add(ranks[0].getPosition());
238         for (int i = 1; i < ranks.length; i++) {
239             if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
240                 // tie sequence has ended (or had length 1)
241                 pos = i + 1;
242                 if (tiesTrace.size() > 1) {  // if seq is nontrivial, resolve
243                     resolveTie(out, tiesTrace);
244                 }
245                 tiesTrace = new ArrayList<>();
246                 tiesTrace.add(ranks[i].getPosition());
247             } else {
248                 // tie sequence continues
249                 tiesTrace.add(ranks[i].getPosition());
250             }
251             out[ranks[i].getPosition()] = pos;
252         }
253         if (tiesTrace.size() > 1) {  // handle tie sequence at end
254             resolveTie(out, tiesTrace);
255         }
256         if (nanStrategy == NaNStrategy.FIXED) {
257             restoreNaNs(out, nanPositions);
258         }
259         return out;
260     }
261 
262     /**
263      * Returns an array that is a copy of the input array with IntDoublePairs
264      * having NaN values removed.
265      *
266      * @param ranks input array
267      * @return array with NaN-valued entries removed
268      */
269     private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
270         if (!containsNaNs(ranks)) {
271             return ranks;
272         }
273         IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
274         int j = 0;
275         for (int i = 0; i < ranks.length; i++) {
276             if (Double.isNaN(ranks[i].getValue())) {
277                 // drop, but adjust original ranks of later elements
278                 for (int k = i + 1; k < ranks.length; k++) {
279                     ranks[k] = new IntDoublePair(
280                             ranks[k].getValue(), ranks[k].getPosition() - 1);
281                 }
282             } else {
283                 outRanks[j] = new IntDoublePair(
284                         ranks[i].getValue(), ranks[i].getPosition());
285                 j++;
286             }
287         }
288         IntDoublePair[] returnRanks = new IntDoublePair[j];
289         System.arraycopy(outRanks, 0, returnRanks, 0, j);
290         return returnRanks;
291     }
292 
293     /**
294      * Recodes NaN values to the given value.
295      *
296      * @param ranks array to recode
297      * @param value the value to replace NaNs with
298      */
299     private void recodeNaNs(IntDoublePair[] ranks, double value) {
300         for (int i = 0; i < ranks.length; i++) {
301             if (Double.isNaN(ranks[i].getValue())) {
302                 ranks[i] = new IntDoublePair(
303                         value, ranks[i].getPosition());
304             }
305         }
306     }
307 
308     /**
309      * Checks for presence of NaNs in <code>ranks.</code>
310      *
311      * @param ranks array to be searched for NaNs
312      * @return true iff ranks contains one or more NaNs
313      */
314     private boolean containsNaNs(IntDoublePair[] ranks) {
315         for (int i = 0; i < ranks.length; i++) {
316             if (Double.isNaN(ranks[i].getValue())) {
317                 return true;
318             }
319         }
320         return false;
321     }
322 
323     /**
324      * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
325      * The input <code>ranks</code> array is expected to take the same value
326      * for all indices in <code>tiesTrace</code>.  The common value is recoded
327      * according to the tiesStrategy. For example, if ranks = [5,8,2,6,2,7,1,2],
328      * tiesTrace = [2,4,7] and tiesStrategy is MINIMUM, ranks will be unchanged.
329      * The same array and trace with tiesStrategy AVERAGE will come out
330      * [5,8,3,6,3,7,1,3].
331      *
332      * @param ranks array of ranks
333      * @param tiesTrace list of indices where <code>ranks</code> is constant
334      * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
335      * </code>
336      */
337     private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
338 
339         // constant value of ranks over tiesTrace
340         final double c = ranks[tiesTrace.get(0)];
341 
342         // length of sequence of tied ranks
343         final int length = tiesTrace.size();
344 
345         switch (tiesStrategy) {
346             case  AVERAGE:  // Replace ranks with average
347                 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
348                 break;
349             case MAXIMUM:   // Replace ranks with maximum values
350                 fill(ranks, tiesTrace, c + length - 1);
351                 break;
352             case MINIMUM:   // Replace ties with minimum
353                 fill(ranks, tiesTrace, c);
354                 break;
355             case RANDOM:    // Fill with random integral values in [c, c + length - 1]
356                 Iterator<Integer> iterator = tiesTrace.iterator();
357                 long f = JdkMath.round(c);
358                 final UniformLongSampler sampler = UniformLongSampler.of(random, f, f + length - 1);
359                 while (iterator.hasNext()) {
360                     // No advertised exception because args are guaranteed valid
361                     ranks[iterator.next()] = sampler.sample();
362                 }
363                 break;
364             case SEQUENTIAL:  // Fill sequentially from c to c + length - 1
365                 // walk and fill
366                 iterator = tiesTrace.iterator();
367                 f = JdkMath.round(c);
368                 int i = 0;
369                 while (iterator.hasNext()) {
370                     ranks[iterator.next()] = f + i++;
371                 }
372                 break;
373             default: // this should not happen unless TiesStrategy enum is changed
374                 throw new MathInternalError();
375         }
376     }
377 
378     /**
379      * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
380      *
381      * @param data array to modify
382      * @param tiesTrace list of index values to set
383      * @param value value to set
384      */
385     private void fill(double[] data, List<Integer> tiesTrace, double value) {
386         Iterator<Integer> iterator = tiesTrace.iterator();
387         while (iterator.hasNext()) {
388             data[iterator.next()] = value;
389         }
390     }
391 
392     /**
393      * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
394      *
395      * @param ranks array to modify
396      * @param nanPositions list of index values to set to <code>Double.NaN</code>
397      */
398     private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
399         if (nanPositions.isEmpty()) {
400             return;
401         }
402         Iterator<Integer> iterator = nanPositions.iterator();
403         while (iterator.hasNext()) {
404             ranks[iterator.next().intValue()] = Double.NaN;
405         }
406     }
407 
408     /**
409      * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
410      *
411      * @param ranks array to search for <code>NaNs</code>
412      * @return list of indexes i such that <code>ranks[i] = NaN</code>
413      */
414     private List<Integer> getNanPositions(IntDoublePair[] ranks) {
415         ArrayList<Integer> out = new ArrayList<>();
416         for (int i = 0; i < ranks.length; i++) {
417             if (Double.isNaN(ranks[i].getValue())) {
418                 out.add(Integer.valueOf(i));
419             }
420         }
421         return out;
422     }
423 
424     /**
425      * Represents the position of a double value in an ordering.
426      * Comparable interface is implemented so Arrays.sort can be used
427      * to sort an array of IntDoublePairs by value.  Note that the
428      * implicitly defined natural ordering is NOT consistent with equals.
429      */
430     private static final class IntDoublePair implements Comparable<IntDoublePair>  {
431 
432         /** Value of the pair. */
433         private final double value;
434 
435         /** Original position of the pair. */
436         private final int position;
437 
438         /**
439          * Construct an IntDoublePair with the given value and position.
440          * @param value the value of the pair
441          * @param position the original position
442          */
443         IntDoublePair(double value, int position) {
444             this.value = value;
445             this.position = position;
446         }
447 
448         /**
449          * Compare this IntDoublePair to another pair.
450          * Only the <strong>values</strong> are compared.
451          *
452          * @param other the other pair to compare this to
453          * @return result of <code>Double.compare(value, other.value)</code>
454          */
455         @Override
456         public int compareTo(IntDoublePair other) {
457             return Double.compare(value, other.value);
458         }
459 
460         // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion.
461 
462         /**
463          * Returns the value of the pair.
464          * @return value
465          */
466         public double getValue() {
467             return value;
468         }
469 
470         /**
471          * Returns the original position of the pair.
472          * @return position
473          */
474         public int getPosition() {
475             return position;
476         }
477     }
478 }