001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math3.stat.ranking;
019
020import java.util.ArrayList;
021import java.util.Arrays;
022import java.util.Iterator;
023import java.util.List;
024
025import org.apache.commons.math3.exception.MathInternalError;
026import org.apache.commons.math3.exception.NotANumberException;
027import org.apache.commons.math3.random.RandomDataGenerator;
028import org.apache.commons.math3.random.RandomGenerator;
029import org.apache.commons.math3.util.FastMath;
030
031
032/**
033 * <p> Ranking based on the natural ordering on doubles.</p>
034 * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
035 * are handled using the selected {@link TiesStrategy}.
036 * Configuration settings are supplied in optional constructor arguments.
037 * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE},
038 * respectively. When using {@link TiesStrategy#RANDOM}, a
039 * {@link RandomGenerator} may be supplied as a constructor argument.</p>
040 * <p>Examples:
041 * <table border="1" cellpadding="3">
042 * <tr><th colspan="3">
043 * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
044 * </th></tr>
045 * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
046 * <th><code>rank(data)</code></th>
047 * <tr>
048 * <td>default (NaNs maximal)</td>
049 * <td>default (ties averaged)</td>
050 * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
051 * <tr>
052 * <td>default (NaNs maximal)</td>
053 * <td>MINIMUM</td>
054 * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
055 * <tr>
056 * <td>MINIMAL</td>
057 * <td>default (ties averaged)</td>
058 * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
059 * <tr>
060 * <td>REMOVED</td>
061 * <td>SEQUENTIAL</td>
062 * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
063 * <tr>
064 * <td>MINIMAL</td>
065 * <td>MAXIMUM</td>
066 * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p>
067 *
068 * @since 2.0
069 * @version $Id: NaturalRanking.java 1454897 2013-03-10 19:02:54Z luc $
070 */
071public class NaturalRanking implements RankingAlgorithm {
072
073    /** default NaN strategy */
074    public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED;
075
076    /** default ties strategy */
077    public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
078
079    /** NaN strategy - defaults to NaNs maximal */
080    private final NaNStrategy nanStrategy;
081
082    /** Ties strategy - defaults to ties averaged */
083    private final TiesStrategy tiesStrategy;
084
085    /** Source of random data - used only when ties strategy is RANDOM */
086    private final RandomDataGenerator randomData;
087
088    /**
089     * Create a NaturalRanking with default strategies for handling ties and NaNs.
090     */
091    public NaturalRanking() {
092        super();
093        tiesStrategy = DEFAULT_TIES_STRATEGY;
094        nanStrategy = DEFAULT_NAN_STRATEGY;
095        randomData = null;
096    }
097
098    /**
099     * Create a NaturalRanking with the given TiesStrategy.
100     *
101     * @param tiesStrategy the TiesStrategy to use
102     */
103    public NaturalRanking(TiesStrategy tiesStrategy) {
104        super();
105        this.tiesStrategy = tiesStrategy;
106        nanStrategy = DEFAULT_NAN_STRATEGY;
107        randomData = new RandomDataGenerator();
108    }
109
110    /**
111     * Create a NaturalRanking with the given NaNStrategy.
112     *
113     * @param nanStrategy the NaNStrategy to use
114     */
115    public NaturalRanking(NaNStrategy nanStrategy) {
116        super();
117        this.nanStrategy = nanStrategy;
118        tiesStrategy = DEFAULT_TIES_STRATEGY;
119        randomData = null;
120    }
121
122    /**
123     * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
124     *
125     * @param nanStrategy NaNStrategy to use
126     * @param tiesStrategy TiesStrategy to use
127     */
128    public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
129        super();
130        this.nanStrategy = nanStrategy;
131        this.tiesStrategy = tiesStrategy;
132        randomData = new RandomDataGenerator();
133    }
134
135    /**
136     * Create a NaturalRanking with TiesStrategy.RANDOM and the given
137     * RandomGenerator as the source of random data.
138     *
139     * @param randomGenerator source of random data
140     */
141    public NaturalRanking(RandomGenerator randomGenerator) {
142        super();
143        this.tiesStrategy = TiesStrategy.RANDOM;
144        nanStrategy = DEFAULT_NAN_STRATEGY;
145        randomData = new RandomDataGenerator(randomGenerator);
146    }
147
148
149    /**
150     * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
151     * and the given source of random data.
152     *
153     * @param nanStrategy NaNStrategy to use
154     * @param randomGenerator source of random data
155     */
156    public NaturalRanking(NaNStrategy nanStrategy,
157            RandomGenerator randomGenerator) {
158        super();
159        this.nanStrategy = nanStrategy;
160        this.tiesStrategy = TiesStrategy.RANDOM;
161        randomData = new RandomDataGenerator(randomGenerator);
162    }
163
164    /**
165     * Return the NaNStrategy
166     *
167     * @return returns the NaNStrategy
168     */
169    public NaNStrategy getNanStrategy() {
170        return nanStrategy;
171    }
172
173    /**
174     * Return the TiesStrategy
175     *
176     * @return the TiesStrategy
177     */
178    public TiesStrategy getTiesStrategy() {
179        return tiesStrategy;
180    }
181
182    /**
183     * Rank <code>data</code> using the natural ordering on Doubles, with
184     * NaN values handled according to <code>nanStrategy</code> and ties
185     * resolved using <code>tiesStrategy.</code>
186     *
187     * @param data array to be ranked
188     * @return array of ranks
189     * @throws NotANumberException if the selected {@link NaNStrategy} is {@code FAILED}
190     * and a {@link Double#NaN} is encountered in the input data
191     */
192    public double[] rank(double[] data) {
193
194        // Array recording initial positions of data to be ranked
195        IntDoublePair[] ranks = new IntDoublePair[data.length];
196        for (int i = 0; i < data.length; i++) {
197            ranks[i] = new IntDoublePair(data[i], i);
198        }
199
200        // Recode, remove or record positions of NaNs
201        List<Integer> nanPositions = null;
202        switch (nanStrategy) {
203            case MAXIMAL: // Replace NaNs with +INFs
204                recodeNaNs(ranks, Double.POSITIVE_INFINITY);
205                break;
206            case MINIMAL: // Replace NaNs with -INFs
207                recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
208                break;
209            case REMOVED: // Drop NaNs from data
210                ranks = removeNaNs(ranks);
211                break;
212            case FIXED:   // Record positions of NaNs
213                nanPositions = getNanPositions(ranks);
214                break;
215            case FAILED:
216                nanPositions = getNanPositions(ranks);
217                if (nanPositions.size() > 0) {
218                    throw new NotANumberException();
219                }
220                break;
221            default: // this should not happen unless NaNStrategy enum is changed
222                throw new MathInternalError();
223        }
224
225        // Sort the IntDoublePairs
226        Arrays.sort(ranks);
227
228        // Walk the sorted array, filling output array using sorted positions,
229        // resolving ties as we go
230        double[] out = new double[ranks.length];
231        int pos = 1;  // position in sorted array
232        out[ranks[0].getPosition()] = pos;
233        List<Integer> tiesTrace = new ArrayList<Integer>();
234        tiesTrace.add(ranks[0].getPosition());
235        for (int i = 1; i < ranks.length; i++) {
236            if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
237                // tie sequence has ended (or had length 1)
238                pos = i + 1;
239                if (tiesTrace.size() > 1) {  // if seq is nontrivial, resolve
240                    resolveTie(out, tiesTrace);
241                }
242                tiesTrace = new ArrayList<Integer>();
243                tiesTrace.add(ranks[i].getPosition());
244            } else {
245                // tie sequence continues
246                tiesTrace.add(ranks[i].getPosition());
247            }
248            out[ranks[i].getPosition()] = pos;
249        }
250        if (tiesTrace.size() > 1) {  // handle tie sequence at end
251            resolveTie(out, tiesTrace);
252        }
253        if (nanStrategy == NaNStrategy.FIXED) {
254            restoreNaNs(out, nanPositions);
255        }
256        return out;
257    }
258
259    /**
260     * Returns an array that is a copy of the input array with IntDoublePairs
261     * having NaN values removed.
262     *
263     * @param ranks input array
264     * @return array with NaN-valued entries removed
265     */
266    private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
267        if (!containsNaNs(ranks)) {
268            return ranks;
269        }
270        IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
271        int j = 0;
272        for (int i = 0; i < ranks.length; i++) {
273            if (Double.isNaN(ranks[i].getValue())) {
274                // drop, but adjust original ranks of later elements
275                for (int k = i + 1; k < ranks.length; k++) {
276                    ranks[k] = new IntDoublePair(
277                            ranks[k].getValue(), ranks[k].getPosition() - 1);
278                }
279            } else {
280                outRanks[j] = new IntDoublePair(
281                        ranks[i].getValue(), ranks[i].getPosition());
282                j++;
283            }
284        }
285        IntDoublePair[] returnRanks = new IntDoublePair[j];
286        System.arraycopy(outRanks, 0, returnRanks, 0, j);
287        return returnRanks;
288    }
289
290    /**
291     * Recodes NaN values to the given value.
292     *
293     * @param ranks array to recode
294     * @param value the value to replace NaNs with
295     */
296    private void recodeNaNs(IntDoublePair[] ranks, double value) {
297        for (int i = 0; i < ranks.length; i++) {
298            if (Double.isNaN(ranks[i].getValue())) {
299                ranks[i] = new IntDoublePair(
300                        value, ranks[i].getPosition());
301            }
302        }
303    }
304
305    /**
306     * Checks for presence of NaNs in <code>ranks.</code>
307     *
308     * @param ranks array to be searched for NaNs
309     * @return true iff ranks contains one or more NaNs
310     */
311    private boolean containsNaNs(IntDoublePair[] ranks) {
312        for (int i = 0; i < ranks.length; i++) {
313            if (Double.isNaN(ranks[i].getValue())) {
314                return true;
315            }
316        }
317        return false;
318    }
319
320    /**
321     * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
322     * The input <code>ranks</code> array is expected to take the same value
323     * for all indices in <code>tiesTrace</code>.  The common value is recoded
324     * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
325     * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
326     * The same array and trace with tiesStrategy AVERAGE will come out
327     * <5,8,3,6,3,7,1,3>.
328     *
329     * @param ranks array of ranks
330     * @param tiesTrace list of indices where <code>ranks</code> is constant
331     * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
332     * </code>
333     */
334    private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
335
336        // constant value of ranks over tiesTrace
337        final double c = ranks[tiesTrace.get(0)];
338
339        // length of sequence of tied ranks
340        final int length = tiesTrace.size();
341
342        switch (tiesStrategy) {
343            case  AVERAGE:  // Replace ranks with average
344                fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
345                break;
346            case MAXIMUM:   // Replace ranks with maximum values
347                fill(ranks, tiesTrace, c + length - 1);
348                break;
349            case MINIMUM:   // Replace ties with minimum
350                fill(ranks, tiesTrace, c);
351                break;
352            case RANDOM:    // Fill with random integral values in [c, c + length - 1]
353                Iterator<Integer> iterator = tiesTrace.iterator();
354                long f = FastMath.round(c);
355                while (iterator.hasNext()) {
356                    // No advertised exception because args are guaranteed valid
357                    ranks[iterator.next()] =
358                        randomData.nextLong(f, f + length - 1);
359                }
360                break;
361            case SEQUENTIAL:  // Fill sequentially from c to c + length - 1
362                // walk and fill
363                iterator = tiesTrace.iterator();
364                f = FastMath.round(c);
365                int i = 0;
366                while (iterator.hasNext()) {
367                    ranks[iterator.next()] = f + i++;
368                }
369                break;
370            default: // this should not happen unless TiesStrategy enum is changed
371                throw new MathInternalError();
372        }
373    }
374
375    /**
376     * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
377     *
378     * @param data array to modify
379     * @param tiesTrace list of index values to set
380     * @param value value to set
381     */
382    private void fill(double[] data, List<Integer> tiesTrace, double value) {
383        Iterator<Integer> iterator = tiesTrace.iterator();
384        while (iterator.hasNext()) {
385            data[iterator.next()] = value;
386        }
387    }
388
389    /**
390     * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
391     *
392     * @param ranks array to modify
393     * @param nanPositions list of index values to set to <code>Double.NaN</code>
394     */
395    private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
396        if (nanPositions.size() == 0) {
397            return;
398        }
399        Iterator<Integer> iterator = nanPositions.iterator();
400        while (iterator.hasNext()) {
401            ranks[iterator.next().intValue()] = Double.NaN;
402        }
403
404    }
405
406    /**
407     * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
408     *
409     * @param ranks array to search for <code>NaNs</code>
410     * @return list of indexes i such that <code>ranks[i] = NaN</code>
411     */
412    private List<Integer> getNanPositions(IntDoublePair[] ranks) {
413        ArrayList<Integer> out = new ArrayList<Integer>();
414        for (int i = 0; i < ranks.length; i++) {
415            if (Double.isNaN(ranks[i].getValue())) {
416                out.add(Integer.valueOf(i));
417            }
418        }
419        return out;
420    }
421
422    /**
423     * Represents the position of a double value in an ordering.
424     * Comparable interface is implemented so Arrays.sort can be used
425     * to sort an array of IntDoublePairs by value.  Note that the
426     * implicitly defined natural ordering is NOT consistent with equals.
427     */
428    private static class IntDoublePair implements Comparable<IntDoublePair>  {
429
430        /** Value of the pair */
431        private final double value;
432
433        /** Original position of the pair */
434        private final int position;
435
436        /**
437         * Construct an IntDoublePair with the given value and position.
438         * @param value the value of the pair
439         * @param position the original position
440         */
441        public IntDoublePair(double value, int position) {
442            this.value = value;
443            this.position = position;
444        }
445
446        /**
447         * Compare this IntDoublePair to another pair.
448         * Only the <strong>values</strong> are compared.
449         *
450         * @param other the other pair to compare this to
451         * @return result of <code>Double.compare(value, other.value)</code>
452         */
453        public int compareTo(IntDoublePair other) {
454            return Double.compare(value, other.value);
455        }
456
457        // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion.
458
459        /**
460         * Returns the value of the pair.
461         * @return value
462         */
463        public double getValue() {
464            return value;
465        }
466
467        /**
468         * Returns the original position of the pair.
469         * @return position
470         */
471        public int getPosition() {
472            return position;
473        }
474    }
475}