001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math3.stat.ranking;
019
020import java.util.ArrayList;
021import java.util.Arrays;
022import java.util.Iterator;
023import java.util.List;
024
025import org.apache.commons.math3.exception.MathInternalError;
026import org.apache.commons.math3.exception.NotANumberException;
027import org.apache.commons.math3.random.RandomDataGenerator;
028import org.apache.commons.math3.random.RandomGenerator;
029import org.apache.commons.math3.util.FastMath;
030
031
032/**
033 * <p> Ranking based on the natural ordering on doubles.</p>
034 * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
035 * are handled using the selected {@link TiesStrategy}.
036 * Configuration settings are supplied in optional constructor arguments.
037 * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE},
038 * respectively. When using {@link TiesStrategy#RANDOM}, a
039 * {@link RandomGenerator} may be supplied as a constructor argument.</p>
040 * <p>Examples:
041 * <table border="1" cellpadding="3">
042 * <tr><th colspan="3">
043 * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
044 * </th></tr>
045 * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
046 * <th><code>rank(data)</code></th>
047 * <tr>
048 * <td>default (NaNs maximal)</td>
049 * <td>default (ties averaged)</td>
050 * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
051 * <tr>
052 * <td>default (NaNs maximal)</td>
053 * <td>MINIMUM</td>
054 * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
055 * <tr>
056 * <td>MINIMAL</td>
057 * <td>default (ties averaged)</td>
058 * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
059 * <tr>
060 * <td>REMOVED</td>
061 * <td>SEQUENTIAL</td>
062 * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
063 * <tr>
064 * <td>MINIMAL</td>
065 * <td>MAXIMUM</td>
066 * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p>
067 *
068 * @since 2.0
069 */
070public class NaturalRanking implements RankingAlgorithm {
071
072    /** default NaN strategy */
073    public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED;
074
075    /** default ties strategy */
076    public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
077
078    /** NaN strategy - defaults to NaNs maximal */
079    private final NaNStrategy nanStrategy;
080
081    /** Ties strategy - defaults to ties averaged */
082    private final TiesStrategy tiesStrategy;
083
084    /** Source of random data - used only when ties strategy is RANDOM */
085    private final RandomDataGenerator randomData;
086
087    /**
088     * Create a NaturalRanking with default strategies for handling ties and NaNs.
089     */
090    public NaturalRanking() {
091        super();
092        tiesStrategy = DEFAULT_TIES_STRATEGY;
093        nanStrategy = DEFAULT_NAN_STRATEGY;
094        randomData = null;
095    }
096
097    /**
098     * Create a NaturalRanking with the given TiesStrategy.
099     *
100     * @param tiesStrategy the TiesStrategy to use
101     */
102    public NaturalRanking(TiesStrategy tiesStrategy) {
103        super();
104        this.tiesStrategy = tiesStrategy;
105        nanStrategy = DEFAULT_NAN_STRATEGY;
106        randomData = new RandomDataGenerator();
107    }
108
109    /**
110     * Create a NaturalRanking with the given NaNStrategy.
111     *
112     * @param nanStrategy the NaNStrategy to use
113     */
114    public NaturalRanking(NaNStrategy nanStrategy) {
115        super();
116        this.nanStrategy = nanStrategy;
117        tiesStrategy = DEFAULT_TIES_STRATEGY;
118        randomData = null;
119    }
120
121    /**
122     * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
123     *
124     * @param nanStrategy NaNStrategy to use
125     * @param tiesStrategy TiesStrategy to use
126     */
127    public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
128        super();
129        this.nanStrategy = nanStrategy;
130        this.tiesStrategy = tiesStrategy;
131        randomData = new RandomDataGenerator();
132    }
133
134    /**
135     * Create a NaturalRanking with TiesStrategy.RANDOM and the given
136     * RandomGenerator as the source of random data.
137     *
138     * @param randomGenerator source of random data
139     */
140    public NaturalRanking(RandomGenerator randomGenerator) {
141        super();
142        this.tiesStrategy = TiesStrategy.RANDOM;
143        nanStrategy = DEFAULT_NAN_STRATEGY;
144        randomData = new RandomDataGenerator(randomGenerator);
145    }
146
147
148    /**
149     * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
150     * and the given source of random data.
151     *
152     * @param nanStrategy NaNStrategy to use
153     * @param randomGenerator source of random data
154     */
155    public NaturalRanking(NaNStrategy nanStrategy,
156            RandomGenerator randomGenerator) {
157        super();
158        this.nanStrategy = nanStrategy;
159        this.tiesStrategy = TiesStrategy.RANDOM;
160        randomData = new RandomDataGenerator(randomGenerator);
161    }
162
163    /**
164     * Return the NaNStrategy
165     *
166     * @return returns the NaNStrategy
167     */
168    public NaNStrategy getNanStrategy() {
169        return nanStrategy;
170    }
171
172    /**
173     * Return the TiesStrategy
174     *
175     * @return the TiesStrategy
176     */
177    public TiesStrategy getTiesStrategy() {
178        return tiesStrategy;
179    }
180
181    /**
182     * Rank <code>data</code> using the natural ordering on Doubles, with
183     * NaN values handled according to <code>nanStrategy</code> and ties
184     * resolved using <code>tiesStrategy.</code>
185     *
186     * @param data array to be ranked
187     * @return array of ranks
188     * @throws NotANumberException if the selected {@link NaNStrategy} is {@code FAILED}
189     * and a {@link Double#NaN} is encountered in the input data
190     */
191    public double[] rank(double[] data) {
192
193        // Array recording initial positions of data to be ranked
194        IntDoublePair[] ranks = new IntDoublePair[data.length];
195        for (int i = 0; i < data.length; i++) {
196            ranks[i] = new IntDoublePair(data[i], i);
197        }
198
199        // Recode, remove or record positions of NaNs
200        List<Integer> nanPositions = null;
201        switch (nanStrategy) {
202            case MAXIMAL: // Replace NaNs with +INFs
203                recodeNaNs(ranks, Double.POSITIVE_INFINITY);
204                break;
205            case MINIMAL: // Replace NaNs with -INFs
206                recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
207                break;
208            case REMOVED: // Drop NaNs from data
209                ranks = removeNaNs(ranks);
210                break;
211            case FIXED:   // Record positions of NaNs
212                nanPositions = getNanPositions(ranks);
213                break;
214            case FAILED:
215                nanPositions = getNanPositions(ranks);
216                if (nanPositions.size() > 0) {
217                    throw new NotANumberException();
218                }
219                break;
220            default: // this should not happen unless NaNStrategy enum is changed
221                throw new MathInternalError();
222        }
223
224        // Sort the IntDoublePairs
225        Arrays.sort(ranks);
226
227        // Walk the sorted array, filling output array using sorted positions,
228        // resolving ties as we go
229        double[] out = new double[ranks.length];
230        int pos = 1;  // position in sorted array
231        out[ranks[0].getPosition()] = pos;
232        List<Integer> tiesTrace = new ArrayList<Integer>();
233        tiesTrace.add(ranks[0].getPosition());
234        for (int i = 1; i < ranks.length; i++) {
235            if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
236                // tie sequence has ended (or had length 1)
237                pos = i + 1;
238                if (tiesTrace.size() > 1) {  // if seq is nontrivial, resolve
239                    resolveTie(out, tiesTrace);
240                }
241                tiesTrace = new ArrayList<Integer>();
242                tiesTrace.add(ranks[i].getPosition());
243            } else {
244                // tie sequence continues
245                tiesTrace.add(ranks[i].getPosition());
246            }
247            out[ranks[i].getPosition()] = pos;
248        }
249        if (tiesTrace.size() > 1) {  // handle tie sequence at end
250            resolveTie(out, tiesTrace);
251        }
252        if (nanStrategy == NaNStrategy.FIXED) {
253            restoreNaNs(out, nanPositions);
254        }
255        return out;
256    }
257
258    /**
259     * Returns an array that is a copy of the input array with IntDoublePairs
260     * having NaN values removed.
261     *
262     * @param ranks input array
263     * @return array with NaN-valued entries removed
264     */
265    private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
266        if (!containsNaNs(ranks)) {
267            return ranks;
268        }
269        IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
270        int j = 0;
271        for (int i = 0; i < ranks.length; i++) {
272            if (Double.isNaN(ranks[i].getValue())) {
273                // drop, but adjust original ranks of later elements
274                for (int k = i + 1; k < ranks.length; k++) {
275                    ranks[k] = new IntDoublePair(
276                            ranks[k].getValue(), ranks[k].getPosition() - 1);
277                }
278            } else {
279                outRanks[j] = new IntDoublePair(
280                        ranks[i].getValue(), ranks[i].getPosition());
281                j++;
282            }
283        }
284        IntDoublePair[] returnRanks = new IntDoublePair[j];
285        System.arraycopy(outRanks, 0, returnRanks, 0, j);
286        return returnRanks;
287    }
288
289    /**
290     * Recodes NaN values to the given value.
291     *
292     * @param ranks array to recode
293     * @param value the value to replace NaNs with
294     */
295    private void recodeNaNs(IntDoublePair[] ranks, double value) {
296        for (int i = 0; i < ranks.length; i++) {
297            if (Double.isNaN(ranks[i].getValue())) {
298                ranks[i] = new IntDoublePair(
299                        value, ranks[i].getPosition());
300            }
301        }
302    }
303
304    /**
305     * Checks for presence of NaNs in <code>ranks.</code>
306     *
307     * @param ranks array to be searched for NaNs
308     * @return true iff ranks contains one or more NaNs
309     */
310    private boolean containsNaNs(IntDoublePair[] ranks) {
311        for (int i = 0; i < ranks.length; i++) {
312            if (Double.isNaN(ranks[i].getValue())) {
313                return true;
314            }
315        }
316        return false;
317    }
318
319    /**
320     * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
321     * The input <code>ranks</code> array is expected to take the same value
322     * for all indices in <code>tiesTrace</code>.  The common value is recoded
323     * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
324     * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
325     * The same array and trace with tiesStrategy AVERAGE will come out
326     * <5,8,3,6,3,7,1,3>.
327     *
328     * @param ranks array of ranks
329     * @param tiesTrace list of indices where <code>ranks</code> is constant
330     * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
331     * </code>
332     */
333    private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
334
335        // constant value of ranks over tiesTrace
336        final double c = ranks[tiesTrace.get(0)];
337
338        // length of sequence of tied ranks
339        final int length = tiesTrace.size();
340
341        switch (tiesStrategy) {
342            case  AVERAGE:  // Replace ranks with average
343                fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
344                break;
345            case MAXIMUM:   // Replace ranks with maximum values
346                fill(ranks, tiesTrace, c + length - 1);
347                break;
348            case MINIMUM:   // Replace ties with minimum
349                fill(ranks, tiesTrace, c);
350                break;
351            case RANDOM:    // Fill with random integral values in [c, c + length - 1]
352                Iterator<Integer> iterator = tiesTrace.iterator();
353                long f = FastMath.round(c);
354                while (iterator.hasNext()) {
355                    // No advertised exception because args are guaranteed valid
356                    ranks[iterator.next()] =
357                        randomData.nextLong(f, f + length - 1);
358                }
359                break;
360            case SEQUENTIAL:  // Fill sequentially from c to c + length - 1
361                // walk and fill
362                iterator = tiesTrace.iterator();
363                f = FastMath.round(c);
364                int i = 0;
365                while (iterator.hasNext()) {
366                    ranks[iterator.next()] = f + i++;
367                }
368                break;
369            default: // this should not happen unless TiesStrategy enum is changed
370                throw new MathInternalError();
371        }
372    }
373
374    /**
375     * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
376     *
377     * @param data array to modify
378     * @param tiesTrace list of index values to set
379     * @param value value to set
380     */
381    private void fill(double[] data, List<Integer> tiesTrace, double value) {
382        Iterator<Integer> iterator = tiesTrace.iterator();
383        while (iterator.hasNext()) {
384            data[iterator.next()] = value;
385        }
386    }
387
388    /**
389     * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
390     *
391     * @param ranks array to modify
392     * @param nanPositions list of index values to set to <code>Double.NaN</code>
393     */
394    private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
395        if (nanPositions.size() == 0) {
396            return;
397        }
398        Iterator<Integer> iterator = nanPositions.iterator();
399        while (iterator.hasNext()) {
400            ranks[iterator.next().intValue()] = Double.NaN;
401        }
402
403    }
404
405    /**
406     * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
407     *
408     * @param ranks array to search for <code>NaNs</code>
409     * @return list of indexes i such that <code>ranks[i] = NaN</code>
410     */
411    private List<Integer> getNanPositions(IntDoublePair[] ranks) {
412        ArrayList<Integer> out = new ArrayList<Integer>();
413        for (int i = 0; i < ranks.length; i++) {
414            if (Double.isNaN(ranks[i].getValue())) {
415                out.add(Integer.valueOf(i));
416            }
417        }
418        return out;
419    }
420
421    /**
422     * Represents the position of a double value in an ordering.
423     * Comparable interface is implemented so Arrays.sort can be used
424     * to sort an array of IntDoublePairs by value.  Note that the
425     * implicitly defined natural ordering is NOT consistent with equals.
426     */
427    private static class IntDoublePair implements Comparable<IntDoublePair>  {
428
429        /** Value of the pair */
430        private final double value;
431
432        /** Original position of the pair */
433        private final int position;
434
435        /**
436         * Construct an IntDoublePair with the given value and position.
437         * @param value the value of the pair
438         * @param position the original position
439         */
440        IntDoublePair(double value, int position) {
441            this.value = value;
442            this.position = position;
443        }
444
445        /**
446         * Compare this IntDoublePair to another pair.
447         * Only the <strong>values</strong> are compared.
448         *
449         * @param other the other pair to compare this to
450         * @return result of <code>Double.compare(value, other.value)</code>
451         */
452        public int compareTo(IntDoublePair other) {
453            return Double.compare(value, other.value);
454        }
455
456        // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion.
457
458        /**
459         * Returns the value of the pair.
460         * @return value
461         */
462        public double getValue() {
463            return value;
464        }
465
466        /**
467         * Returns the original position of the pair.
468         * @return position
469         */
470        public int getPosition() {
471            return position;
472        }
473    }
474}