001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math4.stat.ranking;
019
020import java.util.ArrayList;
021import java.util.Arrays;
022import java.util.Iterator;
023import java.util.List;
024
025import org.apache.commons.math4.exception.MathInternalError;
026import org.apache.commons.math4.exception.NotANumberException;
027import org.apache.commons.rng.UniformRandomProvider;
028import org.apache.commons.rng.simple.RandomSource;
029import org.apache.commons.math4.random.RandomUtils;
030import org.apache.commons.math4.util.FastMath;
031
032
033/**
034 * Ranking based on the natural ordering on doubles.
035 *
036 * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
037 * are handled using the selected {@link TiesStrategy}.
038 * Configuration settings are supplied in optional constructor arguments.
039 * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE},
040 * respectively. When using {@link TiesStrategy#RANDOM}, a
041 * {@link UniformRandomProvider random generator} may be supplied as a
042 * constructor argument.</p>
043 * <p>Examples:
044 * <table border="1" cellpadding="3" summary="Examples">
045 * <tr><th colspan="3">
046 * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
047 * </th></tr>
048 * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
049 * <th><code>rank(data)</code></th>
050 * <tr>
051 * <td>default (NaNs maximal)</td>
052 * <td>default (ties averaged)</td>
053 * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
054 * <tr>
055 * <td>default (NaNs maximal)</td>
056 * <td>MINIMUM</td>
057 * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
058 * <tr>
059 * <td>MINIMAL</td>
060 * <td>default (ties averaged)</td>
061 * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
062 * <tr>
063 * <td>REMOVED</td>
064 * <td>SEQUENTIAL</td>
065 * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
066 * <tr>
067 * <td>MINIMAL</td>
068 * <td>MAXIMUM</td>
069 * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table>
070 *
071 * @since 2.0
072 */
073public class NaturalRanking implements RankingAlgorithm {
074
075    /** default NaN strategy */
076    public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED;
077
078    /** default ties strategy */
079    public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
080
081    /** NaN strategy - defaults to NaNs maximal */
082    private final NaNStrategy nanStrategy;
083
084    /** Ties strategy - defaults to ties averaged */
085    private final TiesStrategy tiesStrategy;
086
087    /** Source of random data - used only when ties strategy is RANDOM */
088    private final RandomUtils.DataGenerator randomData;
089
090    /**
091     * Create a NaturalRanking with default strategies for handling ties and NaNs.
092     */
093    public NaturalRanking() {
094        this(DEFAULT_NAN_STRATEGY, DEFAULT_TIES_STRATEGY, null);
095    }
096
097    /**
098     * Create a NaturalRanking with the given TiesStrategy.
099     *
100     * @param tiesStrategy the TiesStrategy to use
101     */
102    public NaturalRanking(TiesStrategy tiesStrategy) {
103        this(DEFAULT_NAN_STRATEGY,
104             tiesStrategy,
105             RandomSource.create(RandomSource.WELL_19937_C));
106    }
107
108    /**
109     * Create a NaturalRanking with the given NaNStrategy.
110     *
111     * @param nanStrategy the NaNStrategy to use
112     */
113    public NaturalRanking(NaNStrategy nanStrategy) {
114        this(nanStrategy, DEFAULT_TIES_STRATEGY, null);
115    }
116
117    /**
118     * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
119     *
120     * @param nanStrategy NaNStrategy to use
121     * @param tiesStrategy TiesStrategy to use
122     */
123    public NaturalRanking(NaNStrategy nanStrategy,
124                          TiesStrategy tiesStrategy) {
125        this(nanStrategy,
126             tiesStrategy,
127             RandomSource.create(RandomSource.WELL_19937_C));
128    }
129
130    /**
131     * Create a NaturalRanking with TiesStrategy.RANDOM and the given
132     * random generator as the source of random data.
133     *
134     * @param randomGenerator source of random data
135     */
136    public NaturalRanking(UniformRandomProvider randomGenerator) {
137        this(DEFAULT_NAN_STRATEGY, TiesStrategy.RANDOM, randomGenerator);
138    }
139
140    /**
141     * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
142     * and the given source of random data.
143     *
144     * @param nanStrategy NaNStrategy to use
145     * @param randomGenerator source of random data
146     */
147    public NaturalRanking(NaNStrategy nanStrategy,
148                          UniformRandomProvider randomGenerator) {
149        this(nanStrategy, TiesStrategy.RANDOM, randomGenerator);
150    }
151
152    /**
153     * @param nanStrategy NaN strategy.
154     * @param tiesStrategy Tie strategy.
155     * @param randomGenerator RNG.
156     */
157    private NaturalRanking(NaNStrategy nanStrategy,
158                           TiesStrategy tiesStrategy,
159                           UniformRandomProvider randomGenerator) {
160        this.nanStrategy = nanStrategy;
161        this.tiesStrategy = tiesStrategy;
162        randomData = RandomUtils.createDataGenerator(randomGenerator);
163    }
164
165    /**
166     * Return the NaNStrategy
167     *
168     * @return returns the NaNStrategy
169     */
170    public NaNStrategy getNanStrategy() {
171        return nanStrategy;
172    }
173
174    /**
175     * Return the TiesStrategy
176     *
177     * @return the TiesStrategy
178     */
179    public TiesStrategy getTiesStrategy() {
180        return tiesStrategy;
181    }
182
183    /**
184     * Rank <code>data</code> using the natural ordering on Doubles, with
185     * NaN values handled according to <code>nanStrategy</code> and ties
186     * resolved using <code>tiesStrategy.</code>
187     *
188     * @param data array to be ranked
189     * @return array of ranks
190     * @throws NotANumberException if the selected {@link NaNStrategy} is {@code FAILED}
191     * and a {@link Double#NaN} is encountered in the input data
192     */
193    @Override
194    public double[] rank(double[] data) {
195
196        // Array recording initial positions of data to be ranked
197        IntDoublePair[] ranks = new IntDoublePair[data.length];
198        for (int i = 0; i < data.length; i++) {
199            ranks[i] = new IntDoublePair(data[i], i);
200        }
201
202        // Recode, remove or record positions of NaNs
203        List<Integer> nanPositions = null;
204        switch (nanStrategy) {
205            case MAXIMAL: // Replace NaNs with +INFs
206                recodeNaNs(ranks, Double.POSITIVE_INFINITY);
207                break;
208            case MINIMAL: // Replace NaNs with -INFs
209                recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
210                break;
211            case REMOVED: // Drop NaNs from data
212                ranks = removeNaNs(ranks);
213                break;
214            case FIXED:   // Record positions of NaNs
215                nanPositions = getNanPositions(ranks);
216                break;
217            case FAILED:
218                nanPositions = getNanPositions(ranks);
219                if (nanPositions.size() > 0) {
220                    throw new NotANumberException();
221                }
222                break;
223            default: // this should not happen unless NaNStrategy enum is changed
224                throw new MathInternalError();
225        }
226
227        // Sort the IntDoublePairs
228        Arrays.sort(ranks);
229
230        // Walk the sorted array, filling output array using sorted positions,
231        // resolving ties as we go
232        double[] out = new double[ranks.length];
233        int pos = 1;  // position in sorted array
234        out[ranks[0].getPosition()] = pos;
235        List<Integer> tiesTrace = new ArrayList<>();
236        tiesTrace.add(ranks[0].getPosition());
237        for (int i = 1; i < ranks.length; i++) {
238            if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
239                // tie sequence has ended (or had length 1)
240                pos = i + 1;
241                if (tiesTrace.size() > 1) {  // if seq is nontrivial, resolve
242                    resolveTie(out, tiesTrace);
243                }
244                tiesTrace = new ArrayList<>();
245                tiesTrace.add(ranks[i].getPosition());
246            } else {
247                // tie sequence continues
248                tiesTrace.add(ranks[i].getPosition());
249            }
250            out[ranks[i].getPosition()] = pos;
251        }
252        if (tiesTrace.size() > 1) {  // handle tie sequence at end
253            resolveTie(out, tiesTrace);
254        }
255        if (nanStrategy == NaNStrategy.FIXED) {
256            restoreNaNs(out, nanPositions);
257        }
258        return out;
259    }
260
261    /**
262     * Returns an array that is a copy of the input array with IntDoublePairs
263     * having NaN values removed.
264     *
265     * @param ranks input array
266     * @return array with NaN-valued entries removed
267     */
268    private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
269        if (!containsNaNs(ranks)) {
270            return ranks;
271        }
272        IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
273        int j = 0;
274        for (int i = 0; i < ranks.length; i++) {
275            if (Double.isNaN(ranks[i].getValue())) {
276                // drop, but adjust original ranks of later elements
277                for (int k = i + 1; k < ranks.length; k++) {
278                    ranks[k] = new IntDoublePair(
279                            ranks[k].getValue(), ranks[k].getPosition() - 1);
280                }
281            } else {
282                outRanks[j] = new IntDoublePair(
283                        ranks[i].getValue(), ranks[i].getPosition());
284                j++;
285            }
286        }
287        IntDoublePair[] returnRanks = new IntDoublePair[j];
288        System.arraycopy(outRanks, 0, returnRanks, 0, j);
289        return returnRanks;
290    }
291
292    /**
293     * Recodes NaN values to the given value.
294     *
295     * @param ranks array to recode
296     * @param value the value to replace NaNs with
297     */
298    private void recodeNaNs(IntDoublePair[] ranks, double value) {
299        for (int i = 0; i < ranks.length; i++) {
300            if (Double.isNaN(ranks[i].getValue())) {
301                ranks[i] = new IntDoublePair(
302                        value, ranks[i].getPosition());
303            }
304        }
305    }
306
307    /**
308     * Checks for presence of NaNs in <code>ranks.</code>
309     *
310     * @param ranks array to be searched for NaNs
311     * @return true iff ranks contains one or more NaNs
312     */
313    private boolean containsNaNs(IntDoublePair[] ranks) {
314        for (int i = 0; i < ranks.length; i++) {
315            if (Double.isNaN(ranks[i].getValue())) {
316                return true;
317            }
318        }
319        return false;
320    }
321
322    /**
323     * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
324     * The input <code>ranks</code> array is expected to take the same value
325     * for all indices in <code>tiesTrace</code>.  The common value is recoded
326     * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
327     * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
328     * The same array and trace with tiesStrategy AVERAGE will come out
329     * <5,8,3,6,3,7,1,3>.
330     *
331     * @param ranks array of ranks
332     * @param tiesTrace list of indices where <code>ranks</code> is constant
333     * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
334     * </code>
335     */
336    private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
337
338        // constant value of ranks over tiesTrace
339        final double c = ranks[tiesTrace.get(0)];
340
341        // length of sequence of tied ranks
342        final int length = tiesTrace.size();
343
344        switch (tiesStrategy) {
345            case  AVERAGE:  // Replace ranks with average
346                fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
347                break;
348            case MAXIMUM:   // Replace ranks with maximum values
349                fill(ranks, tiesTrace, c + length - 1);
350                break;
351            case MINIMUM:   // Replace ties with minimum
352                fill(ranks, tiesTrace, c);
353                break;
354            case RANDOM:    // Fill with random integral values in [c, c + length - 1]
355                Iterator<Integer> iterator = tiesTrace.iterator();
356                long f = FastMath.round(c);
357                while (iterator.hasNext()) {
358                    // No advertised exception because args are guaranteed valid
359                    ranks[iterator.next()] =
360                        randomData.nextLong(f, f + length - 1);
361                }
362                break;
363            case SEQUENTIAL:  // Fill sequentially from c to c + length - 1
364                // walk and fill
365                iterator = tiesTrace.iterator();
366                f = FastMath.round(c);
367                int i = 0;
368                while (iterator.hasNext()) {
369                    ranks[iterator.next()] = f + i++;
370                }
371                break;
372            default: // this should not happen unless TiesStrategy enum is changed
373                throw new MathInternalError();
374        }
375    }
376
377    /**
378     * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
379     *
380     * @param data array to modify
381     * @param tiesTrace list of index values to set
382     * @param value value to set
383     */
384    private void fill(double[] data, List<Integer> tiesTrace, double value) {
385        Iterator<Integer> iterator = tiesTrace.iterator();
386        while (iterator.hasNext()) {
387            data[iterator.next()] = value;
388        }
389    }
390
391    /**
392     * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
393     *
394     * @param ranks array to modify
395     * @param nanPositions list of index values to set to <code>Double.NaN</code>
396     */
397    private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
398        if (nanPositions.isEmpty()) {
399            return;
400        }
401        Iterator<Integer> iterator = nanPositions.iterator();
402        while (iterator.hasNext()) {
403            ranks[iterator.next().intValue()] = Double.NaN;
404        }
405
406    }
407
408    /**
409     * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
410     *
411     * @param ranks array to search for <code>NaNs</code>
412     * @return list of indexes i such that <code>ranks[i] = NaN</code>
413     */
414    private List<Integer> getNanPositions(IntDoublePair[] ranks) {
415        ArrayList<Integer> out = new ArrayList<>();
416        for (int i = 0; i < ranks.length; i++) {
417            if (Double.isNaN(ranks[i].getValue())) {
418                out.add(Integer.valueOf(i));
419            }
420        }
421        return out;
422    }
423
424    /**
425     * Represents the position of a double value in an ordering.
426     * Comparable interface is implemented so Arrays.sort can be used
427     * to sort an array of IntDoublePairs by value.  Note that the
428     * implicitly defined natural ordering is NOT consistent with equals.
429     */
430    private static class IntDoublePair implements Comparable<IntDoublePair>  {
431
432        /** Value of the pair */
433        private final double value;
434
435        /** Original position of the pair */
436        private final int position;
437
438        /**
439         * Construct an IntDoublePair with the given value and position.
440         * @param value the value of the pair
441         * @param position the original position
442         */
443        IntDoublePair(double value, int position) {
444            this.value = value;
445            this.position = position;
446        }
447
448        /**
449         * Compare this IntDoublePair to another pair.
450         * Only the <strong>values</strong> are compared.
451         *
452         * @param other the other pair to compare this to
453         * @return result of <code>Double.compare(value, other.value)</code>
454         */
455        @Override
456        public int compareTo(IntDoublePair other) {
457            return Double.compare(value, other.value);
458        }
459
460        // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion.
461
462        /**
463         * Returns the value of the pair.
464         * @return value
465         */
466        public double getValue() {
467            return value;
468        }
469
470        /**
471         * Returns the original position of the pair.
472         * @return position
473         */
474        public int getPosition() {
475            return position;
476        }
477    }
478}