001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math4.legacy.stat.ranking;
019
020import java.util.ArrayList;
021import java.util.Arrays;
022import java.util.Iterator;
023import java.util.List;
024
025import org.apache.commons.rng.UniformRandomProvider;
026import org.apache.commons.rng.simple.RandomSource;
027import org.apache.commons.rng.sampling.distribution.UniformLongSampler;
028import org.apache.commons.math4.legacy.exception.MathInternalError;
029import org.apache.commons.math4.legacy.exception.NotANumberException;
030import org.apache.commons.math4.core.jdkmath.JdkMath;
031
032
033/**
034 * Ranking based on the natural ordering on doubles.
035 *
036 * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
037 * are handled using the selected {@link TiesStrategy}.
038 * Configuration settings are supplied in optional constructor arguments.
039 * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE},
040 * respectively. When using {@link TiesStrategy#RANDOM}, a
041 * {@link UniformRandomProvider random generator} may be supplied as a
042 * constructor argument.</p>
043 * <p>Examples:
044 * <table border="">
045 * <caption>Examples</caption>
046 * <tr><th colspan="3">
047 * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
048 * </th></tr>
 * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
 * <th><code>rank(data)</code></th></tr>
 * <tr>
 * <td>MAXIMAL</td>
 * <td>default (ties averaged)</td>
 * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
 * <tr>
 * <td>MAXIMAL</td>
 * <td>MINIMUM</td>
 * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
059 * <tr>
060 * <td>MINIMAL</td>
061 * <td>default (ties averaged)</td>
062 * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
063 * <tr>
064 * <td>REMOVED</td>
065 * <td>SEQUENTIAL</td>
066 * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
067 * <tr>
068 * <td>MINIMAL</td>
069 * <td>MAXIMUM</td>
070 * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table>
071 *
072 * @since 2.0
073 */
074public class NaturalRanking implements RankingAlgorithm {
075
076    /** default NaN strategy. */
077    public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED;
078
079    /** default ties strategy. */
080    public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
081
082    /** NaN strategy - defaults to NaNs maximal. */
083    private final NaNStrategy nanStrategy;
084
085    /** Ties strategy - defaults to ties averaged. */
086    private final TiesStrategy tiesStrategy;
087
088    /** Source of random data - used only when ties strategy is RANDOM. */
089    private final UniformRandomProvider random;
090
091    /**
092     * Create a NaturalRanking with default strategies for handling ties and NaNs.
093     */
094    public NaturalRanking() {
095        this(DEFAULT_NAN_STRATEGY, DEFAULT_TIES_STRATEGY, null);
096    }
097
098    /**
099     * Create a NaturalRanking with the given TiesStrategy.
100     *
101     * @param tiesStrategy the TiesStrategy to use
102     */
103    public NaturalRanking(TiesStrategy tiesStrategy) {
104        this(DEFAULT_NAN_STRATEGY,
105             tiesStrategy,
106             RandomSource.WELL_19937_C.create());
107    }
108
109    /**
110     * Create a NaturalRanking with the given NaNStrategy.
111     *
112     * @param nanStrategy the NaNStrategy to use
113     */
114    public NaturalRanking(NaNStrategy nanStrategy) {
115        this(nanStrategy, DEFAULT_TIES_STRATEGY, null);
116    }
117
118    /**
119     * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
120     *
121     * @param nanStrategy NaNStrategy to use
122     * @param tiesStrategy TiesStrategy to use
123     */
124    public NaturalRanking(NaNStrategy nanStrategy,
125                          TiesStrategy tiesStrategy) {
126        this(nanStrategy,
127             tiesStrategy,
128             RandomSource.WELL_19937_C.create());
129    }
130
131    /**
132     * Create a NaturalRanking with TiesStrategy.RANDOM and the given
133     * random generator as the source of random data.
134     *
135     * @param randomGenerator source of random data
136     */
137    public NaturalRanking(UniformRandomProvider randomGenerator) {
138        this(DEFAULT_NAN_STRATEGY, TiesStrategy.RANDOM, randomGenerator);
139    }
140
141    /**
142     * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
143     * and the given source of random data.
144     *
145     * @param nanStrategy NaNStrategy to use
146     * @param randomGenerator source of random data
147     */
148    public NaturalRanking(NaNStrategy nanStrategy,
149                          UniformRandomProvider randomGenerator) {
150        this(nanStrategy, TiesStrategy.RANDOM, randomGenerator);
151    }
152
153    /**
154     * @param nanStrategy NaN strategy.
155     * @param tiesStrategy Tie strategy.
156     * @param random RNG.
157     */
158    private NaturalRanking(NaNStrategy nanStrategy,
159                           TiesStrategy tiesStrategy,
160                           UniformRandomProvider random) {
161        this.nanStrategy = nanStrategy;
162        this.tiesStrategy = tiesStrategy;
163        this.random = random;
164    }
165
166    /**
167     * Return the NaNStrategy.
168     *
169     * @return returns the NaNStrategy
170     */
171    public NaNStrategy getNanStrategy() {
172        return nanStrategy;
173    }
174
175    /**
176     * Return the TiesStrategy.
177     *
178     * @return the TiesStrategy
179     */
180    public TiesStrategy getTiesStrategy() {
181        return tiesStrategy;
182    }
183
184    /**
185     * Rank <code>data</code> using the natural ordering on Doubles, with
186     * NaN values handled according to <code>nanStrategy</code> and ties
187     * resolved using <code>tiesStrategy.</code>
188     *
189     * @param data array to be ranked
190     * @return array of ranks
191     * @throws NotANumberException if the selected {@link NaNStrategy} is {@code FAILED}
192     * and a {@link Double#NaN} is encountered in the input data
193     */
194    @Override
195    public double[] rank(double[] data) {
196
197        // Array recording initial positions of data to be ranked
198        IntDoublePair[] ranks = new IntDoublePair[data.length];
199        for (int i = 0; i < data.length; i++) {
200            ranks[i] = new IntDoublePair(data[i], i);
201        }
202
203        // Recode, remove or record positions of NaNs
204        List<Integer> nanPositions = null;
205        switch (nanStrategy) {
206            case MAXIMAL: // Replace NaNs with +INFs
207                recodeNaNs(ranks, Double.POSITIVE_INFINITY);
208                break;
209            case MINIMAL: // Replace NaNs with -INFs
210                recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
211                break;
212            case REMOVED: // Drop NaNs from data
213                ranks = removeNaNs(ranks);
214                break;
215            case FIXED:   // Record positions of NaNs
216                nanPositions = getNanPositions(ranks);
217                break;
218            case FAILED:
219                nanPositions = getNanPositions(ranks);
220                if (nanPositions.size() > 0) {
221                    throw new NotANumberException();
222                }
223                break;
224            default: // this should not happen unless NaNStrategy enum is changed
225                throw new MathInternalError();
226        }
227
228        // Sort the IntDoublePairs
229        Arrays.sort(ranks);
230
231        // Walk the sorted array, filling output array using sorted positions,
232        // resolving ties as we go
233        double[] out = new double[ranks.length];
234        int pos = 1;  // position in sorted array
235        out[ranks[0].getPosition()] = pos;
236        List<Integer> tiesTrace = new ArrayList<>();
237        tiesTrace.add(ranks[0].getPosition());
238        for (int i = 1; i < ranks.length; i++) {
239            if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
240                // tie sequence has ended (or had length 1)
241                pos = i + 1;
242                if (tiesTrace.size() > 1) {  // if seq is nontrivial, resolve
243                    resolveTie(out, tiesTrace);
244                }
245                tiesTrace = new ArrayList<>();
246                tiesTrace.add(ranks[i].getPosition());
247            } else {
248                // tie sequence continues
249                tiesTrace.add(ranks[i].getPosition());
250            }
251            out[ranks[i].getPosition()] = pos;
252        }
253        if (tiesTrace.size() > 1) {  // handle tie sequence at end
254            resolveTie(out, tiesTrace);
255        }
256        if (nanStrategy == NaNStrategy.FIXED) {
257            restoreNaNs(out, nanPositions);
258        }
259        return out;
260    }
261
262    /**
263     * Returns an array that is a copy of the input array with IntDoublePairs
264     * having NaN values removed.
265     *
266     * @param ranks input array
267     * @return array with NaN-valued entries removed
268     */
269    private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
270        if (!containsNaNs(ranks)) {
271            return ranks;
272        }
273        IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
274        int j = 0;
275        for (int i = 0; i < ranks.length; i++) {
276            if (Double.isNaN(ranks[i].getValue())) {
277                // drop, but adjust original ranks of later elements
278                for (int k = i + 1; k < ranks.length; k++) {
279                    ranks[k] = new IntDoublePair(
280                            ranks[k].getValue(), ranks[k].getPosition() - 1);
281                }
282            } else {
283                outRanks[j] = new IntDoublePair(
284                        ranks[i].getValue(), ranks[i].getPosition());
285                j++;
286            }
287        }
288        IntDoublePair[] returnRanks = new IntDoublePair[j];
289        System.arraycopy(outRanks, 0, returnRanks, 0, j);
290        return returnRanks;
291    }
292
293    /**
294     * Recodes NaN values to the given value.
295     *
296     * @param ranks array to recode
297     * @param value the value to replace NaNs with
298     */
299    private void recodeNaNs(IntDoublePair[] ranks, double value) {
300        for (int i = 0; i < ranks.length; i++) {
301            if (Double.isNaN(ranks[i].getValue())) {
302                ranks[i] = new IntDoublePair(
303                        value, ranks[i].getPosition());
304            }
305        }
306    }
307
308    /**
309     * Checks for presence of NaNs in <code>ranks.</code>
310     *
311     * @param ranks array to be searched for NaNs
312     * @return true iff ranks contains one or more NaNs
313     */
314    private boolean containsNaNs(IntDoublePair[] ranks) {
315        for (int i = 0; i < ranks.length; i++) {
316            if (Double.isNaN(ranks[i].getValue())) {
317                return true;
318            }
319        }
320        return false;
321    }
322
323    /**
324     * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
325     * The input <code>ranks</code> array is expected to take the same value
326     * for all indices in <code>tiesTrace</code>.  The common value is recoded
327     * according to the tiesStrategy. For example, if ranks = [5,8,2,6,2,7,1,2],
328     * tiesTrace = [2,4,7] and tiesStrategy is MINIMUM, ranks will be unchanged.
329     * The same array and trace with tiesStrategy AVERAGE will come out
330     * [5,8,3,6,3,7,1,3].
331     *
332     * @param ranks array of ranks
333     * @param tiesTrace list of indices where <code>ranks</code> is constant
334     * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
335     * </code>
336     */
337    private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
338
339        // constant value of ranks over tiesTrace
340        final double c = ranks[tiesTrace.get(0)];
341
342        // length of sequence of tied ranks
343        final int length = tiesTrace.size();
344
345        switch (tiesStrategy) {
346            case  AVERAGE:  // Replace ranks with average
347                fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
348                break;
349            case MAXIMUM:   // Replace ranks with maximum values
350                fill(ranks, tiesTrace, c + length - 1);
351                break;
352            case MINIMUM:   // Replace ties with minimum
353                fill(ranks, tiesTrace, c);
354                break;
355            case RANDOM:    // Fill with random integral values in [c, c + length - 1]
356                Iterator<Integer> iterator = tiesTrace.iterator();
357                long f = JdkMath.round(c);
358                final UniformLongSampler sampler = UniformLongSampler.of(random, f, f + length - 1);
359                while (iterator.hasNext()) {
360                    // No advertised exception because args are guaranteed valid
361                    ranks[iterator.next()] = sampler.sample();
362                }
363                break;
364            case SEQUENTIAL:  // Fill sequentially from c to c + length - 1
365                // walk and fill
366                iterator = tiesTrace.iterator();
367                f = JdkMath.round(c);
368                int i = 0;
369                while (iterator.hasNext()) {
370                    ranks[iterator.next()] = f + i++;
371                }
372                break;
373            default: // this should not happen unless TiesStrategy enum is changed
374                throw new MathInternalError();
375        }
376    }
377
378    /**
379     * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
380     *
381     * @param data array to modify
382     * @param tiesTrace list of index values to set
383     * @param value value to set
384     */
385    private void fill(double[] data, List<Integer> tiesTrace, double value) {
386        Iterator<Integer> iterator = tiesTrace.iterator();
387        while (iterator.hasNext()) {
388            data[iterator.next()] = value;
389        }
390    }
391
392    /**
393     * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
394     *
395     * @param ranks array to modify
396     * @param nanPositions list of index values to set to <code>Double.NaN</code>
397     */
398    private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
399        if (nanPositions.isEmpty()) {
400            return;
401        }
402        Iterator<Integer> iterator = nanPositions.iterator();
403        while (iterator.hasNext()) {
404            ranks[iterator.next().intValue()] = Double.NaN;
405        }
406    }
407
408    /**
409     * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
410     *
411     * @param ranks array to search for <code>NaNs</code>
412     * @return list of indexes i such that <code>ranks[i] = NaN</code>
413     */
414    private List<Integer> getNanPositions(IntDoublePair[] ranks) {
415        ArrayList<Integer> out = new ArrayList<>();
416        for (int i = 0; i < ranks.length; i++) {
417            if (Double.isNaN(ranks[i].getValue())) {
418                out.add(Integer.valueOf(i));
419            }
420        }
421        return out;
422    }
423
424    /**
425     * Represents the position of a double value in an ordering.
426     * Comparable interface is implemented so Arrays.sort can be used
427     * to sort an array of IntDoublePairs by value.  Note that the
428     * implicitly defined natural ordering is NOT consistent with equals.
429     */
430    private static class IntDoublePair implements Comparable<IntDoublePair>  {
431
432        /** Value of the pair. */
433        private final double value;
434
435        /** Original position of the pair. */
436        private final int position;
437
438        /**
439         * Construct an IntDoublePair with the given value and position.
440         * @param value the value of the pair
441         * @param position the original position
442         */
443        IntDoublePair(double value, int position) {
444            this.value = value;
445            this.position = position;
446        }
447
448        /**
449         * Compare this IntDoublePair to another pair.
450         * Only the <strong>values</strong> are compared.
451         *
452         * @param other the other pair to compare this to
453         * @return result of <code>Double.compare(value, other.value)</code>
454         */
455        @Override
456        public int compareTo(IntDoublePair other) {
457            return Double.compare(value, other.value);
458        }
459
460        // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion.
461
462        /**
463         * Returns the value of the pair.
464         * @return value
465         */
466        public double getValue() {
467            return value;
468        }
469
470        /**
471         * Returns the original position of the pair.
472         * @return position
473         */
474        public int getPosition() {
475            return position;
476        }
477    }
478}