001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math3.stat.ranking;
019    
020    import java.util.ArrayList;
021    import java.util.Arrays;
022    import java.util.Iterator;
023    import java.util.List;
024    
025    import org.apache.commons.math3.exception.MathInternalError;
026    import org.apache.commons.math3.exception.NotANumberException;
027    import org.apache.commons.math3.random.RandomDataGenerator;
028    import org.apache.commons.math3.random.RandomGenerator;
029    import org.apache.commons.math3.util.FastMath;
030    
031    
032    /**
033     * <p> Ranking based on the natural ordering on doubles.</p>
034     * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
035     * are handled using the selected {@link TiesStrategy}.
036     * Configuration settings are supplied in optional constructor arguments.
037     * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE},
038     * respectively. When using {@link TiesStrategy#RANDOM}, a
039     * {@link RandomGenerator} may be supplied as a constructor argument.</p>
040     * <p>Examples:
041     * <table border="1" cellpadding="3">
042     * <tr><th colspan="3">
043     * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
044     * </th></tr>
045     * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
046     * <th><code>rank(data)</code></th>
047     * <tr>
048     * <td>default (NaNs maximal)</td>
049     * <td>default (ties averaged)</td>
050     * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
051     * <tr>
052     * <td>default (NaNs maximal)</td>
053     * <td>MINIMUM</td>
054     * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
055     * <tr>
056     * <td>MINIMAL</td>
057     * <td>default (ties averaged)</td>
058     * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
059     * <tr>
060     * <td>REMOVED</td>
061     * <td>SEQUENTIAL</td>
062     * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
063     * <tr>
064     * <td>MINIMAL</td>
065     * <td>MAXIMUM</td>
066     * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p>
067     *
068     * @since 2.0
069     * @version $Id: NaturalRanking.java 1454897 2013-03-10 19:02:54Z luc $
070     */
071    public class NaturalRanking implements RankingAlgorithm {
072    
073        /** default NaN strategy */
074        public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED;
075    
076        /** default ties strategy */
077        public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
078    
079        /** NaN strategy - defaults to NaNs maximal */
080        private final NaNStrategy nanStrategy;
081    
082        /** Ties strategy - defaults to ties averaged */
083        private final TiesStrategy tiesStrategy;
084    
085        /** Source of random data - used only when ties strategy is RANDOM */
086        private final RandomDataGenerator randomData;
087    
088        /**
089         * Create a NaturalRanking with default strategies for handling ties and NaNs.
090         */
091        public NaturalRanking() {
092            super();
093            tiesStrategy = DEFAULT_TIES_STRATEGY;
094            nanStrategy = DEFAULT_NAN_STRATEGY;
095            randomData = null;
096        }
097    
098        /**
099         * Create a NaturalRanking with the given TiesStrategy.
100         *
101         * @param tiesStrategy the TiesStrategy to use
102         */
103        public NaturalRanking(TiesStrategy tiesStrategy) {
104            super();
105            this.tiesStrategy = tiesStrategy;
106            nanStrategy = DEFAULT_NAN_STRATEGY;
107            randomData = new RandomDataGenerator();
108        }
109    
110        /**
111         * Create a NaturalRanking with the given NaNStrategy.
112         *
113         * @param nanStrategy the NaNStrategy to use
114         */
115        public NaturalRanking(NaNStrategy nanStrategy) {
116            super();
117            this.nanStrategy = nanStrategy;
118            tiesStrategy = DEFAULT_TIES_STRATEGY;
119            randomData = null;
120        }
121    
122        /**
123         * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
124         *
125         * @param nanStrategy NaNStrategy to use
126         * @param tiesStrategy TiesStrategy to use
127         */
128        public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
129            super();
130            this.nanStrategy = nanStrategy;
131            this.tiesStrategy = tiesStrategy;
132            randomData = new RandomDataGenerator();
133        }
134    
135        /**
136         * Create a NaturalRanking with TiesStrategy.RANDOM and the given
137         * RandomGenerator as the source of random data.
138         *
139         * @param randomGenerator source of random data
140         */
141        public NaturalRanking(RandomGenerator randomGenerator) {
142            super();
143            this.tiesStrategy = TiesStrategy.RANDOM;
144            nanStrategy = DEFAULT_NAN_STRATEGY;
145            randomData = new RandomDataGenerator(randomGenerator);
146        }
147    
148    
149        /**
150         * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
151         * and the given source of random data.
152         *
153         * @param nanStrategy NaNStrategy to use
154         * @param randomGenerator source of random data
155         */
156        public NaturalRanking(NaNStrategy nanStrategy,
157                RandomGenerator randomGenerator) {
158            super();
159            this.nanStrategy = nanStrategy;
160            this.tiesStrategy = TiesStrategy.RANDOM;
161            randomData = new RandomDataGenerator(randomGenerator);
162        }
163    
164        /**
165         * Return the NaNStrategy
166         *
167         * @return returns the NaNStrategy
168         */
169        public NaNStrategy getNanStrategy() {
170            return nanStrategy;
171        }
172    
173        /**
174         * Return the TiesStrategy
175         *
176         * @return the TiesStrategy
177         */
178        public TiesStrategy getTiesStrategy() {
179            return tiesStrategy;
180        }
181    
182        /**
183         * Rank <code>data</code> using the natural ordering on Doubles, with
184         * NaN values handled according to <code>nanStrategy</code> and ties
185         * resolved using <code>tiesStrategy.</code>
186         *
187         * @param data array to be ranked
188         * @return array of ranks
189         * @throws NotANumberException if the selected {@link NaNStrategy} is {@code FAILED}
190         * and a {@link Double#NaN} is encountered in the input data
191         */
192        public double[] rank(double[] data) {
193    
194            // Array recording initial positions of data to be ranked
195            IntDoublePair[] ranks = new IntDoublePair[data.length];
196            for (int i = 0; i < data.length; i++) {
197                ranks[i] = new IntDoublePair(data[i], i);
198            }
199    
200            // Recode, remove or record positions of NaNs
201            List<Integer> nanPositions = null;
202            switch (nanStrategy) {
203                case MAXIMAL: // Replace NaNs with +INFs
204                    recodeNaNs(ranks, Double.POSITIVE_INFINITY);
205                    break;
206                case MINIMAL: // Replace NaNs with -INFs
207                    recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
208                    break;
209                case REMOVED: // Drop NaNs from data
210                    ranks = removeNaNs(ranks);
211                    break;
212                case FIXED:   // Record positions of NaNs
213                    nanPositions = getNanPositions(ranks);
214                    break;
215                case FAILED:
216                    nanPositions = getNanPositions(ranks);
217                    if (nanPositions.size() > 0) {
218                        throw new NotANumberException();
219                    }
220                    break;
221                default: // this should not happen unless NaNStrategy enum is changed
222                    throw new MathInternalError();
223            }
224    
225            // Sort the IntDoublePairs
226            Arrays.sort(ranks);
227    
228            // Walk the sorted array, filling output array using sorted positions,
229            // resolving ties as we go
230            double[] out = new double[ranks.length];
231            int pos = 1;  // position in sorted array
232            out[ranks[0].getPosition()] = pos;
233            List<Integer> tiesTrace = new ArrayList<Integer>();
234            tiesTrace.add(ranks[0].getPosition());
235            for (int i = 1; i < ranks.length; i++) {
236                if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
237                    // tie sequence has ended (or had length 1)
238                    pos = i + 1;
239                    if (tiesTrace.size() > 1) {  // if seq is nontrivial, resolve
240                        resolveTie(out, tiesTrace);
241                    }
242                    tiesTrace = new ArrayList<Integer>();
243                    tiesTrace.add(ranks[i].getPosition());
244                } else {
245                    // tie sequence continues
246                    tiesTrace.add(ranks[i].getPosition());
247                }
248                out[ranks[i].getPosition()] = pos;
249            }
250            if (tiesTrace.size() > 1) {  // handle tie sequence at end
251                resolveTie(out, tiesTrace);
252            }
253            if (nanStrategy == NaNStrategy.FIXED) {
254                restoreNaNs(out, nanPositions);
255            }
256            return out;
257        }
258    
259        /**
260         * Returns an array that is a copy of the input array with IntDoublePairs
261         * having NaN values removed.
262         *
263         * @param ranks input array
264         * @return array with NaN-valued entries removed
265         */
266        private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
267            if (!containsNaNs(ranks)) {
268                return ranks;
269            }
270            IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
271            int j = 0;
272            for (int i = 0; i < ranks.length; i++) {
273                if (Double.isNaN(ranks[i].getValue())) {
274                    // drop, but adjust original ranks of later elements
275                    for (int k = i + 1; k < ranks.length; k++) {
276                        ranks[k] = new IntDoublePair(
277                                ranks[k].getValue(), ranks[k].getPosition() - 1);
278                    }
279                } else {
280                    outRanks[j] = new IntDoublePair(
281                            ranks[i].getValue(), ranks[i].getPosition());
282                    j++;
283                }
284            }
285            IntDoublePair[] returnRanks = new IntDoublePair[j];
286            System.arraycopy(outRanks, 0, returnRanks, 0, j);
287            return returnRanks;
288        }
289    
290        /**
291         * Recodes NaN values to the given value.
292         *
293         * @param ranks array to recode
294         * @param value the value to replace NaNs with
295         */
296        private void recodeNaNs(IntDoublePair[] ranks, double value) {
297            for (int i = 0; i < ranks.length; i++) {
298                if (Double.isNaN(ranks[i].getValue())) {
299                    ranks[i] = new IntDoublePair(
300                            value, ranks[i].getPosition());
301                }
302            }
303        }
304    
305        /**
306         * Checks for presence of NaNs in <code>ranks.</code>
307         *
308         * @param ranks array to be searched for NaNs
309         * @return true iff ranks contains one or more NaNs
310         */
311        private boolean containsNaNs(IntDoublePair[] ranks) {
312            for (int i = 0; i < ranks.length; i++) {
313                if (Double.isNaN(ranks[i].getValue())) {
314                    return true;
315                }
316            }
317            return false;
318        }
319    
320        /**
321         * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
322         * The input <code>ranks</code> array is expected to take the same value
323         * for all indices in <code>tiesTrace</code>.  The common value is recoded
324         * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
325         * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
326         * The same array and trace with tiesStrategy AVERAGE will come out
327         * <5,8,3,6,3,7,1,3>.
328         *
329         * @param ranks array of ranks
330         * @param tiesTrace list of indices where <code>ranks</code> is constant
331         * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
332         * </code>
333         */
334        private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
335    
336            // constant value of ranks over tiesTrace
337            final double c = ranks[tiesTrace.get(0)];
338    
339            // length of sequence of tied ranks
340            final int length = tiesTrace.size();
341    
342            switch (tiesStrategy) {
343                case  AVERAGE:  // Replace ranks with average
344                    fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
345                    break;
346                case MAXIMUM:   // Replace ranks with maximum values
347                    fill(ranks, tiesTrace, c + length - 1);
348                    break;
349                case MINIMUM:   // Replace ties with minimum
350                    fill(ranks, tiesTrace, c);
351                    break;
352                case RANDOM:    // Fill with random integral values in [c, c + length - 1]
353                    Iterator<Integer> iterator = tiesTrace.iterator();
354                    long f = FastMath.round(c);
355                    while (iterator.hasNext()) {
356                        // No advertised exception because args are guaranteed valid
357                        ranks[iterator.next()] =
358                            randomData.nextLong(f, f + length - 1);
359                    }
360                    break;
361                case SEQUENTIAL:  // Fill sequentially from c to c + length - 1
362                    // walk and fill
363                    iterator = tiesTrace.iterator();
364                    f = FastMath.round(c);
365                    int i = 0;
366                    while (iterator.hasNext()) {
367                        ranks[iterator.next()] = f + i++;
368                    }
369                    break;
370                default: // this should not happen unless TiesStrategy enum is changed
371                    throw new MathInternalError();
372            }
373        }
374    
375        /**
376         * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
377         *
378         * @param data array to modify
379         * @param tiesTrace list of index values to set
380         * @param value value to set
381         */
382        private void fill(double[] data, List<Integer> tiesTrace, double value) {
383            Iterator<Integer> iterator = tiesTrace.iterator();
384            while (iterator.hasNext()) {
385                data[iterator.next()] = value;
386            }
387        }
388    
389        /**
390         * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
391         *
392         * @param ranks array to modify
393         * @param nanPositions list of index values to set to <code>Double.NaN</code>
394         */
395        private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
396            if (nanPositions.size() == 0) {
397                return;
398            }
399            Iterator<Integer> iterator = nanPositions.iterator();
400            while (iterator.hasNext()) {
401                ranks[iterator.next().intValue()] = Double.NaN;
402            }
403    
404        }
405    
406        /**
407         * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
408         *
409         * @param ranks array to search for <code>NaNs</code>
410         * @return list of indexes i such that <code>ranks[i] = NaN</code>
411         */
412        private List<Integer> getNanPositions(IntDoublePair[] ranks) {
413            ArrayList<Integer> out = new ArrayList<Integer>();
414            for (int i = 0; i < ranks.length; i++) {
415                if (Double.isNaN(ranks[i].getValue())) {
416                    out.add(Integer.valueOf(i));
417                }
418            }
419            return out;
420        }
421    
422        /**
423         * Represents the position of a double value in an ordering.
424         * Comparable interface is implemented so Arrays.sort can be used
425         * to sort an array of IntDoublePairs by value.  Note that the
426         * implicitly defined natural ordering is NOT consistent with equals.
427         */
428        private static class IntDoublePair implements Comparable<IntDoublePair>  {
429    
430            /** Value of the pair */
431            private final double value;
432    
433            /** Original position of the pair */
434            private final int position;
435    
436            /**
437             * Construct an IntDoublePair with the given value and position.
438             * @param value the value of the pair
439             * @param position the original position
440             */
441            public IntDoublePair(double value, int position) {
442                this.value = value;
443                this.position = position;
444            }
445    
446            /**
447             * Compare this IntDoublePair to another pair.
448             * Only the <strong>values</strong> are compared.
449             *
450             * @param other the other pair to compare this to
451             * @return result of <code>Double.compare(value, other.value)</code>
452             */
453            public int compareTo(IntDoublePair other) {
454                return Double.compare(value, other.value);
455            }
456    
457            // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion.
458    
459            /**
460             * Returns the value of the pair.
461             * @return value
462             */
463            public double getValue() {
464                return value;
465            }
466    
467            /**
468             * Returns the original position of the pair.
469             * @return position
470             */
471            public int getPosition() {
472                return position;
473            }
474        }
475    }