NaturalRanking.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.math4.legacy.stat.ranking;

  18. import java.util.ArrayList;
  19. import java.util.Arrays;
  20. import java.util.Iterator;
  21. import java.util.List;

  22. import org.apache.commons.rng.UniformRandomProvider;
  23. import org.apache.commons.rng.simple.RandomSource;
  24. import org.apache.commons.rng.sampling.distribution.UniformLongSampler;
  25. import org.apache.commons.math4.legacy.exception.MathInternalError;
  26. import org.apache.commons.math4.legacy.exception.NotANumberException;
  27. import org.apache.commons.math4.core.jdkmath.JdkMath;


  28. /**
  29.  * Ranking based on the natural ordering on doubles.
  30.  *
  31.  * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
  32.  * are handled using the selected {@link TiesStrategy}.
  33.  * Configuration settings are supplied in optional constructor arguments.
  34.  * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE},
  35.  * respectively. When using {@link TiesStrategy#RANDOM}, a
  36.  * {@link UniformRandomProvider random generator} may be supplied as a
  37.  * constructor argument.</p>
  38.  * <p>Examples:
  39.  * <table border="">
  40.  * <caption>Examples</caption>
  41.  * <tr><th colspan="3">
  42.  * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
  43.  * </th></tr>
  44.  * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
  45.  * <th><code>rank(data)</code></th>
  46.  * <tr>
  47.  * <td>default (NaNs maximal)</td>
  48.  * <td>default (ties averaged)</td>
  49.  * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
  50.  * <tr>
  51.  * <td>default (NaNs maximal)</td>
  52.  * <td>MINIMUM</td>
  53.  * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
  54.  * <tr>
  55.  * <td>MINIMAL</td>
  56.  * <td>default (ties averaged)</td>
  57.  * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
  58.  * <tr>
  59.  * <td>REMOVED</td>
  60.  * <td>SEQUENTIAL</td>
  61.  * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
  62.  * <tr>
  63.  * <td>MINIMAL</td>
  64.  * <td>MAXIMUM</td>
  65.  * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table>
  66.  *
  67.  * @since 2.0
  68.  */
  69. public class NaturalRanking implements RankingAlgorithm {

  70.     /** default NaN strategy. */
  71.     public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED;

  72.     /** default ties strategy. */
  73.     public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;

  74.     /** NaN strategy - defaults to NaNs maximal. */
  75.     private final NaNStrategy nanStrategy;

  76.     /** Ties strategy - defaults to ties averaged. */
  77.     private final TiesStrategy tiesStrategy;

  78.     /** Source of random data - used only when ties strategy is RANDOM. */
  79.     private final UniformRandomProvider random;

  80.     /**
  81.      * Create a NaturalRanking with default strategies for handling ties and NaNs.
  82.      */
  83.     public NaturalRanking() {
  84.         this(DEFAULT_NAN_STRATEGY, DEFAULT_TIES_STRATEGY, null);
  85.     }

  86.     /**
  87.      * Create a NaturalRanking with the given TiesStrategy.
  88.      *
  89.      * @param tiesStrategy the TiesStrategy to use
  90.      */
  91.     public NaturalRanking(TiesStrategy tiesStrategy) {
  92.         this(DEFAULT_NAN_STRATEGY,
  93.              tiesStrategy,
  94.              RandomSource.WELL_19937_C.create());
  95.     }

  96.     /**
  97.      * Create a NaturalRanking with the given NaNStrategy.
  98.      *
  99.      * @param nanStrategy the NaNStrategy to use
  100.      */
  101.     public NaturalRanking(NaNStrategy nanStrategy) {
  102.         this(nanStrategy, DEFAULT_TIES_STRATEGY, null);
  103.     }

  104.     /**
  105.      * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
  106.      *
  107.      * @param nanStrategy NaNStrategy to use
  108.      * @param tiesStrategy TiesStrategy to use
  109.      */
  110.     public NaturalRanking(NaNStrategy nanStrategy,
  111.                           TiesStrategy tiesStrategy) {
  112.         this(nanStrategy,
  113.              tiesStrategy,
  114.              RandomSource.WELL_19937_C.create());
  115.     }

  116.     /**
  117.      * Create a NaturalRanking with TiesStrategy.RANDOM and the given
  118.      * random generator as the source of random data.
  119.      *
  120.      * @param randomGenerator source of random data
  121.      */
  122.     public NaturalRanking(UniformRandomProvider randomGenerator) {
  123.         this(DEFAULT_NAN_STRATEGY, TiesStrategy.RANDOM, randomGenerator);
  124.     }

  125.     /**
  126.      * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
  127.      * and the given source of random data.
  128.      *
  129.      * @param nanStrategy NaNStrategy to use
  130.      * @param randomGenerator source of random data
  131.      */
  132.     public NaturalRanking(NaNStrategy nanStrategy,
  133.                           UniformRandomProvider randomGenerator) {
  134.         this(nanStrategy, TiesStrategy.RANDOM, randomGenerator);
  135.     }

  136.     /**
  137.      * @param nanStrategy NaN strategy.
  138.      * @param tiesStrategy Tie strategy.
  139.      * @param random RNG.
  140.      */
  141.     private NaturalRanking(NaNStrategy nanStrategy,
  142.                            TiesStrategy tiesStrategy,
  143.                            UniformRandomProvider random) {
  144.         this.nanStrategy = nanStrategy;
  145.         this.tiesStrategy = tiesStrategy;
  146.         this.random = random;
  147.     }

  148.     /**
  149.      * Return the NaNStrategy.
  150.      *
  151.      * @return returns the NaNStrategy
  152.      */
  153.     public NaNStrategy getNanStrategy() {
  154.         return nanStrategy;
  155.     }

  156.     /**
  157.      * Return the TiesStrategy.
  158.      *
  159.      * @return the TiesStrategy
  160.      */
  161.     public TiesStrategy getTiesStrategy() {
  162.         return tiesStrategy;
  163.     }

  164.     /**
  165.      * Rank <code>data</code> using the natural ordering on Doubles, with
  166.      * NaN values handled according to <code>nanStrategy</code> and ties
  167.      * resolved using <code>tiesStrategy.</code>
  168.      *
  169.      * @param data array to be ranked
  170.      * @return array of ranks
  171.      * @throws NotANumberException if the selected {@link NaNStrategy} is {@code FAILED}
  172.      * and a {@link Double#NaN} is encountered in the input data
  173.      */
  174.     @Override
  175.     public double[] rank(double[] data) {

  176.         // Array recording initial positions of data to be ranked
  177.         IntDoublePair[] ranks = new IntDoublePair[data.length];
  178.         for (int i = 0; i < data.length; i++) {
  179.             ranks[i] = new IntDoublePair(data[i], i);
  180.         }

  181.         // Recode, remove or record positions of NaNs
  182.         List<Integer> nanPositions = null;
  183.         switch (nanStrategy) {
  184.             case MAXIMAL: // Replace NaNs with +INFs
  185.                 recodeNaNs(ranks, Double.POSITIVE_INFINITY);
  186.                 break;
  187.             case MINIMAL: // Replace NaNs with -INFs
  188.                 recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
  189.                 break;
  190.             case REMOVED: // Drop NaNs from data
  191.                 ranks = removeNaNs(ranks);
  192.                 break;
  193.             case FIXED:   // Record positions of NaNs
  194.                 nanPositions = getNanPositions(ranks);
  195.                 break;
  196.             case FAILED:
  197.                 nanPositions = getNanPositions(ranks);
  198.                 if (nanPositions.size() > 0) {
  199.                     throw new NotANumberException();
  200.                 }
  201.                 break;
  202.             default: // this should not happen unless NaNStrategy enum is changed
  203.                 throw new MathInternalError();
  204.         }

  205.         // Sort the IntDoublePairs
  206.         Arrays.sort(ranks);

  207.         // Walk the sorted array, filling output array using sorted positions,
  208.         // resolving ties as we go
  209.         double[] out = new double[ranks.length];
  210.         int pos = 1;  // position in sorted array
  211.         out[ranks[0].getPosition()] = pos;
  212.         List<Integer> tiesTrace = new ArrayList<>();
  213.         tiesTrace.add(ranks[0].getPosition());
  214.         for (int i = 1; i < ranks.length; i++) {
  215.             if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
  216.                 // tie sequence has ended (or had length 1)
  217.                 pos = i + 1;
  218.                 if (tiesTrace.size() > 1) {  // if seq is nontrivial, resolve
  219.                     resolveTie(out, tiesTrace);
  220.                 }
  221.                 tiesTrace = new ArrayList<>();
  222.                 tiesTrace.add(ranks[i].getPosition());
  223.             } else {
  224.                 // tie sequence continues
  225.                 tiesTrace.add(ranks[i].getPosition());
  226.             }
  227.             out[ranks[i].getPosition()] = pos;
  228.         }
  229.         if (tiesTrace.size() > 1) {  // handle tie sequence at end
  230.             resolveTie(out, tiesTrace);
  231.         }
  232.         if (nanStrategy == NaNStrategy.FIXED) {
  233.             restoreNaNs(out, nanPositions);
  234.         }
  235.         return out;
  236.     }

  237.     /**
  238.      * Returns an array that is a copy of the input array with IntDoublePairs
  239.      * having NaN values removed.
  240.      *
  241.      * @param ranks input array
  242.      * @return array with NaN-valued entries removed
  243.      */
  244.     private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
  245.         if (!containsNaNs(ranks)) {
  246.             return ranks;
  247.         }
  248.         IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
  249.         int j = 0;
  250.         for (int i = 0; i < ranks.length; i++) {
  251.             if (Double.isNaN(ranks[i].getValue())) {
  252.                 // drop, but adjust original ranks of later elements
  253.                 for (int k = i + 1; k < ranks.length; k++) {
  254.                     ranks[k] = new IntDoublePair(
  255.                             ranks[k].getValue(), ranks[k].getPosition() - 1);
  256.                 }
  257.             } else {
  258.                 outRanks[j] = new IntDoublePair(
  259.                         ranks[i].getValue(), ranks[i].getPosition());
  260.                 j++;
  261.             }
  262.         }
  263.         IntDoublePair[] returnRanks = new IntDoublePair[j];
  264.         System.arraycopy(outRanks, 0, returnRanks, 0, j);
  265.         return returnRanks;
  266.     }

  267.     /**
  268.      * Recodes NaN values to the given value.
  269.      *
  270.      * @param ranks array to recode
  271.      * @param value the value to replace NaNs with
  272.      */
  273.     private void recodeNaNs(IntDoublePair[] ranks, double value) {
  274.         for (int i = 0; i < ranks.length; i++) {
  275.             if (Double.isNaN(ranks[i].getValue())) {
  276.                 ranks[i] = new IntDoublePair(
  277.                         value, ranks[i].getPosition());
  278.             }
  279.         }
  280.     }

  281.     /**
  282.      * Checks for presence of NaNs in <code>ranks.</code>
  283.      *
  284.      * @param ranks array to be searched for NaNs
  285.      * @return true iff ranks contains one or more NaNs
  286.      */
  287.     private boolean containsNaNs(IntDoublePair[] ranks) {
  288.         for (int i = 0; i < ranks.length; i++) {
  289.             if (Double.isNaN(ranks[i].getValue())) {
  290.                 return true;
  291.             }
  292.         }
  293.         return false;
  294.     }

  295.     /**
  296.      * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
  297.      * The input <code>ranks</code> array is expected to take the same value
  298.      * for all indices in <code>tiesTrace</code>.  The common value is recoded
  299.      * according to the tiesStrategy. For example, if ranks = [5,8,2,6,2,7,1,2],
  300.      * tiesTrace = [2,4,7] and tiesStrategy is MINIMUM, ranks will be unchanged.
  301.      * The same array and trace with tiesStrategy AVERAGE will come out
  302.      * [5,8,3,6,3,7,1,3].
  303.      *
  304.      * @param ranks array of ranks
  305.      * @param tiesTrace list of indices where <code>ranks</code> is constant
  306.      * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
  307.      * </code>
  308.      */
  309.     private void resolveTie(double[] ranks, List<Integer> tiesTrace) {

  310.         // constant value of ranks over tiesTrace
  311.         final double c = ranks[tiesTrace.get(0)];

  312.         // length of sequence of tied ranks
  313.         final int length = tiesTrace.size();

  314.         switch (tiesStrategy) {
  315.             case  AVERAGE:  // Replace ranks with average
  316.                 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
  317.                 break;
  318.             case MAXIMUM:   // Replace ranks with maximum values
  319.                 fill(ranks, tiesTrace, c + length - 1);
  320.                 break;
  321.             case MINIMUM:   // Replace ties with minimum
  322.                 fill(ranks, tiesTrace, c);
  323.                 break;
  324.             case RANDOM:    // Fill with random integral values in [c, c + length - 1]
  325.                 Iterator<Integer> iterator = tiesTrace.iterator();
  326.                 long f = JdkMath.round(c);
  327.                 final UniformLongSampler sampler = UniformLongSampler.of(random, f, f + length - 1);
  328.                 while (iterator.hasNext()) {
  329.                     // No advertised exception because args are guaranteed valid
  330.                     ranks[iterator.next()] = sampler.sample();
  331.                 }
  332.                 break;
  333.             case SEQUENTIAL:  // Fill sequentially from c to c + length - 1
  334.                 // walk and fill
  335.                 iterator = tiesTrace.iterator();
  336.                 f = JdkMath.round(c);
  337.                 int i = 0;
  338.                 while (iterator.hasNext()) {
  339.                     ranks[iterator.next()] = f + i++;
  340.                 }
  341.                 break;
  342.             default: // this should not happen unless TiesStrategy enum is changed
  343.                 throw new MathInternalError();
  344.         }
  345.     }

  346.     /**
  347.      * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
  348.      *
  349.      * @param data array to modify
  350.      * @param tiesTrace list of index values to set
  351.      * @param value value to set
  352.      */
  353.     private void fill(double[] data, List<Integer> tiesTrace, double value) {
  354.         Iterator<Integer> iterator = tiesTrace.iterator();
  355.         while (iterator.hasNext()) {
  356.             data[iterator.next()] = value;
  357.         }
  358.     }

  359.     /**
  360.      * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
  361.      *
  362.      * @param ranks array to modify
  363.      * @param nanPositions list of index values to set to <code>Double.NaN</code>
  364.      */
  365.     private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
  366.         if (nanPositions.isEmpty()) {
  367.             return;
  368.         }
  369.         Iterator<Integer> iterator = nanPositions.iterator();
  370.         while (iterator.hasNext()) {
  371.             ranks[iterator.next().intValue()] = Double.NaN;
  372.         }
  373.     }

  374.     /**
  375.      * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
  376.      *
  377.      * @param ranks array to search for <code>NaNs</code>
  378.      * @return list of indexes i such that <code>ranks[i] = NaN</code>
  379.      */
  380.     private List<Integer> getNanPositions(IntDoublePair[] ranks) {
  381.         ArrayList<Integer> out = new ArrayList<>();
  382.         for (int i = 0; i < ranks.length; i++) {
  383.             if (Double.isNaN(ranks[i].getValue())) {
  384.                 out.add(Integer.valueOf(i));
  385.             }
  386.         }
  387.         return out;
  388.     }

  389.     /**
  390.      * Represents the position of a double value in an ordering.
  391.      * Comparable interface is implemented so Arrays.sort can be used
  392.      * to sort an array of IntDoublePairs by value.  Note that the
  393.      * implicitly defined natural ordering is NOT consistent with equals.
  394.      */
  395.     private static final class IntDoublePair implements Comparable<IntDoublePair>  {

  396.         /** Value of the pair. */
  397.         private final double value;

  398.         /** Original position of the pair. */
  399.         private final int position;

  400.         /**
  401.          * Construct an IntDoublePair with the given value and position.
  402.          * @param value the value of the pair
  403.          * @param position the original position
  404.          */
  405.         IntDoublePair(double value, int position) {
  406.             this.value = value;
  407.             this.position = position;
  408.         }

  409.         /**
  410.          * Compare this IntDoublePair to another pair.
  411.          * Only the <strong>values</strong> are compared.
  412.          *
  413.          * @param other the other pair to compare this to
  414.          * @return result of <code>Double.compare(value, other.value)</code>
  415.          */
  416.         @Override
  417.         public int compareTo(IntDoublePair other) {
  418.             return Double.compare(value, other.value);
  419.         }

  420.         // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion.

  421.         /**
  422.          * Returns the value of the pair.
  423.          * @return value
  424.          */
  425.         public double getValue() {
  426.             return value;
  427.         }

  428.         /**
  429.          * Returns the original position of the pair.
  430.          * @return position
  431.          */
  432.         public int getPosition() {
  433.             return position;
  434.         }
  435.     }
  436. }