Median.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.statistics.descriptive;

  18. import java.util.Objects;
  19. import org.apache.commons.numbers.arrays.Selection;

  20. /**
  21.  * Returns the median of the available values.
  22.  *
  23.  * <p>For values of length {@code n}, let {@code k = n / 2}:
  24.  * <ul>
  25.  * <li>The result is {@code NaN} if {@code n = 0}.
  26.  * <li>The result is {@code values[k]} if {@code n} is odd.
  27.  * <li>The result is {@code (values[k - 1] + values[k]) / 2} if {@code n} is even.
  28.  * </ul>
  29.  *
  30.  * <p>This implementation respects the ordering imposed by
  31.  * {@link Double#compare(double, double)} for {@code NaN} values. If a {@code NaN} occurs
  32.  * in the selected positions in the fully sorted values then the result is {@code NaN}.
  33.  *
  34.  * <p>The {@link NaNPolicy} can be used to change the behaviour on {@code NaN} values.
  35.  *
  36.  * <p>Instances of this class are immutable and thread-safe.
  37.  *
  38.  * @see #with(NaNPolicy)
  39.  * @see <a href="https://en.wikipedia.org/wiki/Median">Median (Wikipedia)</a>
  40.  * @since 1.1
  41.  */
  42. public final class Median {
  43.     /** Default instance. */
  44.     private static final Median DEFAULT = new Median(false, NaNPolicy.INCLUDE);

  45.     /** Flag to indicate if the data should be copied. */
  46.     private final boolean copy;
  47.     /** NaN policy for floating point data. */
  48.     private final NaNPolicy nanPolicy;
  49.     /** Transformer for NaN data. */
  50.     private final NaNTransformer nanTransformer;

  51.     /**
  52.      * @param copy Flag to indicate if the data should be copied.
  53.      * @param nanPolicy NaN policy.
  54.      */
  55.     private Median(boolean copy, NaNPolicy nanPolicy) {
  56.         this.copy = copy;
  57.         this.nanPolicy = nanPolicy;
  58.         nanTransformer = NaNTransformers.createNaNTransformer(nanPolicy, copy);
  59.     }

  60.     /**
  61.      * Return a new instance with the default options.
  62.      *
  63.      * <ul>
  64.      * <li>{@linkplain #withCopy(boolean) Copy = false}
  65.      * <li>{@linkplain #with(NaNPolicy) NaN policy = include}
  66.      * </ul>
  67.      *
  68.      * <p>Note: The default options configure for processing in-place and including
  69.      * {@code NaN} values in the data. This is the most efficient mode and has the
  70.      * smallest memory consumption.
  71.      *
  72.      * @return the median implementation
  73.      * @see #withCopy(boolean)
  74.      * @see #with(NaNPolicy)
  75.      */
  76.     public static Median withDefaults() {
  77.         return DEFAULT;
  78.     }

  79.     /**
  80.      * Return an instance with the configured copy behaviour. If {@code false} then
  81.      * the input array will be modified by the call to evaluate the median; otherwise
  82.      * the computation uses a copy of the data.
  83.      *
  84.      * @param v Value.
  85.      * @return an instance
  86.      */
  87.     public Median withCopy(boolean v) {
  88.         return new Median(v, nanPolicy);
  89.     }

  90.     /**
  91.      * Return an instance with the configured {@link NaNPolicy}.
  92.      *
  93.      * <p>Note: This implementation respects the ordering imposed by
  94.      * {@link Double#compare(double, double)} for {@code NaN} values: {@code NaN} is
  95.      * considered greater than all other values, and all {@code NaN} values are equal. The
  96.      * {@link NaNPolicy} changes the computation of the statistic in the presence of
  97.      * {@code NaN} values.
  98.      *
  99.      * <ul>
  100.      * <li>{@link NaNPolicy#INCLUDE}: {@code NaN} values are moved to the end of the data;
  101.      * the size of the data <em>includes</em> the {@code NaN} values and the median will be
  102.      * {@code NaN} if any value used for median interpolation is {@code NaN}.
  103.      * <li>{@link NaNPolicy#EXCLUDE}: {@code NaN} values are moved to the end of the data;
  104.      * the size of the data <em>excludes</em> the {@code NaN} values and the median will
  105.      * never be {@code NaN} for non-zero size. If all data are {@code NaN} then the size is zero
  106.      * and the result is {@code NaN}.
  107.      * <li>{@link NaNPolicy#ERROR}: An exception is raised if the data contains {@code NaN}
  108.      * values.
  109.      * </ul>
  110.      *
  111.      * <p>Note that the result is identical for all policies if no {@code NaN} values are present.
  112.      *
  113.      * @param v Value.
  114.      * @return an instance
  115.      */
  116.     public Median with(NaNPolicy v) {
  117.         return new Median(copy, Objects.requireNonNull(v));
  118.     }

  119.     /**
  120.      * Evaluate the median.
  121.      *
  122.      * <p>Note: This method may partially sort the input values if not configured to
  123.      * {@link #withCopy(boolean) copy} the input data.
  124.      *
  125.      * @param values Values.
  126.      * @return the median
  127.      * @throws IllegalArgumentException if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
  128.      * @see #with(NaNPolicy)
  129.      */
  130.     public double evaluate(double[] values) {
  131.         return compute(values, 0, values.length);
  132.     }

  133.     /**
  134.      * Evaluate the median of the specified range.
  135.      *
  136.      * <p>Note: This method may partially sort the input values if not configured to
  137.      * {@link #withCopy(boolean) copy} the input data.
  138.      *
  139.      * @param values Values.
  140.      * @param from Inclusive start of the range.
  141.      * @param to Exclusive end of the range.
  142.      * @return the median
  143.      * @throws IllegalArgumentException if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
  144.      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
  145.      * @see #with(NaNPolicy)
  146.      * @since 1.2
  147.      */
  148.     public double evaluateRange(double[] values, int from, int to) {
  149.         Statistics.checkFromToIndex(from, to, values.length);
  150.         return compute(values, from, to);
  151.     }

  152.     /**
  153.      * Compute the median of the specified range.
  154.      *
  155.      * @param values Values.
  156.      * @param from Inclusive start of the range.
  157.      * @param to Exclusive end of the range.
  158.      * @return the median
  159.      */
  160.     private double compute(double[] values, int from, int to) {
  161.         // Floating-point data handling
  162.         final int[] bounds = new int[2];
  163.         final double[] x = nanTransformer.apply(values, from, to, bounds);
  164.         final int start = bounds[0];
  165.         final int end = bounds[1];
  166.         final int n = end - start;
  167.         // Special cases
  168.         if (n <= 2) {
  169.             switch (n) {
  170.             case 2:
  171.                 // Sorting the array matches the behaviour of Quantile for n==2
  172.                 // Handle NaN and signed zeros
  173.                 if (Double.compare(x[start + 1], x[start]) < 0) {
  174.                     final double t = x[start];
  175.                     x[start] = x[start + 1];
  176.                     x[start + 1] = t;
  177.                 }
  178.                 return Interpolation.mean(x[start], x[start + 1]);
  179.             case 1:
  180.                 return x[start];
  181.             default:
  182.                 return Double.NaN;
  183.             }
  184.         }
  185.         // Median index (including the offset)
  186.         final int m = (start + end) >>> 1;
  187.         // Odd
  188.         if ((n & 0x1) == 1) {
  189.             Selection.select(x, start, end, m);
  190.             return x[m];
  191.         }
  192.         // Even: require (m-1, m)
  193.         Selection.select(x, start, end, new int[] {m - 1, m});
  194.         return Interpolation.mean(x[m - 1], x[m]);
  195.     }

  196.     /**
  197.      * Evaluate the median.
  198.      *
  199.      * <p>Note: This method may partially sort the input values if not configured to
  200.      * {@link #withCopy(boolean) copy} the input data.
  201.      *
  202.      * @param values Values.
  203.      * @return the median
  204.      */
  205.     public double evaluate(int[] values) {
  206.         return compute(values, 0, values.length);
  207.     }

  208.     /**
  209.      * Evaluate the median of the specified range.
  210.      *
  211.      * <p>Note: This method may partially sort the input values if not configured to
  212.      * {@link #withCopy(boolean) copy} the input data.
  213.      *
  214.      * @param values Values.
  215.      * @param from Inclusive start of the range.
  216.      * @param to Exclusive end of the range.
  217.      * @return the median
  218.      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
  219.      * @since 1.2
  220.      */
  221.     public double evaluateRange(int[] values, int from, int to) {
  222.         Statistics.checkFromToIndex(from, to, values.length);
  223.         return compute(values, from, to);
  224.     }

  225.     /**
  226.      * Compute the median of the specified range.
  227.      *
  228.      * @param values Values.
  229.      * @param from Inclusive start of the range.
  230.      * @param to Exclusive end of the range.
  231.      * @return the median
  232.      */
  233.     private double compute(int[] values, int from, int to) {
  234.         final int[] x;
  235.         final int start;
  236.         final int end;
  237.         if (copy) {
  238.             x = Statistics.copy(values, from, to);
  239.             start = 0;
  240.             end = x.length;
  241.         } else {
  242.             x = values;
  243.             start = from;
  244.             end = to;
  245.         }
  246.         final int n = end - start;
  247.         // Special cases
  248.         if (n <= 2) {
  249.             switch (n) {
  250.             case 2:
  251.                 // Sorting the array matches the behaviour of Quantile for n==2
  252.                 if (x[start + 1] < x[start]) {
  253.                     final int t = x[start];
  254.                     x[start] = x[start + 1];
  255.                     x[start + 1] = t;
  256.                 }
  257.                 return Interpolation.mean(x[start], x[start + 1]);
  258.             case 1:
  259.                 return x[start];
  260.             default:
  261.                 return Double.NaN;
  262.             }
  263.         }
  264.         // Median index (including the offset)
  265.         final int m = (start + end) >>> 1;
  266.         // Odd
  267.         if ((n & 0x1) == 1) {
  268.             Selection.select(x, start, end, m);
  269.             return x[m];
  270.         }
  271.         // Even: require (m-1, m)
  272.         Selection.select(x, start, end, new int[] {m - 1, m});
  273.         return Interpolation.mean(x[m - 1], x[m]);
  274.     }
  275. }