View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.statistics.descriptive;
19  
20  import java.util.Objects;
21  import org.apache.commons.numbers.arrays.Selection;
22  
23  /**
24   * Returns the median of the available values.
25   *
26   * <p>For values of length {@code n}, let {@code k = n / 2}:
27   * <ul>
28   * <li>The result is {@code NaN} if {@code n = 0}.
29   * <li>The result is {@code values[k]} if {@code n} is odd.
30   * <li>The result is {@code (values[k - 1] + values[k]) / 2} if {@code n} is even.
31   * </ul>
32   *
33   * <p>This implementation respects the ordering imposed by
34   * {@link Double#compare(double, double)} for {@code NaN} values. If a {@code NaN} occurs
35   * in the selected positions in the fully sorted values then the result is {@code NaN}.
36   *
37   * <p>The {@link NaNPolicy} can be used to change the behaviour on {@code NaN} values.
38   *
39   * <p>Instances of this class are immutable and thread-safe.
40   *
41   * @see #with(NaNPolicy)
42   * @see <a href="https://en.wikipedia.org/wiki/Median">Median (Wikipedia)</a>
43   * @since 1.1
44   */
45  public final class Median {
46      /** Default instance. */
47      private static final Median DEFAULT = new Median(false, NaNPolicy.INCLUDE);
48  
49      /** Flag to indicate if the data should be copied. */
50      private final boolean copy;
51      /** NaN policy for floating point data. */
52      private final NaNPolicy nanPolicy;
53      /** Transformer for NaN data. */
54      private final NaNTransformer nanTransformer;
55  
56      /**
57       * @param copy Flag to indicate if the data should be copied.
58       * @param nanPolicy NaN policy.
59       */
60      private Median(boolean copy, NaNPolicy nanPolicy) {
61          this.copy = copy;
62          this.nanPolicy = nanPolicy;
63          nanTransformer = NaNTransformers.createNaNTransformer(nanPolicy, copy);
64      }
65  
66      /**
67       * Return a new instance with the default options.
68       *
69       * <ul>
70       * <li>{@linkplain #withCopy(boolean) Copy = false}
71       * <li>{@linkplain #with(NaNPolicy) NaN policy = include}
72       * </ul>
73       *
74       * <p>Note: The default options configure for processing in-place and including
75       * {@code NaN} values in the data. This is the most efficient mode and has the
76       * smallest memory consumption.
77       *
78       * @return the median implementation
79       * @see #withCopy(boolean)
80       * @see #with(NaNPolicy)
81       */
82      public static Median withDefaults() {
83          return DEFAULT;
84      }
85  
86      /**
87       * Return an instance with the configured copy behaviour. If {@code false} then
88       * the input array will be modified by the call to evaluate the median; otherwise
89       * the computation uses a copy of the data.
90       *
91       * @param v Value.
92       * @return an instance
93       */
94      public Median withCopy(boolean v) {
95          return new Median(v, nanPolicy);
96      }
97  
98      /**
99       * Return an instance with the configured {@link NaNPolicy}.
100      *
101      * <p>Note: This implementation respects the ordering imposed by
102      * {@link Double#compare(double, double)} for {@code NaN} values: {@code NaN} is
103      * considered greater than all other values, and all {@code NaN} values are equal. The
104      * {@link NaNPolicy} changes the computation of the statistic in the presence of
105      * {@code NaN} values.
106      *
107      * <ul>
108      * <li>{@link NaNPolicy#INCLUDE}: {@code NaN} values are moved to the end of the data;
109      * the size of the data <em>includes</em> the {@code NaN} values and the median will be
110      * {@code NaN} if any value used for median interpolation is {@code NaN}.
111      * <li>{@link NaNPolicy#EXCLUDE}: {@code NaN} values are moved to the end of the data;
112      * the size of the data <em>excludes</em> the {@code NaN} values and the median will
113      * never be {@code NaN} for non-zero size. If all data are {@code NaN} then the size is zero
114      * and the result is {@code NaN}.
115      * <li>{@link NaNPolicy#ERROR}: An exception is raised if the data contains {@code NaN}
116      * values.
117      * </ul>
118      *
119      * <p>Note that the result is identical for all policies if no {@code NaN} values are present.
120      *
121      * @param v Value.
122      * @return an instance
123      */
124     public Median with(NaNPolicy v) {
125         return new Median(copy, Objects.requireNonNull(v));
126     }
127 
128     /**
129      * Evaluate the median.
130      *
131      * <p>Note: This method may partially sort the input values if not configured to
132      * {@link #withCopy(boolean) copy} the input data.
133      *
134      * @param values Values.
135      * @return the median
136      * @throws IllegalArgumentException if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
137      * @see #with(NaNPolicy)
138      */
139     public double evaluate(double[] values) {
140         return compute(values, 0, values.length);
141     }
142 
143     /**
144      * Evaluate the median of the specified range.
145      *
146      * <p>Note: This method may partially sort the input values if not configured to
147      * {@link #withCopy(boolean) copy} the input data.
148      *
149      * @param values Values.
150      * @param from Inclusive start of the range.
151      * @param to Exclusive end of the range.
152      * @return the median
153      * @throws IllegalArgumentException if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
154      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
155      * @see #with(NaNPolicy)
156      * @since 1.2
157      */
158     public double evaluateRange(double[] values, int from, int to) {
159         Statistics.checkFromToIndex(from, to, values.length);
160         return compute(values, from, to);
161     }
162 
163     /**
164      * Compute the median of the specified range.
165      *
166      * @param values Values.
167      * @param from Inclusive start of the range.
168      * @param to Exclusive end of the range.
169      * @return the median
170      */
171     private double compute(double[] values, int from, int to) {
172         // Floating-point data handling
173         final int[] bounds = new int[2];
174         final double[] x = nanTransformer.apply(values, from, to, bounds);
175         final int start = bounds[0];
176         final int end = bounds[1];
177         final int n = end - start;
178         // Special cases
179         if (n <= 2) {
180             switch (n) {
181             case 2:
182                 // Sorting the array matches the behaviour of Quantile for n==2
183                 // Handle NaN and signed zeros
184                 if (Double.compare(x[start + 1], x[start]) < 0) {
185                     final double t = x[start];
186                     x[start] = x[start + 1];
187                     x[start + 1] = t;
188                 }
189                 return Interpolation.mean(x[start], x[start + 1]);
190             case 1:
191                 return x[start];
192             default:
193                 return Double.NaN;
194             }
195         }
196         // Median index (including the offset)
197         final int m = (start + end) >>> 1;
198         // Odd
199         if ((n & 0x1) == 1) {
200             Selection.select(x, start, end, m);
201             return x[m];
202         }
203         // Even: require (m-1, m)
204         Selection.select(x, start, end, new int[] {m - 1, m});
205         return Interpolation.mean(x[m - 1], x[m]);
206     }
207 
208     /**
209      * Evaluate the median.
210      *
211      * <p>Note: This method may partially sort the input values if not configured to
212      * {@link #withCopy(boolean) copy} the input data.
213      *
214      * @param values Values.
215      * @return the median
216      */
217     public double evaluate(int[] values) {
218         return compute(values, 0, values.length);
219     }
220 
221     /**
222      * Evaluate the median of the specified range.
223      *
224      * <p>Note: This method may partially sort the input values if not configured to
225      * {@link #withCopy(boolean) copy} the input data.
226      *
227      * @param values Values.
228      * @param from Inclusive start of the range.
229      * @param to Exclusive end of the range.
230      * @return the median
231      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
232      * @since 1.2
233      */
234     public double evaluateRange(int[] values, int from, int to) {
235         Statistics.checkFromToIndex(from, to, values.length);
236         return compute(values, from, to);
237     }
238 
239     /**
240      * Compute the median of the specified range.
241      *
242      * @param values Values.
243      * @param from Inclusive start of the range.
244      * @param to Exclusive end of the range.
245      * @return the median
246      */
247     private double compute(int[] values, int from, int to) {
248         final int[] x;
249         final int start;
250         final int end;
251         if (copy) {
252             x = Statistics.copy(values, from, to);
253             start = 0;
254             end = x.length;
255         } else {
256             x = values;
257             start = from;
258             end = to;
259         }
260         final int n = end - start;
261         // Special cases
262         if (n <= 2) {
263             switch (n) {
264             case 2:
265                 // Sorting the array matches the behaviour of Quantile for n==2
266                 if (x[start + 1] < x[start]) {
267                     final int t = x[start];
268                     x[start] = x[start + 1];
269                     x[start + 1] = t;
270                 }
271                 return Interpolation.mean(x[start], x[start + 1]);
272             case 1:
273                 return x[start];
274             default:
275                 return Double.NaN;
276             }
277         }
278         // Median index (including the offset)
279         final int m = (start + end) >>> 1;
280         // Odd
281         if ((n & 0x1) == 1) {
282             Selection.select(x, start, end, m);
283             return x[m];
284         }
285         // Even: require (m-1, m)
286         Selection.select(x, start, end, new int[] {m - 1, m});
287         return Interpolation.mean(x[m - 1], x[m]);
288     }
289 }