1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.statistics.descriptive;
19
20 import java.util.Objects;
21 import org.apache.commons.numbers.arrays.Selection;
22
23 /**
24 * Returns the median of the available values.
25 *
26 * <p>For values of length {@code n}, let {@code k = n / 2}:
27 * <ul>
28 * <li>The result is {@code NaN} if {@code n = 0}.
29 * <li>The result is {@code values[k]} if {@code n} is odd.
30 * <li>The result is {@code (values[k - 1] + values[k]) / 2} if {@code n} is even.
31 * </ul>
32 *
33 * <p>This implementation respects the ordering imposed by
34 * {@link Double#compare(double, double)} for {@code NaN} values. If a {@code NaN} occurs
35 * in the selected positions in the fully sorted values then the result is {@code NaN}.
36 *
37 * <p>The {@link NaNPolicy} can be used to change the behaviour on {@code NaN} values.
38 *
39 * <p>Instances of this class are immutable and thread-safe.
40 *
41 * @see #with(NaNPolicy)
42 * @see <a href="https://en.wikipedia.org/wiki/Median">Median (Wikipedia)</a>
43 * @since 1.1
44 */
45 public final class Median {
46 /** Default instance. */
47 private static final Median DEFAULT = new Median(false, NaNPolicy.INCLUDE);
48
49 /** Flag to indicate if the data should be copied. */
50 private final boolean copy;
51 /** NaN policy for floating point data. */
52 private final NaNPolicy nanPolicy;
53 /** Transformer for NaN data. */
54 private final NaNTransformer nanTransformer;
55
56 /**
57 * @param copy Flag to indicate if the data should be copied.
58 * @param nanPolicy NaN policy.
59 */
60 private Median(boolean copy, NaNPolicy nanPolicy) {
61 this.copy = copy;
62 this.nanPolicy = nanPolicy;
63 nanTransformer = NaNTransformers.createNaNTransformer(nanPolicy, copy);
64 }
65
66 /**
67 * Return a new instance with the default options.
68 *
69 * <ul>
70 * <li>{@linkplain #withCopy(boolean) Copy = false}
71 * <li>{@linkplain #with(NaNPolicy) NaN policy = include}
72 * </ul>
73 *
74 * <p>Note: The default options configure for processing in-place and including
75 * {@code NaN} values in the data. This is the most efficient mode and has the
76 * smallest memory consumption.
77 *
78 * @return the median implementation
79 * @see #withCopy(boolean)
80 * @see #with(NaNPolicy)
81 */
82 public static Median withDefaults() {
83 return DEFAULT;
84 }
85
86 /**
87 * Return an instance with the configured copy behaviour. If {@code false} then
88 * the input array will be modified by the call to evaluate the median; otherwise
89 * the computation uses a copy of the data.
90 *
91 * @param v Value.
92 * @return an instance
93 */
94 public Median withCopy(boolean v) {
95 return new Median(v, nanPolicy);
96 }
97
98 /**
99 * Return an instance with the configured {@link NaNPolicy}.
100 *
101 * <p>Note: This implementation respects the ordering imposed by
102 * {@link Double#compare(double, double)} for {@code NaN} values: {@code NaN} is
103 * considered greater than all other values, and all {@code NaN} values are equal. The
104 * {@link NaNPolicy} changes the computation of the statistic in the presence of
105 * {@code NaN} values.
106 *
107 * <ul>
108 * <li>{@link NaNPolicy#INCLUDE}: {@code NaN} values are moved to the end of the data;
109 * the size of the data <em>includes</em> the {@code NaN} values and the median will be
110 * {@code NaN} if any value used for median interpolation is {@code NaN}.
111 * <li>{@link NaNPolicy#EXCLUDE}: {@code NaN} values are moved to the end of the data;
112 * the size of the data <em>excludes</em> the {@code NaN} values and the median will
113 * never be {@code NaN} for non-zero size. If all data are {@code NaN} then the size is zero
114 * and the result is {@code NaN}.
115 * <li>{@link NaNPolicy#ERROR}: An exception is raised if the data contains {@code NaN}
116 * values.
117 * </ul>
118 *
119 * <p>Note that the result is identical for all policies if no {@code NaN} values are present.
120 *
121 * @param v Value.
122 * @return an instance
123 */
124 public Median with(NaNPolicy v) {
125 return new Median(copy, Objects.requireNonNull(v));
126 }
127
128 /**
129 * Evaluate the median.
130 *
131 * <p>Note: This method may partially sort the input values if not configured to
132 * {@link #withCopy(boolean) copy} the input data.
133 *
134 * @param values Values.
135 * @return the median
136 * @throws IllegalArgumentException if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
137 * @see #with(NaNPolicy)
138 */
139 public double evaluate(double[] values) {
140 return compute(values, 0, values.length);
141 }
142
143 /**
144 * Evaluate the median of the specified range.
145 *
146 * <p>Note: This method may partially sort the input values if not configured to
147 * {@link #withCopy(boolean) copy} the input data.
148 *
149 * @param values Values.
150 * @param from Inclusive start of the range.
151 * @param to Exclusive end of the range.
152 * @return the median
153 * @throws IllegalArgumentException if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
154 * @throws IndexOutOfBoundsException if the sub-range is out of bounds
155 * @see #with(NaNPolicy)
156 * @since 1.2
157 */
158 public double evaluateRange(double[] values, int from, int to) {
159 Statistics.checkFromToIndex(from, to, values.length);
160 return compute(values, from, to);
161 }
162
163 /**
164 * Compute the median of the specified range.
165 *
166 * @param values Values.
167 * @param from Inclusive start of the range.
168 * @param to Exclusive end of the range.
169 * @return the median
170 */
171 private double compute(double[] values, int from, int to) {
172 // Floating-point data handling
173 final int[] bounds = new int[2];
174 final double[] x = nanTransformer.apply(values, from, to, bounds);
175 final int start = bounds[0];
176 final int end = bounds[1];
177 final int n = end - start;
178 // Special cases
179 if (n <= 2) {
180 switch (n) {
181 case 2:
182 // Sorting the array matches the behaviour of Quantile for n==2
183 // Handle NaN and signed zeros
184 if (Double.compare(x[start + 1], x[start]) < 0) {
185 final double t = x[start];
186 x[start] = x[start + 1];
187 x[start + 1] = t;
188 }
189 return Interpolation.mean(x[start], x[start + 1]);
190 case 1:
191 return x[start];
192 default:
193 return Double.NaN;
194 }
195 }
196 // Median index (including the offset)
197 final int m = (start + end) >>> 1;
198 // Odd
199 if ((n & 0x1) == 1) {
200 Selection.select(x, start, end, m);
201 return x[m];
202 }
203 // Even: require (m-1, m)
204 Selection.select(x, start, end, new int[] {m - 1, m});
205 return Interpolation.mean(x[m - 1], x[m]);
206 }
207
208 /**
209 * Evaluate the median.
210 *
211 * <p>Note: This method may partially sort the input values if not configured to
212 * {@link #withCopy(boolean) copy} the input data.
213 *
214 * @param values Values.
215 * @return the median
216 */
217 public double evaluate(int[] values) {
218 return compute(values, 0, values.length);
219 }
220
221 /**
222 * Evaluate the median of the specified range.
223 *
224 * <p>Note: This method may partially sort the input values if not configured to
225 * {@link #withCopy(boolean) copy} the input data.
226 *
227 * @param values Values.
228 * @param from Inclusive start of the range.
229 * @param to Exclusive end of the range.
230 * @return the median
231 * @throws IndexOutOfBoundsException if the sub-range is out of bounds
232 * @since 1.2
233 */
234 public double evaluateRange(int[] values, int from, int to) {
235 Statistics.checkFromToIndex(from, to, values.length);
236 return compute(values, from, to);
237 }
238
239 /**
240 * Compute the median of the specified range.
241 *
242 * @param values Values.
243 * @param from Inclusive start of the range.
244 * @param to Exclusive end of the range.
245 * @return the median
246 */
247 private double compute(int[] values, int from, int to) {
248 final int[] x;
249 final int start;
250 final int end;
251 if (copy) {
252 x = Statistics.copy(values, from, to);
253 start = 0;
254 end = x.length;
255 } else {
256 x = values;
257 start = from;
258 end = to;
259 }
260 final int n = end - start;
261 // Special cases
262 if (n <= 2) {
263 switch (n) {
264 case 2:
265 // Sorting the array matches the behaviour of Quantile for n==2
266 if (x[start + 1] < x[start]) {
267 final int t = x[start];
268 x[start] = x[start + 1];
269 x[start + 1] = t;
270 }
271 return Interpolation.mean(x[start], x[start + 1]);
272 case 1:
273 return x[start];
274 default:
275 return Double.NaN;
276 }
277 }
278 // Median index (including the offset)
279 final int m = (start + end) >>> 1;
280 // Odd
281 if ((n & 0x1) == 1) {
282 Selection.select(x, start, end, m);
283 return x[m];
284 }
285 // Even: require (m-1, m)
286 Selection.select(x, start, end, new int[] {m - 1, m});
287 return Interpolation.mean(x[m - 1], x[m]);
288 }
289 }