1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.statistics.descriptive; 19 20 import java.util.Objects; 21 import org.apache.commons.numbers.arrays.Selection; 22 23 /** 24 * Returns the median of the available values. 25 * 26 * <p>For values of length {@code n}, let {@code k = n / 2}: 27 * <ul> 28 * <li>The result is {@code NaN} if {@code n = 0}. 29 * <li>The result is {@code values[k]} if {@code n} is odd. 30 * <li>The result is {@code (values[k - 1] + values[k]) / 2} if {@code n} is even. 31 * </ul> 32 * 33 * <p>This implementation respects the ordering imposed by 34 * {@link Double#compare(double, double)} for {@code NaN} values. If a {@code NaN} occurs 35 * in the selected positions in the fully sorted values then the result is {@code NaN}. 36 * 37 * <p>The {@link NaNPolicy} can be used to change the behaviour on {@code NaN} values. 38 * 39 * <p>Instances of this class are immutable and thread-safe. 40 * 41 * @see #with(NaNPolicy) 42 * @see <a href="https://en.wikipedia.org/wiki/Median">Median (Wikipedia)</a> 43 * @since 1.1 44 */ 45 public final class Median { 46 /** Default instance. */ 47 private static final Median DEFAULT = new Median(false, NaNPolicy.INCLUDE); 48 49 /** Flag to indicate if the data should be copied. */ 50 private final boolean copy; 51 /** NaN policy for floating point data. */ 52 private final NaNPolicy nanPolicy; 53 /** Transformer for NaN data. */ 54 private final NaNTransformer nanTransformer; 55 56 /** 57 * @param copy Flag to indicate if the data should be copied. 58 * @param nanPolicy NaN policy. 59 */ 60 private Median(boolean copy, NaNPolicy nanPolicy) { 61 this.copy = copy; 62 this.nanPolicy = nanPolicy; 63 nanTransformer = NaNTransformers.createNaNTransformer(nanPolicy, copy); 64 } 65 66 /** 67 * Return a new instance with the default options. 68 * 69 * <ul> 70 * <li>{@linkplain #withCopy(boolean) Copy = false} 71 * <li>{@linkplain #with(NaNPolicy) NaN policy = include} 72 * </ul> 73 * 74 * <p>Note: The default options configure for processing in-place and including 75 * {@code NaN} values in the data. This is the most efficient mode and has the 76 * smallest memory consumption. 77 * 78 * @return the median implementation 79 * @see #withCopy(boolean) 80 * @see #with(NaNPolicy) 81 */ 82 public static Median withDefaults() { 83 return DEFAULT; 84 } 85 86 /** 87 * Return an instance with the configured copy behaviour. If {@code false} then 88 * the input array will be modified by the call to evaluate the median; otherwise 89 * the computation uses a copy of the data. 90 * 91 * @param v Value. 92 * @return an instance 93 */ 94 public Median withCopy(boolean v) { 95 return new Median(v, nanPolicy); 96 } 97 98 /** 99 * Return an instance with the configured {@link NaNPolicy}. 100 * 101 * <p>Note: This implementation respects the ordering imposed by 102 * {@link Double#compare(double, double)} for {@code NaN} values: {@code NaN} is 103 * considered greater than all other values, and all {@code NaN} values are equal. The 104 * {@link NaNPolicy} changes the computation of the statistic in the presence of 105 * {@code NaN} values. 106 * 107 * <ul> 108 * <li>{@link NaNPolicy#INCLUDE}: {@code NaN} values are moved to the end of the data; 109 * the size of the data <em>includes</em> the {@code NaN} values and the median will be 110 * {@code NaN} if any value used for median interpolation is {@code NaN}. 111 * <li>{@link NaNPolicy#EXCLUDE}: {@code NaN} values are moved to the end of the data; 112 * the size of the data <em>excludes</em> the {@code NaN} values and the median will 113 * never be {@code NaN} for non-zero size. If all data are {@code NaN} then the size is zero 114 * and the result is {@code NaN}. 115 * <li>{@link NaNPolicy#ERROR}: An exception is raised if the data contains {@code NaN} 116 * values. 117 * </ul> 118 * 119 * <p>Note that the result is identical for all policies if no {@code NaN} values are present. 120 * 121 * @param v Value. 122 * @return an instance 123 */ 124 public Median with(NaNPolicy v) { 125 return new Median(copy, Objects.requireNonNull(v)); 126 } 127 128 /** 129 * Evaluate the median. 130 * 131 * <p>Note: This method may partially sort the input values if not configured to 132 * {@link #withCopy(boolean) copy} the input data. 133 * 134 * @param values Values. 135 * @return the median 136 * @throws IllegalArgumentException if the values contain NaN and the configuration is {@link NaNPolicy#ERROR} 137 * @see #with(NaNPolicy) 138 */ 139 public double evaluate(double[] values) { 140 return compute(values, 0, values.length); 141 } 142 143 /** 144 * Evaluate the median of the specified range. 145 * 146 * <p>Note: This method may partially sort the input values if not configured to 147 * {@link #withCopy(boolean) copy} the input data. 148 * 149 * @param values Values. 150 * @param from Inclusive start of the range. 151 * @param to Exclusive end of the range. 152 * @return the median 153 * @throws IllegalArgumentException if the values contain NaN and the configuration is {@link NaNPolicy#ERROR} 154 * @throws IndexOutOfBoundsException if the sub-range is out of bounds 155 * @see #with(NaNPolicy) 156 * @since 1.2 157 */ 158 public double evaluateRange(double[] values, int from, int to) { 159 Statistics.checkFromToIndex(from, to, values.length); 160 return compute(values, from, to); 161 } 162 163 /** 164 * Compute the median of the specified range. 165 * 166 * @param values Values. 167 * @param from Inclusive start of the range. 168 * @param to Exclusive end of the range. 169 * @return the median 170 */ 171 private double compute(double[] values, int from, int to) { 172 // Floating-point data handling 173 final int[] bounds = new int[2]; 174 final double[] x = nanTransformer.apply(values, from, to, bounds); 175 final int start = bounds[0]; 176 final int end = bounds[1]; 177 final int n = end - start; 178 // Special cases 179 if (n <= 2) { 180 switch (n) { 181 case 2: 182 // Sorting the array matches the behaviour of Quantile for n==2 183 // Handle NaN and signed zeros 184 if (Double.compare(x[start + 1], x[start]) < 0) { 185 final double t = x[start]; 186 x[start] = x[start + 1]; 187 x[start + 1] = t; 188 } 189 return Interpolation.mean(x[start], x[start + 1]); 190 case 1: 191 return x[start]; 192 default: 193 return Double.NaN; 194 } 195 } 196 // Median index (including the offset) 197 final int m = (start + end) >>> 1; 198 // Odd 199 if ((n & 0x1) == 1) { 200 Selection.select(x, start, end, m); 201 return x[m]; 202 } 203 // Even: require (m-1, m) 204 Selection.select(x, start, end, new int[] {m - 1, m}); 205 return Interpolation.mean(x[m - 1], x[m]); 206 } 207 208 /** 209 * Evaluate the median. 210 * 211 * <p>Note: This method may partially sort the input values if not configured to 212 * {@link #withCopy(boolean) copy} the input data. 213 * 214 * @param values Values. 215 * @return the median 216 */ 217 public double evaluate(int[] values) { 218 return compute(values, 0, values.length); 219 } 220 221 /** 222 * Evaluate the median of the specified range. 223 * 224 * <p>Note: This method may partially sort the input values if not configured to 225 * {@link #withCopy(boolean) copy} the input data. 226 * 227 * @param values Values. 228 * @param from Inclusive start of the range. 229 * @param to Exclusive end of the range. 230 * @return the median 231 * @throws IndexOutOfBoundsException if the sub-range is out of bounds 232 * @since 1.2 233 */ 234 public double evaluateRange(int[] values, int from, int to) { 235 Statistics.checkFromToIndex(from, to, values.length); 236 return compute(values, from, to); 237 } 238 239 /** 240 * Compute the median of the specified range. 241 * 242 * @param values Values. 243 * @param from Inclusive start of the range. 244 * @param to Exclusive end of the range. 245 * @return the median 246 */ 247 private double compute(int[] values, int from, int to) { 248 final int[] x; 249 final int start; 250 final int end; 251 if (copy) { 252 x = Statistics.copy(values, from, to); 253 start = 0; 254 end = x.length; 255 } else { 256 x = values; 257 start = from; 258 end = to; 259 } 260 final int n = end - start; 261 // Special cases 262 if (n <= 2) { 263 switch (n) { 264 case 2: 265 // Sorting the array matches the behaviour of Quantile for n==2 266 if (x[start + 1] < x[start]) { 267 final int t = x[start]; 268 x[start] = x[start + 1]; 269 x[start + 1] = t; 270 } 271 return Interpolation.mean(x[start], x[start + 1]); 272 case 1: 273 return x[start]; 274 default: 275 return Double.NaN; 276 } 277 } 278 // Median index (including the offset) 279 final int m = (start + end) >>> 1; 280 // Odd 281 if ((n & 0x1) == 1) { 282 Selection.select(x, start, end, m); 283 return x[m]; 284 } 285 // Even: require (m-1, m) 286 Selection.select(x, start, end, new int[] {m - 1, m}); 287 return Interpolation.mean(x[m - 1], x[m]); 288 } 289 }