001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.statistics.descriptive; 019 020import java.util.Objects; 021import org.apache.commons.numbers.arrays.Selection; 022 023/** 024 * Returns the median of the available values. 025 * 026 * <p>For values of length {@code n}, let {@code k = n / 2}: 027 * <ul> 028 * <li>The result is {@code NaN} if {@code n = 0}. 029 * <li>The result is {@code values[k]} if {@code n} is odd. 030 * <li>The result is {@code (values[k - 1] + values[k]) / 2} if {@code n} is even. 031 * </ul> 032 * 033 * <p>This implementation respects the ordering imposed by 034 * {@link Double#compare(double, double)} for {@code NaN} values. If a {@code NaN} occurs 035 * in the selected positions in the fully sorted values then the result is {@code NaN}. 036 * 037 * <p>The {@link NaNPolicy} can be used to change the behaviour on {@code NaN} values. 038 * 039 * <p>Instances of this class are immutable and thread-safe. 040 * 041 * @see #with(NaNPolicy) 042 * @see <a href="https://en.wikipedia.org/wiki/Median">Median (Wikipedia)</a> 043 * @since 1.1 044 */ 045public final class Median { 046 /** Default instance. */ 047 private static final Median DEFAULT = new Median(false, NaNPolicy.INCLUDE); 048 049 /** Flag to indicate if the data should be copied. */ 050 private final boolean copy; 051 /** NaN policy for floating point data. */ 052 private final NaNPolicy nanPolicy; 053 /** Transformer for NaN data. */ 054 private final NaNTransformer nanTransformer; 055 056 /** 057 * @param copy Flag to indicate if the data should be copied. 058 * @param nanPolicy NaN policy. 059 */ 060 private Median(boolean copy, NaNPolicy nanPolicy) { 061 this.copy = copy; 062 this.nanPolicy = nanPolicy; 063 nanTransformer = NaNTransformers.createNaNTransformer(nanPolicy, copy); 064 } 065 066 /** 067 * Return a new instance with the default options. 068 * 069 * <ul> 070 * <li>{@linkplain #withCopy(boolean) Copy = false} 071 * <li>{@linkplain #with(NaNPolicy) NaN policy = include} 072 * </ul> 073 * 074 * <p>Note: The default options configure for processing in-place and including 075 * {@code NaN} values in the data. This is the most efficient mode and has the 076 * smallest memory consumption. 077 * 078 * @return the median implementation 079 * @see #withCopy(boolean) 080 * @see #with(NaNPolicy) 081 */ 082 public static Median withDefaults() { 083 return DEFAULT; 084 } 085 086 /** 087 * Return an instance with the configured copy behaviour. If {@code false} then 088 * the input array will be modified by the call to evaluate the median; otherwise 089 * the computation uses a copy of the data. 090 * 091 * @param v Value. 092 * @return an instance 093 */ 094 public Median withCopy(boolean v) { 095 return new Median(v, nanPolicy); 096 } 097 098 /** 099 * Return an instance with the configured {@link NaNPolicy}. 100 * 101 * <p>Note: This implementation respects the ordering imposed by 102 * {@link Double#compare(double, double)} for {@code NaN} values: {@code NaN} is 103 * considered greater than all other values, and all {@code NaN} values are equal. The 104 * {@link NaNPolicy} changes the computation of the statistic in the presence of 105 * {@code NaN} values. 106 * 107 * <ul> 108 * <li>{@link NaNPolicy#INCLUDE}: {@code NaN} values are moved to the end of the data; 109 * the size of the data <em>includes</em> the {@code NaN} values and the median will be 110 * {@code NaN} if any value used for median interpolation is {@code NaN}. 111 * <li>{@link NaNPolicy#EXCLUDE}: {@code NaN} values are moved to the end of the data; 112 * the size of the data <em>excludes</em> the {@code NaN} values and the median will 113 * never be {@code NaN} for non-zero size. If all data are {@code NaN} then the size is zero 114 * and the result is {@code NaN}. 115 * <li>{@link NaNPolicy#ERROR}: An exception is raised if the data contains {@code NaN} 116 * values. 117 * </ul> 118 * 119 * <p>Note that the result is identical for all policies if no {@code NaN} values are present. 120 * 121 * @param v Value. 122 * @return an instance 123 */ 124 public Median with(NaNPolicy v) { 125 return new Median(copy, Objects.requireNonNull(v)); 126 } 127 128 /** 129 * Evaluate the median. 130 * 131 * <p>Note: This method may partially sort the input values if not configured to 132 * {@link #withCopy(boolean) copy} the input data. 133 * 134 * @param values Values. 135 * @return the median 136 * @throws IllegalArgumentException if the values contain NaN and the configuration is {@link NaNPolicy#ERROR} 137 * @see #with(NaNPolicy) 138 */ 139 public double evaluate(double[] values) { 140 return compute(values, 0, values.length); 141 } 142 143 /** 144 * Evaluate the median of the specified range. 145 * 146 * <p>Note: This method may partially sort the input values if not configured to 147 * {@link #withCopy(boolean) copy} the input data. 148 * 149 * @param values Values. 150 * @param from Inclusive start of the range. 151 * @param to Exclusive end of the range. 152 * @return the median 153 * @throws IllegalArgumentException if the values contain NaN and the configuration is {@link NaNPolicy#ERROR} 154 * @throws IndexOutOfBoundsException if the sub-range is out of bounds 155 * @see #with(NaNPolicy) 156 * @since 1.2 157 */ 158 public double evaluateRange(double[] values, int from, int to) { 159 Statistics.checkFromToIndex(from, to, values.length); 160 return compute(values, from, to); 161 } 162 163 /** 164 * Compute the median of the specified range. 165 * 166 * @param values Values. 167 * @param from Inclusive start of the range. 168 * @param to Exclusive end of the range. 169 * @return the median 170 */ 171 private double compute(double[] values, int from, int to) { 172 // Floating-point data handling 173 final int[] bounds = new int[2]; 174 final double[] x = nanTransformer.apply(values, from, to, bounds); 175 final int start = bounds[0]; 176 final int end = bounds[1]; 177 final int n = end - start; 178 // Special cases 179 if (n <= 2) { 180 switch (n) { 181 case 2: 182 // Sorting the array matches the behaviour of Quantile for n==2 183 // Handle NaN and signed zeros 184 if (Double.compare(x[start + 1], x[start]) < 0) { 185 final double t = x[start]; 186 x[start] = x[start + 1]; 187 x[start + 1] = t; 188 } 189 return Interpolation.mean(x[start], x[start + 1]); 190 case 1: 191 return x[start]; 192 default: 193 return Double.NaN; 194 } 195 } 196 // Median index (including the offset) 197 final int m = (start + end) >>> 1; 198 // Odd 199 if ((n & 0x1) == 1) { 200 Selection.select(x, start, end, m); 201 return x[m]; 202 } 203 // Even: require (m-1, m) 204 Selection.select(x, start, end, new int[] {m - 1, m}); 205 return Interpolation.mean(x[m - 1], x[m]); 206 } 207 208 /** 209 * Evaluate the median. 210 * 211 * <p>Note: This method may partially sort the input values if not configured to 212 * {@link #withCopy(boolean) copy} the input data. 213 * 214 * @param values Values. 215 * @return the median 216 */ 217 public double evaluate(int[] values) { 218 return compute(values, 0, values.length); 219 } 220 221 /** 222 * Evaluate the median of the specified range. 223 * 224 * <p>Note: This method may partially sort the input values if not configured to 225 * {@link #withCopy(boolean) copy} the input data. 226 * 227 * @param values Values. 228 * @param from Inclusive start of the range. 229 * @param to Exclusive end of the range. 230 * @return the median 231 * @throws IndexOutOfBoundsException if the sub-range is out of bounds 232 * @since 1.2 233 */ 234 public double evaluateRange(int[] values, int from, int to) { 235 Statistics.checkFromToIndex(from, to, values.length); 236 return compute(values, from, to); 237 } 238 239 /** 240 * Compute the median of the specified range. 241 * 242 * @param values Values. 243 * @param from Inclusive start of the range. 244 * @param to Exclusive end of the range. 245 * @return the median 246 */ 247 private double compute(int[] values, int from, int to) { 248 final int[] x; 249 final int start; 250 final int end; 251 if (copy) { 252 x = Statistics.copy(values, from, to); 253 start = 0; 254 end = x.length; 255 } else { 256 x = values; 257 start = from; 258 end = to; 259 } 260 final int n = end - start; 261 // Special cases 262 if (n <= 2) { 263 switch (n) { 264 case 2: 265 // Sorting the array matches the behaviour of Quantile for n==2 266 if (x[start + 1] < x[start]) { 267 final int t = x[start]; 268 x[start] = x[start + 1]; 269 x[start + 1] = t; 270 } 271 return Interpolation.mean(x[start], x[start + 1]); 272 case 1: 273 return x[start]; 274 default: 275 return Double.NaN; 276 } 277 } 278 // Median index (including the offset) 279 final int m = (start + end) >>> 1; 280 // Odd 281 if ((n & 0x1) == 1) { 282 Selection.select(x, start, end, m); 283 return x[m]; 284 } 285 // Even: require (m-1, m) 286 Selection.select(x, start, end, new int[] {m - 1, m}); 287 return Interpolation.mean(x[m - 1], x[m]); 288 } 289}