001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.statistics.descriptive;
019
020import java.util.Objects;
021import org.apache.commons.numbers.arrays.Selection;
022
023/**
024 * Returns the median of the available values.
025 *
026 * <p>For values of length {@code n}, let {@code k = n / 2}:
027 * <ul>
028 * <li>The result is {@code NaN} if {@code n = 0}.
029 * <li>The result is {@code values[k]} if {@code n} is odd.
030 * <li>The result is {@code (values[k - 1] + values[k]) / 2} if {@code n} is even.
031 * </ul>
032 *
033 * <p>This implementation respects the ordering imposed by
034 * {@link Double#compare(double, double)} for {@code NaN} values. If a {@code NaN} occurs
035 * in the selected positions in the fully sorted values then the result is {@code NaN}.
036 *
037 * <p>The {@link NaNPolicy} can be used to change the behaviour on {@code NaN} values.
038 *
039 * <p>Instances of this class are immutable and thread-safe.
040 *
041 * @see #with(NaNPolicy)
042 * @see <a href="https://en.wikipedia.org/wiki/Median">Median (Wikipedia)</a>
043 * @since 1.1
044 */
045public final class Median {
046    /** Default instance. */
047    private static final Median DEFAULT = new Median(false, NaNPolicy.INCLUDE);
048
049    /** Flag to indicate if the data should be copied. */
050    private final boolean copy;
051    /** NaN policy for floating point data. */
052    private final NaNPolicy nanPolicy;
053    /** Transformer for NaN data. */
054    private final NaNTransformer nanTransformer;
055
056    /**
057     * @param copy Flag to indicate if the data should be copied.
058     * @param nanPolicy NaN policy.
059     */
060    private Median(boolean copy, NaNPolicy nanPolicy) {
061        this.copy = copy;
062        this.nanPolicy = nanPolicy;
063        nanTransformer = NaNTransformers.createNaNTransformer(nanPolicy, copy);
064    }
065
066    /**
067     * Return a new instance with the default options.
068     *
069     * <ul>
070     * <li>{@linkplain #withCopy(boolean) Copy = false}
071     * <li>{@linkplain #with(NaNPolicy) NaN policy = include}
072     * </ul>
073     *
074     * <p>Note: The default options configure for processing in-place and including
075     * {@code NaN} values in the data. This is the most efficient mode and has the
076     * smallest memory consumption.
077     *
078     * @return the median implementation
079     * @see #withCopy(boolean)
080     * @see #with(NaNPolicy)
081     */
082    public static Median withDefaults() {
083        return DEFAULT;
084    }
085
086    /**
087     * Return an instance with the configured copy behaviour. If {@code false} then
088     * the input array will be modified by the call to evaluate the median; otherwise
089     * the computation uses a copy of the data.
090     *
091     * @param v Value.
092     * @return an instance
093     */
094    public Median withCopy(boolean v) {
095        return new Median(v, nanPolicy);
096    }
097
098    /**
099     * Return an instance with the configured {@link NaNPolicy}.
100     *
101     * <p>Note: This implementation respects the ordering imposed by
102     * {@link Double#compare(double, double)} for {@code NaN} values: {@code NaN} is
103     * considered greater than all other values, and all {@code NaN} values are equal. The
104     * {@link NaNPolicy} changes the computation of the statistic in the presence of
105     * {@code NaN} values.
106     *
107     * <ul>
108     * <li>{@link NaNPolicy#INCLUDE}: {@code NaN} values are moved to the end of the data;
109     * the size of the data <em>includes</em> the {@code NaN} values and the median will be
110     * {@code NaN} if any value used for median interpolation is {@code NaN}.
111     * <li>{@link NaNPolicy#EXCLUDE}: {@code NaN} values are moved to the end of the data;
112     * the size of the data <em>excludes</em> the {@code NaN} values and the median will
113     * never be {@code NaN} for non-zero size. If all data are {@code NaN} then the size is zero
114     * and the result is {@code NaN}.
115     * <li>{@link NaNPolicy#ERROR}: An exception is raised if the data contains {@code NaN}
116     * values.
117     * </ul>
118     *
119     * <p>Note that the result is identical for all policies if no {@code NaN} values are present.
120     *
121     * @param v Value.
122     * @return an instance
123     */
124    public Median with(NaNPolicy v) {
125        return new Median(copy, Objects.requireNonNull(v));
126    }
127
128    /**
129     * Evaluate the median.
130     *
131     * <p>Note: This method may partially sort the input values if not configured to
132     * {@link #withCopy(boolean) copy} the input data.
133     *
134     * @param values Values.
135     * @return the median
136     * @throws IllegalArgumentException if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
137     * @see #with(NaNPolicy)
138     */
139    public double evaluate(double[] values) {
140        return compute(values, 0, values.length);
141    }
142
143    /**
144     * Evaluate the median of the specified range.
145     *
146     * <p>Note: This method may partially sort the input values if not configured to
147     * {@link #withCopy(boolean) copy} the input data.
148     *
149     * @param values Values.
150     * @param from Inclusive start of the range.
151     * @param to Exclusive end of the range.
152     * @return the median
153     * @throws IllegalArgumentException if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
154     * @throws IndexOutOfBoundsException if the sub-range is out of bounds
155     * @see #with(NaNPolicy)
156     * @since 1.2
157     */
158    public double evaluateRange(double[] values, int from, int to) {
159        Statistics.checkFromToIndex(from, to, values.length);
160        return compute(values, from, to);
161    }
162
163    /**
164     * Compute the median of the specified range.
165     *
166     * @param values Values.
167     * @param from Inclusive start of the range.
168     * @param to Exclusive end of the range.
169     * @return the median
170     */
171    private double compute(double[] values, int from, int to) {
172        // Floating-point data handling
173        final int[] bounds = new int[2];
174        final double[] x = nanTransformer.apply(values, from, to, bounds);
175        final int start = bounds[0];
176        final int end = bounds[1];
177        final int n = end - start;
178        // Special cases
179        if (n <= 2) {
180            switch (n) {
181            case 2:
182                // Sorting the array matches the behaviour of Quantile for n==2
183                // Handle NaN and signed zeros
184                if (Double.compare(x[start + 1], x[start]) < 0) {
185                    final double t = x[start];
186                    x[start] = x[start + 1];
187                    x[start + 1] = t;
188                }
189                return Interpolation.mean(x[start], x[start + 1]);
190            case 1:
191                return x[start];
192            default:
193                return Double.NaN;
194            }
195        }
196        // Median index (including the offset)
197        final int m = (start + end) >>> 1;
198        // Odd
199        if ((n & 0x1) == 1) {
200            Selection.select(x, start, end, m);
201            return x[m];
202        }
203        // Even: require (m-1, m)
204        Selection.select(x, start, end, new int[] {m - 1, m});
205        return Interpolation.mean(x[m - 1], x[m]);
206    }
207
208    /**
209     * Evaluate the median.
210     *
211     * <p>Note: This method may partially sort the input values if not configured to
212     * {@link #withCopy(boolean) copy} the input data.
213     *
214     * @param values Values.
215     * @return the median
216     */
217    public double evaluate(int[] values) {
218        return compute(values, 0, values.length);
219    }
220
221    /**
222     * Evaluate the median of the specified range.
223     *
224     * <p>Note: This method may partially sort the input values if not configured to
225     * {@link #withCopy(boolean) copy} the input data.
226     *
227     * @param values Values.
228     * @param from Inclusive start of the range.
229     * @param to Exclusive end of the range.
230     * @return the median
231     * @throws IndexOutOfBoundsException if the sub-range is out of bounds
232     * @since 1.2
233     */
234    public double evaluateRange(int[] values, int from, int to) {
235        Statistics.checkFromToIndex(from, to, values.length);
236        return compute(values, from, to);
237    }
238
239    /**
240     * Compute the median of the specified range.
241     *
242     * @param values Values.
243     * @param from Inclusive start of the range.
244     * @param to Exclusive end of the range.
245     * @return the median
246     */
247    private double compute(int[] values, int from, int to) {
248        final int[] x;
249        final int start;
250        final int end;
251        if (copy) {
252            x = Statistics.copy(values, from, to);
253            start = 0;
254            end = x.length;
255        } else {
256            x = values;
257            start = from;
258            end = to;
259        }
260        final int n = end - start;
261        // Special cases
262        if (n <= 2) {
263            switch (n) {
264            case 2:
265                // Sorting the array matches the behaviour of Quantile for n==2
266                if (x[start + 1] < x[start]) {
267                    final int t = x[start];
268                    x[start] = x[start + 1];
269                    x[start + 1] = t;
270                }
271                return Interpolation.mean(x[start], x[start + 1]);
272            case 1:
273                return x[start];
274            default:
275                return Double.NaN;
276            }
277        }
278        // Median index (including the offset)
279        final int m = (start + end) >>> 1;
280        // Odd
281        if ((n & 0x1) == 1) {
282            Selection.select(x, start, end, m);
283            return x[m];
284        }
285        // Even: require (m-1, m)
286        Selection.select(x, start, end, new int[] {m - 1, m});
287        return Interpolation.mean(x[m - 1], x[m]);
288    }
289}