View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math4.legacy.stat.inference;
18  
19  import org.apache.commons.statistics.distribution.NormalDistribution;
20  import org.apache.commons.math4.legacy.exception.ConvergenceException;
21  import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
22  import org.apache.commons.math4.legacy.exception.MaxCountExceededException;
23  import org.apache.commons.math4.legacy.exception.NoDataException;
24  import org.apache.commons.math4.legacy.exception.NullArgumentException;
25  import org.apache.commons.math4.legacy.exception.NumberIsTooLargeException;
26  import org.apache.commons.math4.legacy.stat.ranking.NaNStrategy;
27  import org.apache.commons.math4.legacy.stat.ranking.NaturalRanking;
28  import org.apache.commons.math4.legacy.stat.ranking.TiesStrategy;
29  import org.apache.commons.math4.core.jdkmath.JdkMath;
30  
31  /**
32   * An implementation of the Wilcoxon signed-rank test.
33   *
34   */
35  public class WilcoxonSignedRankTest {
36  
37      /** Ranking algorithm. */
38      private NaturalRanking naturalRanking;
39  
40      /**
41       * Create a test instance where NaN's are left in place and ties get
42       * the average of applicable ranks. Use this unless you are very sure
43       * of what you are doing.
44       */
45      public WilcoxonSignedRankTest() {
46          naturalRanking = new NaturalRanking(NaNStrategy.FIXED,
47                  TiesStrategy.AVERAGE);
48      }
49  
50      /**
51       * Create a test instance using the given strategies for NaN's and ties.
52       * Only use this if you are sure of what you are doing.
53       *
54       * @param nanStrategy
55       *            specifies the strategy that should be used for Double.NaN's
56       * @param tiesStrategy
57       *            specifies the strategy that should be used for ties
58       */
59      public WilcoxonSignedRankTest(final NaNStrategy nanStrategy,
60                                    final TiesStrategy tiesStrategy) {
61          naturalRanking = new NaturalRanking(nanStrategy, tiesStrategy);
62      }
63  
64      /**
65       * Ensures that the provided arrays fulfills the assumptions.
66       *
67       * @param x first sample
68       * @param y second sample
69       * @throws NullArgumentException if {@code x} or {@code y} are {@code null}.
70       * @throws NoDataException if {@code x} or {@code y} are zero-length.
71       * @throws DimensionMismatchException if {@code x} and {@code y} do not
72       * have the same length.
73       */
74      private void ensureDataConformance(final double[] x, final double[] y)
75          throws NullArgumentException, NoDataException, DimensionMismatchException {
76  
77          if (x == null ||
78              y == null) {
79                  throw new NullArgumentException();
80          }
81          if (x.length == 0 ||
82              y.length == 0) {
83              throw new NoDataException();
84          }
85          if (y.length != x.length) {
86              throw new DimensionMismatchException(y.length, x.length);
87          }
88      }
89  
90      /**
91       * Calculates y[i] - x[i] for all i.
92       *
93       * @param x first sample
94       * @param y second sample
95       * @return z = y - x
96       */
97      private double[] calculateDifferences(final double[] x, final double[] y) {
98  
99          final double[] z = new double[x.length];
100 
101         for (int i = 0; i < x.length; ++i) {
102             z[i] = y[i] - x[i];
103         }
104 
105         return z;
106     }
107 
108     /**
109      * Calculates |z[i]| for all i.
110      *
111      * @param z sample
112      * @return |z|
113      * @throws NullArgumentException if {@code z} is {@code null}
114      * @throws NoDataException if {@code z} is zero-length.
115      */
116     private double[] calculateAbsoluteDifferences(final double[] z)
117         throws NullArgumentException, NoDataException {
118 
119         if (z == null) {
120             throw new NullArgumentException();
121         }
122 
123         if (z.length == 0) {
124             throw new NoDataException();
125         }
126 
127         final double[] zAbs = new double[z.length];
128 
129         for (int i = 0; i < z.length; ++i) {
130             zAbs[i] = JdkMath.abs(z[i]);
131         }
132 
133         return zAbs;
134     }
135 
136     /**
137      * Computes the <a
138      * href="http://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test">
139      * Wilcoxon signed ranked statistic</a> comparing mean for two related
140      * samples or repeated measurements on a single sample.
141      * <p>
142      * This statistic can be used to perform a Wilcoxon signed ranked test
143      * evaluating the null hypothesis that the two related samples or repeated
144      * measurements on a single sample has equal mean.
145      * </p>
146      * <p>
147      * Let X<sub>i</sub> denote the i'th individual of the first sample and
148      * Y<sub>i</sub> the related i'th individual in the second sample. Let
149      * Z<sub>i</sub> = Y<sub>i</sub> - X<sub>i</sub>.
150      * </p>
151      * <p>
152      * <strong>Preconditions</strong>:
153      * <ul>
154      * <li>The differences Z<sub>i</sub> must be independent.</li>
155      * <li>Each Z<sub>i</sub> comes from a continuous population (they must be
156      * identical) and is symmetric about a common median.</li>
157      * <li>The values that X<sub>i</sub> and Y<sub>i</sub> represent are
158      * ordered, so the comparisons greater than, less than, and equal to are
159      * meaningful.</li>
160      * </ul>
161      *
162      * @param x the first sample
163      * @param y the second sample
164      * @return wilcoxonSignedRank statistic (the larger of W+ and W-)
165      * @throws NullArgumentException if {@code x} or {@code y} are {@code null}.
166      * @throws NoDataException if {@code x} or {@code y} are zero-length.
167      * @throws DimensionMismatchException if {@code x} and {@code y} do not
168      * have the same length.
169      */
170     public double wilcoxonSignedRank(final double[] x, final double[] y)
171         throws NullArgumentException, NoDataException, DimensionMismatchException {
172 
173         ensureDataConformance(x, y);
174 
175         // throws IllegalArgumentException if x and y are not correctly
176         // specified
177         final double[] z = calculateDifferences(x, y);
178         final double[] zAbs = calculateAbsoluteDifferences(z);
179 
180         final double[] ranks = naturalRanking.rank(zAbs);
181 
182         double wPlus = 0;
183 
184         for (int i = 0; i < z.length; ++i) {
185             if (z[i] > 0) {
186                 wPlus += ranks[i];
187             }
188         }
189 
190         final int n = x.length;
191         final double wMinus = (((double) (n * (n + 1))) / 2.0) - wPlus;
192 
193         return JdkMath.max(wPlus, wMinus);
194     }
195 
196     /**
197      * Algorithm inspired by.
198      * http://www.fon.hum.uva.nl/Service/Statistics/Signed_Rank_Algorihms.html#C
199      * by Rob van Son, Institute of Phonetic Sciences & IFOTT,
200      * University of Amsterdam
201      *
202      * @param wMax largest Wilcoxon signed rank value
203      * @param n number of subjects (corresponding to x.length)
204      * @return two-sided exact p-value
205      */
206     private double calculateExactPValue(final double wMax, final int n) {
207 
208         // Total number of outcomes (equal to 2^N but a lot faster)
209         final int m = 1 << n;
210 
211         int largerRankSums = 0;
212 
213         for (int i = 0; i < m; ++i) {
214             int rankSum = 0;
215 
216             // Generate all possible rank sums
217             for (int j = 0; j < n; ++j) {
218 
219                 // (i >> j) & 1 extract i's j-th bit from the right
220                 if (((i >> j) & 1) == 1) {
221                     rankSum += j + 1;
222                 }
223             }
224 
225             if (rankSum >= wMax) {
226                 ++largerRankSums;
227             }
228         }
229 
230         /*
231          * largerRankSums / m gives the one-sided p-value, so it's multiplied
232          * with 2 to get the two-sided p-value
233          */
234         return 2 * ((double) largerRankSums) / ((double) m);
235     }
236 
237     /**
238      * @param wMin smallest Wilcoxon signed rank value
239      * @param n number of subjects (corresponding to x.length)
240      * @return two-sided asymptotic p-value
241      */
242     private double calculateAsymptoticPValue(final double wMin, final int n) {
243 
244         final double es = (double) (n * (n + 1)) / 4.0;
245 
246         /* Same as (but saves computations):
247          * final double VarW = ((double) (N * (N + 1) * (2*N + 1))) / 24;
248          */
249         final double varS = es * ((double) (2 * n + 1) / 6.0);
250 
251         // - 0.5 is a continuity correction
252         final double z = (wMin - es - 0.5) / JdkMath.sqrt(varS);
253 
254         // No try-catch or advertised exception because args are valid
255         // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
256         final NormalDistribution standardNormal = NormalDistribution.of(0, 1);
257 
258         return 2*standardNormal.cumulativeProbability(z);
259     }
260 
261     /**
262      * Returns the <i>observed significance level</i>, or <a href=
263      * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
264      * p-value</a>, associated with a <a
265      * href="http://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test">
266      * Wilcoxon signed ranked statistic</a> comparing mean for two related
267      * samples or repeated measurements on a single sample.
268      * <p>
269      * Let X<sub>i</sub> denote the i'th individual of the first sample and
270      * Y<sub>i</sub> the related i'th individual in the second sample. Let
271      * Z<sub>i</sub> = Y<sub>i</sub> - X<sub>i</sub>.
272      * </p>
273      * <p>
274      * <strong>Preconditions</strong>:
275      * <ul>
276      * <li>The differences Z<sub>i</sub> must be independent.</li>
277      * <li>Each Z<sub>i</sub> comes from a continuous population (they must be
278      * identical) and is symmetric about a common median.</li>
279      * <li>The values that X<sub>i</sub> and Y<sub>i</sub> represent are
280      * ordered, so the comparisons greater than, less than, and equal to are
281      * meaningful.</li>
282      * </ul>
283      *
284      * @param x the first sample
285      * @param y the second sample
286      * @param exactPValue
287      *            if the exact p-value is wanted (only works for x.length &gt;= 30,
288      *            if true and x.length &lt; 30, this is ignored because
289      *            calculations may take too long)
290      * @return p-value
291      * @throws NullArgumentException if {@code x} or {@code y} are {@code null}.
292      * @throws NoDataException if {@code x} or {@code y} are zero-length.
293      * @throws DimensionMismatchException if {@code x} and {@code y} do not
294      * have the same length.
295      * @throws NumberIsTooLargeException if {@code exactPValue} is {@code true}
296      * and {@code x.length} &gt; 30
297      * @throws ConvergenceException if the p-value can not be computed due to
298      * a convergence error
299      * @throws MaxCountExceededException if the maximum number of iterations
300      * is exceeded
301      */
302     public double wilcoxonSignedRankTest(final double[] x, final double[] y,
303                                          final boolean exactPValue)
304         throws NullArgumentException, NoDataException, DimensionMismatchException,
305         NumberIsTooLargeException, ConvergenceException, MaxCountExceededException {
306 
307         ensureDataConformance(x, y);
308 
309         final int n = x.length;
310         final double wMax = wilcoxonSignedRank(x, y);
311 
312         if (exactPValue && n > 30) {
313             throw new NumberIsTooLargeException(n, 30, true);
314         }
315 
316         if (exactPValue) {
317             return calculateExactPValue(wMax, n);
318         } else {
319             final double wMin = ( (double)(n*(n+1)) / 2.0 ) - wMax;
320             return calculateAsymptoticPValue(wMin, n);
321         }
322     }
323 }