View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.statistics.inference;
18  
19  import java.util.Arrays;
20  import java.util.stream.IntStream;
21  import java.util.stream.Stream;
22  import org.apache.commons.statistics.distribution.BinomialDistribution;
23  import org.junit.jupiter.api.Assertions;
24  import org.junit.jupiter.api.Test;
25  import org.junit.jupiter.params.ParameterizedTest;
26  import org.junit.jupiter.params.provider.Arguments;
27  import org.junit.jupiter.params.provider.CsvSource;
28  import org.junit.jupiter.params.provider.MethodSource;
29  
30  /**
31   * Test cases for {@link BinomialTest}.
32   */
33  class BinomialTestTest {
34  
35      @Test
36      void testInvalidOptionsThrows() {
37          final BinomialTest test = BinomialTest.withDefaults();
38          Assertions.assertThrows(NullPointerException.class, () ->
39              test.with((AlternativeHypothesis) null));
40      }
41  
42      @ParameterizedTest
43      @CsvSource({
44          "10, 5, -1",
45          "10, 5, 2",
46          "10, -1, 0.5",
47          "10, 11, 0.5",
48          "-1, 5, 0.5",
49          "1, 2, 0.5",
50      })
51      void testBinomialTestThrows(int n, int k, double p) {
52          final BinomialTest test = BinomialTest.withDefaults();
53          Assertions.assertThrows(IllegalArgumentException.class, () -> test.test(n, k, p));
54      }
55  
56      /**
57       * Test the binomial test for each alternative hypothesis and all possible k given
58       * the number of trials (n).
59       *
60       * <p>Compute the p-values using a direct summation of the probability values.
61       * See https://en.wikipedia.org/wiki/Binomial_test.
62       *
63       * <p>The summation is not as accurate as using the CDF / SF so the epsilon
64       * is changed for larger number of trials (n). When n is very large summing the
65       * individual p-values has too much error and is covered by
66       * {@link #testBinomialTestLargeN(int, double)}.
67       */
68      @ParameterizedTest
69      @CsvSource({
70          "0, 0.25, 1e-15, 0",
71          "1, 0.25, 1e-15, 0",
72          "2, 0.25, 1e-15, 0",
73          "0, 0.5, 1e-15, 0",
74          "1, 0.5, 1e-15, 0",
75          "2, 0.5, 1e-15, 0",
76          "0, 0.75, 1e-15, 0",
77          "1, 0.75, 1e-15, 0",
78          "2, 0.75, 1e-15, 0",
79          "10, 0.25, 2e-15, 0",
80          "10, 0.49, 2e-15, 0",
81          "10, 0.5, 2e-15, 0",
82          "10, 0.51, 2e-15, 0",
83          "10, 0.75, 2e-15, 0",
84          "11, 0.25, 3e-15, 0",
85          "11, 0.49, 2e-15, 0",
86          "11, 0.5, 2e-15, 0",
87          "11, 0.51, 2e-15, 0",
88          "11, 0.75, 3e-15, 0",
89          "5, 0.1, 2e-15, 0",
90          "5, 0.7, 1e-15, 0",
91          "20, 0.7, 3e-15, 0",
92      })
93      void testBinomialTest(int n, double p, double eps) {
94          final BinomialDistribution dist = BinomialDistribution.of(n, p);
95          final double[] pk = IntStream.rangeClosed(0, n).mapToDouble(dist::probability).toArray();
96  
97          // Note: TestUtils.assertProbability expects exact equality when p is 0 or 1.
98          // Set the maximum for the sum to below 1 to avoid this.
99          final double maxP = Math.nextDown(1.0);
100 
101         final BinomialTest twoSided = BinomialTest.withDefaults();
102         final BinomialTest less = BinomialTest.withDefaults().with(AlternativeHypothesis.LESS_THAN);
103         final BinomialTest greater = BinomialTest.withDefaults().with(AlternativeHypothesis.GREATER_THAN);
104 
105         IntStream.rangeClosed(0, n).forEach(k -> {
106             double expected;
107 
108             // One-sided
109             expected = Math.min(maxP, IntStream.rangeClosed(0, k).mapToDouble(i -> pk[i]).sum());
110             TestUtils.assertProbability(expected,
111                 less.test(n, k, p).getPValue(), eps,
112                 () -> "less than: k=" + k);
113 
114             expected = Math.min(maxP, IntStream.rangeClosed(k, n).mapToDouble(i -> pk[i]).sum());
115             TestUtils.assertProbability(expected,
116                 greater.test(n, k, p).getPValue(), eps,
117                 () -> "greater than: k=" + k);
118 
119             // Two-sided
120             // Find all i where Pr(X = i) <= Pr(X = k) and sum them.
121             expected = Math.min(maxP, Arrays.stream(pk).filter(x -> x <= pk[k]).sum());
122             TestUtils.assertProbability(expected,
123                     twoSided.test(n, k, p).getPValue(), eps,
124                 () -> "two-sided: k=" + k);
125         });
126     }
127 
128     /**
129      * Test the binomial test for each alternative hypothesis and all possible k given
130      * the number of trials (n).
131      *
132      * <p>Compute the p-values using a summation of the probability values.
133      * See https://en.wikipedia.org/wiki/Binomial_test.
134      *
135      * <p>The summation is performed using the CDF / SF after look-up of the appropriate
136      * value for k. The actual value must be an exact match to the expected. This test
137      * verifies the binary search to locate indices in BinomialTest is correct.
138      */
139     @ParameterizedTest
140     @CsvSource({
141         "1234, 0.3",
142         "1234, 0.55",
143         "1234, 0.87",
144         "12345, 0.3",
145         "12345, 0.55",
146         "12345, 0.87",
147         // Case where the upper and lower mode have different values
148         "10000, 0.49999",
149         "10000, 0.50001",
150     })
151     void testBinomialTestLargeN(int n, double p) {
152         final BinomialDistribution dist = BinomialDistribution.of(n, p);
153         // Use the log probability here which has a larger range than probability and
154         // can detect small p-values that are non-zero but less than Double.MIN_NORMAL.
155         // It is also faster as it avoids a call to Math.exp.
156         final double[] pk = IntStream.rangeClosed(0, n).mapToDouble(dist::logProbability).toArray();
157 
158         // Require an exact match.
159         // This ensures the BinomialTest uses the CDF and SF.
160         final double eps = 0;
161 
162         final BinomialTest twoSided = BinomialTest.withDefaults();
163         final BinomialTest less = BinomialTest.withDefaults().with(AlternativeHypothesis.LESS_THAN);
164         final BinomialTest greater = BinomialTest.withDefaults().with(AlternativeHypothesis.GREATER_THAN);
165 
166         IntStream.rangeClosed(0, n).forEach(k -> {
167             double expected;
168 
169             // One-sided.
170             expected = dist.cumulativeProbability(k);
171             TestUtils.assertProbability(expected,
172                 less.test(n, k, p).getPValue(), eps,
173                 () -> "less than: k=" + k);
174 
175             expected = dist.survivalProbability(k - 1);
176             TestUtils.assertProbability(expected,
177                 greater.test(n, k, p).getPValue(), eps,
178                 () -> "greater than: k=" + k);
179 
180             // Two-sided
181             // Find all i where Pr(X = i) <= Pr(X = k) and sum them.
182             int i = -1;
183             while (i < n && pk[i + 1] <= pk[k]) {
184                 i++;
185             }
186             int j = n + 1;
187             while (j > 0 && pk[j - 1] <= pk[k]) {
188                 j--;
189             }
190             expected = j <= i ? 1 : dist.cumulativeProbability(i) + dist.survivalProbability(j - 1);
191             TestUtils.assertProbability(expected,
192                 twoSided.test(n, k, p).getPValue(), eps,
193                 () -> "two-sided: k=" + k);
194         });
195     }
196 
197     @Test
198     void testBinomialTestPValues() {
199         final int successes = 51;
200         final int trials = 235;
201         final double probability = 1.0 / 6.0;
202 
203         Assertions.assertEquals(0.04375, BinomialTest.withDefaults()
204             .test(trials, successes, probability).getPValue(), 1e-4);
205         Assertions.assertEquals(0.02654, BinomialTest.withDefaults().with(AlternativeHypothesis.GREATER_THAN)
206             .test(trials, successes, probability).getPValue(), 1e-4);
207         Assertions.assertEquals(0.982, BinomialTest.withDefaults().with(AlternativeHypothesis.LESS_THAN)
208             .test(trials, successes, probability).getPValue(), 1e-4);
209 
210         // for special boundary conditions
211         final BinomialTest twoSided = BinomialTest.withDefaults();
212         Assertions.assertEquals(1, twoSided.test(3, 3, 1).getPValue(), 1e-4);
213         Assertions.assertEquals(1, twoSided.test(3, 3, 0.9).getPValue(), 1e-4);
214         Assertions.assertEquals(1, twoSided.test(3, 3, 0.8).getPValue(), 1e-4);
215         Assertions.assertEquals(0.559, twoSided.test(3, 3, 0.7).getPValue(), 1e-4);
216         Assertions.assertEquals(0.28, twoSided.test(3, 3, 0.6).getPValue(), 1e-4);
217         Assertions.assertEquals(0.25, twoSided.test(3, 3, 0.5).getPValue(), 1e-4);
218         Assertions.assertEquals(0.064, twoSided.test(3, 3, 0.4).getPValue(), 1e-4);
219         Assertions.assertEquals(0.027, twoSided.test(3, 3, 0.3).getPValue(), 1e-4);
220         Assertions.assertEquals(0.008, twoSided.test(3, 3, 0.2).getPValue(), 1e-4);
221         Assertions.assertEquals(0.001, twoSided.test(3, 3, 0.1).getPValue(), 1e-4);
222         Assertions.assertEquals(0, twoSided.test(3, 3, 0.0).getPValue(), 1e-4);
223 
224         Assertions.assertEquals(0, twoSided.test(3, 0, 1).getPValue(), 1e-4);
225         Assertions.assertEquals(0.001, twoSided.test(3, 0, 0.9).getPValue(), 1e-4);
226         Assertions.assertEquals(0.008, twoSided.test(3, 0, 0.8).getPValue(), 1e-4);
227         Assertions.assertEquals(0.027, twoSided.test(3, 0, 0.7).getPValue(), 1e-4);
228         Assertions.assertEquals(0.064, twoSided.test(3, 0, 0.6).getPValue(), 1e-4);
229         Assertions.assertEquals(0.25, twoSided.test(3, 0, 0.5).getPValue(), 1e-4);
230         Assertions.assertEquals(0.28, twoSided.test(3, 0, 0.4).getPValue(), 1e-4);
231         Assertions.assertEquals(0.559, twoSided.test(3, 0, 0.3).getPValue(), 1e-4);
232         Assertions.assertEquals(1, twoSided.test(3, 0, 0.2).getPValue(), 1e-4);
233         Assertions.assertEquals(1, twoSided.test(3, 0, 0.1).getPValue(), 1e-4);
234         Assertions.assertEquals(1, twoSided.test(3, 0, 0.0).getPValue(), 1e-4);
235     }
236 
237     @ParameterizedTest
238     @CsvSource({
239         // numberOfSuccesses = numberOfTrials * probability (median)
240         "10, 5, 0.5",
241         "11, 5, 0.5",
242         "11, 6, 0.5",
243         "20, 5, 0.25",
244         "21, 5, 0.25",
245         "21, 6, 0.25",
246         "20, 15, 0.75",
247         "21, 15, 0.75",
248         "21, 16, 0.75",
249     })
250     void testMath1644(int n, int k, double p) {
251         final double pval = BinomialTest.withDefaults().test(n, k, p).getPValue();
252         Assertions.assertTrue(pval <= 1, () -> "pval=" + pval);
253     }
254 
255     @ParameterizedTest
256     @MethodSource
257     void testBinomTest(int n, int k, double probability, double[] p) {
258         int i = 0;
259         for (final AlternativeHypothesis h : AlternativeHypothesis.values()) {
260             final SignificanceResult r = BinomialTest.withDefaults().with(h).test(n, k, probability);
261             Assertions.assertEquals((double) k / n, r.getStatistic(), "statistic");
262             TestUtils.assertProbability(p[i++], r.getPValue(), 1e-14, "p-value");
263         }
264     }
265 
266     static Stream<Arguments> testBinomTest() {
267         // p-values are in the AlternativeHypothesis enum order: two-sided, greater, less
268         // scipy.stats.binomtest (version 1.9.3)
269         final Stream.Builder<Arguments> builder = Stream.builder();
270         builder.add(Arguments.of(15, 3, 0.1,
271             new double[] {0.18406106910639106, 0.18406106910639106, 0.944444369992464}));
272         builder.add(Arguments.of(150, 37, 0.25,
273                 new double[] {1.0, 0.5687513546881982, 0.5062937783866548}));
274         builder.add(Arguments.of(150, 67, 0.25,
275                 new double[] {2.083753914662947e-07, 1.2964820621216238e-07, 0.9999999481384629}));
276         builder.add(Arguments.of(150, 17, 0.25,
277                 new double[] {4.229481760264341e-05, 0.9999911956737946, 2.399451075709081e-05}));
278         return builder.build();
279     }
280 }