View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.statistics.inference;
18  
19  import java.util.Objects;
20  import org.apache.commons.statistics.distribution.BinomialDistribution;
21  
22  /**
23   * Implements binomial test statistics.
24   *
25   * <p>Performs an exact test for the statistical significance of deviations from a
26   * theoretically expected distribution of observations into two categories.
27   *
28   * @see <a href="http://en.wikipedia.org/wiki/Binomial_test">Binomial test (Wikipedia)</a>
29   * @since 1.1
30   */
31  public final class BinomialTest {
32      /** Default instance. */
33      private static final BinomialTest DEFAULT = new BinomialTest(AlternativeHypothesis.TWO_SIDED);
34  
35      /** Alternative hypothesis. */
36      private final AlternativeHypothesis alternative;
37  
38      /**
39       * @param alternative Alternative hypothesis.
40       */
41      private BinomialTest(AlternativeHypothesis alternative) {
42          this.alternative = alternative;
43      }
44  
45      /**
46       * Return an instance using the default options.
47       *
48       * <ul>
49       * <li>{@link AlternativeHypothesis#TWO_SIDED}
50       * </ul>
51       *
52       * @return default instance
53       */
54      public static BinomialTest withDefaults() {
55          return DEFAULT;
56      }
57  
58      /**
59       * Return an instance with the configured alternative hypothesis.
60       *
61       * @param v Value.
62       * @return an instance
63       */
64      public BinomialTest with(AlternativeHypothesis v) {
65          return new BinomialTest(Objects.requireNonNull(v));
66      }
67  
68      /**
69       * Performs a binomial test about the probability of success \( \pi \).
70       *
71       * <p>The null hypothesis is \( H_0:\pi=\pi_0 \) where \( \pi_0 \) is between 0 and 1.
72       *
73       * <p>The probability of observing \( k \) successes from \( n \) trials with a given
74       * probability of success \( p \) is:
75       *
76       * <p>\[ \Pr(X=k)=\binom{n}{k}p^k(1-p)^{n-k} \]
77       *
78       * <p>The test is defined by the {@link AlternativeHypothesis}.
79       *
80       * <p>To test \( \pi &lt; \pi_0 \) (less than):
81       *
82       * <p>\[ p = \sum_{i=0}^k\Pr(X=i)=\sum_{i=0}^k\binom{n}{i}\pi_0^i(1-\pi_0)^{n-i} \]
83       *
84       * <p>To test \( \pi &gt; \pi_0 \) (greater than):
85       *
86       * <p>\[ p = \sum_{i=0}^k\Pr(X=i)=\sum_{i=k}^n\binom{n}{i}\pi_0^i(1-\pi_0)^{n-i} \]
87       *
88       * <p>To test \( \pi \ne \pi_0 \) (two-sided) requires finding all \( i \) such that
89       * \( \mathcal{I}=\{i:\Pr(X=i)\leq \Pr(X=k)\} \) and compute the sum:
90       *
91       * <p>\[ p = \sum_{i\in\mathcal{I}}\Pr(X=i)=\sum_{i\in\mathcal{I}}\binom{n}{i}\pi_0^i(1-\pi_0)^{n-i} \]
92       *
93       * <p>The two-sided p-value represents the likelihood of getting a result at least as
94       * extreme as the sample, given the provided {@code probability} of success on a
95       * single trial.
96       *
97       * <p>The test statistic is equal to the estimated proportion \( \frac{k}{n} \).
98       *
99       * @param numberOfTrials Number of trials performed.
100      * @param numberOfSuccesses Number of successes observed.
101      * @param probability Assumed probability of a single trial under the null
102      * hypothesis.
103      * @return test result
104      * @throws IllegalArgumentException if {@code numberOfTrials} or
105      * {@code numberOfSuccesses} is negative; {@code probability} is not between 0
106      * and 1; or if {@code numberOfTrials < numberOfSuccesses}
107      * @see #with(AlternativeHypothesis)
108      */
109     public SignificanceResult test(int numberOfTrials, int numberOfSuccesses, double probability) {
110         // Note: The distribution validates number of trials and probability.
111         // Here we only have to validate the number of successes.
112         Arguments.checkNonNegative(numberOfSuccesses);
113         if (numberOfTrials < numberOfSuccesses) {
114             throw new InferenceException(
115                 "must have n >= k for binomial coefficient (n, k), got n = %d, k = %d",
116                 numberOfSuccesses, numberOfTrials);
117         }
118 
119         final BinomialDistribution distribution = BinomialDistribution.of(numberOfTrials, probability);
120         final double p;
121         if (alternative == AlternativeHypothesis.GREATER_THAN) {
122             p = distribution.survivalProbability(numberOfSuccesses - 1);
123         } else if (alternative == AlternativeHypothesis.LESS_THAN) {
124             p = distribution.cumulativeProbability(numberOfSuccesses);
125         } else {
126             p = twoSidedBinomialTest(numberOfTrials, numberOfSuccesses, probability, distribution);
127         }
128         return new BaseSignificanceResult((double) numberOfSuccesses / numberOfTrials, p);
129     }
130 
131     /**
132      * Returns the <i>observed significance level</i>, or p-value, associated with a
133      * two-sided binomial test about the probability of success \( \pi \).
134      *
135      * @param n Number of trials performed.
136      * @param k Number of successes observed.
137      * @param probability Assumed probability of a single trial under the null
138      * hypothesis.
139      * @param distribution Binomial distribution.
140      * @return p-value
141      */
142     private static double twoSidedBinomialTest(int n, int k, double probability,
143                                                BinomialDistribution distribution) {
144         // Find all i where Pr(X = i) <= Pr(X = k) and sum them.
145         // Exploit the known unimodal distribution to increase the
146         // search speed. Note the search depends only on magnitude differences.
147         // The current BinomialDistribution is faster using log probability
148         // as it omits a call to Math.exp.
149 
150         // Use the mode as the point of largest probability.
151         // The lower or upper mode is important for the search below.
152         final int m1 = (int) Math.ceil((n + 1.0) * probability) - 1;
153         final int m2 = (int) Math.floor((n + 1.0) * probability);
154         if (k < m1) {
155             final double pk = distribution.logProbability(k);
156             // Lower half = cdf(k)
157             // Find upper half. As k < lower mode i should never
158             // reach the lower mode based on the probability alone.
159             // Bracket with the upper mode.
160             final int i = Searches.searchDescending(m2, n, pk, distribution::logProbability);
161             return distribution.cumulativeProbability(k) +
162                    distribution.survivalProbability(i - 1);
163         } else if (k > m2) {
164             final double pk = distribution.logProbability(k);
165             // Upper half = sf(k - 1)
166             // Find lower half. As k > upper mode i should never
167             // reach the upper mode based on the probability alone.
168             // Bracket with the lower mode.
169             final int i = Searches.searchAscending(0, m1, pk, distribution::logProbability);
170             return distribution.cumulativeProbability(i) +
171                    distribution.survivalProbability(k - 1);
172         }
173         // k == mode
174         // Edge case where the sum of probabilities will be either
175         // 1 or 1 - Pr(X = mode) where mode != k
176         final double pk = distribution.probability(k);
177         final double pm = distribution.probability(k == m1 ? m2 : m1);
178         return pm > pk ? 1 - pm : 1;
179     }
180 }