1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math4.legacy.stat.inference;
18
19 import org.apache.commons.statistics.distribution.BinomialDistribution;
20 import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
21 import org.apache.commons.math4.legacy.exception.MathInternalError;
22 import org.apache.commons.math4.legacy.exception.NotPositiveException;
23 import org.apache.commons.math4.legacy.exception.NullArgumentException;
24 import org.apache.commons.math4.legacy.exception.OutOfRangeException;
25 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
26
27 /**
28 * Implements binomial test statistics.
29 * <p>
30 * Exact test for the statistical significance of deviations from a
31 * theoretically expected distribution of observations into two categories.
32 *
33 * @see <a href="http://en.wikipedia.org/wiki/Binomial_test">Binomial test (Wikipedia)</a>
34 * @since 3.3
35 */
36 public class BinomialTest {
37
38 /**
39 * Returns whether the null hypothesis can be rejected with the given confidence level.
40 * <p>
41 * <strong>Preconditions</strong>:
42 * <ul>
43 * <li>Number of trials must be ≥ 0.</li>
44 * <li>Number of successes must be ≥ 0.</li>
45 * <li>Number of successes must be ≤ number of trials.</li>
46 * <li>Probability must be ≥ 0 and ≤ 1.</li>
47 * </ul>
48 *
49 * @param numberOfTrials number of trials performed
50 * @param numberOfSuccesses number of successes observed
51 * @param probability assumed probability of a single trial under the null hypothesis
52 * @param alternativeHypothesis type of hypothesis being evaluated (one- or two-sided)
53 * @param alpha significance level of the test
54 * @return true if the null hypothesis can be rejected with confidence {@code 1 - alpha}
55 * @throws NotPositiveException if {@code numberOfTrials} or {@code numberOfSuccesses} is negative
56 * @throws OutOfRangeException if {@code probability} is not between 0 and 1
57 * @throws MathIllegalArgumentException if {@code numberOfTrials} < {@code numberOfSuccesses} or
58 * if {@code alternateHypothesis} is null.
59 * @see AlternativeHypothesis
60 */
61 public boolean binomialTest(int numberOfTrials, int numberOfSuccesses, double probability,
62 AlternativeHypothesis alternativeHypothesis, double alpha) {
63 double pValue = binomialTest(numberOfTrials, numberOfSuccesses, probability, alternativeHypothesis);
64 return pValue < alpha;
65 }
66
67 /**
68 * Returns the <i>observed significance level</i>, or
69 * <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">p-value</a>,
70 * associated with a <a href="http://en.wikipedia.org/wiki/Binomial_test"> Binomial test</a>.
71 * <p>
72 * The number returned is the smallest significance level at which one can reject the null hypothesis.
73 * The form of the hypothesis depends on {@code alternativeHypothesis}.</p>
74 * <p>
75 * The p-Value represents the likelihood of getting a result at least as extreme as the sample,
76 * given the provided {@code probability} of success on a single trial. For single-sided tests,
77 * this value can be directly derived from the Binomial distribution. For the two-sided test,
78 * the implementation works as follows: we start by looking at the most extreme cases
79 * (0 success and n success where n is the number of trials from the sample) and determine their likelihood.
80 * The lower value is added to the p-Value (if both values are equal, both are added). Then we continue with
81 * the next extreme value, until we added the value for the actual observed sample.</p>
82 * <p>
83 * <strong>Preconditions</strong>:
84 * <ul>
85 * <li>Number of trials must be ≥ 0.</li>
86 * <li>Number of successes must be ≥ 0.</li>
87 * <li>Number of successes must be ≤ number of trials.</li>
88 * <li>Probability must be ≥ 0 and ≤ 1.</li>
89 * </ul>
90 *
91 * @param numberOfTrials number of trials performed
92 * @param numberOfSuccesses number of successes observed
93 * @param probability assumed probability of a single trial under the null hypothesis
94 * @param alternativeHypothesis type of hypothesis being evaluated (one- or two-sided)
95 * @return p-value
96 * @throws NotPositiveException if {@code numberOfTrials} or {@code numberOfSuccesses} is negative
97 * @throws OutOfRangeException if {@code probability} is not between 0 and 1
98 * @throws MathIllegalArgumentException if {@code numberOfTrials} < {@code numberOfSuccesses} or
99 * if {@code alternateHypothesis} is null.
100 * @see AlternativeHypothesis
101 */
102 public double binomialTest(int numberOfTrials, int numberOfSuccesses, double probability,
103 AlternativeHypothesis alternativeHypothesis) {
104 if (numberOfTrials < 0) {
105 throw new NotPositiveException(numberOfTrials);
106 }
107 if (numberOfSuccesses < 0) {
108 throw new NotPositiveException(numberOfSuccesses);
109 }
110 if (probability < 0 || probability > 1) {
111 throw new OutOfRangeException(probability, 0, 1);
112 }
113 if (numberOfTrials < numberOfSuccesses) {
114 throw new MathIllegalArgumentException(
115 LocalizedFormats.BINOMIAL_INVALID_PARAMETERS_ORDER,
116 numberOfTrials, numberOfSuccesses);
117 }
118 if (alternativeHypothesis == null) {
119 throw new NullArgumentException();
120 }
121
122 final BinomialDistribution distribution = BinomialDistribution.of(numberOfTrials, probability);
123 switch (alternativeHypothesis) {
124 case GREATER_THAN:
125 return distribution.survivalProbability(numberOfSuccesses - 1);
126 case LESS_THAN:
127 return distribution.cumulativeProbability(numberOfSuccesses);
128 case TWO_SIDED:
129 int criticalValueLow = 0;
130 int criticalValueHigh = numberOfTrials;
131 double pTotal = 0;
132
133 while (true) {
134 double pLow = distribution.probability(criticalValueLow);
135 double pHigh = distribution.probability(criticalValueHigh);
136
137 if (pLow == pHigh) {
138 if (criticalValueLow == criticalValueHigh) {
139 pTotal += pLow;
140 } else {
141 pTotal += 2 * Math.nextDown(pLow);
142 }
143 criticalValueLow++;
144 criticalValueHigh--;
145 } else if (pLow < pHigh) {
146 pTotal += pLow;
147 criticalValueLow++;
148 } else {
149 pTotal += pHigh;
150 criticalValueHigh--;
151 }
152
153 if (criticalValueLow > numberOfSuccesses || criticalValueHigh < numberOfSuccesses) {
154 break;
155 }
156 }
157 return pTotal;
158 default:
159 throw new MathInternalError(LocalizedFormats. OUT_OF_RANGE_SIMPLE, alternativeHypothesis,
160 AlternativeHypothesis.TWO_SIDED, AlternativeHypothesis.LESS_THAN);
161 }
162 }
163 }