001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.math3.stat.inference; 018 019import org.apache.commons.math3.distribution.BinomialDistribution; 020import org.apache.commons.math3.exception.MathIllegalArgumentException; 021import org.apache.commons.math3.exception.MathInternalError; 022import org.apache.commons.math3.exception.NotPositiveException; 023import org.apache.commons.math3.exception.NullArgumentException; 024import org.apache.commons.math3.exception.OutOfRangeException; 025import org.apache.commons.math3.exception.util.LocalizedFormats; 026 027/** 028 * Implements binomial test statistics. 029 * <p> 030 * Exact test for the statistical significance of deviations from a 031 * theoretically expected distribution of observations into two categories. 032 * 033 * @see <a href="http://en.wikipedia.org/wiki/Binomial_test">Binomial test (Wikipedia)</a> 034 * @since 3.3 035 */ 036public class BinomialTest { 037 038 /** 039 * Returns whether the null hypothesis can be rejected with the given confidence level. 040 * <p> 041 * <strong>Preconditions</strong>: 042 * <ul> 043 * <li>Number of trials must be ≥ 0.</li> 044 * <li>Number of successes must be ≥ 0.</li> 045 * <li>Number of successes must be ≤ number of trials.</li> 046 * <li>Probability must be ≥ 0 and ≤ 1.</li> 047 * </ul> 048 * 049 * @param numberOfTrials number of trials performed 050 * @param numberOfSuccesses number of successes observed 051 * @param probability assumed probability of a single trial under the null hypothesis 052 * @param alternativeHypothesis type of hypothesis being evaluated (one- or two-sided) 053 * @param alpha significance level of the test 054 * @return true if the null hypothesis can be rejected with confidence {@code 1 - alpha} 055 * @throws NotPositiveException if {@code numberOfTrials} or {@code numberOfSuccesses} is negative 056 * @throws OutOfRangeException if {@code probability} is not between 0 and 1 057 * @throws MathIllegalArgumentException if {@code numberOfTrials} < {@code numberOfSuccesses} or 058 * if {@code alternateHypothesis} is null. 059 * @see AlternativeHypothesis 060 */ 061 public boolean binomialTest(int numberOfTrials, int numberOfSuccesses, double probability, 062 AlternativeHypothesis alternativeHypothesis, double alpha) { 063 double pValue = binomialTest(numberOfTrials, numberOfSuccesses, probability, alternativeHypothesis); 064 return pValue < alpha; 065 } 066 067 /** 068 * Returns the <i>observed significance level</i>, or 069 * <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">p-value</a>, 070 * associated with a <a href="http://en.wikipedia.org/wiki/Binomial_test"> Binomial test</a>. 071 * <p> 072 * The number returned is the smallest significance level at which one can reject the null hypothesis. 073 * The form of the hypothesis depends on {@code alternativeHypothesis}.</p> 074 * <p> 075 * The p-Value represents the likelihood of getting a result at least as extreme as the sample, 076 * given the provided {@code probability} of success on a single trial. For single-sided tests, 077 * this value can be directly derived from the Binomial distribution. For the two-sided test, 078 * the implementation works as follows: we start by looking at the most extreme cases 079 * (0 success and n success where n is the number of trials from the sample) and determine their likelihood. 080 * The lower value is added to the p-Value (if both values are equal, both are added). Then we continue with 081 * the next extreme value, until we added the value for the actual observed sample.</p> 082 * <p> 083 * <strong>Preconditions</strong>: 084 * <ul> 085 * <li>Number of trials must be ≥ 0.</li> 086 * <li>Number of successes must be ≥ 0.</li> 087 * <li>Number of successes must be ≤ number of trials.</li> 088 * <li>Probability must be ≥ 0 and ≤ 1.</li> 089 * </ul></p> 090 * 091 * @param numberOfTrials number of trials performed 092 * @param numberOfSuccesses number of successes observed 093 * @param probability assumed probability of a single trial under the null hypothesis 094 * @param alternativeHypothesis type of hypothesis being evaluated (one- or two-sided) 095 * @return p-value 096 * @throws NotPositiveException if {@code numberOfTrials} or {@code numberOfSuccesses} is negative 097 * @throws OutOfRangeException if {@code probability} is not between 0 and 1 098 * @throws MathIllegalArgumentException if {@code numberOfTrials} < {@code numberOfSuccesses} or 099 * if {@code alternateHypothesis} is null. 100 * @see AlternativeHypothesis 101 */ 102 public double binomialTest(int numberOfTrials, int numberOfSuccesses, double probability, 103 AlternativeHypothesis alternativeHypothesis) { 104 if (numberOfTrials < 0) { 105 throw new NotPositiveException(numberOfTrials); 106 } 107 if (numberOfSuccesses < 0) { 108 throw new NotPositiveException(numberOfSuccesses); 109 } 110 if (probability < 0 || probability > 1) { 111 throw new OutOfRangeException(probability, 0, 1); 112 } 113 if (numberOfTrials < numberOfSuccesses) { 114 throw new MathIllegalArgumentException( 115 LocalizedFormats.BINOMIAL_INVALID_PARAMETERS_ORDER, 116 numberOfTrials, numberOfSuccesses); 117 } 118 if (alternativeHypothesis == null) { 119 throw new NullArgumentException(); 120 } 121 122 // pass a null rng to avoid unneeded overhead as we will not sample from this distribution 123 final BinomialDistribution distribution = new BinomialDistribution(null, numberOfTrials, probability); 124 switch (alternativeHypothesis) { 125 case GREATER_THAN: 126 return 1 - distribution.cumulativeProbability(numberOfSuccesses - 1); 127 case LESS_THAN: 128 return distribution.cumulativeProbability(numberOfSuccesses); 129 case TWO_SIDED: 130 int criticalValueLow = 0; 131 int criticalValueHigh = numberOfTrials; 132 double pTotal = 0; 133 134 while (true) { 135 double pLow = distribution.probability(criticalValueLow); 136 double pHigh = distribution.probability(criticalValueHigh); 137 138 if (pLow == pHigh) { 139 pTotal += 2 * pLow; 140 criticalValueLow++; 141 criticalValueHigh--; 142 } else if (pLow < pHigh) { 143 pTotal += pLow; 144 criticalValueLow++; 145 } else { 146 pTotal += pHigh; 147 criticalValueHigh--; 148 } 149 150 if (criticalValueLow > numberOfSuccesses || criticalValueHigh < numberOfSuccesses) { 151 break; 152 } 153 } 154 return pTotal; 155 default: 156 throw new MathInternalError(LocalizedFormats. OUT_OF_RANGE_SIMPLE, alternativeHypothesis, 157 AlternativeHypothesis.TWO_SIDED, AlternativeHypothesis.LESS_THAN); 158 } 159 } 160}