001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.statistics.interval; 018 019import org.apache.commons.statistics.distribution.BetaDistribution; 020import org.apache.commons.statistics.distribution.NormalDistribution; 021 022/** 023 * Generate confidence intervals for a binomial proportion. 024 * 025 * <p>Note: To avoid <em>overshoot</em>, the confidence intervals are clipped to be in the 026 * {@code [0, 1]} interval in the case of the {@link #NORMAL_APPROXIMATION normal 027 * approximation} and {@link #AGRESTI_COULL Agresti-Coull} methods. 028 * 029 * @see <a 030 * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval">Binomial 031 * proportion confidence interval (Wikipedia)</a> 032 * 033 * @since 1.2 034 */ 035public enum BinomialConfidenceInterval { 036 /** 037 * Implements the normal approximation method for creating a binomial proportion 038 * confidence interval. 039 * 040 * <p>This method clips the confidence interval to be in {@code [0, 1]}. 041 * 042 * @see <a 043 * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Problems_with_using_a_normal_approximation_or_%22Wald_interval%22"> 044 * Normal approximation interval (Wikipedia)</a> 045 */ 046 NORMAL_APPROXIMATION { 047 @Override 048 Interval create(int n, int x, double alpha) { 049 final double z = NORMAL_DISTRIBUTION.inverseSurvivalProbability(alpha * 0.5); 050 final double p = (double) x / n; 051 final double distance = z * Math.sqrt(p * (1 - p) / n); 052 // This may exceed the interval [0, 1] 053 return new BaseInterval(clip(p - distance), clip(p + distance)); 054 } 055 }, 056 /** 057 * Implements the Wilson score method for creating a binomial proportion confidence 058 * interval. 059 * 060 * @see <a 061 * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval"> 062 * Normal approximation interval (Wikipedia)</a> 063 */ 064 WILSON_SCORE { 065 @Override 066 Interval create(int n, int x, double alpha) { 067 final double z = NORMAL_DISTRIBUTION.inverseSurvivalProbability(alpha * 0.5); 068 final double z2 = z * z; 069 final double p = (double) x / n; 070 final double denom = 1 + z2 / n; 071 final double centre = (p + 0.5 * z2 / n) / denom; 072 final double distance = z * Math.sqrt(p * (1 - p) / n + z2 / (4.0 * n * n)) / denom; 073 return new BaseInterval(centre - distance, centre + distance); 074 } 075 }, 076 /** 077 * Implements the Jeffreys method for creating a binomial proportion confidence 078 * interval. 079 * 080 * <p>In order to avoid the coverage probability tending to zero when {@code p} tends 081 * towards 0 or 1, when {@code x = 0} the lower limit is set to 0, and when 082 * {@code x = n} the upper limit is set to 1. 083 * 084 * @see <a 085 * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Jeffreys_interval"> 086 * Jeffreys interval (Wikipedia)</a> 087 */ 088 JEFFREYS { 089 @Override 090 Interval create(int n, int x, double alpha) { 091 final BetaDistribution d = BetaDistribution.of(x + 0.5, n - x + 0.5); 092 final double lower = x == 0 ? 0 : d.inverseCumulativeProbability(alpha * 0.5); 093 final double upper = x == n ? 1 : d.inverseSurvivalProbability(alpha * 0.5); 094 return new BaseInterval(lower, upper); 095 } 096 }, 097 /** 098 * Implements the Clopper-Pearson method for creating a binomial proportion confidence 099 * interval. 100 * 101 * @see <a 102 * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Clopper%E2%80%93Pearson_interval"> 103 * Clopper-Pearson interval (Wikipedia)</a> 104 */ 105 CLOPPER_PEARSON { 106 @Override 107 Interval create(int n, int x, double alpha) { 108 double lower = 0; 109 double upper = 1; 110 // Use closed form expressions 111 if (x == 0) { 112 upper = 1 - Math.pow(alpha * 0.5, 1.0 / n); 113 } else if (x == n) { 114 lower = Math.pow(alpha * 0.5, 1.0 / n); 115 } else { 116 lower = BetaDistribution.of(x, n - x + 1).inverseCumulativeProbability(alpha * 0.5); 117 upper = BetaDistribution.of(x + 1, n - x).inverseSurvivalProbability(alpha * 0.5); 118 } 119 return new BaseInterval(lower, upper); 120 } 121 }, 122 /** 123 * Implements the Agresti-Coull method for creating a binomial proportion confidence 124 * interval. 125 * 126 * <p>This method clips the confidence interval to be in {@code [0, 1]}. 127 * 128 * @see <a 129 * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Agresti%E2%80%93Coull_interval"> 130 * Agresti-Coull interval (Wikipedia)</a> 131 */ 132 AGRESTI_COULL { 133 @Override 134 Interval create(int n, int x, double alpha) { 135 final double z = NORMAL_DISTRIBUTION.inverseSurvivalProbability(alpha * 0.5); 136 final double zSquared = z * z; 137 final double nc = n + zSquared; 138 final double p = (x + 0.5 * zSquared) / nc; 139 final double distance = z * Math.sqrt(p * (1 - p) / nc); 140 // This may exceed the interval [0, 1] 141 return new BaseInterval(clip(p - distance), clip(p + distance)); 142 } 143 }; 144 145 /** The standard normal distribution. */ 146 static final NormalDistribution NORMAL_DISTRIBUTION = NormalDistribution.of(0, 1); 147 148 /** 149 * Create a confidence interval for the true probability of success of an unknown 150 * binomial distribution with the given observed number of trials, successes and error 151 * rate. 152 * 153 * <p>The error rate {@code alpha} is related to the confidence level that the 154 * interval contains the true probability of success as 155 * {@code alpha = 1 - confidence}, where {@code confidence} is the confidence level 156 * in {@code [0, 1]}. For example a 95% confidence level is an {@code alpha} of 0.05. 157 * 158 * @param numberOfTrials Number of trials. 159 * @param numberOfSuccesses Number of successes. 160 * @param alpha Desired error rate that the true probability of success falls <em>outside</em> 161 * the returned interval. 162 * @return Confidence interval containing the probability of success with error rate 163 * {@code alpha} 164 * @throws IllegalArgumentException if {@code numberOfTrials <= 0}, if 165 * {@code numberOfSuccesses < 0}, if {@code numberOfSuccesses > numberOfTrials}, or if 166 * {@code alpha} is not in the open interval {@code (0, 1)}. 167 */ 168 public Interval fromErrorRate(int numberOfTrials, int numberOfSuccesses, double alpha) { 169 if (numberOfTrials <= 0) { 170 throw new IllegalArgumentException("Number of trials is not strictly positive: " + numberOfTrials); 171 } 172 if (numberOfSuccesses < 0) { 173 throw new IllegalArgumentException("Number of successes is not positive: " + numberOfSuccesses); 174 } 175 if (numberOfSuccesses > numberOfTrials) { 176 throw new IllegalArgumentException( 177 String.format("Number of successes (%d) must be less than or equal to number of trials (%d)", 178 numberOfSuccesses, numberOfTrials)); 179 } 180 ArgumentUtils.checkErrorRate(alpha); 181 return create(numberOfTrials, numberOfSuccesses, alpha); 182 } 183 184 /** 185 * Create a confidence interval for the true probability of success of an unknown 186 * binomial distribution with the given observed number of trials, successes and error 187 * rate. 188 * 189 * @param n Number of trials. 190 * @param x Number of successes. 191 * @param alpha Desired error rate. 192 * @return Confidence interval 193 */ 194 abstract Interval create(int n, int x, double alpha); 195 196 /** 197 * Clip the probability to [0, 1]. 198 * 199 * @param p Probability. 200 * @return the probability in [0, 1] 201 */ 202 static double clip(double p) { 203 return Math.min(1, Math.max(0, p)); 204 } 205}