001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.statistics.interval;
018
019import org.apache.commons.statistics.distribution.BetaDistribution;
020import org.apache.commons.statistics.distribution.NormalDistribution;
021
022/**
023 * Generate confidence intervals for a binomial proportion.
024 *
025 * <p>Note: To avoid <em>overshoot</em>, the confidence intervals are clipped to be in the
026 * {@code [0, 1]} interval in the case of the {@link #NORMAL_APPROXIMATION normal
027 * approximation} and {@link #AGRESTI_COULL Agresti-Coull} methods.
028 *
029 * @see <a
030 * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval">Binomial
031 * proportion confidence interval (Wikipedia)</a>
032 *
033 * @since 1.2
034 */
035public enum BinomialConfidenceInterval {
036    /**
037     * Implements the normal approximation method for creating a binomial proportion
038     * confidence interval.
039     *
040     * <p>This method clips the confidence interval to be in {@code [0, 1]}.
041     *
042     * @see <a
043     * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Problems_with_using_a_normal_approximation_or_%22Wald_interval%22">
044     * Normal approximation interval (Wikipedia)</a>
045     */
046    NORMAL_APPROXIMATION {
047        @Override
048        Interval create(int n, int x, double alpha) {
049            final double z = NORMAL_DISTRIBUTION.inverseSurvivalProbability(alpha * 0.5);
050            final double p = (double) x / n;
051            final double distance = z * Math.sqrt(p * (1 - p) / n);
052            // This may exceed the interval [0, 1]
053            return new BaseInterval(clip(p - distance), clip(p + distance));
054        }
055    },
056    /**
057     * Implements the Wilson score method for creating a binomial proportion confidence
058     * interval.
059     *
060     * @see <a
061     * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval">
062     * Normal approximation interval (Wikipedia)</a>
063     */
064    WILSON_SCORE {
065        @Override
066        Interval create(int n, int x, double alpha) {
067            final double z = NORMAL_DISTRIBUTION.inverseSurvivalProbability(alpha * 0.5);
068            final double z2 = z * z;
069            final double p = (double) x / n;
070            final double denom = 1 + z2 / n;
071            final double centre = (p + 0.5 * z2 / n) / denom;
072            final double distance = z * Math.sqrt(p * (1 - p) / n + z2 / (4.0 * n * n)) / denom;
073            return new BaseInterval(centre - distance, centre + distance);
074        }
075    },
076    /**
077     * Implements the Jeffreys method for creating a binomial proportion confidence
078     * interval.
079     *
080     * <p>In order to avoid the coverage probability tending to zero when {@code p} tends
081     * towards 0 or 1, when {@code x = 0} the lower limit is set to 0, and when
082     * {@code x = n} the upper limit is set to 1.
083     *
084     * @see <a
085     * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Jeffreys_interval">
086     * Jeffreys interval (Wikipedia)</a>
087     */
088    JEFFREYS {
089        @Override
090        Interval create(int n, int x, double alpha) {
091            final BetaDistribution d = BetaDistribution.of(x + 0.5, n - x + 0.5);
092            final double lower = x == 0 ? 0 : d.inverseCumulativeProbability(alpha * 0.5);
093            final double upper = x == n ? 1 : d.inverseSurvivalProbability(alpha * 0.5);
094            return new BaseInterval(lower, upper);
095        }
096    },
097    /**
098     * Implements the Clopper-Pearson method for creating a binomial proportion confidence
099     * interval.
100     *
101     * @see <a
102     * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Clopper%E2%80%93Pearson_interval">
103     * Clopper-Pearson interval (Wikipedia)</a>
104     */
105    CLOPPER_PEARSON {
106        @Override
107        Interval create(int n, int x, double alpha) {
108            double lower = 0;
109            double upper = 1;
110            // Use closed form expressions
111            if (x == 0) {
112                upper = 1 - Math.pow(alpha * 0.5, 1.0 / n);
113            } else if (x == n) {
114                lower = Math.pow(alpha * 0.5, 1.0 / n);
115            } else {
116                lower = BetaDistribution.of(x, n - x + 1).inverseCumulativeProbability(alpha * 0.5);
117                upper = BetaDistribution.of(x + 1, n - x).inverseSurvivalProbability(alpha * 0.5);
118            }
119            return new BaseInterval(lower, upper);
120        }
121    },
122    /**
123     * Implements the Agresti-Coull method for creating a binomial proportion confidence
124     * interval.
125     *
126     * <p>This method clips the confidence interval to be in {@code [0, 1]}.
127     *
128     * @see <a
129     * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Agresti%E2%80%93Coull_interval">
130     * Agresti-Coull interval (Wikipedia)</a>
131     */
132    AGRESTI_COULL {
133        @Override
134        Interval create(int n, int x, double alpha) {
135            final double z = NORMAL_DISTRIBUTION.inverseSurvivalProbability(alpha * 0.5);
136            final double zSquared = z * z;
137            final double nc = n + zSquared;
138            final double p = (x + 0.5 * zSquared) / nc;
139            final double distance = z * Math.sqrt(p * (1 - p) / nc);
140            // This may exceed the interval [0, 1]
141            return new BaseInterval(clip(p - distance), clip(p + distance));
142        }
143    };
144
145    /** The standard normal distribution. */
146    static final NormalDistribution NORMAL_DISTRIBUTION = NormalDistribution.of(0, 1);
147
148    /**
149     * Create a confidence interval for the true probability of success of an unknown
150     * binomial distribution with the given observed number of trials, successes and error
151     * rate.
152     *
153     * <p>The error rate {@code alpha} is related to the confidence level that the
154     * interval contains the true probability of success as
155     * {@code alpha = 1 - confidence}, where {@code confidence} is the confidence level
156     * in {@code [0, 1]}. For example a 95% confidence level is an {@code alpha} of 0.05.
157     *
158     * @param numberOfTrials Number of trials.
159     * @param numberOfSuccesses Number of successes.
160     * @param alpha Desired error rate that the true probability of success falls <em>outside</em>
161     * the returned interval.
162     * @return Confidence interval containing the probability of success with error rate
163     * {@code alpha}
164     * @throws IllegalArgumentException if {@code numberOfTrials <= 0}, if
165     * {@code numberOfSuccesses < 0}, if {@code numberOfSuccesses > numberOfTrials}, or if
166     * {@code alpha} is not in the open interval {@code (0, 1)}.
167     */
168    public Interval fromErrorRate(int numberOfTrials, int numberOfSuccesses, double alpha) {
169        if (numberOfTrials <= 0) {
170            throw new IllegalArgumentException("Number of trials is not strictly positive: " + numberOfTrials);
171        }
172        if (numberOfSuccesses < 0) {
173            throw new IllegalArgumentException("Number of successes is not positive: " + numberOfSuccesses);
174        }
175        if (numberOfSuccesses > numberOfTrials) {
176            throw new IllegalArgumentException(
177                String.format("Number of successes (%d) must be less than or equal to number of trials (%d)",
178                    numberOfSuccesses, numberOfTrials));
179        }
180        ArgumentUtils.checkErrorRate(alpha);
181        return create(numberOfTrials, numberOfSuccesses, alpha);
182    }
183
184    /**
185     * Create a confidence interval for the true probability of success of an unknown
186     * binomial distribution with the given observed number of trials, successes and error
187     * rate.
188     *
189     * @param n Number of trials.
190     * @param x Number of successes.
191     * @param alpha Desired error rate.
192     * @return Confidence interval
193     */
194    abstract Interval create(int n, int x, double alpha);
195
196    /**
197     * Clip the probability to [0, 1].
198     *
199     * @param p Probability.
200     * @return the probability in [0, 1]
201     */
202    static double clip(double p) {
203        return Math.min(1, Math.max(0, p));
204    }
205}