BinomialConfidenceInterval.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.statistics.interval;

import org.apache.commons.statistics.distribution.BetaDistribution;
import org.apache.commons.statistics.distribution.NormalDistribution;

/**
 * Generate confidence intervals for a binomial proportion.
 *
 * <p>Note: To avoid <em>overshoot</em>, the confidence intervals are clipped to be in the
 * {@code [0, 1]} interval in the case of the {@link #NORMAL_APPROXIMATION normal
 * approximation} and {@link #AGRESTI_COULL Agresti-Coull} methods.
 *
 * @see <a
 * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval">Binomial
 * proportion confidence interval (Wikipedia)</a>
 *
 * @since 1.2
 */
public enum BinomialConfidenceInterval {
    /**
     * Implements the normal approximation method for creating a binomial proportion
     * confidence interval.
     *
     * <p>This method clips the confidence interval to be in {@code [0, 1]}.
     *
     * @see <a
     * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Problems_with_using_a_normal_approximation_or_%22Wald_interval%22">
     * Normal approximation interval (Wikipedia)</a>
     */
    NORMAL_APPROXIMATION {
        @Override
        Interval create(int n, int x, double alpha) {
            final double z = NORMAL_DISTRIBUTION.inverseSurvivalProbability(alpha * 0.5);
            final double p = (double) x / n;
            final double distance = z * Math.sqrt(p * (1 - p) / n);
            // This may exceed the interval [0, 1]
            return new BaseInterval(clip(p - distance), clip(p + distance));
        }
    },
    /**
     * Implements the Wilson score method for creating a binomial proportion confidence
     * interval.
     *
     * @see <a
     * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval">
     * Normal approximation interval (Wikipedia)</a>
     */
    WILSON_SCORE {
        @Override
        Interval create(int n, int x, double alpha) {
            final double z = NORMAL_DISTRIBUTION.inverseSurvivalProbability(alpha * 0.5);
            final double z2 = z * z;
            final double p = (double) x / n;
            final double denom = 1 + z2 / n;
            final double centre = (p + 0.5 * z2 / n) / denom;
            final double distance = z * Math.sqrt(p * (1 - p) / n + z2 / (4.0 * n * n)) / denom;
            return new BaseInterval(centre - distance, centre + distance);
        }
    },
    /**
     * Implements the Jeffreys method for creating a binomial proportion confidence
     * interval.
     *
     * <p>In order to avoid the coverage probability tending to zero when {@code p} tends
     * towards 0 or 1, when {@code x = 0} the lower limit is set to 0, and when
     * {@code x = n} the upper limit is set to 1.
     *
     * @see <a
     * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Jeffreys_interval">
     * Jeffreys interval (Wikipedia)</a>
     */
    JEFFREYS {
        @Override
        Interval create(int n, int x, double alpha) {
            final BetaDistribution d = BetaDistribution.of(x + 0.5, n - x + 0.5);
            final double lower = x == 0 ? 0 : d.inverseCumulativeProbability(alpha * 0.5);
            final double upper = x == n ? 1 : d.inverseSurvivalProbability(alpha * 0.5);
            return new BaseInterval(lower, upper);
        }
    },
    /**
     * Implements the Clopper-Pearson method for creating a binomial proportion confidence
     * interval.
     *
     * @see <a
     * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Clopper%E2%80%93Pearson_interval">
     * Clopper-Pearson interval (Wikipedia)</a>
     */
    CLOPPER_PEARSON {
        @Override
        Interval create(int n, int x, double alpha) {
            double lower = 0;
            double upper = 1;
            // Use closed form expressions
            if (x == 0) {
                upper = 1 - Math.pow(alpha * 0.5, 1.0 / n);
            } else if (x == n) {
                lower = Math.pow(alpha * 0.5, 1.0 / n);
            } else {
                lower = BetaDistribution.of(x, n - x + 1).inverseCumulativeProbability(alpha * 0.5);
                upper = BetaDistribution.of(x + 1, n - x).inverseSurvivalProbability(alpha * 0.5);
            }
            return new BaseInterval(lower, upper);
        }
    },
    /**
     * Implements the Agresti-Coull method for creating a binomial proportion confidence
     * interval.
     *
     * <p>This method clips the confidence interval to be in {@code [0, 1]}.
     *
     * @see <a
     * href="https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Agresti%E2%80%93Coull_interval">
     * Agresti-Coull interval (Wikipedia)</a>
     */
    AGRESTI_COULL {
        @Override
        Interval create(int n, int x, double alpha) {
            final double z = NORMAL_DISTRIBUTION.inverseSurvivalProbability(alpha * 0.5);
            final double zSquared = z * z;
            final double nc = n + zSquared;
            final double p = (x + 0.5 * zSquared) / nc;
            final double distance = z * Math.sqrt(p * (1 - p) / nc);
            // This may exceed the interval [0, 1]
            return new BaseInterval(clip(p - distance), clip(p + distance));
        }
    };

    /** The standard normal distribution. */
    static final NormalDistribution NORMAL_DISTRIBUTION = NormalDistribution.of(0, 1);

    /**
     * Create a confidence interval for the true probability of success of an unknown
     * binomial distribution with the given observed number of trials, successes and error
     * rate.
     *
     * <p>The error rate {@code alpha} is related to the confidence level that the
     * interval contains the true probability of success as
     * {@code alpha = 1 - confidence}, where {@code confidence} is the confidence level
     * in {@code [0, 1]}. For example a 95% confidence level is an {@code alpha} of 0.05.
     *
     * @param numberOfTrials Number of trials.
     * @param numberOfSuccesses Number of successes.
     * @param alpha Desired error rate that the true probability of success falls <em>outside</em>
     * the returned interval.
     * @return Confidence interval containing the probability of success with error rate
     * {@code alpha}
     * @throws IllegalArgumentException if {@code numberOfTrials <= 0}, if
     * {@code numberOfSuccesses < 0}, if {@code numberOfSuccesses > numberOfTrials}, or if
     * {@code alpha} is not in the open interval {@code (0, 1)}.
     */
    public Interval fromErrorRate(int numberOfTrials, int numberOfSuccesses, double alpha) {
        if (numberOfTrials <= 0) {
            throw new IllegalArgumentException("Number of trials is not strictly positive: " + numberOfTrials);
        }
        if (numberOfSuccesses < 0) {
            throw new IllegalArgumentException("Number of successes is not positive: " + numberOfSuccesses);
        }
        if (numberOfSuccesses > numberOfTrials) {
            throw new IllegalArgumentException(
                String.format("Number of successes (%d) must be less than or equal to number of trials (%d)",
                    numberOfSuccesses, numberOfTrials));
        }
        ArgumentUtils.checkErrorRate(alpha);
        return create(numberOfTrials, numberOfSuccesses, alpha);
    }

    /**
     * Create a confidence interval for the true probability of success of an unknown
     * binomial distribution with the given observed number of trials, successes and error
     * rate.
     *
     * @param n Number of trials.
     * @param x Number of successes.
     * @param alpha Desired error rate.
     * @return Confidence interval
     */
    abstract Interval create(int n, int x, double alpha);

    /**
     * Clip the probability to [0, 1].
     *
     * @param p Probability.
     * @return the probability in [0, 1]
     */
    static double clip(double p) {
        return Math.min(1, Math.max(0, p));
    }
}