001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.statistics.distribution;
018
019import org.apache.commons.numbers.combinatorics.BinomialCoefficientDouble;
020import org.apache.commons.numbers.combinatorics.LogBinomialCoefficient;
021import org.apache.commons.numbers.gamma.RegularizedBeta;
022
023/**
024 * Implementation of the <a href="http://en.wikipedia.org/wiki/Negative_binomial_distribution">Pascal distribution.</a>
025 *
026 * The Pascal distribution is a special case of the Negative Binomial distribution
027 * where the number of successes parameter is an integer.
028 *
029 * There are various ways to express the probability mass and distribution
030 * functions for the Pascal distribution. The present implementation represents
031 * the distribution of the number of failures before {@code r} successes occur.
032 * This is the convention adopted in e.g.
033 * <a href="http://mathworld.wolfram.com/NegativeBinomialDistribution.html">MathWorld</a>,
034 * but <em>not</em> in
035 * <a href="http://en.wikipedia.org/wiki/Negative_binomial_distribution">Wikipedia</a>.
036 *
037 * For a random variable {@code X} whose values are distributed according to this
038 * distribution, the probability mass function is given by<br>
039 * {@code P(X = k) = C(k + r - 1, r - 1) * p^r * (1 - p)^k,}<br>
040 * where {@code r} is the number of successes, {@code p} is the probability of
041 * success, and {@code X} is the total number of failures. {@code C(n, k)} is
042 * the binomial coefficient ({@code n} choose {@code k}). The mean and variance
043 * of {@code X} are<br>
044 * {@code E(X) = (1 - p) * r / p, var(X) = (1 - p) * r / p^2.}<br>
045 * Finally, the cumulative distribution function is given by<br>
046 * {@code P(X <= k) = I(p, r, k + 1)},
047 * where I is the regularized incomplete Beta function.
048 */
049public class PascalDistribution extends AbstractDiscreteDistribution {
050    /** The number of successes. */
051    private final int numberOfSuccesses;
052    /** The probability of success. */
053    private final double probabilityOfSuccess;
054    /** The value of {@code log(p)}, where {@code p} is the probability of success,
055     * stored for faster computation. */
056    private final double logProbabilityOfSuccess;
057    /** The value of {@code log(1-p)}, where {@code p} is the probability of success,
058     * stored for faster computation. */
059    private final double log1mProbabilityOfSuccess;
060
061    /**
062     * Create a Pascal distribution with the given number of successes and
063     * probability of success.
064     *
065     * @param r Number of successes.
066     * @param p Probability of success.
067     * @throws IllegalArgumentException if {@code r <= 0} or {@code p < 0}
068     * or {@code p > 1}.
069     */
070    public PascalDistribution(int r,
071                              double p) {
072        if (r <= 0) {
073            throw new DistributionException(DistributionException.NEGATIVE,
074                                            r);
075        }
076        if (p < 0 ||
077            p > 1) {
078            throw new DistributionException(DistributionException.INVALID_PROBABILITY, p);
079        }
080
081        numberOfSuccesses = r;
082        probabilityOfSuccess = p;
083        logProbabilityOfSuccess = Math.log(p);
084        log1mProbabilityOfSuccess = Math.log1p(-p);
085    }
086
087    /**
088     * Access the number of successes for this distribution.
089     *
090     * @return the number of successes.
091     */
092    public int getNumberOfSuccesses() {
093        return numberOfSuccesses;
094    }
095
096    /**
097     * Access the probability of success for this distribution.
098     *
099     * @return the probability of success.
100     */
101    public double getProbabilityOfSuccess() {
102        return probabilityOfSuccess;
103    }
104
105    /** {@inheritDoc} */
106    @Override
107    public double probability(int x) {
108        double ret;
109        if (x < 0) {
110            ret = 0.0;
111        } else {
112            ret = BinomialCoefficientDouble.value(x +
113                  numberOfSuccesses - 1, numberOfSuccesses - 1) *
114                  Math.pow(probabilityOfSuccess, numberOfSuccesses) *
115                  Math.pow(1.0 - probabilityOfSuccess, x);
116        }
117        return ret;
118    }
119
120    /** {@inheritDoc} */
121    @Override
122    public double logProbability(int x) {
123        double ret;
124        if (x < 0) {
125            ret = Double.NEGATIVE_INFINITY;
126        } else {
127            ret = LogBinomialCoefficient.value(x +
128                  numberOfSuccesses - 1, numberOfSuccesses - 1) +
129                  logProbabilityOfSuccess * numberOfSuccesses +
130                  log1mProbabilityOfSuccess * x;
131        }
132        return ret;
133    }
134
135    /** {@inheritDoc} */
136    @Override
137    public double cumulativeProbability(int x) {
138        double ret;
139        if (x < 0) {
140            ret = 0.0;
141        } else {
142            ret = RegularizedBeta.value(probabilityOfSuccess,
143                                        numberOfSuccesses, x + 1.0);
144        }
145        return ret;
146    }
147
148    /**
149     * {@inheritDoc}
150     *
151     * For number of successes {@code r} and probability of success {@code p},
152     * the mean is {@code r * (1 - p) / p}.
153     */
154    @Override
155    public double getMean() {
156        final double p = getProbabilityOfSuccess();
157        final double r = getNumberOfSuccesses();
158        return (r * (1 - p)) / p;
159    }
160
161    /**
162     * {@inheritDoc}
163     *
164     * For number of successes {@code r} and probability of success {@code p},
165     * the variance is {@code r * (1 - p) / p^2}.
166     */
167    @Override
168    public double getVariance() {
169        final double p = getProbabilityOfSuccess();
170        final double r = getNumberOfSuccesses();
171        return r * (1 - p) / (p * p);
172    }
173
174    /**
175     * {@inheritDoc}
176     *
177     * The lower bound of the support is always 0 no matter the parameters.
178     *
179     * @return lower bound of the support (always 0)
180     */
181    @Override
182    public int getSupportLowerBound() {
183        return 0;
184    }
185
186    /**
187     * {@inheritDoc}
188     *
189     * The upper bound of the support is always positive infinity no matter the
190     * parameters. Positive infinity is symbolized by {@code Integer.MAX_VALUE}.
191     *
192     * @return upper bound of the support (always {@code Integer.MAX_VALUE}
193     * for positive infinity)
194     */
195    @Override
196    public int getSupportUpperBound() {
197        return Integer.MAX_VALUE;
198    }
199
200    /**
201     * {@inheritDoc}
202     *
203     * The support of this distribution is connected.
204     *
205     * @return {@code true}
206     */
207    @Override
208    public boolean isSupportConnected() {
209        return true;
210    }
211}