001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.statistics.distribution;
019
020/**
021 * Implementation of the <a href="http://en.wikipedia.org/wiki/Hypergeometric_distribution">hypergeometric distribution</a>.
022 */
023public class HypergeometricDistribution extends AbstractDiscreteDistribution {
024    /** The number of successes in the population. */
025    private final int numberOfSuccesses;
026    /** The population size. */
027    private final int populationSize;
028    /** The sample size. */
029    private final int sampleSize;
030
031    /**
032     * Creates a new hypergeometric distribution.
033     *
034     * @param populationSize Population size.
035     * @param numberOfSuccesses Number of successes in the population.
036     * @param sampleSize Sample size.
037     * @throws IllegalArgumentException if {@code numberOfSuccesses < 0}, or
038     * {@code populationSize <= 0} or {@code numberOfSuccesses > populationSize},
039     * or {@code sampleSize > populationSize}.
040     */
041    public HypergeometricDistribution(int populationSize,
042                                      int numberOfSuccesses,
043                                      int sampleSize) {
044        if (populationSize <= 0) {
045            throw new DistributionException(DistributionException.NEGATIVE,
046                                            populationSize);
047        }
048        if (numberOfSuccesses < 0) {
049            throw new DistributionException(DistributionException.NEGATIVE,
050                                            numberOfSuccesses);
051        }
052        if (sampleSize < 0) {
053            throw new DistributionException(DistributionException.NEGATIVE,
054                                            sampleSize);
055        }
056
057        if (numberOfSuccesses > populationSize) {
058            throw new DistributionException(DistributionException.TOO_LARGE,
059                                            numberOfSuccesses, populationSize);
060        }
061        if (sampleSize > populationSize) {
062            throw new DistributionException(DistributionException.TOO_LARGE,
063                                            sampleSize, populationSize);
064        }
065
066        this.numberOfSuccesses = numberOfSuccesses;
067        this.populationSize = populationSize;
068        this.sampleSize = sampleSize;
069    }
070
071    /** {@inheritDoc} */
072    @Override
073    public double cumulativeProbability(int x) {
074        double ret;
075
076        final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
077        if (x < domain[0]) {
078            ret = 0.0;
079        } else if (x >= domain[1]) {
080            ret = 1.0;
081        } else {
082            ret = innerCumulativeProbability(domain[0], x, 1);
083        }
084
085        return ret;
086    }
087
088    /**
089     * Return the domain for the given hypergeometric distribution parameters.
090     *
091     * @param n Population size.
092     * @param m Number of successes in the population.
093     * @param k Sample size.
094     * @return a two element array containing the lower and upper bounds of the
095     * hypergeometric distribution.
096     */
097    private int[] getDomain(int n, int m, int k) {
098        return new int[] {getLowerDomain(n, m, k), getUpperDomain(m, k)};
099    }
100
101    /**
102     * Return the lowest domain value for the given hypergeometric distribution
103     * parameters.
104     *
105     * @param n Population size.
106     * @param m Number of successes in the population.
107     * @param k Sample size.
108     * @return the lowest domain value of the hypergeometric distribution.
109     */
110    private int getLowerDomain(int n, int m, int k) {
111        return Math.max(0, m - (n - k));
112    }
113
114    /**
115     * Access the number of successes.
116     *
117     * @return the number of successes.
118     */
119    public int getNumberOfSuccesses() {
120        return numberOfSuccesses;
121    }
122
123    /**
124     * Access the population size.
125     *
126     * @return the population size.
127     */
128    public int getPopulationSize() {
129        return populationSize;
130    }
131
132    /**
133     * Access the sample size.
134     *
135     * @return the sample size.
136     */
137    public int getSampleSize() {
138        return sampleSize;
139    }
140
141    /**
142     * Return the highest domain value for the given hypergeometric distribution
143     * parameters.
144     *
145     * @param m Number of successes in the population.
146     * @param k Sample size.
147     * @return the highest domain value of the hypergeometric distribution.
148     */
149    private int getUpperDomain(int m, int k) {
150        return Math.min(k, m);
151    }
152
153    /** {@inheritDoc} */
154    @Override
155    public double probability(int x) {
156        final double logProbability = logProbability(x);
157        return logProbability == Double.NEGATIVE_INFINITY ? 0 : Math.exp(logProbability);
158    }
159
160    /** {@inheritDoc} */
161    @Override
162    public double logProbability(int x) {
163        double ret;
164
165        final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
166        if (x < domain[0] || x > domain[1]) {
167            ret = Double.NEGATIVE_INFINITY;
168        } else {
169            final double p = (double) sampleSize / (double) populationSize;
170            final double q = (double) (populationSize - sampleSize) / (double) populationSize;
171            final double p1 = SaddlePointExpansionUtils.logBinomialProbability(x,
172                    numberOfSuccesses, p, q);
173            final double p2 =
174                    SaddlePointExpansionUtils.logBinomialProbability(sampleSize - x,
175                            populationSize - numberOfSuccesses, p, q);
176            final double p3 =
177                    SaddlePointExpansionUtils.logBinomialProbability(sampleSize, populationSize, p, q);
178            ret = p1 + p2 - p3;
179        }
180
181        return ret;
182    }
183
184    /**
185     * For this distribution, {@code X}, this method returns {@code P(X >= x)}.
186     *
187     * @param x Value at which the CDF is evaluated.
188     * @return the upper tail CDF for this distribution.
189     */
190    public double upperCumulativeProbability(int x) {
191        double ret;
192
193        final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
194        if (x <= domain[0]) {
195            ret = 1.0;
196        } else if (x > domain[1]) {
197            ret = 0.0;
198        } else {
199            ret = innerCumulativeProbability(domain[1], x, -1);
200        }
201
202        return ret;
203    }
204
205    /**
206     * For this distribution, {@code X}, this method returns
207     * {@code P(x0 <= X <= x1)}.
208     * This probability is computed by summing the point probabilities for the
209     * values {@code x0, x0 + 1, x0 + 2, ..., x1}, in the order directed by
210     * {@code dx}.
211     *
212     * @param x0 Inclusive lower bound.
213     * @param x1 Inclusive upper bound.
214     * @param dx Direction of summation (1 indicates summing from x0 to x1, and
215     * 0 indicates summing from x1 to x0).
216     * @return {@code P(x0 <= X <= x1)}.
217     */
218    private double innerCumulativeProbability(int x0, int x1, int dx) {
219        int x = x0;
220        double ret = probability(x);
221        while (x != x1) {
222            x += dx;
223            ret += probability(x);
224        }
225        return ret;
226    }
227
228    /**
229     * {@inheritDoc}
230     *
231     * For population size {@code N}, number of successes {@code m}, and sample
232     * size {@code n}, the mean is {@code n * m / N}.
233     */
234    @Override
235    public double getMean() {
236        return getSampleSize() * (getNumberOfSuccesses() / (double) getPopulationSize());
237    }
238
239    /**
240     * {@inheritDoc}
241     *
242     * For population size {@code N}, number of successes {@code m}, and sample
243     * size {@code n}, the variance is
244     * {@code (n * m * (N - n) * (N - m)) / (N^2 * (N - 1))}.
245     */
246    @Override
247    public double getVariance() {
248        final double N = getPopulationSize();
249        final double m = getNumberOfSuccesses();
250        final double n = getSampleSize();
251        return (n * m * (N - n) * (N - m)) / (N * N * (N - 1));
252    }
253
254    /**
255     * {@inheritDoc}
256     *
257     * For population size {@code N}, number of successes {@code m}, and sample
258     * size {@code n}, the lower bound of the support is
259     * {@code max(0, n + m - N)}.
260     *
261     * @return lower bound of the support
262     */
263    @Override
264    public int getSupportLowerBound() {
265        return Math.max(0,
266                        getSampleSize() + getNumberOfSuccesses() - getPopulationSize());
267    }
268
269    /**
270     * {@inheritDoc}
271     *
272     * For number of successes {@code m} and sample size {@code n}, the upper
273     * bound of the support is {@code min(m, n)}.
274     *
275     * @return upper bound of the support
276     */
277    @Override
278    public int getSupportUpperBound() {
279        return Math.min(getNumberOfSuccesses(), getSampleSize());
280    }
281
282    /**
283     * {@inheritDoc}
284     *
285     * The support of this distribution is connected.
286     *
287     * @return {@code true}
288     */
289    @Override
290    public boolean isSupportConnected() {
291        return true;
292    }
293}