001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.distribution;
019    
020    import java.io.Serializable;
021    
022    import org.apache.commons.math.exception.NotPositiveException;
023    import org.apache.commons.math.exception.NotStrictlyPositiveException;
024    import org.apache.commons.math.exception.NumberIsTooLargeException;
025    import org.apache.commons.math.exception.util.LocalizedFormats;
026    import org.apache.commons.math.util.ArithmeticUtils;
027    import org.apache.commons.math.util.FastMath;
028    
029    /**
030     * The default implementation of {@link HypergeometricDistribution}.
031     *
032     * @version $Id: HypergeometricDistributionImpl.java 1182787 2011-10-13 11:20:48Z celestin $
033     */
034    public class HypergeometricDistributionImpl extends AbstractIntegerDistribution
035        implements HypergeometricDistribution, Serializable {
036        /** Serializable version identifier. */
037        private static final long serialVersionUID = -436928820673516179L;
038        /** The number of successes in the population. */
039        private final int numberOfSuccesses;
040        /** The population size. */
041        private final int populationSize;
042        /** The sample size. */
043        private final int sampleSize;
044    
045        /**
046         * Construct a new hypergeometric distribution with the given the
047         * population size, the number of successes in the population, and
048         * the sample size.
049         *
050         * @param populationSize Population size.
051         * @param numberOfSuccesses Number of successes in the population.
052         * @param sampleSize Sample size.
053         * @throws NotPositiveException if {@code numberOfSuccesses < 0}.
054         * @throws NotStrictlyPositiveException if {@code populationSize <= 0}.
055         * @throws NotPositiveException if {@code populationSize < 0}.
056         * @throws NumberIsTooLargeException if {@code numberOfSuccesses > populationSize}.
057         * @throws NumberIsTooLargeException if {@code sampleSize > populationSize}.
058         */
059        public HypergeometricDistributionImpl(int populationSize,
060                                              int numberOfSuccesses,
061                                              int sampleSize) {
062            if (populationSize <= 0) {
063                throw new NotStrictlyPositiveException(LocalizedFormats.POPULATION_SIZE,
064                                                       populationSize);
065            }
066            if (numberOfSuccesses < 0) {
067                throw new NotPositiveException(LocalizedFormats.NUMBER_OF_SUCCESSES,
068                                               numberOfSuccesses);
069            }
070            if (sampleSize < 0) {
071                throw new NotPositiveException(LocalizedFormats.NUMBER_OF_SAMPLES,
072                                               sampleSize);
073            }
074    
075            if (numberOfSuccesses > populationSize) {
076                throw new NumberIsTooLargeException(LocalizedFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE,
077                                                    numberOfSuccesses, populationSize, true);
078            }
079            if (sampleSize > populationSize) {
080                throw new NumberIsTooLargeException(LocalizedFormats.SAMPLE_SIZE_LARGER_THAN_POPULATION_SIZE,
081                                                    sampleSize, populationSize, true);
082            }
083    
084            this.numberOfSuccesses = numberOfSuccesses;
085            this.populationSize = populationSize;
086            this.sampleSize = sampleSize;
087        }
088    
089        /**
090         * For this distribution, {@code X}, this method returns {@code P(X <= x)}.
091         *
092         * @param x Value at which the PDF is evaluated.
093         * @return PDF for this distribution.
094         */
095        @Override
096        public double cumulativeProbability(int x) {
097            double ret;
098    
099            int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
100            if (x < domain[0]) {
101                ret = 0.0;
102            } else if (x >= domain[1]) {
103                ret = 1.0;
104            } else {
105                ret = innerCumulativeProbability(domain[0], x, 1, populationSize,
106                                                 numberOfSuccesses, sampleSize);
107            }
108    
109            return ret;
110        }
111    
112        /**
113         * Return the domain for the given hypergeometric distribution parameters.
114         *
115         * @param n Population size.
116         * @param m Number of successes in the population.
117         * @param k Sample size.
118         * @return a two element array containing the lower and upper bounds of the
119         * hypergeometric distribution.
120         */
121        private int[] getDomain(int n, int m, int k) {
122            return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) };
123        }
124    
125        /**
126         * Access the domain value lower bound, based on {@code p}, used to
127         * bracket a PDF root.
128         *
129         * @param p Desired probability for the critical value.
130         * @return the domain value lower bound, i.e. {@code P(X < 'lower bound') < p}.
131         */
132        @Override
133        protected int getDomainLowerBound(double p) {
134            return getLowerDomain(populationSize, numberOfSuccesses, sampleSize);
135        }
136    
137        /**
138         * Access the domain value upper bound, based on {@code p}, used to
139         * bracket a PDF root.
140         *
141         * @param p Desired probability for the critical value
142         * @return the domain value upper bound, i.e. {@code P(X < 'upper bound') > p}.
143         */
144        @Override
145        protected int getDomainUpperBound(double p) {
146            return getUpperDomain(sampleSize, numberOfSuccesses);
147        }
148    
149        /**
150         * Return the lowest domain value for the given hypergeometric distribution
151         * parameters.
152         *
153         * @param n Population size.
154         * @param m Number of successes in the population.
155         * @param k Sample size.
156         * @return the lowest domain value of the hypergeometric distribution.
157         */
158        private int getLowerDomain(int n, int m, int k) {
159            return FastMath.max(0, m - (n - k));
160        }
161    
162        /**
163         * {@inheritDoc}
164         */
165        public int getNumberOfSuccesses() {
166            return numberOfSuccesses;
167        }
168    
169        /**
170         * {@inheritDoc}
171         */
172        public int getPopulationSize() {
173            return populationSize;
174        }
175    
176        /**
177         * {@inheritDoc}
178         */
179        public int getSampleSize() {
180            return sampleSize;
181        }
182    
183        /**
184         * Return the highest domain value for the given hypergeometric distribution
185         * parameters.
186         *
187         * @param m Number of successes in the population.
188         * @param k Sample size.
189         * @return the highest domain value of the hypergeometric distribution.
190         */
191        private int getUpperDomain(int m, int k) {
192            return FastMath.min(k, m);
193        }
194    
195        /**
196         * For this distribution, {@code X}, this method returns {@code P(X = x)}.
197         *
198         * @param x Value at which the PMF is evaluated.
199         * @return PMF for this distribution.
200         */
201        public double probability(int x) {
202            double ret;
203    
204            int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
205            if (x < domain[0] || x > domain[1]) {
206                ret = 0.0;
207            } else {
208                double p = (double) sampleSize / (double) populationSize;
209                double q = (double) (populationSize - sampleSize) / (double) populationSize;
210                double p1 = SaddlePointExpansion.logBinomialProbability(x,
211                        numberOfSuccesses, p, q);
212                double p2 =
213                    SaddlePointExpansion.logBinomialProbability(sampleSize - x,
214                        populationSize - numberOfSuccesses, p, q);
215                double p3 =
216                    SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q);
217                ret = FastMath.exp(p1 + p2 - p3);
218            }
219    
220            return ret;
221        }
222    
223        /**
224         * For this distribution, {@code X}, defined by the given hypergeometric
225         *  distribution parameters, this method returns {@code P(X = x)}.
226         *
227         * @param x Value at which the PMF is evaluated.
228         * @param n the population size.
229         * @param m number of successes in the population.
230         * @param k the sample size.
231         * @return PMF for the distribution.
232         */
233        private double probability(int n, int m, int k, int x) {
234            return FastMath.exp(ArithmeticUtils.binomialCoefficientLog(m, x) +
235                   ArithmeticUtils.binomialCoefficientLog(n - m, k - x) -
236                   ArithmeticUtils.binomialCoefficientLog(n, k));
237        }
238    
239        /**
240         * For this distribution, {@code X}, this method returns {@code P(X >= x)}.
241         *
242         * @param x Value at which the CDF is evaluated.
243         * @return the upper tail CDF for this distribution.
244         * @since 1.1
245         */
246        public double upperCumulativeProbability(int x) {
247            double ret;
248    
249            final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
250            if (x < domain[0]) {
251                ret = 1.0;
252            } else if (x > domain[1]) {
253                ret = 0.0;
254            } else {
255                ret = innerCumulativeProbability(domain[1], x, -1, populationSize,
256                                                 numberOfSuccesses, sampleSize);
257            }
258    
259            return ret;
260        }
261    
262        /**
263         * For this distribution, {@code X}, this method returns
264         * {@code P(x0 <= X <= x1)}.
265         * This probability is computed by summing the point probabilities for the
266         * values {@code x0, x0 + 1, x0 + 2, ..., x1}, in the order directed by
267         * {@code dx}.
268         *
269         * @param x0 Inclusive lower bound.
270         * @param x1 Inclusive upper bound.
271         * @param dx Direction of summation (1 indicates summing from x0 to x1, and
272         * 0 indicates summing from x1 to x0).
273         * @param n the population size.
274         * @param m number of successes in the population.
275         * @param k the sample size.
276         * @return {@code P(x0 <= X <= x1)}.
277         */
278        private double innerCumulativeProbability(int x0, int x1, int dx,
279                                                  int n, int m, int k) {
280            double ret = probability(n, m, k, x0);
281            while (x0 != x1) {
282                x0 += dx;
283                ret += probability(n, m, k, x0);
284            }
285            return ret;
286        }
287    
288        /**
289         * {@inheritDoc}
290         *
291         * For population size <code>N</code>,
292         * number of successes <code>m</code>, and
293         * sample size <code>n</code>,
294         * the lower bound of the support is
295         * <code>max(0, n + m - N)</code>
296         *
297         * @return lower bound of the support
298         */
299        @Override
300        public int getSupportLowerBound() {
301            return FastMath.max(0,
302                    getSampleSize() + getNumberOfSuccesses() - getPopulationSize());
303        }
304    
305        /**
306         * {@inheritDoc}
307         *
308         * For number of successes <code>m</code> and
309         * sample size <code>n</code>,
310         * the upper bound of the support is
311         * <code>min(m, n)</code>
312         *
313         * @return upper bound of the support
314         */
315        @Override
316        public int getSupportUpperBound() {
317            return FastMath.min(getNumberOfSuccesses(), getSampleSize());
318        }
319    
320        /**
321         * {@inheritDoc}
322         *
323         * For population size <code>N</code>,
324         * number of successes <code>m</code>, and
325         * sample size <code>n</code>, the mean is
326         * <code>n * m / N</code>
327         *
328         * @return {@inheritDoc}
329         */
330        @Override
331        protected double calculateNumericalMean() {
332            return (double)(getSampleSize() * getNumberOfSuccesses()) / (double)getPopulationSize();
333        }
334    
335        /**
336         * {@inheritDoc}
337         *
338         * For population size <code>N</code>,
339         * number of successes <code>m</code>, and
340         * sample size <code>n</code>, the variance is
341         * <code>[ n * m * (N - n) * (N - m) ] / [ N^2 * (N - 1) ]</code>
342         *
343         * @return {@inheritDoc}
344         */
345        @Override
346        protected double calculateNumericalVariance() {
347            final double N = getPopulationSize();
348            final double m = getNumberOfSuccesses();
349            final double n = getSampleSize();
350            return ( n * m * (N - n) * (N - m) ) / ( (N*N * (N - 1)) );
351        }
352    }