001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.statistics.distribution;
018
019import java.util.function.IntToDoubleFunction;
020import org.apache.commons.rng.UniformRandomProvider;
021import org.apache.commons.rng.sampling.distribution.GeometricSampler;
022
023/**
024 * Implementation of the geometric distribution.
025 *
026 * <p>The probability mass function of \( X \) is:
027 *
028 * <p>\[ f(k; p) = (1-p)^k \, p \]
029 *
030 * <p>for \( p \in (0, 1] \) the probability of success and
031 * \( k \in \{0, 1, 2, \dots\} \) the number of failures.
032 *
033 * <p>This parameterization is used to model the number of failures until
034 * the first success.
035 *
036 * @see <a href="https://en.wikipedia.org/wiki/Geometric_distribution">Geometric distribution (Wikipedia)</a>
037 * @see <a href="https://mathworld.wolfram.com/GeometricDistribution.html">Geometric distribution (MathWorld)</a>
038 */
039public final class GeometricDistribution extends AbstractDiscreteDistribution {
040    /** 1/2. */
041    private static final double HALF = 0.5;
042
043    /** The probability of success. */
044    private final double probabilityOfSuccess;
045    /** {@code log(p)} where p is the probability of success. */
046    private final double logProbabilityOfSuccess;
047    /** {@code log(1 - p)} where p is the probability of success. */
048    private final double log1mProbabilityOfSuccess;
049    /** Value of survival probability for x=0.
050     * Used in the survival functions. Equal to (1 - probability of success). */
051    private final double sf0;
052    /** Implementation of PMF(x). Assumes that {@code x > 0}. */
053    private final IntToDoubleFunction pmf;
054
055    /**
056     * @param p Probability of success.
057     */
058    private GeometricDistribution(double p) {
059        probabilityOfSuccess = p;
060        logProbabilityOfSuccess = Math.log(p);
061        log1mProbabilityOfSuccess = Math.log1p(-p);
062        sf0 = 1 - p;
063
064        // Choose the PMF implementation.
065        // When p >= 0.5 then 1 - p is exact and using the power function
066        // is consistently more accurate than the use of the exponential function.
067        // When p -> 0 then the exponential function avoids large error propagation
068        // of the power function used with an inexact 1 - p.
069        // Also compute the survival probability for use when x=0.
070        if (p >= HALF) {
071            pmf = x -> Math.pow(sf0, x) * probabilityOfSuccess;
072        } else {
073            pmf = x -> Math.exp(log1mProbabilityOfSuccess * x) * probabilityOfSuccess;
074        }
075    }
076
077    /**
078     * Creates a geometric distribution.
079     *
080     * @param p Probability of success.
081     * @return the geometric distribution
082     * @throws IllegalArgumentException if {@code p <= 0} or {@code p > 1}.
083     */
084    public static GeometricDistribution of(double p) {
085        if (p <= 0 || p > 1) {
086            throw new DistributionException(DistributionException.INVALID_NON_ZERO_PROBABILITY, p);
087        }
088        return new GeometricDistribution(p);
089    }
090
091    /**
092     * Gets the probability of success parameter of this distribution.
093     *
094     * @return the probability of success.
095     */
096    public double getProbabilityOfSuccess() {
097        return probabilityOfSuccess;
098    }
099
100    /** {@inheritDoc} */
101    @Override
102    public double probability(int x) {
103        if (x <= 0) {
104            // Special case of x=0 exploiting cancellation.
105            return x == 0 ? probabilityOfSuccess : 0;
106        }
107        return pmf.applyAsDouble(x);
108    }
109
110    /** {@inheritDoc} */
111    @Override
112    public double logProbability(int x) {
113        if (x <= 0) {
114            // Special case of x=0 exploiting cancellation.
115            return x == 0 ? logProbabilityOfSuccess : Double.NEGATIVE_INFINITY;
116        }
117        return x * log1mProbabilityOfSuccess + logProbabilityOfSuccess;
118    }
119
120    /** {@inheritDoc} */
121    @Override
122    public double cumulativeProbability(int x) {
123        if (x <= 0) {
124            // Note: CDF(x=0) = PDF(x=0) = probabilityOfSuccess
125            return x == 0 ? probabilityOfSuccess : 0;
126        }
127        // Note: Double addition avoids overflow. This may compute a value less than 1.0
128        // for the max integer value when p is very small.
129        return -Math.expm1(log1mProbabilityOfSuccess * (x + 1.0));
130    }
131
132    /** {@inheritDoc} */
133    @Override
134    public double survivalProbability(int x) {
135        if (x <= 0) {
136            // Note: SF(x=0) = 1 - PDF(x=0) = 1 - probabilityOfSuccess
137            // Use a pre-computed value to avoid cancellation when probabilityOfSuccess -> 0
138            return x == 0 ? sf0 : 1;
139        }
140        // Note: Double addition avoids overflow. This may compute a value greater than 0.0
141        // for the max integer value when p is very small.
142        return Math.exp(log1mProbabilityOfSuccess * (x + 1.0));
143    }
144
145    /** {@inheritDoc} */
146    @Override
147    public int inverseCumulativeProbability(double p) {
148        ArgumentUtils.checkProbability(p);
149        if (p == 1) {
150            return getSupportUpperBound();
151        }
152        if (p <= probabilityOfSuccess) {
153            return 0;
154        }
155        // p > probabilityOfSuccess
156        // => log(1-p) < log(1-probabilityOfSuccess);
157        // Both terms are negative as probabilityOfSuccess > 0.
158        // This should be lower bounded to (2 - 1) = 1
159        int x = (int) (Math.ceil(Math.log1p(-p) / log1mProbabilityOfSuccess) - 1);
160
161        // Correct rounding errors.
162        // This ensures x == icdf(cdf(x))
163
164        if (cumulativeProbability(x - 1) >= p) {
165            // No checks for x=0.
166            // If x=0; cdf(-1) = 0 and the condition is false as p>0 at this point.
167            x--;
168        } else if (cumulativeProbability(x) < p && x < Integer.MAX_VALUE) {
169            // The supported upper bound is max_value here as probabilityOfSuccess != 1
170            x++;
171        }
172
173        return x;
174    }
175
176    /** {@inheritDoc} */
177    @Override
178    public int inverseSurvivalProbability(double p) {
179        ArgumentUtils.checkProbability(p);
180        if (p == 0) {
181            return getSupportUpperBound();
182        }
183        if (p >= sf0) {
184            return 0;
185        }
186
187        // p < 1 - probabilityOfSuccess
188        // Inversion as for icdf using log(p) in place of log1p(-p)
189        int x = (int) (Math.ceil(Math.log(p) / log1mProbabilityOfSuccess) - 1);
190
191        // Correct rounding errors.
192        // This ensures x == isf(sf(x))
193
194        if (survivalProbability(x - 1) <= p) {
195            // No checks for x=0
196            // If x=0; sf(-1) = 1 and the condition is false as p<1 at this point.
197            x--;
198        } else if (survivalProbability(x) > p && x < Integer.MAX_VALUE) {
199            // The supported upper bound is max_value here as probabilityOfSuccess != 1
200            x++;
201        }
202
203        return x;
204    }
205
206    /**
207     * {@inheritDoc}
208     *
209     * <p>For probability parameter \( p \), the mean is:
210     *
211     * <p>\[ \frac{1 - p}{p} \]
212     */
213    @Override
214    public double getMean() {
215        return (1 - probabilityOfSuccess) / probabilityOfSuccess;
216    }
217
218    /**
219     * {@inheritDoc}
220     *
221     * <p>For probability parameter \( p \), the variance is:
222     *
223     * <p>\[ \frac{1 - p}{p^2} \]
224     */
225    @Override
226    public double getVariance() {
227        return (1 - probabilityOfSuccess) / (probabilityOfSuccess * probabilityOfSuccess);
228    }
229
230    /**
231     * {@inheritDoc}
232     *
233     * <p>The lower bound of the support is always 0.
234     *
235     * @return 0.
236     */
237    @Override
238    public int getSupportLowerBound() {
239        return 0;
240    }
241
242    /**
243     * {@inheritDoc}
244     *
245     * <p>The upper bound of the support is positive infinity except for the
246     * probability parameter {@code p = 1.0}.
247     *
248     * @return {@link Integer#MAX_VALUE} or 0.
249     */
250    @Override
251    public int getSupportUpperBound() {
252        return probabilityOfSuccess < 1 ? Integer.MAX_VALUE : 0;
253    }
254
255    /** {@inheritDoc} */
256    @Override
257    public Sampler createSampler(UniformRandomProvider rng) {
258        return GeometricSampler.of(rng, probabilityOfSuccess)::sample;
259    }
260}