001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.statistics.distribution;
018
019import java.util.stream.IntStream;
020import org.apache.commons.rng.UniformRandomProvider;
021
022/**
023 * Interface for distributions on the integers.
024 */
025public interface DiscreteDistribution {
026
027    /**
028     * For a random variable {@code X} whose values are distributed according
029     * to this distribution, this method returns {@code P(X = x)}.
030     * In other words, this method represents the probability mass function (PMF)
031     * for the distribution.
032     *
033     * @param x Point at which the PMF is evaluated.
034     * @return the value of the probability mass function at {@code x}.
035     */
036    double probability(int x);
037
038    /**
039     * For a random variable {@code X} whose values are distributed according
040     * to this distribution, this method returns {@code P(x0 < X <= x1)}.
041     * The default implementation uses the identity
042     * {@code P(x0 < X <= x1) = P(X <= x1) - P(X <= x0)}
043     *
044     * <p>Special cases:
045     * <ul>
046     * <li>returns {@code 0.0} if {@code x0 == x1};
047     * <li>returns {@code probability(x1)} if {@code x0 + 1 == x1};
048     * </ul>
049     *
050     * @param x0 Lower bound (exclusive).
051     * @param x1 Upper bound (inclusive).
052     * @return the probability that a random variable with this distribution
053     * takes a value between {@code x0} and {@code x1},  excluding the lower
054     * and including the upper endpoint.
055     * @throws IllegalArgumentException if {@code x0 > x1}.
056     */
057    default double probability(int x0,
058                               int x1) {
059        if (x0 > x1) {
060            throw new DistributionException(DistributionException.INVALID_RANGE_LOW_GT_HIGH, x0, x1);
061        }
062        // Long addition avoids overflow
063        if (x0 + 1L >= x1) {
064            return x0 == x1 ? 0.0 : probability(x1);
065        }
066        return cumulativeProbability(x1) - cumulativeProbability(x0);
067    }
068
069    /**
070     * For a random variable {@code X} whose values are distributed according
071     * to this distribution, this method returns {@code log(P(X = x))}, where
072     * {@code log} is the natural logarithm.
073     *
074     * @param x Point at which the PMF is evaluated.
075     * @return the logarithm of the value of the probability mass function at
076     * {@code x}.
077     */
078    default double logProbability(int x) {
079        return Math.log(probability(x));
080    }
081
082    /**
083     * For a random variable {@code X} whose values are distributed according
084     * to this distribution, this method returns {@code P(X <= x)}.
085     * In other, words, this method represents the (cumulative) distribution
086     * function (CDF) for this distribution.
087     *
088     * @param x Point at which the CDF is evaluated.
089     * @return the probability that a random variable with this distribution
090     * takes a value less than or equal to {@code x}.
091     */
092    double cumulativeProbability(int x);
093
094    /**
095     * For a random variable {@code X} whose values are distributed according
096     * to this distribution, this method returns {@code P(X > x)}.
097     * In other words, this method represents the complementary cumulative
098     * distribution function.
099     *
100     * <p>By default, this is defined as {@code 1 - cumulativeProbability(x)}, but
101     * the specific implementation may be more accurate.
102     *
103     * @param x Point at which the survival function is evaluated.
104     * @return the probability that a random variable with this
105     * distribution takes a value greater than {@code x}.
106     */
107    default double survivalProbability(int x) {
108        return 1.0 - cumulativeProbability(x);
109    }
110
111    /**
112     * Computes the quantile function of this distribution.
113     * For a random variable {@code X} distributed according to this distribution,
114     * the returned value is:
115     *
116     * <p>\[ x = \begin{cases}
117     *       \inf \{ x \in \mathbb Z : P(X \le x) \ge p\}   &amp; \text{for } 0 \lt p \le 1 \\
118     *       \inf \{ x \in \mathbb Z : P(X \le x) \gt 0 \}  &amp; \text{for } p = 0
119     *       \end{cases} \]
120     *
121     * <p>If the result exceeds the range of the data type {@code int},
122     * then {@link Integer#MIN_VALUE} or {@link Integer#MAX_VALUE} is returned.
123     * In this case the result of {@link #cumulativeProbability(int) cumulativeProbability(x)}
124     * called using the returned {@code p}-quantile may not compute the original {@code p}.
125     *
126     * @param p Cumulative probability.
127     * @return the smallest {@code p}-quantile of this distribution
128     * (largest 0-quantile for {@code p = 0}).
129     * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1}.
130     */
131    int inverseCumulativeProbability(double p);
132
133    /**
134     * Computes the inverse survival probability function of this distribution.
135     * For a random variable {@code X} distributed according to this distribution,
136     * the returned value is:
137     *
138     * <p>\[ x = \begin{cases}
139     *       \inf \{ x \in \mathbb Z : P(X \ge x) \le p\}   &amp; \text{for } 0 \le p \lt 1 \\
140     *       \inf \{ x \in \mathbb Z : P(X \ge x) \lt 1 \}  &amp; \text{for } p = 1
141     *       \end{cases} \]
142     *
143     * <p>If the result exceeds the range of the data type {@code int},
144     * then {@link Integer#MIN_VALUE} or {@link Integer#MAX_VALUE} is returned.
145     * In this case the result of {@link #survivalProbability(int) survivalProbability(x)}
146     * called using the returned {@code (1-p)}-quantile may not compute the original {@code p}.
147     *
148     * <p>By default, this is defined as {@code inverseCumulativeProbability(1 - p)}, but
149     * the specific implementation may be more accurate.
150     *
151     * @param p Cumulative probability.
152     * @return the smallest {@code (1-p)}-quantile of this distribution
153     * (largest 0-quantile for {@code p = 1}).
154     * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1}.
155     */
156    default int inverseSurvivalProbability(double p) {
157        return inverseCumulativeProbability(1 - p);
158    }
159
160    /**
161     * Gets the mean of this distribution.
162     *
163     * @return the mean.
164     */
165    double getMean();
166
167    /**
168     * Gets the variance of this distribution.
169     *
170     * @return the variance.
171     */
172    double getVariance();
173
174    /**
175     * Gets the lower bound of the support.
176     * This method must return the same value as
177     * {@code inverseCumulativeProbability(0)}, i.e.
178     * \( \inf \{ x \in \mathbb Z : P(X \le x) \gt 0 \} \).
179     * By convention, {@link Integer#MIN_VALUE} should be substituted
180     * for negative infinity.
181     *
182     * @return the lower bound of the support.
183     */
184    int getSupportLowerBound();
185
186    /**
187     * Gets the upper bound of the support.
188     * This method must return the same value as
189     * {@code inverseCumulativeProbability(1)}, i.e.
190     * \( \inf \{ x \in \mathbb Z : P(X \le x) = 1 \} \).
191     * By convention, {@link Integer#MAX_VALUE} should be substituted
192     * for positive infinity.
193     *
194     * @return the upper bound of the support.
195     */
196    int getSupportUpperBound();
197
198    /**
199     * Creates a sampler.
200     *
201     * @param rng Generator of uniformly distributed numbers.
202     * @return a sampler that produces random numbers according this
203     * distribution.
204     */
205    Sampler createSampler(UniformRandomProvider rng);
206
207    /**
208     * Distribution sampling functionality.
209     */
210    @FunctionalInterface
211    interface Sampler {
212        /**
213         * Generates a random value sampled from this distribution.
214         *
215         * @return a random value.
216         */
217        int sample();
218
219        /**
220         * Returns an effectively unlimited stream of {@code int} sample values.
221         *
222         * <p>The default implementation produces a sequential stream that repeatedly
223         * calls {@link #sample sample}().
224         *
225         * @return a stream of {@code int} values.
226         */
227        default IntStream samples() {
228            return IntStream.generate(this::sample).sequential();
229        }
230
231        /**
232         * Returns a stream producing the given {@code streamSize} number of {@code int}
233         * sample values.
234         *
235         * <p>The default implementation produces a sequential stream that repeatedly
236         * calls {@link #sample sample}(); the stream is limited to the given {@code streamSize}.
237         *
238         * @param streamSize Number of values to generate.
239         * @return a stream of {@code int} values.
240         */
241        default IntStream samples(long streamSize) {
242            return samples().limit(streamSize);
243        }
244    }
245}