1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.statistics.distribution;
18
19 import java.util.stream.IntStream;
20 import org.apache.commons.rng.UniformRandomProvider;
21
22 /**
23 * Interface for distributions on the integers.
24 */
25 public interface DiscreteDistribution {
26
27 /**
28 * For a random variable {@code X} whose values are distributed according
29 * to this distribution, this method returns {@code P(X = x)}.
30 * In other words, this method represents the probability mass function (PMF)
31 * for the distribution.
32 *
33 * @param x Point at which the PMF is evaluated.
34 * @return the value of the probability mass function at {@code x}.
35 */
36 double probability(int x);
37
38 /**
39 * For a random variable {@code X} whose values are distributed according
40 * to this distribution, this method returns {@code P(x0 < X <= x1)}.
41 * The default implementation uses the identity
42 * {@code P(x0 < X <= x1) = P(X <= x1) - P(X <= x0)}
43 *
44 * <p>Special cases:
45 * <ul>
46 * <li>returns {@code 0.0} if {@code x0 == x1};
47 * <li>returns {@code probability(x1)} if {@code x0 + 1 == x1};
48 * </ul>
49 *
50 * @param x0 Lower bound (exclusive).
51 * @param x1 Upper bound (inclusive).
52 * @return the probability that a random variable with this distribution
53 * takes a value between {@code x0} and {@code x1}, excluding the lower
54 * and including the upper endpoint.
55 * @throws IllegalArgumentException if {@code x0 > x1}.
56 */
57 default double probability(int x0,
58 int x1) {
59 if (x0 > x1) {
60 throw new DistributionException(DistributionException.INVALID_RANGE_LOW_GT_HIGH, x0, x1);
61 }
62 // Long addition avoids overflow
63 if (x0 + 1L >= x1) {
64 return x0 == x1 ? 0.0 : probability(x1);
65 }
66 return cumulativeProbability(x1) - cumulativeProbability(x0);
67 }
68
69 /**
70 * For a random variable {@code X} whose values are distributed according
71 * to this distribution, this method returns {@code log(P(X = x))}, where
72 * {@code log} is the natural logarithm.
73 *
74 * @param x Point at which the PMF is evaluated.
75 * @return the logarithm of the value of the probability mass function at
76 * {@code x}.
77 */
78 default double logProbability(int x) {
79 return Math.log(probability(x));
80 }
81
82 /**
83 * For a random variable {@code X} whose values are distributed according
84 * to this distribution, this method returns {@code P(X <= x)}.
85 * In other, words, this method represents the (cumulative) distribution
86 * function (CDF) for this distribution.
87 *
88 * @param x Point at which the CDF is evaluated.
89 * @return the probability that a random variable with this distribution
90 * takes a value less than or equal to {@code x}.
91 */
92 double cumulativeProbability(int x);
93
94 /**
95 * For a random variable {@code X} whose values are distributed according
96 * to this distribution, this method returns {@code P(X > x)}.
97 * In other words, this method represents the complementary cumulative
98 * distribution function.
99 *
100 * <p>By default, this is defined as {@code 1 - cumulativeProbability(x)}, but
101 * the specific implementation may be more accurate.
102 *
103 * @param x Point at which the survival function is evaluated.
104 * @return the probability that a random variable with this
105 * distribution takes a value greater than {@code x}.
106 */
107 default double survivalProbability(int x) {
108 return 1.0 - cumulativeProbability(x);
109 }
110
111 /**
112 * Computes the quantile function of this distribution.
113 * For a random variable {@code X} distributed according to this distribution,
114 * the returned value is:
115 *
116 * <p>\[ x = \begin{cases}
117 * \inf \{ x \in \mathbb Z : P(X \le x) \ge p\} & \text{for } 0 \lt p \le 1 \\
118 * \inf \{ x \in \mathbb Z : P(X \le x) \gt 0 \} & \text{for } p = 0
119 * \end{cases} \]
120 *
121 * <p>If the result exceeds the range of the data type {@code int},
122 * then {@link Integer#MIN_VALUE} or {@link Integer#MAX_VALUE} is returned.
123 * In this case the result of {@link #cumulativeProbability(int) cumulativeProbability(x)}
124 * called using the returned {@code p}-quantile may not compute the original {@code p}.
125 *
126 * @param p Cumulative probability.
127 * @return the smallest {@code p}-quantile of this distribution
128 * (largest 0-quantile for {@code p = 0}).
129 * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1}.
130 */
131 int inverseCumulativeProbability(double p);
132
133 /**
134 * Computes the inverse survival probability function of this distribution.
135 * For a random variable {@code X} distributed according to this distribution,
136 * the returned value is:
137 *
138 * <p>\[ x = \begin{cases}
139 * \inf \{ x \in \mathbb Z : P(X \gt x) \le p\} & \text{for } 0 \le p \lt 1 \\
140 * \inf \{ x \in \mathbb Z : P(X \gt x) \lt 1 \} & \text{for } p = 1
141 * \end{cases} \]
142 *
143 * <p>If the result exceeds the range of the data type {@code int},
144 * then {@link Integer#MIN_VALUE} or {@link Integer#MAX_VALUE} is returned.
145 * In this case the result of {@link #survivalProbability(int) survivalProbability(x)}
146 * called using the returned {@code (1-p)}-quantile may not compute the original {@code p}.
147 *
148 * <p>By default, this is defined as {@code inverseCumulativeProbability(1 - p)}, but
149 * the specific implementation may be more accurate.
150 *
151 * @param p Cumulative probability.
152 * @return the smallest {@code (1-p)}-quantile of this distribution
153 * (largest 0-quantile for {@code p = 1}).
154 * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1}.
155 */
156 default int inverseSurvivalProbability(double p) {
157 return inverseCumulativeProbability(1 - p);
158 }
159
160 /**
161 * Gets the mean of this distribution.
162 *
163 * @return the mean.
164 */
165 double getMean();
166
167 /**
168 * Gets the variance of this distribution.
169 *
170 * @return the variance.
171 */
172 double getVariance();
173
174 /**
175 * Gets the lower bound of the support.
176 * This method must return the same value as
177 * {@code inverseCumulativeProbability(0)}, i.e.
178 * \( \inf \{ x \in \mathbb Z : P(X \le x) \gt 0 \} \).
179 * By convention, {@link Integer#MIN_VALUE} should be substituted
180 * for negative infinity.
181 *
182 * @return the lower bound of the support.
183 */
184 int getSupportLowerBound();
185
186 /**
187 * Gets the upper bound of the support.
188 * This method must return the same value as
189 * {@code inverseCumulativeProbability(1)}, i.e.
190 * \( \inf \{ x \in \mathbb Z : P(X \le x) = 1 \} \).
191 * By convention, {@link Integer#MAX_VALUE} should be substituted
192 * for positive infinity.
193 *
194 * @return the upper bound of the support.
195 */
196 int getSupportUpperBound();
197
198 /**
199 * Creates a sampler.
200 *
201 * @param rng Generator of uniformly distributed numbers.
202 * @return a sampler that produces random numbers according this
203 * distribution.
204 */
205 Sampler createSampler(UniformRandomProvider rng);
206
207 /**
208 * Distribution sampling functionality.
209 */
210 @FunctionalInterface
211 interface Sampler {
212 /**
213 * Generates a random value sampled from this distribution.
214 *
215 * @return a random value.
216 */
217 int sample();
218
219 /**
220 * Returns an effectively unlimited stream of {@code int} sample values.
221 *
222 * <p>The default implementation produces a sequential stream that repeatedly
223 * calls {@link #sample sample}().
224 *
225 * @return a stream of {@code int} values.
226 */
227 default IntStream samples() {
228 return IntStream.generate(this::sample).sequential();
229 }
230
231 /**
232 * Returns a stream producing the given {@code streamSize} number of {@code int}
233 * sample values.
234 *
235 * <p>The default implementation produces a sequential stream that repeatedly
236 * calls {@link #sample sample}(); the stream is limited to the given {@code streamSize}.
237 *
238 * @param streamSize Number of values to generate.
239 * @return a stream of {@code int} values.
240 */
241 default IntStream samples(long streamSize) {
242 return samples().limit(streamSize);
243 }
244 }
245 }