1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.statistics.distribution;
19
20 import java.util.function.DoubleUnaryOperator;
21 import org.apache.commons.rng.UniformRandomProvider;
22 import org.apache.commons.rng.sampling.distribution.InverseTransformParetoSampler;
23
24 /**
25 * Implementation of the Pareto (Type I) distribution.
26 *
27 * <p>The probability density function of \( X \) is:
28 *
29 * <p>\[ f(x; k, \alpha) = \frac{\alpha k^\alpha}{x^{\alpha + 1}} \]
30 *
31 * <p>for \( k > 0 \),
32 * \( \alpha > 0 \), and
33 * \( x \in [k, \infty) \).
34 *
35 * <p>\( k \) is a <em>scale</em> parameter: this is the minimum possible value of \( X \).
36 * <br>\( \alpha \) is a <em>shape</em> parameter: this is the Pareto index.
37 *
38 * @see <a href="https://en.wikipedia.org/wiki/Pareto_distribution">Pareto distribution (Wikipedia)</a>
39 * @see <a href="https://mathworld.wolfram.com/ParetoDistribution.html">Pareto distribution (MathWorld)</a>
40 */
41 public final class ParetoDistribution extends AbstractContinuousDistribution {
42 /** The minimum value for the shape parameter when computing when computing the variance. */
43 private static final double MIN_SHAPE_FOR_VARIANCE = 2.0;
44
45 /** The scale parameter of this distribution. Also known as {@code k};
46 * the minimum possible value for the random variable {@code X}. */
47 private final double scale;
48 /** The shape parameter of this distribution. */
49 private final double shape;
50 /** Implementation of PDF(x). Assumes that {@code x >= scale}. */
51 private final DoubleUnaryOperator pdf;
52 /** Implementation of log PDF(x). Assumes that {@code x >= scale}. */
53 private final DoubleUnaryOperator logpdf;
54
55 /**
56 * @param scale Scale parameter (minimum possible value of X).
57 * @param shape Shape parameter (Pareto index).
58 */
59 private ParetoDistribution(double scale,
60 double shape) {
61 this.scale = scale;
62 this.shape = shape;
63
64 // The Pareto distribution approaches a Dirac delta function when shape -> inf.
65 // Parameterisations can also lead to underflow in the standard computation.
66 // Extract the PDF and CDF to specialized implementations to handle edge cases.
67
68 // Pre-compute factors for the standard computation
69 final double shapeByScalePowShape = shape * Math.pow(scale, shape);
70 final double logShapePlusShapeByLogScale = Math.log(shape) + Math.log(scale) * shape;
71
72 if (shapeByScalePowShape < Double.POSITIVE_INFINITY &&
73 shapeByScalePowShape >= Double.MIN_NORMAL) {
74 // Standard computation
75 pdf = x -> shapeByScalePowShape / Math.pow(x, shape + 1);
76 logpdf = x -> logShapePlusShapeByLogScale - Math.log(x) * (shape + 1);
77 } else {
78 // Standard computation overflow; underflow to sub-normal or zero; or nan (pow(1.0, inf))
79 if (Double.isFinite(logShapePlusShapeByLogScale)) {
80 // Log computation is valid
81 logpdf = x -> logShapePlusShapeByLogScale - Math.log(x) * (shape + 1);
82 pdf = x -> Math.exp(logpdf.applyAsDouble(x));
83 } else {
84 // Assume Dirac function
85 logpdf = x -> x > scale ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
86 // PDF has infinite value at lower bound
87 pdf = x -> x > scale ? 0 : Double.POSITIVE_INFINITY;
88 }
89 }
90 }
91
92 /**
93 * Creates a Pareto distribution.
94 *
95 * @param scale Scale parameter (minimum possible value of X).
96 * @param shape Shape parameter (Pareto index).
97 * @return the distribution
98 * @throws IllegalArgumentException if {@code scale <= 0}, {@code scale} is
99 * infinite, or {@code shape <= 0}.
100 */
101 public static ParetoDistribution of(double scale,
102 double shape) {
103 if (scale <= 0 || scale == Double.POSITIVE_INFINITY) {
104 throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE_FINITE, scale);
105 }
106 if (shape <= 0) {
107 throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE, shape);
108 }
109 return new ParetoDistribution(scale, shape);
110 }
111
112 /**
113 * Gets the scale parameter of this distribution.
114 * This is the minimum possible value of X.
115 *
116 * @return the scale parameter.
117 */
118 public double getScale() {
119 return scale;
120 }
121
122 /**
123 * Gets the shape parameter of this distribution.
124 * This is the Pareto index.
125 *
126 * @return the shape parameter.
127 */
128 public double getShape() {
129 return shape;
130 }
131
132 /**
133 * {@inheritDoc}
134 *
135 * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the PDF is:
136 *
137 * <p>\[ f(x; k, \alpha) = \begin{cases}
138 * 0 & \text{for } x \lt k \\
139 * \frac{\alpha k^\alpha}{x^{\alpha + 1}} & \text{for } x \ge k
140 * \end{cases} \]
141 */
142 @Override
143 public double density(double x) {
144 if (x < scale) {
145 return 0;
146 }
147 return pdf.applyAsDouble(x);
148 }
149
150 /** {@inheritDoc}
151 *
152 * <p>See documentation of {@link #density(double)} for computation details.
153 */
154 @Override
155 public double logDensity(double x) {
156 if (x < scale) {
157 return Double.NEGATIVE_INFINITY;
158 }
159 return logpdf.applyAsDouble(x);
160 }
161
162 /**
163 * {@inheritDoc}
164 *
165 * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the CDF is:
166 *
167 * <p>\[ F(x; k, \alpha) = \begin{cases}
168 * 0 & \text{for } x \le k \\
169 * 1 - \left( \frac{k}{x} \right)^\alpha & \text{for } x \gt k
170 * \end{cases} \]
171 */
172 @Override
173 public double cumulativeProbability(double x) {
174 if (x <= scale) {
175 return 0;
176 }
177 // Increase accuracy for CDF close to 0 by using a log calculation:
178 // 1 - exp(α * ln(k / x)) == -(exp(α * ln(k / x)) - 1)
179 return -Math.expm1(shape * Math.log(scale / x));
180 }
181
182 /**
183 * {@inheritDoc}
184 *
185 * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the survival function is:
186 *
187 * <p>\[ S(x; k, \alpha) = \begin{cases}
188 * 1 & \text{for } x \le k \\
189 * \left( \frac{k}{x} \right)^\alpha & \text{for } x \gt k
190 * \end{cases} \]
191 */
192 @Override
193 public double survivalProbability(double x) {
194 if (x <= scale) {
195 return 1;
196 }
197 return Math.pow(scale / x, shape);
198 }
199
200 /** {@inheritDoc} */
201 @Override
202 public double inverseCumulativeProbability(double p) {
203 ArgumentUtils.checkProbability(p);
204 if (p == 0) {
205 return getSupportLowerBound();
206 }
207 if (p == 1) {
208 return getSupportUpperBound();
209 }
210 return scale / Math.exp(Math.log1p(-p) / shape);
211 }
212
213 /** {@inheritDoc} */
214 @Override
215 public double inverseSurvivalProbability(double p) {
216 ArgumentUtils.checkProbability(p);
217 if (p == 1) {
218 return getSupportLowerBound();
219 }
220 if (p == 0) {
221 return getSupportUpperBound();
222 }
223 return scale / Math.pow(p, 1 / shape);
224 }
225
226 /**
227 * {@inheritDoc}
228 *
229 * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the mean is:
230 *
231 * <p>\[ \mathbb{E}[X] = \begin{cases}
232 * \infty & \text{for } \alpha \le 1 \\
233 * \frac{k \alpha}{(\alpha-1)} & \text{for } \alpha \gt 1
234 * \end{cases} \]
235 */
236 @Override
237 public double getMean() {
238 if (shape <= 1) {
239 return Double.POSITIVE_INFINITY;
240 }
241 if (shape == Double.POSITIVE_INFINITY) {
242 return scale;
243 }
244 return scale * (shape / (shape - 1));
245 }
246
247 /**
248 * {@inheritDoc}
249 *
250 * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the variance is:
251 *
252 * <p>\[ \operatorname{var}[X] = \begin{cases}
253 * \infty & \text{for } \alpha \le 2 \\
254 * \frac{k^2 \alpha}{(\alpha-1)^2 (\alpha-2)} & \text{for } \alpha \gt 2
255 * \end{cases} \]
256 */
257 @Override
258 public double getVariance() {
259 if (shape <= MIN_SHAPE_FOR_VARIANCE) {
260 return Double.POSITIVE_INFINITY;
261 }
262 if (shape == Double.POSITIVE_INFINITY) {
263 return 0;
264 }
265 final double s = shape - 1;
266 final double z = shape / s / s / (shape - 2);
267 // Avoid intermediate overflow of scale^2 if z is small
268 return z < 1 ? z * scale * scale : scale * scale * z;
269 }
270
271 /**
272 * {@inheritDoc}
273 * <p>
274 * The lower bound of the support is equal to the scale parameter {@code k}.
275 *
276 * @return scale.
277 */
278 @Override
279 public double getSupportLowerBound() {
280 return getScale();
281 }
282
283 /**
284 * {@inheritDoc}
285 * <p>
286 * The upper bound of the support is always positive infinity.
287 *
288 * @return {@linkplain Double#POSITIVE_INFINITY positive infinity}.
289 */
290 @Override
291 public double getSupportUpperBound() {
292 return Double.POSITIVE_INFINITY;
293 }
294
295 /** {@inheritDoc} */
296 @Override
297 public ContinuousDistribution.Sampler createSampler(final UniformRandomProvider rng) {
298 // Pareto distribution sampler
299 return InverseTransformParetoSampler.of(rng, scale, shape)::sample;
300 }
301 }