View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.statistics.distribution;
19  
20  import java.util.function.DoubleUnaryOperator;
21  import org.apache.commons.rng.UniformRandomProvider;
22  import org.apache.commons.rng.sampling.distribution.InverseTransformParetoSampler;
23  
24  /**
25   * Implementation of the Pareto (Type I) distribution.
26   *
27   * <p>The probability density function of \( X \) is:
28   *
29   * <p>\[ f(x; k, \alpha) = \frac{\alpha  k^\alpha}{x^{\alpha + 1}} \]
30   *
31   * <p>for \( k &gt; 0 \),
32   * \( \alpha &gt; 0 \), and
33   * \( x \in [k, \infty) \).
34   *
35   * <p>\( k \) is a <em>scale</em> parameter: this is the minimum possible value of \( X \).
36   * <br>\( \alpha \) is a <em>shape</em> parameter: this is the Pareto index.
37   *
38   * @see  <a href="https://en.wikipedia.org/wiki/Pareto_distribution">Pareto distribution (Wikipedia)</a>
39   * @see  <a href="https://mathworld.wolfram.com/ParetoDistribution.html">Pareto distribution (MathWorld)</a>
40   */
41  public final class ParetoDistribution extends AbstractContinuousDistribution {
42      /** The minimum value for the shape parameter when computing when computing the variance. */
43      private static final double MIN_SHAPE_FOR_VARIANCE = 2.0;
44  
45      /** The scale parameter of this distribution. Also known as {@code k};
46       * the minimum possible value for the random variable {@code X}. */
47      private final double scale;
48      /** The shape parameter of this distribution. */
49      private final double shape;
50      /** Implementation of PDF(x). Assumes that {@code x >= scale}. */
51      private final DoubleUnaryOperator pdf;
52      /** Implementation of log PDF(x). Assumes that {@code x >= scale}. */
53      private final DoubleUnaryOperator logpdf;
54  
55      /**
56       * @param scale Scale parameter (minimum possible value of X).
57       * @param shape Shape parameter (Pareto index).
58       */
59      private ParetoDistribution(double scale,
60                                 double shape) {
61          this.scale = scale;
62          this.shape = shape;
63  
64          // The Pareto distribution approaches a Dirac delta function when shape -> inf.
65          // Parameterisations can also lead to underflow in the standard computation.
66          // Extract the PDF and CDF to specialized implementations to handle edge cases.
67  
68          // Pre-compute factors for the standard computation
69          final double shapeByScalePowShape = shape * Math.pow(scale, shape);
70          final double logShapePlusShapeByLogScale = Math.log(shape) + Math.log(scale) * shape;
71  
72          if (shapeByScalePowShape < Double.POSITIVE_INFINITY &&
73              shapeByScalePowShape >= Double.MIN_NORMAL) {
74              // Standard computation
75              pdf = x -> shapeByScalePowShape / Math.pow(x, shape + 1);
76              logpdf = x -> logShapePlusShapeByLogScale - Math.log(x) * (shape + 1);
77          } else {
78              // Standard computation overflow; underflow to sub-normal or zero; or nan (pow(1.0, inf))
79              if (Double.isFinite(logShapePlusShapeByLogScale)) {
80                  // Log computation is valid
81                  logpdf = x -> logShapePlusShapeByLogScale - Math.log(x) * (shape + 1);
82                  pdf = x -> Math.exp(logpdf.applyAsDouble(x));
83              } else  {
84                  // Assume Dirac function
85                  logpdf = x -> x > scale ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
86                  // PDF has infinite value at lower bound
87                  pdf = x -> x > scale ? 0 : Double.POSITIVE_INFINITY;
88              }
89          }
90      }
91  
92      /**
93       * Creates a Pareto distribution.
94       *
95       * @param scale Scale parameter (minimum possible value of X).
96       * @param shape Shape parameter (Pareto index).
97       * @return the distribution
98       * @throws IllegalArgumentException if {@code scale <= 0}, {@code scale} is
99       * infinite, or {@code shape <= 0}.
100      */
101     public static ParetoDistribution of(double scale,
102                                         double shape) {
103         if (scale <= 0 || scale == Double.POSITIVE_INFINITY) {
104             throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE_FINITE, scale);
105         }
106         if (shape <= 0) {
107             throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE, shape);
108         }
109         return new ParetoDistribution(scale, shape);
110     }
111 
112     /**
113      * Gets the scale parameter of this distribution.
114      * This is the minimum possible value of X.
115      *
116      * @return the scale parameter.
117      */
118     public double getScale() {
119         return scale;
120     }
121 
122     /**
123      * Gets the shape parameter of this distribution.
124      * This is the Pareto index.
125      *
126      * @return the shape parameter.
127      */
128     public double getShape() {
129         return shape;
130     }
131 
132     /**
133      * {@inheritDoc}
134      *
135      * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the PDF is:
136      *
137      * <p>\[ f(x; k, \alpha) = \begin{cases}
138      *       0                                       &amp; \text{for } x \lt k \\
139      *       \frac{\alpha  k^\alpha}{x^{\alpha + 1}} &amp; \text{for } x \ge k
140      *       \end{cases} \]
141      */
142     @Override
143     public double density(double x) {
144         if (x < scale) {
145             return 0;
146         }
147         return pdf.applyAsDouble(x);
148     }
149 
150     /** {@inheritDoc}
151      *
152      * <p>See documentation of {@link #density(double)} for computation details.
153      */
154     @Override
155     public double logDensity(double x) {
156         if (x < scale) {
157             return Double.NEGATIVE_INFINITY;
158         }
159         return logpdf.applyAsDouble(x);
160     }
161 
162     /**
163      * {@inheritDoc}
164      *
165      * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the CDF is:
166      *
167      * <p>\[ F(x; k, \alpha) = \begin{cases}
168      *       0                                     &amp; \text{for } x \le k \\
169      *       1 - \left( \frac{k}{x} \right)^\alpha &amp; \text{for } x \gt k
170      *       \end{cases} \]
171      */
172     @Override
173     public double cumulativeProbability(double x)  {
174         if (x <= scale) {
175             return 0;
176         }
177         // Increase accuracy for CDF close to 0 by using a log calculation:
178         // 1 - exp(α * ln(k / x)) == -(exp(α * ln(k / x)) - 1)
179         return -Math.expm1(shape * Math.log(scale / x));
180     }
181 
182     /**
183      * {@inheritDoc}
184      *
185      * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the survival function is:
186      *
187      * <p>\[ S(x; k, \alpha) = \begin{cases}
188      *       1                                 &amp; \text{for } x \le k \\
189      *       \left( \frac{k}{x} \right)^\alpha &amp; \text{for } x \gt k
190      *       \end{cases} \]
191      */
192     @Override
193     public double survivalProbability(double x)  {
194         if (x <= scale) {
195             return 1;
196         }
197         return Math.pow(scale / x, shape);
198     }
199 
200     /** {@inheritDoc} */
201     @Override
202     public double inverseCumulativeProbability(double p) {
203         ArgumentUtils.checkProbability(p);
204         if (p == 0) {
205             return getSupportLowerBound();
206         }
207         if (p == 1) {
208             return getSupportUpperBound();
209         }
210         return scale / Math.exp(Math.log1p(-p) / shape);
211     }
212 
213     /** {@inheritDoc} */
214     @Override
215     public double inverseSurvivalProbability(double p) {
216         ArgumentUtils.checkProbability(p);
217         if (p == 1) {
218             return getSupportLowerBound();
219         }
220         if (p == 0) {
221             return getSupportUpperBound();
222         }
223         return scale / Math.pow(p, 1 / shape);
224     }
225 
226     /**
227      * {@inheritDoc}
228      *
229      * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the mean is:
230      *
231      * <p>\[ \mathbb{E}[X] = \begin{cases}
232      *       \infty                      &amp; \text{for } \alpha \le 1 \\
233      *       \frac{k \alpha}{(\alpha-1)} &amp; \text{for } \alpha \gt 1
234      *       \end{cases} \]
235      */
236     @Override
237     public double getMean() {
238         if (shape <= 1) {
239             return Double.POSITIVE_INFINITY;
240         }
241         if (shape == Double.POSITIVE_INFINITY) {
242             return scale;
243         }
244         return scale * (shape / (shape - 1));
245     }
246 
247     /**
248      * {@inheritDoc}
249      *
250      * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the variance is:
251      *
252      * <p>\[ \operatorname{var}[X] = \begin{cases}
253      *       \infty                                     &amp; \text{for } \alpha \le 2 \\
254      *       \frac{k^2 \alpha}{(\alpha-1)^2 (\alpha-2)} &amp; \text{for } \alpha \gt 2
255      *       \end{cases} \]
256      */
257     @Override
258     public double getVariance() {
259         if (shape <= MIN_SHAPE_FOR_VARIANCE) {
260             return Double.POSITIVE_INFINITY;
261         }
262         if (shape == Double.POSITIVE_INFINITY) {
263             return 0;
264         }
265         final double s = shape - 1;
266         final double z = shape / s / s / (shape - 2);
267         // Avoid intermediate overflow of scale^2 if z is small
268         return z < 1 ? z * scale * scale : scale * scale * z;
269     }
270 
271     /**
272      * {@inheritDoc}
273      * <p>
274      * The lower bound of the support is equal to the scale parameter {@code k}.
275      *
276      * @return scale.
277      */
278     @Override
279     public double getSupportLowerBound() {
280         return getScale();
281     }
282 
283     /**
284      * {@inheritDoc}
285      * <p>
286      * The upper bound of the support is always positive infinity.
287      *
288      * @return {@linkplain Double#POSITIVE_INFINITY positive infinity}.
289      */
290     @Override
291     public double getSupportUpperBound() {
292         return Double.POSITIVE_INFINITY;
293     }
294 
295     /** {@inheritDoc} */
296     @Override
297     public ContinuousDistribution.Sampler createSampler(final UniformRandomProvider rng) {
298         // Pareto distribution sampler
299         return InverseTransformParetoSampler.of(rng, scale, shape)::sample;
300     }
301 }