Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.statistics.distribution;
019
020import java.util.function.DoubleUnaryOperator;
021import org.apache.commons.rng.UniformRandomProvider;
022import org.apache.commons.rng.sampling.distribution.InverseTransformParetoSampler;
023
024/**
025 * Implementation of the Pareto (Type I) distribution.
026 *
027 * <p>The probability density function of \( X \) is:
028 *
029 * <p>\[ f(x; k, \alpha) = \frac{\alpha  k^\alpha}{x^{\alpha + 1}} \]
030 *
031 * <p>for \( k &gt; 0 \),
032 * \( \alpha &gt; 0 \), and
033 * \( x \in [k, \infty) \).
034 *
035 * <p>\( k \) is a <em>scale</em> parameter: this is the minimum possible value of \( X \).
036 * <br>\( \alpha \) is a <em>shape</em> parameter: this is the Pareto index.
037 *
038 * @see  <a href="https://en.wikipedia.org/wiki/Pareto_distribution">Pareto distribution (Wikipedia)</a>
039 * @see  <a href="https://mathworld.wolfram.com/ParetoDistribution.html">Pareto distribution (MathWorld)</a>
040 */
041public final class ParetoDistribution extends AbstractContinuousDistribution {
042    /** The minimum value for the shape parameter when computing when computing the variance. */
043    private static final double MIN_SHAPE_FOR_VARIANCE = 2.0;
044
045    /** The scale parameter of this distribution. Also known as {@code k};
046     * the minimum possible value for the random variable {@code X}. */
047    private final double scale;
048    /** The shape parameter of this distribution. */
049    private final double shape;
050    /** Implementation of PDF(x). Assumes that {@code x >= scale}. */
051    private final DoubleUnaryOperator pdf;
052    /** Implementation of log PDF(x). Assumes that {@code x >= scale}. */
053    private final DoubleUnaryOperator logpdf;
054
055    /**
056     * @param scale Scale parameter (minimum possible value of X).
057     * @param shape Shape parameter (Pareto index).
058     */
059    private ParetoDistribution(double scale,
060                               double shape) {
061        this.scale = scale;
062        this.shape = shape;
063
064        // The Pareto distribution approaches a Dirac delta function when shape -> inf.
065        // Parameterisations can also lead to underflow in the standard computation.
066        // Extract the PDF and CDF to specialized implementations to handle edge cases.
067
068        // Pre-compute factors for the standard computation
069        final double shapeByScalePowShape = shape * Math.pow(scale, shape);
070        final double logShapePlusShapeByLogScale = Math.log(shape) + Math.log(scale) * shape;
071
072        if (shapeByScalePowShape < Double.POSITIVE_INFINITY &&
073            shapeByScalePowShape >= Double.MIN_NORMAL) {
074            // Standard computation
075            pdf = x -> shapeByScalePowShape / Math.pow(x, shape + 1);
076            logpdf = x -> logShapePlusShapeByLogScale - Math.log(x) * (shape + 1);
077        } else {
078            // Standard computation overflow; underflow to sub-normal or zero; or nan (pow(1.0, inf))
079            if (Double.isFinite(logShapePlusShapeByLogScale)) {
080                // Log computation is valid
081                logpdf = x -> logShapePlusShapeByLogScale - Math.log(x) * (shape + 1);
082                pdf = x -> Math.exp(logpdf.applyAsDouble(x));
083            } else  {
084                // Assume Dirac function
085                logpdf = x -> x > scale ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
086                // PDF has infinite value at lower bound
087                pdf = x -> x > scale ? 0 : Double.POSITIVE_INFINITY;
088            }
089        }
090    }
091
092    /**
093     * Creates a Pareto distribution.
094     *
095     * @param scale Scale parameter (minimum possible value of X).
096     * @param shape Shape parameter (Pareto index).
097     * @return the distribution
098     * @throws IllegalArgumentException if {@code scale <= 0}, {@code scale} is
099     * infinite, or {@code shape <= 0}.
100     */
101    public static ParetoDistribution of(double scale,
102                                        double shape) {
103        if (scale <= 0 || scale == Double.POSITIVE_INFINITY) {
104            throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE_FINITE, scale);
105        }
106        if (shape <= 0) {
107            throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE, shape);
108        }
109        return new ParetoDistribution(scale, shape);
110    }
111
112    /**
113     * Gets the scale parameter of this distribution.
114     * This is the minimum possible value of X.
115     *
116     * @return the scale parameter.
117     */
118    public double getScale() {
119        return scale;
120    }
121
122    /**
123     * Gets the shape parameter of this distribution.
124     * This is the Pareto index.
125     *
126     * @return the shape parameter.
127     */
128    public double getShape() {
129        return shape;
130    }
131
132    /**
133     * {@inheritDoc}
134     *
135     * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the PDF is:
136     *
137     * <p>\[ f(x; k, \alpha) = \begin{cases}
138     *       0                                       &amp; \text{for } x \lt k \\
139     *       \frac{\alpha  k^\alpha}{x^{\alpha + 1}} &amp; \text{for } x \ge k
140     *       \end{cases} \]
141     */
142    @Override
143    public double density(double x) {
144        if (x < scale) {
145            return 0;
146        }
147        return pdf.applyAsDouble(x);
148    }
149
150    /** {@inheritDoc}
151     *
152     * <p>See documentation of {@link #density(double)} for computation details.
153     */
154    @Override
155    public double logDensity(double x) {
156        if (x < scale) {
157            return Double.NEGATIVE_INFINITY;
158        }
159        return logpdf.applyAsDouble(x);
160    }
161
162    /**
163     * {@inheritDoc}
164     *
165     * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the CDF is:
166     *
167     * <p>\[ F(x; k, \alpha) = \begin{cases}
168     *       0                                     &amp; \text{for } x \le k \\
169     *       1 - \left( \frac{k}{x} \right)^\alpha &amp; \text{for } x \gt k
170     *       \end{cases} \]
171     */
172    @Override
173    public double cumulativeProbability(double x)  {
174        if (x <= scale) {
175            return 0;
176        }
177        // Increase accuracy for CDF close to 0 by using a log calculation:
178        // 1 - exp(α * ln(k / x)) == -(exp(α * ln(k / x)) - 1)
179        return -Math.expm1(shape * Math.log(scale / x));
180    }
181
182    /**
183     * {@inheritDoc}
184     *
185     * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the survival function is:
186     *
187     * <p>\[ S(x; k, \alpha) = \begin{cases}
188     *       1                                 &amp; \text{for } x \le k \\
189     *       \left( \frac{k}{x} \right)^\alpha &amp; \text{for } x \gt k
190     *       \end{cases} \]
191     */
192    @Override
193    public double survivalProbability(double x)  {
194        if (x <= scale) {
195            return 1;
196        }
197        return Math.pow(scale / x, shape);
198    }
199
200    /** {@inheritDoc} */
201    @Override
202    public double inverseCumulativeProbability(double p) {
203        ArgumentUtils.checkProbability(p);
204        if (p == 0) {
205            return getSupportLowerBound();
206        }
207        if (p == 1) {
208            return getSupportUpperBound();
209        }
210        return scale / Math.exp(Math.log1p(-p) / shape);
211    }
212
213    /** {@inheritDoc} */
214    @Override
215    public double inverseSurvivalProbability(double p) {
216        ArgumentUtils.checkProbability(p);
217        if (p == 1) {
218            return getSupportLowerBound();
219        }
220        if (p == 0) {
221            return getSupportUpperBound();
222        }
223        return scale / Math.pow(p, 1 / shape);
224    }
225
226    /**
227     * {@inheritDoc}
228     *
229     * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the mean is:
230     *
231     * <p>\[ \mathbb{E}[X] = \begin{cases}
232     *       \infty                      &amp; \text{for } \alpha \le 1 \\
233     *       \frac{k \alpha}{(\alpha-1)} &amp; \text{for } \alpha \gt 1
234     *       \end{cases} \]
235     */
236    @Override
237    public double getMean() {
238        if (shape <= 1) {
239            return Double.POSITIVE_INFINITY;
240        }
241        if (shape == Double.POSITIVE_INFINITY) {
242            return scale;
243        }
244        return scale * (shape / (shape - 1));
245    }
246
247    /**
248     * {@inheritDoc}
249     *
250     * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the variance is:
251     *
252     * <p>\[ \operatorname{var}[X] = \begin{cases}
253     *       \infty                                     &amp; \text{for } \alpha \le 2 \\
254     *       \frac{k^2 \alpha}{(\alpha-1)^2 (\alpha-2)} &amp; \text{for } \alpha \gt 2
255     *       \end{cases} \]
256     */
257    @Override
258    public double getVariance() {
259        if (shape <= MIN_SHAPE_FOR_VARIANCE) {
260            return Double.POSITIVE_INFINITY;
261        }
262        if (shape == Double.POSITIVE_INFINITY) {
263            return 0;
264        }
265        final double s = shape - 1;
266        final double z = shape / s / s / (shape - 2);
267        // Avoid intermediate overflow of scale^2 if z is small
268        return z < 1 ? z * scale * scale : scale * scale * z;
269    }
270
271    /**
272     * {@inheritDoc}
273     * <p>
274     * The lower bound of the support is equal to the scale parameter {@code k}.
275     *
276     * @return scale.
277     */
278    @Override
279    public double getSupportLowerBound() {
280        return getScale();
281    }
282
283    /**
284     * {@inheritDoc}
285     * <p>
286     * The upper bound of the support is always positive infinity.
287     *
288     * @return {@linkplain Double#POSITIVE_INFINITY positive infinity}.
289     */
290    @Override
291    public double getSupportUpperBound() {
292        return Double.POSITIVE_INFINITY;
293    }
294
295    /** {@inheritDoc} */
296    @Override
297    public ContinuousDistribution.Sampler createSampler(final UniformRandomProvider rng) {
298        // Pareto distribution sampler
299        return InverseTransformParetoSampler.of(rng, scale, shape)::sample;
300    }
301}