ParetoDistribution.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.statistics.distribution;

  18. import java.util.function.DoubleUnaryOperator;
  19. import org.apache.commons.rng.UniformRandomProvider;
  20. import org.apache.commons.rng.sampling.distribution.InverseTransformParetoSampler;

  21. /**
  22.  * Implementation of the Pareto (Type I) distribution.
  23.  *
  24.  * <p>The probability density function of \( X \) is:
  25.  *
  26.  * <p>\[ f(x; k, \alpha) = \frac{\alpha  k^\alpha}{x^{\alpha + 1}} \]
  27.  *
  28.  * <p>for \( k &gt; 0 \),
  29.  * \( \alpha &gt; 0 \), and
  30.  * \( x \in [k, \infty) \).
  31.  *
  32.  * <p>\( k \) is a <em>scale</em> parameter: this is the minimum possible value of \( X \).
  33.  * <br>\( \alpha \) is a <em>shape</em> parameter: this is the Pareto index.
  34.  *
  35.  * @see  <a href="https://en.wikipedia.org/wiki/Pareto_distribution">Pareto distribution (Wikipedia)</a>
  36.  * @see  <a href="https://mathworld.wolfram.com/ParetoDistribution.html">Pareto distribution (MathWorld)</a>
  37.  */
  38. public final class ParetoDistribution extends AbstractContinuousDistribution {
  39.     /** The minimum value for the shape parameter when computing when computing the variance. */
  40.     private static final double MIN_SHAPE_FOR_VARIANCE = 2.0;

  41.     /** The scale parameter of this distribution. Also known as {@code k};
  42.      * the minimum possible value for the random variable {@code X}. */
  43.     private final double scale;
  44.     /** The shape parameter of this distribution. */
  45.     private final double shape;
  46.     /** Implementation of PDF(x). Assumes that {@code x >= scale}. */
  47.     private final DoubleUnaryOperator pdf;
  48.     /** Implementation of log PDF(x). Assumes that {@code x >= scale}. */
  49.     private final DoubleUnaryOperator logpdf;

  50.     /**
  51.      * @param scale Scale parameter (minimum possible value of X).
  52.      * @param shape Shape parameter (Pareto index).
  53.      */
  54.     private ParetoDistribution(double scale,
  55.                                double shape) {
  56.         this.scale = scale;
  57.         this.shape = shape;

  58.         // The Pareto distribution approaches a Dirac delta function when shape -> inf.
  59.         // Parameterisations can also lead to underflow in the standard computation.
  60.         // Extract the PDF and CDF to specialized implementations to handle edge cases.

  61.         // Pre-compute factors for the standard computation
  62.         final double shapeByScalePowShape = shape * Math.pow(scale, shape);
  63.         final double logShapePlusShapeByLogScale = Math.log(shape) + Math.log(scale) * shape;

  64.         if (shapeByScalePowShape < Double.POSITIVE_INFINITY &&
  65.             shapeByScalePowShape >= Double.MIN_NORMAL) {
  66.             // Standard computation
  67.             pdf = x -> shapeByScalePowShape / Math.pow(x, shape + 1);
  68.             logpdf = x -> logShapePlusShapeByLogScale - Math.log(x) * (shape + 1);
  69.         } else {
  70.             // Standard computation overflow; underflow to sub-normal or zero; or nan (pow(1.0, inf))
  71.             if (Double.isFinite(logShapePlusShapeByLogScale)) {
  72.                 // Log computation is valid
  73.                 logpdf = x -> logShapePlusShapeByLogScale - Math.log(x) * (shape + 1);
  74.                 pdf = x -> Math.exp(logpdf.applyAsDouble(x));
  75.             } else  {
  76.                 // Assume Dirac function
  77.                 logpdf = x -> x > scale ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
  78.                 // PDF has infinite value at lower bound
  79.                 pdf = x -> x > scale ? 0 : Double.POSITIVE_INFINITY;
  80.             }
  81.         }
  82.     }

  83.     /**
  84.      * Creates a Pareto distribution.
  85.      *
  86.      * @param scale Scale parameter (minimum possible value of X).
  87.      * @param shape Shape parameter (Pareto index).
  88.      * @return the distribution
  89.      * @throws IllegalArgumentException if {@code scale <= 0}, {@code scale} is
  90.      * infinite, or {@code shape <= 0}.
  91.      */
  92.     public static ParetoDistribution of(double scale,
  93.                                         double shape) {
  94.         if (scale <= 0 || scale == Double.POSITIVE_INFINITY) {
  95.             throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE_FINITE, scale);
  96.         }
  97.         if (shape <= 0) {
  98.             throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE, shape);
  99.         }
  100.         return new ParetoDistribution(scale, shape);
  101.     }

  102.     /**
  103.      * Gets the scale parameter of this distribution.
  104.      * This is the minimum possible value of X.
  105.      *
  106.      * @return the scale parameter.
  107.      */
  108.     public double getScale() {
  109.         return scale;
  110.     }

  111.     /**
  112.      * Gets the shape parameter of this distribution.
  113.      * This is the Pareto index.
  114.      *
  115.      * @return the shape parameter.
  116.      */
  117.     public double getShape() {
  118.         return shape;
  119.     }

  120.     /**
  121.      * {@inheritDoc}
  122.      *
  123.      * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the PDF is:
  124.      *
  125.      * <p>\[ f(x; k, \alpha) = \begin{cases}
  126.      *       0                                       &amp; \text{for } x \lt k \\
  127.      *       \frac{\alpha  k^\alpha}{x^{\alpha + 1}} &amp; \text{for } x \ge k
  128.      *       \end{cases} \]
  129.      */
  130.     @Override
  131.     public double density(double x) {
  132.         if (x < scale) {
  133.             return 0;
  134.         }
  135.         return pdf.applyAsDouble(x);
  136.     }

  137.     /** {@inheritDoc}
  138.      *
  139.      * <p>See documentation of {@link #density(double)} for computation details.
  140.      */
  141.     @Override
  142.     public double logDensity(double x) {
  143.         if (x < scale) {
  144.             return Double.NEGATIVE_INFINITY;
  145.         }
  146.         return logpdf.applyAsDouble(x);
  147.     }

  148.     /**
  149.      * {@inheritDoc}
  150.      *
  151.      * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the CDF is:
  152.      *
  153.      * <p>\[ F(x; k, \alpha) = \begin{cases}
  154.      *       0                                     &amp; \text{for } x \le k \\
  155.      *       1 - \left( \frac{k}{x} \right)^\alpha &amp; \text{for } x \gt k
  156.      *       \end{cases} \]
  157.      */
  158.     @Override
  159.     public double cumulativeProbability(double x)  {
  160.         if (x <= scale) {
  161.             return 0;
  162.         }
  163.         // Increase accuracy for CDF close to 0 by using a log calculation:
  164.         // 1 - exp(α * ln(k / x)) == -(exp(α * ln(k / x)) - 1)
  165.         return -Math.expm1(shape * Math.log(scale / x));
  166.     }

  167.     /**
  168.      * {@inheritDoc}
  169.      *
  170.      * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the survival function is:
  171.      *
  172.      * <p>\[ S(x; k, \alpha) = \begin{cases}
  173.      *       1                                 &amp; \text{for } x \le k \\
  174.      *       \left( \frac{k}{x} \right)^\alpha &amp; \text{for } x \gt k
  175.      *       \end{cases} \]
  176.      */
  177.     @Override
  178.     public double survivalProbability(double x)  {
  179.         if (x <= scale) {
  180.             return 1;
  181.         }
  182.         return Math.pow(scale / x, shape);
  183.     }

  184.     /** {@inheritDoc} */
  185.     @Override
  186.     public double inverseCumulativeProbability(double p) {
  187.         ArgumentUtils.checkProbability(p);
  188.         if (p == 0) {
  189.             return getSupportLowerBound();
  190.         }
  191.         if (p == 1) {
  192.             return getSupportUpperBound();
  193.         }
  194.         return scale / Math.exp(Math.log1p(-p) / shape);
  195.     }

  196.     /** {@inheritDoc} */
  197.     @Override
  198.     public double inverseSurvivalProbability(double p) {
  199.         ArgumentUtils.checkProbability(p);
  200.         if (p == 1) {
  201.             return getSupportLowerBound();
  202.         }
  203.         if (p == 0) {
  204.             return getSupportUpperBound();
  205.         }
  206.         return scale / Math.pow(p, 1 / shape);
  207.     }

  208.     /**
  209.      * {@inheritDoc}
  210.      *
  211.      * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the mean is:
  212.      *
  213.      * <p>\[ \mathbb{E}[X] = \begin{cases}
  214.      *       \infty                      &amp; \text{for } \alpha \le 1 \\
  215.      *       \frac{k \alpha}{(\alpha-1)} &amp; \text{for } \alpha \gt 1
  216.      *       \end{cases} \]
  217.      */
  218.     @Override
  219.     public double getMean() {
  220.         if (shape <= 1) {
  221.             return Double.POSITIVE_INFINITY;
  222.         }
  223.         if (shape == Double.POSITIVE_INFINITY) {
  224.             return scale;
  225.         }
  226.         return scale * (shape / (shape - 1));
  227.     }

  228.     /**
  229.      * {@inheritDoc}
  230.      *
  231.      * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the variance is:
  232.      *
  233.      * <p>\[ \operatorname{var}[X] = \begin{cases}
  234.      *       \infty                                     &amp; \text{for } \alpha \le 2 \\
  235.      *       \frac{k^2 \alpha}{(\alpha-1)^2 (\alpha-2)} &amp; \text{for } \alpha \gt 2
  236.      *       \end{cases} \]
  237.      */
  238.     @Override
  239.     public double getVariance() {
  240.         if (shape <= MIN_SHAPE_FOR_VARIANCE) {
  241.             return Double.POSITIVE_INFINITY;
  242.         }
  243.         if (shape == Double.POSITIVE_INFINITY) {
  244.             return 0;
  245.         }
  246.         final double s = shape - 1;
  247.         final double z = shape / s / s / (shape - 2);
  248.         // Avoid intermediate overflow of scale^2 if z is small
  249.         return z < 1 ? z * scale * scale : scale * scale * z;
  250.     }

  251.     /**
  252.      * {@inheritDoc}
  253.      * <p>
  254.      * The lower bound of the support is equal to the scale parameter {@code k}.
  255.      *
  256.      * @return scale.
  257.      */
  258.     @Override
  259.     public double getSupportLowerBound() {
  260.         return getScale();
  261.     }

  262.     /**
  263.      * {@inheritDoc}
  264.      * <p>
  265.      * The upper bound of the support is always positive infinity.
  266.      *
  267.      * @return {@linkplain Double#POSITIVE_INFINITY positive infinity}.
  268.      */
  269.     @Override
  270.     public double getSupportUpperBound() {
  271.         return Double.POSITIVE_INFINITY;
  272.     }

  273.     /** {@inheritDoc} */
  274.     @Override
  275.     public ContinuousDistribution.Sampler createSampler(final UniformRandomProvider rng) {
  276.         // Pareto distribution sampler
  277.         return InverseTransformParetoSampler.of(rng, scale, shape)::sample;
  278.     }
  279. }