TDistribution.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.statistics.distribution;

  18. import org.apache.commons.numbers.gamma.Beta;
  19. import org.apache.commons.numbers.gamma.LogBeta;
  20. import org.apache.commons.numbers.gamma.RegularizedBeta;
  21. import org.apache.commons.rng.UniformRandomProvider;
  22. import org.apache.commons.rng.sampling.distribution.TSampler;

  23. /**
  24.  * Implementation of Student's t-distribution.
  25.  *
  26.  * <p>The probability density function of \( X \) is:
  27.  *
  28.  * <p>\[ f(x; v) = \frac{\Gamma(\frac{\nu+1}{2})} {\sqrt{\nu\pi}\,\Gamma(\frac{\nu}{2})} \left(1+\frac{t^2}{\nu} \right)^{\!-\frac{\nu+1}{2}} \]
  29.  *
  30.  * <p>for \( v &gt; 0 \) the degrees of freedom,
  31.  * \( \Gamma \) is the gamma function, and
  32.  * \( x \in (-\infty, \infty) \).
  33.  *
  34.  * @see <a href="https://en.wikipedia.org/wiki/Student%27s_t-distribution">Student&#39;s t-distribution (Wikipedia)</a>
  35.  * @see <a href="https://mathworld.wolfram.com/Studentst-Distribution.html">Student&#39;s t-distribution (MathWorld)</a>
  36.  */
  37. public abstract class TDistribution extends AbstractContinuousDistribution {
  38.     /** The degrees of freedom. */
  39.     private final double degreesOfFreedom;

  40.     /**
  41.      * Specialisation of the T-distribution used when there are infinite degrees of freedom.
  42.      * In this case the distribution matches a normal distribution. This is used when the
  43.      * variance is not different from 1.0.
  44.      *
  45.      * <p>This delegates all methods to the standard normal distribution. Instances are
  46.      * allowed to provide access to the degrees of freedom used during construction.
  47.      */
  48.     private static class NormalTDistribution extends TDistribution {
  49.         /** A standard normal distribution used for calculations.
  50.          * This is immutable and thread-safe and can be used across instances. */
  51.         private static final NormalDistribution STANDARD_NORMAL = NormalDistribution.of(0, 1);

  52.         /**
  53.          * @param degreesOfFreedom Degrees of freedom.
  54.          */
  55.         NormalTDistribution(double degreesOfFreedom) {
  56.             super(degreesOfFreedom);
  57.         }

  58.         @Override
  59.         public double density(double x) {
  60.             return STANDARD_NORMAL.density(x);
  61.         }

  62.         @Override
  63.         public double probability(double x0, double x1) {
  64.             return STANDARD_NORMAL.probability(x0, x1);
  65.         }

  66.         @Override
  67.         public double logDensity(double x) {
  68.             return STANDARD_NORMAL.logDensity(x);
  69.         }

  70.         @Override
  71.         public double cumulativeProbability(double x) {
  72.             return STANDARD_NORMAL.cumulativeProbability(x);
  73.         }

  74.         @Override
  75.         public double inverseCumulativeProbability(double p) {
  76.             return STANDARD_NORMAL.inverseCumulativeProbability(p);
  77.         }

  78.         // Survival probability functions inherit the symmetry operations from the TDistribution

  79.         @Override
  80.         public double getMean() {
  81.             return 0;
  82.         }

  83.         @Override
  84.         public double getVariance() {
  85.             return 1.0;
  86.         }

  87.         @Override
  88.         public Sampler createSampler(UniformRandomProvider rng) {
  89.             return STANDARD_NORMAL.createSampler(rng);
  90.         }
  91.     }

  92.     /**
  93.      * Implementation of Student's T-distribution.
  94.      */
  95.     private static class StudentsTDistribution extends TDistribution {
  96.         /** 2. */
  97.         private static final double TWO = 2;
  98.         /** The threshold for the density function where the
  99.          * power function base minus 1 is close to zero. */
  100.         private static final double CLOSE_TO_ZERO = 0.25;

  101.         /** -(v + 1) / 2, where v = degrees of freedom. */
  102.         private final double mvp1Over2;
  103.         /** Density normalisation factor, sqrt(v) * beta(1/2, v/2), where v = degrees of freedom. */
  104.         private final double densityNormalisation;
  105.         /** Log density normalisation term, 0.5 * log(v) + log(beta(1/2, v/2)), where v = degrees of freedom. */
  106.         private final double logDensityNormalisation;
  107.         /** Cached value for inverse probability function. */
  108.         private final double mean;
  109.         /** Cached value for inverse probability function. */
  110.         private final double variance;

  111.         /**
  112.          * @param degreesOfFreedom Degrees of freedom.
  113.          * @param variance Precomputed variance
  114.          */
  115.         StudentsTDistribution(double degreesOfFreedom, double variance) {
  116.             super(degreesOfFreedom);

  117.             mvp1Over2 = -0.5 * (degreesOfFreedom + 1);
  118.             densityNormalisation = Math.sqrt(degreesOfFreedom) * Beta.value(0.5, degreesOfFreedom / 2);
  119.             logDensityNormalisation = 0.5 * Math.log(degreesOfFreedom) + LogBeta.value(0.5, degreesOfFreedom / 2);
  120.             mean = degreesOfFreedom > 1 ? 0 : Double.NaN;
  121.             this.variance = variance;
  122.         }

  123.         /**
  124.          * @param degreesOfFreedom Degrees of freedom.
  125.          * @return the variance
  126.          */
  127.         static double computeVariance(double degreesOfFreedom) {
  128.             if (degreesOfFreedom == Double.POSITIVE_INFINITY) {
  129.                 return 1;
  130.             } else if (degreesOfFreedom > TWO) {
  131.                 return degreesOfFreedom / (degreesOfFreedom - 2);
  132.             } else if (degreesOfFreedom > 1) {
  133.                 return Double.POSITIVE_INFINITY;
  134.             } else {
  135.                 return Double.NaN;
  136.             }
  137.         }

  138.         @Override
  139.         public double density(double x) {
  140.             //          1                       -(v+1)/2
  141.             // ------------------- * (1 + t^2/v)
  142.             // sqrt(v) B(1/2, v/2)

  143.             final double t2OverV = x * x / getDegreesOfFreedom();
  144.             if (t2OverV < CLOSE_TO_ZERO) {
  145.                 // Avoid power function when the base is close to 1
  146.                 return Math.exp(Math.log1p(t2OverV) * mvp1Over2) / densityNormalisation;
  147.             }
  148.             return Math.pow(1 + t2OverV, mvp1Over2) / densityNormalisation;
  149.         }

  150.         @Override
  151.         public double logDensity(double x) {
  152.             return Math.log1p(x * x / getDegreesOfFreedom()) * mvp1Over2 - logDensityNormalisation;
  153.         }

  154.         @Override
  155.         public double cumulativeProbability(double x) {
  156.             if (x == 0) {
  157.                 return 0.5;
  158.             }
  159.             final double v = getDegreesOfFreedom();

  160.             // cdf(t) = 1 - 0.5 * I_x(t)(v/2, 1/2)
  161.             // where x(t) = v / (v + t^2)
  162.             //
  163.             // When t^2 << v using the regularized beta results
  164.             // in loss of precision. Use the complement instead:
  165.             // I[x](a,b) = 1 - I[1-x](b,a)
  166.             // x   = v   / (v + t^2)
  167.             // 1-x = t^2 / (v + t^2)
  168.             // Use the threshold documented in the Boost t-distribution:
  169.             // https://www.boost.org/doc/libs/1_78_0/libs/math/doc/html/math_toolkit/dist_ref/dists/students_t_dist.html

  170.             final double t2 = x * x;
  171.             final double z;
  172.             if (v < 2 * t2) {
  173.                 z = RegularizedBeta.value(v / (v + t2), v / 2, 0.5) / 2;
  174.             } else {
  175.                 z = RegularizedBeta.complement(t2 / (v + t2), 0.5, v / 2) / 2;
  176.             }
  177.             return x > 0 ? 1 - z : z;
  178.         }

  179.         @Override
  180.         public double getMean() {
  181.             return mean;
  182.         }

  183.         @Override
  184.         public double getVariance() {
  185.             return variance;
  186.         }

  187.         @Override
  188.         double getMedian() {
  189.             // Overridden for the probability(double, double) method.
  190.             // This is intentionally not a public method.
  191.             return 0;
  192.         }

  193.         @Override
  194.         public Sampler createSampler(UniformRandomProvider rng) {
  195.             // T distribution sampler.
  196.             return TSampler.of(rng, getDegreesOfFreedom())::sample;
  197.         }
  198.     }

  199.     /**
  200.      * @param degreesOfFreedom Degrees of freedom.
  201.      */
  202.     TDistribution(double degreesOfFreedom) {
  203.         this.degreesOfFreedom = degreesOfFreedom;
  204.     }

  205.     /**
  206.      * Creates a Student's t-distribution.
  207.      *
  208.      * @param degreesOfFreedom Degrees of freedom.
  209.      * @return the distribution
  210.      * @throws IllegalArgumentException if {@code degreesOfFreedom <= 0}
  211.      */
  212.     public static TDistribution of(double degreesOfFreedom) {
  213.         if (degreesOfFreedom <= 0) {
  214.             throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE,
  215.                                             degreesOfFreedom);
  216.         }
  217.         // If the variance converges to 1 use a NormalDistribution.
  218.         // Occurs at 2^55 = 3.60e16
  219.         final double variance = StudentsTDistribution.computeVariance(degreesOfFreedom);
  220.         if (variance == 1) {
  221.             return new NormalTDistribution(degreesOfFreedom);
  222.         }
  223.         return new StudentsTDistribution(degreesOfFreedom, variance);
  224.     }

  225.     /**
  226.      * Gets the degrees of freedom parameter of this distribution.
  227.      *
  228.      * @return the degrees of freedom.
  229.      */
  230.     public double getDegreesOfFreedom() {
  231.         return degreesOfFreedom;
  232.     }

  233.     /** {@inheritDoc} */
  234.     @Override
  235.     public double survivalProbability(double x) {
  236.         // Exploit symmetry
  237.         return cumulativeProbability(-x);
  238.     }

  239.     /** {@inheritDoc} */
  240.     @Override
  241.     public double inverseSurvivalProbability(double p) {
  242.         // Exploit symmetry
  243.         // Subtract from 0 avoids returning -0.0 for p=0.5
  244.         return 0.0 - inverseCumulativeProbability(p);
  245.     }

  246.     /**
  247.      * {@inheritDoc}
  248.      *
  249.      * <p>For degrees of freedom parameter \( v \), the mean is:
  250.      *
  251.      * <p>\[ \mathbb{E}[X] = \begin{cases}
  252.      *       0                &amp; \text{for } v \gt 1 \\
  253.      *       \text{undefined} &amp; \text{otherwise}
  254.      *       \end{cases} \]
  255.      *
  256.      * @return the mean, or {@link Double#NaN NaN} if it is not defined.
  257.      */
  258.     @Override
  259.     public abstract double getMean();

  260.     /**
  261.      * {@inheritDoc}
  262.      *
  263.      * <p>For degrees of freedom parameter \( v \), the variance is:
  264.      *
  265.      * <p>\[ \operatorname{var}[X] = \begin{cases}
  266.      *       \frac{v}{v - 2}  &amp; \text{for } v \gt 2 \\
  267.      *       \infty           &amp; \text{for } 1 \lt v \le 2 \\
  268.      *       \text{undefined} &amp; \text{otherwise}
  269.      *       \end{cases} \]
  270.      *
  271.      * @return the variance, or {@link Double#NaN NaN} if it is not defined.
  272.      */
  273.     @Override
  274.     public abstract double getVariance();

  275.     /**
  276.      * {@inheritDoc}
  277.      *
  278.      * <p>The lower bound of the support is always negative infinity.
  279.      *
  280.      * @return {@linkplain Double#NEGATIVE_INFINITY negative infinity}.
  281.      */
  282.     @Override
  283.     public double getSupportLowerBound() {
  284.         return Double.NEGATIVE_INFINITY;
  285.     }

  286.     /**
  287.      * {@inheritDoc}
  288.      *
  289.      * <p>The upper bound of the support is always positive infinity.
  290.      *
  291.      * @return {@linkplain Double#POSITIVE_INFINITY positive infinity}.
  292.      */
  293.     @Override
  294.     public double getSupportUpperBound() {
  295.         return Double.POSITIVE_INFINITY;
  296.     }
  297. }