View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.statistics.distribution;
18  
19  import org.apache.commons.numbers.gamma.Beta;
20  import org.apache.commons.numbers.gamma.LogBeta;
21  import org.apache.commons.numbers.gamma.RegularizedBeta;
22  import org.apache.commons.rng.UniformRandomProvider;
23  import org.apache.commons.rng.sampling.distribution.TSampler;
24  
25  /**
26   * Implementation of Student's t-distribution.
27   *
28   * <p>The probability density function of \( X \) is:
29   *
30   * <p>\[ f(x; v) = \frac{\Gamma(\frac{\nu+1}{2})} {\sqrt{\nu\pi}\,\Gamma(\frac{\nu}{2})} \left(1+\frac{t^2}{\nu} \right)^{\!-\frac{\nu+1}{2}} \]
31   *
32   * <p>for \( v &gt; 0 \) the degrees of freedom,
33   * \( \Gamma \) is the gamma function, and
34   * \( x \in (-\infty, \infty) \).
35   *
36   * @see <a href="https://en.wikipedia.org/wiki/Student%27s_t-distribution">Student&#39;s t-distribution (Wikipedia)</a>
37   * @see <a href="https://mathworld.wolfram.com/Studentst-Distribution.html">Student&#39;s t-distribution (MathWorld)</a>
38   */
39  public abstract class TDistribution extends AbstractContinuousDistribution {
40      /** The degrees of freedom. */
41      private final double degreesOfFreedom;
42  
43      /**
44       * Specialisation of the T-distribution used when there are infinite degrees of freedom.
45       * In this case the distribution matches a normal distribution. This is used when the
46       * variance is not different from 1.0.
47       *
48       * <p>This delegates all methods to the standard normal distribution. Instances are
49       * allowed to provide access to the degrees of freedom used during construction.
50       */
51      private static class NormalTDistribution extends TDistribution {
52          /** A standard normal distribution used for calculations.
53           * This is immutable and thread-safe and can be used across instances. */
54          private static final NormalDistribution STANDARD_NORMAL = NormalDistribution.of(0, 1);
55  
56          /**
57           * @param degreesOfFreedom Degrees of freedom.
58           */
59          NormalTDistribution(double degreesOfFreedom) {
60              super(degreesOfFreedom);
61          }
62  
63          @Override
64          public double density(double x) {
65              return STANDARD_NORMAL.density(x);
66          }
67  
68          @Override
69          public double probability(double x0, double x1) {
70              return STANDARD_NORMAL.probability(x0, x1);
71          }
72  
73          @Override
74          public double logDensity(double x) {
75              return STANDARD_NORMAL.logDensity(x);
76          }
77  
78          @Override
79          public double cumulativeProbability(double x) {
80              return STANDARD_NORMAL.cumulativeProbability(x);
81          }
82  
83          @Override
84          public double inverseCumulativeProbability(double p) {
85              return STANDARD_NORMAL.inverseCumulativeProbability(p);
86          }
87  
88          // Survival probability functions inherit the symmetry operations from the TDistribution
89  
90          @Override
91          public double getMean() {
92              return 0;
93          }
94  
95          @Override
96          public double getVariance() {
97              return 1.0;
98          }
99  
100         @Override
101         public Sampler createSampler(UniformRandomProvider rng) {
102             return STANDARD_NORMAL.createSampler(rng);
103         }
104     }
105 
106     /**
107      * Implementation of Student's T-distribution.
108      */
109     private static class StudentsTDistribution extends TDistribution {
110         /** 2. */
111         private static final double TWO = 2;
112         /** The threshold for the density function where the
113          * power function base minus 1 is close to zero. */
114         private static final double CLOSE_TO_ZERO = 0.25;
115 
116         /** -(v + 1) / 2, where v = degrees of freedom. */
117         private final double mvp1Over2;
118         /** Density normalisation factor, sqrt(v) * beta(1/2, v/2), where v = degrees of freedom. */
119         private final double densityNormalisation;
120         /** Log density normalisation term, 0.5 * log(v) + log(beta(1/2, v/2)), where v = degrees of freedom. */
121         private final double logDensityNormalisation;
122         /** Cached value for inverse probability function. */
123         private final double mean;
124         /** Cached value for inverse probability function. */
125         private final double variance;
126 
127         /**
128          * @param degreesOfFreedom Degrees of freedom.
129          * @param variance Precomputed variance
130          */
131         StudentsTDistribution(double degreesOfFreedom, double variance) {
132             super(degreesOfFreedom);
133 
134             mvp1Over2 = -0.5 * (degreesOfFreedom + 1);
135             densityNormalisation = Math.sqrt(degreesOfFreedom) * Beta.value(0.5, degreesOfFreedom / 2);
136             logDensityNormalisation = 0.5 * Math.log(degreesOfFreedom) + LogBeta.value(0.5, degreesOfFreedom / 2);
137             mean = degreesOfFreedom > 1 ? 0 : Double.NaN;
138             this.variance = variance;
139         }
140 
141         /**
142          * @param degreesOfFreedom Degrees of freedom.
143          * @return the variance
144          */
145         static double computeVariance(double degreesOfFreedom) {
146             if (degreesOfFreedom == Double.POSITIVE_INFINITY) {
147                 return 1;
148             } else if (degreesOfFreedom > TWO) {
149                 return degreesOfFreedom / (degreesOfFreedom - 2);
150             } else if (degreesOfFreedom > 1) {
151                 return Double.POSITIVE_INFINITY;
152             } else {
153                 return Double.NaN;
154             }
155         }
156 
157         @Override
158         public double density(double x) {
159             //          1                       -(v+1)/2
160             // ------------------- * (1 + t^2/v)
161             // sqrt(v) B(1/2, v/2)
162 
163             final double t2OverV = x * x / getDegreesOfFreedom();
164             if (t2OverV < CLOSE_TO_ZERO) {
165                 // Avoid power function when the base is close to 1
166                 return Math.exp(Math.log1p(t2OverV) * mvp1Over2) / densityNormalisation;
167             }
168             return Math.pow(1 + t2OverV, mvp1Over2) / densityNormalisation;
169         }
170 
171         @Override
172         public double logDensity(double x) {
173             return Math.log1p(x * x / getDegreesOfFreedom()) * mvp1Over2 - logDensityNormalisation;
174         }
175 
176         @Override
177         public double cumulativeProbability(double x) {
178             if (x == 0) {
179                 return 0.5;
180             }
181             final double v = getDegreesOfFreedom();
182 
183             // cdf(t) = 1 - 0.5 * I_x(t)(v/2, 1/2)
184             // where x(t) = v / (v + t^2)
185             //
186             // When t^2 << v using the regularized beta results
187             // in loss of precision. Use the complement instead:
188             // I[x](a,b) = 1 - I[1-x](b,a)
189             // x   = v   / (v + t^2)
190             // 1-x = t^2 / (v + t^2)
191             // Use the threshold documented in the Boost t-distribution:
192             // https://www.boost.org/doc/libs/1_78_0/libs/math/doc/html/math_toolkit/dist_ref/dists/students_t_dist.html
193 
194             final double t2 = x * x;
195             final double z;
196             if (v < 2 * t2) {
197                 z = RegularizedBeta.value(v / (v + t2), v / 2, 0.5) / 2;
198             } else {
199                 z = RegularizedBeta.complement(t2 / (v + t2), 0.5, v / 2) / 2;
200             }
201             return x > 0 ? 1 - z : z;
202         }
203 
204         @Override
205         public double getMean() {
206             return mean;
207         }
208 
209         @Override
210         public double getVariance() {
211             return variance;
212         }
213 
214         @Override
215         double getMedian() {
216             // Overridden for the probability(double, double) method.
217             // This is intentionally not a public method.
218             return 0;
219         }
220 
221         @Override
222         public Sampler createSampler(UniformRandomProvider rng) {
223             // T distribution sampler.
224             return TSampler.of(rng, getDegreesOfFreedom())::sample;
225         }
226     }
227 
228     /**
229      * @param degreesOfFreedom Degrees of freedom.
230      */
231     TDistribution(double degreesOfFreedom) {
232         this.degreesOfFreedom = degreesOfFreedom;
233     }
234 
235     /**
236      * Creates a Student's t-distribution.
237      *
238      * @param degreesOfFreedom Degrees of freedom.
239      * @return the distribution
240      * @throws IllegalArgumentException if {@code degreesOfFreedom <= 0}
241      */
242     public static TDistribution of(double degreesOfFreedom) {
243         if (degreesOfFreedom <= 0) {
244             throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE,
245                                             degreesOfFreedom);
246         }
247         // If the variance converges to 1 use a NormalDistribution.
248         // Occurs at 2^55 = 3.60e16
249         final double variance = StudentsTDistribution.computeVariance(degreesOfFreedom);
250         if (variance == 1) {
251             return new NormalTDistribution(degreesOfFreedom);
252         }
253         return new StudentsTDistribution(degreesOfFreedom, variance);
254     }
255 
256     /**
257      * Gets the degrees of freedom parameter of this distribution.
258      *
259      * @return the degrees of freedom.
260      */
261     public double getDegreesOfFreedom() {
262         return degreesOfFreedom;
263     }
264 
265     /** {@inheritDoc} */
266     @Override
267     public double survivalProbability(double x) {
268         // Exploit symmetry
269         return cumulativeProbability(-x);
270     }
271 
272     /** {@inheritDoc} */
273     @Override
274     public double inverseSurvivalProbability(double p) {
275         // Exploit symmetry
276         // Subtract from 0 avoids returning -0.0 for p=0.5
277         return 0.0 - inverseCumulativeProbability(p);
278     }
279 
280     /**
281      * {@inheritDoc}
282      *
283      * <p>For degrees of freedom parameter \( v \), the mean is:
284      *
285      * <p>\[ \mathbb{E}[X] = \begin{cases}
286      *       0                &amp; \text{for } v \gt 1 \\
287      *       \text{undefined} &amp; \text{otherwise}
288      *       \end{cases} \]
289      *
290      * @return the mean, or {@link Double#NaN NaN} if it is not defined.
291      */
292     @Override
293     public abstract double getMean();
294 
295     /**
296      * {@inheritDoc}
297      *
298      * <p>For degrees of freedom parameter \( v \), the variance is:
299      *
300      * <p>\[ \operatorname{var}[X] = \begin{cases}
301      *       \frac{v}{v - 2}  &amp; \text{for } v \gt 2 \\
302      *       \infty           &amp; \text{for } 1 \lt v \le 2 \\
303      *       \text{undefined} &amp; \text{otherwise}
304      *       \end{cases} \]
305      *
306      * @return the variance, or {@link Double#NaN NaN} if it is not defined.
307      */
308     @Override
309     public abstract double getVariance();
310 
311     /**
312      * {@inheritDoc}
313      *
314      * <p>The lower bound of the support is always negative infinity.
315      *
316      * @return {@linkplain Double#NEGATIVE_INFINITY negative infinity}.
317      */
318     @Override
319     public double getSupportLowerBound() {
320         return Double.NEGATIVE_INFINITY;
321     }
322 
323     /**
324      * {@inheritDoc}
325      *
326      * <p>The upper bound of the support is always positive infinity.
327      *
328      * @return {@linkplain Double#POSITIVE_INFINITY positive infinity}.
329      */
330     @Override
331     public double getSupportUpperBound() {
332         return Double.POSITIVE_INFINITY;
333     }
334 }