View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.statistics.distribution;
19  
20  import org.apache.commons.numbers.gamma.LogBeta;
21  import org.apache.commons.numbers.gamma.RegularizedBeta;
22  
23  /**
24   * Implementation of the F-distribution.
25   *
26   * <p>The probability density function of \( X \) is:
27   *
28   * <p>\[ \begin{aligned}
29   *       f(x; n, m) &amp;= \frac{1}{\operatorname{B}\left(\frac{n}{2},\frac{m}{2}\right)} \left(\frac{n}{m}\right)^{n/2} x^{n/2 - 1} \left(1+\frac{n}{m} \, x \right)^{-(n+m)/2} \\
30   *                  &amp;= \frac{n^{\frac n 2} m^{\frac m 2} x^{\frac{n}{2}-1} }{ (nx+m)^{\frac{(n+m)}{2}} \operatorname{B}\left(\frac{n}{2},\frac{m}{2}\right)}   \end{aligned} \]
31   *
32   * <p>for \( n, m &gt; 0 \) the degrees of freedom, \( \operatorname{B}(a, b) \) is the beta function,
33   * and \( x \in [0, \infty) \).
34   *
35   * @see <a href="https://en.wikipedia.org/wiki/F-distribution">F-distribution (Wikipedia)</a>
36   * @see <a href="https://mathworld.wolfram.com/F-Distribution.html">F-distribution (MathWorld)</a>
37   */
38  public final class FDistribution extends AbstractContinuousDistribution {
39      /** Support lower bound. */
40      private static final double SUPPORT_LO = 0;
41      /** Support upper bound. */
42      private static final double SUPPORT_HI = Double.POSITIVE_INFINITY;
43      /** The minimum degrees of freedom for the denominator when computing the mean. */
44      private static final double MIN_DENOMINATOR_DF_FOR_MEAN = 2.0;
45      /** The minimum degrees of freedom for the denominator when computing the variance. */
46      private static final double MIN_DENOMINATOR_DF_FOR_VARIANCE = 4.0;
47  
48      /** The numerator degrees of freedom. */
49      private final double numeratorDegreesOfFreedom;
50      /** The denominator degrees of freedom. */
51      private final double denominatorDegreesOfFreedom;
52      /** n/2 * log(n) + m/2 * log(m) with n = numerator DF and m = denominator DF. */
53      private final double nHalfLogNmHalfLogM;
54      /** LogBeta(n/2, n/2) with n = numerator DF. */
55      private final double logBetaNhalfMhalf;
56      /** Cached value for inverse probability function. */
57      private final double mean;
58      /** Cached value for inverse probability function. */
59      private final double variance;
60  
61      /**
62       * @param numeratorDegreesOfFreedom Numerator degrees of freedom.
63       * @param denominatorDegreesOfFreedom Denominator degrees of freedom.
64       */
65      private FDistribution(double numeratorDegreesOfFreedom,
66                            double denominatorDegreesOfFreedom) {
67          this.numeratorDegreesOfFreedom = numeratorDegreesOfFreedom;
68          this.denominatorDegreesOfFreedom = denominatorDegreesOfFreedom;
69          final double nhalf = numeratorDegreesOfFreedom / 2;
70          final double mhalf = denominatorDegreesOfFreedom / 2;
71          nHalfLogNmHalfLogM = nhalf * Math.log(numeratorDegreesOfFreedom) +
72                               mhalf * Math.log(denominatorDegreesOfFreedom);
73          logBetaNhalfMhalf = LogBeta.value(nhalf, mhalf);
74  
75          if (denominatorDegreesOfFreedom > MIN_DENOMINATOR_DF_FOR_MEAN) {
76              mean = denominatorDegreesOfFreedom / (denominatorDegreesOfFreedom - 2);
77          } else {
78              mean = Double.NaN;
79          }
80          if (denominatorDegreesOfFreedom > MIN_DENOMINATOR_DF_FOR_VARIANCE) {
81              final double denomDFMinusTwo = denominatorDegreesOfFreedom - 2;
82              variance = (2 * (denominatorDegreesOfFreedom * denominatorDegreesOfFreedom) *
83                              (numeratorDegreesOfFreedom + denominatorDegreesOfFreedom - 2)) /
84                         (numeratorDegreesOfFreedom * (denomDFMinusTwo * denomDFMinusTwo) *
85                              (denominatorDegreesOfFreedom - 4));
86          } else {
87              variance = Double.NaN;
88          }
89      }
90  
91      /**
92       * Creates an F-distribution.
93       *
94       * @param numeratorDegreesOfFreedom Numerator degrees of freedom.
95       * @param denominatorDegreesOfFreedom Denominator degrees of freedom.
96       * @return the distribution
97       * @throws IllegalArgumentException if {@code numeratorDegreesOfFreedom <= 0} or
98       * {@code denominatorDegreesOfFreedom <= 0}.
99       */
100     public static FDistribution of(double numeratorDegreesOfFreedom,
101                                    double denominatorDegreesOfFreedom) {
102         if (numeratorDegreesOfFreedom <= 0) {
103             throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE,
104                                             numeratorDegreesOfFreedom);
105         }
106         if (denominatorDegreesOfFreedom <= 0) {
107             throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE,
108                                             denominatorDegreesOfFreedom);
109         }
110         return new FDistribution(numeratorDegreesOfFreedom, denominatorDegreesOfFreedom);
111     }
112 
113     /**
114      * Gets the numerator degrees of freedom parameter of this distribution.
115      *
116      * @return the numerator degrees of freedom.
117      */
118     public double getNumeratorDegreesOfFreedom() {
119         return numeratorDegreesOfFreedom;
120     }
121 
122     /**
123      * Gets the denominator degrees of freedom parameter of this distribution.
124      *
125      * @return the denominator degrees of freedom.
126      */
127     public double getDenominatorDegreesOfFreedom() {
128         return denominatorDegreesOfFreedom;
129     }
130 
131     /** {@inheritDoc}
132      *
133      * <p>Returns the limit when {@code x = 0}:
134      * <ul>
135      * <li>{@code df1 < 2}: Infinity
136      * <li>{@code df1 == 2}: 1
137      * <li>{@code df1 > 2}: 0
138      * </ul>
139      * <p>Where {@code df1} is the numerator degrees of freedom.
140      */
141     @Override
142     public double density(double x) {
143         if (x <= SUPPORT_LO ||
144             x >= SUPPORT_HI) {
145             // Special case x=0:
146             // PDF reduces to the term x^(df1 / 2 - 1) multiplied by a scaling factor
147             if (x == SUPPORT_LO && numeratorDegreesOfFreedom <= 2) {
148                 return numeratorDegreesOfFreedom == 2 ?
149                     1 :
150                     Double.POSITIVE_INFINITY;
151             }
152             return 0;
153         }
154         return computeDensity(x, false);
155     }
156 
157     /** {@inheritDoc}
158      *
159      * <p>Returns the limit when {@code x = 0}:
160      * <ul>
161      * <li>{@code df1 < 2}: Infinity
162      * <li>{@code df1 == 2}: 0
163      * <li>{@code df1 > 2}: -Infinity
164      * </ul>
165      * <p>Where {@code df1} is the numerator degrees of freedom.
166      */
167     @Override
168     public double logDensity(double x) {
169         if (x <= SUPPORT_LO ||
170             x >= SUPPORT_HI) {
171             // Special case x=0:
172             // PDF reduces to the term x^(df1 / 2 - 1) multiplied by a scaling factor
173             if (x == SUPPORT_LO && numeratorDegreesOfFreedom <= 2) {
174                 return numeratorDegreesOfFreedom == 2 ?
175                     0 :
176                     Double.POSITIVE_INFINITY;
177             }
178             return Double.NEGATIVE_INFINITY;
179         }
180         return computeDensity(x, true);
181     }
182 
183     /**
184      * Compute the density at point x. Assumes x is within the support bound.
185      *
186      * @param x Value
187      * @param log true to compute the log density
188      * @return pdf(x) or logpdf(x)
189      */
190     private double computeDensity(double x, boolean log) {
191         // The log computation may suffer cancellation effects due to summation of large
192         // opposing terms. Use it when the standard PDF result is not normal.
193 
194         // Keep the z argument to the regularized beta well away from 1 to avoid rounding error.
195         // See: https://www.boost.org/doc/libs/1_78_0/libs/math/doc/html/math_toolkit/dist_ref/dists/f_dist.html
196 
197         final double n = numeratorDegreesOfFreedom;
198         final double m = denominatorDegreesOfFreedom;
199         final double nx = n * x;
200         final double z = m + nx;
201         final double y = n * m / (z * z);
202         double p;
203         if (nx > m) {
204             p = y * RegularizedBeta.derivative(m / z,
205                                                m / 2, n / 2);
206         } else {
207             p = y * RegularizedBeta.derivative(nx / z,
208                                                n / 2, m / 2);
209         }
210         // RegularizedBeta.derivative can have intermediate overflow before normalisation
211         // with small y. Check the result for a normal finite number.
212         if (p <= Double.MAX_VALUE && p >= Double.MIN_NORMAL) {
213             return log ? Math.log(p) : p;
214         }
215         // Fall back to the log computation
216         p = nHalfLogNmHalfLogM + (n / 2 - 1) * Math.log(x) - logBetaNhalfMhalf -
217                 ((n + m) / 2) * Math.log(z);
218         return log ? p : Math.exp(p);
219     }
220 
221     /** {@inheritDoc} */
222     @Override
223     public double cumulativeProbability(double x)  {
224         if (x <= SUPPORT_LO) {
225             return 0;
226         } else if (x >= SUPPORT_HI) {
227             return 1;
228         }
229 
230         final double n = numeratorDegreesOfFreedom;
231         final double m = denominatorDegreesOfFreedom;
232 
233         // Keep the z argument to the regularized beta well away from 1 to avoid rounding error.
234         // See: https://www.boost.org/doc/libs/1_78_0/libs/math/doc/html/math_toolkit/dist_ref/dists/f_dist.html
235         // Note the logic in the Boost documentation for pdf and cdf is contradictory.
236         // This order will keep the argument far from 1.
237 
238         final double nx = n * x;
239         if (nx > m) {
240             return RegularizedBeta.complement(m / (m + nx),
241                                               m / 2, n / 2);
242         }
243         return RegularizedBeta.value(nx / (m + nx),
244                                      n / 2, m / 2);
245     }
246 
247     /** {@inheritDoc} */
248     @Override
249     public double survivalProbability(double x)  {
250         if (x <= SUPPORT_LO) {
251             return 1;
252         } else if (x >= SUPPORT_HI) {
253             return 0;
254         }
255 
256         final double n = numeratorDegreesOfFreedom;
257         final double m = denominatorDegreesOfFreedom;
258 
259         // Do the opposite of the CDF
260 
261         final double nx = n * x;
262         if (nx > m) {
263             return RegularizedBeta.value(m / (m + nx),
264                                          m / 2, n / 2);
265         }
266         return RegularizedBeta.complement(nx / (m + nx),
267                                           n / 2, m / 2);
268     }
269 
270     /**
271      * {@inheritDoc}
272      *
273      * <p>For denominator degrees of freedom parameter \( m \), the mean is:
274      *
275      * <p>\[ \mathbb{E}[X] = \begin{cases}
276      *       \frac{m}{m-2}    &amp; \text{for } m \gt 2 \\
277      *       \text{undefined} &amp; \text{otherwise}
278      *       \end{cases} \]
279      *
280      * @return the mean, or {@link Double#NaN NaN} if it is not defined.
281      */
282     @Override
283     public double getMean() {
284         return mean;
285     }
286 
287     /**
288      * {@inheritDoc}
289      *
290      * <p>For numerator degrees of freedom parameter \( n \) and denominator
291      * degrees of freedom parameter \( m \), the variance is:
292      *
293      * <p>\[ \operatorname{var}[X] = \begin{cases}
294      *       \frac{2m^2 (n+m-2)}{n (m-2)^2 (m-4)} &amp; \text{for } m \gt 4 \\
295      *       \text{undefined}                     &amp; \text{otherwise}
296      *       \end{cases} \]
297      *
298      * @return the variance, or {@link Double#NaN NaN} if it is not defined.
299      */
300     @Override
301     public double getVariance() {
302         return variance;
303     }
304 
305     /**
306      * {@inheritDoc}
307      *
308      * <p>The lower bound of the support is always 0.
309      *
310      * @return 0.
311      */
312     @Override
313     public double getSupportLowerBound() {
314         return SUPPORT_LO;
315     }
316 
317     /**
318      * {@inheritDoc}
319      *
320      * <p>The upper bound of the support is always positive infinity.
321      *
322      * @return {@linkplain Double#POSITIVE_INFINITY positive infinity}.
323      */
324     @Override
325     public double getSupportUpperBound() {
326         return SUPPORT_HI;
327     }
328 }