FDistribution.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.statistics.distribution;

  18. import org.apache.commons.numbers.gamma.LogBeta;
  19. import org.apache.commons.numbers.gamma.RegularizedBeta;

  20. /**
  21.  * Implementation of the F-distribution.
  22.  *
  23.  * <p>The probability density function of \( X \) is:
  24.  *
  25.  * <p>\[ \begin{aligned}
  26.  *       f(x; n, m) &amp;= \frac{1}{\operatorname{B}\left(\frac{n}{2},\frac{m}{2}\right)} \left(\frac{n}{m}\right)^{n/2} x^{n/2 - 1} \left(1+\frac{n}{m} \, x \right)^{-(n+m)/2} \\
  27.  *                  &amp;= \frac{n^{\frac n 2} m^{\frac m 2} x^{\frac{n}{2}-1} }{ (nx+m)^{\frac{(n+m)}{2}} \operatorname{B}\left(\frac{n}{2},\frac{m}{2}\right)}   \end{aligned} \]
  28.  *
  29.  * <p>for \( n, m &gt; 0 \) the degrees of freedom, \( \operatorname{B}(a, b) \) is the beta function,
  30.  * and \( x \in [0, \infty) \).
  31.  *
  32.  * @see <a href="https://en.wikipedia.org/wiki/F-distribution">F-distribution (Wikipedia)</a>
  33.  * @see <a href="https://mathworld.wolfram.com/F-Distribution.html">F-distribution (MathWorld)</a>
  34.  */
  35. public final class FDistribution extends AbstractContinuousDistribution {
  36.     /** Support lower bound. */
  37.     private static final double SUPPORT_LO = 0;
  38.     /** Support upper bound. */
  39.     private static final double SUPPORT_HI = Double.POSITIVE_INFINITY;
  40.     /** The minimum degrees of freedom for the denominator when computing the mean. */
  41.     private static final double MIN_DENOMINATOR_DF_FOR_MEAN = 2.0;
  42.     /** The minimum degrees of freedom for the denominator when computing the variance. */
  43.     private static final double MIN_DENOMINATOR_DF_FOR_VARIANCE = 4.0;

  44.     /** The numerator degrees of freedom. */
  45.     private final double numeratorDegreesOfFreedom;
  46.     /** The denominator degrees of freedom. */
  47.     private final double denominatorDegreesOfFreedom;
  48.     /** n/2 * log(n) + m/2 * log(m) with n = numerator DF and m = denominator DF. */
  49.     private final double nHalfLogNmHalfLogM;
  50.     /** LogBeta(n/2, n/2) with n = numerator DF. */
  51.     private final double logBetaNhalfMhalf;
  52.     /** Cached value for inverse probability function. */
  53.     private final double mean;
  54.     /** Cached value for inverse probability function. */
  55.     private final double variance;

  56.     /**
  57.      * @param numeratorDegreesOfFreedom Numerator degrees of freedom.
  58.      * @param denominatorDegreesOfFreedom Denominator degrees of freedom.
  59.      */
  60.     private FDistribution(double numeratorDegreesOfFreedom,
  61.                           double denominatorDegreesOfFreedom) {
  62.         this.numeratorDegreesOfFreedom = numeratorDegreesOfFreedom;
  63.         this.denominatorDegreesOfFreedom = denominatorDegreesOfFreedom;
  64.         final double nhalf = numeratorDegreesOfFreedom / 2;
  65.         final double mhalf = denominatorDegreesOfFreedom / 2;
  66.         nHalfLogNmHalfLogM = nhalf * Math.log(numeratorDegreesOfFreedom) +
  67.                              mhalf * Math.log(denominatorDegreesOfFreedom);
  68.         logBetaNhalfMhalf = LogBeta.value(nhalf, mhalf);

  69.         if (denominatorDegreesOfFreedom > MIN_DENOMINATOR_DF_FOR_MEAN) {
  70.             mean = denominatorDegreesOfFreedom / (denominatorDegreesOfFreedom - 2);
  71.         } else {
  72.             mean = Double.NaN;
  73.         }
  74.         if (denominatorDegreesOfFreedom > MIN_DENOMINATOR_DF_FOR_VARIANCE) {
  75.             final double denomDFMinusTwo = denominatorDegreesOfFreedom - 2;
  76.             variance = (2 * (denominatorDegreesOfFreedom * denominatorDegreesOfFreedom) *
  77.                             (numeratorDegreesOfFreedom + denominatorDegreesOfFreedom - 2)) /
  78.                        (numeratorDegreesOfFreedom * (denomDFMinusTwo * denomDFMinusTwo) *
  79.                             (denominatorDegreesOfFreedom - 4));
  80.         } else {
  81.             variance = Double.NaN;
  82.         }
  83.     }

  84.     /**
  85.      * Creates an F-distribution.
  86.      *
  87.      * @param numeratorDegreesOfFreedom Numerator degrees of freedom.
  88.      * @param denominatorDegreesOfFreedom Denominator degrees of freedom.
  89.      * @return the distribution
  90.      * @throws IllegalArgumentException if {@code numeratorDegreesOfFreedom <= 0} or
  91.      * {@code denominatorDegreesOfFreedom <= 0}.
  92.      */
  93.     public static FDistribution of(double numeratorDegreesOfFreedom,
  94.                                    double denominatorDegreesOfFreedom) {
  95.         if (numeratorDegreesOfFreedom <= 0) {
  96.             throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE,
  97.                                             numeratorDegreesOfFreedom);
  98.         }
  99.         if (denominatorDegreesOfFreedom <= 0) {
  100.             throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE,
  101.                                             denominatorDegreesOfFreedom);
  102.         }
  103.         return new FDistribution(numeratorDegreesOfFreedom, denominatorDegreesOfFreedom);
  104.     }

  105.     /**
  106.      * Gets the numerator degrees of freedom parameter of this distribution.
  107.      *
  108.      * @return the numerator degrees of freedom.
  109.      */
  110.     public double getNumeratorDegreesOfFreedom() {
  111.         return numeratorDegreesOfFreedom;
  112.     }

  113.     /**
  114.      * Gets the denominator degrees of freedom parameter of this distribution.
  115.      *
  116.      * @return the denominator degrees of freedom.
  117.      */
  118.     public double getDenominatorDegreesOfFreedom() {
  119.         return denominatorDegreesOfFreedom;
  120.     }

  121.     /** {@inheritDoc}
  122.      *
  123.      * <p>Returns the limit when {@code x = 0}:
  124.      * <ul>
  125.      * <li>{@code df1 < 2}: Infinity
  126.      * <li>{@code df1 == 2}: 1
  127.      * <li>{@code df1 > 2}: 0
  128.      * </ul>
  129.      * <p>Where {@code df1} is the numerator degrees of freedom.
  130.      */
  131.     @Override
  132.     public double density(double x) {
  133.         if (x <= SUPPORT_LO ||
  134.             x >= SUPPORT_HI) {
  135.             // Special case x=0:
  136.             // PDF reduces to the term x^(df1 / 2 - 1) multiplied by a scaling factor
  137.             if (x == SUPPORT_LO && numeratorDegreesOfFreedom <= 2) {
  138.                 return numeratorDegreesOfFreedom == 2 ?
  139.                     1 :
  140.                     Double.POSITIVE_INFINITY;
  141.             }
  142.             return 0;
  143.         }
  144.         return computeDensity(x, false);
  145.     }

  146.     /** {@inheritDoc}
  147.      *
  148.      * <p>Returns the limit when {@code x = 0}:
  149.      * <ul>
  150.      * <li>{@code df1 < 2}: Infinity
  151.      * <li>{@code df1 == 2}: 0
  152.      * <li>{@code df1 > 2}: -Infinity
  153.      * </ul>
  154.      * <p>Where {@code df1} is the numerator degrees of freedom.
  155.      */
  156.     @Override
  157.     public double logDensity(double x) {
  158.         if (x <= SUPPORT_LO ||
  159.             x >= SUPPORT_HI) {
  160.             // Special case x=0:
  161.             // PDF reduces to the term x^(df1 / 2 - 1) multiplied by a scaling factor
  162.             if (x == SUPPORT_LO && numeratorDegreesOfFreedom <= 2) {
  163.                 return numeratorDegreesOfFreedom == 2 ?
  164.                     0 :
  165.                     Double.POSITIVE_INFINITY;
  166.             }
  167.             return Double.NEGATIVE_INFINITY;
  168.         }
  169.         return computeDensity(x, true);
  170.     }

  171.     /**
  172.      * Compute the density at point x. Assumes x is within the support bound.
  173.      *
  174.      * @param x Value
  175.      * @param log true to compute the log density
  176.      * @return pdf(x) or logpdf(x)
  177.      */
  178.     private double computeDensity(double x, boolean log) {
  179.         // The log computation may suffer cancellation effects due to summation of large
  180.         // opposing terms. Use it when the standard PDF result is not normal.

  181.         // Keep the z argument to the regularized beta well away from 1 to avoid rounding error.
  182.         // See: https://www.boost.org/doc/libs/1_78_0/libs/math/doc/html/math_toolkit/dist_ref/dists/f_dist.html

  183.         final double n = numeratorDegreesOfFreedom;
  184.         final double m = denominatorDegreesOfFreedom;
  185.         final double nx = n * x;
  186.         final double z = m + nx;
  187.         final double y = n * m / (z * z);
  188.         double p;
  189.         if (nx > m) {
  190.             p = y * RegularizedBeta.derivative(m / z,
  191.                                                m / 2, n / 2);
  192.         } else {
  193.             p = y * RegularizedBeta.derivative(nx / z,
  194.                                                n / 2, m / 2);
  195.         }
  196.         // RegularizedBeta.derivative can have intermediate overflow before normalisation
  197.         // with small y. Check the result for a normal finite number.
  198.         if (p <= Double.MAX_VALUE && p >= Double.MIN_NORMAL) {
  199.             return log ? Math.log(p) : p;
  200.         }
  201.         // Fall back to the log computation
  202.         p = nHalfLogNmHalfLogM + (n / 2 - 1) * Math.log(x) - logBetaNhalfMhalf -
  203.                 ((n + m) / 2) * Math.log(z);
  204.         return log ? p : Math.exp(p);
  205.     }

  206.     /** {@inheritDoc} */
  207.     @Override
  208.     public double cumulativeProbability(double x)  {
  209.         if (x <= SUPPORT_LO) {
  210.             return 0;
  211.         } else if (x >= SUPPORT_HI) {
  212.             return 1;
  213.         }

  214.         final double n = numeratorDegreesOfFreedom;
  215.         final double m = denominatorDegreesOfFreedom;

  216.         // Keep the z argument to the regularized beta well away from 1 to avoid rounding error.
  217.         // See: https://www.boost.org/doc/libs/1_78_0/libs/math/doc/html/math_toolkit/dist_ref/dists/f_dist.html
  218.         // Note the logic in the Boost documentation for pdf and cdf is contradictory.
  219.         // This order will keep the argument far from 1.

  220.         final double nx = n * x;
  221.         if (nx > m) {
  222.             return RegularizedBeta.complement(m / (m + nx),
  223.                                               m / 2, n / 2);
  224.         }
  225.         return RegularizedBeta.value(nx / (m + nx),
  226.                                      n / 2, m / 2);
  227.     }

  228.     /** {@inheritDoc} */
  229.     @Override
  230.     public double survivalProbability(double x)  {
  231.         if (x <= SUPPORT_LO) {
  232.             return 1;
  233.         } else if (x >= SUPPORT_HI) {
  234.             return 0;
  235.         }

  236.         final double n = numeratorDegreesOfFreedom;
  237.         final double m = denominatorDegreesOfFreedom;

  238.         // Do the opposite of the CDF

  239.         final double nx = n * x;
  240.         if (nx > m) {
  241.             return RegularizedBeta.value(m / (m + nx),
  242.                                          m / 2, n / 2);
  243.         }
  244.         return RegularizedBeta.complement(nx / (m + nx),
  245.                                           n / 2, m / 2);
  246.     }

  247.     /**
  248.      * {@inheritDoc}
  249.      *
  250.      * <p>For denominator degrees of freedom parameter \( m \), the mean is:
  251.      *
  252.      * <p>\[ \mathbb{E}[X] = \begin{cases}
  253.      *       \frac{m}{m-2}    &amp; \text{for } m \gt 2 \\
  254.      *       \text{undefined} &amp; \text{otherwise}
  255.      *       \end{cases} \]
  256.      *
  257.      * @return the mean, or {@link Double#NaN NaN} if it is not defined.
  258.      */
  259.     @Override
  260.     public double getMean() {
  261.         return mean;
  262.     }

  263.     /**
  264.      * {@inheritDoc}
  265.      *
  266.      * <p>For numerator degrees of freedom parameter \( n \) and denominator
  267.      * degrees of freedom parameter \( m \), the variance is:
  268.      *
  269.      * <p>\[ \operatorname{var}[X] = \begin{cases}
  270.      *       \frac{2m^2 (n+m-2)}{n (m-2)^2 (m-4)} &amp; \text{for } m \gt 4 \\
  271.      *       \text{undefined}                     &amp; \text{otherwise}
  272.      *       \end{cases} \]
  273.      *
  274.      * @return the variance, or {@link Double#NaN NaN} if it is not defined.
  275.      */
  276.     @Override
  277.     public double getVariance() {
  278.         return variance;
  279.     }

  280.     /**
  281.      * {@inheritDoc}
  282.      *
  283.      * <p>The lower bound of the support is always 0.
  284.      *
  285.      * @return 0.
  286.      */
  287.     @Override
  288.     public double getSupportLowerBound() {
  289.         return SUPPORT_LO;
  290.     }

  291.     /**
  292.      * {@inheritDoc}
  293.      *
  294.      * <p>The upper bound of the support is always positive infinity.
  295.      *
  296.      * @return {@linkplain Double#POSITIVE_INFINITY positive infinity}.
  297.      */
  298.     @Override
  299.     public double getSupportUpperBound() {
  300.         return SUPPORT_HI;
  301.     }
  302. }