1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.statistics.distribution;
19
20 import org.apache.commons.numbers.gamma.LogBeta;
21 import org.apache.commons.numbers.gamma.RegularizedBeta;
22
23 /**
24 * Implementation of the F-distribution.
25 *
26 * <p>The probability density function of \( X \) is:
27 *
28 * <p>\[ \begin{aligned}
29 * f(x; n, m) &= \frac{1}{\operatorname{B}\left(\frac{n}{2},\frac{m}{2}\right)} \left(\frac{n}{m}\right)^{n/2} x^{n/2 - 1} \left(1+\frac{n}{m} \, x \right)^{-(n+m)/2} \\
30 * &= \frac{n^{\frac n 2} m^{\frac m 2} x^{\frac{n}{2}-1} }{ (nx+m)^{\frac{(n+m)}{2}} \operatorname{B}\left(\frac{n}{2},\frac{m}{2}\right)} \end{aligned} \]
31 *
32 * <p>for \( n, m > 0 \) the degrees of freedom, \( \operatorname{B}(a, b) \) is the beta function,
33 * and \( x \in [0, \infty) \).
34 *
35 * @see <a href="https://en.wikipedia.org/wiki/F-distribution">F-distribution (Wikipedia)</a>
36 * @see <a href="https://mathworld.wolfram.com/F-Distribution.html">F-distribution (MathWorld)</a>
37 */
38 public final class FDistribution extends AbstractContinuousDistribution {
39 /** Support lower bound. */
40 private static final double SUPPORT_LO = 0;
41 /** Support upper bound. */
42 private static final double SUPPORT_HI = Double.POSITIVE_INFINITY;
43 /** The minimum degrees of freedom for the denominator when computing the mean. */
44 private static final double MIN_DENOMINATOR_DF_FOR_MEAN = 2.0;
45 /** The minimum degrees of freedom for the denominator when computing the variance. */
46 private static final double MIN_DENOMINATOR_DF_FOR_VARIANCE = 4.0;
47
48 /** The numerator degrees of freedom. */
49 private final double numeratorDegreesOfFreedom;
50 /** The denominator degrees of freedom. */
51 private final double denominatorDegreesOfFreedom;
52 /** n/2 * log(n) + m/2 * log(m) with n = numerator DF and m = denominator DF. */
53 private final double nHalfLogNmHalfLogM;
54 /** LogBeta(n/2, n/2) with n = numerator DF. */
55 private final double logBetaNhalfMhalf;
56 /** Cached value for inverse probability function. */
57 private final double mean;
58 /** Cached value for inverse probability function. */
59 private final double variance;
60
61 /**
62 * @param numeratorDegreesOfFreedom Numerator degrees of freedom.
63 * @param denominatorDegreesOfFreedom Denominator degrees of freedom.
64 */
65 private FDistribution(double numeratorDegreesOfFreedom,
66 double denominatorDegreesOfFreedom) {
67 this.numeratorDegreesOfFreedom = numeratorDegreesOfFreedom;
68 this.denominatorDegreesOfFreedom = denominatorDegreesOfFreedom;
69 final double nhalf = numeratorDegreesOfFreedom / 2;
70 final double mhalf = denominatorDegreesOfFreedom / 2;
71 nHalfLogNmHalfLogM = nhalf * Math.log(numeratorDegreesOfFreedom) +
72 mhalf * Math.log(denominatorDegreesOfFreedom);
73 logBetaNhalfMhalf = LogBeta.value(nhalf, mhalf);
74
75 if (denominatorDegreesOfFreedom > MIN_DENOMINATOR_DF_FOR_MEAN) {
76 mean = denominatorDegreesOfFreedom / (denominatorDegreesOfFreedom - 2);
77 } else {
78 mean = Double.NaN;
79 }
80 if (denominatorDegreesOfFreedom > MIN_DENOMINATOR_DF_FOR_VARIANCE) {
81 final double denomDFMinusTwo = denominatorDegreesOfFreedom - 2;
82 variance = (2 * (denominatorDegreesOfFreedom * denominatorDegreesOfFreedom) *
83 (numeratorDegreesOfFreedom + denominatorDegreesOfFreedom - 2)) /
84 (numeratorDegreesOfFreedom * (denomDFMinusTwo * denomDFMinusTwo) *
85 (denominatorDegreesOfFreedom - 4));
86 } else {
87 variance = Double.NaN;
88 }
89 }
90
91 /**
92 * Creates an F-distribution.
93 *
94 * @param numeratorDegreesOfFreedom Numerator degrees of freedom.
95 * @param denominatorDegreesOfFreedom Denominator degrees of freedom.
96 * @return the distribution
97 * @throws IllegalArgumentException if {@code numeratorDegreesOfFreedom <= 0} or
98 * {@code denominatorDegreesOfFreedom <= 0}.
99 */
100 public static FDistribution of(double numeratorDegreesOfFreedom,
101 double denominatorDegreesOfFreedom) {
102 if (numeratorDegreesOfFreedom <= 0) {
103 throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE,
104 numeratorDegreesOfFreedom);
105 }
106 if (denominatorDegreesOfFreedom <= 0) {
107 throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE,
108 denominatorDegreesOfFreedom);
109 }
110 return new FDistribution(numeratorDegreesOfFreedom, denominatorDegreesOfFreedom);
111 }
112
113 /**
114 * Gets the numerator degrees of freedom parameter of this distribution.
115 *
116 * @return the numerator degrees of freedom.
117 */
118 public double getNumeratorDegreesOfFreedom() {
119 return numeratorDegreesOfFreedom;
120 }
121
122 /**
123 * Gets the denominator degrees of freedom parameter of this distribution.
124 *
125 * @return the denominator degrees of freedom.
126 */
127 public double getDenominatorDegreesOfFreedom() {
128 return denominatorDegreesOfFreedom;
129 }
130
131 /** {@inheritDoc}
132 *
133 * <p>Returns the limit when {@code x = 0}:
134 * <ul>
135 * <li>{@code df1 < 2}: Infinity
136 * <li>{@code df1 == 2}: 1
137 * <li>{@code df1 > 2}: 0
138 * </ul>
139 * <p>Where {@code df1} is the numerator degrees of freedom.
140 */
141 @Override
142 public double density(double x) {
143 if (x <= SUPPORT_LO ||
144 x >= SUPPORT_HI) {
145 // Special case x=0:
146 // PDF reduces to the term x^(df1 / 2 - 1) multiplied by a scaling factor
147 if (x == SUPPORT_LO && numeratorDegreesOfFreedom <= 2) {
148 return numeratorDegreesOfFreedom == 2 ?
149 1 :
150 Double.POSITIVE_INFINITY;
151 }
152 return 0;
153 }
154 return computeDensity(x, false);
155 }
156
157 /** {@inheritDoc}
158 *
159 * <p>Returns the limit when {@code x = 0}:
160 * <ul>
161 * <li>{@code df1 < 2}: Infinity
162 * <li>{@code df1 == 2}: 0
163 * <li>{@code df1 > 2}: -Infinity
164 * </ul>
165 * <p>Where {@code df1} is the numerator degrees of freedom.
166 */
167 @Override
168 public double logDensity(double x) {
169 if (x <= SUPPORT_LO ||
170 x >= SUPPORT_HI) {
171 // Special case x=0:
172 // PDF reduces to the term x^(df1 / 2 - 1) multiplied by a scaling factor
173 if (x == SUPPORT_LO && numeratorDegreesOfFreedom <= 2) {
174 return numeratorDegreesOfFreedom == 2 ?
175 0 :
176 Double.POSITIVE_INFINITY;
177 }
178 return Double.NEGATIVE_INFINITY;
179 }
180 return computeDensity(x, true);
181 }
182
183 /**
184 * Compute the density at point x. Assumes x is within the support bound.
185 *
186 * @param x Value
187 * @param log true to compute the log density
188 * @return pdf(x) or logpdf(x)
189 */
190 private double computeDensity(double x, boolean log) {
191 // The log computation may suffer cancellation effects due to summation of large
192 // opposing terms. Use it when the standard PDF result is not normal.
193
194 // Keep the z argument to the regularized beta well away from 1 to avoid rounding error.
195 // See: https://www.boost.org/doc/libs/1_78_0/libs/math/doc/html/math_toolkit/dist_ref/dists/f_dist.html
196
197 final double n = numeratorDegreesOfFreedom;
198 final double m = denominatorDegreesOfFreedom;
199 final double nx = n * x;
200 final double z = m + nx;
201 final double y = n * m / (z * z);
202 double p;
203 if (nx > m) {
204 p = y * RegularizedBeta.derivative(m / z,
205 m / 2, n / 2);
206 } else {
207 p = y * RegularizedBeta.derivative(nx / z,
208 n / 2, m / 2);
209 }
210 // RegularizedBeta.derivative can have intermediate overflow before normalisation
211 // with small y. Check the result for a normal finite number.
212 if (p <= Double.MAX_VALUE && p >= Double.MIN_NORMAL) {
213 return log ? Math.log(p) : p;
214 }
215 // Fall back to the log computation
216 p = nHalfLogNmHalfLogM + (n / 2 - 1) * Math.log(x) - logBetaNhalfMhalf -
217 ((n + m) / 2) * Math.log(z);
218 return log ? p : Math.exp(p);
219 }
220
221 /** {@inheritDoc} */
222 @Override
223 public double cumulativeProbability(double x) {
224 if (x <= SUPPORT_LO) {
225 return 0;
226 } else if (x >= SUPPORT_HI) {
227 return 1;
228 }
229
230 final double n = numeratorDegreesOfFreedom;
231 final double m = denominatorDegreesOfFreedom;
232
233 // Keep the z argument to the regularized beta well away from 1 to avoid rounding error.
234 // See: https://www.boost.org/doc/libs/1_78_0/libs/math/doc/html/math_toolkit/dist_ref/dists/f_dist.html
235 // Note the logic in the Boost documentation for pdf and cdf is contradictory.
236 // This order will keep the argument far from 1.
237
238 final double nx = n * x;
239 if (nx > m) {
240 return RegularizedBeta.complement(m / (m + nx),
241 m / 2, n / 2);
242 }
243 return RegularizedBeta.value(nx / (m + nx),
244 n / 2, m / 2);
245 }
246
247 /** {@inheritDoc} */
248 @Override
249 public double survivalProbability(double x) {
250 if (x <= SUPPORT_LO) {
251 return 1;
252 } else if (x >= SUPPORT_HI) {
253 return 0;
254 }
255
256 final double n = numeratorDegreesOfFreedom;
257 final double m = denominatorDegreesOfFreedom;
258
259 // Do the opposite of the CDF
260
261 final double nx = n * x;
262 if (nx > m) {
263 return RegularizedBeta.value(m / (m + nx),
264 m / 2, n / 2);
265 }
266 return RegularizedBeta.complement(nx / (m + nx),
267 n / 2, m / 2);
268 }
269
270 /**
271 * {@inheritDoc}
272 *
273 * <p>For denominator degrees of freedom parameter \( m \), the mean is:
274 *
275 * <p>\[ \mathbb{E}[X] = \begin{cases}
276 * \frac{m}{m-2} & \text{for } m \gt 2 \\
277 * \text{undefined} & \text{otherwise}
278 * \end{cases} \]
279 *
280 * @return the mean, or {@link Double#NaN NaN} if it is not defined.
281 */
282 @Override
283 public double getMean() {
284 return mean;
285 }
286
287 /**
288 * {@inheritDoc}
289 *
290 * <p>For numerator degrees of freedom parameter \( n \) and denominator
291 * degrees of freedom parameter \( m \), the variance is:
292 *
293 * <p>\[ \operatorname{var}[X] = \begin{cases}
294 * \frac{2m^2 (n+m-2)}{n (m-2)^2 (m-4)} & \text{for } m \gt 4 \\
295 * \text{undefined} & \text{otherwise}
296 * \end{cases} \]
297 *
298 * @return the variance, or {@link Double#NaN NaN} if it is not defined.
299 */
300 @Override
301 public double getVariance() {
302 return variance;
303 }
304
305 /**
306 * {@inheritDoc}
307 *
308 * <p>The lower bound of the support is always 0.
309 *
310 * @return 0.
311 */
312 @Override
313 public double getSupportLowerBound() {
314 return SUPPORT_LO;
315 }
316
317 /**
318 * {@inheritDoc}
319 *
320 * <p>The upper bound of the support is always positive infinity.
321 *
322 * @return {@linkplain Double#POSITIVE_INFINITY positive infinity}.
323 */
324 @Override
325 public double getSupportUpperBound() {
326 return SUPPORT_HI;
327 }
328 }