1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.statistics.distribution; 18 19 import java.util.function.DoubleBinaryOperator; 20 import java.util.function.DoubleUnaryOperator; 21 import org.apache.commons.numbers.rootfinder.BrentSolver; 22 import org.apache.commons.rng.UniformRandomProvider; 23 import org.apache.commons.rng.sampling.distribution.InverseTransformContinuousSampler; 24 25 /** 26 * Base class for probability distributions on the reals. 27 * Default implementations are provided for some of the methods 28 * that do not vary from distribution to distribution. 29 * 30 * <p>This base class provides a default factory method for creating 31 * a {@linkplain ContinuousDistribution.Sampler sampler instance} that uses the 32 * <a href="https://en.wikipedia.org/wiki/Inverse_transform_sampling"> 33 * inversion method</a> for generating random samples that follow the 34 * distribution. 35 * 36 * <p>The class provides functionality to evaluate the probability in a range 37 * using either the cumulative probability or the survival probability. 38 * The survival probability is used if both arguments to 39 * {@link #probability(double, double)} are above the median. 40 * Child classes with a known median can override the default {@link #getMedian()} 41 * method. 42 */ 43 abstract class AbstractContinuousDistribution 44 implements ContinuousDistribution { 45 46 // Notes on the inverse probability implementation: 47 // 48 // The Brent solver does not allow a stopping criteria for the proximity 49 // to the root; it uses equality to zero within 1 ULP. The search is 50 // iterated until there is a small difference between the upper 51 // and lower bracket of the root, expressed as a combination of relative 52 // and absolute thresholds. 53 54 /** BrentSolver relative accuracy. 55 * This is used with {@code tol = 2 * relEps * abs(b) + absEps} so the minimum 56 * non-zero value with an effect is half of machine epsilon (2^-53). */ 57 private static final double SOLVER_RELATIVE_ACCURACY = 0x1.0p-53; 58 /** BrentSolver absolute accuracy. 59 * This is used with {@code tol = 2 * relEps * abs(b) + absEps} so set to MIN_VALUE 60 * so that when the relative epsilon has no effect (as b is too small) the tolerance 61 * is at least 1 ULP for sub-normal numbers. */ 62 private static final double SOLVER_ABSOLUTE_ACCURACY = Double.MIN_VALUE; 63 /** BrentSolver function value accuracy. 64 * Determines if the Brent solver performs a search. It is not used during the search. 65 * Set to a very low value to search using Brent's method unless 66 * the starting point is correct, or within 1 ULP for sub-normal probabilities. */ 67 private static final double SOLVER_FUNCTION_VALUE_ACCURACY = Double.MIN_VALUE; 68 69 /** Cached value of the median. */ 70 private double median = Double.NaN; 71 72 /** 73 * Gets the median. This is used to determine if the arguments to the 74 * {@link #probability(double, double)} function are in the upper or lower domain. 75 * 76 * <p>The default implementation calls {@link #inverseCumulativeProbability(double)} 77 * with a value of 0.5. 78 * 79 * @return the median 80 */ 81 double getMedian() { 82 double m = median; 83 if (Double.isNaN(m)) { 84 median = m = inverseCumulativeProbability(0.5); 85 } 86 return m; 87 } 88 89 /** {@inheritDoc} */ 90 @Override 91 public double probability(double x0, 92 double x1) { 93 if (x0 > x1) { 94 throw new DistributionException(DistributionException.INVALID_RANGE_LOW_GT_HIGH, x0, x1); 95 } 96 // Use the survival probability when in the upper domain [3]: 97 // 98 // lower median upper 99 // | | | 100 // 1. |------| 101 // x0 x1 102 // 2. |----------| 103 // x0 x1 104 // 3. |--------| 105 // x0 x1 106 107 final double m = getMedian(); 108 if (x0 >= m) { 109 return survivalProbability(x0) - survivalProbability(x1); 110 } 111 return cumulativeProbability(x1) - cumulativeProbability(x0); 112 } 113 114 /** 115 * {@inheritDoc} 116 * 117 * <p>The default implementation returns: 118 * <ul> 119 * <li>{@link #getSupportLowerBound()} for {@code p = 0},</li> 120 * <li>{@link #getSupportUpperBound()} for {@code p = 1}, or</li> 121 * <li>the result of a search for a root between the lower and upper bound using 122 * {@link #cumulativeProbability(double) cumulativeProbability(x) - p}. 123 * The bounds may be bracketed for efficiency.</li> 124 * </ul> 125 * 126 * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1} 127 */ 128 @Override 129 public double inverseCumulativeProbability(double p) { 130 ArgumentUtils.checkProbability(p); 131 return inverseProbability(p, 1 - p, false); 132 } 133 134 /** 135 * {@inheritDoc} 136 * 137 * <p>The default implementation returns: 138 * <ul> 139 * <li>{@link #getSupportLowerBound()} for {@code p = 1},</li> 140 * <li>{@link #getSupportUpperBound()} for {@code p = 0}, or</li> 141 * <li>the result of a search for a root between the lower and upper bound using 142 * {@link #survivalProbability(double) survivalProbability(x) - p}. 143 * The bounds may be bracketed for efficiency.</li> 144 * </ul> 145 * 146 * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1} 147 */ 148 @Override 149 public double inverseSurvivalProbability(double p) { 150 ArgumentUtils.checkProbability(p); 151 return inverseProbability(1 - p, p, true); 152 } 153 154 /** 155 * Implementation for the inverse cumulative or survival probability. 156 * 157 * @param p Cumulative probability. 158 * @param q Survival probability. 159 * @param complement Set to true to compute the inverse survival probability 160 * @return the value 161 */ 162 private double inverseProbability(final double p, final double q, boolean complement) { 163 /* IMPLEMENTATION NOTES 164 * -------------------- 165 * Where applicable, use is made of the one-sided Chebyshev inequality 166 * to bracket the root. This inequality states that 167 * P(X - mu >= k * sig) <= 1 / (1 + k^2), 168 * mu: mean, sig: standard deviation. Equivalently 169 * 1 - P(X < mu + k * sig) <= 1 / (1 + k^2), 170 * F(mu + k * sig) >= k^2 / (1 + k^2). 171 * 172 * For k = sqrt(p / (1 - p)), we find 173 * F(mu + k * sig) >= p, 174 * and (mu + k * sig) is an upper-bound for the root. 175 * 176 * Then, introducing Y = -X, mean(Y) = -mu, sd(Y) = sig, and 177 * P(Y >= -mu + k * sig) <= 1 / (1 + k^2), 178 * P(-X >= -mu + k * sig) <= 1 / (1 + k^2), 179 * P(X <= mu - k * sig) <= 1 / (1 + k^2), 180 * F(mu - k * sig) <= 1 / (1 + k^2). 181 * 182 * For k = sqrt((1 - p) / p), we find 183 * F(mu - k * sig) <= p, 184 * and (mu - k * sig) is a lower-bound for the root. 185 * 186 * In cases where the Chebyshev inequality does not apply, geometric 187 * progressions 1, 2, 4, ... and -1, -2, -4, ... are used to bracket 188 * the root. 189 * 190 * In the case of the survival probability the bracket can be set using the same 191 * bound given that the argument p = 1 - q, with q the survival probability. 192 */ 193 194 double lowerBound = getSupportLowerBound(); 195 if (p == 0) { 196 return lowerBound; 197 } 198 double upperBound = getSupportUpperBound(); 199 if (q == 0) { 200 return upperBound; 201 } 202 203 final double mu = getMean(); 204 final double sig = Math.sqrt(getVariance()); 205 final boolean chebyshevApplies = Double.isFinite(mu) && 206 ArgumentUtils.isFiniteStrictlyPositive(sig); 207 208 if (lowerBound == Double.NEGATIVE_INFINITY) { 209 lowerBound = createFiniteLowerBound(p, q, complement, upperBound, mu, sig, chebyshevApplies); 210 } 211 212 if (upperBound == Double.POSITIVE_INFINITY) { 213 upperBound = createFiniteUpperBound(p, q, complement, lowerBound, mu, sig, chebyshevApplies); 214 } 215 216 // Here the bracket [lower, upper] uses finite values. If the support 217 // is infinite the bracket can truncate the distribution and the target 218 // probability can be outside the range of [lower, upper]. 219 if (upperBound == Double.MAX_VALUE) { 220 if (complement) { 221 if (survivalProbability(upperBound) > q) { 222 return getSupportUpperBound(); 223 } 224 } else if (cumulativeProbability(upperBound) < p) { 225 return getSupportUpperBound(); 226 } 227 } 228 if (lowerBound == -Double.MAX_VALUE) { 229 if (complement) { 230 if (survivalProbability(lowerBound) < q) { 231 return getSupportLowerBound(); 232 } 233 } else if (cumulativeProbability(lowerBound) > p) { 234 return getSupportLowerBound(); 235 } 236 } 237 238 final DoubleUnaryOperator fun = complement ? 239 arg -> survivalProbability(arg) - q : 240 arg -> cumulativeProbability(arg) - p; 241 // Note the initial value is robust to overflow. 242 // Do not use 0.5 * (lowerBound + upperBound). 243 final double x = new BrentSolver(SOLVER_RELATIVE_ACCURACY, 244 SOLVER_ABSOLUTE_ACCURACY, 245 SOLVER_FUNCTION_VALUE_ACCURACY) 246 .findRoot(fun, 247 lowerBound, 248 lowerBound + 0.5 * (upperBound - lowerBound), 249 upperBound); 250 251 if (!isSupportConnected()) { 252 return searchPlateau(complement, lowerBound, x); 253 } 254 return x; 255 } 256 257 /** 258 * Create a finite lower bound. Assumes the current lower bound is negative infinity. 259 * 260 * @param p Cumulative probability. 261 * @param q Survival probability. 262 * @param complement Set to true to compute the inverse survival probability 263 * @param upperBound Current upper bound 264 * @param mu Mean 265 * @param sig Standard deviation 266 * @param chebyshevApplies True if the Chebyshev inequality applies (mean is finite and {@code sig > 0}} 267 * @return the finite lower bound 268 */ 269 private double createFiniteLowerBound(final double p, final double q, boolean complement, 270 double upperBound, final double mu, final double sig, final boolean chebyshevApplies) { 271 double lowerBound; 272 if (chebyshevApplies) { 273 lowerBound = mu - sig * Math.sqrt(q / p); 274 } else { 275 lowerBound = Double.NEGATIVE_INFINITY; 276 } 277 // Bound may have been set as infinite 278 if (lowerBound == Double.NEGATIVE_INFINITY) { 279 lowerBound = Math.min(-1, upperBound); 280 if (complement) { 281 while (survivalProbability(lowerBound) < q) { 282 lowerBound *= 2; 283 } 284 } else { 285 while (cumulativeProbability(lowerBound) >= p) { 286 lowerBound *= 2; 287 } 288 } 289 // Ensure finite 290 lowerBound = Math.max(lowerBound, -Double.MAX_VALUE); 291 } 292 return lowerBound; 293 } 294 295 /** 296 * Create a finite upper bound. Assumes the current upper bound is positive infinity. 297 * 298 * @param p Cumulative probability. 299 * @param q Survival probability. 300 * @param complement Set to true to compute the inverse survival probability 301 * @param lowerBound Current lower bound 302 * @param mu Mean 303 * @param sig Standard deviation 304 * @param chebyshevApplies True if the Chebyshev inequality applies (mean is finite and {@code sig > 0}} 305 * @return the finite lower bound 306 */ 307 private double createFiniteUpperBound(final double p, final double q, boolean complement, 308 double lowerBound, final double mu, final double sig, final boolean chebyshevApplies) { 309 double upperBound; 310 if (chebyshevApplies) { 311 upperBound = mu + sig * Math.sqrt(p / q); 312 } else { 313 upperBound = Double.POSITIVE_INFINITY; 314 } 315 // Bound may have been set as infinite 316 if (upperBound == Double.POSITIVE_INFINITY) { 317 upperBound = Math.max(1, lowerBound); 318 if (complement) { 319 while (survivalProbability(upperBound) >= q) { 320 upperBound *= 2; 321 } 322 } else { 323 while (cumulativeProbability(upperBound) < p) { 324 upperBound *= 2; 325 } 326 } 327 // Ensure finite 328 upperBound = Math.min(upperBound, Double.MAX_VALUE); 329 } 330 return upperBound; 331 } 332 333 /** 334 * Indicates whether the support is connected, i.e. whether all values between the 335 * lower and upper bound of the support are included in the support. 336 * 337 * <p>This method is used in the default implementation of the inverse cumulative and 338 * survival probability functions. 339 * 340 * <p>The default value is true which assumes the cdf and sf have no plateau regions 341 * where the same probability value is returned for a large range of x. 342 * Override this method if there are gaps in the support of the cdf and sf. 343 * 344 * <p>If false then the inverse will perform an additional step to ensure that the 345 * lower-bound of the interval on which the cdf is constant should be returned. This 346 * will search from the initial point x downwards if a smaller value also has the same 347 * cumulative (survival) probability. 348 * 349 * <p>Any plateau with a width in x smaller than the inverse absolute accuracy will 350 * not be searched. 351 * 352 * <p>Note: This method was public in commons math. It has been reduced to package private 353 * in commons statistics as it is an implementation detail. 354 * 355 * @return whether the support is connected. 356 * @see <a href="https://issues.apache.org/jira/browse/MATH-699">MATH-699</a> 357 */ 358 boolean isSupportConnected() { 359 return true; 360 } 361 362 /** 363 * Test the probability function for a plateau at the point x. If detected 364 * search the plateau for the lowest point y such that 365 * {@code inf{y in R | P(y) == P(x)}}. 366 * 367 * <p>This function is used when the distribution support is not connected 368 * to satisfy the inverse probability requirements of {@link ContinuousDistribution} 369 * on the returned value. 370 * 371 * @param complement Set to true to search the survival probability. 372 * @param lower Lower bound used to limit the search downwards. 373 * @param x Current value. 374 * @return the infimum y 375 */ 376 private double searchPlateau(boolean complement, double lower, final double x) { 377 // Test for plateau. Lower the value x if the probability is the same. 378 // Ensure the step is robust to the solver accuracy being less 379 // than 1 ulp of x (e.g. dx=0 will infinite loop) 380 final double dx = Math.max(SOLVER_ABSOLUTE_ACCURACY, Math.ulp(x)); 381 if (x - dx >= lower) { 382 final DoubleUnaryOperator fun = complement ? 383 this::survivalProbability : 384 this::cumulativeProbability; 385 final double px = fun.applyAsDouble(x); 386 if (fun.applyAsDouble(x - dx) == px) { 387 double upperBound = x; 388 double lowerBound = lower; 389 // Bisection search 390 // Require cdf(x) < px and sf(x) > px to move the lower bound 391 // to the midpoint. 392 final DoubleBinaryOperator cmp = complement ? 393 (a, b) -> a > b ? -1 : 1 : 394 (a, b) -> a < b ? -1 : 1; 395 while (upperBound - lowerBound > dx) { 396 final double midPoint = 0.5 * (lowerBound + upperBound); 397 if (cmp.applyAsDouble(fun.applyAsDouble(midPoint), px) < 0) { 398 lowerBound = midPoint; 399 } else { 400 upperBound = midPoint; 401 } 402 } 403 return upperBound; 404 } 405 } 406 return x; 407 } 408 409 /** {@inheritDoc} */ 410 @Override 411 public ContinuousDistribution.Sampler createSampler(final UniformRandomProvider rng) { 412 // Inversion method distribution sampler. 413 return InverseTransformContinuousSampler.of(rng, this::inverseCumulativeProbability)::sample; 414 } 415 }