View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.statistics.distribution;
18  
19  import java.lang.reflect.Modifier;
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.Collections;
23  import java.util.Properties;
24  import java.util.function.Function;
25  import java.util.stream.Stream;
26  import org.apache.commons.math3.analysis.UnivariateFunction;
27  import org.apache.commons.math3.analysis.integration.BaseAbstractUnivariateIntegrator;
28  import org.apache.commons.math3.analysis.integration.IterativeLegendreGaussIntegrator;
29  import org.apache.commons.math3.exception.MaxCountExceededException;
30  import org.apache.commons.math3.util.MathArrays;
31  import org.apache.commons.rng.simple.RandomSource;
32  import org.apache.commons.statistics.distribution.DistributionTestData.ContinuousDistributionTestData;
33  import org.junit.jupiter.api.Assertions;
34  import org.junit.jupiter.api.TestInstance;
35  import org.junit.jupiter.api.TestInstance.Lifecycle;
36  import org.junit.jupiter.params.ParameterizedTest;
37  import org.junit.jupiter.params.provider.Arguments;
38  import org.junit.jupiter.params.provider.MethodSource;
39  
40  /**
41   * Abstract base class for {@link ContinuousDistribution} tests.
42   *
43   * <p>This class uses parameterized tests that are repeated for instances of a
44   * distribution. The distribution, test input and expected values are generated
45   * dynamically from properties files loaded from resources.
46   *
47   * <p>The class has a single instance (see {@link Lifecycle#PER_CLASS}) that loads properties
48   * files from resources on creation. Resource files are assumed to be in the corresponding package
49   * for the class and named sequentially from 1:
50   * <pre>
51   * test.distname.1.properties
52   * test.distname.2.properties
53   * </pre>
54   * <p>Where {@code distname} is the name of the distribution. The name is dynamically
55   * created in {@link #getDistributionName()} and can be overridden by implementing classes.
56   * A single parameterization of a distribution is tested using a single properties file.
57   *
58   * <p>To test a distribution create a sub-class and override the following methods:
59   * <ul>
60   * <li>{@link #makeDistribution(Object...) makeDistribution(Object...)} - Creates the distribution from the parameters
61   * <li>{@link #makeInvalidParameters()} - Generate invalid parameters for the distribution
62   * <li>{@link #getParameterNames()} - Return the names of parameter accessors
63   * </ul>
64   *
65   * <p>The distribution is created using
66   * {@link #makeDistribution(Object...) makeDistribution(Object...)}. This should
67   * create an instance of the distribution using parameters defined in the properties file.
68   * The parameters are parsed from String values to the appropriate parameter object. Currently
69   * this supports Double and Integer; numbers can be unboxed and used to create the distribution.
70   *
71   * <p>Illegal arguments for the distribution are tested from parameters provided by
72   * {@link #makeInvalidParameters()}. If there are no illegal arguments this method may return
73   * null to skip the test. Primitive parameters are boxed to objects so ensure the canonical form
74   * is used, e.g. {@code 1.0} not {@code 1} for a {@code double} argument.
75   *
76   * <p>If the distribution provides parameter accessors then the child test class can return
77   * the accessor names using {@link #getParameterNames()}. The distribution method accessors
78   * will be detected and invoked using reflection. The accessor must provide the same value
79   * as that passed to the {@link #makeDistribution(Object...)} method to create the distribution.
80   * This method may return null to skip the test, or null for the name to skip the test for a
81   * single parameter accessor.
82   *
83   * <p>The properties file must contain parameters for the distribution, properties of the
84   * distribution (moments and bounds) and points to test the CDF and PDF with the expected values.
85   * This information can be used to evaluate the distribution CDF and PDF but also the survival
86   * function, consistency of the probability computations and random sampling.
87   *
88   * <p>Optionally:
89   * <ul>
90   * <li>Points for the PDF (and log PDF) can be specified. The default will use the CDF points.
91   * Note: It is not expected that evaluation of the PDF will require different points to the CDF.
92   * <li>Points and expected values for the inverse CDF can be specified. These are used in
93   * addition to test the inverse mapping of the CDF values to the CDF test points.
94   * <li>Expected values for the log PDF can be specified. The default will use
95   * {@link Math#log(double)} on the PDF values.
96   * <li>Points and expected values for the survival function can be specified. These are used in
97   * addition to test the inverse mapping of the SF values to the SF test points.
98   * The default will use the expected CDF values (SF = 1 - CDF).
99   * <li>A tolerance for equality assertions. The default is set by {@link #getAbsoluteTolerance()}
100  * and {@link #getRelativeTolerance()}.
101  * </ul>
102  *
103  * <p>If the distribution provides higher precision implementations of
104  * cumulative probability and/or survival probability as the values approach zero, then test
105  * points and expected values can be provided for high-precision computations. If the default
106  * absolute tolerance has been set to non-zero, then very small p-values will require the
107  * high-precision absolute tolerance is configured for the test to a suitable magnitude (see below).
108  *
109  * <p>Per-test configuration
110  *
111  * <p>Each test is identified with a {@link TestName} key in the properties file. This key
112  * can be used to set a per-test tolerance, or disable the test:
113  * <pre>
114  * cdf.hp.relative = 1e-14
115  * cdf.hp.absolute = 1e-50
116  * sampling.disable
117  * </pre>
118  *
119  * <p>Note: All properties files are read during test initialization. Any errors in a single
120  * property file will throw an exception, invalidating the initialization and no tests
121  * will be executed.
122  *
123  * <p>The parameterized tests in this class are inherited. The tests are final and cannot be
124  * changed. This ensures each instance of a distribution is tested for all functionality in
125  * the {@link ContinuousDistribution} interface. Arguments to the parameterized tests are
126  * generated dynamically using methods of the same name. These can be over-ridden in child
127  * classes to alter parameters. Throwing a
128  * {@link org.opentest4j.TestAbortedException TestAbortedException} in this method will
129  * skip the test as the arguments will not be generated.
130  *
131  * <p>Each parameterized test is effectively static; it uses no instance data.
132  * To implement additional test cases with a specific distribution instance and test
133  * data, create a test in the child class and call the relevant test case to verify
134  * results. Note that it is recommended to use the properties file as this ensures the
135  * entire functionality of the distribution is tested for that parameterization.
136  *
137  * <p>Tests using floating-point equivalence comparisons are asserted using a {@link DoubleTolerance}.
138  * This interface computes true or false for the comparison of two {@code double} types.
139  * This allows the flexibility to use absolute, relative or ULP thresholds in combinations
140  * built using And or Or conditions to compare numbers. The default uses an Or combination
141  * of the absolute and relative thresholds. See {@link DoubleTolerances} to construct
142  * custom instances for additional tests.
143  *
144  * <p>Test data should be validated against reference tables or other packages where
145  * possible, and the source of the reference data and/or validation should be documented
146  * in the properties file or additional test cases as appropriate.
147  *
148  * <p>The properties file uses {@code key=value} pairs loaded using a
149  * {@link java.util.Properties} object. Values will be read as String and then parsed to
150  * numeric data, and data arrays. Multi-line values can use a {@code \} character to join lines.
151  * Data in the properties file will be converted to numbers using standard parsing
152  * functions appropriate to the primitive type, e.g. {@link Double#parseDouble(String)}.
153  * Special double values should use NaN, Infinity and -Infinity. As a convenience
154  * for creating test data parsing of doubles supports the following notations from
155  * other languages ('inf', 'Inf').
156  *
157  * <p>The following is a complete properties file for a distribution:
158  * <pre>
159  * parameters = 0.5 1.0
160  * # Computed using XYZ
161  * mean = 1.0
162  * variance = NaN
163  * # optional (default -Infinity)
164  * lower = 0
165  * # optional (default Infinity)
166  * upper = Infinity
167  * # optional (default 1e-14 or over-ridden in getRelativeTolerance())
168  * tolerance.relative = 1e-9
169  * # optional (default 0.0 or over-ridden in getAbsoluteTolerance())
170  * tolerance.absolute = 0.0
171  * cdf.points = 0, 0.2
172  * cdf.values = 0.0, 0.5
173  * # optional (default uses cdf.points)
174  * pdf.points = 0, 40000
175  * pdf.values = 0.0,\
176  *  0.0
177  * # optional (default uses log pdf.values)
178  * logpdf.values = -1900.123, -Infinity
179  * # optional (default uses cdf.points and 1 - cdf.values)
180  * sf.points = 400.0
181  * sf.values = 0.0
182  * # optional high-precision CDF test
183  * cdf.hp.points = 1e-16
184  * cdf.hp.values = 1.23e-17
185  * # optional high-precision survival function test
186  * sf.hp.points = 9.0
187  * sf.hp.values = 2.34e-18
188  * # optional inverse CDF test (defaults to ignore)
189  * icdf.points = 0.0, 0.5
190  * icdf.values = 0.0, 0.2
191  * # optional inverse CDF test (defaults to ignore)
192  * isf.points = 1.0, 0.5
193  * isf.values = 0.0, 0.2
194  * # Optional per-test tolerance and disable
195  * cdf.hp.relative = 1e-14
196  * cdf.hp.absolute = 1e-50
197  * sampling.disable = true
198  * </pre>
199  *
200  * <p>See {@link NakagamiDistributionTest} for an example and the resource file {@code test.nakagami.1.properties}.
201  */
202 @TestInstance(Lifecycle.PER_CLASS)
203 abstract class BaseContinuousDistributionTest
204     extends BaseDistributionTest<ContinuousDistribution, ContinuousDistributionTestData> {
205 
206     /** Absolute accuracy of the integrator result. */
207     private static final double INTEGRATOR_ABS_ACCURACY = 1e-10;
208     /** Relative accuracy of the integrator result. */
209     private static final double INTEGRATOR_REL_ACCURACY = 1e-12;
210 
211     @Override
212     ContinuousDistributionTestData makeDistributionData(Properties properties) {
213         return new ContinuousDistributionTestData(properties);
214     }
215 
216     //------------------------ Methods to stream the test data -----------------------------
217 
218     // The @MethodSource annotation will default to a no arguments method of the same name
219     // as the @ParameterizedTest method. These can be overridden by child classes to
220     // stream different arguments to the test case.
221 
222     /**
223      * Create a stream of arguments containing the distribution to test, the PDF test points
224      * and values, and the test tolerance.
225      *
226      * @return the stream
227      */
228     Stream<Arguments> testDensity() {
229         return stream(TestName.PDF,
230                       ContinuousDistributionTestData::getPdfPoints,
231                       ContinuousDistributionTestData::getPdfValues);
232     }
233 
234     /**
235      * Create a stream of arguments containing the distribution to test, the log PDF test points
236      * and values, and the test tolerance.
237      *
238      * @return the stream
239      */
240     Stream<Arguments> testLogDensity() {
241         return stream(TestName.LOGPDF,
242                       ContinuousDistributionTestData::getPdfPoints,
243                       ContinuousDistributionTestData::getLogPdfValues);
244     }
245 
246     /**
247      * Create a stream of arguments containing the distribution to test, the CDF test points
248      * and values, and the test tolerance.
249      *
250      * @return the stream
251      */
252     Stream<Arguments> testCumulativeProbability() {
253         return stream(TestName.CDF,
254                       ContinuousDistributionTestData::getCdfPoints,
255                       ContinuousDistributionTestData::getCdfValues);
256     }
257 
258     /**
259      * Create a stream of arguments containing the distribution to test, the survival function
260      * test points and values, and the test tolerance.
261      *
262      * <p>This defaults to using the CDF points. The survival function is tested as 1 - CDF.
263      *
264      * @return the stream
265      */
266     Stream<Arguments> testSurvivalProbability() {
267         return stream(TestName.SF,
268                       ContinuousDistributionTestData::getSfPoints,
269                       ContinuousDistributionTestData::getSfValues);
270     }
271 
272     /**
273      * Create a stream of arguments containing the distribution to test, the CDF test points
274      * and values, and the test tolerance for high-precision computations.
275      *
276      * @return the stream
277      */
278     Stream<Arguments> testCumulativeProbabilityHighPrecision() {
279         return stream(TestName.CDF_HP,
280                       ContinuousDistributionTestData::getCdfHpPoints,
281                       ContinuousDistributionTestData::getCdfHpValues);
282     }
283 
284     /**
285      * Create a stream of arguments containing the distribution to test, the survival function
286      * test points and values, and the test tolerance for high-precision computations.
287      *
288      * @return the stream
289      */
290     Stream<Arguments> testSurvivalProbabilityHighPrecision() {
291         return stream(TestName.SF_HP,
292                       ContinuousDistributionTestData::getSfHpPoints,
293                       ContinuousDistributionTestData::getSfHpValues);
294     }
295 
296     /**
297      * Create a stream of arguments containing the distribution to test, the inverse CDF test points
298      * and values, and the test tolerance.
299      *
300      * @return the stream
301      */
302     Stream<Arguments> testInverseCumulativeProbability() {
303         return stream(TestName.ICDF,
304                       ContinuousDistributionTestData::getIcdfPoints,
305                       ContinuousDistributionTestData::getIcdfValues);
306     }
307 
308     /**
309      * Create a stream of arguments containing the distribution to test, the inverse SF test points
310      * and values, and the test tolerance.
311      *
312      * @return the stream
313      */
314     Stream<Arguments> testInverseSurvivalProbability() {
315         return stream(TestName.ISF,
316                       ContinuousDistributionTestData::getIsfPoints,
317                       ContinuousDistributionTestData::getIsfValues);
318     }
319 
320     /**
321      * Create a stream of arguments containing the distribution to test, the test points
322      * to evaluate the CDF, and the test tolerance. The equality
323      * {@code cdf(x) = cdf(icdf(cdf(x)))} must be true within the tolerance.
324      *
325      * @return the stream
326      */
327     Stream<Arguments> testCumulativeProbabilityInverseMapping() {
328         return stream(TestName.CDF_MAPPING,
329                       ContinuousDistributionTestData::getCdfPoints);
330     }
331 
332     /**
333      * Create a stream of arguments containing the distribution to test, the test points
334      * to evaluate the SF, and the test tolerance. The equality
335      * {@code sf(x) = sf(isf(sf(x)))} must be true within the tolerance.
336      *
337      * @return the stream
338      */
339     Stream<Arguments> testSurvivalProbabilityInverseMapping() {
340         return stream(TestName.SF_MAPPING,
341                       ContinuousDistributionTestData::getSfPoints);
342     }
343 
344     /**
345      * Create a stream of arguments containing the distribution to test, the test points
346      * to evaluate the CDF, and the test tolerance. The equality
347      * {@code cdf(x) = cdf(icdf(cdf(x)))} must be true within the tolerance.
348      * This uses the points for the high-precision CDF.
349      *
350      * @return the stream
351      */
352     Stream<Arguments> testCumulativeProbabilityHighPrecisionInverseMapping() {
353         return stream(TestName.CDF_HP_MAPPING,
354                       ContinuousDistributionTestData::getCdfHpPoints);
355     }
356 
357     /**
358      * Create a stream of arguments containing the distribution to test, the test points
359      * to evaluate the SF, and the test tolerance. The equality
360      * {@code sf(x) = sf(isf(sf(x)))} must be true within the tolerance.
361      * This uses the points for the high-precision SF.
362      *
363      * @return the stream
364      */
365     Stream<Arguments> testSurvivalProbabilityHighPrecisionInverseMapping() {
366         return stream(TestName.SF_HP_MAPPING,
367                       ContinuousDistributionTestData::getSfHpPoints);
368     }
369 
370     /**
371      * Create a stream of arguments containing the distribution to test, the test points
372      * to evaluate the CDF and survival function, and the test tolerance. CDF + SF must equal 1.
373      *
374      * @return the stream
375      */
376     Stream<Arguments> testSurvivalAndCumulativeProbabilityComplement() {
377         // This is not disabled based on cdf.disable && sf.disable.
378         // Those flags are intended to ignore tests against reference values.
379         return stream(TestName.COMPLEMENT,
380                       ContinuousDistributionTestData::getCdfPoints);
381     }
382 
383     /**
384      * Create a stream of arguments containing the distribution to test, the test points
385      * to evaluate the CDF and probability in a range, and the test tolerance.
386      * Used to test CDF(x1) - CDF(x0) = probability(x0, x1).
387      *
388      * @return the stream
389      */
390     Stream<Arguments> testConsistency() {
391         // This is not disabled based on cdf.disable.
392         // That flag is intended to ignore tests against reference values.
393         return stream(TestName.CONSISTENCY,
394                       ContinuousDistributionTestData::getCdfPoints);
395     }
396 
397     /**
398      * Create a stream of arguments containing the distribution to test sampling.
399      *
400      * @return the stream
401      */
402     Stream<Arguments> testSampling() {
403         return stream(TestName.SAMPLING);
404     }
405 
406     /**
407      * Stream the arguments to test the density integrals. The test
408      * integrates the density function between consecutive test points for the cumulative
409      * density function. The default tolerance is based on the convergence tolerance of
410      * the underlying integrator (abs=1e-10, rel=1e-12).
411      * Override this method to change the tolerance.
412      *
413      * @return the stream
414      */
415     Stream<Arguments> testDensityIntegrals() {
416         // Create a tolerance suitable for the same thresholds used by the integrator.
417         final Function<ContinuousDistributionTestData, DoubleTolerance> tolerance =
418             d -> createAbsOrRelTolerance(INTEGRATOR_ABS_ACCURACY * 10, INTEGRATOR_REL_ACCURACY * 10);
419         final TestName name = TestName.INTEGRALS;
420         return stream(d -> d.isDisabled(name),
421                       ContinuousDistributionTestData::getCdfPoints,
422                       ContinuousDistributionTestData::getCdfValues,
423                       tolerance, name.toString());
424     }
425 
426     /**
427      * Create a stream of arguments containing the distribution to test, the support
428      * lower and upper bound, and the support connect flag.
429      *
430      * @return the stream
431      */
432     Stream<Arguments> testSupport() {
433         return streamArguments(TestName.SUPPORT,
434             d -> Arguments.of(namedDistribution(d.getParameters()), d.getLower(), d.getUpper()));
435     }
436 
437     /**
438      * Create a stream of arguments containing the distribution to test, the mean
439      * and variance, and the test tolerance.
440      *
441      * @return the stream
442      */
443     Stream<Arguments> testMoments() {
444         final TestName name = TestName.MOMENTS;
445         return streamArguments(name,
446             d -> Arguments.of(namedDistribution(d.getParameters()), d.getMean(), d.getVariance(),
447                               createTestTolerance(d, name)));
448     }
449 
450     /**
451      * Create a stream of arguments containing the distribution to test and the test tolerance.
452      *
453      * @return the stream
454      */
455     Stream<Arguments> testMedian() {
456         final TestName name = TestName.MEDIAN;
457         return streamArguments(name,
458             d -> Arguments.of(namedDistribution(d.getParameters()), createTestTolerance(d, name)));
459     }
460 
461     //------------------------ Tests -----------------------------
462 
463     // Tests are final. It is expected that the test can be modified by overriding
464     // the method used to stream the arguments, for example to use a specific tolerance
465     // for a test in preference to the tolerance defined in the properties file.
466 
467     /**
468      * Test that density calculations match expected values.
469      */
470     @ParameterizedTest
471     @MethodSource
472     final void testDensity(ContinuousDistribution dist,
473                            double[] points,
474                            double[] values,
475                            DoubleTolerance tolerance) {
476         for (int i = 0; i < points.length; i++) {
477             final double x = points[i];
478             TestUtils.assertEquals(values[i],
479                 dist.density(x), tolerance,
480                 () -> "Incorrect probability density value returned for " + x);
481         }
482     }
483 
484     /**
485      * Test that logarithmic density calculations match expected values.
486      */
487     @ParameterizedTest
488     @MethodSource
489     final void testLogDensity(ContinuousDistribution dist,
490                               double[] points,
491                               double[] values,
492                               DoubleTolerance tolerance) {
493         for (int i = 0; i < points.length; i++) {
494             final double x = points[i];
495             TestUtils.assertEquals(values[i],
496                 dist.logDensity(x), tolerance,
497                 () -> "Incorrect probability density value returned for " + x);
498         }
499     }
500 
501     /**
502      * Test that cumulative probability density calculations match expected values.
503      */
504     @ParameterizedTest
505     @MethodSource
506     final void testCumulativeProbability(ContinuousDistribution dist,
507                                          double[] points,
508                                          double[] values,
509                                          DoubleTolerance tolerance) {
510         // verify cumulativeProbability(double)
511         for (int i = 0; i < points.length; i++) {
512             final double x = points[i];
513             TestUtils.assertEquals(values[i],
514                 dist.cumulativeProbability(x),
515                 tolerance,
516                 () -> "Incorrect cumulative probability value returned for " + x);
517         }
518     }
519 
520     /**
521      * Test that survival probability density calculations match expected values.
522      */
523     @ParameterizedTest
524     @MethodSource
525     final void testSurvivalProbability(ContinuousDistribution dist,
526                                        double[] points,
527                                        double[] values,
528                                        DoubleTolerance tolerance) {
529         for (int i = 0; i < points.length; i++) {
530             final double x = points[i];
531             TestUtils.assertEquals(
532                 values[i],
533                 dist.survivalProbability(points[i]),
534                 tolerance,
535                 () -> "Incorrect survival probability value returned for " + x);
536         }
537     }
538 
539     /**
540      * Test that cumulative probability is not {@code (1 - survival probability)} by testing values
541      * that would result in inaccurate results if simply calculating (1 - sf).
542      */
543     @ParameterizedTest
544     @MethodSource
545     final void testCumulativeProbabilityHighPrecision(ContinuousDistribution dist,
546                                                       double[] points,
547                                                       double[] values,
548                                                       DoubleTolerance tolerance) {
549         assertHighPrecision(tolerance, values);
550         testCumulativeProbability(dist, points, values, tolerance);
551     }
552 
553     /**
554      * Test that survival probability is not {@code (1 - cumulative probability)} by testing values
555      * that would result in inaccurate results if simply calculating (1 - cdf).
556      */
557     @ParameterizedTest
558     @MethodSource
559     final void testSurvivalProbabilityHighPrecision(ContinuousDistribution dist,
560                                                     double[] points,
561                                                     double[] values,
562                                                     DoubleTolerance tolerance) {
563         assertHighPrecision(tolerance, values);
564         testSurvivalProbability(dist, points, values, tolerance);
565     }
566 
567     /**
568      * Test that inverse cumulative probability density calculations match expected values.
569      *
570      * <p>Note: Any expected values outside the support of the distribution are ignored.
571      */
572     @ParameterizedTest
573     @MethodSource
574     final void testInverseCumulativeProbability(ContinuousDistribution dist,
575                                                 double[] points,
576                                                 double[] values,
577                                                 DoubleTolerance tolerance) {
578         final double lower = dist.getSupportLowerBound();
579         final double upper = dist.getSupportUpperBound();
580         for (int i = 0; i < points.length; i++) {
581             final double x = values[i];
582             if (x < lower || x > upper) {
583                 continue;
584             }
585             final double p = points[i];
586             TestUtils.assertEquals(
587                 x,
588                 dist.inverseCumulativeProbability(p),
589                 tolerance,
590                 () -> "Incorrect inverse cumulative probability value returned for " + p);
591         }
592     }
593 
594     /**
595      * Test that inverse survival probability density calculations match expected values.
596      *
597      * <p>Note: Any expected values outside the support of the distribution are ignored.
598      */
599     @ParameterizedTest
600     @MethodSource
601     final void testInverseSurvivalProbability(ContinuousDistribution dist,
602                                               double[] points,
603                                               double[] values,
604                                               DoubleTolerance tolerance) {
605         final double lower = dist.getSupportLowerBound();
606         final double upper = dist.getSupportUpperBound();
607         for (int i = 0; i < points.length; i++) {
608             final double x = values[i];
609             if (x < lower || x > upper) {
610                 continue;
611             }
612             final double p = points[i];
613             TestUtils.assertEquals(
614                 x,
615                 dist.inverseSurvivalProbability(p),
616                 tolerance,
617                 () -> "Incorrect inverse survival probability value returned for " + p);
618         }
619     }
620 
621     /**
622      * Test that an inverse mapping of the cumulative probability density values matches
623      * the original point, {@code x = icdf(cdf(x))}.
624      *
625      * <p>Note: It is possible for two points to compute the same CDF value. In this
626      * case the mapping is not a bijection. Thus a further forward mapping is performed
627      * to check {@code cdf(x) = cdf(icdf(cdf(x)))} within the allowed tolerance.
628      *
629      * <p>Note: Any points outside the support of the distribution are ignored.
630      */
631     @ParameterizedTest
632     @MethodSource
633     final void testCumulativeProbabilityInverseMapping(ContinuousDistribution dist,
634                                                        double[] points,
635                                                        DoubleTolerance tolerance) {
636         final double lower = dist.getSupportLowerBound();
637         final double upper = dist.getSupportUpperBound();
638         for (int i = 0; i < points.length; i++) {
639             final double x = points[i];
640             if (x < lower || x > upper) {
641                 continue;
642             }
643             final double p = dist.cumulativeProbability(x);
644             final double x1 = dist.inverseCumulativeProbability(p);
645             final double p1 = dist.cumulativeProbability(x1);
646             // Check the inverse CDF computed a value that will return to the
647             // same probability value.
648             TestUtils.assertEquals(
649                 p,
650                 p1,
651                 tolerance,
652                 () -> "Incorrect CDF(inverse CDF(CDF(x))) value returned for " + x);
653         }
654     }
655 
656     /**
657      * Test that an inverse mapping of the survival probability density values matches
658      * the original point, {@code x = isf(sf(x))}.
659      *
660      * <p>Note: It is possible for two points to compute the same SF value. In this
661      * case the mapping is not a bijection. Thus a further forward mapping is performed
662      * to check {@code sf(x) = sf(isf(sf(x)))} within the allowed tolerance.
663      *
664      * <p>Note: Any points outside the support of the distribution are ignored.
665      */
666     @ParameterizedTest
667     @MethodSource
668     final void testSurvivalProbabilityInverseMapping(ContinuousDistribution dist,
669                                                      double[] points,
670                                                      DoubleTolerance tolerance) {
671         final double lower = dist.getSupportLowerBound();
672         final double upper = dist.getSupportUpperBound();
673         for (int i = 0; i < points.length; i++) {
674             final double x = points[i];
675             if (x < lower || x > upper) {
676                 continue;
677             }
678             final double p = dist.survivalProbability(x);
679             final double x1 = dist.inverseSurvivalProbability(p);
680             final double p1 = dist.survivalProbability(x1);
681             // Check the inverse SF computed a value that will return to the
682             // same probability value.
683             TestUtils.assertEquals(
684                 p,
685                 p1,
686                 tolerance,
687                 () -> "Incorrect SF(inverse SF(SF(x))) value returned for " + x);
688         }
689     }
690 
691     /**
692      * Test that an inverse mapping of the cumulative probability density values matches
693      * the original point, {@code x = icdf(cdf(x))} using the points for the high-precision
694      * CDF.
695      */
696     @ParameterizedTest
697     @MethodSource
698     final void testCumulativeProbabilityHighPrecisionInverseMapping(
699             ContinuousDistribution dist,
700             double[] points,
701             DoubleTolerance tolerance) {
702         testCumulativeProbabilityInverseMapping(dist, points, tolerance);
703     }
704 
705     /**
706      * Test that an inverse mapping of the survival probability density values matches
707      * the original point, {@code x = isf(sf(x))} using the points for the high-precision
708      * SF.
709      */
710     @ParameterizedTest
711     @MethodSource
712     final void testSurvivalProbabilityHighPrecisionInverseMapping(
713             ContinuousDistribution dist,
714             double[] points,
715             DoubleTolerance tolerance) {
716         testSurvivalProbabilityInverseMapping(dist, points, tolerance);
717     }
718 
719     /**
720      * Test that cumulative probability density and survival probability calculations
721      * sum to approximately 1.0.
722      */
723     @ParameterizedTest
724     @MethodSource
725     final void testSurvivalAndCumulativeProbabilityComplement(ContinuousDistribution dist,
726                                                               double[] points,
727                                                               DoubleTolerance tolerance) {
728         for (final double x : points) {
729             TestUtils.assertEquals(
730                 1.0,
731                 dist.survivalProbability(x) + dist.cumulativeProbability(x),
732                 tolerance,
733                 () -> "survival + cumulative probability were not close to 1.0 for " + x);
734         }
735     }
736 
737     /**
738      * Test that probability computations are consistent.
739      * This checks probability(x, x) = 0; and probability(x0, x1) = CDF(x1) - CDF(x0).
740      */
741     @ParameterizedTest
742     @MethodSource
743     final void testConsistency(ContinuousDistribution dist,
744                                double[] points,
745                                DoubleTolerance tolerance) {
746         for (int i = 0; i < points.length; i++) {
747             final double x0 = points[i];
748 
749             // Check that probability(x, x) == 0
750             Assertions.assertEquals(
751                 0.0,
752                 dist.probability(x0, x0),
753                 () -> "Non-zero probability(x, x) for " + x0);
754 
755             final double cdf0 = dist.cumulativeProbability(x0);
756             final double sf0 = cdf0 >= 0.5 ? dist.survivalProbability(x0) : Double.NaN;
757             for (int j = 0; j < points.length; j++) {
758                 final double x1 = points[j];
759                 // Ignore the same point
760                 if (x0 < x1) {
761                     // Check that probability(x0, x1) = CDF(x1) - CDF(x0).
762                     // If x0 is above the median it is more accurate to use the
763                     // survival probability: probability(x0, x1) = SF(x0) - SF(x1).
764                     double expected;
765                     if (cdf0 >= 0.5) {
766                         expected = sf0 - dist.survivalProbability(x1);
767                     } else {
768                         expected = dist.cumulativeProbability(x1) - cdf0;
769                     }
770                     TestUtils.assertEquals(
771                         expected,
772                         dist.probability(x0, x1),
773                         tolerance,
774                         () -> "Inconsistent probability for (" + x0 + "," + x1 + ")");
775                 } else if (x0 > x1) {
776                     Assertions.assertThrows(IllegalArgumentException.class,
777                         () -> dist.probability(x0, x1),
778                         "probability(double, double) should have thrown an exception that first argument is too large");
779                 }
780             }
781         }
782     }
783 
784     /**
785      * Test CDF and inverse CDF values at the edge of the support of the distribution return
786      * expected values and the CDF outside the support returns consistent values.
787      */
788     @ParameterizedTest
789     @MethodSource(value = "streamDistribution")
790     final void testOutsideSupport(ContinuousDistribution dist) {
791         // Test various quantities when the variable is outside the support.
792         final double lo = dist.getSupportLowerBound();
793         Assertions.assertEquals(0.0, dist.cumulativeProbability(lo), "cdf(lower)");
794         Assertions.assertEquals(lo, dist.inverseCumulativeProbability(-0.0), "icdf(-0.0)");
795         Assertions.assertEquals(lo, dist.inverseCumulativeProbability(0.0), "icdf(0.0)");
796         Assertions.assertEquals(lo, dist.inverseSurvivalProbability(1.0), "isf(1.0)");
797         // Test for rounding errors during inversion
798         Assertions.assertTrue(lo <= dist.inverseCumulativeProbability(Double.MIN_VALUE), "lo <= icdf(min)");
799         Assertions.assertTrue(lo <= dist.inverseCumulativeProbability(Double.MIN_NORMAL), "lo <= icdf(min_normal)");
800         Assertions.assertTrue(lo <= dist.inverseSurvivalProbability(Math.nextDown(1.0)), "lo <= isf(nextDown(1.0))");
801 
802         final double below = Math.nextDown(lo);
803         Assertions.assertEquals(0.0, dist.density(below), "pdf(x < lower)");
804         Assertions.assertEquals(Double.NEGATIVE_INFINITY, dist.logDensity(below), "logpdf(x < lower)");
805         Assertions.assertEquals(0.0, dist.cumulativeProbability(below), "cdf(x < lower)");
806         Assertions.assertEquals(1.0, dist.survivalProbability(below), "sf(x < lower)");
807 
808         final double hi = dist.getSupportUpperBound();
809         Assertions.assertTrue(lo <= hi, "lower <= upper");
810         Assertions.assertEquals(1.0, dist.cumulativeProbability(hi), "cdf(upper)");
811         Assertions.assertEquals(0.0, dist.survivalProbability(hi), "sf(upper)");
812         Assertions.assertEquals(hi, dist.inverseCumulativeProbability(1.0), "icdf(1.0)");
813         Assertions.assertEquals(hi, dist.inverseSurvivalProbability(-0.0), "isf(-0.0)");
814         Assertions.assertEquals(hi, dist.inverseSurvivalProbability(0.0), "isf(0.0)");
815         // Test for rounding errors during inversion
816         Assertions.assertTrue(hi >= dist.inverseCumulativeProbability(Math.nextDown(1.0)), "hi >= icdf(nextDown(1.0))");
817         Assertions.assertTrue(hi >= dist.inverseSurvivalProbability(Double.MIN_VALUE), "lo <= isf(min)");
818         Assertions.assertTrue(hi >= dist.inverseSurvivalProbability(Double.MIN_NORMAL), "lo <= isf(min_normal)");
819 
820         final double above = Math.nextUp(hi);
821         Assertions.assertEquals(0.0, dist.density(above), "pdf(x > upper)");
822         Assertions.assertEquals(Double.NEGATIVE_INFINITY, dist.logDensity(above), "logpdf(x > upper)");
823         Assertions.assertEquals(1.0, dist.cumulativeProbability(above), "cdf(x > upper)");
824         Assertions.assertEquals(0.0, dist.survivalProbability(above), "sf(x > upper)");
825     }
826 
827     /**
828      * Test invalid probabilities passed to computations that require a p-value in {@code [0, 1]}
829      * or a range where {@code p1 <= p2}.
830      */
831     @ParameterizedTest
832     @MethodSource(value = "streamDistribution")
833     final void testInvalidProbabilities(ContinuousDistribution dist) {
834         final double lo = dist.getSupportLowerBound();
835         final double hi = dist.getSupportUpperBound();
836         if (lo < hi) {
837             Assertions.assertThrows(DistributionException.class, () -> dist.probability(hi, lo), "x0 > x1");
838         }
839         Assertions.assertThrows(DistributionException.class, () -> dist.inverseCumulativeProbability(-1), "p < 0.0");
840         Assertions.assertThrows(DistributionException.class, () -> dist.inverseCumulativeProbability(2), "p > 1.0");
841         Assertions.assertThrows(DistributionException.class, () -> dist.inverseSurvivalProbability(-1), "q < 0.0");
842         Assertions.assertThrows(DistributionException.class, () -> dist.inverseSurvivalProbability(2), "q > 1.0");
843     }
844 
845     /**
846      * Test sampling from the distribution.
847      */
848     @ParameterizedTest
849     @MethodSource
850     final void testSampling(ContinuousDistribution dist) {
851         final double[] quartiles = TestUtils.getDistributionQuartiles(dist);
852         // The distribution quartiles are created using the inverse CDF.
853         // This may not be accurate for extreme parameterizations of the distribution.
854         // So use the values to compute the expected probability for each interval.
855         final double[] expected = {
856             dist.cumulativeProbability(quartiles[0]),
857             dist.probability(quartiles[0], quartiles[1]),
858             dist.probability(quartiles[1], quartiles[2]),
859             dist.survivalProbability(quartiles[2]),
860         };
861         // Fail if the quartiles are different from a quarter.
862         // Note: Set the tolerance to a high value to allow the sampling test
863         // to run with uneven intervals. This will determine if the sampler
864         // if broken for this parameterization of the distribution that has
865         // an incorrect CDF(inverse CDF(p)) mapping.
866         final DoubleTolerance tolerance = DoubleTolerances.relative(1e-3);
867         for (final double p : expected) {
868             TestUtils.assertEquals(0.25, p, tolerance,
869                 () -> "Unexpected quartiles: " + Arrays.toString(expected));
870         }
871 
872         final int sampleSize = 1000;
873         MathArrays.scaleInPlace(sampleSize, expected);
874 
875         // Use fixed seed.
876         final ContinuousDistribution.Sampler sampler =
877             dist.createSampler(RandomSource.XO_SHI_RO_256_PP.create(123456789L));
878         final double[] sample = TestUtils.sample(sampleSize, sampler);
879 
880         final long[] counts = new long[4];
881         for (int i = 0; i < sampleSize; i++) {
882             TestUtils.updateCounts(sample[i], counts, quartiles);
883         }
884 
885         TestUtils.assertChiSquareAccept(expected, counts, 0.001);
886     }
887 
888     /**
889      * Test that density integrals match the distribution.
890      * The (filtered, sorted) points array is used to source
891      * integration limits. The integral of the density (estimated using a
892      * Legendre-Gauss integrator) is compared with the cdf over the same
893      * interval. Test points outside of the domain of the density function
894      * are discarded.
895      */
896     @ParameterizedTest
897     @MethodSource
898     final void testDensityIntegrals(ContinuousDistribution dist,
899                                     double[] points,
900                                     double[] values,
901                                     DoubleTolerance tolerance) {
902         final BaseAbstractUnivariateIntegrator integrator =
903             new IterativeLegendreGaussIntegrator(5, INTEGRATOR_REL_ACCURACY, INTEGRATOR_ABS_ACCURACY);
904         final UnivariateFunction d = dist::density;
905         final ArrayList<Double> integrationTestPoints = new ArrayList<>();
906         for (int i = 0; i < points.length; i++) {
907             if (Double.isNaN(values[i]) ||
908                 values[i] < 1e-5 ||
909                 values[i] > 1 - 1e-5) {
910                 continue; // exclude integrals outside domain.
911             }
912             integrationTestPoints.add(points[i]);
913         }
914         Collections.sort(integrationTestPoints);
915         for (int i = 1; i < integrationTestPoints.size(); i++) {
916             final double x0 = integrationTestPoints.get(i - 1);
917             final double x1 = integrationTestPoints.get(i);
918             // Exclude extremely steep integrals
919             // (e.g. the gamma distribution with shape < 1)
920             if (Math.max(dist.density(x0), dist.density(x1)) > 1e3) {
921                 continue;
922             }
923             double integral = 0;
924             try {
925                 // Integrals may be slow to converge
926                 integral = integrator.integrate(1000000,
927                                                 d, x0, x1);
928             } catch (MaxCountExceededException e) {
929                 Assertions.fail("Failed density integral: " + x0 + " to " + x1, e);
930             }
931             TestUtils.assertEquals(
932                 dist.probability(x0, x1),
933                 integral,
934                 tolerance,
935                 () -> "Invalid density integral: " + x0 + " to " + x1);
936         }
937     }
938 
939     /**
940      * Test the support of the distribution matches the expected values.
941      */
942     @ParameterizedTest
943     @MethodSource
944     final void testSupport(ContinuousDistribution dist, double lower, double upper) {
945         Assertions.assertEquals(lower, dist.getSupportLowerBound(), "lower bound");
946         Assertions.assertEquals(upper, dist.getSupportUpperBound(), "upper bound");
947     }
948 
949     /**
950      * Test the moments of the distribution matches the expected values.
951      */
952     @ParameterizedTest
953     @MethodSource
954     final void testMoments(ContinuousDistribution dist, double mean, double variance, DoubleTolerance tolerance) {
955         TestUtils.assertEquals(mean, dist.getMean(), tolerance, "mean");
956         TestUtils.assertEquals(variance, dist.getVariance(), tolerance, "variance");
957     }
958 
959     /**
960      * Test the median of the distribution is equal to the value returned from the inverse CDF.
961      * The median is used internally for computation of the probability of a range
962      * using either the CDF or survival function. If overridden by a distribution it should
963      * be equivalent to the inverse CDF called with 0.5.
964      *
965      * <p>The method modifiers are asserted to check the method is not public or protected.
966      */
967     @ParameterizedTest
968     @MethodSource
969     final void testMedian(ContinuousDistribution dist, DoubleTolerance tolerance) {
970         if (dist instanceof AbstractContinuousDistribution) {
971             final AbstractContinuousDistribution d = (AbstractContinuousDistribution) dist;
972             TestUtils.assertEquals(d.inverseCumulativeProbability(0.5), d.getMedian(), tolerance, "median");
973             assertMethodNotModified(dist.getClass(), Modifier.PUBLIC | Modifier.PROTECTED, "getMedian");
974         }
975     }
976 }