1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.statistics.distribution;
18
19 import java.lang.reflect.Modifier;
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Collections;
23 import java.util.Properties;
24 import java.util.function.Function;
25 import java.util.stream.Stream;
26 import org.apache.commons.math3.analysis.UnivariateFunction;
27 import org.apache.commons.math3.analysis.integration.BaseAbstractUnivariateIntegrator;
28 import org.apache.commons.math3.analysis.integration.IterativeLegendreGaussIntegrator;
29 import org.apache.commons.math3.exception.MaxCountExceededException;
30 import org.apache.commons.math3.util.MathArrays;
31 import org.apache.commons.rng.simple.RandomSource;
32 import org.apache.commons.statistics.distribution.DistributionTestData.ContinuousDistributionTestData;
33 import org.junit.jupiter.api.Assertions;
34 import org.junit.jupiter.api.TestInstance;
35 import org.junit.jupiter.api.TestInstance.Lifecycle;
36 import org.junit.jupiter.params.ParameterizedTest;
37 import org.junit.jupiter.params.provider.Arguments;
38 import org.junit.jupiter.params.provider.MethodSource;
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202 @TestInstance(Lifecycle.PER_CLASS)
203 abstract class BaseContinuousDistributionTest
204 extends BaseDistributionTest<ContinuousDistribution, ContinuousDistributionTestData> {
205
206
207 private static final double INTEGRATOR_ABS_ACCURACY = 1e-10;
208
209 private static final double INTEGRATOR_REL_ACCURACY = 1e-12;
210
211 @Override
212 ContinuousDistributionTestData makeDistributionData(Properties properties) {
213 return new ContinuousDistributionTestData(properties);
214 }
215
216
217
218
219
220
221
222
223
224
225
226
227
228 Stream<Arguments> testDensity() {
229 return stream(TestName.PDF,
230 ContinuousDistributionTestData::getPdfPoints,
231 ContinuousDistributionTestData::getPdfValues);
232 }
233
234
235
236
237
238
239
240 Stream<Arguments> testLogDensity() {
241 return stream(TestName.LOGPDF,
242 ContinuousDistributionTestData::getPdfPoints,
243 ContinuousDistributionTestData::getLogPdfValues);
244 }
245
246
247
248
249
250
251
252 Stream<Arguments> testCumulativeProbability() {
253 return stream(TestName.CDF,
254 ContinuousDistributionTestData::getCdfPoints,
255 ContinuousDistributionTestData::getCdfValues);
256 }
257
258
259
260
261
262
263
264
265
266 Stream<Arguments> testSurvivalProbability() {
267 return stream(TestName.SF,
268 ContinuousDistributionTestData::getSfPoints,
269 ContinuousDistributionTestData::getSfValues);
270 }
271
272
273
274
275
276
277
278 Stream<Arguments> testCumulativeProbabilityHighPrecision() {
279 return stream(TestName.CDF_HP,
280 ContinuousDistributionTestData::getCdfHpPoints,
281 ContinuousDistributionTestData::getCdfHpValues);
282 }
283
284
285
286
287
288
289
290 Stream<Arguments> testSurvivalProbabilityHighPrecision() {
291 return stream(TestName.SF_HP,
292 ContinuousDistributionTestData::getSfHpPoints,
293 ContinuousDistributionTestData::getSfHpValues);
294 }
295
296
297
298
299
300
301
302 Stream<Arguments> testInverseCumulativeProbability() {
303 return stream(TestName.ICDF,
304 ContinuousDistributionTestData::getIcdfPoints,
305 ContinuousDistributionTestData::getIcdfValues);
306 }
307
308
309
310
311
312
313
314 Stream<Arguments> testInverseSurvivalProbability() {
315 return stream(TestName.ISF,
316 ContinuousDistributionTestData::getIsfPoints,
317 ContinuousDistributionTestData::getIsfValues);
318 }
319
320
321
322
323
324
325
326
327 Stream<Arguments> testCumulativeProbabilityInverseMapping() {
328 return stream(TestName.CDF_MAPPING,
329 ContinuousDistributionTestData::getCdfPoints);
330 }
331
332
333
334
335
336
337
338
339 Stream<Arguments> testSurvivalProbabilityInverseMapping() {
340 return stream(TestName.SF_MAPPING,
341 ContinuousDistributionTestData::getSfPoints);
342 }
343
344
345
346
347
348
349
350
351
352 Stream<Arguments> testCumulativeProbabilityHighPrecisionInverseMapping() {
353 return stream(TestName.CDF_HP_MAPPING,
354 ContinuousDistributionTestData::getCdfHpPoints);
355 }
356
357
358
359
360
361
362
363
364
365 Stream<Arguments> testSurvivalProbabilityHighPrecisionInverseMapping() {
366 return stream(TestName.SF_HP_MAPPING,
367 ContinuousDistributionTestData::getSfHpPoints);
368 }
369
370
371
372
373
374
375
376 Stream<Arguments> testSurvivalAndCumulativeProbabilityComplement() {
377
378
379 return stream(TestName.COMPLEMENT,
380 ContinuousDistributionTestData::getCdfPoints);
381 }
382
383
384
385
386
387
388
389
390 Stream<Arguments> testConsistency() {
391
392
393 return stream(TestName.CONSISTENCY,
394 ContinuousDistributionTestData::getCdfPoints);
395 }
396
397
398
399
400
401
402 Stream<Arguments> testSampling() {
403 return stream(TestName.SAMPLING);
404 }
405
406
407
408
409
410
411
412
413
414
415 Stream<Arguments> testDensityIntegrals() {
416
417 final Function<ContinuousDistributionTestData, DoubleTolerance> tolerance =
418 d -> createAbsOrRelTolerance(INTEGRATOR_ABS_ACCURACY * 10, INTEGRATOR_REL_ACCURACY * 10);
419 final TestName name = TestName.INTEGRALS;
420 return stream(d -> d.isDisabled(name),
421 ContinuousDistributionTestData::getCdfPoints,
422 ContinuousDistributionTestData::getCdfValues,
423 tolerance, name.toString());
424 }
425
426
427
428
429
430
431
432 Stream<Arguments> testSupport() {
433 return streamArguments(TestName.SUPPORT,
434 d -> Arguments.of(namedDistribution(d.getParameters()), d.getLower(), d.getUpper()));
435 }
436
437
438
439
440
441
442
443 Stream<Arguments> testMoments() {
444 final TestName name = TestName.MOMENTS;
445 return streamArguments(name,
446 d -> Arguments.of(namedDistribution(d.getParameters()), d.getMean(), d.getVariance(),
447 createTestTolerance(d, name)));
448 }
449
450
451
452
453
454
455 Stream<Arguments> testMedian() {
456 final TestName name = TestName.MEDIAN;
457 return streamArguments(name,
458 d -> Arguments.of(namedDistribution(d.getParameters()), createTestTolerance(d, name)));
459 }
460
461
462
463
464
465
466
467
468
469
470 @ParameterizedTest
471 @MethodSource
472 final void testDensity(ContinuousDistribution dist,
473 double[] points,
474 double[] values,
475 DoubleTolerance tolerance) {
476 for (int i = 0; i < points.length; i++) {
477 final double x = points[i];
478 TestUtils.assertEquals(values[i],
479 dist.density(x), tolerance,
480 () -> "Incorrect probability density value returned for " + x);
481 }
482 }
483
484
485
486
487 @ParameterizedTest
488 @MethodSource
489 final void testLogDensity(ContinuousDistribution dist,
490 double[] points,
491 double[] values,
492 DoubleTolerance tolerance) {
493 for (int i = 0; i < points.length; i++) {
494 final double x = points[i];
495 TestUtils.assertEquals(values[i],
496 dist.logDensity(x), tolerance,
497 () -> "Incorrect probability density value returned for " + x);
498 }
499 }
500
501
502
503
504 @ParameterizedTest
505 @MethodSource
506 final void testCumulativeProbability(ContinuousDistribution dist,
507 double[] points,
508 double[] values,
509 DoubleTolerance tolerance) {
510
511 for (int i = 0; i < points.length; i++) {
512 final double x = points[i];
513 TestUtils.assertEquals(values[i],
514 dist.cumulativeProbability(x),
515 tolerance,
516 () -> "Incorrect cumulative probability value returned for " + x);
517 }
518 }
519
520
521
522
523 @ParameterizedTest
524 @MethodSource
525 final void testSurvivalProbability(ContinuousDistribution dist,
526 double[] points,
527 double[] values,
528 DoubleTolerance tolerance) {
529 for (int i = 0; i < points.length; i++) {
530 final double x = points[i];
531 TestUtils.assertEquals(
532 values[i],
533 dist.survivalProbability(points[i]),
534 tolerance,
535 () -> "Incorrect survival probability value returned for " + x);
536 }
537 }
538
539
540
541
542
543 @ParameterizedTest
544 @MethodSource
545 final void testCumulativeProbabilityHighPrecision(ContinuousDistribution dist,
546 double[] points,
547 double[] values,
548 DoubleTolerance tolerance) {
549 assertHighPrecision(tolerance, values);
550 testCumulativeProbability(dist, points, values, tolerance);
551 }
552
553
554
555
556
557 @ParameterizedTest
558 @MethodSource
559 final void testSurvivalProbabilityHighPrecision(ContinuousDistribution dist,
560 double[] points,
561 double[] values,
562 DoubleTolerance tolerance) {
563 assertHighPrecision(tolerance, values);
564 testSurvivalProbability(dist, points, values, tolerance);
565 }
566
567
568
569
570
571
572 @ParameterizedTest
573 @MethodSource
574 final void testInverseCumulativeProbability(ContinuousDistribution dist,
575 double[] points,
576 double[] values,
577 DoubleTolerance tolerance) {
578 final double lower = dist.getSupportLowerBound();
579 final double upper = dist.getSupportUpperBound();
580 for (int i = 0; i < points.length; i++) {
581 final double x = values[i];
582 if (x < lower || x > upper) {
583 continue;
584 }
585 final double p = points[i];
586 TestUtils.assertEquals(
587 x,
588 dist.inverseCumulativeProbability(p),
589 tolerance,
590 () -> "Incorrect inverse cumulative probability value returned for " + p);
591 }
592 }
593
594
595
596
597
598
599 @ParameterizedTest
600 @MethodSource
601 final void testInverseSurvivalProbability(ContinuousDistribution dist,
602 double[] points,
603 double[] values,
604 DoubleTolerance tolerance) {
605 final double lower = dist.getSupportLowerBound();
606 final double upper = dist.getSupportUpperBound();
607 for (int i = 0; i < points.length; i++) {
608 final double x = values[i];
609 if (x < lower || x > upper) {
610 continue;
611 }
612 final double p = points[i];
613 TestUtils.assertEquals(
614 x,
615 dist.inverseSurvivalProbability(p),
616 tolerance,
617 () -> "Incorrect inverse survival probability value returned for " + p);
618 }
619 }
620
621
622
623
624
625
626
627
628
629
630
631 @ParameterizedTest
632 @MethodSource
633 final void testCumulativeProbabilityInverseMapping(ContinuousDistribution dist,
634 double[] points,
635 DoubleTolerance tolerance) {
636 final double lower = dist.getSupportLowerBound();
637 final double upper = dist.getSupportUpperBound();
638 for (int i = 0; i < points.length; i++) {
639 final double x = points[i];
640 if (x < lower || x > upper) {
641 continue;
642 }
643 final double p = dist.cumulativeProbability(x);
644 final double x1 = dist.inverseCumulativeProbability(p);
645 final double p1 = dist.cumulativeProbability(x1);
646
647
648 TestUtils.assertEquals(
649 p,
650 p1,
651 tolerance,
652 () -> "Incorrect CDF(inverse CDF(CDF(x))) value returned for " + x);
653 }
654 }
655
656
657
658
659
660
661
662
663
664
665
666 @ParameterizedTest
667 @MethodSource
668 final void testSurvivalProbabilityInverseMapping(ContinuousDistribution dist,
669 double[] points,
670 DoubleTolerance tolerance) {
671 final double lower = dist.getSupportLowerBound();
672 final double upper = dist.getSupportUpperBound();
673 for (int i = 0; i < points.length; i++) {
674 final double x = points[i];
675 if (x < lower || x > upper) {
676 continue;
677 }
678 final double p = dist.survivalProbability(x);
679 final double x1 = dist.inverseSurvivalProbability(p);
680 final double p1 = dist.survivalProbability(x1);
681
682
683 TestUtils.assertEquals(
684 p,
685 p1,
686 tolerance,
687 () -> "Incorrect SF(inverse SF(SF(x))) value returned for " + x);
688 }
689 }
690
691
692
693
694
695
696 @ParameterizedTest
697 @MethodSource
698 final void testCumulativeProbabilityHighPrecisionInverseMapping(
699 ContinuousDistribution dist,
700 double[] points,
701 DoubleTolerance tolerance) {
702 testCumulativeProbabilityInverseMapping(dist, points, tolerance);
703 }
704
705
706
707
708
709
710 @ParameterizedTest
711 @MethodSource
712 final void testSurvivalProbabilityHighPrecisionInverseMapping(
713 ContinuousDistribution dist,
714 double[] points,
715 DoubleTolerance tolerance) {
716 testSurvivalProbabilityInverseMapping(dist, points, tolerance);
717 }
718
719
720
721
722
723 @ParameterizedTest
724 @MethodSource
725 final void testSurvivalAndCumulativeProbabilityComplement(ContinuousDistribution dist,
726 double[] points,
727 DoubleTolerance tolerance) {
728 for (final double x : points) {
729 TestUtils.assertEquals(
730 1.0,
731 dist.survivalProbability(x) + dist.cumulativeProbability(x),
732 tolerance,
733 () -> "survival + cumulative probability were not close to 1.0 for " + x);
734 }
735 }
736
737
738
739
740
741 @ParameterizedTest
742 @MethodSource
743 final void testConsistency(ContinuousDistribution dist,
744 double[] points,
745 DoubleTolerance tolerance) {
746 for (int i = 0; i < points.length; i++) {
747 final double x0 = points[i];
748
749
750 Assertions.assertEquals(
751 0.0,
752 dist.probability(x0, x0),
753 () -> "Non-zero probability(x, x) for " + x0);
754
755 final double cdf0 = dist.cumulativeProbability(x0);
756 final double sf0 = cdf0 >= 0.5 ? dist.survivalProbability(x0) : Double.NaN;
757 for (int j = 0; j < points.length; j++) {
758 final double x1 = points[j];
759
760 if (x0 < x1) {
761
762
763
764 double expected;
765 if (cdf0 >= 0.5) {
766 expected = sf0 - dist.survivalProbability(x1);
767 } else {
768 expected = dist.cumulativeProbability(x1) - cdf0;
769 }
770 TestUtils.assertEquals(
771 expected,
772 dist.probability(x0, x1),
773 tolerance,
774 () -> "Inconsistent probability for (" + x0 + "," + x1 + ")");
775 } else if (x0 > x1) {
776 Assertions.assertThrows(IllegalArgumentException.class,
777 () -> dist.probability(x0, x1),
778 "probability(double, double) should have thrown an exception that first argument is too large");
779 }
780 }
781 }
782 }
783
784
785
786
787
788 @ParameterizedTest
789 @MethodSource(value = "streamDistribution")
790 final void testOutsideSupport(ContinuousDistribution dist) {
791
792 final double lo = dist.getSupportLowerBound();
793 Assertions.assertEquals(0.0, dist.cumulativeProbability(lo), "cdf(lower)");
794 Assertions.assertEquals(lo, dist.inverseCumulativeProbability(-0.0), "icdf(-0.0)");
795 Assertions.assertEquals(lo, dist.inverseCumulativeProbability(0.0), "icdf(0.0)");
796 Assertions.assertEquals(lo, dist.inverseSurvivalProbability(1.0), "isf(1.0)");
797
798 Assertions.assertTrue(lo <= dist.inverseCumulativeProbability(Double.MIN_VALUE), "lo <= icdf(min)");
799 Assertions.assertTrue(lo <= dist.inverseCumulativeProbability(Double.MIN_NORMAL), "lo <= icdf(min_normal)");
800 Assertions.assertTrue(lo <= dist.inverseSurvivalProbability(Math.nextDown(1.0)), "lo <= isf(nextDown(1.0))");
801
802 final double below = Math.nextDown(lo);
803 Assertions.assertEquals(0.0, dist.density(below), "pdf(x < lower)");
804 Assertions.assertEquals(Double.NEGATIVE_INFINITY, dist.logDensity(below), "logpdf(x < lower)");
805 Assertions.assertEquals(0.0, dist.cumulativeProbability(below), "cdf(x < lower)");
806 Assertions.assertEquals(1.0, dist.survivalProbability(below), "sf(x < lower)");
807
808 final double hi = dist.getSupportUpperBound();
809 Assertions.assertTrue(lo <= hi, "lower <= upper");
810 Assertions.assertEquals(1.0, dist.cumulativeProbability(hi), "cdf(upper)");
811 Assertions.assertEquals(0.0, dist.survivalProbability(hi), "sf(upper)");
812 Assertions.assertEquals(hi, dist.inverseCumulativeProbability(1.0), "icdf(1.0)");
813 Assertions.assertEquals(hi, dist.inverseSurvivalProbability(-0.0), "isf(-0.0)");
814 Assertions.assertEquals(hi, dist.inverseSurvivalProbability(0.0), "isf(0.0)");
815
816 Assertions.assertTrue(hi >= dist.inverseCumulativeProbability(Math.nextDown(1.0)), "hi >= icdf(nextDown(1.0))");
817 Assertions.assertTrue(hi >= dist.inverseSurvivalProbability(Double.MIN_VALUE), "lo <= isf(min)");
818 Assertions.assertTrue(hi >= dist.inverseSurvivalProbability(Double.MIN_NORMAL), "lo <= isf(min_normal)");
819
820 final double above = Math.nextUp(hi);
821 Assertions.assertEquals(0.0, dist.density(above), "pdf(x > upper)");
822 Assertions.assertEquals(Double.NEGATIVE_INFINITY, dist.logDensity(above), "logpdf(x > upper)");
823 Assertions.assertEquals(1.0, dist.cumulativeProbability(above), "cdf(x > upper)");
824 Assertions.assertEquals(0.0, dist.survivalProbability(above), "sf(x > upper)");
825 }
826
827
828
829
830
831 @ParameterizedTest
832 @MethodSource(value = "streamDistribution")
833 final void testInvalidProbabilities(ContinuousDistribution dist) {
834 final double lo = dist.getSupportLowerBound();
835 final double hi = dist.getSupportUpperBound();
836 if (lo < hi) {
837 Assertions.assertThrows(DistributionException.class, () -> dist.probability(hi, lo), "x0 > x1");
838 }
839 Assertions.assertThrows(DistributionException.class, () -> dist.inverseCumulativeProbability(-1), "p < 0.0");
840 Assertions.assertThrows(DistributionException.class, () -> dist.inverseCumulativeProbability(2), "p > 1.0");
841 Assertions.assertThrows(DistributionException.class, () -> dist.inverseSurvivalProbability(-1), "q < 0.0");
842 Assertions.assertThrows(DistributionException.class, () -> dist.inverseSurvivalProbability(2), "q > 1.0");
843 }
844
845
846
847
848 @ParameterizedTest
849 @MethodSource
850 final void testSampling(ContinuousDistribution dist) {
851 final double[] quartiles = TestUtils.getDistributionQuartiles(dist);
852
853
854
855 final double[] expected = {
856 dist.cumulativeProbability(quartiles[0]),
857 dist.probability(quartiles[0], quartiles[1]),
858 dist.probability(quartiles[1], quartiles[2]),
859 dist.survivalProbability(quartiles[2]),
860 };
861
862
863
864
865
866 final DoubleTolerance tolerance = DoubleTolerances.relative(1e-3);
867 for (final double p : expected) {
868 TestUtils.assertEquals(0.25, p, tolerance,
869 () -> "Unexpected quartiles: " + Arrays.toString(expected));
870 }
871
872 final int sampleSize = 1000;
873 MathArrays.scaleInPlace(sampleSize, expected);
874
875
876 final ContinuousDistribution.Sampler sampler =
877 dist.createSampler(RandomSource.XO_SHI_RO_256_PP.create(123456789L));
878 final double[] sample = TestUtils.sample(sampleSize, sampler);
879
880 final long[] counts = new long[4];
881 for (int i = 0; i < sampleSize; i++) {
882 TestUtils.updateCounts(sample[i], counts, quartiles);
883 }
884
885 TestUtils.assertChiSquareAccept(expected, counts, 0.001);
886 }
887
888
889
890
891
892
893
894
895
896 @ParameterizedTest
897 @MethodSource
898 final void testDensityIntegrals(ContinuousDistribution dist,
899 double[] points,
900 double[] values,
901 DoubleTolerance tolerance) {
902 final BaseAbstractUnivariateIntegrator integrator =
903 new IterativeLegendreGaussIntegrator(5, INTEGRATOR_REL_ACCURACY, INTEGRATOR_ABS_ACCURACY);
904 final UnivariateFunction d = dist::density;
905 final ArrayList<Double> integrationTestPoints = new ArrayList<>();
906 for (int i = 0; i < points.length; i++) {
907 if (Double.isNaN(values[i]) ||
908 values[i] < 1e-5 ||
909 values[i] > 1 - 1e-5) {
910 continue;
911 }
912 integrationTestPoints.add(points[i]);
913 }
914 Collections.sort(integrationTestPoints);
915 for (int i = 1; i < integrationTestPoints.size(); i++) {
916 final double x0 = integrationTestPoints.get(i - 1);
917 final double x1 = integrationTestPoints.get(i);
918
919
920 if (Math.max(dist.density(x0), dist.density(x1)) > 1e3) {
921 continue;
922 }
923 double integral = 0;
924 try {
925
926 integral = integrator.integrate(1000000,
927 d, x0, x1);
928 } catch (MaxCountExceededException e) {
929 Assertions.fail("Failed density integral: " + x0 + " to " + x1, e);
930 }
931 TestUtils.assertEquals(
932 dist.probability(x0, x1),
933 integral,
934 tolerance,
935 () -> "Invalid density integral: " + x0 + " to " + x1);
936 }
937 }
938
939
940
941
942 @ParameterizedTest
943 @MethodSource
944 final void testSupport(ContinuousDistribution dist, double lower, double upper) {
945 Assertions.assertEquals(lower, dist.getSupportLowerBound(), "lower bound");
946 Assertions.assertEquals(upper, dist.getSupportUpperBound(), "upper bound");
947 }
948
949
950
951
952 @ParameterizedTest
953 @MethodSource
954 final void testMoments(ContinuousDistribution dist, double mean, double variance, DoubleTolerance tolerance) {
955 TestUtils.assertEquals(mean, dist.getMean(), tolerance, "mean");
956 TestUtils.assertEquals(variance, dist.getVariance(), tolerance, "variance");
957 }
958
959
960
961
962
963
964
965
966
967 @ParameterizedTest
968 @MethodSource
969 final void testMedian(ContinuousDistribution dist, DoubleTolerance tolerance) {
970 if (dist instanceof AbstractContinuousDistribution) {
971 final AbstractContinuousDistribution d = (AbstractContinuousDistribution) dist;
972 TestUtils.assertEquals(d.inverseCumulativeProbability(0.5), d.getMedian(), tolerance, "median");
973 assertMethodNotModified(dist.getClass(), Modifier.PUBLIC | Modifier.PROTECTED, "getMedian");
974 }
975 }
976 }