1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.math4.legacy.distribution;
18
19 import java.io.BufferedReader;
20 import java.io.IOException;
21 import java.io.InputStreamReader;
22 import java.net.URL;
23 import java.util.ArrayList;
24
25 import org.apache.commons.rng.UniformRandomProvider;
26 import org.apache.commons.rng.simple.RandomSource;
27 import org.apache.commons.statistics.distribution.ContinuousDistribution;
28 import org.apache.commons.statistics.distribution.UniformContinuousDistribution;
29 import org.apache.commons.statistics.distribution.NormalDistribution;
30 import org.apache.commons.statistics.distribution.ExponentialDistribution;
31 import org.apache.commons.math4.legacy.TestUtils;
32 import org.apache.commons.math4.legacy.analysis.UnivariateFunction;
33 import org.apache.commons.math4.legacy.analysis.integration.BaseAbstractUnivariateIntegrator;
34 import org.apache.commons.math4.legacy.analysis.integration.IterativeLegendreGaussIntegrator;
35 import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException;
36 import org.apache.commons.math4.legacy.stat.descriptive.SummaryStatistics;
37 import org.apache.commons.math4.core.jdkmath.JdkMath;
38 import org.junit.Assert;
39 import org.junit.Before;
40 import org.junit.Test;
41
42
43
44
45 public final class EmpiricalDistributionTest extends RealDistributionAbstractTest {
46 private EmpiricalDistribution empiricalDistribution = null;
47 private double[] dataArray = null;
48 private final int n = 10000;
49
50 private final double binMass = 10d / (n + 1);
51
52 private final double firstBinMass = 11d / (n + 1);
53
54 @Override
55 @Before
56 public void setUp() {
57 super.setUp();
58
59 final URL url = getClass().getResource("testData.txt");
60 final ArrayList<Double> list = new ArrayList<>();
61 try {
62 final BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream()));
63 String str = null;
64 while ((str = in.readLine()) != null) {
65 list.add(Double.valueOf(str));
66 }
67 in.close();
68 } catch (IOException ex) {
69 Assert.fail("IOException " + ex);
70 }
71
72 dataArray = new double[list.size()];
73 int i = 0;
74 for (Double data : list) {
75 dataArray[i] = data.doubleValue();
76 i++;
77 }
78
79 empiricalDistribution = EmpiricalDistribution.from(100, dataArray);
80 }
81
82
83 @Test(expected=NotStrictlyPositiveException.class)
84 public void testPrecondition1() {
85 EmpiricalDistribution.from(0, new double[] {1,2,3});
86 }
87
88
89
90
91
92 @Test
93 public void testDoubleLoad() {
94
95
96 Assert.assertEquals(empiricalDistribution.getSampleStats().getN(),
97 1000, 1e-7);
98
99 Assert.assertEquals(empiricalDistribution.getSampleStats().getMean(),
100 5.069831575018909, 1e-7);
101 Assert.assertEquals(empiricalDistribution.getSampleStats().getStandardDeviation(),
102 1.0173699343977738, 1e-7);
103
104 double[] bounds = empiricalDistribution.getGeneratorUpperBounds();
105 Assert.assertEquals(bounds.length, 100);
106 Assert.assertEquals(bounds[99], 1.0, 10e-12);
107 }
108
109
110 @Test
111 public void testMath1531() {
112 final double[] data = new double[] {
113 50.993456376721454, 49.455345691918055, 49.527276095295804, 50.017183448668845, 49.10508147470046,
114 49.813998274118696, 50.87195348756139, 50.419474110037, 50.63614906979689, 49.49694777179407,
115 50.71799078406067, 50.03192853759164, 49.915092423165994, 49.56895392597687, 51.034638001064934,
116 50.681227971275945, 50.43749845081759, 49.86513120270245, 50.21475262482965, 49.99202971042547,
117 50.02382189838519, 49.386888585302884, 49.45585010202781, 49.988009479855435, 49.8136712206123,
118 49.6715197127997, 50.1981278397565, 49.842297508010276, 49.62491227740015, 50.05101916097176,
119 48.834912763303926, 49.806787657848574, 49.478236106374695, 49.56648347371614, 49.95069238081982,
120 49.71845132077346, 50.6097468705947, 49.80724637775541, 49.90448813086025, 49.39641861662603,
121 50.434295712893714, 49.227176959566734, 49.541126466050905, 49.03416593170446, 49.11584328494423,
122 49.61387482435674, 49.92877857995328, 50.70638552955101, 50.60078208448842, 49.39326233277838,
123 49.21488424364095, 49.69503351015096, 50.13733214001718, 50.22084761458942, 51.09804435604931,
124 49.18559131120419, 49.52286371605357, 49.34804374996689, 49.6901827776375, 50.01316351359638,
125 48.7751460520373, 50.12961836291053, 49.9978419772511, 49.885658399408584, 49.673438879979834,
126 49.45565980965606, 50.429747484906564, 49.40129274804164, 50.13034614008073, 49.87685735146651,
127 50.12967905393557, 50.323560376181696, 49.83519233651367, 49.37333369733053, 49.70074301611427,
128 50.11626105774947, 50.28249500380083, 50.543354367136466, 50.05866241335002, 50.39516515672527,
129 49.4838561463057, 50.451757089234796, 50.31370674203726, 49.79063762614284, 50.19652349768548,
130 49.75881420748814, 49.98371855036422, 49.82171344472916, 48.810793204162415, 49.37040569084592,
131 50.050641186203976, 50.48360952263646, 50.86666450358076, 50.463268776129844, 50.137489751888666,
132 50.23823061444118, 49.881460479468004, 50.641174398764356, 49.09314136851421, 48.80877928574451,
133 50.46197084844826, 49.97691704141741, 49.99933997561926, 50.25692254481885, 49.52973451252715,
134 49.81229858420664, 48.996112655915994, 48.740531054814674, 50.026642633066416, 49.98696633604899,
135 49.61307159972952, 50.5115278979726, 50.75245152442404, 50.51807785445929, 49.60929671768147,
136 49.1079533564074, 49.65347196551866, 49.31684818724059, 50.4906368627049, 50.37483603684714
137 };
138
139 EmpiricalDistribution.from(120, data).inverseCumulativeProbability(0.7166666666666669);
140 }
141
142
143
144
145
146
147 @Test
148 public void testNext() {
149 tstGen(empiricalDistribution,
150 0.1);
151 }
152
153
154
155
156 @Test
157 public void testGridTooFine() {
158 tstGen(EmpiricalDistribution.from(1001, dataArray),
159 0.1);
160 }
161
162
163
164
165 @Test
166 public void testGridTooFat() {
167 tstGen(EmpiricalDistribution.from(1, dataArray),
168 5);
169
170 }
171
172
173
174
175 @Test
176 public void testBinIndexOverflow() {
177 double[] x = new double[] {9474.94326071674, 2080107.8865462579};
178 EmpiricalDistribution.from(1000, x);
179 }
180
181 @Test(expected=NullPointerException.class)
182 public void testLoadNullDoubleArray() {
183 EmpiricalDistribution.from(1000, null);
184 }
185
186
187
188
189 @Test
190 public void testGetBinUpperBounds() {
191 double[] testData = {0, 1, 1, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10};
192 EmpiricalDistribution dist = EmpiricalDistribution.from(5, testData);
193 double[] expectedBinUpperBounds = {2, 4, 6, 8, 10};
194 double[] expectedGeneratorUpperBounds = {4d/13d, 7d/13d, 9d/13d, 11d/13d, 1};
195 double tol = 10E-12;
196 TestUtils.assertEquals(expectedBinUpperBounds, dist.getUpperBounds(), tol);
197 TestUtils.assertEquals(expectedGeneratorUpperBounds, dist.getGeneratorUpperBounds(), tol);
198 }
199
200 private void verifySame(EmpiricalDistribution d1,
201 EmpiricalDistribution d2) {
202 Assert.assertEquals(d1.getBinCount(), d2.getBinCount());
203 Assert.assertEquals(d1.getSampleStats(), d2.getSampleStats());
204
205 for (int i = 0; i < d1.getUpperBounds().length; i++) {
206 Assert.assertEquals(d1.getUpperBounds()[i], d2.getUpperBounds()[i], 0);
207 }
208 Assert.assertEquals(d1.getBinStats(), d2.getBinStats());
209 }
210
211 private void tstGen(EmpiricalDistribution dist,
212 double tolerance) {
213 final ContinuousDistribution.Sampler sampler
214 = dist.createSampler(RandomSource.WELL_19937_C.create(1000));
215 final SummaryStatistics stats = new SummaryStatistics();
216 for (int i = 1; i < 1000; i++) {
217 stats.addValue(sampler.sample());
218 }
219 Assert.assertEquals("mean", 5.069831575018909, stats.getMean(),tolerance);
220 Assert.assertEquals("std dev", 1.0173699343977738, stats.getStandardDeviation(),tolerance);
221 }
222
223
224
225 @Override
226 public ContinuousDistribution makeDistribution() {
227
228 final double[] sourceData = new double[n + 1];
229 for (int i = 0; i < n + 1; i++) {
230 sourceData[i] = i;
231 }
232 EmpiricalDistribution dist = EmpiricalDistribution.from(1000, sourceData);
233 return dist;
234 }
235
236 @Override
237 public double[] makeCumulativeTestPoints() {
238 final double[] testPoints = new double[] {9, 10, 15, 1000, 5004, 9999};
239 return testPoints;
240 }
241
242
243 @Override
244 public double[] makeCumulativeTestValues() {
245
246
247
248
249
250 final double[] testPoints = getCumulativeTestPoints();
251 final double[] cumValues = new double[testPoints.length];
252 final EmpiricalDistribution empiricalDistribution = (EmpiricalDistribution) makeDistribution();
253 final double[] binBounds = empiricalDistribution.getUpperBounds();
254 for (int i = 0; i < testPoints.length; i++) {
255 final int bin = findBin(testPoints[i]);
256 final double lower = bin == 0 ? empiricalDistribution.getSupportLowerBound() :
257 binBounds[bin - 1];
258 final double upper = binBounds[bin];
259
260
261 final double bMinus = bin == 0 ? 0 : (bin - 1) * binMass + firstBinMass;
262 final ContinuousDistribution kernel = findKernel(lower, upper);
263 final double withinBinKernelMass = kernel.probability(lower, upper);
264 final double kernelCum = kernel.probability(lower, testPoints[i]);
265 cumValues[i] = bMinus + (bin == 0 ? firstBinMass : binMass) * kernelCum/withinBinKernelMass;
266 }
267 return cumValues;
268 }
269
270 @Override
271 public double[] makeDensityTestValues() {
272 final double[] testPoints = getCumulativeTestPoints();
273 final double[] densityValues = new double[testPoints.length];
274 final EmpiricalDistribution empiricalDistribution = (EmpiricalDistribution) makeDistribution();
275 final double[] binBounds = empiricalDistribution.getUpperBounds();
276 for (int i = 0; i < testPoints.length; i++) {
277 final int bin = findBin(testPoints[i]);
278 final double lower = bin == 0 ? empiricalDistribution.getSupportLowerBound() :
279 binBounds[bin - 1];
280 final double upper = binBounds[bin];
281 final ContinuousDistribution kernel = findKernel(lower, upper);
282 final double withinBinKernelMass = kernel.probability(lower, upper);
283 final double density = kernel.density(testPoints[i]);
284 densityValues[i] = density * (bin == 0 ? firstBinMass : binMass) / withinBinKernelMass;
285 }
286 return densityValues;
287 }
288
289
290
291
292
293
294
295 @Override
296 @Test
297 public void testDensityIntegrals() {
298 final ContinuousDistribution distribution = makeDistribution();
299 final double tol = 1.0e-9;
300 final BaseAbstractUnivariateIntegrator integrator =
301 new IterativeLegendreGaussIntegrator(5, 1.0e-12, 1.0e-10);
302 final UnivariateFunction d = new UnivariateFunction() {
303 @Override
304 public double value(double x) {
305 return distribution.density(x);
306 }
307 };
308 final double[] lower = {0, 5, 1000, 5001, 9995};
309 final double[] upper = {5, 12, 1030, 5010, 10000};
310 for (int i = 1; i < 5; i++) {
311 Assert.assertEquals(
312 distribution.probability(
313 lower[i], upper[i]),
314 integrator.integrate(
315 1000000,
316 d, lower[i], upper[i]), tol);
317 }
318 }
319
320
321
322
323
324 @Test
325 public void testSampleValuesRange() {
326
327
328 final double[] data = new double[100];
329 for (int i = 0; i < 50; i++) {
330 data[i] = 1 / ((double) i + 1);
331 }
332 for (int i = 51; i < 100; i++) {
333 data[i] = 1 - 1 / (100 - (double) i + 2);
334 }
335 EmpiricalDistribution dist = EmpiricalDistribution.from(10, data);
336 ContinuousDistribution.Sampler sampler
337 = dist.createSampler(RandomSource.WELL_19937_C.create(1000));
338 for (int i = 0; i < 1000; i++) {
339 final double dev = sampler.sample();
340 Assert.assertTrue(dev < 1);
341 Assert.assertTrue(dev > 0);
342 }
343 }
344
345
346
347
348 @Test
349 public void testNoBinVariance() {
350 final double[] data = {0, 0, 1, 1};
351 EmpiricalDistribution dist = EmpiricalDistribution.from(2, data);
352 ContinuousDistribution.Sampler sampler
353 = dist.createSampler(RandomSource.WELL_19937_C.create(1000));
354 for (int i = 0; i < 1000; i++) {
355 final double dev = sampler.sample();
356 Assert.assertTrue(dev == 0 || dev == 1);
357 }
358 Assert.assertEquals(0.5, dist.cumulativeProbability(0), Double.MIN_VALUE);
359 Assert.assertEquals(1.0, dist.cumulativeProbability(1), Double.MIN_VALUE);
360 Assert.assertEquals(0.5, dist.cumulativeProbability(0.5), Double.MIN_VALUE);
361 Assert.assertEquals(0.5, dist.cumulativeProbability(0.7), Double.MIN_VALUE);
362 }
363
364
365
366
367 private int findBin(double x) {
368
369 final double nMinus = JdkMath.floor(x / 10);
370 final int bin = (int) JdkMath.round(nMinus);
371
372 return JdkMath.floor(x / 10) == x / 10 ? bin - 1 : bin;
373 }
374
375
376
377
378
379
380
381
382 private ContinuousDistribution findKernel(double lower, double upper) {
383 if (lower < 1) {
384 return NormalDistribution.of(5d, 3.3166247903554);
385 } else {
386 return NormalDistribution.of((upper + lower + 1) / 2d, 3.0276503540974917);
387 }
388 }
389
390 @Test
391 public void testKernelOverrideUniform() {
392 final double[] data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
393 final EmpiricalDistribution dist =
394 EmpiricalDistribution.from(5, data,
395 s -> UniformContinuousDistribution.of(s.getMin(), s.getMax()));
396 final ContinuousDistribution.Sampler sampler
397 = dist.createSampler(RandomSource.WELL_19937_C.create(1000));
398
399 final double bounds[] = {3d, 6d, 9d, 12d};
400 final double tol = 10E-12;
401 for (int i = 0; i < 20; i++) {
402 final double v = sampler.sample();
403
404 for (int j = 0; j < bounds.length; j++) {
405 Assert.assertFalse(v > bounds[j] + tol && v < bounds[j] + 1 - tol);
406 }
407 }
408 Assert.assertEquals(0.0, dist.cumulativeProbability(1), tol);
409 Assert.assertEquals(0.1, dist.cumulativeProbability(2), tol);
410 Assert.assertEquals(0.6, dist.cumulativeProbability(10), tol);
411 Assert.assertEquals(0.8, dist.cumulativeProbability(12), tol);
412 Assert.assertEquals(0.8, dist.cumulativeProbability(13), tol);
413 Assert.assertEquals(1.0, dist.cumulativeProbability(15), tol);
414
415 Assert.assertEquals(2.0, dist.inverseCumulativeProbability(0.1), tol);
416 Assert.assertEquals(3.0, dist.inverseCumulativeProbability(0.2), tol);
417 Assert.assertEquals(5.0, dist.inverseCumulativeProbability(0.3), tol);
418 Assert.assertEquals(6.0, dist.inverseCumulativeProbability(0.4), tol);
419 Assert.assertEquals(8.0, dist.inverseCumulativeProbability(0.5), tol);
420 Assert.assertEquals(9.0, dist.inverseCumulativeProbability(0.6), tol);
421 }
422
423 @Test
424 public void testMath1431() {
425 final UniformRandomProvider rng = RandomSource.WELL_19937_C.create(1000);
426 final ContinuousDistribution.Sampler exponentialDistributionSampler
427 = ExponentialDistribution.of(0.05).createSampler(rng);
428 final double[] empiricalDataPoints = new double[3000];
429 for (int i = 0; i < empiricalDataPoints.length; i++) {
430 empiricalDataPoints[i] = exponentialDistributionSampler.sample();
431 }
432
433 final EmpiricalDistribution testDistribution = EmpiricalDistribution.from(100, empiricalDataPoints);
434
435 for (int i = 0; i < 1000; i++) {
436 final double point = rng.nextDouble();
437 final double cdf = testDistribution.cumulativeProbability(point);
438 Assert.assertFalse("point: " + point, Double.isNaN(cdf));
439 }
440 }
441
442 @Test
443 public void testMath1462() {
444 final double[] data = {
445 6464.0205, 6449.1328, 6489.4569, 6497.5533, 6251.6487,
446 6252.6513, 6339.7883, 6356.2622, 6222.1251, 6157.3813,
447 6242.4741, 6332.5347, 6468.0633, 6471.2319, 6473.9929,
448 6589.1322, 6511.2191, 6339.4349, 6307.7735, 6288.0915,
449 6354.0572, 6385.8283, 6325.3756, 6433.1699, 6433.6507,
450 6424.6806, 6380.5268, 6407.6705, 6241.2198, 6230.3681,
451 6367.5943, 6358.4817, 6272.8039, 6269.0211, 6312.9027,
452 6349.5926, 6404.0775, 6326.986, 6283.8685, 6309.9021,
453 6336.8554, 6389.1598, 6281.0372, 6304.8852, 6359.2651,
454 6426.519, 6400.3926, 6440.6798, 6292.5812, 6398.4911,
455 6307.0002, 6284.2111, 6271.371, 6368.6377, 6323.3372,
456 6276.2155, 6335.0117, 6319.2466, 6252.9969, 6445.2074,
457 6461.3944, 6384.1345
458 };
459
460 final EmpiricalDistribution ed = EmpiricalDistribution.from(data.length / 10, data);
461
462 double v;
463 double p;
464
465 p = 0.49999;
466 v = ed.inverseCumulativeProbability(p);
467 Assert.assertTrue("p=" + p + " => v=" + v, v < 6344);
468
469 p = 0.5;
470 v = ed.inverseCumulativeProbability(p);
471 Assert.assertTrue("p=" + p + " => v=" + v, v < 7000);
472
473 p = 0.51111;
474 v = ed.inverseCumulativeProbability(p);
475 Assert.assertTrue("p=" + p + " => v=" + v, v < 6350);
476 }
477
478 @Test
479 public void testMath1462InfiniteQuantile() {
480 final double[] data = {
481 18054, 17548, 17350, 17860, 17827, 17653, 18113, 18405, 17746,
482 17647, 18160, 17955, 17705, 17890, 17974, 17857, 13287, 18645,
483 17775, 17730, 17996, 18263, 17861, 17161, 17717, 18134, 18669,
484 18340, 17221, 18292, 18146, 17520, 18207, 17829, 18206, 13301,
485 18257, 17626, 18358, 18340, 18320, 17852, 17804, 17577, 17718,
486 18099, 13395, 17763, 17911, 17978, 12935, 17519, 17550, 18728,
487 18518, 17698, 18739, 18553, 17982, 18113, 17974, 17961, 17645,
488 17867, 17890, 17498, 18718, 18191, 18177, 17923, 18164, 18155,
489 6212, 5961, 711
490 };
491
492 final double p = 0.32;
493 for (int i = 745; i <= 1100; i++) {
494 final EmpiricalDistribution ed = EmpiricalDistribution.from(i, data);
495 final double v = ed.inverseCumulativeProbability(p);
496
497 Assert.assertTrue("p=" + p + " => v=" + v, Double.isFinite(v));
498 }
499 }
500 }