1   
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  
12  
13  
14  
15  
16  
17  package org.apache.commons.math4.legacy.distribution;
18  
19  import java.io.BufferedReader;
20  import java.io.IOException;
21  import java.io.InputStreamReader;
22  import java.net.URL;
23  import java.util.ArrayList;
24  
25  import org.apache.commons.rng.UniformRandomProvider;
26  import org.apache.commons.rng.simple.RandomSource;
27  import org.apache.commons.statistics.distribution.ContinuousDistribution;
28  import org.apache.commons.statistics.distribution.UniformContinuousDistribution;
29  import org.apache.commons.statistics.distribution.NormalDistribution;
30  import org.apache.commons.statistics.distribution.ExponentialDistribution;
31  import org.apache.commons.math4.legacy.TestUtils;
32  import org.apache.commons.math4.legacy.analysis.UnivariateFunction;
33  import org.apache.commons.math4.legacy.analysis.integration.BaseAbstractUnivariateIntegrator;
34  import org.apache.commons.math4.legacy.analysis.integration.IterativeLegendreGaussIntegrator;
35  import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException;
36  import org.apache.commons.math4.legacy.stat.descriptive.SummaryStatistics;
37  import org.apache.commons.math4.core.jdkmath.JdkMath;
38  import org.junit.Assert;
39  import org.junit.Before;
40  import org.junit.Test;
41  
42  
43  
44  
45  public final class EmpiricalDistributionTest extends RealDistributionAbstractTest {
46      private EmpiricalDistribution empiricalDistribution = null;
47      private double[] dataArray = null;
48      private final int n = 10000;
49      
50      private final double binMass = 10d / (n + 1);
51      
52      private final double firstBinMass = 11d / (n + 1);
53  
54      @Override
55      @Before
56      public void setUp() {
57          super.setUp();
58  
59          final URL url = getClass().getResource("testData.txt");
60          final ArrayList<Double> list = new ArrayList<>();
61          try {
62              final BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream()));
63              String str = null;
64              while ((str = in.readLine()) != null) {
65                  list.add(Double.valueOf(str));
66              }
67              in.close();
68          } catch (IOException ex) {
69              Assert.fail("IOException " + ex);
70          }
71  
72          dataArray = new double[list.size()];
73          int i = 0;
74          for (Double data : list) {
75              dataArray[i] = data.doubleValue();
76              i++;
77          }
78  
79          empiricalDistribution = EmpiricalDistribution.from(100, dataArray);
80      }
81  
82      
83      @Test(expected=NotStrictlyPositiveException.class)
84      public void testPrecondition1() {
85          EmpiricalDistribution.from(0, new double[] {1,2,3});
86      }
87  
88      
89  
90  
91  
92      @Test
93      public void testDoubleLoad() {
94          
95          
96          Assert.assertEquals(empiricalDistribution.getSampleStats().getN(),
97                              1000, 1e-7);
98          
99          Assert.assertEquals(empiricalDistribution.getSampleStats().getMean(),
100                             5.069831575018909, 1e-7);
101         Assert.assertEquals(empiricalDistribution.getSampleStats().getStandardDeviation(),
102                             1.0173699343977738, 1e-7);
103 
104         double[] bounds = empiricalDistribution.getGeneratorUpperBounds();
105         Assert.assertEquals(bounds.length, 100);
106         Assert.assertEquals(bounds[99], 1.0, 10e-12);
107     }
108 
109     
110     @Test
111     public void testMath1531() {
112         final double[] data = new double[] {
113             50.993456376721454, 49.455345691918055, 49.527276095295804, 50.017183448668845, 49.10508147470046,
114             49.813998274118696, 50.87195348756139, 50.419474110037, 50.63614906979689, 49.49694777179407,
115             50.71799078406067, 50.03192853759164, 49.915092423165994, 49.56895392597687, 51.034638001064934,
116             50.681227971275945, 50.43749845081759, 49.86513120270245, 50.21475262482965, 49.99202971042547,
117             50.02382189838519, 49.386888585302884, 49.45585010202781, 49.988009479855435, 49.8136712206123,
118             49.6715197127997, 50.1981278397565, 49.842297508010276, 49.62491227740015, 50.05101916097176,
119             48.834912763303926, 49.806787657848574, 49.478236106374695, 49.56648347371614, 49.95069238081982,
120             49.71845132077346, 50.6097468705947, 49.80724637775541, 49.90448813086025, 49.39641861662603,
121             50.434295712893714, 49.227176959566734, 49.541126466050905, 49.03416593170446, 49.11584328494423,
122             49.61387482435674, 49.92877857995328, 50.70638552955101, 50.60078208448842, 49.39326233277838,
123             49.21488424364095, 49.69503351015096, 50.13733214001718, 50.22084761458942, 51.09804435604931,
124             49.18559131120419, 49.52286371605357, 49.34804374996689, 49.6901827776375, 50.01316351359638,
125             48.7751460520373, 50.12961836291053, 49.9978419772511, 49.885658399408584, 49.673438879979834,
126             49.45565980965606, 50.429747484906564, 49.40129274804164, 50.13034614008073, 49.87685735146651,
127             50.12967905393557, 50.323560376181696, 49.83519233651367, 49.37333369733053, 49.70074301611427,
128             50.11626105774947, 50.28249500380083, 50.543354367136466, 50.05866241335002, 50.39516515672527,
129             49.4838561463057, 50.451757089234796, 50.31370674203726, 49.79063762614284, 50.19652349768548,
130             49.75881420748814, 49.98371855036422, 49.82171344472916, 48.810793204162415, 49.37040569084592,
131             50.050641186203976, 50.48360952263646, 50.86666450358076, 50.463268776129844, 50.137489751888666,
132             50.23823061444118, 49.881460479468004, 50.641174398764356, 49.09314136851421, 48.80877928574451,
133             50.46197084844826, 49.97691704141741, 49.99933997561926, 50.25692254481885, 49.52973451252715,
134             49.81229858420664, 48.996112655915994, 48.740531054814674, 50.026642633066416, 49.98696633604899,
135             49.61307159972952, 50.5115278979726, 50.75245152442404, 50.51807785445929, 49.60929671768147,
136             49.1079533564074, 49.65347196551866, 49.31684818724059, 50.4906368627049, 50.37483603684714
137         };
138 
139         EmpiricalDistribution.from(120, data).inverseCumulativeProbability(0.7166666666666669);
140     }
141 
142     
143 
144 
145 
146 
147     @Test
148     public void testNext() {
149         tstGen(empiricalDistribution,
150                0.1);
151     }
152 
153     
154 
155 
156     @Test
157     public void testGridTooFine() {
158         tstGen(EmpiricalDistribution.from(1001, dataArray),
159                0.1);
160     }
161 
162     
163 
164 
165     @Test
166     public void testGridTooFat() {
167         tstGen(EmpiricalDistribution.from(1, dataArray),
168                5); 
169                    
170     }
171 
172     
173 
174 
175     @Test
176     public void testBinIndexOverflow() {
177         double[] x = new double[] {9474.94326071674, 2080107.8865462579};
178         EmpiricalDistribution.from(1000, x);
179     }
180 
181     @Test(expected=NullPointerException.class)
182     public void testLoadNullDoubleArray() {
183         EmpiricalDistribution.from(1000, null);
184     }
185 
186     
187 
188 
189     @Test
190     public void testGetBinUpperBounds() {
191         double[] testData = {0, 1, 1, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10};
192         EmpiricalDistribution dist = EmpiricalDistribution.from(5, testData);
193         double[] expectedBinUpperBounds = {2, 4, 6, 8, 10};
194         double[] expectedGeneratorUpperBounds = {4d/13d, 7d/13d, 9d/13d, 11d/13d, 1};
195         double tol = 10E-12;
196         TestUtils.assertEquals(expectedBinUpperBounds, dist.getUpperBounds(), tol);
197         TestUtils.assertEquals(expectedGeneratorUpperBounds, dist.getGeneratorUpperBounds(), tol);
198     }
199 
200     private void verifySame(EmpiricalDistribution d1,
201                             EmpiricalDistribution d2) {
202         Assert.assertEquals(d1.getBinCount(), d2.getBinCount());
203         Assert.assertEquals(d1.getSampleStats(), d2.getSampleStats());
204 
205         for (int i = 0;  i < d1.getUpperBounds().length; i++) {
206             Assert.assertEquals(d1.getUpperBounds()[i], d2.getUpperBounds()[i], 0);
207         }
208         Assert.assertEquals(d1.getBinStats(), d2.getBinStats());
209     }
210 
211     private void tstGen(EmpiricalDistribution dist,
212                         double tolerance) {
213         final ContinuousDistribution.Sampler sampler
214             = dist.createSampler(RandomSource.WELL_19937_C.create(1000));
215         final SummaryStatistics stats = new SummaryStatistics();
216         for (int i = 1; i < 1000; i++) {
217             stats.addValue(sampler.sample());
218         }
219         Assert.assertEquals("mean", 5.069831575018909, stats.getMean(),tolerance);
220         Assert.assertEquals("std dev", 1.0173699343977738, stats.getStandardDeviation(),tolerance);
221     }
222 
223     
224 
225     @Override
226     public ContinuousDistribution makeDistribution() {
227         
228         final double[] sourceData = new double[n + 1];
229         for (int i = 0; i < n + 1; i++) {
230             sourceData[i] = i;
231         }
232         EmpiricalDistribution dist = EmpiricalDistribution.from(1000, sourceData);
233         return dist;
234     }
235 
236     @Override
237     public double[] makeCumulativeTestPoints() {
238        final double[] testPoints = new double[] {9, 10, 15, 1000, 5004, 9999};
239        return testPoints;
240     }
241 
242 
243     @Override
244     public double[] makeCumulativeTestValues() {
245         
246 
247 
248 
249 
250         final double[] testPoints = getCumulativeTestPoints();
251         final double[] cumValues = new double[testPoints.length];
252         final EmpiricalDistribution empiricalDistribution = (EmpiricalDistribution) makeDistribution();
253         final double[] binBounds = empiricalDistribution.getUpperBounds();
254         for (int i = 0; i < testPoints.length; i++) {
255             final int bin = findBin(testPoints[i]);
256             final double lower = bin == 0 ? empiricalDistribution.getSupportLowerBound() :
257                 binBounds[bin - 1];
258             final double upper = binBounds[bin];
259             
260             
261             final double bMinus = bin == 0 ? 0 : (bin - 1) * binMass + firstBinMass;
262             final ContinuousDistribution kernel = findKernel(lower, upper);
263             final double withinBinKernelMass = kernel.probability(lower, upper);
264             final double kernelCum = kernel.probability(lower, testPoints[i]);
265             cumValues[i] = bMinus + (bin == 0 ? firstBinMass : binMass) * kernelCum/withinBinKernelMass;
266         }
267         return cumValues;
268     }
269 
270     @Override
271     public double[] makeDensityTestValues() {
272         final double[] testPoints = getCumulativeTestPoints();
273         final double[] densityValues = new double[testPoints.length];
274         final EmpiricalDistribution empiricalDistribution = (EmpiricalDistribution) makeDistribution();
275         final double[] binBounds = empiricalDistribution.getUpperBounds();
276         for (int i = 0; i < testPoints.length; i++) {
277             final int bin = findBin(testPoints[i]);
278             final double lower = bin == 0 ? empiricalDistribution.getSupportLowerBound() :
279                 binBounds[bin - 1];
280             final double upper = binBounds[bin];
281             final ContinuousDistribution kernel = findKernel(lower, upper);
282             final double withinBinKernelMass = kernel.probability(lower, upper);
283             final double density = kernel.density(testPoints[i]);
284             densityValues[i] = density * (bin == 0 ? firstBinMass : binMass) / withinBinKernelMass;
285         }
286         return densityValues;
287     }
288 
289     
290 
291 
292 
293 
294 
295     @Override
296     @Test
297     public void testDensityIntegrals() {
298         final ContinuousDistribution distribution = makeDistribution();
299         final double tol = 1.0e-9;
300         final BaseAbstractUnivariateIntegrator integrator =
301             new IterativeLegendreGaussIntegrator(5, 1.0e-12, 1.0e-10);
302         final UnivariateFunction d = new UnivariateFunction() {
303             @Override
304             public double value(double x) {
305                 return distribution.density(x);
306             }
307         };
308         final double[] lower = {0, 5, 1000, 5001, 9995};
309         final double[] upper = {5, 12, 1030, 5010, 10000};
310         for (int i = 1; i < 5; i++) {
311             Assert.assertEquals(
312                     distribution.probability(
313                             lower[i], upper[i]),
314                             integrator.integrate(
315                                     1000000, 
316                                     d, lower[i], upper[i]), tol);
317         }
318     }
319 
320     
321 
322 
323 
324     @Test
325     public void testSampleValuesRange() {
326         
327         
328         final double[] data = new double[100];
329         for (int i = 0; i < 50; i++) {
330             data[i] = 1 / ((double) i + 1);
331         }
332         for (int i = 51; i < 100; i++) {
333             data[i] = 1 - 1 / (100 - (double) i + 2);
334         }
335         EmpiricalDistribution dist = EmpiricalDistribution.from(10, data);
336         ContinuousDistribution.Sampler sampler
337             = dist.createSampler(RandomSource.WELL_19937_C.create(1000));
338         for (int i = 0; i < 1000; i++) {
339             final double dev = sampler.sample();
340             Assert.assertTrue(dev < 1);
341             Assert.assertTrue(dev > 0);
342         }
343     }
344 
345     
346 
347 
348     @Test
349     public void testNoBinVariance() {
350         final double[] data = {0, 0, 1, 1};
351         EmpiricalDistribution dist = EmpiricalDistribution.from(2, data);
352         ContinuousDistribution.Sampler sampler
353             = dist.createSampler(RandomSource.WELL_19937_C.create(1000));
354         for (int i = 0; i < 1000; i++) {
355             final double dev = sampler.sample();
356             Assert.assertTrue(dev == 0 || dev == 1);
357         }
358         Assert.assertEquals(0.5, dist.cumulativeProbability(0), Double.MIN_VALUE);
359         Assert.assertEquals(1.0, dist.cumulativeProbability(1), Double.MIN_VALUE);
360         Assert.assertEquals(0.5, dist.cumulativeProbability(0.5), Double.MIN_VALUE);
361         Assert.assertEquals(0.5, dist.cumulativeProbability(0.7), Double.MIN_VALUE);
362     }
363 
364     
365 
366 
367     private int findBin(double x) {
368         
369         final double nMinus = JdkMath.floor(x / 10);
370         final int bin =  (int) JdkMath.round(nMinus);
371         
372         return JdkMath.floor(x / 10) == x / 10 ? bin - 1 : bin;
373     }
374 
375     
376 
377 
378 
379 
380 
381 
382     private ContinuousDistribution findKernel(double lower, double upper) {
383         if (lower < 1) {
384             return NormalDistribution.of(5d, 3.3166247903554);
385         } else {
386             return NormalDistribution.of((upper + lower + 1) / 2d, 3.0276503540974917);
387         }
388     }
389 
390     @Test
391     public void testKernelOverrideUniform() {
392         final double[] data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
393         final EmpiricalDistribution dist =
394             EmpiricalDistribution.from(5, data,
395                                        s -> UniformContinuousDistribution.of(s.getMin(), s.getMax()));
396         final ContinuousDistribution.Sampler sampler
397             = dist.createSampler(RandomSource.WELL_19937_C.create(1000));
398         
399         final double bounds[] = {3d, 6d, 9d, 12d};
400         final double tol = 10E-12;
401         for (int i = 0; i < 20; i++) {
402             final double v = sampler.sample();
403             
404             for (int j = 0; j < bounds.length; j++) {
405                 Assert.assertFalse(v > bounds[j] + tol && v < bounds[j] + 1 - tol);
406             }
407         }
408         Assert.assertEquals(0.0, dist.cumulativeProbability(1), tol);
409         Assert.assertEquals(0.1, dist.cumulativeProbability(2), tol);
410         Assert.assertEquals(0.6, dist.cumulativeProbability(10), tol);
411         Assert.assertEquals(0.8, dist.cumulativeProbability(12), tol);
412         Assert.assertEquals(0.8, dist.cumulativeProbability(13), tol);
413         Assert.assertEquals(1.0, dist.cumulativeProbability(15), tol);
414 
415         Assert.assertEquals(2.0, dist.inverseCumulativeProbability(0.1), tol);
416         Assert.assertEquals(3.0, dist.inverseCumulativeProbability(0.2), tol);
417         Assert.assertEquals(5.0, dist.inverseCumulativeProbability(0.3), tol);
418         Assert.assertEquals(6.0, dist.inverseCumulativeProbability(0.4), tol);
419         Assert.assertEquals(8.0, dist.inverseCumulativeProbability(0.5), tol);
420         Assert.assertEquals(9.0, dist.inverseCumulativeProbability(0.6), tol);
421     }
422 
423     @Test
424     public void testMath1431() {
425         final UniformRandomProvider rng = RandomSource.WELL_19937_C.create(1000);
426         final ContinuousDistribution.Sampler exponentialDistributionSampler
427             = ExponentialDistribution.of(0.05).createSampler(rng);
428         final double[] empiricalDataPoints = new double[3000];
429         for (int i = 0; i < empiricalDataPoints.length; i++) {
430             empiricalDataPoints[i] = exponentialDistributionSampler.sample();
431         }
432 
433         final EmpiricalDistribution testDistribution = EmpiricalDistribution.from(100, empiricalDataPoints);
434 
435         for (int i = 0; i < 1000; i++) {
436             final double point = rng.nextDouble();
437             final double cdf = testDistribution.cumulativeProbability(point);
438             Assert.assertFalse("point: " + point, Double.isNaN(cdf));
439         }
440     }
441 
442     @Test
443     public void testMath1462() {
444         final double[] data = {
445             6464.0205, 6449.1328, 6489.4569, 6497.5533, 6251.6487,
446             6252.6513, 6339.7883, 6356.2622, 6222.1251, 6157.3813,
447             6242.4741, 6332.5347, 6468.0633, 6471.2319, 6473.9929,
448             6589.1322, 6511.2191, 6339.4349, 6307.7735, 6288.0915,
449             6354.0572, 6385.8283, 6325.3756, 6433.1699, 6433.6507,
450             6424.6806, 6380.5268, 6407.6705, 6241.2198, 6230.3681,
451             6367.5943, 6358.4817, 6272.8039, 6269.0211, 6312.9027,
452             6349.5926, 6404.0775, 6326.986, 6283.8685, 6309.9021,
453             6336.8554, 6389.1598, 6281.0372, 6304.8852, 6359.2651,
454             6426.519, 6400.3926, 6440.6798, 6292.5812, 6398.4911,
455             6307.0002, 6284.2111, 6271.371, 6368.6377, 6323.3372,
456             6276.2155, 6335.0117, 6319.2466, 6252.9969, 6445.2074,
457             6461.3944, 6384.1345
458         };
459 
460         final EmpiricalDistribution ed = EmpiricalDistribution.from(data.length / 10, data);
461 
462         double v;
463         double p;
464 
465         p = 0.49999;
466         v = ed.inverseCumulativeProbability(p);
467         Assert.assertTrue("p=" + p + " => v=" + v, v < 6344);
468 
469         p = 0.5;
470         v = ed.inverseCumulativeProbability(p);
471         Assert.assertTrue("p=" + p + " => v=" + v, v < 7000);
472 
473         p = 0.51111;
474         v = ed.inverseCumulativeProbability(p);
475         Assert.assertTrue("p=" + p + " => v=" + v, v < 6350);
476     }
477 
478     @Test
479     public void testMath1462InfiniteQuantile() {
480         final double[] data = {
481             18054, 17548, 17350, 17860, 17827, 17653, 18113, 18405, 17746,
482             17647, 18160, 17955, 17705, 17890, 17974, 17857, 13287, 18645,
483             17775, 17730, 17996, 18263, 17861, 17161, 17717, 18134, 18669,
484             18340, 17221, 18292, 18146, 17520, 18207, 17829, 18206, 13301,
485             18257, 17626, 18358, 18340, 18320, 17852, 17804, 17577, 17718,
486             18099, 13395, 17763, 17911, 17978, 12935, 17519, 17550, 18728,
487             18518, 17698, 18739, 18553, 17982, 18113, 17974, 17961, 17645,
488             17867, 17890, 17498, 18718, 18191, 18177, 17923, 18164, 18155,
489             6212, 5961, 711
490         };
491 
492         final double p = 0.32;
493         for (int i = 745; i <= 1100; i++) {
494             final EmpiricalDistribution ed = EmpiricalDistribution.from(i, data);
495             final double v = ed.inverseCumulativeProbability(p);
496 
497             Assert.assertTrue("p=" + p + " => v=" + v, Double.isFinite(v));
498         }
499     }
500 }