1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.rng.examples.jmh.sampling.distribution;
19
20 import org.apache.commons.rng.UniformRandomProvider;
21 import org.apache.commons.rng.sampling.distribution.DiscreteSampler;
22 import org.apache.commons.rng.sampling.distribution.KempSmallMeanPoissonSampler;
23 import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler;
24 import org.apache.commons.rng.sampling.distribution.SmallMeanPoissonSampler;
25 import org.apache.commons.rng.simple.RandomSource;
26
27 import org.openjdk.jmh.annotations.Benchmark;
28 import org.openjdk.jmh.annotations.BenchmarkMode;
29 import org.openjdk.jmh.annotations.Fork;
30 import org.openjdk.jmh.annotations.Measurement;
31 import org.openjdk.jmh.annotations.Mode;
32 import org.openjdk.jmh.annotations.OutputTimeUnit;
33 import org.openjdk.jmh.annotations.Param;
34 import org.openjdk.jmh.annotations.Scope;
35 import org.openjdk.jmh.annotations.Setup;
36 import org.openjdk.jmh.annotations.State;
37 import org.openjdk.jmh.annotations.Warmup;
38
39 import java.util.concurrent.TimeUnit;
40 import java.util.function.Supplier;
41
42 /**
43 * Executes benchmark to compare the speed of generation of Poisson distributed random numbers.
44 */
45 @BenchmarkMode(Mode.AverageTime)
46 @OutputTimeUnit(TimeUnit.NANOSECONDS)
47 @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
48 @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
49 @State(Scope.Benchmark)
50 @Fork(value = 1, jvmArgs = {"-server", "-Xms128M", "-Xmx128M"})
51 public class PoissonSamplersPerformance {
52 /**
53 * The value for the baseline generation of an {@code int} value.
54 *
55 * <p>This must NOT be final!</p>
56 */
57 private int value;
58
59 /**
60 * The mean for the call to {@link Math#exp(double)}.
61 */
62 @State(Scope.Benchmark)
63 public static class Means {
64 /**
65 * The Poisson mean. This is set at a level where the small mean sampler is to be used
66 * in preference to the large mean sampler.
67 */
68 @Param({"0.25",
69 "0.5",
70 "1",
71 "2",
72 "4",
73 "8",
74 "16",
75 "32"})
76 private double mean;
77
78 /**
79 * Gets the mean.
80 *
81 * @return the mean
82 */
83 public double getMean() {
84 return mean;
85 }
86 }
87
88 /**
89 * The {@link DiscreteSampler} samplers to use for testing. Creates the sampler for each
90 * {@link RandomSource} in the default
91 * {@link org.apache.commons.rng.examples.jmh.RandomSources RandomSources}.
92 */
93 @State(Scope.Benchmark)
94 public static class Sources {
95 /**
96 * RNG providers.
97 *
98 * <p>Use different speeds.</p>
99 *
100 * @see <a href="https://commons.apache.org/proper/commons-rng/userguide/rng.html">
101 * Commons RNG user guide</a>
102 */
103 @Param({"WELL_44497_B",
104 //"ISAAC",
105 "XO_RO_SHI_RO_128_PLUS"})
106 private String randomSourceName;
107
108 /**
109 * The sampler type.
110 */
111 @Param({"SmallMeanPoissonSampler",
112 "KempSmallMeanPoissonSampler",
113 "BoundedKempSmallMeanPoissonSampler",
114 "KempSmallMeanPoissonSamplerP50",
115 "KempSmallMeanPoissonSamplerBinarySearch",
116 "KempSmallMeanPoissonSamplerGuideTable",
117 "LargeMeanPoissonSampler",
118 "TinyMeanPoissonSampler"})
119 private String samplerType;
120
121 /**
122 * The Poisson mean. This is set at a level where the small mean sampler is to be used
123 * in preference to the large mean sampler.
124 */
125 @Param({"0.25",
126 "0.5",
127 "1",
128 "2",
129 "4",
130 "8",
131 "16",
132 "32",
133 "64"})
134 private double mean;
135
136 /** RNG. */
137 private UniformRandomProvider generator;
138
139 /** The factory. */
140 private Supplier<DiscreteSampler> factory;
141
142 /** The sampler. */
143 private DiscreteSampler sampler;
144
145 /**
146 * @return The RNG.
147 */
148 public UniformRandomProvider getGenerator() {
149 return generator;
150 }
151
152 /**
153 * Gets the sampler.
154 *
155 * @return The sampler.
156 */
157 public DiscreteSampler getSampler() {
158 return sampler;
159 }
160
161 /** Instantiates sampler. */
162 @Setup
163 public void setup() {
164 final RandomSource randomSource = RandomSource.valueOf(randomSourceName);
165 generator = randomSource.create();
166 if ("SmallMeanPoissonSampler".equals(samplerType)) {
167 factory = () -> SmallMeanPoissonSampler.of(generator, mean);
168 } else if ("KempSmallMeanPoissonSampler".equals(samplerType)) {
169 factory = () -> KempSmallMeanPoissonSampler.of(generator, mean);
170 } else if ("BoundedKempSmallMeanPoissonSampler".equals(samplerType)) {
171 factory = () -> new BoundedKempSmallMeanPoissonSampler(generator, mean);
172 } else if ("KempSmallMeanPoissonSamplerP50".equals(samplerType)) {
173 factory = () -> new KempSmallMeanPoissonSamplerP50(generator, mean);
174 } else if ("KempSmallMeanPoissonSamplerBinarySearch".equals(samplerType)) {
175 factory = () -> new KempSmallMeanPoissonSamplerBinarySearch(generator, mean);
176 } else if ("KempSmallMeanPoissonSamplerGuideTable".equals(samplerType)) {
177 factory = () -> new KempSmallMeanPoissonSamplerGuideTable(generator, mean);
178 } else if ("LargeMeanPoissonSampler".equals(samplerType)) {
179 factory = () -> LargeMeanPoissonSampler.of(generator, mean);
180 } else if ("TinyMeanPoissonSampler".equals(samplerType)) {
181 factory = () -> new TinyMeanPoissonSampler(generator, mean);
182 }
183 sampler = factory.get();
184 }
185
186 /**
187 * Creates a new instance of the sampler.
188 *
189 * @return The sampler.
190 */
191 public DiscreteSampler createSampler() {
192 return factory.get();
193 }
194 }
195
196 /**
197 * Kemp sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson
198 * distribution</a>.
199 *
200 * <ul>
201 * <li>
202 * For small means, a Poisson process is simulated using uniform deviates, as
203 * described in Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed
204 * Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253.
205 * </li>
206 * </ul>
207 *
208 * <p>Note: This is similar to {@link KempSmallMeanPoissonSampler} but the sample is
209 * bounded by 1000 * mean.</p>
210 *
211 * @see <a href="https://www.jstor.org/stable/2346348">Kemp, A.W. (1981) JRSS Vol. 30, pp. 249-253</a>
212 */
213 static class BoundedKempSmallMeanPoissonSampler
214 implements DiscreteSampler {
215 /** Underlying source of randomness. */
216 private final UniformRandomProvider rng;
217 /**
218 * Pre-compute {@code Math.exp(-mean)}.
219 * Note: This is the probability of the Poisson sample {@code p(x=0)}.
220 */
221 private final double p0;
222 /** Pre-compute {@code 1000 * mean} as the upper limit of the sample. */
223 private final int limit;
224 /**
225 * The mean of the Poisson sample.
226 */
227 private final double mean;
228
229 /**
230 * @param rng Generator of uniformly distributed random numbers.
231 * @param mean Mean.
232 * @throws IllegalArgumentException if {@code mean <= 0} or {@code mean > 700}.
233 */
234 BoundedKempSmallMeanPoissonSampler(UniformRandomProvider rng,
235 double mean) {
236 if (mean <= 0) {
237 throw new IllegalArgumentException();
238 }
239
240 p0 = Math.exp(-mean);
241 if (p0 == 0) {
242 throw new IllegalArgumentException();
243 }
244 // The returned sample is bounded by 1000 * mean
245 limit = (int) Math.ceil(1000 * mean);
246 this.rng = rng;
247 this.mean = mean;
248 }
249
250 /** {@inheritDoc} */
251 @Override
252 public int sample() {
253 // Note on the algorithm:
254 // - X is the unknown sample deviate (the output of the algorithm)
255 // - x is the current value from the distribution
256 // - p is the probability of the current value x, p(X=x)
257 // - u is effectively the cumulative probability that the sample X
258 // is equal or above the current value x, p(X>=x)
259 // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
260 double u = rng.nextDouble();
261 int x = 0;
262 double p = p0;
263 while (u > p) {
264 u -= p;
265 // Compute the next probability using a recurrence relation.
266 // p(x+1) = p(x) * mean / (x+1)
267 p *= mean / ++x;
268 // The algorithm listed in Kemp (1981) does not check that the rolling probability
269 // is positive. This check is added to ensure a simple bounds in the event that
270 // p == 0
271 if (x == limit) {
272 return x;
273 }
274 }
275 return x;
276 }
277 }
278
279 /**
280 * Kemp sampler for the Poisson distribution.
281 *
282 * <p>Note: This is a modification of the original algorithm by Kemp. It implements a hedge
283 * on the cumulative probability set at 50%. This saves computation in half of the samples.</p>
284 */
285 static class KempSmallMeanPoissonSamplerP50
286 implements DiscreteSampler {
287 /** The value of p=0.5. */
288 private static final double ONE_HALF = 0.5;
289 /**
290 * The threshold that defines the cumulative probability for the long tail of the
291 * Poisson distribution. Above this threshold the recurrence relation that computes the
292 * next probability must check that the p-value is not zero.
293 */
294 private static final double LONG_TAIL_THRESHOLD = 0.999;
295
296 /** Underlying source of randomness. */
297 private final UniformRandomProvider rng;
298 /**
299 * Pre-compute {@code Math.exp(-mean)}.
300 * Note: This is the probability of the Poisson sample {@code p(x=0)}.
301 */
302 private final double p0;
303 /**
304 * The mean of the Poisson sample.
305 */
306 private final double mean;
307 /**
308 * Pre-compute the cumulative probability for all samples up to and including x.
309 * This is F(x) = sum of {@code p(X<=x)}.
310 *
311 * <p>The value is computed at approximately 50% allowing the algorithm to choose to start
312 * at value (x+1) approximately half of the time.
313 */
314 private final double fx;
315 /**
316 * Store the value (x+1) corresponding to the next value after the cumulative probability is
317 * above 50%.
318 */
319 private final int x1;
320 /**
321 * Store the probability value p(x+1), allowing the algorithm to start from the point x+1.
322 */
323 private final double px1;
324
325 /**
326 * Create a new instance.
327 *
328 * <p>This is valid for means as large as approximately {@code 744}.</p>
329 *
330 * @param rng Generator of uniformly distributed random numbers.
331 * @param mean Mean.
332 * @throws IllegalArgumentException if {@code mean <= 0} or {@code Math.exp(-mean) == 0}.
333 */
334 KempSmallMeanPoissonSamplerP50(UniformRandomProvider rng,
335 double mean) {
336 if (mean <= 0) {
337 throw new IllegalArgumentException();
338 }
339
340 this.rng = rng;
341 p0 = Math.exp(-mean);
342 this.mean = mean;
343
344 // Pre-compute a hedge value for the cumulative probability at approximately 50%.
345 // This is only done when p0 is less than the long tail threshold.
346 // The result is that the rolling probability computation should never hit the
347 // long tail where p reaches zero.
348 if (p0 <= LONG_TAIL_THRESHOLD) {
349 // Check the edge case for no probability
350 if (p0 == 0) {
351 throw new IllegalArgumentException();
352 }
353
354 double p = p0;
355 int x = 0;
356 // Sum is cumulative probability F(x) = sum p(X<=x)
357 double sum = p;
358 while (sum < ONE_HALF) {
359 // Compute the next probability using a recurrence relation.
360 // p(x+1) = p(x) * mean / (x+1)
361 p *= mean / ++x;
362 sum += p;
363 }
364 fx = sum;
365 x1 = x + 1;
366 px1 = p * mean / x1;
367 } else {
368 // Always start at zero.
369 // Note: If NaN is input as the mean this path is executed and the sample is always zero.
370 fx = 0;
371 x1 = 0;
372 px1 = p0;
373 }
374 }
375
376 /** {@inheritDoc} */
377 @Override
378 public int sample() {
379 // Note on the algorithm:
380 // - X is the unknown sample deviate (the output of the algorithm)
381 // - x is the current value from the distribution
382 // - p is the probability of the current value x, p(X=x)
383 // - u is effectively the cumulative probability that the sample X
384 // is equal or above the current value x, p(X>=x)
385 // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
386 final double u = rng.nextDouble();
387
388 if (u <= fx) {
389 // Sample from the lower half of the distribution starting at zero
390 return sampleBeforeLongTail(u, 0, p0);
391 }
392
393 // Sample from the upper half of the distribution starting at cumulative probability fx.
394 // This is reached when u > fx and sample X > x.
395
396 // If below the long tail threshold then omit the check on the asymptote of p -> zero
397 if (u <= LONG_TAIL_THRESHOLD) {
398 return sampleBeforeLongTail(u - fx, x1, px1);
399 }
400
401 return sampleWithinLongTail(u - fx, x1, px1);
402 }
403
404 /**
405 * Compute the sample assuming it is <strong>not</strong> in the long tail of the distribution.
406 *
407 * <p>This avoids a check on the next probability value assuming that the cumulative probability
408 * is at a level where the long tail of the Poisson distribution will not be reached.
409 *
410 * @param u the remaining cumulative probability ({@code p(X>x)})
411 * @param x the current sample value X
412 * @param p the current probability of the sample (p(X=x))
413 * @return the sample X
414 */
415 private int sampleBeforeLongTail(double u, int x, double p) {
416 while (u > p) {
417 // Update the remaining cumulative probability
418 u -= p;
419 // Compute the next probability using a recurrence relation.
420 // p(x+1) = p(x) * mean / (x+1)
421 p *= mean / ++x;
422 // The algorithm listed in Kemp (1981) does not check that the rolling probability
423 // is positive (non-zero). This is omitted here on the assumption that the cumulative
424 // probability will not be in the long tail where the probability asymptotes to zero.
425 }
426 return x;
427 }
428
429 /**
430 * Compute the sample assuming it is in the long tail of the distribution.
431 *
432 * <p>This requires a check on the next probability value which is expected to asymptote to zero.
433 *
434 * @param u the remaining cumulative probability
435 * @param x the current sample value X
436 * @param p the current probability of the sample (p(X=x))
437 * @return the sample X
438 */
439 private int sampleWithinLongTail(double u, int x, double p) {
440 while (u > p) {
441 // Update the remaining cumulative probability
442 u -= p;
443 // Compute the next probability using a recurrence relation.
444 // p(x+1) = p(x) * mean / (x+1)
445 p *= mean / ++x;
446 // The algorithm listed in Kemp (1981) does not check that the rolling probability
447 // is positive. This check is added to ensure no errors when the limit of the summation
448 // 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic when
449 // in the long tail of the distribution.
450 if (p == 0) {
451 return x;
452 }
453 }
454 return x;
455 }
456 }
457
458 /**
459 * Kemp sampler for the Poisson distribution.
460 *
461 * <p>Note: This is a modification of the original algorithm by Kemp. It stores the
462 * cumulative probability table for repeat use. The table is searched using a binary
463 * search algorithm.</p>
464 */
465 static class KempSmallMeanPoissonSamplerBinarySearch
466 implements DiscreteSampler {
467 /**
468 * Store the cumulative probability table size for integer means so that 99.99%
469 * of the Poisson distribution is covered. This is done until the table size is
470 * 2 * mean.
471 *
472 * <p>At higher mean the expected range is mean + 4 * sqrt(mean). To avoid the sqrt
473 * the conservative limit of 2 * mean is used.
474 */
475 private static final int[] TABLE_SIZE = {
476 /* mean 1 to 10. */
477 8, 10, 12, 14, 16, 18, 20, 22, 24, 25,
478 /* mean 11 to 20. */
479 27, 29, 30, 32, 33, 35, 36, 38, 39, 41,
480 };
481
482 /** Underlying source of randomness. */
483 private final UniformRandomProvider rng;
484 /**
485 * The mean of the Poisson sample.
486 */
487 private final double mean;
488 /**
489 * Store the cumulative probability for all samples up to and including x.
490 * This is F(x) = sum of {@code p(X<=x)}.
491 *
492 * <p>This table is initialized to store cumulative probabilities for x up to 2 * mean
493 * or 99.99% (whichever is larger).
494 */
495 private final double[] fx;
496 /**
497 * Store the value x corresponding to the last stored cumulative probability.
498 */
499 private int lastX;
500 /**
501 * Store the probability value p(x) corresponding to last stored cumulative probability,
502 * allowing the algorithm to start from the point x.
503 */
504 private double px;
505
506 /**
507 * Create a new instance.
508 *
509 * <p>This is valid for means as large as approximately {@code 744}.</p>
510 *
511 * @param rng Generator of uniformly distributed random numbers.
512 * @param mean Mean.
513 * @throws IllegalArgumentException if {@code mean <= 0} or {@code Math.exp(-mean) == 0}.
514 */
515 KempSmallMeanPoissonSamplerBinarySearch(UniformRandomProvider rng,
516 double mean) {
517 if (mean <= 0) {
518 throw new IllegalArgumentException();
519 }
520
521 px = Math.exp(-mean);
522 if (px > 0) {
523 this.rng = rng;
524 this.mean = mean;
525
526 // Initialise the cumulative probability table.
527 // The supported mean where p(x=0) > 0 sets a limit of around 744 so this will always be
528 // possible.
529 final int upperMean = (int) Math.ceil(mean);
530 fx = new double[(upperMean < TABLE_SIZE.length) ? TABLE_SIZE[upperMean] : upperMean * 2];
531 fx[0] = px;
532 } else {
533 // This will catch NaN mean values
534 throw new IllegalArgumentException();
535 }
536 }
537
538 /** {@inheritDoc} */
539 @Override
540 public int sample() {
541 // Note on the algorithm:
542 // - X is the unknown sample deviate (the output of the algorithm)
543 // - x is the current value from the distribution
544 // - p is the probability of the current value x, p(X=x)
545 // - u is effectively the cumulative probability that the sample X
546 // is equal or above the current value x, p(X>=x)
547 // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
548 final double u = rng.nextDouble();
549
550 if (u <= fx[lastX]) {
551 // Binary search within known cumulative probability table.
552 // Find x so that u > f[x-1] and u <= f[x].
553 // This is a looser search than Arrays.binarySearch:
554 // - The output is x = upper.
555 // - The pre-condition check ensures u <= f[upper] at the start.
556 // - The table stores probabilities where f[0] is non-zero.
557 // - u should be >= 0 (or the random generator is broken).
558 // - It avoids comparisons using Double.doubleToLongBits.
559 // - It avoids the low likelihood of equality between two doubles so uses
560 // only 1 compare per loop.
561 // - It uses a weighted middle anticipating that the cumulative distribution
562 // is skewed as the expected use case is a small mean.
563 int lower = 0;
564 int upper = lastX;
565 while (lower < upper) {
566 // Weighted middle is 1/4 of the range between lower and upper
567 final int mid = (3 * lower + upper) >>> 2;
568 final double midVal = fx[mid];
569 if (u > midVal) {
570 // Change lower such that
571 // u > f[lower - 1]
572 lower = mid + 1;
573 } else {
574 // Change upper such that
575 // u <= f[upper]
576 upper = mid;
577 }
578 }
579 return upper;
580 }
581
582 // The sample is above x
583 int x1 = lastX + 1;
584
585 // Fill the cumulative probability table if possible
586 while (x1 < fx.length) {
587 // Compute the next probability using a recurrence relation.
588 // p(x+1) = p(x) * mean / (x+1)
589 px = nextProbability(px, x1);
590 // Compute the next cumulative probability f(x+1) and update
591 final double sum = fx[lastX] + px;
592 fx[++lastX] = sum;
593 // Check if this is the correct sample
594 if (u <= sum) {
595 return lastX;
596 }
597 x1 = lastX + 1;
598 }
599
600 // The sample is above the range of the cumulative probability table.
601 // Compute using the Kemp method.
602 // This requires the remaining cumulative probability and the probability for x+1.
603 return sampleWithinLongTail(u - fx[lastX], x1, nextProbability(px, x1));
604 }
605
606 /**
607 * Compute the next probability using a recurrence relation.
608 *
609 * <pre>
610 * p(x + 1) = p(x) * mean / (x + 1)
611 * </pre>
612 *
613 * @param p the probability of x
614 * @param x1 the value of x+1
615 * @return the probability of x+1
616 */
617 private double nextProbability(double p, int x1) {
618 return p * mean / x1;
619 }
620
621 /**
622 * Compute the sample assuming it is in the long tail of the distribution.
623 *
624 * <p>This requires a check on the next probability value which is expected to asymptote to zero.
625 *
626 * @param u the remaining cumulative probability
627 * @param x the current sample value X
628 * @param p the current probability of the sample (p(X=x))
629 * @return the sample X
630 */
631 private int sampleWithinLongTail(double u, int x, double p) {
632 while (u > p) {
633 // Update the remaining cumulative probability
634 u -= p;
635 p = nextProbability(p, ++x);
636 // The algorithm listed in Kemp (1981) does not check that the rolling probability
637 // is positive. This check is added to ensure no errors when the limit of the summation
638 // 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic when
639 // in the long tail of the distribution.
640 if (p == 0) {
641 return x;
642 }
643 }
644 return x;
645 }
646 }
647 /**
648 * Sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson
649 * distribution</a>.
650 *
651 * <p>Note: This is a modification of the original algorithm by Kemp. It stores the
652 * cumulative probability table for repeat use. The table is computed dynamically and
653 * searched using a guide table.</p>
654 */
655 static class KempSmallMeanPoissonSamplerGuideTable implements DiscreteSampler {
656 /**
657 * Store the cumulative probability table size for integer means so that 99.99% of the
658 * Poisson distribution is covered. This is done until the table size is 2 * mean.
659 *
660 * <p>At higher mean the expected range is mean + 4 * sqrt(mean). To avoid the sqrt
661 * the conservative limit of 2 * mean is used.
662 */
663 private static final int[] TABLE_SIZE = {
664 /* mean 1 to 10. */
665 8, 10, 12, 14, 16, 18, 20, 22, 24, 25,
666 /* mean 11 to 20. */
667 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, };
668
669 /** Underlying source of randomness. */
670 private final UniformRandomProvider rng;
671 /**
672 * The mean of the Poisson sample.
673 */
674 private final double mean;
675 /**
676 * Store the cumulative probability for all samples up to and including x. This is
677 * F(x) = sum of {@code p(X<=x)}.
678 *
679 * <p>This table is initialized to store cumulative probabilities for x up to 2 * mean
680 * or 99.99% (whichever is larger).
681 */
682 private final double[] cumulativeProbability;
683 /**
684 * Store the value x corresponding to the last stored cumulative probability.
685 */
686 private int tabulatedX;
687 /**
688 * Store the probability value p(x), allowing the algorithm to start from the point x.
689 */
690 private double probabilityX;
691 /**
692 * The inverse cumulative probability guide table. This is a map between the cumulative
693 * probability (f(x)) and the value x. It is used to set the initial point for search
694 * of the cumulative probability table.
695 *
696 * <p>The index into the table is obtained using {@code f(x) * guideTable.length}. The value
697 * stored at the index is value {@code x+1} such that it is the exclusive upper bound
698 * on the sample value for searching the cumulative probability table. It requires the
699 * table search is towards zero.</p>
700 *
701 * <p>Note: Using x+1 ensures that the table can be zero filled upon initialisation and
702 * any index with zero has yet to be allocated.</p>
703 *
704 * <p>The guide table should never be used when the input f(x) is above the current range of
705 * the cumulative probability table. This would create an index that has not been
706 * allocated a mapping.
707 */
708 private final int[] guideTable;
709
710 /**
711 * Create a new instance.
712 *
713 * <p>This is valid for means as large as approximately {@code 744}.</p>
714 *
715 * @param rng Generator of uniformly distributed random numbers.
716 * @param mean Mean.
717 * @throws IllegalArgumentException if {@code mean <= 0} or
718 * {@code Math.exp(-mean) == 0}.
719 */
720 KempSmallMeanPoissonSamplerGuideTable(UniformRandomProvider rng, double mean) {
721 if (mean <= 0) {
722 throw new IllegalArgumentException("mean is not strictly positive: " + mean);
723 }
724
725 probabilityX = Math.exp(-mean);
726 if (probabilityX > 0) {
727 this.rng = rng;
728 this.mean = mean;
729
730 // Initialise the cumulative probability table.
731 // The supported mean where p(x=0) > 0 sets a limit of around 744 so the cast to int
732 // will always be possible.
733 final int upperMean = (int) Math.ceil(mean);
734 final int size = (upperMean < TABLE_SIZE.length) ? TABLE_SIZE[upperMean] : upperMean * 2;
735 cumulativeProbability = new double[size];
736 cumulativeProbability[0] = probabilityX;
737
738 guideTable = new int[cumulativeProbability.length + 1];
739 initializeGuideTable(probabilityX);
740 } else {
741 // This will catch NaN mean values
742 throw new IllegalArgumentException("No probability for mean " + mean);
743 }
744 }
745
746 /**
747 * Initialise the cumulative probability guide table. All guide indices at or below the
748 * index corresponding to the given probability will be set to 1.
749 *
750 * @param p0 the probability for x=0
751 */
752 private void initializeGuideTable(double p0) {
753 for (int index = getGuideTableIndex(p0); index >= 0; index--) {
754 guideTable[index] = 1;
755 }
756 }
757
758 /**
759 * Fill the inverse cumulative probability guide table. Set the index corresponding to the
760 * given probability to x+1 establishing an exclusive upper bound on x for the probability.
761 * All unused guide indices below the index will also be set to x+1.
762 *
763 * @param p the cumulative probability
764 * @param x the sample value x
765 */
766 private void updateGuideTable(double p, int x) {
767 // Always set the current index as the guide table is the exclusive upper bound
768 // for searching the cumulative probability table for any value p.
769 // Then fill any lower positions that are not allocated.
770 final int x1 = x + 1;
771 int index = getGuideTableIndex(p);
772 do {
773 guideTable[index--] = x1;
774 } while (index > 0 && guideTable[index] == 0);
775 }
776
777 /**
778 * Gets the guide table index for the probability. This is obtained using
779 * {@code p * (guideTable.length - 1)} so is inside the length of the table.
780 *
781 * @param p the cumulative probability
782 * @return the guide table index
783 */
784 private int getGuideTableIndex(double p) {
785 // Note: This is only ever called when p is in the range of the cumulative
786 // probability table. So assume 0 <= p <= 1.
787 return (int) (p * (guideTable.length - 1));
788 }
789
790 /** {@inheritDoc} */
791 @Override
792 public int sample() {
793 // Note on the algorithm:
794 // 1. Compute a cumulative probability with a uniform deviate (u).
795 // 2. If the probability lies within the tabulated cumulative probabilities
796 // then find the sample value.
797 // 3. If possible expand the tabulated cumulative probabilities up to the value u.
798 // 4. If value u exceeds the capacity for the tabulated cumulative probabilities
799 // then compute the sample value dynamically without storing the probabilities.
800
801 // Compute a probability
802 final double u = rng.nextDouble();
803
804 // Check the tabulated cumulative probability table
805 if (u <= cumulativeProbability[tabulatedX]) {
806 // Initialise the search using a guide table to find an initial guess.
807 // The table provides an upper bound on the sample for a known cumulative probability.
808 int sample = guideTable[getGuideTableIndex(u)] - 1;
809 // If u is above the sample probability (this occurs due to truncation)
810 // then return the next value up.
811 if (u > cumulativeProbability[sample]) {
812 return sample + 1;
813 }
814 // Search down
815 while (sample != 0 && u <= cumulativeProbability[sample - 1]) {
816 sample--;
817 }
818 return sample;
819 }
820
821 // The sample is above the tabulated cumulative probability for x
822 int x1 = tabulatedX + 1;
823
824 // Fill the cumulative probability table if possible
825 while (x1 < cumulativeProbability.length) {
826 probabilityX = nextProbability(probabilityX, x1);
827 // Compute the next cumulative probability f(x+1) and update
828 final double sum = cumulativeProbability[tabulatedX] + probabilityX;
829 cumulativeProbability[++tabulatedX] = sum;
830 updateGuideTable(sum, tabulatedX);
831 // Check if this is the correct sample
832 if (u <= sum) {
833 return tabulatedX;
834 }
835 x1 = tabulatedX + 1;
836 }
837
838 // The sample is above the range of the cumulative probability table.
839 // Compute using the Kemp method.
840 // This requires the remaining cumulative probability and the probability for x+1.
841 return sampleWithinLongTail(u - cumulativeProbability[tabulatedX], x1, nextProbability(probabilityX, x1));
842 }
843
844 /**
845 * Compute the next probability using a recurrence relation.
846 *
847 * <pre>
848 * p(x + 1) = p(x) * mean / (x + 1)
849 * </pre>
850 *
851 * @param px the probability of x
852 * @param x1 the value of x+1
853 * @return the probability of x+1
854 */
855 private double nextProbability(double px, int x1) {
856 return px * mean / x1;
857 }
858
859 /**
860 * Compute the sample assuming it is in the long tail of the distribution.
861 *
862 * <p>This requires a check on the next probability value which is expected to
863 * asymptote to zero.
864 *
865 * @param u the remaining cumulative probability
866 * @param x the current sample value X
867 * @param p the current probability of the sample (p(X=x))
868 * @return the sample X
869 */
870 private int sampleWithinLongTail(double u, int x, double p) {
871 // Note on the algorithm:
872 // - X is the unknown sample deviate (the output of the algorithm)
873 // - x is the current value from the distribution
874 // - p is the probability of the current value x, p(X=x)
875 // - u is effectively the cumulative probability that the sample X
876 // is equal or above the current value x, p(X>=x)
877 // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
878 while (u > p) {
879 // Update the remaining cumulative probability
880 u -= p;
881 p = nextProbability(p, ++x);
882 // The algorithm listed in Kemp (1981) does not check that the rolling
883 // probability is positive. This check is added to ensure no errors when the
884 // limit of the summation 1 - sum(p(x)) is above 0 due to cumulative error in
885 // floating point arithmetic when in the long tail of the distribution.
886 if (p == 0) {
887 break;
888 }
889 }
890 return x;
891 }
892 }
893
894 /**
895 * Sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson distribution</a>.
896 *
897 * <ul>
898 * <li>
899 * For small means, a Poisson process is simulated using uniform deviates, as
900 * described in Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming,
901 * Volume 2. Addison Wesley.
902 * </li>
903 * </ul>
904 *
905 * <p>This sampler is suitable for {@code mean < 20}.</p>
906 *
907 * <p>Sampling uses {@link UniformRandomProvider#nextInt()} and 32-bit integer arithmetic.</p>
908 *
909 * @see <a href="https://en.wikipedia.org/wiki/Poisson_distribution#Generating_Poisson-distributed_random_variables">
910 * Poisson random variables</a>
911 */
912 static class TinyMeanPoissonSampler
913 implements DiscreteSampler {
914 /** Pre-compute Poisson probability p(n=0) mapped to the range of a 32-bit unsigned fraction. */
915 private final long p0;
916 /** Underlying source of randomness. */
917 private final UniformRandomProvider rng;
918
919 /**
920 * @param rng Generator of uniformly distributed random numbers.
921 * @param mean Mean.
922 * @throws IllegalArgumentException if {@code mean <= 0} or {@code Math.exp(-mean) * (1L << 32)}
923 * is not positive.
924 */
925 TinyMeanPoissonSampler(UniformRandomProvider rng,
926 double mean) {
927 this.rng = rng;
928 if (mean <= 0) {
929 throw new IllegalArgumentException();
930 }
931 // Math.exp(-mean) is the probability of a Poisson distribution for n=0 (p(n=0)).
932 // This is mapped to a 32-bit range as the numerator of a 32-bit fraction
933 // for use in optimised 32-bit arithmetic.
934 p0 = (long) (Math.exp(-mean) * 0x100000000L);
935 if (p0 == 0) {
936 throw new IllegalArgumentException("No p(x=0) probability for mean: " + mean);
937 }
938 }
939
940 /** {@inheritDoc} */
941 @Override
942 public int sample() {
943 int n = 0;
944 // The unsigned 32-bit sample represents the fraction x / 2^32 where x is [0,2^32-1].
945 // The upper bound is exclusive hence the fraction is a uniform deviate from [0,1).
946 long r = nextUnsigned32();
947 // The limit is the probability p(n=0) converted to an unsigned fraction.
948 while (r >= p0) {
949 // Compute the fraction:
950 // r [0,2^32) 2^32
951 // ---- * -------- / ----
952 // 2^32 2^32 2^32
953 // This rounds down the fraction numerator when the lower 32-bits are discarded.
954 r = (r * nextUnsigned32()) >>> 32;
955 n++;
956 }
957 // Ensure the value is positive in the worst case scenario of a broken
958 // generator that returns 0xffffffff for each sample. This will cause
959 // the integer counter to overflow 2^31-1 but not exceed 2^32. The fraction
960 // multiplication effectively turns r into a counter decrementing from 2^32-1
961 // to zero.
962 return (n >= 0) ? n : Integer.MAX_VALUE;
963 }
964
965 /**
966 * Get the next unsigned 32-bit random integer.
967 *
968 * @return the random integer
969 */
970 private long nextUnsigned32() {
971 return rng.nextInt() & 0xffffffffL;
972 }
973 }
974
975 // Benchmarks methods below.
976
977 /**
978 * Baseline for the JMH timing overhead for production of an {@code int} value.
979 *
980 * @return the {@code int} value
981 */
982 @Benchmark
983 public int baselineInt() {
984 return value;
985 }
986
987 /**
988 * Baseline for {@link Math#exp(double)}.
989 *
990 * @param mean the mean
991 * @return the value
992 */
993 @Benchmark
994 public double baselineExp(Means mean) {
995 return Math.exp(-mean.getMean());
996 }
997
998 /**
999 * Run the sampler.
1000 *
1001 * @param sources Source of randomness.
1002 * @return the sample value
1003 */
1004 @Benchmark
1005 public int sample(Sources sources) {
1006 return sources.getSampler().sample();
1007 }
1008
1009 /**
1010 * Run the sampler.
1011 *
1012 * @param sources Source of randomness.
1013 * @return the sample value
1014 */
1015 @Benchmark
1016 public int singleSample(Sources sources) {
1017 return sources.createSampler().sample();
1018 }
1019 }