Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.rng.sampling.distribution;
018
019import org.apache.commons.rng.UniformRandomProvider;
020
021/**
022 * Sampler for a discrete distribution using an optimised look-up table.
023 *
024 * <ul>
025 *  <li>
026 *   The method requires 30-bit integer probabilities that sum to 2<sup>30</sup> as described
027 *   in George Marsaglia, Wai Wan Tsang, Jingbo Wang (2004) Fast Generation of Discrete
028 *   Random Variables. Journal of Statistical Software. Vol. 11, Issue. 3, pp. 1-11.
029 *  </li>
030 * </ul>
031 *
032 * <p>Sampling uses 1 call to {@link UniformRandomProvider#nextInt()}.</p>
033 *
034 * <p>Memory requirements depend on the maximum number of possible sample values, {@code n},
035 * and the values for the probabilities. Storage is optimised for {@code n}. The worst case
036 * scenario is a uniform distribution of the maximum sample size. This is capped at 0.06MB for
037 * {@code n <= } 2<sup>8</sup>, 17.0MB for {@code n <= } 2<sup>16</sup>, and 4.3GB for
038 * {@code n <=} 2<sup>30</sup>. Realistic requirements will be in the kB range.</p>
039 *
040 * <p>The sampler supports the following distributions:</p>
041 *
042 * <ul>
043 *  <li>Enumerated distribution (probabilities must be provided for each sample)</li>
044 *  <li>Poisson distribution up to {@code mean = 1024}</li>
045 *  <li>Binomial distribution up to {@code trials = 65535}</li>
046 * </ul>
047 *
048 * @see <a href="https://dx.doi.org/10.18637/jss.v011.i03">Margsglia, et al (2004) JSS Vol.
049 * 11, Issue 3</a>
050 * @since 1.3
051 */
052public final class MarsagliaTsangWangDiscreteSampler {
053    /** The value 2<sup>8</sup> as an {@code int}. */
054    private static final int INT_8 = 1 << 8;
055    /** The value 2<sup>16</sup> as an {@code int}. */
056    private static final int INT_16 = 1 << 16;
057    /** The value 2<sup>30</sup> as an {@code int}. */
058    private static final int INT_30 = 1 << 30;
059    /** The value 2<sup>31</sup> as a {@code double}. */
060    private static final double DOUBLE_31 = 1L << 31;
061
062    // =========================================================================
063    // Implementation note:
064    //
065    // This sampler uses prepared look-up tables that are searched using a single
066    // random int variate. The look-up tables contain the sample value. The tables
067    // are constructed using probabilities that sum to 2^30. The original paper
068    // by Marsaglia, et al (2004) describes the use of 5, 3, or 2 look-up tables
069    // indexed using digits of base 2^6, 2^10 or 2^15. Currently only base 64 (2^6)
070    // is supported using 5 look-up tables.
071    //
072    // The implementations use 8, 16 or 32 bit storage tables to support different
073    // distribution sizes with optimal storage. Separate class implementations of
074    // the same algorithm allow array storage to be accessed directly from 1D tables.
075    // This provides a performance gain over using: abstracted storage accessed via
076    // an interface; or a single 2D table.
077    //
078    // To allow the optimal implementation to be chosen the sampler is created
079    // using factory methods. The sampler supports any probability distribution
080    // when provided via an array of probabilities and the Poisson and Binomial
081    // distributions for a restricted set of parameters. The restrictions are
082    // imposed by the requirement to compute the entire probability distribution
083    // from the controlling parameter(s) using a recursive method. Factory
084    // constructors return a SharedStateDiscreteSampler instance. Each distribution
085    // type is contained in an inner class.
086    // =========================================================================
087
088    /**
089     * The base class for Marsaglia-Tsang-Wang samplers.
090     */
091    private abstract static class AbstractMarsagliaTsangWangDiscreteSampler
092            implements SharedStateDiscreteSampler {
093        /** Underlying source of randomness. */
094        protected final UniformRandomProvider rng;
095
096        /** The name of the distribution. */
097        private final String distributionName;
098
099        /**
100         * @param rng Generator of uniformly distributed random numbers.
101         * @param distributionName Distribution name.
102         */
103        AbstractMarsagliaTsangWangDiscreteSampler(UniformRandomProvider rng,
104                                                  String distributionName) {
105            this.rng = rng;
106            this.distributionName = distributionName;
107        }
108
109        /**
110         * @param rng Generator of uniformly distributed random numbers.
111         * @param source Source to copy.
112         */
113        AbstractMarsagliaTsangWangDiscreteSampler(UniformRandomProvider rng,
114                                                  AbstractMarsagliaTsangWangDiscreteSampler source) {
115            this.rng = rng;
116            this.distributionName = source.distributionName;
117        }
118
119        /** {@inheritDoc} */
120        @Override
121        public String toString() {
122            return "Marsaglia Tsang Wang " + distributionName + " deviate [" + rng.toString() + "]";
123        }
124    }
125
126    /**
127     * An implementation for the sample algorithm based on the decomposition of the
128     * index in the range {@code [0,2^30)} into 5 base-64 digits with 8-bit backing storage.
129     */
130    private static final class MarsagliaTsangWangBase64Int8DiscreteSampler
131        extends AbstractMarsagliaTsangWangDiscreteSampler {
132        /** The mask to convert a {@code byte} to an unsigned 8-bit integer. */
133        private static final int MASK = 0xff;
134
135        /** Limit for look-up table 1. */
136        private final int t1;
137        /** Limit for look-up table 2. */
138        private final int t2;
139        /** Limit for look-up table 3. */
140        private final int t3;
141        /** Limit for look-up table 4. */
142        private final int t4;
143
144        /** Look-up table table1. */
145        private final byte[] table1;
146        /** Look-up table table2. */
147        private final byte[] table2;
148        /** Look-up table table3. */
149        private final byte[] table3;
150        /** Look-up table table4. */
151        private final byte[] table4;
152        /** Look-up table table5. */
153        private final byte[] table5;
154
155        /**
156         * @param rng Generator of uniformly distributed random numbers.
157         * @param distributionName Distribution name.
158         * @param prob The probabilities.
159         * @param offset The offset (must be positive).
160         */
161        MarsagliaTsangWangBase64Int8DiscreteSampler(UniformRandomProvider rng,
162                                                    String distributionName,
163                                                    int[] prob,
164                                                    int offset) {
165            super(rng, distributionName);
166
167            // Get table sizes for each base-64 digit
168            int n1 = 0;
169            int n2 = 0;
170            int n3 = 0;
171            int n4 = 0;
172            int n5 = 0;
173            for (final int m : prob) {
174                n1 += getBase64Digit(m, 1);
175                n2 += getBase64Digit(m, 2);
176                n3 += getBase64Digit(m, 3);
177                n4 += getBase64Digit(m, 4);
178                n5 += getBase64Digit(m, 5);
179            }
180
181            table1 = new byte[n1];
182            table2 = new byte[n2];
183            table3 = new byte[n3];
184            table4 = new byte[n4];
185            table5 = new byte[n5];
186
187            // Compute offsets
188            t1 = n1 << 24;
189            t2 = t1 + (n2 << 18);
190            t3 = t2 + (n3 << 12);
191            t4 = t3 + (n4 << 6);
192            n1 = 0;
193            n2 = 0;
194            n3 = 0;
195            n4 = 0;
196            n5 = 0;
197
198            // Fill tables
199            for (int i = 0; i < prob.length; i++) {
200                final int m = prob[i];
201                // Primitive type conversion will extract lower 8 bits
202                final byte k = (byte) (i + offset);
203                n1 = fill(table1, n1, n1 + getBase64Digit(m, 1), k);
204                n2 = fill(table2, n2, n2 + getBase64Digit(m, 2), k);
205                n3 = fill(table3, n3, n3 + getBase64Digit(m, 3), k);
206                n4 = fill(table4, n4, n4 + getBase64Digit(m, 4), k);
207                n5 = fill(table5, n5, n5 + getBase64Digit(m, 5), k);
208            }
209        }
210
211        /**
212         * @param rng Generator of uniformly distributed random numbers.
213         * @param source Source to copy.
214         */
215        private MarsagliaTsangWangBase64Int8DiscreteSampler(UniformRandomProvider rng,
216                MarsagliaTsangWangBase64Int8DiscreteSampler source) {
217            super(rng, source);
218            t1 = source.t1;
219            t2 = source.t2;
220            t3 = source.t3;
221            t4 = source.t4;
222            table1 = source.table1;
223            table2 = source.table2;
224            table3 = source.table3;
225            table4 = source.table4;
226            table5 = source.table5;
227        }
228
229        /**
230         * Fill the table with the value.
231         *
232         * @param table Table.
233         * @param from Lower bound index (inclusive)
234         * @param to Upper bound index (exclusive)
235         * @param value Value.
236         * @return the upper bound index
237         */
238        private static int fill(byte[] table, int from, int to, byte value) {
239            for (int i = from; i < to; i++) {
240                table[i] = value;
241            }
242            return to;
243        }
244
245        @Override
246        public int sample() {
247            final int j = rng.nextInt() >>> 2;
248            if (j < t1) {
249                return table1[j >>> 24] & MASK;
250            }
251            if (j < t2) {
252                return table2[(j - t1) >>> 18] & MASK;
253            }
254            if (j < t3) {
255                return table3[(j - t2) >>> 12] & MASK;
256            }
257            if (j < t4) {
258                return table4[(j - t3) >>> 6] & MASK;
259            }
260            // Note the tables are filled on the assumption that the sum of the probabilities.
261            // is >=2^30. If this is not true then the final table table5 will be smaller by the
262            // difference. So the tables *must* be constructed correctly.
263            return table5[j - t4] & MASK;
264        }
265
266        @Override
267        public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) {
268            return new MarsagliaTsangWangBase64Int8DiscreteSampler(rng, this);
269        }
270    }
271
272    /**
273     * An implementation for the sample algorithm based on the decomposition of the
274     * index in the range {@code [0,2^30)} into 5 base-64 digits with 16-bit backing storage.
275     */
276    private static final class MarsagliaTsangWangBase64Int16DiscreteSampler
277        extends AbstractMarsagliaTsangWangDiscreteSampler {
278        /** The mask to convert a {@code byte} to an unsigned 16-bit integer. */
279        private static final int MASK = 0xffff;
280
281        /** Limit for look-up table 1. */
282        private final int t1;
283        /** Limit for look-up table 2. */
284        private final int t2;
285        /** Limit for look-up table 3. */
286        private final int t3;
287        /** Limit for look-up table 4. */
288        private final int t4;
289
290        /** Look-up table table1. */
291        private final short[] table1;
292        /** Look-up table table2. */
293        private final short[] table2;
294        /** Look-up table table3. */
295        private final short[] table3;
296        /** Look-up table table4. */
297        private final short[] table4;
298        /** Look-up table table5. */
299        private final short[] table5;
300
301        /**
302         * @param rng Generator of uniformly distributed random numbers.
303         * @param distributionName Distribution name.
304         * @param prob The probabilities.
305         * @param offset The offset (must be positive).
306         */
307        MarsagliaTsangWangBase64Int16DiscreteSampler(UniformRandomProvider rng,
308                                                     String distributionName,
309                                                     int[] prob,
310                                                     int offset) {
311            super(rng, distributionName);
312
313            // Get table sizes for each base-64 digit
314            int n1 = 0;
315            int n2 = 0;
316            int n3 = 0;
317            int n4 = 0;
318            int n5 = 0;
319            for (final int m : prob) {
320                n1 += getBase64Digit(m, 1);
321                n2 += getBase64Digit(m, 2);
322                n3 += getBase64Digit(m, 3);
323                n4 += getBase64Digit(m, 4);
324                n5 += getBase64Digit(m, 5);
325            }
326
327            table1 = new short[n1];
328            table2 = new short[n2];
329            table3 = new short[n3];
330            table4 = new short[n4];
331            table5 = new short[n5];
332
333            // Compute offsets
334            t1 = n1 << 24;
335            t2 = t1 + (n2 << 18);
336            t3 = t2 + (n3 << 12);
337            t4 = t3 + (n4 << 6);
338            n1 = 0;
339            n2 = 0;
340            n3 = 0;
341            n4 = 0;
342            n5 = 0;
343
344            // Fill tables
345            for (int i = 0; i < prob.length; i++) {
346                final int m = prob[i];
347                // Primitive type conversion will extract lower 16 bits
348                final short k = (short) (i + offset);
349                n1 = fill(table1, n1, n1 + getBase64Digit(m, 1), k);
350                n2 = fill(table2, n2, n2 + getBase64Digit(m, 2), k);
351                n3 = fill(table3, n3, n3 + getBase64Digit(m, 3), k);
352                n4 = fill(table4, n4, n4 + getBase64Digit(m, 4), k);
353                n5 = fill(table5, n5, n5 + getBase64Digit(m, 5), k);
354            }
355        }
356
357        /**
358         * @param rng Generator of uniformly distributed random numbers.
359         * @param source Source to copy.
360         */
361        private MarsagliaTsangWangBase64Int16DiscreteSampler(UniformRandomProvider rng,
362                MarsagliaTsangWangBase64Int16DiscreteSampler source) {
363            super(rng, source);
364            t1 = source.t1;
365            t2 = source.t2;
366            t3 = source.t3;
367            t4 = source.t4;
368            table1 = source.table1;
369            table2 = source.table2;
370            table3 = source.table3;
371            table4 = source.table4;
372            table5 = source.table5;
373        }
374
375        /**
376         * Fill the table with the value.
377         *
378         * @param table Table.
379         * @param from Lower bound index (inclusive)
380         * @param to Upper bound index (exclusive)
381         * @param value Value.
382         * @return the upper bound index
383         */
384        private static int fill(short[] table, int from, int to, short value) {
385            for (int i = from; i < to; i++) {
386                table[i] = value;
387            }
388            return to;
389        }
390
391        @Override
392        public int sample() {
393            final int j = rng.nextInt() >>> 2;
394            if (j < t1) {
395                return table1[j >>> 24] & MASK;
396            }
397            if (j < t2) {
398                return table2[(j - t1) >>> 18] & MASK;
399            }
400            if (j < t3) {
401                return table3[(j - t2) >>> 12] & MASK;
402            }
403            if (j < t4) {
404                return table4[(j - t3) >>> 6] & MASK;
405            }
406            // Note the tables are filled on the assumption that the sum of the probabilities.
407            // is >=2^30. If this is not true then the final table table5 will be smaller by the
408            // difference. So the tables *must* be constructed correctly.
409            return table5[j - t4] & MASK;
410        }
411
412        @Override
413        public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) {
414            return new MarsagliaTsangWangBase64Int16DiscreteSampler(rng, this);
415        }
416    }
417
418    /**
419     * An implementation for the sample algorithm based on the decomposition of the
420     * index in the range {@code [0,2^30)} into 5 base-64 digits with 32-bit backing storage.
421     */
422    private static final class MarsagliaTsangWangBase64Int32DiscreteSampler
423        extends AbstractMarsagliaTsangWangDiscreteSampler {
424        /** Limit for look-up table 1. */
425        private final int t1;
426        /** Limit for look-up table 2. */
427        private final int t2;
428        /** Limit for look-up table 3. */
429        private final int t3;
430        /** Limit for look-up table 4. */
431        private final int t4;
432
433        /** Look-up table table1. */
434        private final int[] table1;
435        /** Look-up table table2. */
436        private final int[] table2;
437        /** Look-up table table3. */
438        private final int[] table3;
439        /** Look-up table table4. */
440        private final int[] table4;
441        /** Look-up table table5. */
442        private final int[] table5;
443
444        /**
445         * @param rng Generator of uniformly distributed random numbers.
446         * @param distributionName Distribution name.
447         * @param prob The probabilities.
448         * @param offset The offset (must be positive).
449         */
450        MarsagliaTsangWangBase64Int32DiscreteSampler(UniformRandomProvider rng,
451                                                     String distributionName,
452                                                     int[] prob,
453                                                     int offset) {
454            super(rng, distributionName);
455
456            // Get table sizes for each base-64 digit
457            int n1 = 0;
458            int n2 = 0;
459            int n3 = 0;
460            int n4 = 0;
461            int n5 = 0;
462            for (final int m : prob) {
463                n1 += getBase64Digit(m, 1);
464                n2 += getBase64Digit(m, 2);
465                n3 += getBase64Digit(m, 3);
466                n4 += getBase64Digit(m, 4);
467                n5 += getBase64Digit(m, 5);
468            }
469
470            table1 = new int[n1];
471            table2 = new int[n2];
472            table3 = new int[n3];
473            table4 = new int[n4];
474            table5 = new int[n5];
475
476            // Compute offsets
477            t1 = n1 << 24;
478            t2 = t1 + (n2 << 18);
479            t3 = t2 + (n3 << 12);
480            t4 = t3 + (n4 << 6);
481            n1 = 0;
482            n2 = 0;
483            n3 = 0;
484            n4 = 0;
485            n5 = 0;
486
487            // Fill tables
488            for (int i = 0; i < prob.length; i++) {
489                final int m = prob[i];
490                final int k = i + offset;
491                n1 = fill(table1, n1, n1 + getBase64Digit(m, 1), k);
492                n2 = fill(table2, n2, n2 + getBase64Digit(m, 2), k);
493                n3 = fill(table3, n3, n3 + getBase64Digit(m, 3), k);
494                n4 = fill(table4, n4, n4 + getBase64Digit(m, 4), k);
495                n5 = fill(table5, n5, n5 + getBase64Digit(m, 5), k);
496            }
497        }
498
499        /**
500         * @param rng Generator of uniformly distributed random numbers.
501         * @param source Source to copy.
502         */
503        private MarsagliaTsangWangBase64Int32DiscreteSampler(UniformRandomProvider rng,
504                MarsagliaTsangWangBase64Int32DiscreteSampler source) {
505            super(rng, source);
506            t1 = source.t1;
507            t2 = source.t2;
508            t3 = source.t3;
509            t4 = source.t4;
510            table1 = source.table1;
511            table2 = source.table2;
512            table3 = source.table3;
513            table4 = source.table4;
514            table5 = source.table5;
515        }
516
517        /**
518         * Fill the table with the value.
519         *
520         * @param table Table.
521         * @param from Lower bound index (inclusive)
522         * @param to Upper bound index (exclusive)
523         * @param value Value.
524         * @return the upper bound index
525         */
526        private static int fill(int[] table, int from, int to, int value) {
527            for (int i = from; i < to; i++) {
528                table[i] = value;
529            }
530            return to;
531        }
532
533        @Override
534        public int sample() {
535            final int j = rng.nextInt() >>> 2;
536            if (j < t1) {
537                return table1[j >>> 24];
538            }
539            if (j < t2) {
540                return table2[(j - t1) >>> 18];
541            }
542            if (j < t3) {
543                return table3[(j - t2) >>> 12];
544            }
545            if (j < t4) {
546                return table4[(j - t3) >>> 6];
547            }
548            // Note the tables are filled on the assumption that the sum of the probabilities.
549            // is >=2^30. If this is not true then the final table table5 will be smaller by the
550            // difference. So the tables *must* be constructed correctly.
551            return table5[j - t4];
552        }
553
554        @Override
555        public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) {
556            return new MarsagliaTsangWangBase64Int32DiscreteSampler(rng, this);
557        }
558    }
559
560
561
562    /** Class contains only static methods. */
563    private MarsagliaTsangWangDiscreteSampler() {}
564
565    /**
566     * Gets the k<sup>th</sup> base 64 digit of {@code m}.
567     *
568     * @param m the value m.
569     * @param k the digit.
570     * @return the base 64 digit
571     */
572    private static int getBase64Digit(int m, int k) {
573        return (m >>> (30 - 6 * k)) & 63;
574    }
575
576    /**
577     * Convert the probability to an integer in the range [0,2^30]. This is the numerator of
578     * a fraction with assumed denominator 2<sup>30</sup>.
579     *
580     * @param p Probability.
581     * @return the fraction numerator
582     */
583    private static int toUnsignedInt30(double p) {
584        return (int) (p * INT_30 + 0.5);
585    }
586
587    /**
588     * Create a new instance for probabilities {@code p(i)} where the sample value {@code x} is
589     * {@code i + offset}.
590     *
591     * <p>The sum of the probabilities must be {@code >=} 2<sup>30</sup>. Only the
592     * values for cumulative probability up to 2<sup>30</sup> will be sampled.</p>
593     *
594     * @param rng Generator of uniformly distributed random numbers.
595     * @param distributionName Distribution name.
596     * @param prob The probabilities.
597     * @param offset The offset (must be positive).
598     * @return Sampler.
599     */
600    private static SharedStateDiscreteSampler createSampler(UniformRandomProvider rng,
601                                                            String distributionName,
602                                                            int[] prob,
603                                                            int offset) {
604        // Note: No argument checks for private method.
605
606        // Choose implementation based on the maximum index
607        final int maxIndex = prob.length + offset - 1;
608        if (maxIndex < INT_8) {
609            return new MarsagliaTsangWangBase64Int8DiscreteSampler(rng, distributionName, prob, offset);
610        }
611        if (maxIndex < INT_16) {
612            return new MarsagliaTsangWangBase64Int16DiscreteSampler(rng, distributionName, prob, offset);
613        }
614        return new MarsagliaTsangWangBase64Int32DiscreteSampler(rng, distributionName, prob, offset);
615    }
616
617    // =========================================================================
618    // The following public classes provide factory methods to construct a sampler for:
619    // - Enumerated probability distribution (from provided double[] probabilities)
620    // - Poisson distribution for mean <= 1024
621    // - Binomial distribution for trials <= 65535
622    // =========================================================================
623
624    /**
625     * Create a sampler for an enumerated distribution of {@code n} values each with an
626     * associated probability.
627     * The samples corresponding to each probability are assumed to be a natural sequence
628     * starting at zero.
629     */
630    public static final class Enumerated {
631        /** The name of the enumerated probability distribution. */
632        private static final String ENUMERATED_NAME = "Enumerated";
633
634        /** Class contains only static methods. */
635        private Enumerated() {}
636
637        /**
638         * Creates a sampler for an enumerated distribution of {@code n} values each with an
639         * associated probability.
640         *
641         * <p>The probabilities will be normalised using their sum. The only requirement
642         * is the sum is positive.</p>
643         *
644         * <p>The sum of the probabilities is normalised to 2<sup>30</sup>. Note that
645         * probabilities are adjusted to the nearest 2<sup>-30</sup> due to round-off during
646         * the normalisation conversion. Consequently any probability less than 2<sup>-31</sup>
647         * will not be observed in samples.</p>
648         *
649         * @param rng Generator of uniformly distributed random numbers.
650         * @param probabilities The list of probabilities.
651         * @return Sampler.
652         * @throws IllegalArgumentException if {@code probabilities} is null or empty, a
653         * probability is negative, infinite or {@code NaN}, or the sum of all
654         * probabilities is not strictly positive.
655         */
656        public static SharedStateDiscreteSampler of(UniformRandomProvider rng,
657                                                    double[] probabilities) {
658            return createSampler(rng, ENUMERATED_NAME, normaliseProbabilities(probabilities), 0);
659        }
660
661        /**
662         * Normalise the probabilities to integers that sum to 2<sup>30</sup>.
663         *
664         * @param probabilities The list of probabilities.
665         * @return the normalised probabilities.
666         * @throws IllegalArgumentException if {@code probabilities} is null or empty, a
667         * probability is negative, infinite or {@code NaN}, or the sum of all
668         * probabilities is not strictly positive.
669         */
670        private static int[] normaliseProbabilities(double[] probabilities) {
671            final double sumProb = InternalUtils.validateProbabilities(probabilities);
672
673            // Compute the normalisation: 2^30 / sum
674            final double normalisation = INT_30 / sumProb;
675            final int[] prob = new int[probabilities.length];
676            int sum = 0;
677            int max = 0;
678            int mode = 0;
679            for (int i = 0; i < prob.length; i++) {
680                // Add 0.5 for rounding
681                final int p = (int) (probabilities[i] * normalisation + 0.5);
682                sum += p;
683                // Find the mode (maximum probability)
684                if (max < p) {
685                    max = p;
686                    mode = i;
687                }
688                prob[i] = p;
689            }
690
691            // The sum must be >= 2^30.
692            // Here just compensate the difference onto the highest probability.
693            prob[mode] += INT_30 - sum;
694
695            return prob;
696        }
697    }
698
699    /**
700     * Create a sampler for the Poisson distribution.
701     */
702    public static final class Poisson {
703        /** The name of the Poisson distribution. */
704        private static final String POISSON_NAME = "Poisson";
705
706        /**
707         * Upper bound on the mean for the Poisson distribution.
708         *
709         * <p>The original source code provided in Marsaglia, et al (2004) has no explicit
710         * limit but the code fails at mean {@code >= 1941} as the transform to compute p(x=mode)
711         * produces infinity. Use a conservative limit of 1024.</p>
712         */
713
714        private static final double MAX_MEAN = 1024;
715        /**
716         * The threshold for the mean of the Poisson distribution to switch the method used
717         * to compute the probabilities. This is taken from the example software provided by
718         * Marsaglia, et al (2004).
719         */
720        private static final double MEAN_THRESHOLD = 21.4;
721
722        /** Class contains only static methods. */
723        private Poisson() {}
724
725        /**
726         * Creates a sampler for the Poisson distribution.
727         *
728         * <p>Any probability less than 2<sup>-31</sup> will not be observed in samples.</p>
729         *
730         * <p>Storage requirements depend on the tabulated probability values. Example storage
731         * requirements are listed below.</p>
732         *
733         * <pre>
734         * mean      table size     kB
735         * 0.25      882            0.88
736         * 0.5       1135           1.14
737         * 1         1200           1.20
738         * 2         1451           1.45
739         * 4         1955           1.96
740         * 8         2961           2.96
741         * 16        4410           4.41
742         * 32        6115           6.11
743         * 64        8499           8.50
744         * 128       11528          11.53
745         * 256       15935          31.87
746         * 512       20912          41.82
747         * 1024      30614          61.23
748         * </pre>
749         *
750         * <p>Note: Storage changes to 2 bytes per index between {@code mean=128} and {@code mean=256}.</p>
751         *
752         * @param rng Generator of uniformly distributed random numbers.
753         * @param mean Mean.
754         * @return Sampler.
755         * @throws IllegalArgumentException if {@code mean <= 0} or {@code mean > 1024}.
756         */
757        public static SharedStateDiscreteSampler of(UniformRandomProvider rng,
758                                                    double mean) {
759            validatePoissonDistributionParameters(mean);
760
761            // Create the distribution either from X=0 or from X=mode when the mean is high.
762            return mean < MEAN_THRESHOLD ?
763                createPoissonDistributionFromX0(rng, mean) :
764                createPoissonDistributionFromXMode(rng, mean);
765        }
766
767        /**
768         * Validate the Poisson distribution parameters.
769         *
770         * @param mean Mean.
771         * @throws IllegalArgumentException if {@code mean <= 0} or {@code mean > 1024}.
772         */
773        private static void validatePoissonDistributionParameters(double mean) {
774            InternalUtils.requireStrictlyPositive(mean, "mean");
775            if (mean > MAX_MEAN) {
776                throw new IllegalArgumentException("mean " + mean + " > " + MAX_MEAN);
777            }
778        }
779
780        /**
781         * Creates the Poisson distribution by computing probabilities recursively from {@code X=0}.
782         *
783         * @param rng Generator of uniformly distributed random numbers.
784         * @param mean Mean.
785         * @return Sampler.
786         */
787        private static SharedStateDiscreteSampler createPoissonDistributionFromX0(
788                UniformRandomProvider rng, double mean) {
789            final double p0 = Math.exp(-mean);
790
791            // Recursive update of Poisson probability until the value is too small
792            // p(x + 1) = p(x) * mean / (x + 1)
793            double p = p0;
794            int i = 1;
795            while (p * DOUBLE_31 >= 1) {
796                p *= mean / i++;
797            }
798
799            // Probabilities are 30-bit integers, assumed denominator 2^30
800            final int size = i - 1;
801            final int[] prob = new int[size];
802
803            p = p0;
804            prob[0] = toUnsignedInt30(p);
805            // The sum must exceed 2^30. In edges cases this is false due to round-off.
806            int sum = prob[0];
807            for (i = 1; i < prob.length; i++) {
808                p *= mean / i;
809                prob[i] = toUnsignedInt30(p);
810                sum += prob[i];
811            }
812
813            // If the sum is < 2^30 add the remaining sum to the mode (floor(mean)).
814            prob[(int) mean] += Math.max(0, INT_30 - sum);
815
816            // Note: offset = 0
817            return createSampler(rng, POISSON_NAME, prob, 0);
818        }
819
820        /**
821         * Creates the Poisson distribution by computing probabilities recursively upward and downward
822         * from {@code X=mode}, the location of the largest p-value.
823         *
824         * @param rng Generator of uniformly distributed random numbers.
825         * @param mean Mean.
826         * @return Sampler.
827         */
828        private static SharedStateDiscreteSampler createPoissonDistributionFromXMode(
829                UniformRandomProvider rng, double mean) {
830            // If mean >= 21.4, generate from largest p-value up, then largest down.
831            // The largest p-value will be at the mode (floor(mean)).
832
833            // Find p(x=mode)
834            final int mode = (int) mean;
835            // This transform is stable until mean >= 1941 where p will result in Infinity
836            // before the divisor i is large enough to start reducing the product (i.e. i > c).
837            final double c = mean * Math.exp(-mean / mode);
838            double p = 1.0;
839            for (int i = 1; i <= mode; i++) {
840                p *= c / i;
841            }
842            final double pMode = p;
843
844            // Find the upper limit using recursive computation of the p-value.
845            // Note this will exit when i overflows to negative so no check on the range
846            int i = mode + 1;
847            while (p * DOUBLE_31 >= 1) {
848                p *= mean / i++;
849            }
850            final int last = i - 2;
851
852            // Find the lower limit using recursive computation of the p-value.
853            p = pMode;
854            int j = -1;
855            for (i = mode - 1; i >= 0; i--) {
856                p *= (i + 1) / mean;
857                if (p * DOUBLE_31 < 1) {
858                    j = i;
859                    break;
860                }
861            }
862
863            // Probabilities are 30-bit integers, assumed denominator 2^30.
864            // This is the minimum sample value: prob[x - offset] = p(x)
865            final int offset = j + 1;
866            final int size = last - offset + 1;
867            final int[] prob = new int[size];
868
869            p = pMode;
870            prob[mode - offset] = toUnsignedInt30(p);
871            // The sum must exceed 2^30. In edges cases this is false due to round-off.
872            int sum = prob[mode - offset];
873            // From mode to upper limit
874            for (i = mode + 1; i <= last; i++) {
875                p *= mean / i;
876                prob[i - offset] = toUnsignedInt30(p);
877                sum += prob[i - offset];
878            }
879            // From mode to lower limit
880            p = pMode;
881            for (i = mode - 1; i >= offset; i--) {
882                p *= (i + 1) / mean;
883                prob[i - offset] = toUnsignedInt30(p);
884                sum += prob[i - offset];
885            }
886
887            // If the sum is < 2^30 add the remaining sum to the mode.
888            // If above 2^30 then the effect is truncation of the long tail of the distribution.
889            prob[mode - offset] += Math.max(0, INT_30 - sum);
890
891            return createSampler(rng, POISSON_NAME, prob, offset);
892        }
893    }
894
895    /**
896     * Create a sampler for the Binomial distribution.
897     */
898    public static final class Binomial {
899        /** The name of the Binomial distribution. */
900        private static final String BINOMIAL_NAME = "Binomial";
901
902        /**
903         * Return a fixed result for the Binomial distribution. This is a special class to handle
904         * an edge case of probability of success equal to 0 or 1.
905         */
906        private static final class MarsagliaTsangWangFixedResultBinomialSampler
907            extends AbstractMarsagliaTsangWangDiscreteSampler {
908            /** The result. */
909            private final int result;
910
911            /**
912             * @param result Result.
913             */
914            MarsagliaTsangWangFixedResultBinomialSampler(int result) {
915                super(null, BINOMIAL_NAME);
916                this.result = result;
917            }
918
919            @Override
920            public int sample() {
921                return result;
922            }
923
924            @Override
925            public String toString() {
926                return BINOMIAL_NAME + " deviate";
927            }
928
929            @Override
930            public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) {
931                // No shared state
932                return this;
933            }
934        }
935
936        /**
937         * Return an inversion result for the Binomial distribution. This assumes the
938         * following:
939         *
940         * <pre>
941         * Binomial(n, p) = 1 - Binomial(n, 1 - p)
942         * </pre>
943         */
944        private static final class MarsagliaTsangWangInversionBinomialSampler
945            extends AbstractMarsagliaTsangWangDiscreteSampler {
946            /** The number of trials. */
947            private final int trials;
948            /** The Binomial distribution sampler. */
949            private final SharedStateDiscreteSampler sampler;
950
951            /**
952             * @param trials Number of trials.
953             * @param sampler Binomial distribution sampler.
954             */
955            MarsagliaTsangWangInversionBinomialSampler(int trials,
956                                                       SharedStateDiscreteSampler sampler) {
957                super(null, BINOMIAL_NAME);
958                this.trials = trials;
959                this.sampler = sampler;
960            }
961
962            @Override
963            public int sample() {
964                return trials - sampler.sample();
965            }
966
967            @Override
968            public String toString() {
969                return sampler.toString();
970            }
971
972            @Override
973            public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) {
974                return new MarsagliaTsangWangInversionBinomialSampler(this.trials,
975                    this.sampler.withUniformRandomProvider(rng));
976            }
977        }
978
979        /** Class contains only static methods. */
980        private Binomial() {}
981
982        /**
983         * Creates a sampler for the Binomial distribution.
984         *
985         * <p>Any probability less than 2<sup>-31</sup> will not be observed in samples.</p>
986         *
987         * <p>Storage requirements depend on the tabulated probability values. Example storage
988         * requirements are listed below (in kB).</p>
989         *
990         * <pre>
991         *          p
992         * trials   0.5    0.1   0.01  0.001
993         *    4    0.06   0.63   0.44   0.44
994         *   16    0.69   1.14   0.76   0.44
995         *   64    4.73   2.40   1.14   0.51
996         *  256    8.63   5.17   1.89   0.82
997         * 1024   31.12   9.45   3.34   0.89
998         * </pre>
999         *
1000         * <p>The method requires that the Binomial distribution probability at {@code x=0} can be computed.
1001         * This will fail when {@code (1 - p)^trials == 0} which requires {@code trials} to be large
1002         * and/or {@code p} to be small. In this case an exception is raised.</p>
1003         *
1004         * @param rng Generator of uniformly distributed random numbers.
1005         * @param trials Number of trials.
1006         * @param probabilityOfSuccess Probability of success (p).
1007         * @return Sampler.
1008         * @throws IllegalArgumentException if {@code trials < 0} or {@code trials >= 2^16},
1009         * {@code p} is not in the range {@code [0-1]}, or the probability distribution cannot
1010         * be computed.
1011         */
1012        public static SharedStateDiscreteSampler of(UniformRandomProvider rng,
1013                                                    int trials,
1014                                                    double probabilityOfSuccess) {
1015            validateBinomialDistributionParameters(trials, probabilityOfSuccess);
1016
1017            // Handle edge cases
1018            if (probabilityOfSuccess == 0) {
1019                return new MarsagliaTsangWangFixedResultBinomialSampler(0);
1020            }
1021            if (probabilityOfSuccess == 1) {
1022                return new MarsagliaTsangWangFixedResultBinomialSampler(trials);
1023            }
1024
1025            // Check the supported size.
1026            if (trials >= INT_16) {
1027                throw new IllegalArgumentException("Unsupported number of trials: " + trials);
1028            }
1029
1030            return createBinomialDistributionSampler(rng, trials, probabilityOfSuccess);
1031        }
1032
1033        /**
1034         * Validate the Binomial distribution parameters.
1035         *
1036         * @param trials Number of trials.
1037         * @param probabilityOfSuccess Probability of success (p).
1038         * @throws IllegalArgumentException if {@code trials < 0} or
1039         * {@code p} is not in the range {@code [0-1]}
1040         */
1041        private static void validateBinomialDistributionParameters(int trials, double probabilityOfSuccess) {
1042            if (trials < 0) {
1043                throw new IllegalArgumentException("Trials is not positive: " + trials);
1044            }
1045            InternalUtils.requireRangeClosed(0, 1, probabilityOfSuccess, "probability of success");
1046        }
1047
1048        /**
1049         * Creates the Binomial distribution sampler.
1050         *
1051         * <p>This assumes the parameters for the distribution are valid. The method
1052         * will only fail if the initial probability for {@code X=0} is zero.</p>
1053         *
1054         * @param rng Generator of uniformly distributed random numbers.
1055         * @param trials Number of trials.
1056         * @param probabilityOfSuccess Probability of success (p).
1057         * @return Sampler.
1058         * @throws IllegalArgumentException if the probability distribution cannot be
1059         * computed.
1060         */
1061        private static SharedStateDiscreteSampler createBinomialDistributionSampler(
1062                UniformRandomProvider rng, int trials, double probabilityOfSuccess) {
1063
1064            // The maximum supported value for Math.exp is approximately -744.
1065            // This occurs when trials is large and p is close to 1.
1066            // Handle this by using an inversion: generate j=Binomial(n,1-p), return n-j
1067            final boolean useInversion = probabilityOfSuccess > 0.5;
1068            final double p = useInversion ? 1 - probabilityOfSuccess : probabilityOfSuccess;
1069
1070            // Check if the distribution can be computed
1071            final double p0 = Math.exp(trials * Math.log(1 - p));
1072            if (p0 < Double.MIN_VALUE) {
1073                throw new IllegalArgumentException("Unable to compute distribution");
1074            }
1075
1076            // First find size of probability array
1077            double t = p0;
1078            final double h = p / (1 - p);
1079            // Find first probability above the threshold of 2^-31
1080            int begin = 0;
1081            if (t * DOUBLE_31 < 1) {
1082                // Somewhere after p(0)
1083                // Note:
1084                // If this loop is entered p(0) is < 2^-31.
1085                // This has been tested at the extreme for p(0)=Double.MIN_VALUE and either
1086                // p=0.5 or trials=2^16-1 and does not fail to find the beginning.
1087                for (int i = 1; i <= trials; i++) {
1088                    t *= (trials + 1 - i) * h / i;
1089                    if (t * DOUBLE_31 >= 1) {
1090                        begin = i;
1091                        break;
1092                    }
1093                }
1094            }
1095            // Find last probability
1096            int end = trials;
1097            for (int i = begin + 1; i <= trials; i++) {
1098                t *= (trials + 1 - i) * h / i;
1099                if (t * DOUBLE_31 < 1) {
1100                    end = i - 1;
1101                    break;
1102                }
1103            }
1104
1105            return createBinomialDistributionSamplerFromRange(rng, trials, p, useInversion,
1106                    p0, begin, end);
1107        }
1108
1109        /**
1110         * Creates the Binomial distribution sampler using only the probability values for {@code X}
1111         * between the begin and the end (inclusive).
1112         *
1113         * @param rng Generator of uniformly distributed random numbers.
1114         * @param trials Number of trials.
1115         * @param p Probability of success (p).
1116         * @param useInversion Set to {@code true} if the probability was inverted.
1117         * @param p0 Probability at {@code X=0}
1118         * @param begin Begin value {@code X} for the distribution.
1119         * @param end End value {@code X} for the distribution.
1120         * @return Sampler.
1121         */
1122        private static SharedStateDiscreteSampler createBinomialDistributionSamplerFromRange(
1123                UniformRandomProvider rng, int trials, double p,
1124                boolean useInversion, double p0, int begin, int end) {
1125
1126            // Assign probability values as 30-bit integers
1127            final int size = end - begin + 1;
1128            final int[] prob = new int[size];
1129            double t = p0;
1130            final double h = p / (1 - p);
1131            for (int i = 1; i <= begin; i++) {
1132                t *= (trials + 1 - i) * h / i;
1133            }
1134            int sum = toUnsignedInt30(t);
1135            prob[0] = sum;
1136            for (int i = begin + 1; i <= end; i++) {
1137                t *= (trials + 1 - i) * h / i;
1138                prob[i - begin] = toUnsignedInt30(t);
1139                sum += prob[i - begin];
1140            }
1141
1142            // If the sum is < 2^30 add the remaining sum to the mode (floor((n+1)p))).
1143            // If above 2^30 then the effect is truncation of the long tail of the distribution.
1144            final int mode = (int) ((trials + 1) * p) - begin;
1145            prob[mode] += Math.max(0, INT_30 - sum);
1146
1147            final SharedStateDiscreteSampler sampler = createSampler(rng, BINOMIAL_NAME, prob, begin);
1148
1149            // Check if an inversion was made
1150            return useInversion ?
1151                   new MarsagliaTsangWangInversionBinomialSampler(trials, sampler) :
1152                   sampler;
1153        }
1154    }
1155}