View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.rng.sampling.distribution;
18  
19  import org.apache.commons.rng.UniformRandomProvider;
20  import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler.LargeMeanPoissonSamplerState;
21  
22  /**
23   * Create a sampler for the
24   * <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson
25   * distribution</a> using a cache to minimise construction cost.
26   *
27   * <p>The cache will return a sampler equivalent to
28   * {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}.</p>
29   *
30   * <p>The cache allows the {@link PoissonSampler} construction cost to be minimised
31   * for low size Poisson samples. The cache stores state for a range of integers where
32   * integer value {@code n} can be used to construct a sampler for the range
33   * {@code n <= mean < n+1}.</p>
34   *
35   * <p>The cache is advantageous under the following conditions:</p>
36   *
37   * <ul>
38   *   <li>The mean of the Poisson distribution falls within a known range.
39   *   <li>The sample size to be made with the <strong>same</strong> sampler is
40   *       small.
41   *   <li>The Poisson samples have different means with the same integer
42   *       value(s) after rounding down.
43   * </ul>
44   *
45   * <p>If the sample size to be made with the <strong>same</strong> sampler is large
46   * then the construction cost is low compared to the sampling time and the cache
47   * has minimal benefit.</p>
48   *
49   * <p>Performance improvement is dependent on the speed of the
50   * {@link UniformRandomProvider}. A fast provider can obtain a two-fold speed
51   * improvement for a single-use Poisson sampler.</p>
52   *
53   * <p>The cache is thread safe. Note that concurrent threads using the cache
54   * must ensure a thread safe {@link UniformRandomProvider} is used when creating
55   * samplers, e.g. a unique sampler per thread.</p>
56   *
57   * <p>Sampling uses:</p>
58   *
59   * <ul>
60   *   <li>{@link UniformRandomProvider#nextDouble()}
61   *   <li>{@link UniformRandomProvider#nextLong()} (large means only)
62   * </ul>
63   *
64   * @since 1.2
65   */
66  public class PoissonSamplerCache {
67  
68      /**
69       * The minimum N covered by the cache where
70       * {@code N = (int)Math.floor(mean)}.
71       */
72      private final int minN;
73      /**
74       * The maximum N covered by the cache where
75       * {@code N = (int)Math.floor(mean)}.
76       */
77      private final int maxN;
78      /** The cache of states between {@link #minN} and {@link #maxN}. */
79      private final LargeMeanPoissonSamplerState[] values;
80  
81      /**
82       * Create an instance.
83       *
84       * @param minMean The minimum mean covered by the cache.
85       * @param maxMean The maximum mean covered by the cache.
86       * @throws IllegalArgumentException if {@code maxMean < minMean}
87       */
88      public PoissonSamplerCache(double minMean,
89                                 double maxMean) {
90          this(checkMeanRange(minMean, maxMean), maxMean, false);
91      }
92  
93      /**
94       * @param minMean The minimum mean covered by the cache.
95       * @param maxMean The maximum mean covered by the cache.
96       * @param ignored Ignored value.
97       */
98      private PoissonSamplerCache(double minMean,
99                                  double maxMean,
100                                 boolean ignored) {
101         // The cache can only be used for the LargeMeanPoissonSampler.
102         if (maxMean < PoissonSampler.PIVOT) {
103             // The upper limit is too small so no cache will be used.
104             // This class will just construct new samplers.
105             minN = 0;
106             maxN = 0;
107             values = null;
108         } else {
109             // Convert the mean into integers.
110             // Note the minimum is clipped to the algorithm switch point.
111             this.minN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT));
112             this.maxN = (int) Math.floor(Math.min(maxMean, Integer.MAX_VALUE));
113             values = new LargeMeanPoissonSamplerState[maxN - minN + 1];
114         }
115     }
116 
117     /**
118      * @param minN   The minimum N covered by the cache where {@code N = (int)Math.floor(mean)}.
119      * @param maxN   The maximum N covered by the cache where {@code N = (int)Math.floor(mean)}.
120      * @param states The precomputed states.
121      */
122     private PoissonSamplerCache(int minN,
123                                 int maxN,
124                                 LargeMeanPoissonSamplerState[] states) {
125         this.minN = minN;
126         this.maxN = maxN;
127         // Stored directly as the states were newly created within this class.
128         this.values = states;
129     }
130 
131     /**
132      * Check the mean range.
133      *
134      * <p>This method exists to raise an exception before invocation of the
135      * private constructor; this mitigates Finalizer attacks
136      * (see SpotBugs CT_CONSTRUCTOR_THROW).
137      *
138      * @param minMean The minimum mean covered by the cache.
139      * @param maxMean The maximum mean covered by the cache.
140      * @return the minimum mean
141      * @throws IllegalArgumentException if {@code maxMean < minMean}
142      */
143     private static double checkMeanRange(double minMean, double maxMean) {
144         // Note:
145         // Although a mean of 0 is invalid for a Poisson sampler this case
146         // is handled to make the cache user friendly. Any low means will
147         // be handled by the SmallMeanPoissonSampler and not cached.
148         // For this reason it is also OK if the means are negative.
149 
150         // Allow minMean == maxMean so that the cache can be used
151         // to create samplers with distinct RNGs and the same mean.
152         if (maxMean < minMean) {
153             throw new IllegalArgumentException(
154                     "Max mean: " + maxMean + " < " + minMean);
155         }
156         return minMean;
157     }
158 
159     /**
160      * Creates a new Poisson sampler.
161      *
162      * <p>The returned sampler will function exactly the
163      * same as {@link PoissonSampler#of(UniformRandomProvider, double)}.
164      *
165      * @param rng  Generator of uniformly distributed random numbers.
166      * @param mean Mean.
167      * @return A Poisson sampler
168      * @throws IllegalArgumentException if {@code mean <= 0} or
169      * {@code mean >} {@link Integer#MAX_VALUE}.
170      * @deprecated Use {@link #createSharedStateSampler(UniformRandomProvider, double)}.
171      */
172     @Deprecated
173     public DiscreteSampler createPoissonSampler(UniformRandomProvider rng,
174                                                 double mean) {
175         return createSharedStateSampler(rng, mean);
176     }
177 
178     /**
179      * Creates a new Poisson sampler.
180      *
181      * <p>The returned sampler will function exactly the
182      * same as {@link PoissonSampler#of(UniformRandomProvider, double)}.
183      *
184      * @param rng  Generator of uniformly distributed random numbers.
185      * @param mean Mean.
186      * @return A Poisson sampler
187      * @throws IllegalArgumentException if {@code mean <= 0} or
188      * {@code mean >} {@link Integer#MAX_VALUE}.
189      * @since 1.4
190      */
191     public SharedStateDiscreteSampler createSharedStateSampler(UniformRandomProvider rng,
192                                                                double mean) {
193         // Ensure the same functionality as the PoissonSampler by
194         // using a SmallMeanPoissonSampler under the switch point.
195         if (mean < PoissonSampler.PIVOT) {
196             return SmallMeanPoissonSampler.of(rng, mean);
197         }
198         if (mean > maxN) {
199             // Outside the range of the cache.
200             // This avoids extra parameter checks and handles the case when
201             // the cache is empty or if Math.floor(mean) is not an integer.
202             return LargeMeanPoissonSampler.of(rng, mean);
203         }
204 
205         // Convert the mean into an integer.
206         final int n = (int) Math.floor(mean);
207         if (n < minN) {
208             // Outside the lower range of the cache.
209             return LargeMeanPoissonSampler.of(rng, mean);
210         }
211 
212         // Look in the cache for a state that can be reused.
213         // Note: The cache is offset by minN.
214         final int index = n - minN;
215         final LargeMeanPoissonSamplerState state = values[index];
216         if (state == null) {
217             // Create a sampler and store the state for reuse.
218             // Do not worry about thread contention
219             // as the state is effectively immutable.
220             // If recomputed and replaced it will the same.
221             final LargeMeanPoissonSampler sampler = new LargeMeanPoissonSampler(rng, mean);
222             values[index] = sampler.getState();
223             return sampler;
224         }
225         // Compute the remaining fraction of the mean
226         final double lambdaFractional = mean - n;
227         return new LargeMeanPoissonSampler(rng, state, lambdaFractional);
228     }
229 
230     /**
231      * Check if the mean is within the range where the cache can minimise the
232      * construction cost of the {@link PoissonSampler}.
233      *
234      * @param mean
235      *            the mean
236      * @return true, if within the cache range
237      */
238     public boolean withinRange(double mean) {
239         if (mean < PoissonSampler.PIVOT) {
240             // Construction is optimal
241             return true;
242         }
243         // Convert the mean into an integer.
244         final int n = (int) Math.floor(mean);
245         return n <= maxN && n >= minN;
246     }
247 
248     /**
249      * Checks if the cache covers a valid range of mean values.
250      *
251      * <p>Note that the cache is only valid for one of the Poisson sampling
252      * algorithms. In the instance that a range was requested that was too
253      * low then there is nothing to cache and this functions returns
254      * {@code false}.
255      *
256      * <p>The cache can still be used to create a {@link PoissonSampler} using
257      * {@link #createSharedStateSampler(UniformRandomProvider, double)}.
258      *
259      * <p>This method can be used to determine if the cache has a potential
260      * performance benefit.
261      *
262      * @return true, if the cache covers a range of mean values
263      */
264     public boolean isValidRange() {
265         return values != null;
266     }
267 
268     /**
269      * Gets the minimum mean covered by the cache.
270      *
271      * <p>This value is the inclusive lower bound and is equal to
272      * the lowest integer-valued mean that is covered by the cache.
273      *
274      * <p>Note that this value may not match the value passed to the constructor
275      * due to the following reasons:
276      *
277      * <ul>
278      *   <li>At small mean values a different algorithm is used for Poisson
279      *       sampling and the cache is unnecessary.
280      *   <li>The minimum is always an integer so may be below the constructor
281      *       minimum mean.
282      * </ul>
283      *
284      * <p>If {@link #isValidRange()} returns {@code true} the cache will store
285      * state to reduce construction cost of samplers in
286      * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()}
287      * inclusive. Otherwise this method returns 0;
288      *
289      * @return The minimum mean covered by the cache.
290      */
291     public double getMinMean() {
292         return minN;
293     }
294 
295     /**
296      * Gets the maximum mean covered by the cache.
297      *
298      * <p>This value is the inclusive upper bound and is equal to
299      * the double value below the first integer-valued mean that is
300      * above range covered by the cache.
301      *
302      * <p>Note that this value may not match the value passed to the constructor
303      * due to the following reasons:
304      * <ul>
305      *   <li>At small mean values a different algorithm is used for Poisson
306      *       sampling and the cache is unnecessary.
307      *   <li>The maximum is always the double value below an integer so
308      *       may be above the constructor maximum mean.
309      * </ul>
310      *
311      * <p>If {@link #isValidRange()} returns {@code true} the cache will store
312      * state to reduce construction cost of samplers in
313      * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()}
314      * inclusive. Otherwise this method returns 0;
315      *
316      * @return The maximum mean covered by the cache.
317      */
318     public double getMaxMean() {
319         if (isValidRange()) {
320             return Math.nextDown(maxN + 1.0);
321         }
322         return 0;
323     }
324 
325     /**
326      * Gets the minimum mean value that can be cached.
327      *
328      * <p>Any {@link PoissonSampler} created with a mean below this level will not
329      * have any state that can be cached.
330      *
331      * @return the minimum cached mean
332      */
333     public static double getMinimumCachedMean() {
334         return PoissonSampler.PIVOT;
335     }
336 
337     /**
338      * Create a new {@link PoissonSamplerCache} with the given range
339      * reusing the current cache values.
340      *
341      * <p>This will create a new object even if the range is smaller or the
342      * same as the current cache.
343      *
344      * @param minMean The minimum mean covered by the cache.
345      * @param maxMean The maximum mean covered by the cache.
346      * @throws IllegalArgumentException if {@code maxMean < minMean}
347      * @return the poisson sampler cache
348      */
349     public PoissonSamplerCache withRange(double minMean,
350                                          double maxMean) {
351         if (values == null) {
352             // Nothing to reuse
353             return new PoissonSamplerCache(minMean, maxMean);
354         }
355         checkMeanRange(minMean, maxMean);
356 
357         // The cache can only be used for the LargeMeanPoissonSampler.
358         if (maxMean < PoissonSampler.PIVOT) {
359             return new PoissonSamplerCache(0, 0);
360         }
361 
362         // Convert the mean into integers.
363         // Note the minimum is clipped to the algorithm switch point.
364         final int withMinN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT));
365         final int withMaxN = (int) Math.floor(maxMean);
366         final LargeMeanPoissonSamplerState[] states =
367                 new LargeMeanPoissonSamplerState[withMaxN - withMinN + 1];
368 
369         // Preserve values from the current array to the next
370         final int currentIndex;
371         final int nextIndex;
372         if (this.minN <= withMinN) {
373             // The current array starts before the new array
374             currentIndex = withMinN - this.minN;
375             nextIndex = 0;
376         } else {
377             // The new array starts before the current array
378             currentIndex = 0;
379             nextIndex = this.minN - withMinN;
380         }
381         final int length = Math.min(values.length - currentIndex, states.length - nextIndex);
382         if (length > 0) {
383             System.arraycopy(values, currentIndex, states, nextIndex, length);
384         }
385 
386         return new PoissonSamplerCache(withMinN, withMaxN, states);
387     }
388 }