1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.rng.sampling.distribution;
18
19 import org.apache.commons.rng.UniformRandomProvider;
20 import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler.LargeMeanPoissonSamplerState;
21
22 /**
23 * Create a sampler for the
24 * <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson
25 * distribution</a> using a cache to minimise construction cost.
26 *
27 * <p>The cache will return a sampler equivalent to
28 * {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}.</p>
29 *
30 * <p>The cache allows the {@link PoissonSampler} construction cost to be minimised
31 * for low size Poisson samples. The cache stores state for a range of integers where
32 * integer value {@code n} can be used to construct a sampler for the range
33 * {@code n <= mean < n+1}.</p>
34 *
35 * <p>The cache is advantageous under the following conditions:</p>
36 *
37 * <ul>
38 * <li>The mean of the Poisson distribution falls within a known range.
39 * <li>The sample size to be made with the <strong>same</strong> sampler is
40 * small.
41 * <li>The Poisson samples have different means with the same integer
42 * value(s) after rounding down.
43 * </ul>
44 *
45 * <p>If the sample size to be made with the <strong>same</strong> sampler is large
46 * then the construction cost is low compared to the sampling time and the cache
47 * has minimal benefit.</p>
48 *
49 * <p>Performance improvement is dependent on the speed of the
50 * {@link UniformRandomProvider}. A fast provider can obtain a two-fold speed
51 * improvement for a single-use Poisson sampler.</p>
52 *
53 * <p>The cache is thread safe. Note that concurrent threads using the cache
54 * must ensure a thread safe {@link UniformRandomProvider} is used when creating
55 * samplers, e.g. a unique sampler per thread.</p>
56 *
57 * <p>Sampling uses:</p>
58 *
59 * <ul>
60 * <li>{@link UniformRandomProvider#nextDouble()}
61 * <li>{@link UniformRandomProvider#nextLong()} (large means only)
62 * </ul>
63 *
64 * @since 1.2
65 */
66 public class PoissonSamplerCache {
67
68 /**
69 * The minimum N covered by the cache where
70 * {@code N = (int)Math.floor(mean)}.
71 */
72 private final int minN;
73 /**
74 * The maximum N covered by the cache where
75 * {@code N = (int)Math.floor(mean)}.
76 */
77 private final int maxN;
78 /** The cache of states between {@link #minN} and {@link #maxN}. */
79 private final LargeMeanPoissonSamplerState[] values;
80
81 /**
82 * Create an instance.
83 *
84 * @param minMean The minimum mean covered by the cache.
85 * @param maxMean The maximum mean covered by the cache.
86 * @throws IllegalArgumentException if {@code maxMean < minMean}
87 */
88 public PoissonSamplerCache(double minMean,
89 double maxMean) {
90 this(checkMeanRange(minMean, maxMean), maxMean, false);
91 }
92
93 /**
94 * @param minMean The minimum mean covered by the cache.
95 * @param maxMean The maximum mean covered by the cache.
96 * @param ignored Ignored value.
97 */
98 private PoissonSamplerCache(double minMean,
99 double maxMean,
100 boolean ignored) {
101 // The cache can only be used for the LargeMeanPoissonSampler.
102 if (maxMean < PoissonSampler.PIVOT) {
103 // The upper limit is too small so no cache will be used.
104 // This class will just construct new samplers.
105 minN = 0;
106 maxN = 0;
107 values = null;
108 } else {
109 // Convert the mean into integers.
110 // Note the minimum is clipped to the algorithm switch point.
111 this.minN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT));
112 this.maxN = (int) Math.floor(Math.min(maxMean, Integer.MAX_VALUE));
113 values = new LargeMeanPoissonSamplerState[maxN - minN + 1];
114 }
115 }
116
117 /**
118 * @param minN The minimum N covered by the cache where {@code N = (int)Math.floor(mean)}.
119 * @param maxN The maximum N covered by the cache where {@code N = (int)Math.floor(mean)}.
120 * @param states The precomputed states.
121 */
122 private PoissonSamplerCache(int minN,
123 int maxN,
124 LargeMeanPoissonSamplerState[] states) {
125 this.minN = minN;
126 this.maxN = maxN;
127 // Stored directly as the states were newly created within this class.
128 this.values = states;
129 }
130
131 /**
132 * Check the mean range.
133 *
134 * <p>This method exists to raise an exception before invocation of the
135 * private constructor; this mitigates Finalizer attacks
136 * (see SpotBugs CT_CONSTRUCTOR_THROW).
137 *
138 * @param minMean The minimum mean covered by the cache.
139 * @param maxMean The maximum mean covered by the cache.
140 * @return the minimum mean
141 * @throws IllegalArgumentException if {@code maxMean < minMean}
142 */
143 private static double checkMeanRange(double minMean, double maxMean) {
144 // Note:
145 // Although a mean of 0 is invalid for a Poisson sampler this case
146 // is handled to make the cache user friendly. Any low means will
147 // be handled by the SmallMeanPoissonSampler and not cached.
148 // For this reason it is also OK if the means are negative.
149
150 // Allow minMean == maxMean so that the cache can be used
151 // to create samplers with distinct RNGs and the same mean.
152 if (maxMean < minMean) {
153 throw new IllegalArgumentException(
154 "Max mean: " + maxMean + " < " + minMean);
155 }
156 return minMean;
157 }
158
159 /**
160 * Creates a new Poisson sampler.
161 *
162 * <p>The returned sampler will function exactly the
163 * same as {@link PoissonSampler#of(UniformRandomProvider, double)}.
164 *
165 * @param rng Generator of uniformly distributed random numbers.
166 * @param mean Mean.
167 * @return A Poisson sampler
168 * @throws IllegalArgumentException if {@code mean <= 0} or
169 * {@code mean >} {@link Integer#MAX_VALUE}.
170 * @deprecated Use {@link #createSharedStateSampler(UniformRandomProvider, double)}.
171 */
172 @Deprecated
173 public DiscreteSampler createPoissonSampler(UniformRandomProvider rng,
174 double mean) {
175 return createSharedStateSampler(rng, mean);
176 }
177
178 /**
179 * Creates a new Poisson sampler.
180 *
181 * <p>The returned sampler will function exactly the
182 * same as {@link PoissonSampler#of(UniformRandomProvider, double)}.
183 *
184 * @param rng Generator of uniformly distributed random numbers.
185 * @param mean Mean.
186 * @return A Poisson sampler
187 * @throws IllegalArgumentException if {@code mean <= 0} or
188 * {@code mean >} {@link Integer#MAX_VALUE}.
189 * @since 1.4
190 */
191 public SharedStateDiscreteSampler createSharedStateSampler(UniformRandomProvider rng,
192 double mean) {
193 // Ensure the same functionality as the PoissonSampler by
194 // using a SmallMeanPoissonSampler under the switch point.
195 if (mean < PoissonSampler.PIVOT) {
196 return SmallMeanPoissonSampler.of(rng, mean);
197 }
198 if (mean > maxN) {
199 // Outside the range of the cache.
200 // This avoids extra parameter checks and handles the case when
201 // the cache is empty or if Math.floor(mean) is not an integer.
202 return LargeMeanPoissonSampler.of(rng, mean);
203 }
204
205 // Convert the mean into an integer.
206 final int n = (int) Math.floor(mean);
207 if (n < minN) {
208 // Outside the lower range of the cache.
209 return LargeMeanPoissonSampler.of(rng, mean);
210 }
211
212 // Look in the cache for a state that can be reused.
213 // Note: The cache is offset by minN.
214 final int index = n - minN;
215 final LargeMeanPoissonSamplerState state = values[index];
216 if (state == null) {
217 // Create a sampler and store the state for reuse.
218 // Do not worry about thread contention
219 // as the state is effectively immutable.
220 // If recomputed and replaced it will the same.
221 final LargeMeanPoissonSampler sampler = new LargeMeanPoissonSampler(rng, mean);
222 values[index] = sampler.getState();
223 return sampler;
224 }
225 // Compute the remaining fraction of the mean
226 final double lambdaFractional = mean - n;
227 return new LargeMeanPoissonSampler(rng, state, lambdaFractional);
228 }
229
230 /**
231 * Check if the mean is within the range where the cache can minimise the
232 * construction cost of the {@link PoissonSampler}.
233 *
234 * @param mean
235 * the mean
236 * @return true, if within the cache range
237 */
238 public boolean withinRange(double mean) {
239 if (mean < PoissonSampler.PIVOT) {
240 // Construction is optimal
241 return true;
242 }
243 // Convert the mean into an integer.
244 final int n = (int) Math.floor(mean);
245 return n <= maxN && n >= minN;
246 }
247
248 /**
249 * Checks if the cache covers a valid range of mean values.
250 *
251 * <p>Note that the cache is only valid for one of the Poisson sampling
252 * algorithms. In the instance that a range was requested that was too
253 * low then there is nothing to cache and this functions returns
254 * {@code false}.
255 *
256 * <p>The cache can still be used to create a {@link PoissonSampler} using
257 * {@link #createSharedStateSampler(UniformRandomProvider, double)}.
258 *
259 * <p>This method can be used to determine if the cache has a potential
260 * performance benefit.
261 *
262 * @return true, if the cache covers a range of mean values
263 */
264 public boolean isValidRange() {
265 return values != null;
266 }
267
268 /**
269 * Gets the minimum mean covered by the cache.
270 *
271 * <p>This value is the inclusive lower bound and is equal to
272 * the lowest integer-valued mean that is covered by the cache.
273 *
274 * <p>Note that this value may not match the value passed to the constructor
275 * due to the following reasons:
276 *
277 * <ul>
278 * <li>At small mean values a different algorithm is used for Poisson
279 * sampling and the cache is unnecessary.
280 * <li>The minimum is always an integer so may be below the constructor
281 * minimum mean.
282 * </ul>
283 *
284 * <p>If {@link #isValidRange()} returns {@code true} the cache will store
285 * state to reduce construction cost of samplers in
286 * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()}
287 * inclusive. Otherwise this method returns 0;
288 *
289 * @return The minimum mean covered by the cache.
290 */
291 public double getMinMean() {
292 return minN;
293 }
294
295 /**
296 * Gets the maximum mean covered by the cache.
297 *
298 * <p>This value is the inclusive upper bound and is equal to
299 * the double value below the first integer-valued mean that is
300 * above range covered by the cache.
301 *
302 * <p>Note that this value may not match the value passed to the constructor
303 * due to the following reasons:
304 * <ul>
305 * <li>At small mean values a different algorithm is used for Poisson
306 * sampling and the cache is unnecessary.
307 * <li>The maximum is always the double value below an integer so
308 * may be above the constructor maximum mean.
309 * </ul>
310 *
311 * <p>If {@link #isValidRange()} returns {@code true} the cache will store
312 * state to reduce construction cost of samplers in
313 * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()}
314 * inclusive. Otherwise this method returns 0;
315 *
316 * @return The maximum mean covered by the cache.
317 */
318 public double getMaxMean() {
319 if (isValidRange()) {
320 return Math.nextDown(maxN + 1.0);
321 }
322 return 0;
323 }
324
325 /**
326 * Gets the minimum mean value that can be cached.
327 *
328 * <p>Any {@link PoissonSampler} created with a mean below this level will not
329 * have any state that can be cached.
330 *
331 * @return the minimum cached mean
332 */
333 public static double getMinimumCachedMean() {
334 return PoissonSampler.PIVOT;
335 }
336
337 /**
338 * Create a new {@link PoissonSamplerCache} with the given range
339 * reusing the current cache values.
340 *
341 * <p>This will create a new object even if the range is smaller or the
342 * same as the current cache.
343 *
344 * @param minMean The minimum mean covered by the cache.
345 * @param maxMean The maximum mean covered by the cache.
346 * @throws IllegalArgumentException if {@code maxMean < minMean}
347 * @return the poisson sampler cache
348 */
349 public PoissonSamplerCache withRange(double minMean,
350 double maxMean) {
351 if (values == null) {
352 // Nothing to reuse
353 return new PoissonSamplerCache(minMean, maxMean);
354 }
355 checkMeanRange(minMean, maxMean);
356
357 // The cache can only be used for the LargeMeanPoissonSampler.
358 if (maxMean < PoissonSampler.PIVOT) {
359 return new PoissonSamplerCache(0, 0);
360 }
361
362 // Convert the mean into integers.
363 // Note the minimum is clipped to the algorithm switch point.
364 final int withMinN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT));
365 final int withMaxN = (int) Math.floor(maxMean);
366 final LargeMeanPoissonSamplerState[] states =
367 new LargeMeanPoissonSamplerState[withMaxN - withMinN + 1];
368
369 // Preserve values from the current array to the next
370 final int currentIndex;
371 final int nextIndex;
372 if (this.minN <= withMinN) {
373 // The current array starts before the new array
374 currentIndex = withMinN - this.minN;
375 nextIndex = 0;
376 } else {
377 // The new array starts before the current array
378 currentIndex = 0;
379 nextIndex = this.minN - withMinN;
380 }
381 final int length = Math.min(values.length - currentIndex, states.length - nextIndex);
382 if (length > 0) {
383 System.arraycopy(values, currentIndex, states, nextIndex, length);
384 }
385
386 return new PoissonSamplerCache(withMinN, withMaxN, states);
387 }
388 }