001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.rng.sampling.distribution;
018
019import org.apache.commons.rng.UniformRandomProvider;
020import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler.LargeMeanPoissonSamplerState;
021
022/**
023 * Create a sampler for the
024 * <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson
025 * distribution</a> using a cache to minimise construction cost.
026 *
027 * <p>The cache will return a sampler equivalent to
028 * {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}.</p>
029 *
030 * <p>The cache allows the {@link PoissonSampler} construction cost to be minimised
031 * for low size Poisson samples. The cache stores state for a range of integers where
032 * integer value {@code n} can be used to construct a sampler for the range
033 * {@code n <= mean < n+1}.</p>
034 *
035 * <p>The cache is advantageous under the following conditions:</p>
036 *
037 * <ul>
038 *   <li>The mean of the Poisson distribution falls within a known range.
039 *   <li>The sample size to be made with the <strong>same</strong> sampler is
040 *       small.
041 *   <li>The Poisson samples have different means with the same integer
042 *       value(s) after rounding down.
043 * </ul>
044 *
045 * <p>If the sample size to be made with the <strong>same</strong> sampler is large
046 * then the construction cost is low compared to the sampling time and the cache
047 * has minimal benefit.</p>
048 *
049 * <p>Performance improvement is dependent on the speed of the
050 * {@link UniformRandomProvider}. A fast provider can obtain a two-fold speed
051 * improvement for a single-use Poisson sampler.</p>
052 *
053 * <p>The cache is thread safe. Note that concurrent threads using the cache
054 * must ensure a thread safe {@link UniformRandomProvider} is used when creating
055 * samplers, e.g. a unique sampler per thread.</p>
056 *
057 * <p>Sampling uses:</p>
058 *
059 * <ul>
060 *   <li>{@link UniformRandomProvider#nextDouble()}
061 *   <li>{@link UniformRandomProvider#nextLong()} (large means only)
062 * </ul>
063 *
064 * @since 1.2
065 */
066public class PoissonSamplerCache {
067
068    /**
069     * The minimum N covered by the cache where
070     * {@code N = (int)Math.floor(mean)}.
071     */
072    private final int minN;
073    /**
074     * The maximum N covered by the cache where
075     * {@code N = (int)Math.floor(mean)}.
076     */
077    private final int maxN;
078    /** The cache of states between {@link #minN} and {@link #maxN}. */
079    private final LargeMeanPoissonSamplerState[] values;
080
081    /**
082     * Create an instance.
083     *
084     * @param minMean The minimum mean covered by the cache.
085     * @param maxMean The maximum mean covered by the cache.
086     * @throws IllegalArgumentException if {@code maxMean < minMean}
087     */
088    public PoissonSamplerCache(double minMean,
089                               double maxMean) {
090        this(checkMeanRange(minMean, maxMean), maxMean, false);
091    }
092
093    /**
094     * @param minMean The minimum mean covered by the cache.
095     * @param maxMean The maximum mean covered by the cache.
096     * @param ignored Ignored value.
097     */
098    private PoissonSamplerCache(double minMean,
099                                double maxMean,
100                                boolean ignored) {
101        // The cache can only be used for the LargeMeanPoissonSampler.
102        if (maxMean < PoissonSampler.PIVOT) {
103            // The upper limit is too small so no cache will be used.
104            // This class will just construct new samplers.
105            minN = 0;
106            maxN = 0;
107            values = null;
108        } else {
109            // Convert the mean into integers.
110            // Note the minimum is clipped to the algorithm switch point.
111            this.minN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT));
112            this.maxN = (int) Math.floor(Math.min(maxMean, Integer.MAX_VALUE));
113            values = new LargeMeanPoissonSamplerState[maxN - minN + 1];
114        }
115    }
116
117    /**
118     * @param minN   The minimum N covered by the cache where {@code N = (int)Math.floor(mean)}.
119     * @param maxN   The maximum N covered by the cache where {@code N = (int)Math.floor(mean)}.
120     * @param states The precomputed states.
121     */
122    private PoissonSamplerCache(int minN,
123                                int maxN,
124                                LargeMeanPoissonSamplerState[] states) {
125        this.minN = minN;
126        this.maxN = maxN;
127        // Stored directly as the states were newly created within this class.
128        this.values = states;
129    }
130
131    /**
132     * Check the mean range.
133     *
134     * <p>This method exists to raise an exception before invocation of the
135     * private constructor; this mitigates Finalizer attacks
136     * (see SpotBugs CT_CONSTRUCTOR_THROW).
137     *
138     * @param minMean The minimum mean covered by the cache.
139     * @param maxMean The maximum mean covered by the cache.
140     * @return the minimum mean
141     * @throws IllegalArgumentException if {@code maxMean < minMean}
142     */
143    private static double checkMeanRange(double minMean, double maxMean) {
144        // Note:
145        // Although a mean of 0 is invalid for a Poisson sampler this case
146        // is handled to make the cache user friendly. Any low means will
147        // be handled by the SmallMeanPoissonSampler and not cached.
148        // For this reason it is also OK if the means are negative.
149
150        // Allow minMean == maxMean so that the cache can be used
151        // to create samplers with distinct RNGs and the same mean.
152        if (maxMean < minMean) {
153            throw new IllegalArgumentException(
154                    "Max mean: " + maxMean + " < " + minMean);
155        }
156        return minMean;
157    }
158
159    /**
160     * Creates a new Poisson sampler.
161     *
162     * <p>The returned sampler will function exactly the
163     * same as {@link PoissonSampler#of(UniformRandomProvider, double)}.
164     *
165     * @param rng  Generator of uniformly distributed random numbers.
166     * @param mean Mean.
167     * @return A Poisson sampler
168     * @throws IllegalArgumentException if {@code mean <= 0} or
169     * {@code mean >} {@link Integer#MAX_VALUE}.
170     * @deprecated Use {@link #createSharedStateSampler(UniformRandomProvider, double)}.
171     */
172    @Deprecated
173    public DiscreteSampler createPoissonSampler(UniformRandomProvider rng,
174                                                double mean) {
175        return createSharedStateSampler(rng, mean);
176    }
177
178    /**
179     * Creates a new Poisson sampler.
180     *
181     * <p>The returned sampler will function exactly the
182     * same as {@link PoissonSampler#of(UniformRandomProvider, double)}.
183     *
184     * @param rng  Generator of uniformly distributed random numbers.
185     * @param mean Mean.
186     * @return A Poisson sampler
187     * @throws IllegalArgumentException if {@code mean <= 0} or
188     * {@code mean >} {@link Integer#MAX_VALUE}.
189     * @since 1.4
190     */
191    public SharedStateDiscreteSampler createSharedStateSampler(UniformRandomProvider rng,
192                                                               double mean) {
193        // Ensure the same functionality as the PoissonSampler by
194        // using a SmallMeanPoissonSampler under the switch point.
195        if (mean < PoissonSampler.PIVOT) {
196            return SmallMeanPoissonSampler.of(rng, mean);
197        }
198        if (mean > maxN) {
199            // Outside the range of the cache.
200            // This avoids extra parameter checks and handles the case when
201            // the cache is empty or if Math.floor(mean) is not an integer.
202            return LargeMeanPoissonSampler.of(rng, mean);
203        }
204
205        // Convert the mean into an integer.
206        final int n = (int) Math.floor(mean);
207        if (n < minN) {
208            // Outside the lower range of the cache.
209            return LargeMeanPoissonSampler.of(rng, mean);
210        }
211
212        // Look in the cache for a state that can be reused.
213        // Note: The cache is offset by minN.
214        final int index = n - minN;
215        final LargeMeanPoissonSamplerState state = values[index];
216        if (state == null) {
217            // Create a sampler and store the state for reuse.
218            // Do not worry about thread contention
219            // as the state is effectively immutable.
220            // If recomputed and replaced it will the same.
221            final LargeMeanPoissonSampler sampler = new LargeMeanPoissonSampler(rng, mean);
222            values[index] = sampler.getState();
223            return sampler;
224        }
225        // Compute the remaining fraction of the mean
226        final double lambdaFractional = mean - n;
227        return new LargeMeanPoissonSampler(rng, state, lambdaFractional);
228    }
229
230    /**
231     * Check if the mean is within the range where the cache can minimise the
232     * construction cost of the {@link PoissonSampler}.
233     *
234     * @param mean
235     *            the mean
236     * @return true, if within the cache range
237     */
238    public boolean withinRange(double mean) {
239        if (mean < PoissonSampler.PIVOT) {
240            // Construction is optimal
241            return true;
242        }
243        // Convert the mean into an integer.
244        final int n = (int) Math.floor(mean);
245        return n <= maxN && n >= minN;
246    }
247
248    /**
249     * Checks if the cache covers a valid range of mean values.
250     *
251     * <p>Note that the cache is only valid for one of the Poisson sampling
252     * algorithms. In the instance that a range was requested that was too
253     * low then there is nothing to cache and this functions returns
254     * {@code false}.
255     *
256     * <p>The cache can still be used to create a {@link PoissonSampler} using
257     * {@link #createSharedStateSampler(UniformRandomProvider, double)}.
258     *
259     * <p>This method can be used to determine if the cache has a potential
260     * performance benefit.
261     *
262     * @return true, if the cache covers a range of mean values
263     */
264    public boolean isValidRange() {
265        return values != null;
266    }
267
268    /**
269     * Gets the minimum mean covered by the cache.
270     *
271     * <p>This value is the inclusive lower bound and is equal to
272     * the lowest integer-valued mean that is covered by the cache.
273     *
274     * <p>Note that this value may not match the value passed to the constructor
275     * due to the following reasons:
276     *
277     * <ul>
278     *   <li>At small mean values a different algorithm is used for Poisson
279     *       sampling and the cache is unnecessary.
280     *   <li>The minimum is always an integer so may be below the constructor
281     *       minimum mean.
282     * </ul>
283     *
284     * <p>If {@link #isValidRange()} returns {@code true} the cache will store
285     * state to reduce construction cost of samplers in
286     * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()}
287     * inclusive. Otherwise this method returns 0;
288     *
289     * @return The minimum mean covered by the cache.
290     */
291    public double getMinMean() {
292        return minN;
293    }
294
295    /**
296     * Gets the maximum mean covered by the cache.
297     *
298     * <p>This value is the inclusive upper bound and is equal to
299     * the double value below the first integer-valued mean that is
300     * above range covered by the cache.
301     *
302     * <p>Note that this value may not match the value passed to the constructor
303     * due to the following reasons:
304     * <ul>
305     *   <li>At small mean values a different algorithm is used for Poisson
306     *       sampling and the cache is unnecessary.
307     *   <li>The maximum is always the double value below an integer so
308     *       may be above the constructor maximum mean.
309     * </ul>
310     *
311     * <p>If {@link #isValidRange()} returns {@code true} the cache will store
312     * state to reduce construction cost of samplers in
313     * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()}
314     * inclusive. Otherwise this method returns 0;
315     *
316     * @return The maximum mean covered by the cache.
317     */
318    public double getMaxMean() {
319        if (isValidRange()) {
320            return Math.nextDown(maxN + 1.0);
321        }
322        return 0;
323    }
324
325    /**
326     * Gets the minimum mean value that can be cached.
327     *
328     * <p>Any {@link PoissonSampler} created with a mean below this level will not
329     * have any state that can be cached.
330     *
331     * @return the minimum cached mean
332     */
333    public static double getMinimumCachedMean() {
334        return PoissonSampler.PIVOT;
335    }
336
337    /**
338     * Create a new {@link PoissonSamplerCache} with the given range
339     * reusing the current cache values.
340     *
341     * <p>This will create a new object even if the range is smaller or the
342     * same as the current cache.
343     *
344     * @param minMean The minimum mean covered by the cache.
345     * @param maxMean The maximum mean covered by the cache.
346     * @throws IllegalArgumentException if {@code maxMean < minMean}
347     * @return the poisson sampler cache
348     */
349    public PoissonSamplerCache withRange(double minMean,
350                                         double maxMean) {
351        if (values == null) {
352            // Nothing to reuse
353            return new PoissonSamplerCache(minMean, maxMean);
354        }
355        checkMeanRange(minMean, maxMean);
356
357        // The cache can only be used for the LargeMeanPoissonSampler.
358        if (maxMean < PoissonSampler.PIVOT) {
359            return new PoissonSamplerCache(0, 0);
360        }
361
362        // Convert the mean into integers.
363        // Note the minimum is clipped to the algorithm switch point.
364        final int withMinN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT));
365        final int withMaxN = (int) Math.floor(maxMean);
366        final LargeMeanPoissonSamplerState[] states =
367                new LargeMeanPoissonSamplerState[withMaxN - withMinN + 1];
368
369        // Preserve values from the current array to the next
370        final int currentIndex;
371        final int nextIndex;
372        if (this.minN <= withMinN) {
373            // The current array starts before the new array
374            currentIndex = withMinN - this.minN;
375            nextIndex = 0;
376        } else {
377            // The new array starts before the current array
378            currentIndex = 0;
379            nextIndex = this.minN - withMinN;
380        }
381        final int length = Math.min(values.length - currentIndex, states.length - nextIndex);
382        if (length > 0) {
383            System.arraycopy(values, currentIndex, states, nextIndex, length);
384        }
385
386        return new PoissonSamplerCache(withMinN, withMaxN, states);
387    }
388}