001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.rng.sampling.distribution;
018
019import org.apache.commons.rng.UniformRandomProvider;
020import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler.LargeMeanPoissonSamplerState;
021
022/**
023 * Create a sampler for the
024 * <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson
025 * distribution</a> using a cache to minimise construction cost.
026 *
027 * <p>The cache will return a sampler equivalent to
028 * {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}.</p>
029 *
030 * <p>The cache allows the {@link PoissonSampler} construction cost to be minimised
031 * for low size Poisson samples. The cache stores state for a range of integers where
032 * integer value {@code n} can be used to construct a sampler for the range
033 * {@code n <= mean < n+1}.</p>
034 *
035 * <p>The cache is advantageous under the following conditions:</p>
036 *
037 * <ul>
038 *   <li>The mean of the Poisson distribution falls within a known range.
039 *   <li>The sample size to be made with the <strong>same</strong> sampler is
040 *       small.
041 *   <li>The Poisson samples have different means with the same integer
042 *       value(s) after rounding down.
043 * </ul>
044 *
045 * <p>If the sample size to be made with the <strong>same</strong> sampler is large
046 * then the construction cost is low compared to the sampling time and the cache
047 * has minimal benefit.</p>
048 *
049 * <p>Performance improvement is dependent on the speed of the
050 * {@link UniformRandomProvider}. A fast provider can obtain a two-fold speed
051 * improvement for a single-use Poisson sampler.</p>
052 *
053 * <p>The cache is thread safe. Note that concurrent threads using the cache
054 * must ensure a thread safe {@link UniformRandomProvider} is used when creating
055 * samplers, e.g. a unique sampler per thread.</p>
056 *
057 * <p>Sampling uses:</p>
058 *
059 * <ul>
060 *   <li>{@link UniformRandomProvider#nextDouble()}
061 *   <li>{@link UniformRandomProvider#nextLong()} (large means only)
062 * </ul>
063 *
064 * @since 1.2
065 */
066public class PoissonSamplerCache {
067
068    /**
069     * The minimum N covered by the cache where
070     * {@code N = (int)Math.floor(mean)}.
071     */
072    private final int minN;
073    /**
074     * The maximum N covered by the cache where
075     * {@code N = (int)Math.floor(mean)}.
076     */
077    private final int maxN;
078    /** The cache of states between {@link #minN} and {@link #maxN}. */
079    private final LargeMeanPoissonSamplerState[] values;
080
081    /**
082     * @param minMean The minimum mean covered by the cache.
083     * @param maxMean The maximum mean covered by the cache.
084     * @throws IllegalArgumentException if {@code maxMean < minMean}
085     */
086    public PoissonSamplerCache(double minMean,
087                               double maxMean) {
088
089        checkMeanRange(minMean, maxMean);
090
091        // The cache can only be used for the LargeMeanPoissonSampler.
092        if (maxMean < PoissonSampler.PIVOT) {
093            // The upper limit is too small so no cache will be used.
094            // This class will just construct new samplers.
095            minN = 0;
096            maxN = 0;
097            values = null;
098        } else {
099            // Convert the mean into integers.
100            // Note the minimum is clipped to the algorithm switch point.
101            this.minN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT));
102            this.maxN = (int) Math.floor(Math.min(maxMean, Integer.MAX_VALUE));
103            values = new LargeMeanPoissonSamplerState[maxN - minN + 1];
104        }
105    }
106
107    /**
108     * @param minN   The minimum N covered by the cache where {@code N = (int)Math.floor(mean)}.
109     * @param maxN   The maximum N covered by the cache where {@code N = (int)Math.floor(mean)}.
110     * @param states The precomputed states.
111     */
112    private PoissonSamplerCache(int minN,
113                                int maxN,
114                                LargeMeanPoissonSamplerState[] states) {
115        this.minN = minN;
116        this.maxN = maxN;
117        // Stored directly as the states were newly created within this class.
118        this.values = states;
119    }
120
121    /**
122     * Check the mean range.
123     *
124     * @param minMean The minimum mean covered by the cache.
125     * @param maxMean The maximum mean covered by the cache.
126     * @throws IllegalArgumentException if {@code maxMean < minMean}
127     */
128    private static void checkMeanRange(double minMean, double maxMean) {
129        // Note:
130        // Although a mean of 0 is invalid for a Poisson sampler this case
131        // is handled to make the cache user friendly. Any low means will
132        // be handled by the SmallMeanPoissonSampler and not cached.
133        // For this reason it is also OK if the means are negative.
134
135        // Allow minMean == maxMean so that the cache can be used
136        // to create samplers with distinct RNGs and the same mean.
137        if (maxMean < minMean) {
138            throw new IllegalArgumentException(
139                    "Max mean: " + maxMean + " < " + minMean);
140        }
141    }
142
143    /**
144     * Creates a new Poisson sampler.
145     *
146     * <p>The returned sampler will function exactly the
147     * same as {@link PoissonSampler#of(UniformRandomProvider, double)}.
148     *
149     * @param rng  Generator of uniformly distributed random numbers.
150     * @param mean Mean.
151     * @return A Poisson sampler
152     * @throws IllegalArgumentException if {@code mean <= 0} or
153     * {@code mean >} {@link Integer#MAX_VALUE}.
154     * @deprecated Use {@link #createSharedStateSampler(UniformRandomProvider, double)}.
155     */
156    @Deprecated
157    public DiscreteSampler createPoissonSampler(UniformRandomProvider rng,
158                                                double mean) {
159        return createSharedStateSampler(rng, mean);
160    }
161
162    /**
163     * Creates a new Poisson sampler.
164     *
165     * <p>The returned sampler will function exactly the
166     * same as {@link PoissonSampler#of(UniformRandomProvider, double)}.
167     *
168     * @param rng  Generator of uniformly distributed random numbers.
169     * @param mean Mean.
170     * @return A Poisson sampler
171     * @throws IllegalArgumentException if {@code mean <= 0} or
172     * {@code mean >} {@link Integer#MAX_VALUE}.
173     * @since 1.4
174     */
175    public SharedStateDiscreteSampler createSharedStateSampler(UniformRandomProvider rng,
176                                                               double mean) {
177        // Ensure the same functionality as the PoissonSampler by
178        // using a SmallMeanPoissonSampler under the switch point.
179        if (mean < PoissonSampler.PIVOT) {
180            return SmallMeanPoissonSampler.of(rng, mean);
181        }
182        if (mean > maxN) {
183            // Outside the range of the cache.
184            // This avoids extra parameter checks and handles the case when
185            // the cache is empty or if Math.floor(mean) is not an integer.
186            return LargeMeanPoissonSampler.of(rng, mean);
187        }
188
189        // Convert the mean into an integer.
190        final int n = (int) Math.floor(mean);
191        if (n < minN) {
192            // Outside the lower range of the cache.
193            return LargeMeanPoissonSampler.of(rng, mean);
194        }
195
196        // Look in the cache for a state that can be reused.
197        // Note: The cache is offset by minN.
198        final int index = n - minN;
199        final LargeMeanPoissonSamplerState state = values[index];
200        if (state == null) {
201            // Create a sampler and store the state for reuse.
202            // Do not worry about thread contention
203            // as the state is effectively immutable.
204            // If recomputed and replaced it will the same.
205            final LargeMeanPoissonSampler sampler = new LargeMeanPoissonSampler(rng, mean);
206            values[index] = sampler.getState();
207            return sampler;
208        }
209        // Compute the remaining fraction of the mean
210        final double lambdaFractional = mean - n;
211        return new LargeMeanPoissonSampler(rng, state, lambdaFractional);
212    }
213
214    /**
215     * Check if the mean is within the range where the cache can minimise the
216     * construction cost of the {@link PoissonSampler}.
217     *
218     * @param mean
219     *            the mean
220     * @return true, if within the cache range
221     */
222    public boolean withinRange(double mean) {
223        if (mean < PoissonSampler.PIVOT) {
224            // Construction is optimal
225            return true;
226        }
227        // Convert the mean into an integer.
228        final int n = (int) Math.floor(mean);
229        return n <= maxN && n >= minN;
230    }
231
232    /**
233     * Checks if the cache covers a valid range of mean values.
234     *
235     * <p>Note that the cache is only valid for one of the Poisson sampling
236     * algorithms. In the instance that a range was requested that was too
237     * low then there is nothing to cache and this functions returns
238     * {@code false}.
239     *
240     * <p>The cache can still be used to create a {@link PoissonSampler} using
241     * {@link #createSharedStateSampler(UniformRandomProvider, double)}.
242     *
243     * <p>This method can be used to determine if the cache has a potential
244     * performance benefit.
245     *
246     * @return true, if the cache covers a range of mean values
247     */
248    public boolean isValidRange() {
249        return values != null;
250    }
251
252    /**
253     * Gets the minimum mean covered by the cache.
254     *
255     * <p>This value is the inclusive lower bound and is equal to
256     * the lowest integer-valued mean that is covered by the cache.
257     *
258     * <p>Note that this value may not match the value passed to the constructor
259     * due to the following reasons:
260     *
261     * <ul>
262     *   <li>At small mean values a different algorithm is used for Poisson
263     *       sampling and the cache is unnecessary.
264     *   <li>The minimum is always an integer so may be below the constructor
265     *       minimum mean.
266     * </ul>
267     *
268     * <p>If {@link #isValidRange()} returns {@code true} the cache will store
269     * state to reduce construction cost of samplers in
270     * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()}
271     * inclusive. Otherwise this method returns 0;
272     *
273     * @return The minimum mean covered by the cache.
274     */
275    public double getMinMean() {
276        return minN;
277    }
278
279    /**
280     * Gets the maximum mean covered by the cache.
281     *
282     * <p>This value is the inclusive upper bound and is equal to
283     * the double value below the first integer-valued mean that is
284     * above range covered by the cache.
285     *
286     * <p>Note that this value may not match the value passed to the constructor
287     * due to the following reasons:
288     * <ul>
289     *   <li>At small mean values a different algorithm is used for Poisson
290     *       sampling and the cache is unnecessary.
291     *   <li>The maximum is always the double value below an integer so
292     *       may be above the constructor maximum mean.
293     * </ul>
294     *
295     * <p>If {@link #isValidRange()} returns {@code true} the cache will store
296     * state to reduce construction cost of samplers in
297     * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()}
298     * inclusive. Otherwise this method returns 0;
299     *
300     * @return The maximum mean covered by the cache.
301     */
302    public double getMaxMean() {
303        if (isValidRange()) {
304            return Math.nextDown(maxN + 1.0);
305        }
306        return 0;
307    }
308
309    /**
310     * Gets the minimum mean value that can be cached.
311     *
312     * <p>Any {@link PoissonSampler} created with a mean below this level will not
313     * have any state that can be cached.
314     *
315     * @return the minimum cached mean
316     */
317    public static double getMinimumCachedMean() {
318        return PoissonSampler.PIVOT;
319    }
320
321    /**
322     * Create a new {@link PoissonSamplerCache} with the given range
323     * reusing the current cache values.
324     *
325     * <p>This will create a new object even if the range is smaller or the
326     * same as the current cache.
327     *
328     * @param minMean The minimum mean covered by the cache.
329     * @param maxMean The maximum mean covered by the cache.
330     * @throws IllegalArgumentException if {@code maxMean < minMean}
331     * @return the poisson sampler cache
332     */
333    public PoissonSamplerCache withRange(double minMean,
334                                         double maxMean) {
335        if (values == null) {
336            // Nothing to reuse
337            return new PoissonSamplerCache(minMean, maxMean);
338        }
339        checkMeanRange(minMean, maxMean);
340
341        // The cache can only be used for the LargeMeanPoissonSampler.
342        if (maxMean < PoissonSampler.PIVOT) {
343            return new PoissonSamplerCache(0, 0);
344        }
345
346        // Convert the mean into integers.
347        // Note the minimum is clipped to the algorithm switch point.
348        final int withMinN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT));
349        final int withMaxN = (int) Math.floor(maxMean);
350        final LargeMeanPoissonSamplerState[] states =
351                new LargeMeanPoissonSamplerState[withMaxN - withMinN + 1];
352
353        // Preserve values from the current array to the next
354        int currentIndex;
355        int nextIndex;
356        if (this.minN <= withMinN) {
357            // The current array starts before the new array
358            currentIndex = withMinN - this.minN;
359            nextIndex = 0;
360        } else {
361            // The new array starts before the current array
362            currentIndex = 0;
363            nextIndex = this.minN - withMinN;
364        }
365        final int length = Math.min(values.length - currentIndex, states.length - nextIndex);
366        if (length > 0) {
367            System.arraycopy(values, currentIndex, states, nextIndex, length);
368        }
369
370        return new PoissonSamplerCache(withMinN, withMaxN, states);
371    }
372}