001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.rng.sampling.distribution;
018
019import org.apache.commons.rng.UniformRandomProvider;
020import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler.LargeMeanPoissonSamplerState;
021
022/**
023 * Create a sampler for the
024 * <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson
025 * distribution</a> using a cache to minimise construction cost.
026 *
027 * <p>The cache will return a sampler equivalent to
028 * {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}.
029 *
030 * <p>The cache allows the {@link PoissonSampler} construction cost to be minimised
031 * for low size Poisson samples. The cache stores state for a range of integers where
032 * integer value {@code n} can be used to construct a sampler for the range
033 * {@code n <= mean < n+1}.
034 *
035 * <p>The cache is advantageous under the following conditions:
036 *
037 * <ul>
038 *   <li>The mean of the Poisson distribution falls within a known range.
039 *   <li>The sample size to be made with the <strong>same</strong> sampler is
040 *       small.
041 *   <li>The Poisson samples have different means with the same integer
042 *       value(s) after rounding down.
043 * </ul>
044 *
045 * <p>If the sample size to be made with the <strong>same</strong> sampler is large
046 * then the construction cost is low compared to the sampling time and the cache
047 * has minimal benefit.
048 *
049 * <p>Performance improvement is dependent on the speed of the
050 * {@link UniformRandomProvider}. A fast provider can obtain a two-fold speed
051 * improvement for a single-use Poisson sampler.
052 *
053 * <p>The cache is thread safe. Note that concurrent threads using the cache
054 * must ensure a thread safe {@link UniformRandomProvider} is used when creating
055 * samplers, e.g. a unique sampler per thread.
056 *
057 * @since 1.2
058 */
059public class PoissonSamplerCache {
060
061    /**
062     * The minimum N covered by the cache where
063     * {@code N = (int)Math.floor(mean)}.
064     */
065    private final int minN;
066    /**
067     * The maximum N covered by the cache where
068     * {@code N = (int)Math.floor(mean)}.
069     */
070    private final int maxN;
071    /** The cache of states between {@link minN} and {@link maxN}. */
072    private final LargeMeanPoissonSamplerState[] values;
073
074    /**
075     * @param minMean The minimum mean covered by the cache.
076     * @param maxMean The maximum mean covered by the cache.
077     * @throws IllegalArgumentException if {@code maxMean < minMean}
078     */
079    public PoissonSamplerCache(double minMean,
080                               double maxMean) {
081
082        checkMeanRange(minMean, maxMean);
083
084        // The cache can only be used for the LargeMeanPoissonSampler.
085        if (maxMean < PoissonSampler.PIVOT) {
086            // The upper limit is too small so no cache will be used.
087            // This class will just construct new samplers.
088            minN = 0;
089            maxN = 0;
090            values = null;
091        } else {
092            // Convert the mean into integers.
093            // Note the minimum is clipped to the algorithm switch point.
094            this.minN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT));
095            this.maxN = (int) Math.floor(Math.min(maxMean, Integer.MAX_VALUE));
096            values = new LargeMeanPoissonSamplerState[maxN - minN + 1];
097        }
098    }
099
100    /**
101     * @param minN   The minimum N covered by the cache where {@code N = (int)Math.floor(mean)}.
102     * @param maxN   The maximum N covered by the cache where {@code N = (int)Math.floor(mean)}.
103     * @param states The precomputed states.
104     */
105    private PoissonSamplerCache(int minN,
106                                int maxN,
107                                LargeMeanPoissonSamplerState[] states) {
108        this.minN = minN;
109        this.maxN = maxN;
110        // Stored directly as the states were newly created within this class.
111        this.values = states;
112    }
113
114    /**
115     * Check the mean range.
116     *
117     * @param minMean The minimum mean covered by the cache.
118     * @param maxMean The maximum mean covered by the cache.
119     * @throws IllegalArgumentException if {@code maxMean < minMean}
120     */
121    private static void checkMeanRange(double minMean, double maxMean)
122    {
123        // Note:
124        // Although a mean of 0 is invalid for a Poisson sampler this case
125        // is handled to make the cache user friendly. Any low means will
126        // be handled by the SmallMeanPoissonSampler and not cached.
127        // For this reason it is also OK if the means are negative.
128
129        // Allow minMean == maxMean so that the cache can be used
130        // to create samplers with distinct RNGs and the same mean.
131        if (maxMean < minMean) {
132            throw new IllegalArgumentException(
133                    "Max mean: " + maxMean + " < " + minMean);
134        }
135    }
136
137    /**
138     * Creates a new Poisson sampler.
139     *
140     * <p>The returned sampler will function exactly the
141     * same as {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}.
142     *
143     * @param rng  Generator of uniformly distributed random numbers.
144     * @param mean Mean.
145     * @return A Poisson sampler
146     * @throws IllegalArgumentException if {@code mean <= 0} or
147     * {@code mean >} {@link Integer#MAX_VALUE}.
148     */
149    public DiscreteSampler createPoissonSampler(UniformRandomProvider rng,
150                                                double mean) {
151        // Ensure the same functionality as the PoissonSampler by
152        // using a SmallMeanPoissonSampler under the switch point.
153        if (mean < PoissonSampler.PIVOT) {
154            return new SmallMeanPoissonSampler(rng, mean);
155        }
156        if (mean > maxN) {
157            // Outside the range of the cache.
158            // This avoids extra parameter checks and handles the case when
159            // the cache is empty or if Math.floor(mean) is not an integer.
160            return new LargeMeanPoissonSampler(rng, mean);
161        }
162
163        // Convert the mean into an integer.
164        final int n = (int) Math.floor(mean);
165        if (n < minN) {
166            // Outside the lower range of the cache.
167            return new LargeMeanPoissonSampler(rng, mean);
168        }
169
170        // Look in the cache for a state that can be reused.
171        // Note: The cache is offset by minN.
172        final int index = n - minN;
173        final LargeMeanPoissonSamplerState state = values[index];
174        if (state == null) {
175            // Create a sampler and store the state for reuse.
176            // Do not worry about thread contention
177            // as the state is effectively immutable.
178            // If recomputed and replaced it will the same.
179            final LargeMeanPoissonSampler sampler = new LargeMeanPoissonSampler(rng, mean);
180            values[index] = sampler.getState();
181            return sampler;
182        }
183        // Compute the remaining fraction of the mean
184        final double lambdaFractional = mean - n;
185        return new LargeMeanPoissonSampler(rng, state, lambdaFractional);
186    }
187
188    /**
189     * Check if the mean is within the range where the cache can minimise the
190     * construction cost of the {@link PoissonSampler}.
191     *
192     * @param mean
193     *            the mean
194     * @return true, if within the cache range
195     */
196    public boolean withinRange(double mean) {
197        if (mean < PoissonSampler.PIVOT) {
198            // Construction is optimal
199            return true;
200        }
201        // Convert the mean into an integer.
202        final int n = (int) Math.floor(mean);
203        return n <= maxN && n >= minN;
204    }
205
206    /**
207     * Checks if the cache covers a valid range of mean values.
208     *
209     * <p>Note that the cache is only valid for one of the Poisson sampling
210     * algorithms. In the instance that a range was requested that was too
211     * low then there is nothing to cache and this functions returns
212     * {@code false}.
213     *
214     * <p>The cache can still be used to create a {@link PoissonSampler} using
215     * {@link #createPoissonSampler(UniformRandomProvider, double)}.
216     *
217     * <p>This method can be used to determine if the cache has a potential
218     * performance benefit.
219     *
220     * @return true, if the cache covers a range of mean values
221     */
222    public boolean isValidRange() {
223        return values != null;
224    }
225
226    /**
227     * Gets the minimum mean covered by the cache.
228     *
229     * <p>This value is the inclusive lower bound and is equal to
230     * the lowest integer-valued mean that is covered by the cache.
231     *
232     * <p>Note that this value may not match the value passed to the constructor
233     * due to the following reasons:
234     *
235     * <ul>
236     *   <li>At small mean values a different algorithm is used for Poisson
237     *       sampling and the cache is unnecessary.
238     *   <li>The minimum is always an integer so may be below the constructor
239     *       minimum mean.
240     * </ul>
241     *
242     * <p>If {@link #isValidRange()} returns {@code true} the cache will store
243     * state to reduce construction cost of samplers in
244     * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()}
245     * inclusive. Otherwise this method returns 0;
246     *
247     * @return The minimum mean covered by the cache.
248     */
249    public double getMinMean()
250    {
251        return minN;
252    }
253
254    /**
255     * Gets the maximum mean covered by the cache.
256     *
257     * <p>This value is the inclusive upper bound and is equal to
258     * the double value below the first integer-valued mean that is
259     * above range covered by the cache.
260     *
261     * <p>Note that this value may not match the value passed to the constructor
262     * due to the following reasons:
263     * <ul>
264     *   <li>At small mean values a different algorithm is used for Poisson
265     *       sampling and the cache is unnecessary.
266     *   <li>The maximum is always the double value below an integer so
267     *       may be above the constructor maximum mean.
268     * </ul>
269     *
270     * <p>If {@link #isValidRange()} returns {@code true} the cache will store
271     * state to reduce construction cost of samplers in
272     * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()}
273     * inclusive. Otherwise this method returns 0;
274     *
275     * @return The maximum mean covered by the cache.
276     */
277    public double getMaxMean()
278    {
279        if (isValidRange()) {
280            return Math.nextAfter(maxN + 1.0, -1);
281        }
282        return 0;
283    }
284
285    /**
286     * Gets the minimum mean value that can be cached.
287     *
288     * <p>Any {@link PoissonSampler} created with a mean below this level will not
289     * have any state that can be cached.
290     *
291     * @return the minimum cached mean
292     */
293    public static double getMinimumCachedMean() {
294        return PoissonSampler.PIVOT;
295    }
296
297    /**
298     * Create a new {@link PoissonSamplerCache} with the given range
299     * reusing the current cache values.
300     *
301     * <p>This will create a new object even if the range is smaller or the
302     * same as the current cache.
303     *
304     * @param minMean The minimum mean covered by the cache.
305     * @param maxMean The maximum mean covered by the cache.
306     * @throws IllegalArgumentException if {@code maxMean < minMean}
307     * @return the poisson sampler cache
308     */
309    public PoissonSamplerCache withRange(double minMean,
310                                         double maxMean) {
311        if (values == null) {
312            // Nothing to reuse
313            return new PoissonSamplerCache(minMean, maxMean);
314        }
315        checkMeanRange(minMean, maxMean);
316
317        // The cache can only be used for the LargeMeanPoissonSampler.
318        if (maxMean < PoissonSampler.PIVOT) {
319            return new PoissonSamplerCache(0, 0);
320        }
321
322        // Convert the mean into integers.
323        // Note the minimum is clipped to the algorithm switch point.
324        final int withMinN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT));
325        final int withMaxN = (int) Math.floor(maxMean);
326        final LargeMeanPoissonSamplerState[] states =
327                new LargeMeanPoissonSamplerState[withMaxN - withMinN + 1];
328
329        // Preserve values from the current array to the next
330        int currentIndex;
331        int nextIndex;
332        if (this.minN <= withMinN) {
333            // The current array starts before the new array
334            currentIndex = withMinN - this.minN;
335            nextIndex = 0;
336        } else {
337            // The new array starts before the current array
338            currentIndex = 0;
339            nextIndex = this.minN - withMinN;
340        }
341        final int length = Math.min(values.length - currentIndex, states.length - nextIndex);
342        if (length > 0) {
343            System.arraycopy(values, currentIndex, states, nextIndex, length);
344        }
345
346        return new PoissonSamplerCache(withMinN, withMaxN, states);
347    }
348}