PoissonSamplerCache.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.rng.sampling.distribution;

  18. import org.apache.commons.rng.UniformRandomProvider;
  19. import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler.LargeMeanPoissonSamplerState;

  20. /**
  21.  * Create a sampler for the
  22.  * <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson
  23.  * distribution</a> using a cache to minimise construction cost.
  24.  *
  25.  * <p>The cache will return a sampler equivalent to
  26.  * {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}.</p>
  27.  *
  28.  * <p>The cache allows the {@link PoissonSampler} construction cost to be minimised
  29.  * for low size Poisson samples. The cache stores state for a range of integers where
  30.  * integer value {@code n} can be used to construct a sampler for the range
  31.  * {@code n <= mean < n+1}.</p>
  32.  *
  33.  * <p>The cache is advantageous under the following conditions:</p>
  34.  *
  35.  * <ul>
  36.  *   <li>The mean of the Poisson distribution falls within a known range.
  37.  *   <li>The sample size to be made with the <strong>same</strong> sampler is
  38.  *       small.
  39.  *   <li>The Poisson samples have different means with the same integer
  40.  *       value(s) after rounding down.
  41.  * </ul>
  42.  *
  43.  * <p>If the sample size to be made with the <strong>same</strong> sampler is large
  44.  * then the construction cost is low compared to the sampling time and the cache
  45.  * has minimal benefit.</p>
  46.  *
  47.  * <p>Performance improvement is dependent on the speed of the
  48.  * {@link UniformRandomProvider}. A fast provider can obtain a two-fold speed
  49.  * improvement for a single-use Poisson sampler.</p>
  50.  *
  51.  * <p>The cache is thread safe. Note that concurrent threads using the cache
  52.  * must ensure a thread safe {@link UniformRandomProvider} is used when creating
  53.  * samplers, e.g. a unique sampler per thread.</p>
  54.  *
  55.  * <p>Sampling uses:</p>
  56.  *
  57.  * <ul>
  58.  *   <li>{@link UniformRandomProvider#nextDouble()}
  59.  *   <li>{@link UniformRandomProvider#nextLong()} (large means only)
  60.  * </ul>
  61.  *
  62.  * @since 1.2
  63.  */
  64. public class PoissonSamplerCache {

  65.     /**
  66.      * The minimum N covered by the cache where
  67.      * {@code N = (int)Math.floor(mean)}.
  68.      */
  69.     private final int minN;
  70.     /**
  71.      * The maximum N covered by the cache where
  72.      * {@code N = (int)Math.floor(mean)}.
  73.      */
  74.     private final int maxN;
  75.     /** The cache of states between {@link #minN} and {@link #maxN}. */
  76.     private final LargeMeanPoissonSamplerState[] values;

  77.     /**
  78.      * Create an instance.
  79.      *
  80.      * @param minMean The minimum mean covered by the cache.
  81.      * @param maxMean The maximum mean covered by the cache.
  82.      * @throws IllegalArgumentException if {@code maxMean < minMean}
  83.      */
  84.     public PoissonSamplerCache(double minMean,
  85.                                double maxMean) {
  86.         this(checkMeanRange(minMean, maxMean), maxMean, false);
  87.     }

  88.     /**
  89.      * @param minMean The minimum mean covered by the cache.
  90.      * @param maxMean The maximum mean covered by the cache.
  91.      * @param ignored Ignored value.
  92.      */
  93.     private PoissonSamplerCache(double minMean,
  94.                                 double maxMean,
  95.                                 boolean ignored) {
  96.         // The cache can only be used for the LargeMeanPoissonSampler.
  97.         if (maxMean < PoissonSampler.PIVOT) {
  98.             // The upper limit is too small so no cache will be used.
  99.             // This class will just construct new samplers.
  100.             minN = 0;
  101.             maxN = 0;
  102.             values = null;
  103.         } else {
  104.             // Convert the mean into integers.
  105.             // Note the minimum is clipped to the algorithm switch point.
  106.             this.minN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT));
  107.             this.maxN = (int) Math.floor(Math.min(maxMean, Integer.MAX_VALUE));
  108.             values = new LargeMeanPoissonSamplerState[maxN - minN + 1];
  109.         }
  110.     }

  111.     /**
  112.      * @param minN   The minimum N covered by the cache where {@code N = (int)Math.floor(mean)}.
  113.      * @param maxN   The maximum N covered by the cache where {@code N = (int)Math.floor(mean)}.
  114.      * @param states The precomputed states.
  115.      */
  116.     private PoissonSamplerCache(int minN,
  117.                                 int maxN,
  118.                                 LargeMeanPoissonSamplerState[] states) {
  119.         this.minN = minN;
  120.         this.maxN = maxN;
  121.         // Stored directly as the states were newly created within this class.
  122.         this.values = states;
  123.     }

  124.     /**
  125.      * Check the mean range.
  126.      *
  127.      * <p>This method exists to raise an exception before invocation of the
  128.      * private constructor; this mitigates Finalizer attacks
  129.      * (see SpotBugs CT_CONSTRUCTOR_THROW).
  130.      *
  131.      * @param minMean The minimum mean covered by the cache.
  132.      * @param maxMean The maximum mean covered by the cache.
  133.      * @return the minimum mean
  134.      * @throws IllegalArgumentException if {@code maxMean < minMean}
  135.      */
  136.     private static double checkMeanRange(double minMean, double maxMean) {
  137.         // Note:
  138.         // Although a mean of 0 is invalid for a Poisson sampler this case
  139.         // is handled to make the cache user friendly. Any low means will
  140.         // be handled by the SmallMeanPoissonSampler and not cached.
  141.         // For this reason it is also OK if the means are negative.

  142.         // Allow minMean == maxMean so that the cache can be used
  143.         // to create samplers with distinct RNGs and the same mean.
  144.         if (maxMean < minMean) {
  145.             throw new IllegalArgumentException(
  146.                     "Max mean: " + maxMean + " < " + minMean);
  147.         }
  148.         return minMean;
  149.     }

  150.     /**
  151.      * Creates a new Poisson sampler.
  152.      *
  153.      * <p>The returned sampler will function exactly the
  154.      * same as {@link PoissonSampler#of(UniformRandomProvider, double)}.
  155.      *
  156.      * @param rng  Generator of uniformly distributed random numbers.
  157.      * @param mean Mean.
  158.      * @return A Poisson sampler
  159.      * @throws IllegalArgumentException if {@code mean <= 0} or
  160.      * {@code mean >} {@link Integer#MAX_VALUE}.
  161.      * @deprecated Use {@link #createSharedStateSampler(UniformRandomProvider, double)}.
  162.      */
  163.     @Deprecated
  164.     public DiscreteSampler createPoissonSampler(UniformRandomProvider rng,
  165.                                                 double mean) {
  166.         return createSharedStateSampler(rng, mean);
  167.     }

  168.     /**
  169.      * Creates a new Poisson sampler.
  170.      *
  171.      * <p>The returned sampler will function exactly the
  172.      * same as {@link PoissonSampler#of(UniformRandomProvider, double)}.
  173.      *
  174.      * @param rng  Generator of uniformly distributed random numbers.
  175.      * @param mean Mean.
  176.      * @return A Poisson sampler
  177.      * @throws IllegalArgumentException if {@code mean <= 0} or
  178.      * {@code mean >} {@link Integer#MAX_VALUE}.
  179.      * @since 1.4
  180.      */
  181.     public SharedStateDiscreteSampler createSharedStateSampler(UniformRandomProvider rng,
  182.                                                                double mean) {
  183.         // Ensure the same functionality as the PoissonSampler by
  184.         // using a SmallMeanPoissonSampler under the switch point.
  185.         if (mean < PoissonSampler.PIVOT) {
  186.             return SmallMeanPoissonSampler.of(rng, mean);
  187.         }
  188.         if (mean > maxN) {
  189.             // Outside the range of the cache.
  190.             // This avoids extra parameter checks and handles the case when
  191.             // the cache is empty or if Math.floor(mean) is not an integer.
  192.             return LargeMeanPoissonSampler.of(rng, mean);
  193.         }

  194.         // Convert the mean into an integer.
  195.         final int n = (int) Math.floor(mean);
  196.         if (n < minN) {
  197.             // Outside the lower range of the cache.
  198.             return LargeMeanPoissonSampler.of(rng, mean);
  199.         }

  200.         // Look in the cache for a state that can be reused.
  201.         // Note: The cache is offset by minN.
  202.         final int index = n - minN;
  203.         final LargeMeanPoissonSamplerState state = values[index];
  204.         if (state == null) {
  205.             // Create a sampler and store the state for reuse.
  206.             // Do not worry about thread contention
  207.             // as the state is effectively immutable.
  208.             // If recomputed and replaced it will the same.
  209.             final LargeMeanPoissonSampler sampler = new LargeMeanPoissonSampler(rng, mean);
  210.             values[index] = sampler.getState();
  211.             return sampler;
  212.         }
  213.         // Compute the remaining fraction of the mean
  214.         final double lambdaFractional = mean - n;
  215.         return new LargeMeanPoissonSampler(rng, state, lambdaFractional);
  216.     }

  217.     /**
  218.      * Check if the mean is within the range where the cache can minimise the
  219.      * construction cost of the {@link PoissonSampler}.
  220.      *
  221.      * @param mean
  222.      *            the mean
  223.      * @return true, if within the cache range
  224.      */
  225.     public boolean withinRange(double mean) {
  226.         if (mean < PoissonSampler.PIVOT) {
  227.             // Construction is optimal
  228.             return true;
  229.         }
  230.         // Convert the mean into an integer.
  231.         final int n = (int) Math.floor(mean);
  232.         return n <= maxN && n >= minN;
  233.     }

  234.     /**
  235.      * Checks if the cache covers a valid range of mean values.
  236.      *
  237.      * <p>Note that the cache is only valid for one of the Poisson sampling
  238.      * algorithms. In the instance that a range was requested that was too
  239.      * low then there is nothing to cache and this functions returns
  240.      * {@code false}.
  241.      *
  242.      * <p>The cache can still be used to create a {@link PoissonSampler} using
  243.      * {@link #createSharedStateSampler(UniformRandomProvider, double)}.
  244.      *
  245.      * <p>This method can be used to determine if the cache has a potential
  246.      * performance benefit.
  247.      *
  248.      * @return true, if the cache covers a range of mean values
  249.      */
  250.     public boolean isValidRange() {
  251.         return values != null;
  252.     }

  253.     /**
  254.      * Gets the minimum mean covered by the cache.
  255.      *
  256.      * <p>This value is the inclusive lower bound and is equal to
  257.      * the lowest integer-valued mean that is covered by the cache.
  258.      *
  259.      * <p>Note that this value may not match the value passed to the constructor
  260.      * due to the following reasons:
  261.      *
  262.      * <ul>
  263.      *   <li>At small mean values a different algorithm is used for Poisson
  264.      *       sampling and the cache is unnecessary.
  265.      *   <li>The minimum is always an integer so may be below the constructor
  266.      *       minimum mean.
  267.      * </ul>
  268.      *
  269.      * <p>If {@link #isValidRange()} returns {@code true} the cache will store
  270.      * state to reduce construction cost of samplers in
  271.      * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()}
  272.      * inclusive. Otherwise this method returns 0;
  273.      *
  274.      * @return The minimum mean covered by the cache.
  275.      */
  276.     public double getMinMean() {
  277.         return minN;
  278.     }

  279.     /**
  280.      * Gets the maximum mean covered by the cache.
  281.      *
  282.      * <p>This value is the inclusive upper bound and is equal to
  283.      * the double value below the first integer-valued mean that is
  284.      * above range covered by the cache.
  285.      *
  286.      * <p>Note that this value may not match the value passed to the constructor
  287.      * due to the following reasons:
  288.      * <ul>
  289.      *   <li>At small mean values a different algorithm is used for Poisson
  290.      *       sampling and the cache is unnecessary.
  291.      *   <li>The maximum is always the double value below an integer so
  292.      *       may be above the constructor maximum mean.
  293.      * </ul>
  294.      *
  295.      * <p>If {@link #isValidRange()} returns {@code true} the cache will store
  296.      * state to reduce construction cost of samplers in
  297.      * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()}
  298.      * inclusive. Otherwise this method returns 0;
  299.      *
  300.      * @return The maximum mean covered by the cache.
  301.      */
  302.     public double getMaxMean() {
  303.         if (isValidRange()) {
  304.             return Math.nextDown(maxN + 1.0);
  305.         }
  306.         return 0;
  307.     }

  308.     /**
  309.      * Gets the minimum mean value that can be cached.
  310.      *
  311.      * <p>Any {@link PoissonSampler} created with a mean below this level will not
  312.      * have any state that can be cached.
  313.      *
  314.      * @return the minimum cached mean
  315.      */
  316.     public static double getMinimumCachedMean() {
  317.         return PoissonSampler.PIVOT;
  318.     }

  319.     /**
  320.      * Create a new {@link PoissonSamplerCache} with the given range
  321.      * reusing the current cache values.
  322.      *
  323.      * <p>This will create a new object even if the range is smaller or the
  324.      * same as the current cache.
  325.      *
  326.      * @param minMean The minimum mean covered by the cache.
  327.      * @param maxMean The maximum mean covered by the cache.
  328.      * @throws IllegalArgumentException if {@code maxMean < minMean}
  329.      * @return the poisson sampler cache
  330.      */
  331.     public PoissonSamplerCache withRange(double minMean,
  332.                                          double maxMean) {
  333.         if (values == null) {
  334.             // Nothing to reuse
  335.             return new PoissonSamplerCache(minMean, maxMean);
  336.         }
  337.         checkMeanRange(minMean, maxMean);

  338.         // The cache can only be used for the LargeMeanPoissonSampler.
  339.         if (maxMean < PoissonSampler.PIVOT) {
  340.             return new PoissonSamplerCache(0, 0);
  341.         }

  342.         // Convert the mean into integers.
  343.         // Note the minimum is clipped to the algorithm switch point.
  344.         final int withMinN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT));
  345.         final int withMaxN = (int) Math.floor(maxMean);
  346.         final LargeMeanPoissonSamplerState[] states =
  347.                 new LargeMeanPoissonSamplerState[withMaxN - withMinN + 1];

  348.         // Preserve values from the current array to the next
  349.         final int currentIndex;
  350.         final int nextIndex;
  351.         if (this.minN <= withMinN) {
  352.             // The current array starts before the new array
  353.             currentIndex = withMinN - this.minN;
  354.             nextIndex = 0;
  355.         } else {
  356.             // The new array starts before the current array
  357.             currentIndex = 0;
  358.             nextIndex = this.minN - withMinN;
  359.         }
  360.         final int length = Math.min(values.length - currentIndex, states.length - nextIndex);
  361.         if (length > 0) {
  362.             System.arraycopy(values, currentIndex, states, nextIndex, length);
  363.         }

  364.         return new PoissonSamplerCache(withMinN, withMaxN, states);
  365.     }
  366. }