001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.rng.sampling.distribution; 018 019import org.apache.commons.rng.UniformRandomProvider; 020import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler.LargeMeanPoissonSamplerState; 021 022/** 023 * Create a sampler for the 024 * <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson 025 * distribution</a> using a cache to minimise construction cost. 026 * 027 * <p>The cache will return a sampler equivalent to 028 * {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}.</p> 029 * 030 * <p>The cache allows the {@link PoissonSampler} construction cost to be minimised 031 * for low size Poisson samples. The cache stores state for a range of integers where 032 * integer value {@code n} can be used to construct a sampler for the range 033 * {@code n <= mean < n+1}.</p> 034 * 035 * <p>The cache is advantageous under the following conditions:</p> 036 * 037 * <ul> 038 * <li>The mean of the Poisson distribution falls within a known range. 039 * <li>The sample size to be made with the <strong>same</strong> sampler is 040 * small. 041 * <li>The Poisson samples have different means with the same integer 042 * value(s) after rounding down. 043 * </ul> 044 * 045 * <p>If the sample size to be made with the <strong>same</strong> sampler is large 046 * then the construction cost is low compared to the sampling time and the cache 047 * has minimal benefit.</p> 048 * 049 * <p>Performance improvement is dependent on the speed of the 050 * {@link UniformRandomProvider}. A fast provider can obtain a two-fold speed 051 * improvement for a single-use Poisson sampler.</p> 052 * 053 * <p>The cache is thread safe. Note that concurrent threads using the cache 054 * must ensure a thread safe {@link UniformRandomProvider} is used when creating 055 * samplers, e.g. a unique sampler per thread.</p> 056 * 057 * <p>Sampling uses:</p> 058 * 059 * <ul> 060 * <li>{@link UniformRandomProvider#nextDouble()} 061 * <li>{@link UniformRandomProvider#nextLong()} (large means only) 062 * </ul> 063 * 064 * @since 1.2 065 */ 066public class PoissonSamplerCache { 067 068 /** 069 * The minimum N covered by the cache where 070 * {@code N = (int)Math.floor(mean)}. 071 */ 072 private final int minN; 073 /** 074 * The maximum N covered by the cache where 075 * {@code N = (int)Math.floor(mean)}. 076 */ 077 private final int maxN; 078 /** The cache of states between {@link minN} and {@link maxN}. */ 079 private final LargeMeanPoissonSamplerState[] values; 080 081 /** 082 * @param minMean The minimum mean covered by the cache. 083 * @param maxMean The maximum mean covered by the cache. 084 * @throws IllegalArgumentException if {@code maxMean < minMean} 085 */ 086 public PoissonSamplerCache(double minMean, 087 double maxMean) { 088 089 checkMeanRange(minMean, maxMean); 090 091 // The cache can only be used for the LargeMeanPoissonSampler. 092 if (maxMean < PoissonSampler.PIVOT) { 093 // The upper limit is too small so no cache will be used. 094 // This class will just construct new samplers. 095 minN = 0; 096 maxN = 0; 097 values = null; 098 } else { 099 // Convert the mean into integers. 100 // Note the minimum is clipped to the algorithm switch point. 101 this.minN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT)); 102 this.maxN = (int) Math.floor(Math.min(maxMean, Integer.MAX_VALUE)); 103 values = new LargeMeanPoissonSamplerState[maxN - minN + 1]; 104 } 105 } 106 107 /** 108 * @param minN The minimum N covered by the cache where {@code N = (int)Math.floor(mean)}. 109 * @param maxN The maximum N covered by the cache where {@code N = (int)Math.floor(mean)}. 110 * @param states The precomputed states. 111 */ 112 private PoissonSamplerCache(int minN, 113 int maxN, 114 LargeMeanPoissonSamplerState[] states) { 115 this.minN = minN; 116 this.maxN = maxN; 117 // Stored directly as the states were newly created within this class. 118 this.values = states; 119 } 120 121 /** 122 * Check the mean range. 123 * 124 * @param minMean The minimum mean covered by the cache. 125 * @param maxMean The maximum mean covered by the cache. 126 * @throws IllegalArgumentException if {@code maxMean < minMean} 127 */ 128 private static void checkMeanRange(double minMean, double maxMean) { 129 // Note: 130 // Although a mean of 0 is invalid for a Poisson sampler this case 131 // is handled to make the cache user friendly. Any low means will 132 // be handled by the SmallMeanPoissonSampler and not cached. 133 // For this reason it is also OK if the means are negative. 134 135 // Allow minMean == maxMean so that the cache can be used 136 // to create samplers with distinct RNGs and the same mean. 137 if (maxMean < minMean) { 138 throw new IllegalArgumentException( 139 "Max mean: " + maxMean + " < " + minMean); 140 } 141 } 142 143 /** 144 * Creates a new Poisson sampler. 145 * 146 * <p>The returned sampler will function exactly the 147 * same as {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}. 148 * 149 * @param rng Generator of uniformly distributed random numbers. 150 * @param mean Mean. 151 * @return A Poisson sampler 152 * @throws IllegalArgumentException if {@code mean <= 0} or 153 * {@code mean >} {@link Integer#MAX_VALUE}. 154 */ 155 public DiscreteSampler createPoissonSampler(UniformRandomProvider rng, 156 double mean) { 157 // Ensure the same functionality as the PoissonSampler by 158 // using a SmallMeanPoissonSampler under the switch point. 159 if (mean < PoissonSampler.PIVOT) { 160 return SmallMeanPoissonSampler.of(rng, mean); 161 } 162 if (mean > maxN) { 163 // Outside the range of the cache. 164 // This avoids extra parameter checks and handles the case when 165 // the cache is empty or if Math.floor(mean) is not an integer. 166 return LargeMeanPoissonSampler.of(rng, mean); 167 } 168 169 // Convert the mean into an integer. 170 final int n = (int) Math.floor(mean); 171 if (n < minN) { 172 // Outside the lower range of the cache. 173 return LargeMeanPoissonSampler.of(rng, mean); 174 } 175 176 // Look in the cache for a state that can be reused. 177 // Note: The cache is offset by minN. 178 final int index = n - minN; 179 final LargeMeanPoissonSamplerState state = values[index]; 180 if (state == null) { 181 // Create a sampler and store the state for reuse. 182 // Do not worry about thread contention 183 // as the state is effectively immutable. 184 // If recomputed and replaced it will the same. 185 final LargeMeanPoissonSampler sampler = new LargeMeanPoissonSampler(rng, mean); 186 values[index] = sampler.getState(); 187 return sampler; 188 } 189 // Compute the remaining fraction of the mean 190 final double lambdaFractional = mean - n; 191 return new LargeMeanPoissonSampler(rng, state, lambdaFractional); 192 } 193 194 /** 195 * Check if the mean is within the range where the cache can minimise the 196 * construction cost of the {@link PoissonSampler}. 197 * 198 * @param mean 199 * the mean 200 * @return true, if within the cache range 201 */ 202 public boolean withinRange(double mean) { 203 if (mean < PoissonSampler.PIVOT) { 204 // Construction is optimal 205 return true; 206 } 207 // Convert the mean into an integer. 208 final int n = (int) Math.floor(mean); 209 return n <= maxN && n >= minN; 210 } 211 212 /** 213 * Checks if the cache covers a valid range of mean values. 214 * 215 * <p>Note that the cache is only valid for one of the Poisson sampling 216 * algorithms. In the instance that a range was requested that was too 217 * low then there is nothing to cache and this functions returns 218 * {@code false}. 219 * 220 * <p>The cache can still be used to create a {@link PoissonSampler} using 221 * {@link #createPoissonSampler(UniformRandomProvider, double)}. 222 * 223 * <p>This method can be used to determine if the cache has a potential 224 * performance benefit. 225 * 226 * @return true, if the cache covers a range of mean values 227 */ 228 public boolean isValidRange() { 229 return values != null; 230 } 231 232 /** 233 * Gets the minimum mean covered by the cache. 234 * 235 * <p>This value is the inclusive lower bound and is equal to 236 * the lowest integer-valued mean that is covered by the cache. 237 * 238 * <p>Note that this value may not match the value passed to the constructor 239 * due to the following reasons: 240 * 241 * <ul> 242 * <li>At small mean values a different algorithm is used for Poisson 243 * sampling and the cache is unnecessary. 244 * <li>The minimum is always an integer so may be below the constructor 245 * minimum mean. 246 * </ul> 247 * 248 * <p>If {@link #isValidRange()} returns {@code true} the cache will store 249 * state to reduce construction cost of samplers in 250 * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()} 251 * inclusive. Otherwise this method returns 0; 252 * 253 * @return The minimum mean covered by the cache. 254 */ 255 public double getMinMean() { 256 return minN; 257 } 258 259 /** 260 * Gets the maximum mean covered by the cache. 261 * 262 * <p>This value is the inclusive upper bound and is equal to 263 * the double value below the first integer-valued mean that is 264 * above range covered by the cache. 265 * 266 * <p>Note that this value may not match the value passed to the constructor 267 * due to the following reasons: 268 * <ul> 269 * <li>At small mean values a different algorithm is used for Poisson 270 * sampling and the cache is unnecessary. 271 * <li>The maximum is always the double value below an integer so 272 * may be above the constructor maximum mean. 273 * </ul> 274 * 275 * <p>If {@link #isValidRange()} returns {@code true} the cache will store 276 * state to reduce construction cost of samplers in 277 * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()} 278 * inclusive. Otherwise this method returns 0; 279 * 280 * @return The maximum mean covered by the cache. 281 */ 282 public double getMaxMean() { 283 if (isValidRange()) { 284 return Math.nextAfter(maxN + 1.0, -1); 285 } 286 return 0; 287 } 288 289 /** 290 * Gets the minimum mean value that can be cached. 291 * 292 * <p>Any {@link PoissonSampler} created with a mean below this level will not 293 * have any state that can be cached. 294 * 295 * @return the minimum cached mean 296 */ 297 public static double getMinimumCachedMean() { 298 return PoissonSampler.PIVOT; 299 } 300 301 /** 302 * Create a new {@link PoissonSamplerCache} with the given range 303 * reusing the current cache values. 304 * 305 * <p>This will create a new object even if the range is smaller or the 306 * same as the current cache. 307 * 308 * @param minMean The minimum mean covered by the cache. 309 * @param maxMean The maximum mean covered by the cache. 310 * @throws IllegalArgumentException if {@code maxMean < minMean} 311 * @return the poisson sampler cache 312 */ 313 public PoissonSamplerCache withRange(double minMean, 314 double maxMean) { 315 if (values == null) { 316 // Nothing to reuse 317 return new PoissonSamplerCache(minMean, maxMean); 318 } 319 checkMeanRange(minMean, maxMean); 320 321 // The cache can only be used for the LargeMeanPoissonSampler. 322 if (maxMean < PoissonSampler.PIVOT) { 323 return new PoissonSamplerCache(0, 0); 324 } 325 326 // Convert the mean into integers. 327 // Note the minimum is clipped to the algorithm switch point. 328 final int withMinN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT)); 329 final int withMaxN = (int) Math.floor(maxMean); 330 final LargeMeanPoissonSamplerState[] states = 331 new LargeMeanPoissonSamplerState[withMaxN - withMinN + 1]; 332 333 // Preserve values from the current array to the next 334 int currentIndex; 335 int nextIndex; 336 if (this.minN <= withMinN) { 337 // The current array starts before the new array 338 currentIndex = withMinN - this.minN; 339 nextIndex = 0; 340 } else { 341 // The new array starts before the current array 342 currentIndex = 0; 343 nextIndex = this.minN - withMinN; 344 } 345 final int length = Math.min(values.length - currentIndex, states.length - nextIndex); 346 if (length > 0) { 347 System.arraycopy(values, currentIndex, states, nextIndex, length); 348 } 349 350 return new PoissonSamplerCache(withMinN, withMaxN, states); 351 } 352}