001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.rng.sampling.distribution; 018 019import org.apache.commons.rng.UniformRandomProvider; 020import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler.LargeMeanPoissonSamplerState; 021 022/** 023 * Create a sampler for the 024 * <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson 025 * distribution</a> using a cache to minimise construction cost. 026 * 027 * <p>The cache will return a sampler equivalent to 028 * {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}.</p> 029 * 030 * <p>The cache allows the {@link PoissonSampler} construction cost to be minimised 031 * for low size Poisson samples. The cache stores state for a range of integers where 032 * integer value {@code n} can be used to construct a sampler for the range 033 * {@code n <= mean < n+1}.</p> 034 * 035 * <p>The cache is advantageous under the following conditions:</p> 036 * 037 * <ul> 038 * <li>The mean of the Poisson distribution falls within a known range. 039 * <li>The sample size to be made with the <strong>same</strong> sampler is 040 * small. 041 * <li>The Poisson samples have different means with the same integer 042 * value(s) after rounding down. 043 * </ul> 044 * 045 * <p>If the sample size to be made with the <strong>same</strong> sampler is large 046 * then the construction cost is low compared to the sampling time and the cache 047 * has minimal benefit.</p> 048 * 049 * <p>Performance improvement is dependent on the speed of the 050 * {@link UniformRandomProvider}. A fast provider can obtain a two-fold speed 051 * improvement for a single-use Poisson sampler.</p> 052 * 053 * <p>The cache is thread safe. Note that concurrent threads using the cache 054 * must ensure a thread safe {@link UniformRandomProvider} is used when creating 055 * samplers, e.g. a unique sampler per thread.</p> 056 * 057 * <p>Sampling uses:</p> 058 * 059 * <ul> 060 * <li>{@link UniformRandomProvider#nextDouble()} 061 * <li>{@link UniformRandomProvider#nextLong()} (large means only) 062 * </ul> 063 * 064 * @since 1.2 065 */ 066public class PoissonSamplerCache { 067 068 /** 069 * The minimum N covered by the cache where 070 * {@code N = (int)Math.floor(mean)}. 071 */ 072 private final int minN; 073 /** 074 * The maximum N covered by the cache where 075 * {@code N = (int)Math.floor(mean)}. 076 */ 077 private final int maxN; 078 /** The cache of states between {@link #minN} and {@link #maxN}. */ 079 private final LargeMeanPoissonSamplerState[] values; 080 081 /** 082 * Create an instance. 083 * 084 * @param minMean The minimum mean covered by the cache. 085 * @param maxMean The maximum mean covered by the cache. 086 * @throws IllegalArgumentException if {@code maxMean < minMean} 087 */ 088 public PoissonSamplerCache(double minMean, 089 double maxMean) { 090 this(checkMeanRange(minMean, maxMean), maxMean, false); 091 } 092 093 /** 094 * @param minMean The minimum mean covered by the cache. 095 * @param maxMean The maximum mean covered by the cache. 096 * @param ignored Ignored value. 097 */ 098 private PoissonSamplerCache(double minMean, 099 double maxMean, 100 boolean ignored) { 101 // The cache can only be used for the LargeMeanPoissonSampler. 102 if (maxMean < PoissonSampler.PIVOT) { 103 // The upper limit is too small so no cache will be used. 104 // This class will just construct new samplers. 105 minN = 0; 106 maxN = 0; 107 values = null; 108 } else { 109 // Convert the mean into integers. 110 // Note the minimum is clipped to the algorithm switch point. 111 this.minN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT)); 112 this.maxN = (int) Math.floor(Math.min(maxMean, Integer.MAX_VALUE)); 113 values = new LargeMeanPoissonSamplerState[maxN - minN + 1]; 114 } 115 } 116 117 /** 118 * @param minN The minimum N covered by the cache where {@code N = (int)Math.floor(mean)}. 119 * @param maxN The maximum N covered by the cache where {@code N = (int)Math.floor(mean)}. 120 * @param states The precomputed states. 121 */ 122 private PoissonSamplerCache(int minN, 123 int maxN, 124 LargeMeanPoissonSamplerState[] states) { 125 this.minN = minN; 126 this.maxN = maxN; 127 // Stored directly as the states were newly created within this class. 128 this.values = states; 129 } 130 131 /** 132 * Check the mean range. 133 * 134 * <p>This method exists to raise an exception before invocation of the 135 * private constructor; this mitigates Finalizer attacks 136 * (see SpotBugs CT_CONSTRUCTOR_THROW). 137 * 138 * @param minMean The minimum mean covered by the cache. 139 * @param maxMean The maximum mean covered by the cache. 140 * @return the minimum mean 141 * @throws IllegalArgumentException if {@code maxMean < minMean} 142 */ 143 private static double checkMeanRange(double minMean, double maxMean) { 144 // Note: 145 // Although a mean of 0 is invalid for a Poisson sampler this case 146 // is handled to make the cache user friendly. Any low means will 147 // be handled by the SmallMeanPoissonSampler and not cached. 148 // For this reason it is also OK if the means are negative. 149 150 // Allow minMean == maxMean so that the cache can be used 151 // to create samplers with distinct RNGs and the same mean. 152 if (maxMean < minMean) { 153 throw new IllegalArgumentException( 154 "Max mean: " + maxMean + " < " + minMean); 155 } 156 return minMean; 157 } 158 159 /** 160 * Creates a new Poisson sampler. 161 * 162 * <p>The returned sampler will function exactly the 163 * same as {@link PoissonSampler#of(UniformRandomProvider, double)}. 164 * 165 * @param rng Generator of uniformly distributed random numbers. 166 * @param mean Mean. 167 * @return A Poisson sampler 168 * @throws IllegalArgumentException if {@code mean <= 0} or 169 * {@code mean >} {@link Integer#MAX_VALUE}. 170 * @deprecated Use {@link #createSharedStateSampler(UniformRandomProvider, double)}. 171 */ 172 @Deprecated 173 public DiscreteSampler createPoissonSampler(UniformRandomProvider rng, 174 double mean) { 175 return createSharedStateSampler(rng, mean); 176 } 177 178 /** 179 * Creates a new Poisson sampler. 180 * 181 * <p>The returned sampler will function exactly the 182 * same as {@link PoissonSampler#of(UniformRandomProvider, double)}. 183 * 184 * @param rng Generator of uniformly distributed random numbers. 185 * @param mean Mean. 186 * @return A Poisson sampler 187 * @throws IllegalArgumentException if {@code mean <= 0} or 188 * {@code mean >} {@link Integer#MAX_VALUE}. 189 * @since 1.4 190 */ 191 public SharedStateDiscreteSampler createSharedStateSampler(UniformRandomProvider rng, 192 double mean) { 193 // Ensure the same functionality as the PoissonSampler by 194 // using a SmallMeanPoissonSampler under the switch point. 195 if (mean < PoissonSampler.PIVOT) { 196 return SmallMeanPoissonSampler.of(rng, mean); 197 } 198 if (mean > maxN) { 199 // Outside the range of the cache. 200 // This avoids extra parameter checks and handles the case when 201 // the cache is empty or if Math.floor(mean) is not an integer. 202 return LargeMeanPoissonSampler.of(rng, mean); 203 } 204 205 // Convert the mean into an integer. 206 final int n = (int) Math.floor(mean); 207 if (n < minN) { 208 // Outside the lower range of the cache. 209 return LargeMeanPoissonSampler.of(rng, mean); 210 } 211 212 // Look in the cache for a state that can be reused. 213 // Note: The cache is offset by minN. 214 final int index = n - minN; 215 final LargeMeanPoissonSamplerState state = values[index]; 216 if (state == null) { 217 // Create a sampler and store the state for reuse. 218 // Do not worry about thread contention 219 // as the state is effectively immutable. 220 // If recomputed and replaced it will the same. 221 final LargeMeanPoissonSampler sampler = new LargeMeanPoissonSampler(rng, mean); 222 values[index] = sampler.getState(); 223 return sampler; 224 } 225 // Compute the remaining fraction of the mean 226 final double lambdaFractional = mean - n; 227 return new LargeMeanPoissonSampler(rng, state, lambdaFractional); 228 } 229 230 /** 231 * Check if the mean is within the range where the cache can minimise the 232 * construction cost of the {@link PoissonSampler}. 233 * 234 * @param mean 235 * the mean 236 * @return true, if within the cache range 237 */ 238 public boolean withinRange(double mean) { 239 if (mean < PoissonSampler.PIVOT) { 240 // Construction is optimal 241 return true; 242 } 243 // Convert the mean into an integer. 244 final int n = (int) Math.floor(mean); 245 return n <= maxN && n >= minN; 246 } 247 248 /** 249 * Checks if the cache covers a valid range of mean values. 250 * 251 * <p>Note that the cache is only valid for one of the Poisson sampling 252 * algorithms. In the instance that a range was requested that was too 253 * low then there is nothing to cache and this functions returns 254 * {@code false}. 255 * 256 * <p>The cache can still be used to create a {@link PoissonSampler} using 257 * {@link #createSharedStateSampler(UniformRandomProvider, double)}. 258 * 259 * <p>This method can be used to determine if the cache has a potential 260 * performance benefit. 261 * 262 * @return true, if the cache covers a range of mean values 263 */ 264 public boolean isValidRange() { 265 return values != null; 266 } 267 268 /** 269 * Gets the minimum mean covered by the cache. 270 * 271 * <p>This value is the inclusive lower bound and is equal to 272 * the lowest integer-valued mean that is covered by the cache. 273 * 274 * <p>Note that this value may not match the value passed to the constructor 275 * due to the following reasons: 276 * 277 * <ul> 278 * <li>At small mean values a different algorithm is used for Poisson 279 * sampling and the cache is unnecessary. 280 * <li>The minimum is always an integer so may be below the constructor 281 * minimum mean. 282 * </ul> 283 * 284 * <p>If {@link #isValidRange()} returns {@code true} the cache will store 285 * state to reduce construction cost of samplers in 286 * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()} 287 * inclusive. Otherwise this method returns 0; 288 * 289 * @return The minimum mean covered by the cache. 290 */ 291 public double getMinMean() { 292 return minN; 293 } 294 295 /** 296 * Gets the maximum mean covered by the cache. 297 * 298 * <p>This value is the inclusive upper bound and is equal to 299 * the double value below the first integer-valued mean that is 300 * above range covered by the cache. 301 * 302 * <p>Note that this value may not match the value passed to the constructor 303 * due to the following reasons: 304 * <ul> 305 * <li>At small mean values a different algorithm is used for Poisson 306 * sampling and the cache is unnecessary. 307 * <li>The maximum is always the double value below an integer so 308 * may be above the constructor maximum mean. 309 * </ul> 310 * 311 * <p>If {@link #isValidRange()} returns {@code true} the cache will store 312 * state to reduce construction cost of samplers in 313 * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()} 314 * inclusive. Otherwise this method returns 0; 315 * 316 * @return The maximum mean covered by the cache. 317 */ 318 public double getMaxMean() { 319 if (isValidRange()) { 320 return Math.nextDown(maxN + 1.0); 321 } 322 return 0; 323 } 324 325 /** 326 * Gets the minimum mean value that can be cached. 327 * 328 * <p>Any {@link PoissonSampler} created with a mean below this level will not 329 * have any state that can be cached. 330 * 331 * @return the minimum cached mean 332 */ 333 public static double getMinimumCachedMean() { 334 return PoissonSampler.PIVOT; 335 } 336 337 /** 338 * Create a new {@link PoissonSamplerCache} with the given range 339 * reusing the current cache values. 340 * 341 * <p>This will create a new object even if the range is smaller or the 342 * same as the current cache. 343 * 344 * @param minMean The minimum mean covered by the cache. 345 * @param maxMean The maximum mean covered by the cache. 346 * @throws IllegalArgumentException if {@code maxMean < minMean} 347 * @return the poisson sampler cache 348 */ 349 public PoissonSamplerCache withRange(double minMean, 350 double maxMean) { 351 if (values == null) { 352 // Nothing to reuse 353 return new PoissonSamplerCache(minMean, maxMean); 354 } 355 checkMeanRange(minMean, maxMean); 356 357 // The cache can only be used for the LargeMeanPoissonSampler. 358 if (maxMean < PoissonSampler.PIVOT) { 359 return new PoissonSamplerCache(0, 0); 360 } 361 362 // Convert the mean into integers. 363 // Note the minimum is clipped to the algorithm switch point. 364 final int withMinN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT)); 365 final int withMaxN = (int) Math.floor(maxMean); 366 final LargeMeanPoissonSamplerState[] states = 367 new LargeMeanPoissonSamplerState[withMaxN - withMinN + 1]; 368 369 // Preserve values from the current array to the next 370 final int currentIndex; 371 final int nextIndex; 372 if (this.minN <= withMinN) { 373 // The current array starts before the new array 374 currentIndex = withMinN - this.minN; 375 nextIndex = 0; 376 } else { 377 // The new array starts before the current array 378 currentIndex = 0; 379 nextIndex = this.minN - withMinN; 380 } 381 final int length = Math.min(values.length - currentIndex, states.length - nextIndex); 382 if (length > 0) { 383 System.arraycopy(values, currentIndex, states, nextIndex, length); 384 } 385 386 return new PoissonSamplerCache(withMinN, withMaxN, states); 387 } 388}