001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.rng.sampling.distribution; 018 019import org.apache.commons.rng.UniformRandomProvider; 020import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler.LargeMeanPoissonSamplerState; 021 022/** 023 * Create a sampler for the 024 * <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson 025 * distribution</a> using a cache to minimise construction cost. 026 * 027 * <p>The cache will return a sampler equivalent to 028 * {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}.</p> 029 * 030 * <p>The cache allows the {@link PoissonSampler} construction cost to be minimised 031 * for low size Poisson samples. The cache stores state for a range of integers where 032 * integer value {@code n} can be used to construct a sampler for the range 033 * {@code n <= mean < n+1}.</p> 034 * 035 * <p>The cache is advantageous under the following conditions:</p> 036 * 037 * <ul> 038 * <li>The mean of the Poisson distribution falls within a known range. 039 * <li>The sample size to be made with the <strong>same</strong> sampler is 040 * small. 041 * <li>The Poisson samples have different means with the same integer 042 * value(s) after rounding down. 043 * </ul> 044 * 045 * <p>If the sample size to be made with the <strong>same</strong> sampler is large 046 * then the construction cost is low compared to the sampling time and the cache 047 * has minimal benefit.</p> 048 * 049 * <p>Performance improvement is dependent on the speed of the 050 * {@link UniformRandomProvider}. A fast provider can obtain a two-fold speed 051 * improvement for a single-use Poisson sampler.</p> 052 * 053 * <p>The cache is thread safe. Note that concurrent threads using the cache 054 * must ensure a thread safe {@link UniformRandomProvider} is used when creating 055 * samplers, e.g. a unique sampler per thread.</p> 056 * 057 * <p>Sampling uses:</p> 058 * 059 * <ul> 060 * <li>{@link UniformRandomProvider#nextDouble()} 061 * <li>{@link UniformRandomProvider#nextLong()} (large means only) 062 * </ul> 063 * 064 * @since 1.2 065 */ 066public class PoissonSamplerCache { 067 068 /** 069 * The minimum N covered by the cache where 070 * {@code N = (int)Math.floor(mean)}. 071 */ 072 private final int minN; 073 /** 074 * The maximum N covered by the cache where 075 * {@code N = (int)Math.floor(mean)}. 076 */ 077 private final int maxN; 078 /** The cache of states between {@link #minN} and {@link #maxN}. */ 079 private final LargeMeanPoissonSamplerState[] values; 080 081 /** 082 * @param minMean The minimum mean covered by the cache. 083 * @param maxMean The maximum mean covered by the cache. 084 * @throws IllegalArgumentException if {@code maxMean < minMean} 085 */ 086 public PoissonSamplerCache(double minMean, 087 double maxMean) { 088 089 checkMeanRange(minMean, maxMean); 090 091 // The cache can only be used for the LargeMeanPoissonSampler. 092 if (maxMean < PoissonSampler.PIVOT) { 093 // The upper limit is too small so no cache will be used. 094 // This class will just construct new samplers. 095 minN = 0; 096 maxN = 0; 097 values = null; 098 } else { 099 // Convert the mean into integers. 100 // Note the minimum is clipped to the algorithm switch point. 101 this.minN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT)); 102 this.maxN = (int) Math.floor(Math.min(maxMean, Integer.MAX_VALUE)); 103 values = new LargeMeanPoissonSamplerState[maxN - minN + 1]; 104 } 105 } 106 107 /** 108 * @param minN The minimum N covered by the cache where {@code N = (int)Math.floor(mean)}. 109 * @param maxN The maximum N covered by the cache where {@code N = (int)Math.floor(mean)}. 110 * @param states The precomputed states. 111 */ 112 private PoissonSamplerCache(int minN, 113 int maxN, 114 LargeMeanPoissonSamplerState[] states) { 115 this.minN = minN; 116 this.maxN = maxN; 117 // Stored directly as the states were newly created within this class. 118 this.values = states; 119 } 120 121 /** 122 * Check the mean range. 123 * 124 * @param minMean The minimum mean covered by the cache. 125 * @param maxMean The maximum mean covered by the cache. 126 * @throws IllegalArgumentException if {@code maxMean < minMean} 127 */ 128 private static void checkMeanRange(double minMean, double maxMean) { 129 // Note: 130 // Although a mean of 0 is invalid for a Poisson sampler this case 131 // is handled to make the cache user friendly. Any low means will 132 // be handled by the SmallMeanPoissonSampler and not cached. 133 // For this reason it is also OK if the means are negative. 134 135 // Allow minMean == maxMean so that the cache can be used 136 // to create samplers with distinct RNGs and the same mean. 137 if (maxMean < minMean) { 138 throw new IllegalArgumentException( 139 "Max mean: " + maxMean + " < " + minMean); 140 } 141 } 142 143 /** 144 * Creates a new Poisson sampler. 145 * 146 * <p>The returned sampler will function exactly the 147 * same as {@link PoissonSampler#of(UniformRandomProvider, double)}. 148 * 149 * @param rng Generator of uniformly distributed random numbers. 150 * @param mean Mean. 151 * @return A Poisson sampler 152 * @throws IllegalArgumentException if {@code mean <= 0} or 153 * {@code mean >} {@link Integer#MAX_VALUE}. 154 * @deprecated Use {@link #createSharedStateSampler(UniformRandomProvider, double)}. 155 */ 156 @Deprecated 157 public DiscreteSampler createPoissonSampler(UniformRandomProvider rng, 158 double mean) { 159 return createSharedStateSampler(rng, mean); 160 } 161 162 /** 163 * Creates a new Poisson sampler. 164 * 165 * <p>The returned sampler will function exactly the 166 * same as {@link PoissonSampler#of(UniformRandomProvider, double)}. 167 * 168 * @param rng Generator of uniformly distributed random numbers. 169 * @param mean Mean. 170 * @return A Poisson sampler 171 * @throws IllegalArgumentException if {@code mean <= 0} or 172 * {@code mean >} {@link Integer#MAX_VALUE}. 173 * @since 1.4 174 */ 175 public SharedStateDiscreteSampler createSharedStateSampler(UniformRandomProvider rng, 176 double mean) { 177 // Ensure the same functionality as the PoissonSampler by 178 // using a SmallMeanPoissonSampler under the switch point. 179 if (mean < PoissonSampler.PIVOT) { 180 return SmallMeanPoissonSampler.of(rng, mean); 181 } 182 if (mean > maxN) { 183 // Outside the range of the cache. 184 // This avoids extra parameter checks and handles the case when 185 // the cache is empty or if Math.floor(mean) is not an integer. 186 return LargeMeanPoissonSampler.of(rng, mean); 187 } 188 189 // Convert the mean into an integer. 190 final int n = (int) Math.floor(mean); 191 if (n < minN) { 192 // Outside the lower range of the cache. 193 return LargeMeanPoissonSampler.of(rng, mean); 194 } 195 196 // Look in the cache for a state that can be reused. 197 // Note: The cache is offset by minN. 198 final int index = n - minN; 199 final LargeMeanPoissonSamplerState state = values[index]; 200 if (state == null) { 201 // Create a sampler and store the state for reuse. 202 // Do not worry about thread contention 203 // as the state is effectively immutable. 204 // If recomputed and replaced it will the same. 205 final LargeMeanPoissonSampler sampler = new LargeMeanPoissonSampler(rng, mean); 206 values[index] = sampler.getState(); 207 return sampler; 208 } 209 // Compute the remaining fraction of the mean 210 final double lambdaFractional = mean - n; 211 return new LargeMeanPoissonSampler(rng, state, lambdaFractional); 212 } 213 214 /** 215 * Check if the mean is within the range where the cache can minimise the 216 * construction cost of the {@link PoissonSampler}. 217 * 218 * @param mean 219 * the mean 220 * @return true, if within the cache range 221 */ 222 public boolean withinRange(double mean) { 223 if (mean < PoissonSampler.PIVOT) { 224 // Construction is optimal 225 return true; 226 } 227 // Convert the mean into an integer. 228 final int n = (int) Math.floor(mean); 229 return n <= maxN && n >= minN; 230 } 231 232 /** 233 * Checks if the cache covers a valid range of mean values. 234 * 235 * <p>Note that the cache is only valid for one of the Poisson sampling 236 * algorithms. In the instance that a range was requested that was too 237 * low then there is nothing to cache and this functions returns 238 * {@code false}. 239 * 240 * <p>The cache can still be used to create a {@link PoissonSampler} using 241 * {@link #createSharedStateSampler(UniformRandomProvider, double)}. 242 * 243 * <p>This method can be used to determine if the cache has a potential 244 * performance benefit. 245 * 246 * @return true, if the cache covers a range of mean values 247 */ 248 public boolean isValidRange() { 249 return values != null; 250 } 251 252 /** 253 * Gets the minimum mean covered by the cache. 254 * 255 * <p>This value is the inclusive lower bound and is equal to 256 * the lowest integer-valued mean that is covered by the cache. 257 * 258 * <p>Note that this value may not match the value passed to the constructor 259 * due to the following reasons: 260 * 261 * <ul> 262 * <li>At small mean values a different algorithm is used for Poisson 263 * sampling and the cache is unnecessary. 264 * <li>The minimum is always an integer so may be below the constructor 265 * minimum mean. 266 * </ul> 267 * 268 * <p>If {@link #isValidRange()} returns {@code true} the cache will store 269 * state to reduce construction cost of samplers in 270 * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()} 271 * inclusive. Otherwise this method returns 0; 272 * 273 * @return The minimum mean covered by the cache. 274 */ 275 public double getMinMean() { 276 return minN; 277 } 278 279 /** 280 * Gets the maximum mean covered by the cache. 281 * 282 * <p>This value is the inclusive upper bound and is equal to 283 * the double value below the first integer-valued mean that is 284 * above range covered by the cache. 285 * 286 * <p>Note that this value may not match the value passed to the constructor 287 * due to the following reasons: 288 * <ul> 289 * <li>At small mean values a different algorithm is used for Poisson 290 * sampling and the cache is unnecessary. 291 * <li>The maximum is always the double value below an integer so 292 * may be above the constructor maximum mean. 293 * </ul> 294 * 295 * <p>If {@link #isValidRange()} returns {@code true} the cache will store 296 * state to reduce construction cost of samplers in 297 * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()} 298 * inclusive. Otherwise this method returns 0; 299 * 300 * @return The maximum mean covered by the cache. 301 */ 302 public double getMaxMean() { 303 if (isValidRange()) { 304 return Math.nextDown(maxN + 1.0); 305 } 306 return 0; 307 } 308 309 /** 310 * Gets the minimum mean value that can be cached. 311 * 312 * <p>Any {@link PoissonSampler} created with a mean below this level will not 313 * have any state that can be cached. 314 * 315 * @return the minimum cached mean 316 */ 317 public static double getMinimumCachedMean() { 318 return PoissonSampler.PIVOT; 319 } 320 321 /** 322 * Create a new {@link PoissonSamplerCache} with the given range 323 * reusing the current cache values. 324 * 325 * <p>This will create a new object even if the range is smaller or the 326 * same as the current cache. 327 * 328 * @param minMean The minimum mean covered by the cache. 329 * @param maxMean The maximum mean covered by the cache. 330 * @throws IllegalArgumentException if {@code maxMean < minMean} 331 * @return the poisson sampler cache 332 */ 333 public PoissonSamplerCache withRange(double minMean, 334 double maxMean) { 335 if (values == null) { 336 // Nothing to reuse 337 return new PoissonSamplerCache(minMean, maxMean); 338 } 339 checkMeanRange(minMean, maxMean); 340 341 // The cache can only be used for the LargeMeanPoissonSampler. 342 if (maxMean < PoissonSampler.PIVOT) { 343 return new PoissonSamplerCache(0, 0); 344 } 345 346 // Convert the mean into integers. 347 // Note the minimum is clipped to the algorithm switch point. 348 final int withMinN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT)); 349 final int withMaxN = (int) Math.floor(maxMean); 350 final LargeMeanPoissonSamplerState[] states = 351 new LargeMeanPoissonSamplerState[withMaxN - withMinN + 1]; 352 353 // Preserve values from the current array to the next 354 int currentIndex; 355 int nextIndex; 356 if (this.minN <= withMinN) { 357 // The current array starts before the new array 358 currentIndex = withMinN - this.minN; 359 nextIndex = 0; 360 } else { 361 // The new array starts before the current array 362 currentIndex = 0; 363 nextIndex = this.minN - withMinN; 364 } 365 final int length = Math.min(values.length - currentIndex, states.length - nextIndex); 366 if (length > 0) { 367 System.arraycopy(values, currentIndex, states, nextIndex, length); 368 } 369 370 return new PoissonSamplerCache(withMinN, withMaxN, states); 371 } 372}