001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.rng.sampling.distribution; 018 019import org.apache.commons.rng.UniformRandomProvider; 020import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler.LargeMeanPoissonSamplerState; 021 022/** 023 * Create a sampler for the 024 * <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson 025 * distribution</a> using a cache to minimise construction cost. 026 * 027 * <p>The cache will return a sampler equivalent to 028 * {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}. 029 * 030 * <p>The cache allows the {@link PoissonSampler} construction cost to be minimised 031 * for low size Poisson samples. The cache stores state for a range of integers where 032 * integer value {@code n} can be used to construct a sampler for the range 033 * {@code n <= mean < n+1}. 034 * 035 * <p>The cache is advantageous under the following conditions: 036 * 037 * <ul> 038 * <li>The mean of the Poisson distribution falls within a known range. 039 * <li>The sample size to be made with the <strong>same</strong> sampler is 040 * small. 041 * <li>The Poisson samples have different means with the same integer 042 * value(s) after rounding down. 043 * </ul> 044 * 045 * <p>If the sample size to be made with the <strong>same</strong> sampler is large 046 * then the construction cost is low compared to the sampling time and the cache 047 * has minimal benefit. 048 * 049 * <p>Performance improvement is dependent on the speed of the 050 * {@link UniformRandomProvider}. A fast provider can obtain a two-fold speed 051 * improvement for a single-use Poisson sampler. 052 * 053 * <p>The cache is thread safe. Note that concurrent threads using the cache 054 * must ensure a thread safe {@link UniformRandomProvider} is used when creating 055 * samplers, e.g. a unique sampler per thread. 056 * 057 * @since 1.2 058 */ 059public class PoissonSamplerCache { 060 061 /** 062 * The minimum N covered by the cache where 063 * {@code N = (int)Math.floor(mean)}. 064 */ 065 private final int minN; 066 /** 067 * The maximum N covered by the cache where 068 * {@code N = (int)Math.floor(mean)}. 069 */ 070 private final int maxN; 071 /** The cache of states between {@link minN} and {@link maxN}. */ 072 private final LargeMeanPoissonSamplerState[] values; 073 074 /** 075 * @param minMean The minimum mean covered by the cache. 076 * @param maxMean The maximum mean covered by the cache. 077 * @throws IllegalArgumentException if {@code maxMean < minMean} 078 */ 079 public PoissonSamplerCache(double minMean, 080 double maxMean) { 081 082 checkMeanRange(minMean, maxMean); 083 084 // The cache can only be used for the LargeMeanPoissonSampler. 085 if (maxMean < PoissonSampler.PIVOT) { 086 // The upper limit is too small so no cache will be used. 087 // This class will just construct new samplers. 088 minN = 0; 089 maxN = 0; 090 values = null; 091 } else { 092 // Convert the mean into integers. 093 // Note the minimum is clipped to the algorithm switch point. 094 this.minN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT)); 095 this.maxN = (int) Math.floor(Math.min(maxMean, Integer.MAX_VALUE)); 096 values = new LargeMeanPoissonSamplerState[maxN - minN + 1]; 097 } 098 } 099 100 /** 101 * @param minN The minimum N covered by the cache where {@code N = (int)Math.floor(mean)}. 102 * @param maxN The maximum N covered by the cache where {@code N = (int)Math.floor(mean)}. 103 * @param states The precomputed states. 104 */ 105 private PoissonSamplerCache(int minN, 106 int maxN, 107 LargeMeanPoissonSamplerState[] states) { 108 this.minN = minN; 109 this.maxN = maxN; 110 // Stored directly as the states were newly created within this class. 111 this.values = states; 112 } 113 114 /** 115 * Check the mean range. 116 * 117 * @param minMean The minimum mean covered by the cache. 118 * @param maxMean The maximum mean covered by the cache. 119 * @throws IllegalArgumentException if {@code maxMean < minMean} 120 */ 121 private static void checkMeanRange(double minMean, double maxMean) 122 { 123 // Note: 124 // Although a mean of 0 is invalid for a Poisson sampler this case 125 // is handled to make the cache user friendly. Any low means will 126 // be handled by the SmallMeanPoissonSampler and not cached. 127 // For this reason it is also OK if the means are negative. 128 129 // Allow minMean == maxMean so that the cache can be used 130 // to create samplers with distinct RNGs and the same mean. 131 if (maxMean < minMean) { 132 throw new IllegalArgumentException( 133 "Max mean: " + maxMean + " < " + minMean); 134 } 135 } 136 137 /** 138 * Creates a new Poisson sampler. 139 * 140 * <p>The returned sampler will function exactly the 141 * same as {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}. 142 * 143 * @param rng Generator of uniformly distributed random numbers. 144 * @param mean Mean. 145 * @return A Poisson sampler 146 * @throws IllegalArgumentException if {@code mean <= 0} or 147 * {@code mean >} {@link Integer#MAX_VALUE}. 148 */ 149 public DiscreteSampler createPoissonSampler(UniformRandomProvider rng, 150 double mean) { 151 // Ensure the same functionality as the PoissonSampler by 152 // using a SmallMeanPoissonSampler under the switch point. 153 if (mean < PoissonSampler.PIVOT) { 154 return new SmallMeanPoissonSampler(rng, mean); 155 } 156 if (mean > maxN) { 157 // Outside the range of the cache. 158 // This avoids extra parameter checks and handles the case when 159 // the cache is empty or if Math.floor(mean) is not an integer. 160 return new LargeMeanPoissonSampler(rng, mean); 161 } 162 163 // Convert the mean into an integer. 164 final int n = (int) Math.floor(mean); 165 if (n < minN) { 166 // Outside the lower range of the cache. 167 return new LargeMeanPoissonSampler(rng, mean); 168 } 169 170 // Look in the cache for a state that can be reused. 171 // Note: The cache is offset by minN. 172 final int index = n - minN; 173 final LargeMeanPoissonSamplerState state = values[index]; 174 if (state == null) { 175 // Create a sampler and store the state for reuse. 176 // Do not worry about thread contention 177 // as the state is effectively immutable. 178 // If recomputed and replaced it will the same. 179 final LargeMeanPoissonSampler sampler = new LargeMeanPoissonSampler(rng, mean); 180 values[index] = sampler.getState(); 181 return sampler; 182 } 183 // Compute the remaining fraction of the mean 184 final double lambdaFractional = mean - n; 185 return new LargeMeanPoissonSampler(rng, state, lambdaFractional); 186 } 187 188 /** 189 * Check if the mean is within the range where the cache can minimise the 190 * construction cost of the {@link PoissonSampler}. 191 * 192 * @param mean 193 * the mean 194 * @return true, if within the cache range 195 */ 196 public boolean withinRange(double mean) { 197 if (mean < PoissonSampler.PIVOT) { 198 // Construction is optimal 199 return true; 200 } 201 // Convert the mean into an integer. 202 final int n = (int) Math.floor(mean); 203 return n <= maxN && n >= minN; 204 } 205 206 /** 207 * Checks if the cache covers a valid range of mean values. 208 * 209 * <p>Note that the cache is only valid for one of the Poisson sampling 210 * algorithms. In the instance that a range was requested that was too 211 * low then there is nothing to cache and this functions returns 212 * {@code false}. 213 * 214 * <p>The cache can still be used to create a {@link PoissonSampler} using 215 * {@link #createPoissonSampler(UniformRandomProvider, double)}. 216 * 217 * <p>This method can be used to determine if the cache has a potential 218 * performance benefit. 219 * 220 * @return true, if the cache covers a range of mean values 221 */ 222 public boolean isValidRange() { 223 return values != null; 224 } 225 226 /** 227 * Gets the minimum mean covered by the cache. 228 * 229 * <p>This value is the inclusive lower bound and is equal to 230 * the lowest integer-valued mean that is covered by the cache. 231 * 232 * <p>Note that this value may not match the value passed to the constructor 233 * due to the following reasons: 234 * 235 * <ul> 236 * <li>At small mean values a different algorithm is used for Poisson 237 * sampling and the cache is unnecessary. 238 * <li>The minimum is always an integer so may be below the constructor 239 * minimum mean. 240 * </ul> 241 * 242 * <p>If {@link #isValidRange()} returns {@code true} the cache will store 243 * state to reduce construction cost of samplers in 244 * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()} 245 * inclusive. Otherwise this method returns 0; 246 * 247 * @return The minimum mean covered by the cache. 248 */ 249 public double getMinMean() 250 { 251 return minN; 252 } 253 254 /** 255 * Gets the maximum mean covered by the cache. 256 * 257 * <p>This value is the inclusive upper bound and is equal to 258 * the double value below the first integer-valued mean that is 259 * above range covered by the cache. 260 * 261 * <p>Note that this value may not match the value passed to the constructor 262 * due to the following reasons: 263 * <ul> 264 * <li>At small mean values a different algorithm is used for Poisson 265 * sampling and the cache is unnecessary. 266 * <li>The maximum is always the double value below an integer so 267 * may be above the constructor maximum mean. 268 * </ul> 269 * 270 * <p>If {@link #isValidRange()} returns {@code true} the cache will store 271 * state to reduce construction cost of samplers in 272 * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()} 273 * inclusive. Otherwise this method returns 0; 274 * 275 * @return The maximum mean covered by the cache. 276 */ 277 public double getMaxMean() 278 { 279 if (isValidRange()) { 280 return Math.nextAfter(maxN + 1.0, -1); 281 } 282 return 0; 283 } 284 285 /** 286 * Gets the minimum mean value that can be cached. 287 * 288 * <p>Any {@link PoissonSampler} created with a mean below this level will not 289 * have any state that can be cached. 290 * 291 * @return the minimum cached mean 292 */ 293 public static double getMinimumCachedMean() { 294 return PoissonSampler.PIVOT; 295 } 296 297 /** 298 * Create a new {@link PoissonSamplerCache} with the given range 299 * reusing the current cache values. 300 * 301 * <p>This will create a new object even if the range is smaller or the 302 * same as the current cache. 303 * 304 * @param minMean The minimum mean covered by the cache. 305 * @param maxMean The maximum mean covered by the cache. 306 * @throws IllegalArgumentException if {@code maxMean < minMean} 307 * @return the poisson sampler cache 308 */ 309 public PoissonSamplerCache withRange(double minMean, 310 double maxMean) { 311 if (values == null) { 312 // Nothing to reuse 313 return new PoissonSamplerCache(minMean, maxMean); 314 } 315 checkMeanRange(minMean, maxMean); 316 317 // The cache can only be used for the LargeMeanPoissonSampler. 318 if (maxMean < PoissonSampler.PIVOT) { 319 return new PoissonSamplerCache(0, 0); 320 } 321 322 // Convert the mean into integers. 323 // Note the minimum is clipped to the algorithm switch point. 324 final int withMinN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT)); 325 final int withMaxN = (int) Math.floor(maxMean); 326 final LargeMeanPoissonSamplerState[] states = 327 new LargeMeanPoissonSamplerState[withMaxN - withMinN + 1]; 328 329 // Preserve values from the current array to the next 330 int currentIndex; 331 int nextIndex; 332 if (this.minN <= withMinN) { 333 // The current array starts before the new array 334 currentIndex = withMinN - this.minN; 335 nextIndex = 0; 336 } else { 337 // The new array starts before the current array 338 currentIndex = 0; 339 nextIndex = this.minN - withMinN; 340 } 341 final int length = Math.min(values.length - currentIndex, states.length - nextIndex); 342 if (length > 0) { 343 System.arraycopy(values, currentIndex, states, nextIndex, length); 344 } 345 346 return new PoissonSamplerCache(withMinN, withMaxN, states); 347 } 348}