1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.rng.examples.jmh.sampling.distribution; 19 20 import java.util.concurrent.TimeUnit; 21 import java.util.function.DoubleFunction; 22 import org.apache.commons.rng.RandomProviderState; 23 import org.apache.commons.rng.RestorableUniformRandomProvider; 24 import org.apache.commons.rng.UniformRandomProvider; 25 import org.apache.commons.rng.sampling.PermutationSampler; 26 import org.apache.commons.rng.sampling.distribution.DiscreteSampler; 27 import org.apache.commons.rng.sampling.distribution.PoissonSampler; 28 import org.apache.commons.rng.sampling.distribution.PoissonSamplerCache; 29 import org.apache.commons.rng.simple.RandomSource; 30 import org.openjdk.jmh.annotations.Benchmark; 31 import org.openjdk.jmh.annotations.BenchmarkMode; 32 import org.openjdk.jmh.annotations.Fork; 33 import org.openjdk.jmh.annotations.Measurement; 34 import org.openjdk.jmh.annotations.Mode; 35 import org.openjdk.jmh.annotations.OutputTimeUnit; 36 import org.openjdk.jmh.annotations.Param; 37 import org.openjdk.jmh.annotations.Scope; 38 import org.openjdk.jmh.annotations.Setup; 39 import org.openjdk.jmh.annotations.State; 40 import org.openjdk.jmh.annotations.Warmup; 41 import org.openjdk.jmh.infra.Blackhole; 42 43 /** 44 * Executes benchmark to compare the speed of generation of Poisson random numbers when using a 45 * cache. 46 * 47 * <p>The benchmark is designed for a worse case scenario of Poisson means that are uniformly spread 48 * over a range and non-integer. A single sample is required per mean, E.g.</p> 49 * 50 * <pre> 51 * int min = 40; 52 * int max = 1000; 53 * int range = max - min; 54 * UniformRandomProvider rng = ...; 55 * 56 * // Compare ... 57 * for (int i = 0; i < 1000; i++) { 58 * PoissonSampler.of(rng, min + rng.nextDouble() * range).sample(); 59 * } 60 * 61 * // To ... 62 * PoissonSamplerCache cache = new PoissonSamplerCache(min, max); 63 * for (int i = 0; i < 1000; i++) { 64 * PoissonSamplerCache.createPoissonSampler(rng, min + rng.nextDouble() * range).sample(); 65 * } 66 * </pre> 67 * 68 * <p>The alternative scenario where the means are integer is not considered as this could be easily 69 * handled by creating an array to hold the PoissonSamplers for each mean. This does not require any 70 * specialised caching of state and is simple enough to perform for single threaded applications:</p> 71 * 72 * <pre> 73 * public class SimpleUnsafePoissonSamplerCache { 74 * int min = 50; 75 * int max = 100; 76 * PoissonSampler[] samplers = new PoissonSampler[max - min + 1]; 77 * 78 * public PoissonSampler createPoissonSampler(UniformRandomProvider rng, int mean) { 79 * if (mean < min || mean > max) { 80 * return PoissonSampler.of(rng, mean); 81 * } 82 * int index = mean - min; 83 * PoissonSampler sample = samplers[index]; 84 * if (sampler == null) { 85 * sampler = PoissonSampler.of(rng, mean); 86 * samplers[index] = sampler; 87 * } 88 * return sampler; 89 * } 90 * } 91 * </pre> 92 * 93 * <p>Note that in this example the UniformRandomProvider is also cached and so this is only 94 * applicable to a single threaded application. Thread safety could be ensured using the 95 * {@link org.apache.commons.rng.sampling.SharedStateSampler SharedStateSampler} functionality 96 * of the cached sampler.</p> 97 * 98 * <p>Re-written to use the PoissonSamplerCache would provide a new PoissonSampler per call in a 99 * thread-safe manner: 100 * 101 * <pre> 102 * public class SimplePoissonSamplerCache { 103 * int min = 50; 104 * int max = 100; 105 * PoissonSamplerCache samplers = new PoissonSamplerCache(min, max); 106 * 107 * public PoissonSampler createPoissonSampler(UniformRandomProvider rng, int mean) { 108 * return samplers.createPoissonSampler(rng, mean); 109 * } 110 * } 111 * </pre> 112 */ 113 @BenchmarkMode(Mode.AverageTime) 114 @OutputTimeUnit(TimeUnit.MICROSECONDS) 115 @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 116 @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 117 @State(Scope.Benchmark) 118 @Fork(value = 1, jvmArgs = { "-server", "-Xms128M", "-Xmx128M" }) 119 public class PoissonSamplerCachePerformance { 120 /** Number of samples per run. */ 121 private static final int NUM_SAMPLES = 100_000; 122 /** 123 * Number of range samples. 124 * 125 * <p>Note: The LargeMeanPoissonSampler will not use a SmallMeanPoissonSampler 126 * if the mean is an integer. This will occur if the [range sample] * range is 127 * an integer. 128 * 129 * <p>If the SmallMeanPoissonSampler is not used then the cache has more 130 * advantage over the uncached version as relatively more time is spent in 131 * initialising the algorithm. 132 * 133 * <p>To avoid this use a prime number above the maximum range 134 * (currently 4096). Any number (n/RANGE_SAMPLES) * range will not be integer 135 * with {@code n < RANGE_SAMPLES} and {@code range < RANGE_SAMPLES} (unless n==0). 136 */ 137 private static final int RANGE_SAMPLE_SIZE = 4099; 138 /** The size of the seed. */ 139 private static final int SEED_SIZE = 128; 140 141 /** 142 * Seed used to ensure the tests are the same. This can be different per 143 * benchmark, but should be the same within the benchmark. 144 */ 145 private static final int[] SEED; 146 147 /** 148 * The range sample. Should contain doubles in the range 0 inclusive to 1 exclusive. 149 * 150 * <p>The range sample is used to create a mean using: 151 * rangeMin + sample * (rangeMax - rangeMin). 152 * 153 * <p>Ideally this should be large enough to fully sample the 154 * range when expressed as discrete integers, i.e. no sparseness, and random. 155 */ 156 private static final double[] RANGE_SAMPLE; 157 158 static { 159 // Build a random seed for all the tests 160 SEED = new int[SEED_SIZE]; 161 final UniformRandomProvider rng = RandomSource.MWC_256.create(); 162 for (int i = 0; i < SEED.length; i++) { 163 SEED[i] = rng.nextInt(); 164 } 165 166 final int size = RANGE_SAMPLE_SIZE; 167 final int[] sample = PermutationSampler.natural(size); 168 PermutationSampler.shuffle(rng, sample); 169 170 RANGE_SAMPLE = new double[size]; 171 for (int i = 0; i < size; i++) { 172 // Note: This will have one occurrence of zero in the range. 173 // This will create at least one LargeMeanPoissonSampler that will 174 // not use a SmallMeanPoissonSampler. The different performance of this 175 // will be lost among the other samples. 176 RANGE_SAMPLE[i] = (double) sample[i] / size; 177 } 178 } 179 180 /** 181 * The benchmark state (retrieve the various "RandomSource"s). 182 */ 183 @State(Scope.Benchmark) 184 public static class Sources { 185 /** 186 * RNG providers. 187 * 188 * <p>Use different speeds.</p> 189 * 190 * @see <a href="https://commons.apache.org/proper/commons-rng/userguide/rng.html"> 191 * Commons RNG user guide</a> 192 */ 193 @Param({ "SPLIT_MIX_64", 194 // Comment in for slower generators 195 //"MWC_256", "KISS", "WELL_1024_A", "WELL_44497_B" 196 }) 197 private String randomSourceName; 198 199 /** RNG. */ 200 private RestorableUniformRandomProvider generator; 201 202 /** 203 * The state of the generator at the start of the test (for reproducible 204 * results). 205 */ 206 private RandomProviderState state; 207 208 /** 209 * @return the RNG. 210 */ 211 public UniformRandomProvider getGenerator() { 212 generator.restoreState(state); 213 return generator; 214 } 215 216 /** Instantiates generator. */ 217 @Setup 218 public void setup() { 219 final RandomSource randomSource = RandomSource 220 .valueOf(randomSourceName); 221 // Use the same seed 222 generator = randomSource.create(SEED.clone()); 223 state = generator.saveState(); 224 } 225 } 226 227 /** 228 * The range of mean values for testing the cache. 229 */ 230 @State(Scope.Benchmark) 231 public static class MeanRange { 232 /** 233 * Test range. 234 * 235 * <p>The covers the best case scenario of caching everything (range=1) and upwards 236 * in powers of 4. 237 */ 238 @Param({ "1", "4", "16", "64", "256", "1024", "4096"}) 239 private double range; 240 241 /** 242 * Gets the mean. 243 * 244 * @param i the index 245 * @return the mean 246 */ 247 public double getMean(int i) { 248 return getMin() + RANGE_SAMPLE[i % RANGE_SAMPLE.length] * range; 249 } 250 251 /** 252 * Gets the min of the range. 253 * 254 * @return the min 255 */ 256 public double getMin() { 257 return PoissonSamplerCache.getMinimumCachedMean(); 258 } 259 260 /** 261 * Gets the max of the range. 262 * 263 * @return the max 264 */ 265 public double getMax() { 266 return getMin() + range; 267 } 268 } 269 270 /** 271 * Exercises a poisson sampler created for a single use with a range of means. 272 * 273 * @param factory The factory. 274 * @param range The range of means. 275 * @param bh Data sink. 276 */ 277 private static void runSample(DoubleFunction<DiscreteSampler> factory, 278 MeanRange range, 279 Blackhole bh) { 280 for (int i = 0; i < NUM_SAMPLES; i++) { 281 bh.consume(factory.apply(range.getMean(i)).sample()); 282 } 283 } 284 285 // Benchmarks methods below. 286 287 /** 288 * @param sources Source of randomness. 289 * @param range The range. 290 * @param bh Data sink. 291 */ 292 @Benchmark 293 public void runPoissonSampler(Sources sources, 294 MeanRange range, 295 Blackhole bh) { 296 final UniformRandomProvider r = sources.getGenerator(); 297 final DoubleFunction<DiscreteSampler> factory = mean -> PoissonSampler.of(r, mean); 298 runSample(factory, range, bh); 299 } 300 301 /** 302 * @param sources Source of randomness. 303 * @param range The range. 304 * @param bh Data sink. 305 */ 306 @Benchmark 307 public void runPoissonSamplerCacheWhenEmpty(Sources sources, 308 MeanRange range, 309 Blackhole bh) { 310 final UniformRandomProvider r = sources.getGenerator(); 311 final PoissonSamplerCache cache = new PoissonSamplerCache(0, 0); 312 final DoubleFunction<DiscreteSampler> factory = mean -> cache.createSharedStateSampler(r, mean); 313 runSample(factory, range, bh); 314 } 315 316 /** 317 * @param sources Source of randomness. 318 * @param range The range. 319 * @param bh Data sink. 320 */ 321 @Benchmark 322 public void runPoissonSamplerCache(Sources sources, 323 MeanRange range, 324 Blackhole bh) { 325 final UniformRandomProvider r = sources.getGenerator(); 326 final PoissonSamplerCache cache = new PoissonSamplerCache( 327 range.getMin(), range.getMax()); 328 final DoubleFunction<DiscreteSampler> factory = mean -> cache.createSharedStateSampler(r, mean); 329 runSample(factory, range, bh); 330 } 331 }