GuideTableDiscreteSampler.java
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.commons.rng.sampling.distribution;
- import org.apache.commons.rng.UniformRandomProvider;
- /**
- * Compute a sample from {@code n} values each with an associated probability. If all unique items
- * are assigned the same probability it is more efficient to use the {@link DiscreteUniformSampler}.
- *
- * <p>The cumulative probability distribution is searched using a guide table to set an
- * initial start point. This implementation is based on:</p>
- *
- * <blockquote>
- * Devroye, Luc (1986). Non-Uniform Random Variate Generation.
- * New York: Springer-Verlag. Chapter 3.2.4 "The method of guide tables" p. 96.
- * </blockquote>
- *
- * <p>The size of the guide table can be controlled using a parameter. A larger guide table
- * will improve performance at the cost of storage space.</p>
- *
- * <p>Sampling uses {@link UniformRandomProvider#nextDouble()}.</p>
- *
- * @see <a href="http://en.wikipedia.org/wiki/Probability_distribution#Discrete_probability_distribution">
- * Discrete probability distribution (Wikipedia)</a>
- * @since 1.3
- */
- public final class GuideTableDiscreteSampler
- implements SharedStateDiscreteSampler {
- /** The default value for {@code alpha}. */
- private static final double DEFAULT_ALPHA = 1.0;
- /** Underlying source of randomness. */
- private final UniformRandomProvider rng;
- /**
- * The cumulative probability table ({@code f(x)}).
- */
- private final double[] cumulativeProbabilities;
- /**
- * The inverse cumulative probability guide table. This is a guide map between the cumulative
- * probability (f(x)) and the value x. It is used to set the initial point for search
- * of the cumulative probability table.
- *
- * <p>The index in the map is obtained using {@code p * map.length} where {@code p} is the
- * known cumulative probability {@code f(x)} or a uniform random deviate {@code u}. The value
- * stored at the index is value {@code x+1} when {@code p = f(x)} such that it is the
- * exclusive upper bound on the sample value {@code x} for searching the cumulative probability
- * table {@code f(x)}. The search of the cumulative probability is towards zero.</p>
- */
- private final int[] guideTable;
- /**
- * @param rng Generator of uniformly distributed random numbers.
- * @param cumulativeProbabilities The cumulative probability table ({@code f(x)}).
- * @param guideTable The inverse cumulative probability guide table.
- */
- private GuideTableDiscreteSampler(UniformRandomProvider rng,
- double[] cumulativeProbabilities,
- int[] guideTable) {
- this.rng = rng;
- this.cumulativeProbabilities = cumulativeProbabilities;
- this.guideTable = guideTable;
- }
- /** {@inheritDoc} */
- @Override
- public int sample() {
- // Compute a probability
- final double u = rng.nextDouble();
- // Initialise the search using the guide table to find an initial guess.
- // The table provides an upper bound on the sample (x+1) for a known
- // cumulative probability (f(x)).
- int x = guideTable[getGuideTableIndex(u, guideTable.length)];
- // Search down.
- // In the edge case where u is 1.0 then 'x' will be 1 outside the range of the
- // cumulative probability table and this will decrement to a valid range.
- // In the case where 'u' is mapped to the same guide table index as a lower
- // cumulative probability f(x) (due to rounding down) then this will not decrement
- // and return the exclusive upper bound (x+1).
- while (x != 0 && u <= cumulativeProbabilities[x - 1]) {
- x--;
- }
- return x;
- }
- /** {@inheritDoc} */
- @Override
- public String toString() {
- return "Guide table deviate [" + rng.toString() + "]";
- }
- /** {@inheritDoc} */
- @Override
- public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) {
- return new GuideTableDiscreteSampler(rng, cumulativeProbabilities, guideTable);
- }
- /**
- * Create a new sampler for an enumerated distribution using the given {@code probabilities}.
- * The samples corresponding to each probability are assumed to be a natural sequence
- * starting at zero.
- *
- * <p>The size of the guide table is {@code probabilities.length}.</p>
- *
- * @param rng Generator of uniformly distributed random numbers.
- * @param probabilities The probabilities.
- * @return the sampler
- * @throws IllegalArgumentException if {@code probabilities} is null or empty, a
- * probability is negative, infinite or {@code NaN}, or the sum of all
- * probabilities is not strictly positive.
- */
- public static SharedStateDiscreteSampler of(UniformRandomProvider rng,
- double[] probabilities) {
- return of(rng, probabilities, DEFAULT_ALPHA);
- }
- /**
- * Create a new sampler for an enumerated distribution using the given {@code probabilities}.
- * The samples corresponding to each probability are assumed to be a natural sequence
- * starting at zero.
- *
- * <p>The size of the guide table is {@code alpha * probabilities.length}.</p>
- *
- * @param rng Generator of uniformly distributed random numbers.
- * @param probabilities The probabilities.
- * @param alpha The alpha factor used to set the guide table size.
- * @return the sampler
- * @throws IllegalArgumentException if {@code probabilities} is null or empty, a
- * probability is negative, infinite or {@code NaN}, the sum of all
- * probabilities is not strictly positive, or {@code alpha} is not strictly positive.
- */
- public static SharedStateDiscreteSampler of(UniformRandomProvider rng,
- double[] probabilities,
- double alpha) {
- validateParameters(probabilities, alpha);
- final int size = probabilities.length;
- final double[] cumulativeProbabilities = new double[size];
- double sumProb = 0;
- int count = 0;
- for (final double prob : probabilities) {
- // Compute and store cumulative probability.
- sumProb += InternalUtils.requirePositiveFinite(prob, "probability");
- cumulativeProbabilities[count++] = sumProb;
- }
- InternalUtils.requireStrictlyPositiveFinite(sumProb, "sum of probabilities");
- // Note: The guide table is at least length 1. Compute the size avoiding overflow
- // in case (alpha * size) is too large.
- final int guideTableSize = (int) Math.ceil(alpha * size);
- final int[] guideTable = new int[Math.max(guideTableSize, guideTableSize + 1)];
- // Compute and store cumulative probability.
- for (int x = 0; x < size; x++) {
- final double norm = cumulativeProbabilities[x] / sumProb;
- cumulativeProbabilities[x] = (norm < 1) ? norm : 1.0;
- // Set the guide table value as an exclusive upper bound (x + 1)
- final int index = getGuideTableIndex(cumulativeProbabilities[x], guideTable.length);
- guideTable[index] = x + 1;
- }
- // Edge case for round-off
- cumulativeProbabilities[size - 1] = 1.0;
- // The final guide table entry is (maximum value of x + 1)
- guideTable[guideTable.length - 1] = size;
- // The first non-zero value in the guide table is from f(x=0).
- // Any probabilities mapped below this must be sample x=0 so the
- // table may initially be filled with zeros.
- // Fill missing values in the guide table.
- for (int i = 1; i < guideTable.length; i++) {
- guideTable[i] = Math.max(guideTable[i - 1], guideTable[i]);
- }
- return new GuideTableDiscreteSampler(rng, cumulativeProbabilities, guideTable);
- }
- /**
- * Validate the parameters.
- *
- * @param probabilities The probabilities.
- * @param alpha The alpha factor used to set the guide table size.
- * @throws IllegalArgumentException if {@code probabilities} is null or empty, or
- * {@code alpha} is not strictly positive.
- */
- private static void validateParameters(double[] probabilities, double alpha) {
- if (probabilities == null || probabilities.length == 0) {
- throw new IllegalArgumentException("Probabilities must not be empty.");
- }
- InternalUtils.requireStrictlyPositive(alpha, "alpha");
- }
- /**
- * Gets the guide table index for the probability. This is obtained using
- * {@code p * (tableLength - 1)} so is inside the length of the table.
- *
- * @param p Cumulative probability.
- * @param tableLength Table length.
- * @return the guide table index.
- */
- private static int getGuideTableIndex(double p, int tableLength) {
- // Note: This is only ever called when p is in the range of the cumulative
- // probability table. So assume 0 <= p <= 1.
- return (int) (p * (tableLength - 1));
- }
- }