GuideTableDiscreteSampler.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.rng.sampling.distribution;

  18. import org.apache.commons.rng.UniformRandomProvider;

  19. /**
  20.  * Compute a sample from {@code n} values each with an associated probability. If all unique items
  21.  * are assigned the same probability it is more efficient to use the {@link DiscreteUniformSampler}.
  22.  *
  23.  * <p>The cumulative probability distribution is searched using a guide table to set an
  24.  * initial start point. This implementation is based on:</p>
  25.  *
  26.  * <blockquote>
  27.  *  Devroye, Luc (1986). Non-Uniform Random Variate Generation.
  28.  *  New York: Springer-Verlag. Chapter 3.2.4 "The method of guide tables" p. 96.
  29.  * </blockquote>
  30.  *
  31.  * <p>The size of the guide table can be controlled using a parameter. A larger guide table
  32.  * will improve performance at the cost of storage space.</p>
  33.  *
  34.  * <p>Sampling uses {@link UniformRandomProvider#nextDouble()}.</p>
  35.  *
  36.  * @see <a href="http://en.wikipedia.org/wiki/Probability_distribution#Discrete_probability_distribution">
  37.  * Discrete probability distribution (Wikipedia)</a>
  38.  * @since 1.3
  39.  */
  40. public final class GuideTableDiscreteSampler
  41.     implements SharedStateDiscreteSampler {
  42.     /** The default value for {@code alpha}. */
  43.     private static final double DEFAULT_ALPHA = 1.0;
  44.     /** Underlying source of randomness. */
  45.     private final UniformRandomProvider rng;
  46.     /**
  47.      * The cumulative probability table ({@code f(x)}).
  48.      */
  49.     private final double[] cumulativeProbabilities;
  50.     /**
  51.      * The inverse cumulative probability guide table. This is a guide map between the cumulative
  52.      * probability (f(x)) and the value x. It is used to set the initial point for search
  53.      * of the cumulative probability table.
  54.      *
  55.      * <p>The index in the map is obtained using {@code p * map.length} where {@code p} is the
  56.      * known cumulative probability {@code f(x)} or a uniform random deviate {@code u}. The value
  57.      * stored at the index is value {@code x+1} when {@code p = f(x)} such that it is the
  58.      * exclusive upper bound on the sample value {@code x} for searching the cumulative probability
  59.      * table {@code f(x)}. The search of the cumulative probability is towards zero.</p>
  60.      */
  61.     private final int[] guideTable;

  62.     /**
  63.      * @param rng Generator of uniformly distributed random numbers.
  64.      * @param cumulativeProbabilities The cumulative probability table ({@code f(x)}).
  65.      * @param guideTable The inverse cumulative probability guide table.
  66.      */
  67.     private GuideTableDiscreteSampler(UniformRandomProvider rng,
  68.                                       double[] cumulativeProbabilities,
  69.                                       int[] guideTable) {
  70.         this.rng = rng;
  71.         this.cumulativeProbabilities = cumulativeProbabilities;
  72.         this.guideTable = guideTable;
  73.     }

  74.     /** {@inheritDoc} */
  75.     @Override
  76.     public int sample() {
  77.         // Compute a probability
  78.         final double u = rng.nextDouble();

  79.         // Initialise the search using the guide table to find an initial guess.
  80.         // The table provides an upper bound on the sample (x+1) for a known
  81.         // cumulative probability (f(x)).
  82.         int x = guideTable[getGuideTableIndex(u, guideTable.length)];
  83.         // Search down.
  84.         // In the edge case where u is 1.0 then 'x' will be 1 outside the range of the
  85.         // cumulative probability table and this will decrement to a valid range.
  86.         // In the case where 'u' is mapped to the same guide table index as a lower
  87.         // cumulative probability f(x) (due to rounding down) then this will not decrement
  88.         // and return the exclusive upper bound (x+1).
  89.         while (x != 0 && u <= cumulativeProbabilities[x - 1]) {
  90.             x--;
  91.         }
  92.         return x;
  93.     }

  94.     /** {@inheritDoc} */
  95.     @Override
  96.     public String toString() {
  97.         return "Guide table deviate [" + rng.toString() + "]";
  98.     }

  99.     /** {@inheritDoc} */
  100.     @Override
  101.     public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) {
  102.         return new GuideTableDiscreteSampler(rng, cumulativeProbabilities, guideTable);
  103.     }

  104.     /**
  105.      * Create a new sampler for an enumerated distribution using the given {@code probabilities}.
  106.      * The samples corresponding to each probability are assumed to be a natural sequence
  107.      * starting at zero.
  108.      *
  109.      * <p>The size of the guide table is {@code probabilities.length}.</p>
  110.      *
  111.      * @param rng Generator of uniformly distributed random numbers.
  112.      * @param probabilities The probabilities.
  113.      * @return the sampler
  114.      * @throws IllegalArgumentException if {@code probabilities} is null or empty, a
  115.      * probability is negative, infinite or {@code NaN}, or the sum of all
  116.      * probabilities is not strictly positive.
  117.      */
  118.     public static SharedStateDiscreteSampler of(UniformRandomProvider rng,
  119.                                                 double[] probabilities) {
  120.         return of(rng, probabilities, DEFAULT_ALPHA);
  121.     }

  122.     /**
  123.      * Create a new sampler for an enumerated distribution using the given {@code probabilities}.
  124.      * The samples corresponding to each probability are assumed to be a natural sequence
  125.      * starting at zero.
  126.      *
  127.      * <p>The size of the guide table is {@code alpha * probabilities.length}.</p>
  128.      *
  129.      * @param rng Generator of uniformly distributed random numbers.
  130.      * @param probabilities The probabilities.
  131.      * @param alpha The alpha factor used to set the guide table size.
  132.      * @return the sampler
  133.      * @throws IllegalArgumentException if {@code probabilities} is null or empty, a
  134.      * probability is negative, infinite or {@code NaN}, the sum of all
  135.      * probabilities is not strictly positive, or {@code alpha} is not strictly positive.
  136.      */
  137.     public static SharedStateDiscreteSampler of(UniformRandomProvider rng,
  138.                                                 double[] probabilities,
  139.                                                 double alpha) {
  140.         validateParameters(probabilities, alpha);

  141.         final int size = probabilities.length;
  142.         final double[] cumulativeProbabilities = new double[size];

  143.         double sumProb = 0;
  144.         int count = 0;
  145.         for (final double prob : probabilities) {
  146.             // Compute and store cumulative probability.
  147.             sumProb += InternalUtils.requirePositiveFinite(prob, "probability");
  148.             cumulativeProbabilities[count++] = sumProb;
  149.         }

  150.         InternalUtils.requireStrictlyPositiveFinite(sumProb, "sum of probabilities");

  151.         // Note: The guide table is at least length 1. Compute the size avoiding overflow
  152.         // in case (alpha * size) is too large.
  153.         final int guideTableSize = (int) Math.ceil(alpha * size);
  154.         final int[] guideTable = new int[Math.max(guideTableSize, guideTableSize + 1)];

  155.         // Compute and store cumulative probability.
  156.         for (int x = 0; x < size; x++) {
  157.             final double norm = cumulativeProbabilities[x] / sumProb;
  158.             cumulativeProbabilities[x] = (norm < 1) ? norm : 1.0;

  159.             // Set the guide table value as an exclusive upper bound (x + 1)
  160.             final int index = getGuideTableIndex(cumulativeProbabilities[x], guideTable.length);
  161.             guideTable[index] = x + 1;
  162.         }

  163.         // Edge case for round-off
  164.         cumulativeProbabilities[size - 1] = 1.0;
  165.         // The final guide table entry is (maximum value of x + 1)
  166.         guideTable[guideTable.length - 1] = size;

  167.         // The first non-zero value in the guide table is from f(x=0).
  168.         // Any probabilities mapped below this must be sample x=0 so the
  169.         // table may initially be filled with zeros.

  170.         // Fill missing values in the guide table.
  171.         for (int i = 1; i < guideTable.length; i++) {
  172.             guideTable[i] = Math.max(guideTable[i - 1], guideTable[i]);
  173.         }

  174.         return new GuideTableDiscreteSampler(rng, cumulativeProbabilities, guideTable);
  175.     }

  176.     /**
  177.      * Validate the parameters.
  178.      *
  179.      * @param probabilities The probabilities.
  180.      * @param alpha The alpha factor used to set the guide table size.
  181.      * @throws IllegalArgumentException if {@code probabilities} is null or empty, or
  182.      * {@code alpha} is not strictly positive.
  183.      */
  184.     private static void validateParameters(double[] probabilities, double alpha) {
  185.         if (probabilities == null || probabilities.length == 0) {
  186.             throw new IllegalArgumentException("Probabilities must not be empty.");
  187.         }
  188.         InternalUtils.requireStrictlyPositive(alpha, "alpha");
  189.     }

  190.     /**
  191.      * Gets the guide table index for the probability. This is obtained using
  192.      * {@code p * (tableLength - 1)} so is inside the length of the table.
  193.      *
  194.      * @param p Cumulative probability.
  195.      * @param tableLength Table length.
  196.      * @return the guide table index.
  197.      */
  198.     private static int getGuideTableIndex(double p, int tableLength) {
  199.         // Note: This is only ever called when p is in the range of the cumulative
  200.         // probability table. So assume 0 <= p <= 1.
  201.         return (int) (p * (tableLength - 1));
  202.     }
  203. }