1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.rng.sampling.distribution;
18
19 import org.apache.commons.rng.UniformRandomProvider;
20
21 /**
22 * Compute a sample from {@code n} values each with an associated probability. If all unique items
23 * are assigned the same probability it is more efficient to use the {@link DiscreteUniformSampler}.
24 *
25 * <p>The cumulative probability distribution is searched using a guide table to set an
26 * initial start point. This implementation is based on:</p>
27 *
28 * <blockquote>
29 * Devroye, Luc (1986). Non-Uniform Random Variate Generation.
30 * New York: Springer-Verlag. Chapter 3.2.4 "The method of guide tables" p. 96.
31 * </blockquote>
32 *
33 * <p>The size of the guide table can be controlled using a parameter. A larger guide table
34 * will improve performance at the cost of storage space.</p>
35 *
36 * <p>Sampling uses {@link UniformRandomProvider#nextDouble()}.</p>
37 *
38 * @see <a href="http://en.wikipedia.org/wiki/Probability_distribution#Discrete_probability_distribution">
39 * Discrete probability distribution (Wikipedia)</a>
40 * @since 1.3
41 */
42 public final class GuideTableDiscreteSampler
43 implements SharedStateDiscreteSampler {
44 /** The default value for {@code alpha}. */
45 private static final double DEFAULT_ALPHA = 1.0;
46 /** Underlying source of randomness. */
47 private final UniformRandomProvider rng;
48 /**
49 * The cumulative probability table ({@code f(x)}).
50 */
51 private final double[] cumulativeProbabilities;
52 /**
53 * The inverse cumulative probability guide table. This is a guide map between the cumulative
54 * probability (f(x)) and the value x. It is used to set the initial point for search
55 * of the cumulative probability table.
56 *
57 * <p>The index in the map is obtained using {@code p * map.length} where {@code p} is the
58 * known cumulative probability {@code f(x)} or a uniform random deviate {@code u}. The value
59 * stored at the index is value {@code x+1} when {@code p = f(x)} such that it is the
60 * exclusive upper bound on the sample value {@code x} for searching the cumulative probability
61 * table {@code f(x)}. The search of the cumulative probability is towards zero.</p>
62 */
63 private final int[] guideTable;
64
65 /**
66 * @param rng Generator of uniformly distributed random numbers.
67 * @param cumulativeProbabilities The cumulative probability table ({@code f(x)}).
68 * @param guideTable The inverse cumulative probability guide table.
69 */
70 private GuideTableDiscreteSampler(UniformRandomProvider rng,
71 double[] cumulativeProbabilities,
72 int[] guideTable) {
73 this.rng = rng;
74 this.cumulativeProbabilities = cumulativeProbabilities;
75 this.guideTable = guideTable;
76 }
77
78 /** {@inheritDoc} */
79 @Override
80 public int sample() {
81 // Compute a probability
82 final double u = rng.nextDouble();
83
84 // Initialise the search using the guide table to find an initial guess.
85 // The table provides an upper bound on the sample (x+1) for a known
86 // cumulative probability (f(x)).
87 int x = guideTable[getGuideTableIndex(u, guideTable.length)];
88 // Search down.
89 // In the edge case where u is 1.0 then 'x' will be 1 outside the range of the
90 // cumulative probability table and this will decrement to a valid range.
91 // In the case where 'u' is mapped to the same guide table index as a lower
92 // cumulative probability f(x) (due to rounding down) then this will not decrement
93 // and return the exclusive upper bound (x+1).
94 while (x != 0 && u <= cumulativeProbabilities[x - 1]) {
95 x--;
96 }
97 return x;
98 }
99
100 /** {@inheritDoc} */
101 @Override
102 public String toString() {
103 return "Guide table deviate [" + rng.toString() + "]";
104 }
105
106 /** {@inheritDoc} */
107 @Override
108 public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) {
109 return new GuideTableDiscreteSampler(rng, cumulativeProbabilities, guideTable);
110 }
111
112 /**
113 * Create a new sampler for an enumerated distribution using the given {@code probabilities}.
114 * The samples corresponding to each probability are assumed to be a natural sequence
115 * starting at zero.
116 *
117 * <p>The size of the guide table is {@code probabilities.length}.</p>
118 *
119 * @param rng Generator of uniformly distributed random numbers.
120 * @param probabilities The probabilities.
121 * @return the sampler
122 * @throws IllegalArgumentException if {@code probabilities} is null or empty, a
123 * probability is negative, infinite or {@code NaN}, or the sum of all
124 * probabilities is not strictly positive.
125 */
126 public static SharedStateDiscreteSampler of(UniformRandomProvider rng,
127 double[] probabilities) {
128 return of(rng, probabilities, DEFAULT_ALPHA);
129 }
130
131 /**
132 * Create a new sampler for an enumerated distribution using the given {@code probabilities}.
133 * The samples corresponding to each probability are assumed to be a natural sequence
134 * starting at zero.
135 *
136 * <p>The size of the guide table is {@code alpha * probabilities.length}.</p>
137 *
138 * @param rng Generator of uniformly distributed random numbers.
139 * @param probabilities The probabilities.
140 * @param alpha The alpha factor used to set the guide table size.
141 * @return the sampler
142 * @throws IllegalArgumentException if {@code probabilities} is null or empty, a
143 * probability is negative, infinite or {@code NaN}, the sum of all
144 * probabilities is not strictly positive, or {@code alpha} is not strictly positive.
145 */
146 public static SharedStateDiscreteSampler of(UniformRandomProvider rng,
147 double[] probabilities,
148 double alpha) {
149 validateParameters(probabilities, alpha);
150
151 final int size = probabilities.length;
152 final double[] cumulativeProbabilities = new double[size];
153
154 double sumProb = 0;
155 int count = 0;
156 for (final double prob : probabilities) {
157 // Compute and store cumulative probability.
158 sumProb += InternalUtils.requirePositiveFinite(prob, "probability");
159 cumulativeProbabilities[count++] = sumProb;
160 }
161
162 InternalUtils.requireStrictlyPositiveFinite(sumProb, "sum of probabilities");
163
164 // Note: The guide table is at least length 1. Compute the size avoiding overflow
165 // in case (alpha * size) is too large.
166 final int guideTableSize = (int) Math.ceil(alpha * size);
167 final int[] guideTable = new int[Math.max(guideTableSize, guideTableSize + 1)];
168
169 // Compute and store cumulative probability.
170 for (int x = 0; x < size; x++) {
171 final double norm = cumulativeProbabilities[x] / sumProb;
172 cumulativeProbabilities[x] = (norm < 1) ? norm : 1.0;
173
174 // Set the guide table value as an exclusive upper bound (x + 1)
175 final int index = getGuideTableIndex(cumulativeProbabilities[x], guideTable.length);
176 guideTable[index] = x + 1;
177 }
178
179 // Edge case for round-off
180 cumulativeProbabilities[size - 1] = 1.0;
181 // The final guide table entry is (maximum value of x + 1)
182 guideTable[guideTable.length - 1] = size;
183
184 // The first non-zero value in the guide table is from f(x=0).
185 // Any probabilities mapped below this must be sample x=0 so the
186 // table may initially be filled with zeros.
187
188 // Fill missing values in the guide table.
189 for (int i = 1; i < guideTable.length; i++) {
190 guideTable[i] = Math.max(guideTable[i - 1], guideTable[i]);
191 }
192
193 return new GuideTableDiscreteSampler(rng, cumulativeProbabilities, guideTable);
194 }
195
196 /**
197 * Validate the parameters.
198 *
199 * @param probabilities The probabilities.
200 * @param alpha The alpha factor used to set the guide table size.
201 * @throws IllegalArgumentException if {@code probabilities} is null or empty, or
202 * {@code alpha} is not strictly positive.
203 */
204 private static void validateParameters(double[] probabilities, double alpha) {
205 if (probabilities == null || probabilities.length == 0) {
206 throw new IllegalArgumentException("Probabilities must not be empty.");
207 }
208 InternalUtils.requireStrictlyPositive(alpha, "alpha");
209 }
210
211 /**
212 * Gets the guide table index for the probability. This is obtained using
213 * {@code p * (tableLength - 1)} so is inside the length of the table.
214 *
215 * @param p Cumulative probability.
216 * @param tableLength Table length.
217 * @return the guide table index.
218 */
219 private static int getGuideTableIndex(double p, int tableLength) {
220 // Note: This is only ever called when p is in the range of the cumulative
221 // probability table. So assume 0 <= p <= 1.
222 return (int) (p * (tableLength - 1));
223 }
224 }