1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.rng.sampling.distribution;
19
20 import org.apache.commons.rng.UniformRandomProvider;
21
22 /**
23 * Implementation of the <a href="https://en.wikipedia.org/wiki/Zipf's_law">Zipf distribution</a>.
24 *
25 * <p>Sampling uses {@link UniformRandomProvider#nextDouble()}.</p>
26 *
27 * @since 1.0
28 */
29 public class RejectionInversionZipfSampler
30 extends SamplerBase
31 implements SharedStateDiscreteSampler {
32
33 /** The implementation of the sample method. */
34 private final SharedStateDiscreteSampler delegate;
35
36 /**
37 * Implements the rejection-inversion method for the Zipf distribution.
38 */
39 private static final class RejectionInversionZipfSamplerImpl implements SharedStateDiscreteSampler {
40 /** Threshold below which Taylor series will be used. */
41 private static final double TAYLOR_THRESHOLD = 1e-8;
42 /** 1/2. */
43 private static final double F_1_2 = 0.5;
44 /** 1/3. */
45 private static final double F_1_3 = 1d / 3;
46 /** 1/4. */
47 private static final double F_1_4 = 0.25;
48 /** Number of elements. */
49 private final int numberOfElements;
50 /** Exponent parameter of the distribution. */
51 private final double exponent;
52 /** {@code hIntegral(1.5) - 1}. */
53 private final double hIntegralX1;
54 /** {@code hIntegral(numberOfElements + 0.5)}. */
55 private final double hIntegralNumberOfElements;
56 /** {@code hIntegralX1 - hIntegralNumberOfElements}. */
57 private final double r;
58 /** {@code 2 - hIntegralInverse(hIntegral(2.5) - h(2)}. */
59 private final double s;
60 /** Underlying source of randomness. */
61 private final UniformRandomProvider rng;
62
63 /**
64 * @param rng Generator of uniformly distributed random numbers.
65 * @param numberOfElements Number of elements (must be {@code > 0}).
66 * @param exponent Exponent (must be {@code > 0}).
67 */
68 RejectionInversionZipfSamplerImpl(UniformRandomProvider rng,
69 int numberOfElements,
70 double exponent) {
71 this.rng = rng;
72 this.numberOfElements = numberOfElements;
73 this.exponent = exponent;
74 this.hIntegralX1 = hIntegral(1.5) - 1;
75 this.hIntegralNumberOfElements = hIntegral(numberOfElements + F_1_2);
76 this.r = hIntegralX1 - hIntegralNumberOfElements;
77 this.s = 2 - hIntegralInverse(hIntegral(2.5) - h(2));
78 }
79
80 /**
81 * @param rng Generator of uniformly distributed random numbers.
82 * @param source Source to copy.
83 */
84 private RejectionInversionZipfSamplerImpl(UniformRandomProvider rng,
85 RejectionInversionZipfSamplerImpl source) {
86 this.rng = rng;
87 this.numberOfElements = source.numberOfElements;
88 this.exponent = source.exponent;
89 this.hIntegralX1 = source.hIntegralX1;
90 this.hIntegralNumberOfElements = source.hIntegralNumberOfElements;
91 this.r = source.r;
92 this.s = source.s;
93 }
94
95 @Override
96 public int sample() {
97 // The paper describes an algorithm for exponents larger than 1
98 // (Algorithm ZRI).
99 // The original method uses
100 // H(x) = (v + x)^(1 - q) / (1 - q)
101 // as the integral of the hat function.
102 // This function is undefined for q = 1, which is the reason for
103 // the limitation of the exponent.
104 // If instead the integral function
105 // H(x) = ((v + x)^(1 - q) - 1) / (1 - q)
106 // is used,
107 // for which a meaningful limit exists for q = 1, the method works
108 // for all positive exponents.
109 // The following implementation uses v = 0 and generates integral
110 // number in the range [1, numberOfElements].
111 // This is different to the original method where v is defined to
112 // be positive and numbers are taken from [0, i_max].
113 // This explains why the implementation looks slightly different.
114
115 while (true) {
116 final double u = hIntegralNumberOfElements + rng.nextDouble() * r;
117 // u is uniformly distributed in (hIntegralX1, hIntegralNumberOfElements]
118
119 final double x = hIntegralInverse(u);
120 int k = (int) (x + F_1_2);
121
122 // Limit k to the range [1, numberOfElements] if it would be outside
123 // due to numerical inaccuracies.
124 if (k < 1) {
125 k = 1;
126 } else if (k > numberOfElements) {
127 k = numberOfElements;
128 }
129
130 // Here, the distribution of k is given by:
131 //
132 // P(k = 1) = C * (hIntegral(1.5) - hIntegralX1) = C
133 // P(k = m) = C * (hIntegral(m + 1/2) - hIntegral(m - 1/2)) for m >= 2
134 //
135 // where C = 1 / (hIntegralNumberOfElements - hIntegralX1)
136
137 if (k - x <= s || u >= hIntegral(k + F_1_2) - h(k)) {
138
139 // Case k = 1:
140 //
141 // The right inequality is always true, because replacing k by 1 gives
142 // u >= hIntegral(1.5) - h(1) = hIntegralX1 and u is taken from
143 // (hIntegralX1, hIntegralNumberOfElements].
144 //
145 // Therefore, the acceptance rate for k = 1 is P(accepted | k = 1) = 1
146 // and the probability that 1 is returned as random value is
147 // P(k = 1 and accepted) = P(accepted | k = 1) * P(k = 1) = C = C / 1^exponent
148 //
149 // Case k >= 2:
150 //
151 // The left inequality (k - x <= s) is just a short cut
152 // to avoid the more expensive evaluation of the right inequality
153 // (u >= hIntegral(k + 0.5) - h(k)) in many cases.
154 //
155 // If the left inequality is true, the right inequality is also true:
156 // Theorem 2 in the paper is valid for all positive exponents, because
157 // the requirements h'(x) = -exponent/x^(exponent + 1) < 0 and
158 // (-1/hInverse'(x))'' = (1+1/exponent) * x^(1/exponent-1) >= 0
159 // are both fulfilled.
160 // Therefore, f(x) = x - hIntegralInverse(hIntegral(x + 0.5) - h(x))
161 // is a non-decreasing function. If k - x <= s holds,
162 // k - x <= s + f(k) - f(2) is obviously also true which is equivalent to
163 // -x <= -hIntegralInverse(hIntegral(k + 0.5) - h(k)),
164 // -hIntegralInverse(u) <= -hIntegralInverse(hIntegral(k + 0.5) - h(k)),
165 // and finally u >= hIntegral(k + 0.5) - h(k).
166 //
167 // Hence, the right inequality determines the acceptance rate:
168 // P(accepted | k = m) = h(m) / (hIntegrated(m+1/2) - hIntegrated(m-1/2))
169 // The probability that m is returned is given by
170 // P(k = m and accepted) = P(accepted | k = m) * P(k = m) = C * h(m) = C / m^exponent.
171 //
172 // In both cases the probabilities are proportional to the probability mass function
173 // of the Zipf distribution.
174
175 return k;
176 }
177 }
178 }
179
180 /** {@inheritDoc} */
181 @Override
182 public String toString() {
183 return "Rejection inversion Zipf deviate [" + rng.toString() + "]";
184 }
185
186 @Override
187 public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) {
188 return new RejectionInversionZipfSamplerImpl(rng, this);
189 }
190
191 /**
192 * {@code H(x)} is defined as
193 * <ul>
194 * <li>{@code (x^(1 - exponent) - 1) / (1 - exponent)}, if {@code exponent != 1}</li>
195 * <li>{@code log(x)}, if {@code exponent == 1}</li>
196 * </ul>
197 * H(x) is an integral function of h(x), the derivative of H(x) is h(x).
198 *
199 * @param x Free parameter.
200 * @return {@code H(x)}.
201 */
202 private double hIntegral(final double x) {
203 final double logX = Math.log(x);
204 return helper2((1 - exponent) * logX) * logX;
205 }
206
207 /**
208 * {@code h(x) = 1 / x^exponent}.
209 *
210 * @param x Free parameter.
211 * @return {@code h(x)}.
212 */
213 private double h(final double x) {
214 return Math.exp(-exponent * Math.log(x));
215 }
216
217 /**
218 * The inverse function of {@code H(x)}.
219 *
220 * @param x Free parameter
221 * @return y for which {@code H(y) = x}.
222 */
223 private double hIntegralInverse(final double x) {
224 double t = x * (1 - exponent);
225 if (t < -1) {
226 // Limit value to the range [-1, +inf).
227 // t could be smaller than -1 in some rare cases due to numerical errors.
228 t = -1;
229 }
230 return Math.exp(helper1(t) * x);
231 }
232
233 /**
234 * Helper function that calculates {@code log(1 + x) / x}.
235 * <p>
236 * A Taylor series expansion is used, if x is close to 0.
237 * </p>
238 *
239 * @param x A value larger than or equal to -1.
240 * @return {@code log(1 + x) / x}.
241 */
242 private static double helper1(final double x) {
243 if (Math.abs(x) > TAYLOR_THRESHOLD) {
244 return Math.log1p(x) / x;
245 }
246 return 1 - x * (F_1_2 - x * (F_1_3 - F_1_4 * x));
247 }
248
249 /**
250 * Helper function to calculate {@code (exp(x) - 1) / x}.
251 * <p>
252 * A Taylor series expansion is used, if x is close to 0.
253 * </p>
254 *
255 * @param x Free parameter.
256 * @return {@code (exp(x) - 1) / x} if x is non-zero, or 1 if x = 0.
257 */
258 private static double helper2(final double x) {
259 if (Math.abs(x) > TAYLOR_THRESHOLD) {
260 return Math.expm1(x) / x;
261 }
262 return 1 + x * F_1_2 * (1 + x * F_1_3 * (1 + F_1_4 * x));
263 }
264 }
265
266 /**
267 * This instance delegates sampling. Use the factory method
268 * {@link #of(UniformRandomProvider, int, double)} to create an optimal sampler.
269 *
270 * @param rng Generator of uniformly distributed random numbers.
271 * @param numberOfElements Number of elements.
272 * @param exponent Exponent.
273 * @throws IllegalArgumentException if {@code numberOfElements <= 0}
274 * or {@code exponent < 0}.
275 */
276 public RejectionInversionZipfSampler(UniformRandomProvider rng,
277 int numberOfElements,
278 double exponent) {
279 this(of(rng, numberOfElements, exponent));
280 }
281
282 /**
283 * Private constructor used by to prevent partially initialized object if the construction
284 * of the delegate throws. In future versions the public constructor should be removed.
285 *
286 * @param delegate Delegate.
287 */
288 private RejectionInversionZipfSampler(SharedStateDiscreteSampler delegate) {
289 super(null);
290 this.delegate = delegate;
291 }
292
293 /**
294 * Rejection inversion sampling method for a discrete, bounded Zipf
295 * distribution that is based on the method described in
296 * <blockquote>
297 * Wolfgang Hörmann and Gerhard Derflinger.
298 * <i>"Rejection-inversion to generate variates from monotone discrete
299 * distributions",</i><br>
300 * <strong>ACM Transactions on Modeling and Computer Simulation</strong> (TOMACS) 6.3 (1996): 169-184.
301 * </blockquote>
302 */
303 @Override
304 public int sample() {
305 return delegate.sample();
306 }
307
308 /** {@inheritDoc} */
309 @Override
310 public String toString() {
311 return delegate.toString();
312 }
313
314 /**
315 * {@inheritDoc}
316 *
317 * @since 1.3
318 */
319 @Override
320 public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) {
321 return delegate.withUniformRandomProvider(rng);
322 }
323
324 /**
325 * Creates a new Zipf distribution sampler.
326 *
327 * <p>Note when {@code exponent = 0} the Zipf distribution reduces to a
328 * discrete uniform distribution over the interval {@code [1, n]} with {@code n}
329 * the number of elements.
330 *
331 * @param rng Generator of uniformly distributed random numbers.
332 * @param numberOfElements Number of elements.
333 * @param exponent Exponent.
334 * @return the sampler
335 * @throws IllegalArgumentException if {@code numberOfElements <= 0} or
336 * {@code exponent < 0}.
337 * @since 1.3
338 */
339 public static SharedStateDiscreteSampler of(UniformRandomProvider rng,
340 int numberOfElements,
341 double exponent) {
342 if (numberOfElements <= 0) {
343 throw new IllegalArgumentException("number of elements is not strictly positive: " + numberOfElements);
344 }
345 InternalUtils.requirePositive(exponent, "exponent");
346
347 // When the exponent is at the limit of 0 the distribution PMF reduces to 1 / n
348 // and sampling can use a discrete uniform sampler.
349 return exponent > 0 ?
350 new RejectionInversionZipfSamplerImpl(rng, numberOfElements, exponent) :
351 DiscreteUniformSampler.of(rng, 1, numberOfElements);
352 }
353 }