001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.rng.sampling.distribution; 019 020import org.apache.commons.rng.UniformRandomProvider; 021 022/** 023 * Implementation of the <a href="https://en.wikipedia.org/wiki/Zipf's_law">Zipf distribution</a>. 024 * 025 * <p>Sampling uses {@link UniformRandomProvider#nextDouble()}.</p> 026 * 027 * @since 1.0 028 */ 029public class RejectionInversionZipfSampler 030 extends SamplerBase 031 implements SharedStateDiscreteSampler { 032 033 /** The implementation of the sample method. */ 034 private final SharedStateDiscreteSampler delegate; 035 036 /** 037 * Implements the rejection-inversion method for the Zipf distribution. 038 */ 039 private static final class RejectionInversionZipfSamplerImpl implements SharedStateDiscreteSampler { 040 /** Threshold below which Taylor series will be used. */ 041 private static final double TAYLOR_THRESHOLD = 1e-8; 042 /** 1/2. */ 043 private static final double F_1_2 = 0.5; 044 /** 1/3. */ 045 private static final double F_1_3 = 1d / 3; 046 /** 1/4. */ 047 private static final double F_1_4 = 0.25; 048 /** Number of elements. */ 049 private final int numberOfElements; 050 /** Exponent parameter of the distribution. */ 051 private final double exponent; 052 /** {@code hIntegral(1.5) - 1}. */ 053 private final double hIntegralX1; 054 /** {@code hIntegral(numberOfElements + 0.5)}. */ 055 private final double hIntegralNumberOfElements; 056 /** {@code hIntegralX1 - hIntegralNumberOfElements}. */ 057 private final double r; 058 /** {@code 2 - hIntegralInverse(hIntegral(2.5) - h(2)}. */ 059 private final double s; 060 /** Underlying source of randomness. */ 061 private final UniformRandomProvider rng; 062 063 /** 064 * @param rng Generator of uniformly distributed random numbers. 065 * @param numberOfElements Number of elements (must be {@code > 0}). 066 * @param exponent Exponent (must be {@code > 0}). 067 */ 068 RejectionInversionZipfSamplerImpl(UniformRandomProvider rng, 069 int numberOfElements, 070 double exponent) { 071 this.rng = rng; 072 this.numberOfElements = numberOfElements; 073 this.exponent = exponent; 074 this.hIntegralX1 = hIntegral(1.5) - 1; 075 this.hIntegralNumberOfElements = hIntegral(numberOfElements + F_1_2); 076 this.r = hIntegralX1 - hIntegralNumberOfElements; 077 this.s = 2 - hIntegralInverse(hIntegral(2.5) - h(2)); 078 } 079 080 /** 081 * @param rng Generator of uniformly distributed random numbers. 082 * @param source Source to copy. 083 */ 084 private RejectionInversionZipfSamplerImpl(UniformRandomProvider rng, 085 RejectionInversionZipfSamplerImpl source) { 086 this.rng = rng; 087 this.numberOfElements = source.numberOfElements; 088 this.exponent = source.exponent; 089 this.hIntegralX1 = source.hIntegralX1; 090 this.hIntegralNumberOfElements = source.hIntegralNumberOfElements; 091 this.r = source.r; 092 this.s = source.s; 093 } 094 095 @Override 096 public int sample() { 097 // The paper describes an algorithm for exponents larger than 1 098 // (Algorithm ZRI). 099 // The original method uses 100 // H(x) = (v + x)^(1 - q) / (1 - q) 101 // as the integral of the hat function. 102 // This function is undefined for q = 1, which is the reason for 103 // the limitation of the exponent. 104 // If instead the integral function 105 // H(x) = ((v + x)^(1 - q) - 1) / (1 - q) 106 // is used, 107 // for which a meaningful limit exists for q = 1, the method works 108 // for all positive exponents. 109 // The following implementation uses v = 0 and generates integral 110 // number in the range [1, numberOfElements]. 111 // This is different to the original method where v is defined to 112 // be positive and numbers are taken from [0, i_max]. 113 // This explains why the implementation looks slightly different. 114 115 while (true) { 116 final double u = hIntegralNumberOfElements + rng.nextDouble() * r; 117 // u is uniformly distributed in (hIntegralX1, hIntegralNumberOfElements] 118 119 final double x = hIntegralInverse(u); 120 int k = (int) (x + F_1_2); 121 122 // Limit k to the range [1, numberOfElements] if it would be outside 123 // due to numerical inaccuracies. 124 if (k < 1) { 125 k = 1; 126 } else if (k > numberOfElements) { 127 k = numberOfElements; 128 } 129 130 // Here, the distribution of k is given by: 131 // 132 // P(k = 1) = C * (hIntegral(1.5) - hIntegralX1) = C 133 // P(k = m) = C * (hIntegral(m + 1/2) - hIntegral(m - 1/2)) for m >= 2 134 // 135 // where C = 1 / (hIntegralNumberOfElements - hIntegralX1) 136 137 if (k - x <= s || u >= hIntegral(k + F_1_2) - h(k)) { 138 139 // Case k = 1: 140 // 141 // The right inequality is always true, because replacing k by 1 gives 142 // u >= hIntegral(1.5) - h(1) = hIntegralX1 and u is taken from 143 // (hIntegralX1, hIntegralNumberOfElements]. 144 // 145 // Therefore, the acceptance rate for k = 1 is P(accepted | k = 1) = 1 146 // and the probability that 1 is returned as random value is 147 // P(k = 1 and accepted) = P(accepted | k = 1) * P(k = 1) = C = C / 1^exponent 148 // 149 // Case k >= 2: 150 // 151 // The left inequality (k - x <= s) is just a short cut 152 // to avoid the more expensive evaluation of the right inequality 153 // (u >= hIntegral(k + 0.5) - h(k)) in many cases. 154 // 155 // If the left inequality is true, the right inequality is also true: 156 // Theorem 2 in the paper is valid for all positive exponents, because 157 // the requirements h'(x) = -exponent/x^(exponent + 1) < 0 and 158 // (-1/hInverse'(x))'' = (1+1/exponent) * x^(1/exponent-1) >= 0 159 // are both fulfilled. 160 // Therefore, f(x) = x - hIntegralInverse(hIntegral(x + 0.5) - h(x)) 161 // is a non-decreasing function. If k - x <= s holds, 162 // k - x <= s + f(k) - f(2) is obviously also true which is equivalent to 163 // -x <= -hIntegralInverse(hIntegral(k + 0.5) - h(k)), 164 // -hIntegralInverse(u) <= -hIntegralInverse(hIntegral(k + 0.5) - h(k)), 165 // and finally u >= hIntegral(k + 0.5) - h(k). 166 // 167 // Hence, the right inequality determines the acceptance rate: 168 // P(accepted | k = m) = h(m) / (hIntegrated(m+1/2) - hIntegrated(m-1/2)) 169 // The probability that m is returned is given by 170 // P(k = m and accepted) = P(accepted | k = m) * P(k = m) = C * h(m) = C / m^exponent. 171 // 172 // In both cases the probabilities are proportional to the probability mass function 173 // of the Zipf distribution. 174 175 return k; 176 } 177 } 178 } 179 180 /** {@inheritDoc} */ 181 @Override 182 public String toString() { 183 return "Rejection inversion Zipf deviate [" + rng.toString() + "]"; 184 } 185 186 @Override 187 public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) { 188 return new RejectionInversionZipfSamplerImpl(rng, this); 189 } 190 191 /** 192 * {@code H(x)} is defined as 193 * <ul> 194 * <li>{@code (x^(1 - exponent) - 1) / (1 - exponent)}, if {@code exponent != 1}</li> 195 * <li>{@code log(x)}, if {@code exponent == 1}</li> 196 * </ul> 197 * H(x) is an integral function of h(x), the derivative of H(x) is h(x). 198 * 199 * @param x Free parameter. 200 * @return {@code H(x)}. 201 */ 202 private double hIntegral(final double x) { 203 final double logX = Math.log(x); 204 return helper2((1 - exponent) * logX) * logX; 205 } 206 207 /** 208 * {@code h(x) = 1 / x^exponent}. 209 * 210 * @param x Free parameter. 211 * @return {@code h(x)}. 212 */ 213 private double h(final double x) { 214 return Math.exp(-exponent * Math.log(x)); 215 } 216 217 /** 218 * The inverse function of {@code H(x)}. 219 * 220 * @param x Free parameter 221 * @return y for which {@code H(y) = x}. 222 */ 223 private double hIntegralInverse(final double x) { 224 double t = x * (1 - exponent); 225 if (t < -1) { 226 // Limit value to the range [-1, +inf). 227 // t could be smaller than -1 in some rare cases due to numerical errors. 228 t = -1; 229 } 230 return Math.exp(helper1(t) * x); 231 } 232 233 /** 234 * Helper function that calculates {@code log(1 + x) / x}. 235 * <p> 236 * A Taylor series expansion is used, if x is close to 0. 237 * </p> 238 * 239 * @param x A value larger than or equal to -1. 240 * @return {@code log(1 + x) / x}. 241 */ 242 private static double helper1(final double x) { 243 if (Math.abs(x) > TAYLOR_THRESHOLD) { 244 return Math.log1p(x) / x; 245 } 246 return 1 - x * (F_1_2 - x * (F_1_3 - F_1_4 * x)); 247 } 248 249 /** 250 * Helper function to calculate {@code (exp(x) - 1) / x}. 251 * <p> 252 * A Taylor series expansion is used, if x is close to 0. 253 * </p> 254 * 255 * @param x Free parameter. 256 * @return {@code (exp(x) - 1) / x} if x is non-zero, or 1 if x = 0. 257 */ 258 private static double helper2(final double x) { 259 if (Math.abs(x) > TAYLOR_THRESHOLD) { 260 return Math.expm1(x) / x; 261 } 262 return 1 + x * F_1_2 * (1 + x * F_1_3 * (1 + F_1_4 * x)); 263 } 264 } 265 266 /** 267 * This instance delegates sampling. Use the factory method 268 * {@link #of(UniformRandomProvider, int, double)} to create an optimal sampler. 269 * 270 * @param rng Generator of uniformly distributed random numbers. 271 * @param numberOfElements Number of elements. 272 * @param exponent Exponent. 273 * @throws IllegalArgumentException if {@code numberOfElements <= 0} 274 * or {@code exponent < 0}. 275 */ 276 public RejectionInversionZipfSampler(UniformRandomProvider rng, 277 int numberOfElements, 278 double exponent) { 279 this(of(rng, numberOfElements, exponent)); 280 } 281 282 /** 283 * Private constructor used by to prevent partially initialized object if the construction 284 * of the delegate throws. In future versions the public constructor should be removed. 285 * 286 * @param delegate Delegate. 287 */ 288 private RejectionInversionZipfSampler(SharedStateDiscreteSampler delegate) { 289 super(null); 290 this.delegate = delegate; 291 } 292 293 /** 294 * Rejection inversion sampling method for a discrete, bounded Zipf 295 * distribution that is based on the method described in 296 * <blockquote> 297 * Wolfgang Hörmann and Gerhard Derflinger. 298 * <i>"Rejection-inversion to generate variates from monotone discrete 299 * distributions",</i><br> 300 * <strong>ACM Transactions on Modeling and Computer Simulation</strong> (TOMACS) 6.3 (1996): 169-184. 301 * </blockquote> 302 */ 303 @Override 304 public int sample() { 305 return delegate.sample(); 306 } 307 308 /** {@inheritDoc} */ 309 @Override 310 public String toString() { 311 return delegate.toString(); 312 } 313 314 /** 315 * {@inheritDoc} 316 * 317 * @since 1.3 318 */ 319 @Override 320 public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) { 321 return delegate.withUniformRandomProvider(rng); 322 } 323 324 /** 325 * Creates a new Zipf distribution sampler. 326 * 327 * <p>Note when {@code exponent = 0} the Zipf distribution reduces to a 328 * discrete uniform distribution over the interval {@code [1, n]} with {@code n} 329 * the number of elements. 330 * 331 * @param rng Generator of uniformly distributed random numbers. 332 * @param numberOfElements Number of elements. 333 * @param exponent Exponent. 334 * @return the sampler 335 * @throws IllegalArgumentException if {@code numberOfElements <= 0} or 336 * {@code exponent < 0}. 337 * @since 1.3 338 */ 339 public static SharedStateDiscreteSampler of(UniformRandomProvider rng, 340 int numberOfElements, 341 double exponent) { 342 if (numberOfElements <= 0) { 343 throw new IllegalArgumentException("number of elements is not strictly positive: " + numberOfElements); 344 } 345 InternalUtils.requirePositive(exponent, "exponent"); 346 347 // When the exponent is at the limit of 0 the distribution PMF reduces to 1 / n 348 // and sampling can use a discrete uniform sampler. 349 return exponent > 0 ? 350 new RejectionInversionZipfSamplerImpl(rng, numberOfElements, exponent) : 351 DiscreteUniformSampler.of(rng, 1, numberOfElements); 352 } 353}