1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.rng.core.source32;
19
20 import org.apache.commons.rng.ArbitrarilyJumpableUniformRandomProvider;
21 import org.apache.commons.rng.JumpableUniformRandomProvider;
22 import org.apache.commons.rng.LongJumpableUniformRandomProvider;
23 import org.apache.commons.rng.UniformRandomProvider;
24 import org.apache.commons.rng.core.util.NumberFactory;
25
26 import java.util.Arrays;
27 import java.util.stream.Stream;
28
29 /**
30 * This class implements the Philox4x32 128-bit counter-based generator with 10 rounds.
31 *
32 * <p>This is a member of the Philox family of generators. Memory footprint is 192 bits
33 * and the period is 2<sup>130</sup>.</p>
34 *
35 * <p>Jumping in the sequence is essentially instantaneous.
36 * This generator provides both subsequences and arbitrary jumps for easy parallelization.
37 *
38 * <p>References:
39 * <ol>
40 * <li>
41 * Salmon, J.K. <i>et al</i> (2011)
42 * <a href="https://dl.acm.org/doi/epdf/10.1145/2063384.2063405">
43 * Parallel Random Numbers: As Easy as 1,2,3</a>.</li>
44 * </ol>
45 *
46 * @since 1.7
47 */
48 public final class Philox4x32 extends IntProvider implements LongJumpableUniformRandomProvider,
49 ArbitrarilyJumpableUniformRandomProvider {
50 /** Philox 32-bit mixing constant for counter 0. */
51 private static final int K_PHILOX_10_A = 0x9E3779B9;
52 /** Philox 32-bit mixing constant for counter 1. */
53 private static final int K_PHILOX_10_B = 0xBB67AE85;
54 /** Philox 32-bit constant for key 0. */
55 private static final int K_PHILOX_SA = 0xD2511F53;
56 /** Philox 32-bit constant for key 1. */
57 private static final int K_PHILOX_SB = 0xCD9E8D57;
58 /** Internal buffer size. */
59 private static final int PHILOX_BUFFER_SIZE = 4;
60 /** Number of state variables. */
61 private static final int STATE_SIZE = 7;
62 /** The base-2 logarithm of the period. */
63 private static final int LOG_PERIOD = 130;
64 /** The period of 2^130 as a double. */
65 private static final double PERIOD = 0x1.0p130;
66 /** 2^54. Threshold for a double that cannot have the 2 least
67 * significant bits set when converted to a long. */
68 private static final double TWO_POW_54 = 0x1.0p54;
69
70 /** Counter 0. */
71 private int counter0;
72 /** Counter 1. */
73 private int counter1;
74 /** Counter 2. */
75 private int counter2;
76 /** Counter 3. */
77 private int counter3;
78 /** Output buffer. */
79 private final int[] buffer = new int[PHILOX_BUFFER_SIZE];
80 /** Key low bits. */
81 private int key0;
82 /** Key high bits. */
83 private int key1;
84 /** Output buffer index. When at the end of the buffer the counter is
85 * incremented and the buffer regenerated. */
86 private int bufferPosition;
87
88 /**
89 * Creates a new instance based on an array of int containing, key (first two ints) and
90 * the counter (next 4 ints, low bits = first int). The counter is not scrambled and may
91 * be used to create contiguous blocks with size a multiple of 4 ints. For example,
92 * setting seed[2] = 1 is equivalent to start with seed[2]=0 and calling {@link #next()} 4 times.
93 *
94 * @param seed Array of size 6 defining key0,key1,counter0,counter1,counter2,counter3.
95 * If the size is smaller, zero values are assumed.
96 */
97 public Philox4x32(int[] seed) {
98 final int[] input = seed.length < 6 ? Arrays.copyOf(seed, 6) : seed;
99 setState(input);
100 bufferPosition = PHILOX_BUFFER_SIZE;
101 }
102
103 /**
104 * Copy constructor.
105 *
106 * @param source Source to copy.
107 */
108 private Philox4x32(Philox4x32 source) {
109 super(source);
110 counter0 = source.counter0;
111 counter1 = source.counter1;
112 counter2 = source.counter2;
113 counter3 = source.counter3;
114 key0 = source.key0;
115 key1 = source.key1;
116 bufferPosition = source.bufferPosition;
117 System.arraycopy(source.buffer, 0, buffer, 0, PHILOX_BUFFER_SIZE);
118 }
119
120 /**
121 * Copies the state from the array into the generator state.
122 *
123 * @param state New state.
124 */
125 private void setState(int[] state) {
126 key0 = state[0];
127 key1 = state[1];
128 counter0 = state[2];
129 counter1 = state[3];
130 counter2 = state[4];
131 counter3 = state[5];
132 }
133
134 /** {@inheritDoc} */
135 @Override
136 protected byte[] getStateInternal() {
137 return composeStateInternal(
138 NumberFactory.makeByteArray(new int[] {
139 key0, key1,
140 counter0, counter1, counter2, counter3,
141 bufferPosition}),
142 super.getStateInternal());
143 }
144
145 /** {@inheritDoc} */
146 @Override
147 protected void setStateInternal(byte[] s) {
148 final byte[][] c = splitStateInternal(s, STATE_SIZE * Integer.BYTES);
149 final int[] state = NumberFactory.makeIntArray(c[0]);
150 setState(state);
151 bufferPosition = state[6];
152 super.setStateInternal(c[1]);
153 // Regenerate the internal buffer
154 rand10();
155 }
156
157 /** {@inheritDoc} */
158 @Override
159 public int next() {
160 final int p = bufferPosition;
161 if (p < PHILOX_BUFFER_SIZE) {
162 bufferPosition = p + 1;
163 return buffer[p];
164 }
165 incrementCounter();
166 rand10();
167 bufferPosition = 1;
168 return buffer[0];
169 }
170
171 /**
172 * Increment the counter by one.
173 */
174 private void incrementCounter() {
175 counter0++;
176 if (counter0 != 0) {
177 return;
178 }
179 counter1++;
180 if (counter1 != 0) {
181 return;
182 }
183 counter2++;
184 if (counter2 != 0) {
185 return;
186 }
187 counter3++;
188 }
189
190 /**
191 * Perform 10 rounds, using counter0, counter1, counter2, counter3 as starting point.
192 * It updates the buffer member variable, but no others.
193 */
194 private void rand10() {
195 buffer[0] = counter0;
196 buffer[1] = counter1;
197 buffer[2] = counter2;
198 buffer[3] = counter3;
199
200 int k0 = key0;
201 int k1 = key1;
202
203 //unrolled loop for performance
204 singleRound(buffer, k0, k1);
205 k0 += K_PHILOX_10_A;
206 k1 += K_PHILOX_10_B;
207 singleRound(buffer, k0, k1);
208 k0 += K_PHILOX_10_A;
209 k1 += K_PHILOX_10_B;
210 singleRound(buffer, k0, k1);
211 k0 += K_PHILOX_10_A;
212 k1 += K_PHILOX_10_B;
213 singleRound(buffer, k0, k1);
214 k0 += K_PHILOX_10_A;
215 k1 += K_PHILOX_10_B;
216 singleRound(buffer, k0, k1);
217 k0 += K_PHILOX_10_A;
218 k1 += K_PHILOX_10_B;
219 singleRound(buffer, k0, k1);
220 k0 += K_PHILOX_10_A;
221 k1 += K_PHILOX_10_B;
222 singleRound(buffer, k0, k1);
223 k0 += K_PHILOX_10_A;
224 k1 += K_PHILOX_10_B;
225 singleRound(buffer, k0, k1);
226 k0 += K_PHILOX_10_A;
227 k1 += K_PHILOX_10_B;
228 singleRound(buffer, k0, k1);
229 k0 += K_PHILOX_10_A;
230 k1 += K_PHILOX_10_B;
231 singleRound(buffer, k0, k1);
232 }
233
234 /**
235 * Performs a single round of philox.
236 *
237 * @param counter Counter, which will be updated after each call.
238 * @param key0 Key low bits.
239 * @param key1 Key high bits.
240 */
241 private static void singleRound(int[] counter, int key0, int key1) {
242 final long product0 = (K_PHILOX_SA & 0xffff_ffffL) * (counter[0] & 0xffff_ffffL);
243 final int hi0 = (int) (product0 >>> 32);
244 final int lo0 = (int) product0;
245 final long product1 = (K_PHILOX_SB & 0xffff_ffffL) * (counter[2] & 0xffff_ffffL);
246 final int hi1 = (int) (product1 >>> 32);
247 final int lo1 = (int) product1;
248
249 counter[0] = hi1 ^ counter[1] ^ key0;
250 counter[1] = lo1;
251 counter[2] = hi0 ^ counter[3] ^ key1;
252 counter[3] = lo0;
253 }
254
255 /**
256 * {@inheritDoc}
257 *
258 * <p>The jump size is the equivalent of 2<sup>66</sup>
259 * calls to {@link UniformRandomProvider#nextInt() nextInt()}. It can provide
260 * up to 2<sup>64</sup> non-overlapping subsequences.</p>
261 */
262 @Override
263 public UniformRandomProvider jump() {
264 final Philox4x32 copy = new Philox4x32(this);
265 if (++counter2 == 0) {
266 counter3++;
267 }
268 finishJump();
269 return copy;
270 }
271
272 /**
273 * {@inheritDoc}
274 *
275 * <p>The jump size is the equivalent of 2<sup>98</sup> calls to
276 * {@link UniformRandomProvider#nextLong() nextLong()}. It can provide up to
277 * 2<sup>32</sup> non-overlapping subsequences of length 2<sup>98</sup>; each
278 * subsequence can provide up to 2<sup>32</sup> non-overlapping subsequences of
279 * length 2<sup>66</sup> using the {@link #jump()} method.</p>
280 */
281 @Override
282 public JumpableUniformRandomProvider longJump() {
283 final Philox4x32 copy = new Philox4x32(this);
284 counter3++;
285 finishJump();
286 return copy;
287 }
288
289 /** {@inheritDoc} */
290 @Override
291 public ArbitrarilyJumpableUniformRandomProvider jump(double distance) {
292 IntJumpDistances.validateJump(distance, PERIOD);
293 // Decompose into an increment for the buffer position and counter
294 final int skip = getBufferPositionIncrement(distance);
295 final int[] increment = getCounterIncrement(distance);
296 return copyAndJump(skip, increment);
297 }
298
299 /** {@inheritDoc} */
300 @Override
301 public ArbitrarilyJumpableUniformRandomProvider jumpPowerOfTwo(int logDistance) {
302 IntJumpDistances.validateJumpPowerOfTwo(logDistance, LOG_PERIOD);
303 // For simplicity this re-uses code to increment the buffer position and counter
304 // when only one or the other is required for a power of 2.
305 // In practice the jump should be much larger than 1 and the necessary regeneration
306 // of the buffer is the most time consuming step.
307 int skip = 0;
308 final int[] increment = new int[PHILOX_BUFFER_SIZE];
309 if (logDistance >= 0) {
310 if (logDistance <= 1) {
311 // The first 2 powers update the buffer position.
312 skip = 1 << logDistance;
313 } else {
314 // Remaining powers update the 128-bit counter
315 final int n = logDistance - 2;
316 // Create the increment.
317 // Start at n / 32 with a 1-bit shifted n % 32
318 increment[n >> 5] = 1 << (n & 0x1f);
319 }
320 }
321 return copyAndJump(skip, increment);
322 }
323
324 /** {@inheritDoc} */
325 @Override
326 public Stream<ArbitrarilyJumpableUniformRandomProvider> jumps(double distance) {
327 IntJumpDistances.validateJump(distance, PERIOD);
328 // Decompose into an increment for the buffer position and counter
329 final int skip = getBufferPositionIncrement(distance);
330 final int[] increment = getCounterIncrement(distance);
331 return Stream.generate(() -> copyAndJump(skip, increment)).sequential();
332 }
333
334 /**
335 * Gets the buffer position increment from the jump distance.
336 *
337 * @param distance Jump distance.
338 * @return the buffer position increment
339 */
340 private static int getBufferPositionIncrement(double distance) {
341 return distance < TWO_POW_54 ?
342 // 2 least significant digits from the integer representation
343 (int)((long) distance) & 0x3 :
344 0;
345 }
346
347 /**
348 * Gets the counter increment from the jump distance.
349 *
350 * @param distance Jump distance.
351 * @return the counter increment
352 */
353 private static int[] getCounterIncrement(double distance) {
354 final int[] increment = new int[PHILOX_BUFFER_SIZE];
355 // The counter is incremented if the distance is above the buffer size
356 // (increment = distance / 4).
357 if (distance >= PHILOX_BUFFER_SIZE) {
358 IntJumpDistances.writeUnsignedInteger(distance * 0.25, increment);
359 }
360 return increment;
361 }
362
363 /**
364 * Copy the generator and advance the internal state. The copy is returned.
365 *
366 * <p>This method: (1) assumes that the arguments have been validated;
367 * and (2) regenerates the output buffer if required.
368 *
369 * @param skip Amount to skip the buffer position in [0, 3].
370 * @param increment Unsigned 128-bit increment, least significant bits first.
371 * @return the copy
372 */
373 private ArbitrarilyJumpableUniformRandomProvider copyAndJump(int skip, int[] increment) {
374 final Philox4x32 copy = new Philox4x32(this);
375
376 // Skip the buffer position forward.
377 // Assumes position is in [0, 4] and skip is less than 4.
378 // Handle rollover but allow position=4 to regenerate buffer on next output call.
379 bufferPosition += skip;
380 if (bufferPosition > PHILOX_BUFFER_SIZE) {
381 bufferPosition -= PHILOX_BUFFER_SIZE;
382 incrementCounter();
383 }
384
385 // Increment the 128-bit counter.
386 // Addition using unsigned int as longs.
387 // Any overflow bit is carried to the next counter.
388 // Unrolled branchless loop for performance.
389 long r;
390 r = (counter0 & 0xffff_ffffL) + (increment[0] & 0xffff_ffffL);
391 counter0 = (int) r;
392 r = (counter1 & 0xffff_ffffL) + (increment[1] & 0xffff_ffffL) + (r >>> 32);
393 counter1 = (int) r;
394 r = (counter2 & 0xffff_ffffL) + (increment[2] & 0xffff_ffffL) + (r >>> 32);
395 counter2 = (int) r;
396 r = (counter3 & 0xffff_ffffL) + (increment[3] & 0xffff_ffffL) + (r >>> 32);
397 counter3 = (int) r;
398
399 finishJump();
400 return copy;
401 }
402
403 /**
404 * Finish the jump of this generator. Resets the cached state and regenerates
405 * the output buffer if required.
406 */
407 private void finishJump() {
408 resetCachedState();
409 // Regenerate the internal buffer only if the buffer position is
410 // within the output buffer. Otherwise regeneration is delayed until
411 // next output. This allows more efficient consecutive jumping when
412 // the buffer is due to be regenerated.
413 if (bufferPosition < PHILOX_BUFFER_SIZE) {
414 rand10();
415 }
416 }
417 }