View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.rng.core.source32;
19  
20  import org.apache.commons.rng.ArbitrarilyJumpableUniformRandomProvider;
21  import org.apache.commons.rng.JumpableUniformRandomProvider;
22  import org.apache.commons.rng.LongJumpableUniformRandomProvider;
23  import org.apache.commons.rng.UniformRandomProvider;
24  import org.apache.commons.rng.core.util.NumberFactory;
25  
26  import java.util.Arrays;
27  import java.util.stream.Stream;
28  
29  /**
30   * This class implements the Philox4x32 128-bit counter-based generator with 10 rounds.
31   *
32   * <p>This is a member of the Philox family of generators. Memory footprint is 192 bits
33   * and the period is 2<sup>130</sup>.</p>
34   *
35   * <p>Jumping in the sequence is essentially instantaneous.
36   * This generator provides both subsequences and arbitrary jumps for easy parallelization.
37   *
38   * <p>References:
39   * <ol>
40   * <li>
41   * Salmon, J.K. <i>et al</i> (2011)
42   * <a href="https://dl.acm.org/doi/epdf/10.1145/2063384.2063405">
43   * Parallel Random Numbers: As Easy as 1,2,3</a>.</li>
44   * </ol>
45   *
46   * @since 1.7
47   */
48  public final class Philox4x32 extends IntProvider implements LongJumpableUniformRandomProvider,
49          ArbitrarilyJumpableUniformRandomProvider {
50      /** Philox 32-bit mixing constant for counter 0. */
51      private static final int K_PHILOX_10_A = 0x9E3779B9;
52      /** Philox 32-bit mixing constant for counter 1. */
53      private static final int K_PHILOX_10_B = 0xBB67AE85;
54      /** Philox 32-bit constant for key 0. */
55      private static final int K_PHILOX_SA = 0xD2511F53;
56      /** Philox 32-bit constant for key 1. */
57      private static final int K_PHILOX_SB = 0xCD9E8D57;
58      /** Internal buffer size. */
59      private static final int PHILOX_BUFFER_SIZE = 4;
60      /** Number of state variables. */
61      private static final int STATE_SIZE = 7;
62      /** The base-2 logarithm of the period. */
63      private static final int LOG_PERIOD = 130;
64      /** The period of 2^130 as a double. */
65      private static final double PERIOD = 0x1.0p130;
66      /** 2^54. Threshold for a double that cannot have the 2 least
67       * significant bits set when converted to a long. */
68      private static final double TWO_POW_54 = 0x1.0p54;
69  
70      /** Counter 0. */
71      private int counter0;
72      /** Counter 1. */
73      private int counter1;
74      /** Counter 2. */
75      private int counter2;
76      /** Counter 3. */
77      private int counter3;
78      /** Output buffer. */
79      private final int[] buffer = new int[PHILOX_BUFFER_SIZE];
80      /** Key low bits. */
81      private int key0;
82      /** Key high bits. */
83      private int key1;
84      /** Output buffer index. When at the end of the buffer the counter is
85       * incremented and the buffer regenerated. */
86      private int bufferPosition;
87  
88      /**
89       * Creates a new instance based on an array of int containing, key (first two ints) and
90       * the counter (next 4 ints, low bits = first int). The counter is not scrambled and may
91       * be used to create contiguous blocks with size a multiple of 4 ints. For example,
92       * setting seed[2] = 1 is equivalent to start with seed[2]=0 and calling {@link #next()} 4 times.
93       *
94       * @param seed Array of size 6 defining key0,key1,counter0,counter1,counter2,counter3.
95       *             If the size is smaller, zero values are assumed.
96       */
97      public Philox4x32(int[] seed) {
98          final int[] input = seed.length < 6 ? Arrays.copyOf(seed, 6) : seed;
99          setState(input);
100         bufferPosition = PHILOX_BUFFER_SIZE;
101     }
102 
103     /**
104      * Copy constructor.
105      *
106      * @param source Source to copy.
107      */
108     private Philox4x32(Philox4x32 source) {
109         super(source);
110         counter0 = source.counter0;
111         counter1 = source.counter1;
112         counter2 = source.counter2;
113         counter3 = source.counter3;
114         key0 = source.key0;
115         key1 = source.key1;
116         bufferPosition = source.bufferPosition;
117         System.arraycopy(source.buffer, 0, buffer, 0, PHILOX_BUFFER_SIZE);
118     }
119 
120     /**
121      * Copies the state from the array into the generator state.
122      *
123      * @param state New state.
124      */
125     private void setState(int[] state) {
126         key0 = state[0];
127         key1 = state[1];
128         counter0 = state[2];
129         counter1 = state[3];
130         counter2 = state[4];
131         counter3 = state[5];
132     }
133 
134     /** {@inheritDoc} */
135     @Override
136     protected byte[] getStateInternal() {
137         return composeStateInternal(
138             NumberFactory.makeByteArray(new int[] {
139                 key0, key1,
140                 counter0, counter1, counter2, counter3,
141                 bufferPosition}),
142             super.getStateInternal());
143     }
144 
145     /** {@inheritDoc} */
146     @Override
147     protected void setStateInternal(byte[] s) {
148         final byte[][] c = splitStateInternal(s, STATE_SIZE * Integer.BYTES);
149         final int[] state = NumberFactory.makeIntArray(c[0]);
150         setState(state);
151         bufferPosition = state[6];
152         super.setStateInternal(c[1]);
153         // Regenerate the internal buffer
154         rand10();
155     }
156 
157     /** {@inheritDoc} */
158     @Override
159     public int next() {
160         final int p = bufferPosition;
161         if (p < PHILOX_BUFFER_SIZE) {
162             bufferPosition = p + 1;
163             return buffer[p];
164         }
165         incrementCounter();
166         rand10();
167         bufferPosition = 1;
168         return buffer[0];
169     }
170 
171     /**
172      * Increment the counter by one.
173      */
174     private void incrementCounter() {
175         counter0++;
176         if (counter0 != 0) {
177             return;
178         }
179         counter1++;
180         if (counter1 != 0) {
181             return;
182         }
183         counter2++;
184         if (counter2 != 0) {
185             return;
186         }
187         counter3++;
188     }
189 
190     /**
191      * Perform 10 rounds, using counter0, counter1, counter2, counter3 as starting point.
192      * It updates the buffer member variable, but no others.
193      */
194     private void rand10() {
195         buffer[0] = counter0;
196         buffer[1] = counter1;
197         buffer[2] = counter2;
198         buffer[3] = counter3;
199 
200         int k0 = key0;
201         int k1 = key1;
202 
203         //unrolled loop for performance
204         singleRound(buffer, k0, k1);
205         k0 += K_PHILOX_10_A;
206         k1 += K_PHILOX_10_B;
207         singleRound(buffer, k0, k1);
208         k0 += K_PHILOX_10_A;
209         k1 += K_PHILOX_10_B;
210         singleRound(buffer, k0, k1);
211         k0 += K_PHILOX_10_A;
212         k1 += K_PHILOX_10_B;
213         singleRound(buffer, k0, k1);
214         k0 += K_PHILOX_10_A;
215         k1 += K_PHILOX_10_B;
216         singleRound(buffer, k0, k1);
217         k0 += K_PHILOX_10_A;
218         k1 += K_PHILOX_10_B;
219         singleRound(buffer, k0, k1);
220         k0 += K_PHILOX_10_A;
221         k1 += K_PHILOX_10_B;
222         singleRound(buffer, k0, k1);
223         k0 += K_PHILOX_10_A;
224         k1 += K_PHILOX_10_B;
225         singleRound(buffer, k0, k1);
226         k0 += K_PHILOX_10_A;
227         k1 += K_PHILOX_10_B;
228         singleRound(buffer, k0, k1);
229         k0 += K_PHILOX_10_A;
230         k1 += K_PHILOX_10_B;
231         singleRound(buffer, k0, k1);
232     }
233 
234     /**
235      * Performs a single round of philox.
236      *
237      * @param counter Counter, which will be updated after each call.
238      * @param key0 Key low bits.
239      * @param key1 Key high bits.
240      */
241     private static void singleRound(int[] counter, int key0, int key1) {
242         final long product0 = (K_PHILOX_SA & 0xffff_ffffL) * (counter[0] & 0xffff_ffffL);
243         final int hi0 = (int) (product0 >>> 32);
244         final int lo0 = (int) product0;
245         final long product1 = (K_PHILOX_SB & 0xffff_ffffL) * (counter[2] & 0xffff_ffffL);
246         final int hi1 = (int) (product1 >>> 32);
247         final int lo1 = (int) product1;
248 
249         counter[0] = hi1 ^ counter[1] ^ key0;
250         counter[1] = lo1;
251         counter[2] = hi0 ^ counter[3] ^ key1;
252         counter[3] = lo0;
253     }
254 
255     /**
256      * {@inheritDoc}
257      *
258      * <p>The jump size is the equivalent of 2<sup>66</sup>
259      * calls to {@link UniformRandomProvider#nextInt() nextInt()}. It can provide
260      * up to 2<sup>64</sup> non-overlapping subsequences.</p>
261      */
262     @Override
263     public UniformRandomProvider jump() {
264         final Philox4x32 copy = new Philox4x32(this);
265         if (++counter2 == 0) {
266             counter3++;
267         }
268         finishJump();
269         return copy;
270     }
271 
272     /**
273      * {@inheritDoc}
274      *
275      * <p>The jump size is the equivalent of 2<sup>98</sup> calls to
276      * {@link UniformRandomProvider#nextLong() nextLong()}. It can provide up to
277      * 2<sup>32</sup> non-overlapping subsequences of length 2<sup>98</sup>; each
278      * subsequence can provide up to 2<sup>32</sup> non-overlapping subsequences of
279      * length 2<sup>66</sup> using the {@link #jump()} method.</p>
280      */
281     @Override
282     public JumpableUniformRandomProvider longJump() {
283         final Philox4x32 copy = new Philox4x32(this);
284         counter3++;
285         finishJump();
286         return copy;
287     }
288 
289     /** {@inheritDoc} */
290     @Override
291     public ArbitrarilyJumpableUniformRandomProvider jump(double distance) {
292         IntJumpDistances.validateJump(distance, PERIOD);
293         // Decompose into an increment for the buffer position and counter
294         final int skip = getBufferPositionIncrement(distance);
295         final int[] increment = getCounterIncrement(distance);
296         return copyAndJump(skip, increment);
297     }
298 
299     /** {@inheritDoc} */
300     @Override
301     public ArbitrarilyJumpableUniformRandomProvider jumpPowerOfTwo(int logDistance) {
302         IntJumpDistances.validateJumpPowerOfTwo(logDistance, LOG_PERIOD);
303         // For simplicity this re-uses code to increment the buffer position and counter
304         // when only one or the other is required for a power of 2.
305         // In practice the jump should be much larger than 1 and the necessary regeneration
306         // of the buffer is the most time consuming step.
307         int skip = 0;
308         final int[] increment = new int[PHILOX_BUFFER_SIZE];
309         if (logDistance >= 0) {
310             if (logDistance <= 1) {
311                 // The first 2 powers update the buffer position.
312                 skip = 1 << logDistance;
313             } else {
314                 // Remaining powers update the 128-bit counter
315                 final int n = logDistance - 2;
316                 // Create the increment.
317                 // Start at n / 32 with a 1-bit shifted n % 32
318                 increment[n >> 5] = 1 << (n & 0x1f);
319             }
320         }
321         return copyAndJump(skip, increment);
322     }
323 
324     /** {@inheritDoc} */
325     @Override
326     public Stream<ArbitrarilyJumpableUniformRandomProvider> jumps(double distance) {
327         IntJumpDistances.validateJump(distance, PERIOD);
328         // Decompose into an increment for the buffer position and counter
329         final int skip = getBufferPositionIncrement(distance);
330         final int[] increment = getCounterIncrement(distance);
331         return Stream.generate(() -> copyAndJump(skip, increment)).sequential();
332     }
333 
334     /**
335      * Gets the buffer position increment from the jump distance.
336      *
337      * @param distance Jump distance.
338      * @return the buffer position increment
339      */
340     private static int getBufferPositionIncrement(double distance) {
341         return distance < TWO_POW_54 ?
342             // 2 least significant digits from the integer representation
343             (int)((long) distance) & 0x3 :
344             0;
345     }
346 
347     /**
348      * Gets the counter increment from the jump distance.
349      *
350      * @param distance Jump distance.
351      * @return the counter increment
352      */
353     private static int[] getCounterIncrement(double distance) {
354         final int[] increment = new int[PHILOX_BUFFER_SIZE];
355         // The counter is incremented if the distance is above the buffer size
356         // (increment = distance / 4).
357         if (distance >= PHILOX_BUFFER_SIZE) {
358             IntJumpDistances.writeUnsignedInteger(distance * 0.25, increment);
359         }
360         return increment;
361     }
362 
363     /**
364      * Copy the generator and advance the internal state. The copy is returned.
365      *
366      * <p>This method: (1) assumes that the arguments have been validated;
367      * and (2) regenerates the output buffer if required.
368      *
369      * @param skip Amount to skip the buffer position in [0, 3].
370      * @param increment Unsigned 128-bit increment, least significant bits first.
371      * @return the copy
372      */
373     private ArbitrarilyJumpableUniformRandomProvider copyAndJump(int skip, int[] increment) {
374         final Philox4x32 copy = new Philox4x32(this);
375 
376         // Skip the buffer position forward.
377         // Assumes position is in [0, 4] and skip is less than 4.
378         // Handle rollover but allow position=4 to regenerate buffer on next output call.
379         bufferPosition += skip;
380         if (bufferPosition > PHILOX_BUFFER_SIZE) {
381             bufferPosition -= PHILOX_BUFFER_SIZE;
382             incrementCounter();
383         }
384 
385         // Increment the 128-bit counter.
386         // Addition using unsigned int as longs.
387         // Any overflow bit is carried to the next counter.
388         // Unrolled branchless loop for performance.
389         long r;
390         r = (counter0 & 0xffff_ffffL) + (increment[0] & 0xffff_ffffL);
391         counter0 = (int) r;
392         r = (counter1 & 0xffff_ffffL) + (increment[1] & 0xffff_ffffL) + (r >>> 32);
393         counter1 = (int) r;
394         r = (counter2 & 0xffff_ffffL) + (increment[2] & 0xffff_ffffL) + (r >>> 32);
395         counter2 = (int) r;
396         r = (counter3 & 0xffff_ffffL) + (increment[3] & 0xffff_ffffL) + (r >>> 32);
397         counter3 = (int) r;
398 
399         finishJump();
400         return copy;
401     }
402 
403     /**
404      * Finish the jump of this generator. Resets the cached state and regenerates
405      * the output buffer if required.
406      */
407     private void finishJump() {
408         resetCachedState();
409         // Regenerate the internal buffer only if the buffer position is
410         // within the output buffer. Otherwise regeneration is delayed until
411         // next output. This allows more efficient consecutive jumping when
412         // the buffer is due to be regenerated.
413         if (bufferPosition < PHILOX_BUFFER_SIZE) {
414             rand10();
415         }
416     }
417 }