/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.rng.core.source64;

/**
 * Utility support for the LXM family of generators. The LXM family is described
 * in further detail in:
 *
 * <blockquote>Steele and Vigna (2021) LXM: better splittable pseudorandom number generators
 * (and almost as fast). Proceedings of the ACM on Programming Languages, Volume 5,
 * Article 148, pp 1–31.</blockquote>
 *
 * <p>Contains methods to compute unsigned multiplication of 64-bit
 * and 128-bit values to create 128-bit results for use in a 128-bit
 * linear congruential generator (LCG). Constants are provided to advance the state
 * of an LCG by a power of 2 in a single multiply operation to support jump
 * operations.
 *
 * @see <a href="https://doi.org/10.1145/3485525">Steele &amp; Vigna (2021) Proc. ACM Programming
 *      Languages 5, 1-31</a>
 * @since 1.5
 */
final class LXMSupport {
    /** 64-bit LCG multiplier. Note: (M % 8) = 5. */
    static final long M64 = 0xd1342543de82ef95L;
    /** Jump constant {@code m'} for an advance of the 64-bit LCG by 2^32.
     * Computed as: {@code m' = m^(2^32) (mod 2^64)}. */
    static final long M64P = 0x8d23804c00000001L;
    /** Jump constant precursor for {@code c'} for an advance of the 64-bit LCG by 2^32.
     * Computed as:
     * <pre>
     * product_{i=0}^{31} { M^(2^i) + 1 } (mod 2^64)
     * </pre>
     * <p>The jump is computed for the LCG with an update step of {@code s = m * s + c} as:
     * <pre>
     * s = m' * s + c' * c
     * </pre>
     */
    static final long C64P = 0x16691c9700000000L;
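
    // Illustrative sketch, not part of the original class: the documented formulas for
    // the 64-bit jump constants can be checked directly, since Java long arithmetic is
    // modulo 2^64. A jump of 2^32 steps is then s = M64P * s + C64P * c.
    // The helper name is hypothetical.
    private static boolean checkJumpConstants64() {
        long m = M64;  // m^(2^i), starting at i = 0
        long c = 1;    // running product for c'
        for (int i = 0; i < 32; i++) {
            // Multiply in the factor (m^(2^i) + 1), then square to obtain m^(2^(i+1))
            c *= m + 1;
            m *= m;
        }
        // After the loop: m == M64^(2^32) and c == product_{i=0}^{31} (M64^(2^i) + 1)
        return m == M64P && c == C64P;
    }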

    /** Low half of 128-bit LCG multiplier. The upper half is {@code 1L}. */
    static final long M128L = 0xd605bbb58c8abbfdL;
    /** High half of the jump constant {@code m'} for an advance of the 128-bit LCG by 2^64.
     * The low half is 1. Computed as: {@code m' = m^(2^64) (mod 2^128)}. */
    static final long M128PH = 0x31f179f5224754f4L;
    /** High half of the jump constant for an advance of the 128-bit LCG by 2^64.
     * The low half is zero. Computed as:
     * <pre>
     * product_{i=0}^{63} { M^(2^i) + 1 } (mod 2^128)
     * </pre>
     * <p>The jump is computed for the LCG with an update step of {@code s = m * s + c} as:
     * <pre>
     * s = m' * s + c' * c
     * </pre>
     */
    static final long C128PH = 0x61139b28883277c3L;
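
    // Illustrative sketch, not part of the original class: the 128-bit jump constants can
    // be checked the same way using 128-bit arithmetic on high:low pairs of longs, with the
    // multiplier m = 2^64 + M128L. Only the high halves are compared because the low halves
    // of m' and c' are documented above as 1 and 0. The helper name is hypothetical; it uses
    // unsignedMultiplyHigh declared later in this class.
    private static boolean checkJumpConstants128() {
        // m^(2^i) as high:low, starting at i = 0 with m = 2^64 + M128L
        long mh = 1;
        long ml = M128L;
        // Running product for c', starting at 1
        long ch = 0;
        long cl = 1;
        for (int i = 0; i < 64; i++) {
            // f = m^(2^i) + 1; the low half is odd so adding 1 only carries if it is all ones
            final long fl = ml + 1;
            final long fh = mh + (fl == 0 ? 1 : 0);
            // c' *= f (mod 2^128): high = ch*fl + cl*fh + high bits of cl*fl; low = cl*fl
            final long newCh = ch * fl + cl * fh + unsignedMultiplyHigh(cl, fl);
            cl *= fl;
            ch = newCh;
            // m *= m (mod 2^128), same multiplication formula
            final long newMh = mh * ml + ml * mh + unsignedMultiplyHigh(ml, ml);
            ml *= ml;
            mh = newMh;
        }
        return mh == M128PH && ch == C128PH;
    }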
    /**
     * The fractional part of the golden ratio, phi, scaled to 64 bits and rounded to odd.
     * <pre>
     * phi = ((sqrt(5) - 1) / 2) * 2^64
     * </pre>
     * @see <a href="https://en.wikipedia.org/wiki/Golden_ratio">Golden ratio</a>
     */
    static final long GOLDEN_RATIO_64 = 0x9e3779b97f4a7c15L;
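
    // Illustrative sketch, not part of the original class: the constant can be reproduced
    // with arbitrary-precision arithmetic (BigDecimal.sqrt requires Java 9 or later).
    // The helper name is hypothetical.
    private static long goldenRatio64() {
        // ((sqrt(5) - 1) / 2) * 2^64 == (sqrt(5) - 1) * 2^63
        final java.math.BigDecimal phi = java.math.BigDecimal.valueOf(5)
            .sqrt(new java.math.MathContext(40))
            .subtract(java.math.BigDecimal.ONE)
            .multiply(new java.math.BigDecimal(java.math.BigInteger.ONE.shiftLeft(63)));
        // Truncate to an integer, keep the low 64 bits and round to odd
        return phi.toBigInteger().longValue() | 1;
    }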

    /** A mask to convert an {@code int} to an unsigned integer stored as a {@code long}. */
    private static final long INT_TO_UNSIGNED_BYTE_MASK = 0xffff_ffffL;

    /** No instances. */
    private LXMSupport() {}

    /**
     * Perform a 64-bit mixing function using Doug Lea's 64-bit mix constants and shifts.
     *
     * <p>This is based on the original 64-bit mix function of Austin Appleby's
     * MurmurHash3 modified to use a single mix constant and 32-bit shifts, which may have
     * a performance advantage on some processors. The code is provided in Steele and
     * Vigna's paper.
     *
     * @param x the input value
     * @return the output value
     */
    static long lea64(long x) {
        x = (x ^ (x >>> 32)) * 0xdaba0b6eb09322e3L;
        x = (x ^ (x >>> 32)) * 0xdaba0b6eb09322e3L;
        return x ^ (x >>> 32);
    }
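
    // For comparison, a sketch of the original MurmurHash3 64-bit finalizer (fmix64) that
    // lea64 is derived from: it uses two different mix constants and 33-bit shifts.
    // This method is illustrative only and is not part of the original class.
    private static long murmurMix64(long x) {
        x = (x ^ (x >>> 33)) * 0xff51afd7ed558ccdL;
        x = (x ^ (x >>> 33)) * 0xc4ceb9fe1a85ec53L;
        return x ^ (x >>> 33);
    }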

    /**
     * Multiply the two values as if unsigned 64-bit longs to produce the high 64 bits
     * of the 128-bit unsigned result.
     *
     * <p>This method computes the equivalent of:
     * <pre>
     * Math.multiplyHigh(a, b) + ((a >> 63) & b) + ((b >> 63) & a)
     * </pre>
     *
     * <p>Note: The method {@code Math.multiplyHigh} was added in JDK 9
     * and should be used as above when the source code targets Java 11
     * to exploit the intrinsic method.
     *
     * <p>Note: The method {@code Math.unsignedMultiplyHigh} was added in JDK 18
     * and should be used when the source code target allows.
     *
     * @param value1 the first value
     * @param value2 the second value
     * @return the high 64 bits of the 128-bit result
     */
    static long unsignedMultiplyHigh(long value1, long value2) {
        // Computation is based on the following observation about the upper (a and x)
        // and lower (b and y) 32-bit halves of unsigned big-endian integers:
        //   ab * xy
        // =  b *  y
        // +  b * x0
        // + a0 *  y
        // + a0 * x0
        // = b * y
        // + b * x * 2^32
        // + a * y * 2^32
        // + a * x * 2^64
        //
        // Summation using a character for each byte:
        //
        //             byby byby
        // +      bxbx bxbx 0000
        // +      ayay ayay 0000
        // + axax axax 0000 0000
        //
        // The summation can be rearranged to ensure no overflow given
        // that the result of two unsigned 32-bit integers multiplied together
        // plus two full 32-bit integers cannot overflow 64 bits:
        // > long x = (1L << 32) - 1
        // > x * x + x + x == -1 (all bits set, no overflow)
        //
        // The carry is composed from intermediate terms and will never overflow:
        //
        //             byby byby
        // +           bxbx 0000
        // +      ayay ayay 0000
        //
        // +      bxbx 0000 0000
        // + axax axax 0000 0000

        final long a = value1 >>> 32;
        final long b = value1 & INT_TO_UNSIGNED_BYTE_MASK;
        final long x = value2 >>> 32;
        final long y = value2 & INT_TO_UNSIGNED_BYTE_MASK;

        final long by = b * y;
        final long bx = b * x;
        final long ay = a * y;
        final long ax = a * x;

        // Cannot overflow
        final long carry = (by >>> 32) +
                           (bx & INT_TO_UNSIGNED_BYTE_MASK) +
                            ay;
        // Note:
        // low = (carry << 32) | (by & INT_TO_UNSIGNED_BYTE_MASK)
        // Benchmarking shows outputting low to a long[] output argument
        // has no benefit over computing 'low = value1 * value2' separately.

        return (bx >>> 32) + (carry >>> 32) + ax;
    }
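
    // Illustrative sketch, not part of the original class: the result can be cross-checked
    // against BigInteger. The low 64 bits of the unsigned 128-bit product are simply
    // value1 * value2 using Java's wrapping multiplication. The helper name is hypothetical.
    private static boolean checkUnsignedMultiplyHigh(long value1, long value2) {
        final java.math.BigInteger expected =
            new java.math.BigInteger(Long.toUnsignedString(value1))
                .multiply(new java.math.BigInteger(Long.toUnsignedString(value2)));
        // Recombine the high and low halves into the full 128-bit product
        final java.math.BigInteger actual =
            new java.math.BigInteger(Long.toUnsignedString(unsignedMultiplyHigh(value1, value2)))
                .shiftLeft(64)
                .add(new java.math.BigInteger(Long.toUnsignedString(value1 * value2)));
        return expected.equals(actual);
    }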

    /**
     * Add the two values as if unsigned 64-bit longs to produce the high 64 bits
     * of the 128-bit unsigned result.
     *
     * <h2>Warning</h2>
     *
     * <p>This method is computing a carry bit for a 128-bit linear congruential
     * generator (LCG). The method is <em>not</em> applicable to all arguments.
     * Some computations can be dropped if the {@code right} argument is assumed to
     * be the LCG addition, which should be odd to ensure a full-period LCG.
     *
     * @param left the left argument
     * @param right the right argument (assumed to have the lowest bit set to 1)
     * @return the carry (either 0 or 1)
     */
    static long unsignedAddHigh(long left, long right) {
        // Method compiles to 13 bytes as Java byte code.
        // This is below the default of 35 for code inlining.
        //
        // The unsigned add of left + right may have a 65-bit result.
        // If both values are shifted right by 1 then the sum will be
        // within a 64-bit long. The right is assumed to have a low
        // bit of 1 which has been lost in the shift. The method must
        // compute if a 1 was shifted off the left which would have
        // triggered a carry when adding to the right's assumed 1.
        // The intermediate 64-bit result is shifted
        // 63 bits to obtain the most significant bit of the 65-bit result.
        // Using -1 is the same as a shift of (64 - 1) as only the last 6 bits
        // are used by the shift but requires 1 less byte in Java byte code.
        //
        //    01100001      left
        // +  10011111      right always has low bit set to 1
        //
        //    0110000   1   carry last bit of left
        // +  1001111   |
        // +        1 <-+
        // = 10000000       carry bit generated
        return ((left >>> 1) + (right >>> 1) + (left & 1)) >>> -1;
    }
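
    // Illustrative sketch, not part of the original class: how the pieces above combine for
    // one update of the 128-bit LCG "s = m * s + c". The multiplier is m = 2^64 + M128L, the
    // state s and the additive parameter c are split into high:low 64-bit halves, and the low
    // half of c is assumed odd as required by unsignedAddHigh. The helper name and the long[]
    // return value {high, low} are hypothetical.
    private static long[] lcgUpdate128(long sh, long sl, long ch, long cl) {
        // m * s (mod 2^128) with m = 2^64 + M128L:
        //   high = M128L * sh + high bits of (M128L * sl) + sl
        //   low  = M128L * sl
        final long low = M128L * sl;
        final long high = M128L * sh + unsignedMultiplyHigh(M128L, sl) + sl;
        // Add c, propagating the carry out of the low half into the high half
        return new long[] {high + ch + unsignedAddHigh(low, cl), low + cl};
    }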
}