1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.rng.core.source64;
18
19 /**
20 * Utility support for the LXM family of generators. The LXM family is described
21 * in further detail in:
22 *
23 * <blockquote>Steele and Vigna (2021) LXM: better splittable pseudorandom number generators
24 * (and almost as fast). Proceedings of the ACM on Programming Languages, Volume 5,
25 * Article 148, pp 1–31.</blockquote>
26 *
27 * <p>Contains methods to compute unsigned multiplication of 64-bit
28 * and 128-bit values to create 128-bit results for use in a 128-bit
29 * linear congruential generator (LCG). Constants are provided to advance the state
30 * of an LCG by a power of 2 in a single multiply operation to support jump
31 * operations.
32 *
33 * @see <a href="https://doi.org/10.1145/3485525">Steele & Vigna (2021) Proc. ACM Programming
34 * Languages 5, 1-31</a>
35 * @since 1.5
36 */
37 final class LXMSupport {
38 /** 64-bit LCG multiplier. Note: (M % 8) = 5. */
39 static final long M64 = 0xd1342543de82ef95L;
40 /** Jump constant {@code m'} for an advance of the 64-bit LCG by 2^32.
41 * Computed as: {@code m' = m^(2^32) (mod 2^64)}. */
42 static final long M64P = 0x8d23804c00000001L;
43 /** Jump constant precursor for {@code c'} for an advance of the 64-bit LCG by 2^32.
44 * Computed as:
45 * <pre>
46 * product_{i=0}^{31} { M^(2^i) + 1 } (mod 2^64)
47 * </pre>
48 * <p>The jump is computed for the LCG with an update step of {@code s = m * s + c} as:
49 * <pre>
50 * s = m' * s + c' * c
51 * </pre>
52 */
53 static final long C64P = 0x16691c9700000000L;
54
55 /** Low half of 128-bit LCG multiplier. The upper half is {@code 1L}. */
56 static final long M128L = 0xd605bbb58c8abbfdL;
57 /** High half of the jump constant {@code m'} for an advance of the 128-bit LCG by 2^64.
58 * The low half is 1. Computed as: {@code m' = m^(2^64) (mod 2^128)}. */
59 static final long M128PH = 0x31f179f5224754f4L;
60 /** High half of the jump constant for an advance of the 128-bit LCG by 2^64.
61 * The low half is zero. Computed as:
62 * <pre>
63 * product_{i=0}^{63} { M^(2^i) + 1 } (mod 2^128)
64 * </pre>
65 * <p>The jump is computed for the LCG with an update step of {@code s = m * s + c} as:
66 * <pre>
67 * s = m' * s + c' * c
68 * </pre>
69 */
70 static final long C128PH = 0x61139b28883277c3L;
71 /**
72 * The fractional part of the golden ratio, phi, scaled to 64-bits and rounded to odd.
73 * <pre>
74 * phi = (sqrt(5) - 1) / 2) * 2^64
75 * </pre>
76 * @see <a href="https://en.wikipedia.org/wiki/Golden_ratio">Golden ratio</a>
77 */
78 static final long GOLDEN_RATIO_64 = 0x9e3779b97f4a7c15L;
79
80 /** A mask to convert an {@code int} to an unsigned integer stored as a {@code long}. */
81 private static final long INT_TO_UNSIGNED_BYTE_MASK = 0xffff_ffffL;
82
83 /** No instances. */
84 private LXMSupport() {}
85
86 /**
87 * Perform a 64-bit mixing function using Doug Lea's 64-bit mix constants and shifts.
88 *
89 * <p>This is based on the original 64-bit mix function of Austin Appleby's
90 * MurmurHash3 modified to use a single mix constant and 32-bit shifts, which may have
91 * a performance advantage on some processors. The code is provided in Steele and
92 * Vigna's paper.
93 *
94 * @param x the input value
95 * @return the output value
96 */
97 static long lea64(long x) {
98 x = (x ^ (x >>> 32)) * 0xdaba0b6eb09322e3L;
99 x = (x ^ (x >>> 32)) * 0xdaba0b6eb09322e3L;
100 return x ^ (x >>> 32);
101 }
102
103 /**
104 * Multiply the two values as if unsigned 64-bit longs to produce the high 64-bits
105 * of the 128-bit unsigned result.
106 *
107 * <p>This method computes the equivalent of:
108 * <pre>{@code
109 * Math.multiplyHigh(a, b) + ((a >> 63) & b) + ((b >> 63) & a)
110 * }</pre>
111 *
112 * <p>Note: The method {@code Math.multiplyHigh} was added in JDK 9
113 * and should be used as above when the source code targets Java 11
114 * to exploit the intrinsic method.
115 *
116 * <p>Note: The method {@code Math.unsignedMultiplyHigh} was added in JDK 18
117 * and should be used when the source code target allows.
118 *
119 * @param value1 the first value
120 * @param value2 the second value
121 * @return the high 64-bits of the 128-bit result
122 */
123 static long unsignedMultiplyHigh(long value1, long value2) {
124 // Computation is based on the following observation about the upper (a and x)
125 // and lower (b and y) bits of unsigned big-endian integers:
126 // ab * xy
127 // = b * y
128 // + b * x0
129 // + a0 * y
130 // + a0 * x0
131 // = b * y
132 // + b * x * 2^32
133 // + a * y * 2^32
134 // + a * x * 2^64
135 //
136 // Summation using a character for each byte:
137 //
138 // byby byby
139 // + bxbx bxbx 0000
140 // + ayay ayay 0000
141 // + axax axax 0000 0000
142 //
143 // The summation can be rearranged to ensure no overflow given
144 // that the result of two unsigned 32-bit integers multiplied together
145 // plus two full 32-bit integers cannot overflow 64 bits:
146 // > long x = (1L << 32) - 1
147 // > x * x + x + x == -1 (all bits set, no overflow)
148 //
149 // The carry is a composed intermediate which will never overflow:
150 //
151 // byby byby
152 // + bxbx 0000
153 // + ayay ayay 0000
154 //
155 // + bxbx 0000 0000
156 // + axax axax 0000 0000
157
158 final long a = value1 >>> 32;
159 final long b = value1 & INT_TO_UNSIGNED_BYTE_MASK;
160 final long x = value2 >>> 32;
161 final long y = value2 & INT_TO_UNSIGNED_BYTE_MASK;
162
163 final long by = b * y;
164 final long bx = b * x;
165 final long ay = a * y;
166 final long ax = a * x;
167
168 // Cannot overflow
169 final long carry = (by >>> 32) +
170 (bx & INT_TO_UNSIGNED_BYTE_MASK) +
171 ay;
172 // Note:
173 // low = (carry << 32) | (by & INT_TO_UNSIGNED_BYTE_MASK)
174 // Benchmarking shows outputting low to a long[] output argument
175 // has no benefit over computing 'low = value1 * value2' separately.
176
177 return (bx >>> 32) + (carry >>> 32) + ax;
178 }
179
180 /**
181 * Add the two values as if unsigned 64-bit longs to produce the high 64-bits
182 * of the 128-bit unsigned result.
183 *
184 * <h2>Warning</h2>
185 *
186 * <p>This method is computing a carry bit for a 128-bit linear congruential
187 * generator (LCG). The method is <em>not</em> applicable to all arguments.
188 * Some computations can be dropped if the {@code right} argument is assumed to
189 * be the LCG addition, which should be odd to ensure a full period LCG.
190 *
191 * @param left the left argument
192 * @param right the right argument (assumed to have the lowest bit set to 1)
193 * @return the carry (either 0 or 1)
194 */
195 static long unsignedAddHigh(long left, long right) {
196 // Method compiles to 13 bytes as Java byte code.
197 // This is below the default of 35 for code inlining.
198 //
199 // The unsigned add of left + right may have a 65-bit result.
200 // If both values are shifted right by 1 then the sum will be
201 // within a 64-bit long. The right is assumed to have a low
202 // bit of 1 which has been lost in the shift. The method must
203 // compute if a 1 was shifted off the left which would have
204 // triggered a carry when adding to the right's assumed 1.
205 // The intermediate 64-bit result is shifted
206 // 63 bits to obtain the most significant bit of the 65-bit result.
207 // Using -1 is the same as a shift of (64 - 1) as only the last 6 bits
208 // are used by the shift but requires 1 less byte in java byte code.
209 //
210 // 01100001 left
211 // + 10011111 right always has low bit set to 1
212 //
213 // 0110000 1 carry last bit of left
214 // + 1001111 |
215 // + 1 <-+
216 // = 10000000 carry bit generated
217 return ((left >>> 1) + (right >>> 1) + (left & 1)) >>> -1;
218 }
219 }