/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.rng.core.source64;

/**
 * Utility support for the LXM family of generators. The LXM family is described
 * in further detail in:
 *
 * <blockquote>Steele and Vigna (2021) LXM: better splittable pseudorandom number generators
 * (and almost as fast). Proceedings of the ACM on Programming Languages, Volume 5,
 * Article 148, pp 1&ndash;31.</blockquote>
 *
 * <p>Contains methods to compute unsigned multiplication of 64-bit
 * and 128-bit values to create 128-bit results for use in a 128-bit
 * linear congruential generator (LCG). Constants are provided to advance the state
 * of an LCG by a power of 2 in a single multiply operation to support jump
 * operations.
 *
 * @see <a href="https://doi.org/10.1145/3485525">Steele &amp; Vigna (2021) Proc. ACM Programming
 * Languages 5, 1-31</a>
 * @since 1.5
 */
final class LXMSupport {
    /** 64-bit LCG multiplier. Note: (M % 8) = 5. */
    static final long M64 = 0xd1342543de82ef95L;
    /** Jump constant {@code m'} for an advance of the 64-bit LCG by 2^32.
     * Computed as: {@code m' = m^(2^32) (mod 2^64)}. */
    static final long M64P = 0x8d23804c00000001L;
    /** Jump constant precursor for {@code c'} for an advance of the 64-bit LCG by 2^32.
     * Computed as:
     * <pre>
     * product_{i=0}^{31} { M^(2^i) + 1 } (mod 2^64)
     * </pre>
     * <p>The jump is computed for the LCG with an update step of {@code s = m * s + c} as:
     * <pre>
     * s = m' * s + c' * c
     * </pre>
     */
    static final long C64P = 0x16691c9700000000L;

    /** Low half of 128-bit LCG multiplier. The upper half is {@code 1L}. */
    static final long M128L = 0xd605bbb58c8abbfdL;
    /** High half of the jump constant {@code m'} for an advance of the 128-bit LCG by 2^64.
     * The low half is 1. Computed as: {@code m' = m^(2^64) (mod 2^128)}. */
    static final long M128PH = 0x31f179f5224754f4L;
    /** High half of the jump constant for an advance of the 128-bit LCG by 2^64.
     * The low half is zero. Computed as:
     * <pre>
     * product_{i=0}^{63} { M^(2^i) + 1 } (mod 2^128)
     * </pre>
     * <p>The jump is computed for the LCG with an update step of {@code s = m * s + c} as:
     * <pre>
     * s = m' * s + c' * c
     * </pre>
     */
    static final long C128PH = 0x61139b28883277c3L;
    /**
     * The fractional part of the golden ratio, phi, scaled to 64-bits and rounded to odd.
     * <pre>
     * phi = (sqrt(5) - 1) / 2) * 2^64
     * </pre>
     * @see <a href="https://en.wikipedia.org/wiki/Golden_ratio">Golden ratio</a>
     */
    static final long GOLDEN_RATIO_64 = 0x9e3779b97f4a7c15L;

    /** A mask to extract the lower 32 bits of a {@code long}; equivalently, to convert an
     * {@code int} to an unsigned 32-bit integer stored as a {@code long}.
     * (Renamed from {@code INT_TO_UNSIGNED_BYTE_MASK}: the mask covers an int, not a byte.) */
    private static final long LOWER_32_BITS_MASK = 0xffff_ffffL;

    /** No instances. */
    private LXMSupport() {}

    /**
     * Perform a 64-bit mixing function using Doug Lea's 64-bit mix constants and shifts.
     *
     * <p>This is based on the original 64-bit mix function of Austin Appleby's
     * MurmurHash3 modified to use a single mix constant and 32-bit shifts, which may have
     * a performance advantage on some processors. The code is provided in Steele and
     * Vigna's paper.
     *
     * @param x the input value
     * @return the output value
     */
    static long lea64(long x) {
        x = (x ^ (x >>> 32)) * 0xdaba0b6eb09322e3L;
        x = (x ^ (x >>> 32)) * 0xdaba0b6eb09322e3L;
        return x ^ (x >>> 32);
    }

    /**
     * Multiply the two values as if unsigned 64-bit longs to produce the high 64-bits
     * of the 128-bit unsigned result.
     *
     * <p>This method computes the equivalent of:
     * <pre>{@code
     * Math.multiplyHigh(a, b) + ((a >> 63) & b) + ((b >> 63) & a)
     * }</pre>
     *
     * <p>Note: The method {@code Math.multiplyHigh} was added in JDK 9
     * and should be used as above when the source code targets Java 11
     * to exploit the intrinsic method.
     *
     * <p>Note: The method {@code Math.unsignedMultiplyHigh} was added in JDK 18
     * and should be used when the source code target allows.
     *
     * @param value1 the first value
     * @param value2 the second value
     * @return the high 64-bits of the 128-bit result
     */
    static long unsignedMultiplyHigh(long value1, long value2) {
        // Computation is based on the following observation about the upper (a and x)
        // and lower (b and y) bits of unsigned big-endian integers:
        //   ab * xy
        // =  b *  y
        // +  b * x0
        // + a0 *  y
        // + a0 * x0
        // =  b *  y
        // +  b *  x * 2^32
        // +  a *  y * 2^32
        // +  a *  x * 2^64
        //
        // Summation using a character for each byte:
        //
        //             byby byby
        // +      bxbx bxbx 0000
        // +      ayay ayay 0000
        // + axax axax 0000 0000
        //
        // The summation can be rearranged to ensure no overflow given
        // that the result of two unsigned 32-bit integers multiplied together
        // plus two full 32-bit integers cannot overflow 64 bits:
        // > long x = (1L << 32) - 1
        // > x * x + x + x == -1 (all bits set, no overflow)
        //
        // The carry is a composed intermediate which will never overflow:
        //
        //             byby byby
        // +           bxbx 0000
        // +      ayay ayay 0000
        //
        // +      bxbx 0000 0000
        // + axax axax 0000 0000

        final long a = value1 >>> 32;
        final long b = value1 & LOWER_32_BITS_MASK;
        final long x = value2 >>> 32;
        final long y = value2 & LOWER_32_BITS_MASK;

        final long by = b * y;
        final long bx = b * x;
        final long ay = a * y;
        final long ax = a * x;

        // Cannot overflow
        final long carry = (by >>> 32) +
                           (bx & LOWER_32_BITS_MASK) +
                            ay;
        // Note:
        // low = (carry << 32) | (by & LOWER_32_BITS_MASK)
        // Benchmarking shows outputting low to a long[] output argument
        // has no benefit over computing 'low = value1 * value2' separately.

        return (bx >>> 32) + (carry >>> 32) + ax;
    }

    /**
     * Add the two values as if unsigned 64-bit longs and return the carry out of the
     * 64-bit sum, i.e. the most significant bit of the 65-bit result (either 0 or 1).
     * This is the high 64-bits of the 128-bit result when each addend's high half is zero.
     *
     * <h2>Warning</h2>
     *
     * <p>This method is computing a carry bit for a 128-bit linear congruential
     * generator (LCG). The method is <em>not</em> applicable to all arguments.
     * Some computations can be dropped if the {@code right} argument is assumed to
     * be the LCG addition, which should be odd to ensure a full period LCG.
     *
     * @param left the left argument
     * @param right the right argument (assumed to have the lowest bit set to 1)
     * @return the carry (either 0 or 1)
     */
    static long unsignedAddHigh(long left, long right) {
        // Method compiles to 13 bytes as Java byte code.
        // This is below the default of 35 for code inlining.
        //
        // The unsigned add of left + right may have a 65-bit result.
        // If both values are shifted right by 1 then the sum will be
        // within a 64-bit long. The right is assumed to have a low
        // bit of 1 which has been lost in the shift. The method must
        // compute if a 1 was shifted off the left which would have
        // triggered a carry when adding to the right's assumed 1.
        // The intermediate 64-bit result is shifted
        // 63 bits to obtain the most significant bit of the 65-bit result.
        // Using -1 is the same as a shift of (64 - 1) as only the last 6 bits
        // are used by the shift but requires 1 less byte in java byte code.
        //
        //   01100001
        // + 10011111    right always has low bit set to 1
        //
        //   0110000  1  carry last bit of left
        // + 1001111  |
        // +       1 <-+
        // = 10000000    carry bit generated
        return ((left >>> 1) + (right >>> 1) + (left & 1)) >>> -1;
    }
}