LXMSupport.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.rng.core.source64;

/**
 * Utility support for the LXM family of generators. The LXM family is described
 * in further detail in:
 *
 * <blockquote>Steele and Vigna (2021) LXM: better splittable pseudorandom number generators
 * (and almost as fast). Proceedings of the ACM on Programming Languages, Volume 5,
 * Article 148, pp 1–31.</blockquote>
 *
 * <p>Contains methods to compute unsigned multiplication of 64-bit
 * and 128-bit values to create 128-bit results for use in a 128-bit
 * linear congruential generator (LCG). Constants are provided to advance the state
 * of an LCG by a power of 2 in a single multiply operation to support jump
 * operations.
 *
 * @see <a href="https://doi.org/10.1145/3485525">Steele &amp; Vigna (2021) Proc. ACM Programming
 *      Languages 5, 1-31</a>
 * @since 1.5
 */
  36. final class LXMSupport {
  37.     /** 64-bit LCG multiplier. Note: (M % 8) = 5. */
  38.     static final long M64 = 0xd1342543de82ef95L;
  39.     /** Jump constant {@code m'} for an advance of the 64-bit LCG by 2^32.
  40.      * Computed as: {@code m' = m^(2^32) (mod 2^64)}. */
  41.     static final long M64P = 0x8d23804c00000001L;
  42.     /** Jump constant precursor for {@code c'} for an advance of the 64-bit LCG by 2^32.
  43.      * Computed as:
  44.      * <pre>
  45.      * product_{i=0}^{31} { M^(2^i) + 1 } (mod 2^64)
  46.      * </pre>
  47.      * <p>The jump is computed for the LCG with an update step of {@code s = m * s + c} as:
  48.      * <pre>
  49.      * s = m' * s + c' * c
  50.      * </pre>
  51.      */
  52.     static final long C64P = 0x16691c9700000000L;

  53.     /** Low half of 128-bit LCG multiplier. The upper half is {@code 1L}. */
  54.     static final long M128L = 0xd605bbb58c8abbfdL;
  55.     /** High half of the jump constant {@code m'} for an advance of the 128-bit LCG by 2^64.
  56.      * The low half is 1. Computed as: {@code m' = m^(2^64) (mod 2^128)}. */
  57.     static final long M128PH = 0x31f179f5224754f4L;
  58.     /** High half of the jump constant for an advance of the 128-bit LCG by 2^64.
  59.      * The low half is zero. Computed as:
  60.      * <pre>
  61.      * product_{i=0}^{63} { M^(2^i) + 1 } (mod 2^128)
  62.      * </pre>
  63.      * <p>The jump is computed for the LCG with an update step of {@code s = m * s + c} as:
  64.      * <pre>
  65.      * s = m' * s + c' * c
  66.      * </pre>
  67.      */
  68.     static final long C128PH = 0x61139b28883277c3L;
  69.     /**
  70.      * The fractional part of the golden ratio, phi, scaled to 64-bits and rounded to odd.
  71.      * <pre>
  72.      * phi = (sqrt(5) - 1) / 2) * 2^64
  73.      * </pre>
  74.      * @see <a href="https://en.wikipedia.org/wiki/Golden_ratio">Golden ratio</a>
  75.      */
  76.     static final long GOLDEN_RATIO_64 = 0x9e3779b97f4a7c15L;

  77.     /** A mask to convert an {@code int} to an unsigned integer stored as a {@code long}. */
  78.     private static final long INT_TO_UNSIGNED_BYTE_MASK = 0xffff_ffffL;

  79.     /** No instances. */
  80.     private LXMSupport() {}

  81.     /**
  82.      * Perform a 64-bit mixing function using Doug Lea's 64-bit mix constants and shifts.
  83.      *
  84.      * <p>This is based on the original 64-bit mix function of Austin Appleby's
  85.      * MurmurHash3 modified to use a single mix constant and 32-bit shifts, which may have
  86.      * a performance advantage on some processors. The code is provided in Steele and
  87.      * Vigna's paper.
  88.      *
  89.      * @param x the input value
  90.      * @return the output value
  91.      */
  92.     static long lea64(long x) {
  93.         x = (x ^ (x >>> 32)) * 0xdaba0b6eb09322e3L;
  94.         x = (x ^ (x >>> 32)) * 0xdaba0b6eb09322e3L;
  95.         return x ^ (x >>> 32);
  96.     }

  97.     /**
  98.      * Multiply the two values as if unsigned 64-bit longs to produce the high 64-bits
  99.      * of the 128-bit unsigned result.
  100.      *
  101.      * <p>This method computes the equivalent of:
  102.      * <pre>{@code
  103.      * Math.multiplyHigh(a, b) + ((a >> 63) & b) + ((b >> 63) & a)
  104.      * }</pre>
  105.      *
  106.      * <p>Note: The method {@code Math.multiplyHigh} was added in JDK 9
  107.      * and should be used as above when the source code targets Java 11
  108.      * to exploit the intrinsic method.
  109.      *
  110.      * <p>Note: The method {@code Math.unsignedMultiplyHigh} was added in JDK 18
  111.      * and should be used when the source code target allows.
  112.      *
  113.      * @param value1 the first value
  114.      * @param value2 the second value
  115.      * @return the high 64-bits of the 128-bit result
  116.      */
  117.     static long unsignedMultiplyHigh(long value1, long value2) {
  118.         // Computation is based on the following observation about the upper (a and x)
  119.         // and lower (b and y) bits of unsigned big-endian integers:
  120.         //   ab * xy
  121.         // =  b *  y
  122.         // +  b * x0
  123.         // + a0 *  y
  124.         // + a0 * x0
  125.         // = b * y
  126.         // + b * x * 2^32
  127.         // + a * y * 2^32
  128.         // + a * x * 2^64
  129.         //
  130.         // Summation using a character for each byte:
  131.         //
  132.         //             byby byby
  133.         // +      bxbx bxbx 0000
  134.         // +      ayay ayay 0000
  135.         // + axax axax 0000 0000
  136.         //
  137.         // The summation can be rearranged to ensure no overflow given
  138.         // that the result of two unsigned 32-bit integers multiplied together
  139.         // plus two full 32-bit integers cannot overflow 64 bits:
  140.         // > long x = (1L << 32) - 1
  141.         // > x * x + x + x == -1 (all bits set, no overflow)
  142.         //
  143.         // The carry is a composed intermediate which will never overflow:
  144.         //
  145.         //             byby byby
  146.         // +           bxbx 0000
  147.         // +      ayay ayay 0000
  148.         //
  149.         // +      bxbx 0000 0000
  150.         // + axax axax 0000 0000

  151.         final long a = value1 >>> 32;
  152.         final long b = value1 & INT_TO_UNSIGNED_BYTE_MASK;
  153.         final long x = value2 >>> 32;
  154.         final long y = value2 & INT_TO_UNSIGNED_BYTE_MASK;

  155.         final long by = b * y;
  156.         final long bx = b * x;
  157.         final long ay = a * y;
  158.         final long ax = a * x;

  159.         // Cannot overflow
  160.         final long carry = (by >>> 32) +
  161.                            (bx & INT_TO_UNSIGNED_BYTE_MASK) +
  162.                             ay;
  163.         // Note:
  164.         // low = (carry << 32) | (by & INT_TO_UNSIGNED_BYTE_MASK)
  165.         // Benchmarking shows outputting low to a long[] output argument
  166.         // has no benefit over computing 'low = value1 * value2' separately.

  167.         return (bx >>> 32) + (carry >>> 32) + ax;
  168.     }

  169.     /**
  170.      * Add the two values as if unsigned 64-bit longs to produce the high 64-bits
  171.      * of the 128-bit unsigned result.
  172.      *
  173.      * <h2>Warning</h2>
  174.      *
  175.      * <p>This method is computing a carry bit for a 128-bit linear congruential
  176.      * generator (LCG). The method is <em>not</em> applicable to all arguments.
  177.      * Some computations can be dropped if the {@code right} argument is assumed to
  178.      * be the LCG addition, which should be odd to ensure a full period LCG.
  179.      *
  180.      * @param left the left argument
  181.      * @param right the right argument (assumed to have the lowest bit set to 1)
  182.      * @return the carry (either 0 or 1)
  183.      */
  184.     static long unsignedAddHigh(long left, long right) {
  185.         // Method compiles to 13 bytes as Java byte code.
  186.         // This is below the default of 35 for code inlining.
  187.         //
  188.         // The unsigned add of left + right may have a 65-bit result.
  189.         // If both values are shifted right by 1 then the sum will be
  190.         // within a 64-bit long. The right is assumed to have a low
  191.         // bit of 1 which has been lost in the shift. The method must
  192.         // compute if a 1 was shifted off the left which would have
  193.         // triggered a carry when adding to the right's assumed 1.
  194.         // The intermediate 64-bit result is shifted
  195.         // 63 bits to obtain the most significant bit of the 65-bit result.
  196.         // Using -1 is the same as a shift of (64 - 1) as only the last 6 bits
  197.         // are used by the shift but requires 1 less byte in java byte code.
  198.         //
  199.         //    01100001      left
  200.         // +  10011111      right always has low bit set to 1
  201.         //
  202.         //    0110000   1   carry last bit of left
  203.         // +  1001111   |
  204.         // +        1 <-+
  205.         // = 10000000       carry bit generated
  206.         return ((left >>> 1) + (right >>> 1) + (left & 1)) >>> -1;
  207.     }
  208. }