| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.commons.rng.core.source64; |
| |
| /** |
| * Utility support for the LXM family of generators. The LXM family is described |
| * in further detail in: |
| * |
| * <blockquote>Steele and Vigna (2021) LXM: better splittable pseudorandom number generators |
| * (and almost as fast). Proceedings of the ACM on Programming Languages, Volume 5, |
| * Article 148, pp 1–31.</blockquote> |
| * |
| * <p>Contains methods to compute unsigned multiplication of 64-bit |
| * and 128-bit values to create 128-bit results for use in a 128-bit |
| * linear congruential generator (LCG). Constants are provided to advance the state |
| * of an LCG by a power of 2 in a single multiply operation to support jump |
| * operations. |
| * |
 * @see <a href="https://doi.org/10.1145/3485525">Steele &amp; Vigna (2021) Proc. ACM Programming
| * Languages 5, 1-31</a> |
| * @since 1.5 |
| */ |
| final class LXMSupport { |
| /** 64-bit LCG multiplier. Note: (M % 8) = 5. */ |
| static final long M64 = 0xd1342543de82ef95L; |
| /** Jump constant {@code m'} for an advance of the 64-bit LCG by 2^32. |
| * Computed as: {@code m' = m^(2^32) (mod 2^64)}. */ |
| static final long M64P = 0x8d23804c00000001L; |
| /** Jump constant precursor for {@code c'} for an advance of the 64-bit LCG by 2^32. |
| * Computed as: |
| * <pre> |
| * product_{i=0}^{31} { M^(2^i) + 1 } (mod 2^64) |
| * </pre> |
| * <p>The jump is computed for the LCG with an update step of {@code s = m * s + c} as: |
| * <pre> |
| * s = m' * s + c' * c |
| * </pre> |
| */ |
| static final long C64P = 0x16691c9700000000L; |
| |
| /** Low half of 128-bit LCG multiplier. The upper half is {@code 1L}. */ |
| static final long M128L = 0xd605bbb58c8abbfdL; |
| /** High half of the jump constant {@code m'} for an advance of the 128-bit LCG by 2^64. |
| * The low half is 1. Computed as: {@code m' = m^(2^64) (mod 2^128)}. */ |
| static final long M128PH = 0x31f179f5224754f4L; |
| /** High half of the jump constant for an advance of the 128-bit LCG by 2^64. |
| * The low half is zero. Computed as: |
| * <pre> |
| * product_{i=0}^{63} { M^(2^i) + 1 } (mod 2^128) |
| * </pre> |
| * <p>The jump is computed for the LCG with an update step of {@code s = m * s + c} as: |
| * <pre> |
| * s = m' * s + c' * c |
| * </pre> |
| */ |
| static final long C128PH = 0x61139b28883277c3L; |
| |
| /** A mask to convert an {@code int} to an unsigned integer stored as a {@code long}. */ |
| private static final long INT_TO_UNSIGNED_BYTE_MASK = 0xffff_ffffL; |
| |
| /** No instances. */ |
| private LXMSupport() {} |
| |
| /** |
| * Perform a 64-bit mixing function using Doug Lea's 64-bit mix constants and shifts. |
| * |
| * <p>This is based on the original 64-bit mix function of Austin Appleby's |
| * MurmurHash3 modified to use a single mix constant and 32-bit shifts, which may have |
| * a performance advantage on some processors. The code is provided in Steele and |
| * Vigna's paper. |
| * |
| * @param x the input value |
| * @return the output value |
| */ |
| static long lea64(long x) { |
| x = (x ^ (x >>> 32)) * 0xdaba0b6eb09322e3L; |
| x = (x ^ (x >>> 32)) * 0xdaba0b6eb09322e3L; |
| return x ^ (x >>> 32); |
| } |
| |
| /** |
| * Multiply the two values as if unsigned 64-bit longs to produce the high 64-bits |
| * of the 128-bit unsigned result. |
| * |
| * <p>This method computes the equivalent of: |
| * <pre> |
| * Math.multiplyHigh(a, b) + ((a >> 63) & b) + ((b >> 63) & a) |
| * </pre> |
| * |
| * <p>Note: The method {@code Math.multiplyHigh} was added in JDK 9 |
| * and should be used as above when the source code targets Java 11 |
| * to exploit the intrinsic method. |
| * |
| * <p>Note: The method {@code Math.unsignedMultiplyHigh} was added in JDK 18 |
| * and should be used when the source code target allows. |
| * |
| * @param value1 the first value |
| * @param value2 the second value |
| * @return the high 64-bits of the 128-bit result |
| */ |
| static long unsignedMultiplyHigh(long value1, long value2) { |
| // Computation is based on the following observation about the upper (a and x) |
| // and lower (b and y) bits of unsigned big-endian integers: |
| // ab * xy |
| // = b * y |
| // + b * x0 |
| // + a0 * y |
| // + a0 * x0 |
| // = b * y |
| // + b * x * 2^32 |
| // + a * y * 2^32 |
| // + a * x * 2^64 |
| // |
| // Summation using a character for each byte: |
| // |
| // byby byby |
| // + bxbx bxbx 0000 |
| // + ayay ayay 0000 |
| // + axax axax 0000 0000 |
| // |
| // The summation can be rearranged to ensure no overflow given |
| // that the result of two unsigned 32-bit integers multiplied together |
| // plus two full 32-bit integers cannot overflow 64 bits: |
| // > long x = (1L << 32) - 1 |
| // > x * x + x + x == -1 (all bits set, no overflow) |
| // |
| // The carry is a composed intermediate which will never overflow: |
| // |
| // byby byby |
| // + bxbx 0000 |
| // + ayay ayay 0000 |
| // |
| // + bxbx 0000 0000 |
| // + axax axax 0000 0000 |
| |
| final long a = value1 >>> 32; |
| final long b = value1 & INT_TO_UNSIGNED_BYTE_MASK; |
| final long x = value2 >>> 32; |
| final long y = value2 & INT_TO_UNSIGNED_BYTE_MASK; |
| |
| final long by = b * y; |
| final long bx = b * x; |
| final long ay = a * y; |
| final long ax = a * x; |
| |
| // Cannot overflow |
| final long carry = (by >>> 32) + |
| (bx & INT_TO_UNSIGNED_BYTE_MASK) + |
| ay; |
| // Note: |
| // low = (carry << 32) | (by & INT_TO_UNSIGNED_BYTE_MASK) |
| // Benchmarking shows outputting low to a long[] output argument |
| // has no benefit over computing 'low = value1 * value2' separately. |
| |
| return (bx >>> 32) + (carry >>> 32) + ax; |
| } |
| |
| /** |
| * Add the two values as if unsigned 64-bit longs to produce the high 64-bits |
| * of the 128-bit unsigned result. |
| * |
| * <h2>Warning</h2> |
| * |
| * <p>This method is computing a carry bit for a 128-bit linear congruential |
| * generator (LCG). The method is <em>not</em> applicable to all arguments. |
| * Some computations can be dropped if the {@code right} argument is assumed to |
| * be the LCG addition, which should be odd to ensure a full period LCG. |
| * |
| * @param left the left argument |
| * @param right the right argument (assumed to have the lowest bit set to 1) |
| * @return the carry (either 0 or 1) |
| */ |
| static long unsignedAddHigh(long left, long right) { |
| // Method compiles to 13 bytes as Java byte code. |
| // This is below the default of 35 for code inlining. |
| // |
| // The unsigned add of left + right may have a 65-bit result. |
| // If both values are shifted right by 1 then the sum will be |
| // within a 64-bit long. The right is assumed to have a low |
| // bit of 1 which has been lost in the shift. The method must |
| // compute if a 1 was shifted off the left which would have |
| // triggered a carry when adding to the right's assumed 1. |
| // The intermediate 64-bit result is shifted |
| // 63 bits to obtain the most significant bit of the 65-bit result. |
| // Using -1 is the same as a shift of (64 - 1) as only the last 6 bits |
| // are used by the shift but requires 1 less byte in java byte code. |
| // |
| // 01100001 left |
| // + 10011111 right always has low bit set to 1 |
| // |
| // 0110000 1 carry last bit of left |
| // + 1001111 | |
| // + 1 <-+ |
| // = 10000000 carry bit generated |
| return ((left >>> 1) + (right >>> 1) + (left & 1)) >>> -1; |
| } |
| } |