blob: 540df88d604fffb90ce85a38578e1b1e1eabed74 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.rng.core.source64;
/**
* Utility support for the LXM family of generators. The LXM family is described
* in further detail in:
*
* <blockquote>Steele and Vigna (2021) LXM: better splittable pseudorandom number generators
* (and almost as fast). Proceedings of the ACM on Programming Languages, Volume 5,
* Article 148, pp 1–31.</blockquote>
*
* <p>Contains methods to compute unsigned multiplication of 64-bit
* and 128-bit values to create 128-bit results for use in a 128-bit
* linear congruential generator (LCG). Constants are provided to advance the state
* of an LCG by a power of 2 in a single multiply operation to support jump
* operations.
*
* @see <a href="https://doi.org/10.1145/3485525">Steele &amp; Vigna (2021) Proc. ACM Programming
* Languages 5, 1-31</a>
* @since 1.5
*/
final class LXMSupport {
/** 64-bit LCG multiplier. Note: (M % 8) = 5. */
static final long M64 = 0xd1342543de82ef95L;
/** Jump constant {@code m'} for an advance of the 64-bit LCG by 2^32.
* Computed as: {@code m' = m^(2^32) (mod 2^64)}. */
static final long M64P = 0x8d23804c00000001L;
/** Jump constant precursor for {@code c'} for an advance of the 64-bit LCG by 2^32.
* Computed as:
* <pre>
* product_{i=0}^{31} { M^(2^i) + 1 } (mod 2^64)
* </pre>
* <p>The jump is computed for the LCG with an update step of {@code s = m * s + c} as:
* <pre>
* s = m' * s + c' * c
* </pre>
*/
static final long C64P = 0x16691c9700000000L;
/** Low half of 128-bit LCG multiplier. The upper half is {@code 1L}. */
static final long M128L = 0xd605bbb58c8abbfdL;
/** High half of the jump constant {@code m'} for an advance of the 128-bit LCG by 2^64.
* The low half is 1. Computed as: {@code m' = m^(2^64) (mod 2^128)}. */
static final long M128PH = 0x31f179f5224754f4L;
/** High half of the jump constant for an advance of the 128-bit LCG by 2^64.
* The low half is zero. Computed as:
* <pre>
* product_{i=0}^{63} { M^(2^i) + 1 } (mod 2^128)
* </pre>
* <p>The jump is computed for the LCG with an update step of {@code s = m * s + c} as:
* <pre>
* s = m' * s + c' * c
* </pre>
*/
static final long C128PH = 0x61139b28883277c3L;
/** A mask to convert an {@code int} to an unsigned integer stored as a {@code long}. */
private static final long INT_TO_UNSIGNED_BYTE_MASK = 0xffff_ffffL;
/** No instances. */
private LXMSupport() {}
/**
* Perform a 64-bit mixing function using Doug Lea's 64-bit mix constants and shifts.
*
* <p>This is based on the original 64-bit mix function of Austin Appleby's
* MurmurHash3 modified to use a single mix constant and 32-bit shifts, which may have
* a performance advantage on some processors. The code is provided in Steele and
* Vigna's paper.
*
* @param x the input value
* @return the output value
*/
static long lea64(long x) {
x = (x ^ (x >>> 32)) * 0xdaba0b6eb09322e3L;
x = (x ^ (x >>> 32)) * 0xdaba0b6eb09322e3L;
return x ^ (x >>> 32);
}
/**
* Multiply the two values as if unsigned 64-bit longs to produce the high 64-bits
* of the 128-bit unsigned result.
*
* <p>This method computes the equivalent of:
* <pre>
* Math.multiplyHigh(a, b) + ((a >> 63) & b) + ((b >> 63) & a)
* </pre>
*
* <p>Note: The method {@code Math.multiplyHigh} was added in JDK 9
* and should be used as above when the source code targets Java 11
* to exploit the intrinsic method.
*
* <p>Note: The method {@code Math.unsignedMultiplyHigh} was added in JDK 18
* and should be used when the source code target allows.
*
* @param value1 the first value
* @param value2 the second value
* @return the high 64-bits of the 128-bit result
*/
static long unsignedMultiplyHigh(long value1, long value2) {
// Computation is based on the following observation about the upper (a and x)
// and lower (b and y) bits of unsigned big-endian integers:
// ab * xy
// = b * y
// + b * x0
// + a0 * y
// + a0 * x0
// = b * y
// + b * x * 2^32
// + a * y * 2^32
// + a * x * 2^64
//
// Summation using a character for each byte:
//
// byby byby
// + bxbx bxbx 0000
// + ayay ayay 0000
// + axax axax 0000 0000
//
// The summation can be rearranged to ensure no overflow given
// that the result of two unsigned 32-bit integers multiplied together
// plus two full 32-bit integers cannot overflow 64 bits:
// > long x = (1L << 32) - 1
// > x * x + x + x == -1 (all bits set, no overflow)
//
// The carry is a composed intermediate which will never overflow:
//
// byby byby
// + bxbx 0000
// + ayay ayay 0000
//
// + bxbx 0000 0000
// + axax axax 0000 0000
final long a = value1 >>> 32;
final long b = value1 & INT_TO_UNSIGNED_BYTE_MASK;
final long x = value2 >>> 32;
final long y = value2 & INT_TO_UNSIGNED_BYTE_MASK;
final long by = b * y;
final long bx = b * x;
final long ay = a * y;
final long ax = a * x;
// Cannot overflow
final long carry = (by >>> 32) +
(bx & INT_TO_UNSIGNED_BYTE_MASK) +
ay;
// Note:
// low = (carry << 32) | (by & INT_TO_UNSIGNED_BYTE_MASK)
// Benchmarking shows outputting low to a long[] output argument
// has no benefit over computing 'low = value1 * value2' separately.
return (bx >>> 32) + (carry >>> 32) + ax;
}
/**
* Add the two values as if unsigned 64-bit longs to produce the high 64-bits
* of the 128-bit unsigned result.
*
* <h2>Warning</h2>
*
* <p>This method is computing a carry bit for a 128-bit linear congruential
* generator (LCG). The method is <em>not</em> applicable to all arguments.
* Some computations can be dropped if the {@code right} argument is assumed to
* be the LCG addition, which should be odd to ensure a full period LCG.
*
* @param left the left argument
* @param right the right argument (assumed to have the lowest bit set to 1)
* @return the carry (either 0 or 1)
*/
static long unsignedAddHigh(long left, long right) {
// Method compiles to 13 bytes as Java byte code.
// This is below the default of 35 for code inlining.
//
// The unsigned add of left + right may have a 65-bit result.
// If both values are shifted right by 1 then the sum will be
// within a 64-bit long. The right is assumed to have a low
// bit of 1 which has been lost in the shift. The method must
// compute if a 1 was shifted off the left which would have
// triggered a carry when adding to the right's assumed 1.
// The intermediate 64-bit result is shifted
// 63 bits to obtain the most significant bit of the 65-bit result.
// Using -1 is the same as a shift of (64 - 1) as only the last 6 bits
// are used by the shift but requires 1 less byte in java byte code.
//
// 01100001 left
// + 10011111 right always has low bit set to 1
//
// 0110000 1 carry last bit of left
// + 1001111 |
// + 1 <-+
// = 10000000 carry bit generated
return ((left >>> 1) + (right >>> 1) + (left & 1)) >>> -1;
}
}