| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.commons.rng.core.source64; |
| |
| /** |
| * Utility support for the LXM family of generators. The LXM family is described |
| * in further detail in: |
| * |
| * <blockquote>Steele and Vigna (2021) LXM: better splittable pseudorandom number generators |
| * (and almost as fast). Proceedings of the ACM on Programming Languages, Volume 5, |
| * Article 148, pp 1–31.</blockquote> |
| * |
| * <p>Contains methods to compute unsigned multiplication of 64-bit |
| * and 128-bit values to create 128-bit results for use in a 128-bit |
| * linear congruential generator (LCG). Constants are provided to advance the state |
| * of an LCG by a power of 2 in a single multiply operation to support jump |
| * operations. |
| * |
 * @see <a href="https://doi.org/10.1145/3485525">Steele &amp; Vigna (2021) Proc. ACM Programming
| * Languages 5, 1-31</a> |
| * @since 1.5 |
| */ |
| final class LXMSupport { |
| /** 64-bit LCG multiplier. Note: (M % 8) = 5. */ |
| static final long M64 = 0xd1342543de82ef95L; |
| /** Jump constant {@code m'} for an advance of the 64-bit LCG by 2^32. |
| * Computed as: {@code m' = m^(2^32) (mod 2^64)}. */ |
| static final long M64P = 0x8d23804c00000001L; |
| /** Jump constant precursor for {@code c'} for an advance of the 64-bit LCG by 2^32. |
| * Computed as: |
| * <pre> |
| * product_{i=0}^{31} { M^(2^i) + 1 } (mod 2^64) |
| * </pre> |
| * <p>The jump is computed for the LCG with an update step of {@code s = m * s + c} as: |
| * <pre> |
| * s = m' * s + c' * c |
| * </pre> |
| */ |
| static final long C64P = 0x16691c9700000000L; |
| |
| /** Low half of 128-bit LCG multiplier. The upper half is {@code 1L}. */ |
| static final long M128L = 0xd605bbb58c8abbfdL; |
| /** High half of the jump constant {@code m'} for an advance of the 128-bit LCG by 2^64. |
| * The low half is 1. Computed as: {@code m' = m^(2^64) (mod 2^128)}. */ |
| static final long M128PH = 0x31f179f5224754f4L; |
| /** High half of the jump constant for an advance of the 128-bit LCG by 2^64. |
| * The low half is zero. Computed as: |
| * <pre> |
| * product_{i=0}^{63} { M^(2^i) + 1 } (mod 2^128) |
| * </pre> |
| * <p>The jump is computed for the LCG with an update step of {@code s = m * s + c} as: |
| * <pre> |
| * s = m' * s + c' * c |
| * </pre> |
| */ |
| static final long C128PH = 0x61139b28883277c3L; |
| |
| /** A mask to convert an {@code int} to an unsigned integer stored as a {@code long}. */ |
| private static final long INT_TO_UNSIGNED_BYTE_MASK = 0xffff_ffffL; |
| |
| /** No instances. */ |
| private LXMSupport() {} |
| |
| /** |
| * Perform a 64-bit mixing function using Doug Lea's 64-bit mix constants and shifts. |
| * |
| * <p>This is based on the original 64-bit mix function of Austin Appleby's |
| * MurmurHash3 modified to use a single mix constant and 32-bit shifts, which may have |
| * a performance advantage on some processors. The code is provided in Steele and |
| * Vigna's paper. |
| * |
| * @param x the input value |
| * @return the output value |
| */ |
| static long lea64(long x) { |
| x = (x ^ (x >>> 32)) * 0xdaba0b6eb09322e3L; |
| x = (x ^ (x >>> 32)) * 0xdaba0b6eb09322e3L; |
| return x ^ (x >>> 32); |
| } |
| |
| /** |
| * Multiply the two values as if unsigned 64-bit longs to produce the high 64-bits |
| * of the 128-bit unsigned result. |
| * |
| * <p>This method computes the equivalent of: |
| * <pre> |
| * Math.multiplyHigh(a, b) + ((a >> 63) & b) + ((b >> 63) & a) |
| * </pre> |
| * |
| * <p>Note: The method {@code Math.multiplyHigh} was added in JDK 9 |
| * and should be used as above when the source code targets Java 11 |
| * to exploit the intrinsic method. |
| * |
| * <p>Note: The method {@code Math.unsignedMultiplyHigh} was added in JDK 18 |
| * and should be used when the source code target allows. |
| * |
| * @param value1 the first value |
| * @param value2 the second value |
| * @return the high 64-bits of the 128-bit result |
| */ |
| static long unsignedMultiplyHigh(long value1, long value2) { |
| // Computation is based on the following observation about the upper (a and x) |
| // and lower (b and y) bits of unsigned big-endian integers: |
| // ab * xy |
| // = b * y |
| // + b * x0 |
| // + a0 * y |
| // + a0 * x0 |
| // = b * y |
| // + b * x * 2^32 |
| // + a * y * 2^32 |
| // + a * x * 2^64 |
| // |
| // Summation using a character for each byte: |
| // |
| // byby byby |
| // + bxbx bxbx 0000 |
| // + ayay ayay 0000 |
| // + axax axax 0000 0000 |
| // |
| // The summation can be rearranged to ensure no overflow given |
| // that the result of two unsigned 32-bit integers multiplied together |
| // plus two full 32-bit integers cannot overflow 64 bits: |
| // > long x = (1L << 32) - 1 |
| // > x * x + x + x == -1 (all bits set, no overflow) |
| // |
| // The carry is a composed intermediate which will never overflow: |
| // |
| // byby byby |
| // + bxbx 0000 |
| // + ayay ayay 0000 |
| // |
| // + bxbx 0000 0000 |
| // + axax axax 0000 0000 |
| |
| final long a = value1 >>> 32; |
| final long b = value1 & INT_TO_UNSIGNED_BYTE_MASK; |
| final long x = value2 >>> 32; |
| final long y = value2 & INT_TO_UNSIGNED_BYTE_MASK; |
| |
| final long by = b * y; |
| final long bx = b * x; |
| final long ay = a * y; |
| final long ax = a * x; |
| |
| // Cannot overflow |
| final long carry = (by >>> 32) + |
| (bx & INT_TO_UNSIGNED_BYTE_MASK) + |
| ay; |
| // Note: |
| // low = (carry << 32) | (by & INT_TO_UNSIGNED_BYTE_MASK) |
| // Benchmarking shows outputting low to a long[] output argument |
| // has no benefit over computing 'low = value1 * value2' separately. |
| |
| return (bx >>> 32) + (carry >>> 32) + ax; |
| } |
| |
| /** |
| * Add the two values as if unsigned 64-bit longs to produce the high 64-bits |
| * of the 128-bit unsigned result. |
| * |
| * <h2>Warning</h2> |
| * |
| * <p>This method is computing a carry bit for a 128-bit linear congruential |
| * generator (LCG). The method is <em>not</em> applicable to all arguments. |
| * Some computations can be dropped if the {@code right} argument is assumed to |
| * be the LCG addition, which should be odd to ensure a full period LCG. |
| * |
| * @param left the left argument |
| * @param right the right argument (assumed to have the lowest bit set to 1) |
| * @return the carry (either 0 or 1) |
| */ |
| static long unsignedAddHigh(long left, long right) { |
| // Method compiles to 13 bytes as Java byte code. |
| // This is below the default of 35 for code inlining. |
| // |
| // The unsigned add of left + right may have a 65-bit result. |
| // If both values are shifted right by 1 then the sum will be |
| // within a 64-bit long. The right is assumed to have a low |
| // bit of 1 which has been lost in the shift. The method must |
| // compute if a 1 was shifted off the left which would have |
| // triggered a carry when adding to the right's assumed 1. |
| // The intermediate 64-bit result is shifted |
| // 63 bits to obtain the most significant bit of the 65-bit result. |
| // Using -1 is the same as a shift of (64 - 1) as only the last 6 bits |
| // are used by the shift but requires 1 less byte in java byte code. |
| // |
| // 01100001 left |
| // + 10011111 right always has low bit set to 1 |
| // |
| // 0110000 1 carry last bit of left |
| // + 1001111 | |
| // + 1 <-+ |
| // = 10000000 carry bit generated |
| return ((left >>> 1) + (right >>> 1) + (left & 1)) >>> -1; |
| } |
| } |