Merge pull request #361 from apache/extendMurmurHash3
Extend murmur hash3
diff --git a/src/main/java/org/apache/datasketches/hash/MurmurHash3.java b/src/main/java/org/apache/datasketches/hash/MurmurHash3.java
index c4deef6..1060e40 100644
--- a/src/main/java/org/apache/datasketches/hash/MurmurHash3.java
+++ b/src/main/java/org/apache/datasketches/hash/MurmurHash3.java
@@ -22,7 +22,11 @@
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
+import java.util.Objects;
+import org.apache.datasketches.SketchesArgumentException;
+import org.apache.datasketches.Util;
+import org.apache.datasketches.memory.Memory;
/**
* <p>
@@ -55,6 +59,10 @@
* structure. This also makes the code more readable and suitable for future extensions.
* </p>
*
+ * <p>Note that even though this hash function produces 128 bits, the entropy of the resulting hash cannot
+ * be greater than the entropy of the input. For example, if the input is only a single long of 64 bits,
+ * the entropy of the resulting 128 bit hash is no greater than 64 bits.
+ *
* @author Lee Rhodes
*/
public final class MurmurHash3 implements Serializable {
@@ -62,217 +70,315 @@
private MurmurHash3() {}
- //--Hash of long[]----------------------------------------------------
+ //--Hash of long---------------------------------------------------------
/**
- * Returns a long array of size 2, which is a 128-bit hash of the input.
+ * Hash the given long.
*
- * @param key The input long[] array. Must be non-null and non-empty.
+ * @param key The input long.
* @param seed A long valued seed.
- * @return the hash.
+ * @return a 128-bit hash of the input as a long array of size 2.
+ */
+ public static long[] hash(final long key, final long seed) {
+ final HashState hashState = new HashState(seed, seed);
+ return hashState.finalMix128(key, 0, Long.BYTES);
+ }
+
+ //--Hash of long[]-------------------------------------------------------
+ /**
+ * Hash the given long[] array.
+ *
+ * @param key The input long[] array. It must be non-null and non-empty.
+ * @param seed A long valued seed.
+ * @return a 128-bit hash of the input as a long array of size 2.
*/
public static long[] hash(final long[] key, final long seed) {
+ return hash(key, 0, key.length, seed);
+ }
+
+ /**
+ * Hash a portion of the given long[] array.
+ *
+ * @param key The input long[] array. It must be non-null and non-empty.
+ * @param offsetLongs the starting offset in longs.
+ * @param lengthLongs the length in longs of the portion of the array to be hashed.
+ * @param seed A long valued seed.
+ * @return a 128-bit hash of the input as a long array of size 2
+ */
+ public static long[] hash(final long[] key, final int offsetLongs, final int lengthLongs, final long seed) {
+ Objects.requireNonNull(key);
+ final int arrLen = key.length;
+ checkPositive(arrLen);
+ Util.checkBounds(offsetLongs, lengthLongs, arrLen);
final HashState hashState = new HashState(seed, seed);
- final int longs = key.length; //in longs
// Number of full 128-bit blocks of 2 longs (the body).
// Possible exclusion of a remainder of 1 long.
- final int nblocks = longs >>> 1; //longs / 2
+ final int nblocks = lengthLongs >>> 1; //longs / 2
// Process the 128-bit blocks (the body) into the hash
for (int i = 0; i < nblocks; i++ ) {
- final long k1 = key[i << 1]; //0, 2, 4, ...
- final long k2 = key[(i << 1) + 1]; //1, 3, 5, ...
+ final long k1 = key[offsetLongs + (i << 1)]; //offsetLongs + 0, 2, 4, ...
+ final long k2 = key[offsetLongs + (i << 1) + 1]; //offsetLongs + 1, 3, 5, ...
hashState.blockMix128(k1, k2);
}
- // Get the tail index, remainder length
+ // Get the tail index wrt hashed portion, remainder length
final int tail = nblocks << 1; // 2 longs / block
- final int rem = longs - tail; // remainder longs: 0,1
+ final int rem = lengthLongs - tail; // remainder longs: 0,1
// Get the tail
- final long k1 = (rem == 0) ? 0 : key[tail]; //k2 -> 0
+ final long k1 = rem == 0 ? 0 : key[offsetLongs + tail]; //k2 -> 0
// Mix the tail into the hash and return
- return hashState.finalMix128(k1, 0, longs << 3); //convert to bytes
+ return hashState.finalMix128(k1, 0, lengthLongs << 3); //convert to bytes
}
- //--Hash of int[]----------------------------------------------------
+ //--Hash of int[]--------------------------------------------------------
/**
- * Returns a long array of size 2, which is a 128-bit hash of the input.
+ * Hash the given int[] array.
*
- * @param key The input int[] array. Must be non-null and non-empty.
+ * @param key The input int[] array. It must be non-null and non-empty.
* @param seed A long valued seed.
- * @return the hash.
+ * @return a 128-bit hash of the input as a long array of size 2.
*/
public static long[] hash(final int[] key, final long seed) {
+ return hash(key, 0, key.length, seed);
+ }
+
+ /**
+ * Hash a portion of the given int[] array.
+ *
+ * @param key The input int[] array. It must be non-null and non-empty.
+ * @param offsetInts the starting offset in ints.
+ * @param lengthInts the length in ints of the portion of the array to be hashed.
+ * @param seed A long valued seed.
+ * @return a 128-bit hash of the input as a long array of size 2.
+ */
+ public static long[] hash(final int[] key, final int offsetInts, final int lengthInts, final long seed) {
+ Objects.requireNonNull(key);
+ final int arrLen = key.length;
+ checkPositive(arrLen);
+ Util.checkBounds(offsetInts, lengthInts, arrLen);
final HashState hashState = new HashState(seed, seed);
- final int ints = key.length; //in ints
// Number of full 128-bit blocks of 4 ints.
// Possible exclusion of a remainder of up to 3 ints.
- final int nblocks = ints >>> 2; //ints / 4
+ final int nblocks = lengthInts >>> 2; //ints / 4
// Process the 128-bit blocks (the body) into the hash
for (int i = 0; i < nblocks; i++ ) { //4 ints per block
- final long k1 = getLong(key, i << 2, 2); //0, 4, 8, ...
- final long k2 = getLong(key, (i << 2) + 2, 2); //2, 6, 10, ...
+ final long k1 = getLong(key, offsetInts + (i << 2), 2); //offsetInts + 0, 4, 8, ...
+ final long k2 = getLong(key, offsetInts + (i << 2) + 2, 2); //offsetInts + 2, 6, 10, ...
hashState.blockMix128(k1, k2);
}
- // Get the tail index, remainder length
+ // Get the tail index wrt hashed portion, remainder length
final int tail = nblocks << 2; // 4 ints per block
- final int rem = ints - tail; // remainder ints: 0,1,2,3
+ final int rem = lengthInts - tail; // remainder ints: 0,1,2,3
// Get the tail
final long k1;
final long k2;
if (rem > 2) { //k1 -> whole; k2 -> partial
- k1 = getLong(key, tail, 2);
- k2 = getLong(key, tail + 2, rem - 2);
+ k1 = getLong(key, offsetInts + tail, 2);
+ k2 = getLong(key, offsetInts + tail + 2, rem - 2);
}
- else { //k1 -> whole, partial or 0; k2 == 0
- k1 = (rem == 0) ? 0 : getLong(key, tail, rem);
+ else { //k1 -> whole(2), partial(1) or 0; k2 == 0
+ k1 = rem == 0 ? 0 : getLong(key, offsetInts + tail, rem);
k2 = 0;
}
// Mix the tail into the hash and return
- return hashState.finalMix128(k1, k2, ints << 2); //convert to bytes
+ return hashState.finalMix128(k1, k2, lengthInts << 2); //convert to bytes
}
- //--Hash of char[]----------------------------------------------------
+
+ //--Hash of char[]-------------------------------------------------------
/**
- * Returns a long array of size 2, which is a 128-bit hash of the input.
+ * Hash the given char[] array.
*
- * @param key The input char[] array. Must be non-null and non-empty.
+ * @param key The input char[] array. It must be non-null and non-empty.
* @param seed A long valued seed.
- * @return the hash.
+ * @return a 128-bit hash of the input as a long array of size 2
*/
public static long[] hash(final char[] key, final long seed) {
+ return hash(key, 0, key.length, seed);
+ }
+
+ /**
+ * Hash a portion of the given char[] array.
+ *
+ * @param key The input char[] array. It must be non-null and non-empty.
+ * @param offsetChars the starting offset in chars.
+ * @param lengthChars the length in chars of the portion of the array to be hashed.
+ * @param seed A long valued seed.
+ * @return a 128-bit hash of the input as a long array of size 2
+ */
+ public static long[] hash(final char[] key, final int offsetChars, final int lengthChars, final long seed) {
+ Objects.requireNonNull(key);
+ final int arrLen = key.length;
+ checkPositive(arrLen);
+ Util.checkBounds(offsetChars, lengthChars, arrLen);
final HashState hashState = new HashState(seed, seed);
- final int chars = key.length; //in chars
+
// Number of full 128-bit blocks of 8 chars.
// Possible exclusion of a remainder of up to 7 chars.
- final int nblocks = chars >>> 3; //chars / 8
+ final int nblocks = lengthChars >>> 3; //chars / 8
// Process the 128-bit blocks (the body) into the hash
for (int i = 0; i < nblocks; i++ ) { //8 chars per block
- final long k1 = getLong(key, i << 3, 4); //0, 8, 16, ...
- final long k2 = getLong(key, (i << 3) + 4, 4); //4, 12, 20, ...
+ final long k1 = getLong(key, offsetChars + (i << 3), 4); //offsetChars + 0, 8, 16, ...
+ final long k2 = getLong(key, offsetChars + (i << 3) + 4, 4); //offsetChars + 4, 12, 20, ...
hashState.blockMix128(k1, k2);
}
- // Get the tail index, remainder length
+ // Get the tail index wrt hashed portion, remainder length
final int tail = nblocks << 3; // 8 chars per block
- final int rem = chars - tail; // remainder chars: 0,1,2,3,4,5,6,7
+ final int rem = lengthChars - tail; // remainder chars: 0,1,2,3,4,5,6,7
// Get the tail
final long k1;
final long k2;
if (rem > 4) { //k1 -> whole; k2 -> partial
- k1 = getLong(key, tail, 4);
- k2 = getLong(key, tail + 4, rem - 4);
+ k1 = getLong(key, offsetChars + tail, 4);
+ k2 = getLong(key, offsetChars + tail + 4, rem - 4);
}
else { //k1 -> whole, partial or 0; k2 == 0
- k1 = (rem == 0) ? 0 : getLong(key, tail, rem);
+ k1 = rem == 0 ? 0 : getLong(key, offsetChars + tail, rem);
k2 = 0;
}
// Mix the tail into the hash and return
- return hashState.finalMix128(k1, k2, chars << 1); //convert to bytes
+ return hashState.finalMix128(k1, k2, lengthChars << 1); //convert to bytes
}
- //--Hash of ByteBuffer------------------------------------------------
+ //--Hash of byte[]-------------------------------------------------------
/**
- * Returns a long array of size 2, which is a 128-bit hash of the input.
+ * Hash the given byte[] array.
*
- * @param buf The input byte buffer. Must be non-null and non-empty.
+ * @param key The input byte[] array. It must be non-null and non-empty.
* @param seed A long valued seed.
- * @return the hash.
- */
- public static long[] hash(final ByteBuffer buf, final long seed) {
- final HashState hashState = new HashState(seed, seed);
- final ByteBuffer littleEndianBuf;
-
- if (buf.order() == ByteOrder.LITTLE_ENDIAN) {
- littleEndianBuf = buf;
- } else {
- littleEndianBuf = buf.duplicate().order(ByteOrder.LITTLE_ENDIAN);
- }
-
- final int bytes = littleEndianBuf.remaining(); //in bytes
- final int offset = littleEndianBuf.position();
-
- // Number of full 128-bit blocks of 16 bytes.
- // Possible exclusion of a remainder of up to 15 bytes.
- final int nblocks = bytes >>> 4; //bytes / 16
-
- // Process the 128-bit blocks (the body) into the hash
- for (int i = 0; i < nblocks; i++ ) { //16 bytes per block
- final long k1 = getLong(littleEndianBuf, offset + (i << 4), 8); //0, 16, 32, ...
- final long k2 = getLong(littleEndianBuf, offset + (i << 4) + 8, 8); //8, 24, 40, ...
- hashState.blockMix128(k1, k2);
- }
-
- // Get the tail index, remainder length
- final int tail = nblocks << 4; //16 bytes per block
- final int rem = bytes - tail; // remainder bytes: 0,1,...,15
-
- // Get the tail
- final long k1;
- final long k2;
- if (rem > 8) { //k1 -> whole; k2 -> partial
- k1 = getLong(littleEndianBuf, offset + tail, 8);
- k2 = getLong(littleEndianBuf, offset + tail + 8, rem - 8);
- }
- else { //k1 -> whole, partial or 0; k2 == 0
- k1 = (rem == 0) ? 0 : getLong(littleEndianBuf, offset + tail, rem);
- k2 = 0;
- }
- // Mix the tail into the hash and return
- return hashState.finalMix128(k1, k2, bytes);
- }
-
- //--Hash of byte[]----------------------------------------------------
- /**
- * Returns a long array of size 2, which is a 128-bit hash of the input.
- *
- * @param key The input byte[] array. Must be non-null and non-empty.
- * @param seed A long valued seed.
- * @return the hash.
+ * @return a 128-bit hash of the input as a long array of size 2.
*/
public static long[] hash(final byte[] key, final long seed) {
+ return hash(key, 0, key.length, seed);
+ }
+
+ /**
+ * Hash a portion of the given byte[] array.
+ *
+ * @param key The input byte[] array. It must be non-null and non-empty.
+ * @param offsetBytes the starting offset in bytes.
+ * @param lengthBytes the length in bytes of the portion of the array to be hashed.
+ * @param seed A long valued seed.
+ * @return a 128-bit hash of the input as a long array of size 2.
+ */
+ public static long[] hash(final byte[] key, final int offsetBytes, final int lengthBytes, final long seed) {
+ Objects.requireNonNull(key);
+ final int arrLen = key.length;
+ checkPositive(arrLen);
+ Util.checkBounds(offsetBytes, lengthBytes, arrLen);
final HashState hashState = new HashState(seed, seed);
- final int bytes = key.length; //in bytes
// Number of full 128-bit blocks of 16 bytes.
// Possible exclusion of a remainder of up to 15 bytes.
- final int nblocks = bytes >>> 4; //bytes / 16
+ final int nblocks = lengthBytes >>> 4; //bytes / 16
// Process the 128-bit blocks (the body) into the hash
for (int i = 0; i < nblocks; i++ ) { //16 bytes per block
- final long k1 = getLong(key, i << 4, 8); //0, 16, 32, ...
- final long k2 = getLong(key, (i << 4) + 8, 8); //8, 24, 40, ...
+ final long k1 = getLong(key, offsetBytes + (i << 4), 8); //offsetBytes + 0, 16, 32, ...
+ final long k2 = getLong(key, offsetBytes + (i << 4) + 8, 8); //offsetBytes + 8, 24, 40, ...
hashState.blockMix128(k1, k2);
}
- // Get the tail index, remainder length
+ // Get the tail index wrt hashed portion, remainder length
final int tail = nblocks << 4; //16 bytes per block
- final int rem = bytes - tail; // remainder bytes: 0,1,...,15
+ final int rem = lengthBytes - tail; // remainder bytes: 0,1,...,15
// Get the tail
final long k1;
final long k2;
if (rem > 8) { //k1 -> whole; k2 -> partial
- k1 = getLong(key, tail, 8);
- k2 = getLong(key, tail + 8, rem - 8);
+ k1 = getLong(key, offsetBytes + tail, 8);
+ k2 = getLong(key, offsetBytes + tail + 8, rem - 8);
}
else { //k1 -> whole, partial or 0; k2 == 0
- k1 = (rem == 0) ? 0 : getLong(key, tail, rem);
+ k1 = rem == 0 ? 0 : getLong(key, offsetBytes + tail, rem);
k2 = 0;
}
// Mix the tail into the hash and return
- return hashState.finalMix128(k1, k2, bytes);
+ return hashState.finalMix128(k1, k2, lengthBytes);
}
- //--HashState class---------------------------------------------------
+ //--Hash of ByteBuffer---------------------------------------------------
+ /**
+ * Hash the remaining bytes of the given ByteBuffer starting at position().
+ *
+ * @param buf The input ByteBuffer. It must be non-null and non-empty.
+ * @param seed A long valued seed.
+ * @return a 128-bit hash of the input as a long array of size 2.
+ */
+ public static long[] hash(final ByteBuffer buf, final long seed) {
+ Objects.requireNonNull(buf);
+ final int pos = buf.position();
+ final int rem = buf.remaining();
+ checkPositive(rem);
+ Memory mem = Memory.wrap(buf, ByteOrder.LITTLE_ENDIAN).region(pos, rem);
+ return hash(mem, seed);
+ }
+
+ //--Hash of Memory-------------------------------------------------------
+ /**
+ * Hash the given Memory.
+ *
+ * <p>Note: if you want to hash only a portion of Memory, convert it to the
+ * appropriate Region first with ByteOrder = Little Endian. If it is not
+ * Little Endian a new region view will be created as Little Endian.
+ * This does not change the underlying data.
+ *
+ * @param mem The input Memory. It must be non-null and non-empty.
+ * @param seed A long valued seed.
+ * @return a 128-bit hash of the input as a long array of size 2.
+ */
+ public static long[] hash(final Memory mem, final long seed) {
+ Objects.requireNonNull(mem);
+ final long lengthBytes = mem.getCapacity();
+ checkPositive(lengthBytes);
+
+ Memory memLE = mem.getTypeByteOrder() == ByteOrder.LITTLE_ENDIAN
+ ? mem : mem.region(0, lengthBytes, ByteOrder.LITTLE_ENDIAN);
+
+ final HashState hashState = new HashState(seed, seed);
+
+ // Number of full 128-bit blocks of 16 bytes.
+ // Possible exclusion of a remainder of up to 15 bytes.
+ final long nblocks = lengthBytes >>> 4; //bytes / 16
+
+ // Process the 128-bit blocks (the body) into the hash
+ for (long i = 0; i < nblocks; i++ ) { //16 bytes per block
+ final long k1 = memLE.getLong(i << 4); //0, 16, 32, ...
+ final long k2 = memLE.getLong((i << 4) + 8); //8, 24, 40, ...
+ hashState.blockMix128(k1, k2);
+ }
+
+ // Get the tail index wrt hashed portion, remainder length
+ final long tail = nblocks << 4; //16 bytes per block
+ final int rem = (int)(lengthBytes - tail); // remainder bytes: 0,1,...,15
+
+ // Get the tail
+ final long k1;
+ final long k2;
+ if (rem > 8) { //k1 -> whole; k2 -> partial
+ k1 = memLE.getLong(tail);
+ k2 = getLong(memLE, tail + 8, rem - 8);
+ }
+ else { //k1 -> whole, partial or 0; k2 == 0
+ k1 = rem == 0 ? 0 : getLong(memLE, tail, rem);
+ k2 = 0;
+ }
+ // Mix the tail into the hash and return
+ return hashState.finalMix128(k1, k2, lengthBytes);
+ }
+
+ //--HashState class------------------------------------------------------
/**
* Common processing of the 128-bit hash state independent of input type.
*/
@@ -297,12 +403,12 @@
h1 ^= mixK1(k1);
h1 = Long.rotateLeft(h1, 27);
h1 += h2;
- h1 = (h1 * 5) + 0x52dce729;
+ h1 = h1 * 5 + 0x52dce729;
h2 ^= mixK2(k2);
h2 = Long.rotateLeft(h2, 31);
h2 += h1;
- h2 = (h2 * 5) + 0x38495ab5;
+ h2 = h2 * 5 + 0x38495ab5;
}
long[] finalMix128(final long k1, final long k2, final long inputLengthBytes) {
@@ -361,81 +467,7 @@
}
}
- //--Helper methods----------------------------------------------------
- /**
- * Gets a long from the given byte buffer starting at the given position index and continuing for
- * remainder (rem) bytes. The buffer must be in little-endian order. The bytes are extracted in
- * little-endian order. The buffer endianness and limit are not checked.
- *
- * @param buf The given input byte buffer.
- * @param index Zero-based index from the start of the byte array.
- * @param rem Remainder bytes. An integer in the range [1,8].
- * @return long
- */
- private static long getLong(final ByteBuffer buf, final int index, final int rem) {
- long out = 0L;
-
- switch (rem) {
- case 8:
- out = buf.getLong(index);
- break;
- case 7:
- out ^= (buf.get(index + 6) & 0xFFL) << 48;
- case 6:
- out ^= (buf.get(index + 5) & 0xFFL) << 40;
- case 5:
- out ^= (buf.get(index + 4) & 0xFFL) << 32;
- case 4:
- out ^= (buf.get(index + 3) & 0xFFL) << 24;
- case 3:
- out ^= (buf.get(index + 2) & 0xFFL) << 16;
- case 2:
- out ^= (buf.get(index + 1) & 0xFFL) << 8;
- case 1:
- out ^= buf.get(index) & 0xFFL;
- }
-
- return out;
- }
-
- //--Helper methods----------------------------------------------------
- /**
- * Gets a long from the given byte array starting at the given byte array index and continuing for
- * remainder (rem) bytes. The bytes are extracted in little-endian order. There is no limit
- * checking.
- *
- * @param bArr The given input byte array.
- * @param index Zero-based index from the start of the byte array.
- * @param rem Remainder bytes. An integer in the range [1,8].
- * @return long
- */
- private static long getLong(final byte[] bArr, final int index, final int rem) {
- long out = 0L;
- for (int i = rem; i-- > 0;) { //i= 7,6,5,4,3,2,1,0
- final byte b = bArr[index + i];
- out ^= (b & 0xFFL) << (i * 8); //equivalent to |=
- }
- return out;
- }
-
- /**
- * Gets a long from the given char array starting at the given char array index and continuing for
- * remainder (rem) chars. The chars are extracted in little-endian order. There is no limit
- * checking.
- *
- * @param charArr The given input char array.
- * @param index Zero-based index from the start of the char array.
- * @param rem Remainder chars. An integer in the range [1,4].
- * @return long
- */
- private static long getLong(final char[] charArr, final int index, final int rem) {
- long out = 0L;
- for (int i = rem; i-- > 0;) { //i= 3,2,1,0
- final char c = charArr[index + i];
- out ^= (c & 0xFFFFL) << (i * 16); //equivalent to |=
- }
- return out;
- }
+ //--Helper methods-------------------------------------------------------
/**
* Gets a long from the given int array starting at the given int array index and continuing for
@@ -451,9 +483,74 @@
long out = 0L;
for (int i = rem; i-- > 0;) { //i= 1,0
final int v = intArr[index + i];
- out ^= (v & 0xFFFFFFFFL) << (i * 32); //equivalent to |=
+ out ^= (v & 0xFFFFFFFFL) << i * 32; //equivalent to |=
}
return out;
}
+ /**
+ * Gets a long from the given char array starting at the given char array index and continuing for
+ * remainder (rem) chars. The chars are extracted in little-endian order. There is no limit
+ * checking.
+ *
+ * @param charArr The given input char array.
+ * @param index Zero-based index from the start of the char array.
+ * @param rem Remainder chars. An integer in the range [1,4].
+ * @return a long
+ */
+ private static long getLong(final char[] charArr, final int index, final int rem) {
+ long out = 0L;
+ for (int i = rem; i-- > 0;) { //i= 3,2,1,0
+ final char c = charArr[index + i];
+ out ^= (c & 0xFFFFL) << i * 16; //equivalent to |=
+ }
+ return out;
+ }
+
+ /**
+ * Gets a long from the given byte array starting at the given byte array index and continuing for
+ * remainder (rem) bytes. The bytes are extracted in little-endian order. There is no limit
+ * checking.
+ *
+ * @param bArr The given input byte array.
+ * @param index Zero-based index from the start of the byte array.
+ * @param rem Remainder bytes. An integer in the range [1,8].
+ * @return a long
+ */
+ private static long getLong(final byte[] bArr, final int index, final int rem) {
+ long out = 0L;
+ for (int i = rem; i-- > 0;) { //i= 7,6,5,4,3,2,1,0
+ final byte b = bArr[index + i];
+ out ^= (b & 0xFFL) << i * 8; //equivalent to |=
+ }
+ return out;
+ }
+
+ /**
+ * Gets a long from the given Memory starting at the given offsetBytes and continuing for
+ * remainder (rem) bytes. The bytes are extracted in little-endian order. There is no limit
+ * checking.
+ *
+ * @param mem The given input Memory.
+ * @param offsetBytes Zero-based offset in bytes from the start of the Memory.
+ * @param rem Remainder bytes. An integer in the range [1,8].
+ * @return a long
+ */
+ private static long getLong(final Memory mem, final long offsetBytes, final int rem) {
+ long out = 0L;
+ if (rem == 8) {
+ return mem.getLong(offsetBytes);
+ }
+ for (int i = rem; i-- > 0; ) { //i= 7,6,5,4,3,2,1,0
+ final byte b = mem.getByte(offsetBytes + i);
+ out ^= (b & 0xFFL) << (i << 3); //equivalent to |=
+ }
+ return out;
+ }
+
+ private static void checkPositive(long size) {
+ if (size <= 0) {
+ throw new SketchesArgumentException("Array size must not be negative or zero: " + size);
+ }
+ }
}
diff --git a/src/test/java/org/apache/datasketches/hash/MurmurHash3Test.java b/src/test/java/org/apache/datasketches/hash/MurmurHash3Test.java
index d582338..2db281c 100644
--- a/src/test/java/org/apache/datasketches/hash/MurmurHash3Test.java
+++ b/src/test/java/org/apache/datasketches/hash/MurmurHash3Test.java
@@ -304,18 +304,18 @@
private static long[] stringToLongs(String in) {
byte[] bArr = in.getBytes(UTF_8);
int inLen = bArr.length;
- int outLen = (inLen / 8) + (((inLen % 8) != 0) ? 1 : 0);
+ int outLen = inLen / 8 + (inLen % 8 != 0 ? 1 : 0);
long[] out = new long[outLen];
- for (int i = 0; i < (outLen - 1); i++ ) {
+ for (int i = 0; i < outLen - 1; i++ ) {
for (int j = 0; j < 8; j++ ) {
- out[i] |= ((bArr[(i * 8) + j] & 0xFFL) << (j * 8));
+ out[i] |= (bArr[i * 8 + j] & 0xFFL) << j * 8;
}
}
int inTail = 8 * (outLen - 1);
int rem = inLen - inTail;
for (int j = 0; j < rem; j++ ) {
- out[outLen - 1] |= ((bArr[inTail + j] & 0xFFL) << (j * 8));
+ out[outLen - 1] |= (bArr[inTail + j] & 0xFFL) << j * 8;
}
return out;
}
@@ -323,18 +323,18 @@
private static int[] stringToInts(String in) {
byte[] bArr = in.getBytes(UTF_8);
int inLen = bArr.length;
- int outLen = (inLen / 4) + (((inLen % 4) != 0) ? 1 : 0);
+ int outLen = inLen / 4 + (inLen % 4 != 0 ? 1 : 0);
int[] out = new int[outLen];
- for (int i = 0; i < (outLen - 1); i++ ) {
+ for (int i = 0; i < outLen - 1; i++ ) {
for (int j = 0; j < 4; j++ ) {
- out[i] |= ((bArr[(i * 4) + j] & 0xFFL) << (j * 8));
+ out[i] |= (bArr[i * 4 + j] & 0xFFL) << j * 8;
}
}
int inTail = 4 * (outLen - 1);
int rem = inLen - inTail;
for (int j = 0; j < rem; j++ ) {
- out[outLen - 1] |= ((bArr[inTail + j] & 0xFFL) << (j * 8));
+ out[outLen - 1] |= (bArr[inTail + j] & 0xFFL) << j * 8;
}
return out;
}
@@ -348,31 +348,22 @@
*/
private static void checkHashByteBuf(byte[] key, long h1, long h2) {
// Include dummy byte at start, end to make sure position, limit are respected.
- ByteBuffer bigEndianBuf = ByteBuffer.allocate(key.length + 2).order(ByteOrder.BIG_ENDIAN);
- bigEndianBuf.position(1);
- bigEndianBuf.put(key);
- bigEndianBuf.limit(1 + key.length);
- bigEndianBuf.position(1);
+ ByteBuffer buf = ByteBuffer.allocate(key.length + 2).order(ByteOrder.LITTLE_ENDIAN);
+ buf.position(1);
+ buf.put(key);
+ buf.limit(1 + key.length);
+ buf.position(1);
- // Test with little endian too.
- ByteBuffer littleEndianBuf = bigEndianBuf.duplicate().order(ByteOrder.LITTLE_ENDIAN);
-
- long[] result1 = MurmurHash3.hash(bigEndianBuf, 0);
- long[] result2 = MurmurHash3.hash(littleEndianBuf, 0);
+ long[] result1 = MurmurHash3.hash(buf, 0);
// Position, limit, order should not be changed.
- Assert.assertEquals(1, bigEndianBuf.position());
- Assert.assertEquals(1, littleEndianBuf.position());
- Assert.assertEquals(1 + key.length, bigEndianBuf.limit());
- Assert.assertEquals(1 + key.length, littleEndianBuf.limit());
- Assert.assertEquals(ByteOrder.BIG_ENDIAN, bigEndianBuf.order());
- Assert.assertEquals(ByteOrder.LITTLE_ENDIAN, littleEndianBuf.order());
+ Assert.assertEquals(1, buf.position());
+ Assert.assertEquals(1 + key.length, buf.limit());
+ Assert.assertEquals(ByteOrder.LITTLE_ENDIAN, buf.order());
// Check the actual hashes.
Assert.assertEquals(result1[0], h1);
Assert.assertEquals(result1[1], h2);
- Assert.assertEquals(result2[0], h1);
- Assert.assertEquals(result2[1], h2);
}
@Test