blob: d58233833eb361150057cb9981605a5d6bef7d59 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.hash;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.datasketches.hash.MurmurHash3.hash;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import org.testng.Assert;
import org.testng.annotations.Test;
/**
* Tests the MurmurHash3 against specific, known hash results given known
* inputs obtained from the public domain C++ version 150.
*
* @author Lee Rhodes
*/
@SuppressWarnings("javadoc")
public class MurmurHash3Test {
@Test
public void checkByteArrRemainderGT8() { //byte[], remainder > 8
String keyStr = "The quick brown fox jumps over the lazy dog";
byte[] key = keyStr.getBytes(UTF_8);
long[] result = hash(key, 0);
//Should be:
long h1 = 0xe34bbc7bbc071b6cL;
long h2 = 0x7a433ca9c49a9347L;
Assert.assertEquals(result[0], h1);
Assert.assertEquals(result[1], h2);
}
@Test
public void checkByteBufRemainderGT8() { //byte buffer, remainder > 8
String keyStr = "The quick brown fox jumps over the lazy dog";
byte[] key = keyStr.getBytes(UTF_8);
//Should be:
long h1 = 0xe34bbc7bbc071b6cL;
long h2 = 0x7a433ca9c49a9347L;
checkHashByteBuf(key, h1, h2);
}
@Test
public void checkByteArrChange1bit() { //byte[], change one bit
String keyStr = "The quick brown fox jumps over the lazy eog";
byte[] key = keyStr.getBytes(UTF_8);
long[] result = hash(key, 0);
//Should be:
long h1 = 0x362108102c62d1c9L;
long h2 = 0x3285cd100292b305L;
Assert.assertEquals(result[0], h1);
Assert.assertEquals(result[1], h2);
}
@Test
public void checkByteBufChange1bit() { //byte buffer, change one bit
String keyStr = "The quick brown fox jumps over the lazy eog";
byte[] key = keyStr.getBytes(UTF_8);
//Should be:
long h1 = 0x362108102c62d1c9L;
long h2 = 0x3285cd100292b305L;
checkHashByteBuf(key, h1, h2);
}
@Test
public void checkByteArrRemainderLt8() { //byte[], test a remainder < 8
String keyStr = "The quick brown fox jumps over the lazy dogdogdog";
byte[] key = keyStr.getBytes(UTF_8);
long[] result = hash(key, 0);
//Should be;
long h1 = 0x9c8205300e612fc4L;
long h2 = 0xcbc0af6136aa3df9L;
Assert.assertEquals(result[0], h1);
Assert.assertEquals(result[1], h2);
}
@Test
public void checkByteBufRemainderLt8() { //byte buffer, test a remainder < 8
String keyStr = "The quick brown fox jumps over the lazy dogdogdog";
byte[] key = keyStr.getBytes(UTF_8);
//Should be;
long h1 = 0x9c8205300e612fc4L;
long h2 = 0xcbc0af6136aa3df9L;
checkHashByteBuf(key, h1, h2);
}
@Test
public void checkByteArrReaminderEQ8() { //byte[], test a remainder = 8
String keyStr = "The quick brown fox jumps over the lazy1";
byte[] key = keyStr.getBytes(UTF_8);
long[] result = hash(key, 0);
//Should be:
long h1 = 0xe3301a827e5cdfe3L;
long h2 = 0xbdbf05f8da0f0392L;
Assert.assertEquals(result[0], h1);
Assert.assertEquals(result[1], h2);
}
@Test
public void checkByteBufReaminderEQ8() { //byte buffer, test a remainder = 8
String keyStr = "The quick brown fox jumps over the lazy1";
byte[] key = keyStr.getBytes(UTF_8);
//Should be:
long h1 = 0xe3301a827e5cdfe3L;
long h2 = 0xbdbf05f8da0f0392L;
checkHashByteBuf(key, h1, h2);
}
/**
* This test should have the exact same output as Test4
*/
@Test
public void checkLongArrRemainderEQ8() { //long[], test a remainder = 8
String keyStr = "The quick brown fox jumps over the lazy1";
long[] key = stringToLongs(keyStr);
long[] result = hash(key, 0);
//Should be:
long h1 = 0xe3301a827e5cdfe3L;
long h2 = 0xbdbf05f8da0f0392L;
Assert.assertEquals(result[0], h1);
Assert.assertEquals(result[1], h2);
}
/**
* This test should have the exact same output as Test4
*/
@Test
public void checkIntArrRemainderEQ8() { //int[], test a remainder = 8
String keyStr = "The quick brown fox jumps over the lazy1"; //40B
int[] key = stringToInts(keyStr);
long[] result = hash(key, 0);
//Should be:
long h1 = 0xe3301a827e5cdfe3L;
long h2 = 0xbdbf05f8da0f0392L;
Assert.assertEquals(result[0], h1);
Assert.assertEquals(result[1], h2);
}
@Test
public void checkIntArrRemainderEQ0() { //int[], test a remainder = 0
String keyStr = "The quick brown fox jumps over t"; //32B
int[] key = stringToInts(keyStr);
long[] result = hash(key, 0);
//Should be:
long h1 = 0xdf6af91bb29bdacfL;
long h2 = 0x91a341c58df1f3a6L;
Assert.assertEquals(result[0], h1);
Assert.assertEquals(result[1], h2);
}
/**
* Tests an odd remainder of int[].
*/
@Test
public void checkIntArrOddRemainder() { //int[], odd remainder
String keyStr = "The quick brown fox jumps over the lazy dog"; //43B
int[] key = stringToInts(keyStr);
long[] result = hash(key, 0);
//Should be:
long h1 = 0x1eb232b0087543f5L;
long h2 = 0xfc4c1383c3ace40fL;
Assert.assertEquals(result[0], h1);
Assert.assertEquals(result[1], h2);
}
/**
* Tests an odd remainder of int[].
*/
@Test
public void checkCharArrOddRemainder() { //char[], odd remainder
String keyStr = "The quick brown fox jumps over the lazy dog.."; //45B
char[] key = keyStr.toCharArray();
long[] result = hash(key, 0);
//Should be:
long h1 = 0xca77b498ea9ed953L;
long h2 = 0x8b8f8ec3a8f4657eL;
Assert.assertEquals(result[0], h1);
Assert.assertEquals(result[1], h2);
}
/**
* Tests an odd remainder of int[].
*/
@Test
public void checkCharArrRemainderEQ0() { //char[], remainder of 0
String keyStr = "The quick brown fox jumps over the lazy "; //40B
char[] key = keyStr.toCharArray();
long[] result = hash(key, 0);
//Should be:
long h1 = 0x51b15e9d0887f9f1L;
long h2 = 0x8106d226786511ebL;
Assert.assertEquals(result[0], h1);
Assert.assertEquals(result[1], h2);
}
@Test
public void checkByteArrAllOnesZeros() { //byte[], test a ones byte and a zeros byte
byte[] key = {
0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e,
0x20, 0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70, 0x73, 0x20, 0x6f, 0x76, 0x65,
0x72, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20, 0x64, 0x6f, 0x67,
(byte) 0xff, 0x64, 0x6f, 0x67, 0x00
};
long[] result = MurmurHash3.hash(key, 0);
//Should be:
long h1 = 0xe88abda785929c9eL;
long h2 = 0x96b98587cacc83d6L;
Assert.assertEquals(result[0], h1);
Assert.assertEquals(result[1], h2);
}
@Test
public void checkByteBufAllOnesZeros() { //byte[], test a ones byte and a zeros byte
byte[] key = {
0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e,
0x20, 0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70, 0x73, 0x20, 0x6f, 0x76, 0x65,
0x72, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20, 0x64, 0x6f, 0x67,
(byte) 0xff, 0x64, 0x6f, 0x67, 0x00
};
long h1 = 0xe88abda785929c9eL;
long h2 = 0x96b98587cacc83d6L;
checkHashByteBuf(key, h1, h2);
}
/**
* This test demonstrates that the hash of byte[], char[], int[], or long[] will produce the
* same hash result if, and only if, all the arrays have the same exact length in bytes, and if
* the contents of the values in the arrays have the same byte endianness and overall order.
*/
@Test
public void checkCrossTypeHashConsistency() {
long[] out;
println("Bytes");
byte[] bArr = {1,2,3,4,5,6,7,8, 9,10,11,12,13,14,15,16, 17,18,19,20,21,22,23,24};
long[] out1 = hash(bArr, 0L);
println(org.apache.datasketches.Util.longToHexBytes(out1[0]));
println(org.apache.datasketches.Util.longToHexBytes(out1[1]));
println("ByteBuffer");
ByteBuffer bBuf = ByteBuffer.wrap(bArr);
out = hash(bBuf, 0L);
Assert.assertEquals(out, out1);
println(org.apache.datasketches.Util.longToHexBytes(out1[0]));
println(org.apache.datasketches.Util.longToHexBytes(out1[1]));
println("Chars");
char[] cArr = {0X0201, 0X0403, 0X0605, 0X0807, 0X0a09, 0X0c0b, 0X0e0d, 0X100f,
0X1211, 0X1413, 0X1615, 0X1817};
out = hash(cArr, 0L);
Assert.assertEquals(out, out1);
println(org.apache.datasketches.Util.longToHexBytes(out[0]));
println(org.apache.datasketches.Util.longToHexBytes(out[1]));
println("Ints");
int[] iArr = {0X04030201, 0X08070605, 0X0c0b0a09, 0X100f0e0d, 0X14131211, 0X18171615};
out = hash(iArr, 0L);
Assert.assertEquals(out, out1);
println(org.apache.datasketches.Util.longToHexBytes(out[0]));
println(org.apache.datasketches.Util.longToHexBytes(out[1]));
println("Longs");
long[] lArr = {0X0807060504030201L, 0X100f0e0d0c0b0a09L, 0X1817161514131211L};
out = hash(lArr, 0L);
Assert.assertEquals(out, out1);
println(org.apache.datasketches.Util.longToHexBytes(out[0]));
println(org.apache.datasketches.Util.longToHexBytes(out[1]));
}
//Helper methods
private static long[] stringToLongs(String in) {
byte[] bArr = in.getBytes(UTF_8);
int inLen = bArr.length;
int outLen = (inLen / 8) + (((inLen % 8) != 0) ? 1 : 0);
long[] out = new long[outLen];
for (int i = 0; i < (outLen - 1); i++ ) {
for (int j = 0; j < 8; j++ ) {
out[i] |= ((bArr[(i * 8) + j] & 0xFFL) << (j * 8));
}
}
int inTail = 8 * (outLen - 1);
int rem = inLen - inTail;
for (int j = 0; j < rem; j++ ) {
out[outLen - 1] |= ((bArr[inTail + j] & 0xFFL) << (j * 8));
}
return out;
}
private static int[] stringToInts(String in) {
byte[] bArr = in.getBytes(UTF_8);
int inLen = bArr.length;
int outLen = (inLen / 4) + (((inLen % 4) != 0) ? 1 : 0);
int[] out = new int[outLen];
for (int i = 0; i < (outLen - 1); i++ ) {
for (int j = 0; j < 4; j++ ) {
out[i] |= ((bArr[(i * 4) + j] & 0xFFL) << (j * 8));
}
}
int inTail = 4 * (outLen - 1);
int rem = inLen - inTail;
for (int j = 0; j < rem; j++ ) {
out[outLen - 1] |= ((bArr[inTail + j] & 0xFFL) << (j * 8));
}
return out;
}
/**
* Tests {@link MurmurHash3#hash(ByteBuffer, long)} on the provided key.
*
* @param key byte array to hash
* @param h1 first half of expected hash
* @param h2 second half of expected hash
*/
private static void checkHashByteBuf(byte[] key, long h1, long h2) {
// Include dummy byte at start, end to make sure position, limit are respected.
ByteBuffer bigEndianBuf = ByteBuffer.allocate(key.length + 2).order(ByteOrder.BIG_ENDIAN);
bigEndianBuf.position(1);
bigEndianBuf.put(key);
bigEndianBuf.limit(1 + key.length);
bigEndianBuf.position(1);
// Test with little endian too.
ByteBuffer littleEndianBuf = bigEndianBuf.duplicate().order(ByteOrder.LITTLE_ENDIAN);
long[] result1 = MurmurHash3.hash(bigEndianBuf, 0);
long[] result2 = MurmurHash3.hash(littleEndianBuf, 0);
// Position, limit, order should not be changed.
Assert.assertEquals(1, bigEndianBuf.position());
Assert.assertEquals(1, littleEndianBuf.position());
Assert.assertEquals(1 + key.length, bigEndianBuf.limit());
Assert.assertEquals(1 + key.length, littleEndianBuf.limit());
Assert.assertEquals(ByteOrder.BIG_ENDIAN, bigEndianBuf.order());
Assert.assertEquals(ByteOrder.LITTLE_ENDIAN, littleEndianBuf.order());
// Check the actual hashes.
Assert.assertEquals(result1[0], h1);
Assert.assertEquals(result1[1], h2);
Assert.assertEquals(result2[0], h1);
Assert.assertEquals(result2[1], h2);
}
@Test
public void printlnTest() {
println("PRINTING: "+this.getClass().getName());
}
/**
* @param s value to print
*/
static void println(String s) {
//System.out.println(s); //disable here
}
}