blob: 284a6cb648fed8c7378e0513843427ac9a4de167 [file] [log] [blame]
/*
* Copyright 2009-2010 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.uci.ics.hyracks.storage.am.bloomfilter;
import java.nio.ByteBuffer;
import java.util.Random;
import java.util.logging.Level;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
import edu.uci.ics.hyracks.dataflow.common.util.TupleUtils;
import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.MurmurHash128Bit;
import edu.uci.ics.hyracks.storage.am.bloomfilter.util.AbstractBloomFilterTest;
@SuppressWarnings("rawtypes")
public class MurmurHashForITupleReferenceTest extends AbstractBloomFilterTest {
private final static int NUM_LONG_VARS_FOR_128_BIT_HASH = 2;
private final static int DUMMY_FIELD = 0;
private final Random rnd = new Random(50);
@Before
public void setUp() throws HyracksDataException {
super.setUp();
}
@Test
public void murmurhashONEIntegerFieldTest() throws Exception {
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("TESTING MURMUR HASH ONE INTEGER FIELD");
}
int fieldCount = 2;
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
TupleUtils.createIntegerTuple(tupleBuilder, tuple, rnd.nextInt());
tuple.reset(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray());
int keyFields[] = { 0 };
int length = getTupleSize(tuple, keyFields);
long actuals[] = new long[NUM_LONG_VARS_FOR_128_BIT_HASH];
MurmurHash128Bit.hash3_x64_128(tuple, keyFields, 0L, actuals);
ByteBuffer buffer;
byte[] array = new byte[length];
fillArrayWithData(array, keyFields, tuple, length);
buffer = ByteBuffer.wrap(array);
long[] expecteds = hash3_x64_128(buffer, 0, length, 0L);
Assert.assertArrayEquals(expecteds, actuals);
}
@Test
public void murmurhashTwoIntegerFieldsTest() throws Exception {
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("TESTING MURMUR HASH TWO INTEGER FIELDS");
}
int fieldCount = 2;
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
TupleUtils.createIntegerTuple(tupleBuilder, tuple, rnd.nextInt(), rnd.nextInt());
tuple.reset(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray());
int keyFields[] = { 0, 1 };
int length = getTupleSize(tuple, keyFields);
long actuals[] = new long[NUM_LONG_VARS_FOR_128_BIT_HASH];
MurmurHash128Bit.hash3_x64_128(tuple, keyFields, 0L, actuals);
ByteBuffer buffer;
byte[] array = new byte[length];
fillArrayWithData(array, keyFields, tuple, length);
buffer = ByteBuffer.wrap(array);
long[] expecteds = hash3_x64_128(buffer, 0, length, 0L);
Assert.assertArrayEquals(expecteds, actuals);
}
@Test
public void murmurhashOneStringFieldTest() throws Exception {
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("TESTING MURMUR HASH ONE STRING FIELD");
}
int fieldCount = 2;
ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE };
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
String s = randomString(100, rnd);
TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, s);
int keyFields[] = { 0 };
int length = getTupleSize(tuple, keyFields);
long actuals[] = new long[NUM_LONG_VARS_FOR_128_BIT_HASH];
MurmurHash128Bit.hash3_x64_128(tuple, keyFields, 0L, actuals);
byte[] array = new byte[length];
ByteBuffer buffer;
fillArrayWithData(array, keyFields, tuple, length);
buffer = ByteBuffer.wrap(array);
long[] expecteds = hash3_x64_128(buffer, 0, length, 0L);
Assert.assertArrayEquals(expecteds, actuals);
}
@Test
public void murmurhashThreeStringFieldsTest() throws Exception {
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("TESTING MURMUR HASH THREE STRING FIELDS");
}
int fieldCount = 3;
ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
String s1 = randomString(40, rnd);
String s2 = randomString(60, rnd);
String s3 = randomString(20, rnd);
TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, s1, s2, s3);
int keyFields[] = { 2, 0, 1 };
int length = getTupleSize(tuple, keyFields);
long actuals[] = new long[NUM_LONG_VARS_FOR_128_BIT_HASH];
MurmurHash128Bit.hash3_x64_128(tuple, keyFields, 0L, actuals);
byte[] array = new byte[length];
ByteBuffer buffer;
fillArrayWithData(array, keyFields, tuple, length);
buffer = ByteBuffer.wrap(array);
long[] expecteds = hash3_x64_128(buffer, 0, length, 0L);
Assert.assertArrayEquals(expecteds, actuals);
}
private void fillArrayWithData(byte[] array, int[] keyFields, ITupleReference tuple, int length) {
int currentFieldIndex = 0;
int bytePos = 0;
for (int i = 0; i < length; ++i) {
array[i] = tuple.getFieldData(DUMMY_FIELD)[tuple.getFieldStart(keyFields[currentFieldIndex]) + bytePos];
++bytePos;
if (tuple.getFieldLength(keyFields[currentFieldIndex]) == bytePos) {
++currentFieldIndex;
bytePos = 0;
}
}
}
private int getTupleSize(ITupleReference tuple, int[] keyFields) {
int length = 0;
for (int i = 0; i < keyFields.length; ++i) {
length += tuple.getFieldLength(keyFields[i]);
}
return length;
}
/**
* The hash3_x64_128 and getblock functions are borrowed from cassandra source code for testing purpose
**/
protected static long getblock(ByteBuffer key, int offset, int index) {
int i_8 = index << 3;
int blockOffset = offset + i_8;
return ((long) key.get(blockOffset + 0) & 0xff) + (((long) key.get(blockOffset + 1) & 0xff) << 8)
+ (((long) key.get(blockOffset + 2) & 0xff) << 16) + (((long) key.get(blockOffset + 3) & 0xff) << 24)
+ (((long) key.get(blockOffset + 4) & 0xff) << 32) + (((long) key.get(blockOffset + 5) & 0xff) << 40)
+ (((long) key.get(blockOffset + 6) & 0xff) << 48) + (((long) key.get(blockOffset + 7) & 0xff) << 56);
}
public static long[] hash3_x64_128(ByteBuffer key, int offset, int length, long seed) {
final int nblocks = length >> 4; // Process as 128-bit blocks.
long h1 = seed;
long h2 = seed;
long c1 = 0x87c37b91114253d5L;
long c2 = 0x4cf5ad432745937fL;
//----------
// body
for (int i = 0; i < nblocks; i++) {
long k1 = getblock(key, offset, i * 2 + 0);
long k2 = getblock(key, offset, i * 2 + 1);
k1 *= c1;
k1 = MurmurHash128Bit.rotl64(k1, 31);
k1 *= c2;
h1 ^= k1;
h1 = MurmurHash128Bit.rotl64(h1, 27);
h1 += h2;
h1 = h1 * 5 + 0x52dce729;
k2 *= c2;
k2 = MurmurHash128Bit.rotl64(k2, 33);
k2 *= c1;
h2 ^= k2;
h2 = MurmurHash128Bit.rotl64(h2, 31);
h2 += h1;
h2 = h2 * 5 + 0x38495ab5;
}
//----------
// tail
// Advance offset to the unprocessed tail of the data.
offset += nblocks * 16;
long k1 = 0;
long k2 = 0;
switch (length & 15) {
case 15:
k2 ^= ((long) key.get(offset + 14)) << 48;
case 14:
k2 ^= ((long) key.get(offset + 13)) << 40;
case 13:
k2 ^= ((long) key.get(offset + 12)) << 32;
case 12:
k2 ^= ((long) key.get(offset + 11)) << 24;
case 11:
k2 ^= ((long) key.get(offset + 10)) << 16;
case 10:
k2 ^= ((long) key.get(offset + 9)) << 8;
case 9:
k2 ^= ((long) key.get(offset + 8)) << 0;
k2 *= c2;
k2 = MurmurHash128Bit.rotl64(k2, 33);
k2 *= c1;
h2 ^= k2;
case 8:
k1 ^= ((long) key.get(offset + 7)) << 56;
case 7:
k1 ^= ((long) key.get(offset + 6)) << 48;
case 6:
k1 ^= ((long) key.get(offset + 5)) << 40;
case 5:
k1 ^= ((long) key.get(offset + 4)) << 32;
case 4:
k1 ^= ((long) key.get(offset + 3)) << 24;
case 3:
k1 ^= ((long) key.get(offset + 2)) << 16;
case 2:
k1 ^= ((long) key.get(offset + 1)) << 8;
case 1:
k1 ^= ((long) key.get(offset));
k1 *= c1;
k1 = MurmurHash128Bit.rotl64(k1, 31);
k1 *= c2;
h1 ^= k1;
};
//----------
// finalization
h1 ^= length;
h2 ^= length;
h1 += h2;
h2 += h1;
h1 = MurmurHash128Bit.fmix(h1);
h2 = MurmurHash128Bit.fmix(h2);
h1 += h2;
h2 += h1;
return (new long[] { h1, h2 });
}
}