src/main/java/org/apache/commons/codec/digest/MurmurHash3.java - commons-codec - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.commons.codec.digest;

 import org.apache.commons.codec.binary.StringUtils;

 /**
  * Implementation of the MurmurHash3 32-bit and 128-bit hash functions.
  *
  * <p>
  * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
  * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
  * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
  * </p>
  *
  * <p>
  * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function
  * {@code MurmurHash3_x64_128} from Austin Applyby's original {@code c++} code in SMHasher.
  * </p>
  *
  * <p>
  * This is public domain code with no copyrights. From home page of
  * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
  * </p>
  *
  * <blockquote> "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
  * code." </blockquote>
  *
  * <p>
  * Original adaption from Apache Hive. That adaption contains a {@code hash64} method that is not part of the original
  * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a
  * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes.
  * <p>
  *
  * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
  * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 c++
  *      code</a>
  * @see <a href=
  *      "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java">
  *      Apache Hive Murmer3</a>
  * @since 1.13
  */
 public final class MurmurHash3 {

     /**
      * A random number to use for a hash code.
      *
      * @deprecated This is not used internally and will be removed in a future release.
      */
     @Deprecated
     public static final long NULL_HASHCODE = 2862933555777941757L;

     /**
      * A default seed to use for the murmur hash algorithm.
      * Has the value {@code 104729}.
      */
     public static final int DEFAULT_SEED = 104729;

     /** TODO Replace on Java 8 with Long.BYTES. */
     static final int LONG_BYTES = Long.SIZE / Byte.SIZE;

     /** TODO Replace on Java 8 with Integer.BYTES. */
     static final int INTEGER_BYTES = Integer.SIZE / Byte.SIZE;

     /** TODO Replace on Java 8 with Short.BYTES. */
     static final int SHORT_BYTES = Short.SIZE / Byte.SIZE;

     // Constants for 32-bit variant
     private static final int C1_32 = 0xcc9e2d51;
     private static final int C2_32 = 0x1b873593;
     private static final int R1_32 = 15;
     private static final int R2_32 = 13;
     private static final int M_32 = 5;
     private static final int N_32 = 0xe6546b64;

     // Constants for 128-bit variant
     private static final long C1 = 0x87c37b91114253d5L;
     private static final long C2 = 0x4cf5ad432745937fL;
     private static final int R1 = 31;
     private static final int R2 = 27;
     private static final int R3 = 33;
     private static final int M = 5;
     private static final int N1 = 0x52dce729;
     private static final int N2 = 0x38495ab5;

     /** No instance methods. */
     private MurmurHash3() {
     }

     /**
      * Generates 32-bit hash from two longs with a default seed value.
      * This is a helper method that will produce the same result as:
      *
      * <pre>
      * int offset = 0;
      * int seed = 104729;
      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
      *                                            .putLong(data1)
      *                                            .putLong(data2)
      *                                            .array(), offset, 16, seed);
      * </pre>
      *
      * @param data1 The first long to hash
      * @param data2 The second long to hash
      * @return The 32-bit hash
      * @see #hash32x86(byte[], int, int, int)
      */
     public static int hash32(final long data1, final long data2) {
         return hash32(data1, data2, DEFAULT_SEED);
     }

     /**
      * Generates 32-bit hash from two longs with the given seed.
      * This is a helper method that will produce the same result as:
      *
      * <pre>
      * int offset = 0;
      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
      *                                            .putLong(data1)
      *                                            .putLong(data2)
      *                                            .array(), offset, 16, seed);
      * </pre>
      *
      * @param data1 The first long to hash
      * @param data2 The second long to hash
      * @param seed The initial seed value
      * @return The 32-bit hash
      * @see #hash32x86(byte[], int, int, int)
      */
     public static int hash32(final long data1, final long data2, final int seed) {
         int hash = seed;
         final long r0 = Long.reverseBytes(data1);
         final long r1 = Long.reverseBytes(data2);

         hash = mix32((int) r0, hash);
         hash = mix32((int) (r0 >>> 32), hash);
         hash = mix32((int) (r1), hash);
         hash = mix32((int) (r1 >>> 32), hash);

         hash ^= LONG_BYTES * 2;
         return fmix32(hash);
     }

     /**
      * Generates 32-bit hash from a long with a default seed value.
      * This is a helper method that will produce the same result as:
      *
      * <pre>
      * int offset = 0;
      * int seed = 104729;
      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
      *                                            .putLong(data)
      *                                            .array(), offset, 8, seed);
      * </pre>
      *
      * @param data The long to hash
      * @return The 32-bit hash
      * @see #hash32x86(byte[], int, int, int)
      */
     public static int hash32(final long data) {
         return hash32(data, DEFAULT_SEED);
     }

     /**
      * Generates 32-bit hash from a long with the given seed.
      * This is a helper method that will produce the same result as:
      *
      * <pre>
      * int offset = 0;
      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
      *                                            .putLong(data)
      *                                            .array(), offset, 8, seed);
      * </pre>
      *
      * @param data The long to hash
      * @param seed The initial seed value
      * @return The 32-bit hash
      * @see #hash32x86(byte[], int, int, int)
      */
     public static int hash32(final long data, final int seed) {
         int hash = seed;
         final long r0 = Long.reverseBytes(data);

         hash = mix32((int) r0, hash);
         hash = mix32((int) (r0 >>> 32), hash);

         hash ^= LONG_BYTES;
         return fmix32(hash);
     }

     /**
      * Generates 32-bit hash from the byte array with a default seed.
      * This is a helper method that will produce the same result as:
      *
      * <pre>
      * int offset = 0;
      * int seed = 104729;
      * int hash = MurmurHash3.hash32(data, offset, data.length, seed);
      * </pre>
      *
      * <p>This implementation contains a sign-extension bug in the finalization step of
      * any bytes left over from dividing the length by 4. This manifests if any of these
      * bytes are negative.<p>
      *
      * @param data The input byte array
      * @return The 32-bit hash
      * @see #hash32(byte[], int, int, int)
      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
      */
     @Deprecated
     public static int hash32(final byte[] data) {
         return hash32(data, 0, data.length, DEFAULT_SEED);
     }

     /**
      * Generates 32-bit hash from a string with a default seed.
      * <p>
      * Before 1.14 the string was converted using default encoding.
      * Since 1.14 the string is converted to bytes using UTF-8 encoding.
      * </p>
      * This is a helper method that will produce the same result as:
      *
      * <pre>
      * int offset = 0;
      * int seed = 104729;
      * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
      * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed);
      * </pre>
      *
      * <p>This implementation contains a sign-extension bug in the finalization step of
      * any bytes left over from dividing the length by 4. This manifests if any of these
      * bytes are negative.<p>
      *
      * @param data The input string
      * @return The 32-bit hash
      * @see #hash32(byte[], int, int, int)
      * @deprecated Use {@link #hash32x86(byte[], int, int, int)} with the bytes returned from
      * {@link String#getBytes(java.nio.charset.Charset)}. This corrects the processing of trailing bytes.
      */
     @Deprecated
     public static int hash32(final String data) {
         final byte[] bytes = StringUtils.getBytesUtf8(data);
         return hash32(bytes, 0, bytes.length, DEFAULT_SEED);
     }

     /**
      * Generates 32-bit hash from the byte array with the given length and a default seed.
      * This is a helper method that will produce the same result as:
      *
      * <pre>
      * int offset = 0;
      * int seed = 104729;
      * int hash = MurmurHash3.hash32(data, offset, length, seed);
      * </pre>
      *
      * <p>This implementation contains a sign-extension bug in the finalization step of
      * any bytes left over from dividing the length by 4. This manifests if any of these
      * bytes are negative.<p>
      *
      * @param data The input byte array
      * @param length The length of array
      * @return The 32-bit hash
      * @see #hash32(byte[], int, int, int)
      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
      */
     @Deprecated
     public static int hash32(final byte[] data, final int length) {
         return hash32(data, length, DEFAULT_SEED);
     }

     /**
      * Generates 32-bit hash from the byte array with the given length and seed. This is a
      * helper method that will produce the same result as:
      *
      * <pre>
      * int offset = 0;
      * int hash = MurmurHash3.hash32(data, offset, length, seed);
      * </pre>
      *
      * <p>This implementation contains a sign-extension bug in the finalization step of
      * any bytes left over from dividing the length by 4. This manifests if any of these
      * bytes are negative.<p>
      *
      * @param data The input byte array
      * @param length The length of array
      * @param seed The initial seed value
      * @return The 32-bit hash
      * @see #hash32(byte[], int, int, int)
      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
      */
     @Deprecated
     public static int hash32(final byte[] data, final int length, final int seed) {
         return hash32(data, 0, length, seed);
     }

     /**
      * Generates 32-bit hash from the byte array with the given offset, length and seed.
      *
      * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
      * from Austin Applyby's original MurmurHash3 {@code c++} code in SMHasher.</p>
      *
      * <p>This implementation contains a sign-extension bug in the finalization step of
      * any bytes left over from dividing the length by 4. This manifests if any of these
      * bytes are negative.<p>
      *
      * @param data The input byte array
      * @param offset The offset of data
      * @param length The length of array
      * @param seed The initial seed value
      * @return The 32-bit hash
      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
      */
     @Deprecated
     public static int hash32(final byte[] data, final int offset, final int length, final int seed) {
         int hash = seed;
         final int nblocks = length >> 2;

         // body
         for (int i = 0; i < nblocks; i++) {
             final int index = offset + (i << 2);
             final int k = getLittleEndianInt(data, index);
             hash = mix32(k, hash);
         }

         // tail
         // ************
         // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
         // ************
         final int index = offset + (nblocks << 2);
         int k1 = 0;
         switch (offset + length - index) {
         case 3:
             k1 ^= data[index + 2] << 16;
         case 2:
             k1 ^= data[index + 1] << 8;
         case 1:
             k1 ^= data[index];

             // mix functions
             k1 *= C1_32;
             k1 = Integer.rotateLeft(k1, R1_32);
             k1 *= C2_32;
             hash ^= k1;
         }

         hash ^= length;
         return fmix32(hash);
     }

     /**
      * Generates 32-bit hash from the byte array with a seed of zero.
      * This is a helper method that will produce the same result as:
      *
      * <pre>
      * int offset = 0;
      * int seed = 0;
      * int hash = MurmurHash3.hash32x86(data, offset, data.length, seed);
      * </pre>
      *
      * @param data The input byte array
      * @return The 32-bit hash
      * @see #hash32x86(byte[], int, int, int)
      * @since 1.14
      */
     public static int hash32x86(final byte[] data) {
         return hash32x86(data, 0, data.length, 0);
     }

     /**
      * Generates 32-bit hash from the byte array with the given offset, length and seed.
      *
      * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
      * from Austin Applyby's original MurmurHash3 {@code c++} code in SMHasher.</p>
      *
      * @param data The input byte array
      * @param offset The offset of data
      * @param length The length of array
      * @param seed The initial seed value
      * @return The 32-bit hash
      * @since 1.14
      */
     public static int hash32x86(final byte[] data, final int offset, final int length, final int seed) {
         int hash = seed;
         final int nblocks = length >> 2;

         // body
         for (int i = 0; i < nblocks; i++) {
             final int index = offset + (i << 2);
             final int k = getLittleEndianInt(data, index);
             hash = mix32(k, hash);
         }

         // tail
         final int index = offset + (nblocks << 2);
         int k1 = 0;
         switch (offset + length - index) {
         case 3:
             k1 ^= (data[index + 2] & 0xff) << 16;
         case 2:
             k1 ^= (data[index + 1] & 0xff) << 8;
         case 1:
             k1 ^= (data[index] & 0xff);

             // mix functions
             k1 *= C1_32;
             k1 = Integer.rotateLeft(k1, R1_32);
             k1 *= C2_32;
             hash ^= k1;
         }

         hash ^= length;
         return fmix32(hash);
     }

     /**
      * Generates 64-bit hash from a long with a default seed.
      *
      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
      *
      * <p>This is a Murmur3-like 64-bit variant.
      * The method does not produce the same result as either half of the hash bytes from
      * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code long}.
      * This method will be removed in a future release.</p>
      *
      * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
      * this result as the default seed is positive.</p>
      *
      * <p>This is a helper method that will produce the same result as:</p>
      *
      * <pre>
      * int offset = 0;
      * int seed = 104729;
      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(8)
      *                                          .putLong(data)
      *                                          .array(), offset, 8, seed);
      * </pre>
      *
      * @param data The long to hash
      * @return The 64-bit hash
      * @see #hash64(byte[], int, int, int)
      * @deprecated Not part of the MurmurHash3 implementation.
      * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code long}.
      */
     @Deprecated
     public static long hash64(final long data) {
         long hash = DEFAULT_SEED;
         long k = Long.reverseBytes(data);
         final int length = LONG_BYTES;
         // mix functions
         k *= C1;
         k = Long.rotateLeft(k, R1);
         k *= C2;
         hash ^= k;
         hash = Long.rotateLeft(hash, R2) * M + N1;
         // finalization
         hash ^= length;
         hash = fmix64(hash);
         return hash;
     }

     /**
      * Generates 64-bit hash from an int with a default seed.
      *
      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
      *
      * <p>This is a Murmur3-like 64-bit variant.
      * The method does not produce the same result as either half of the hash bytes from
      * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code int}.
      * This method will be removed in a future release.</p>
      *
      * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
      * this result as the default seed is positive.</p>
      *
      * <p>This is a helper method that will produce the same result as:</p>
      *
      * <pre>
      * int offset = 0;
      * int seed = 104729;
      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(4)
      *                                          .putInt(data)
      *                                          .array(), offset, 4, seed);
      * </pre>
      *
      * @param data The int to hash
      * @return The 64-bit hash
      * @see #hash64(byte[], int, int, int)
      * @deprecated Not part of the MurmurHash3 implementation.
      * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code int}.
      */
     @Deprecated
     public static long hash64(final int data) {
         long k1 = Integer.reverseBytes(data) & (-1L >>> 32);
         final int length = INTEGER_BYTES;
         long hash = DEFAULT_SEED;
         k1 *= C1;
         k1 = Long.rotateLeft(k1, R1);
         k1 *= C2;
         hash ^= k1;
         // finalization
         hash ^= length;
         hash = fmix64(hash);
         return hash;
     }

     /**
      * Generates 64-bit hash from a short with a default seed.
      *
      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
      *
      * <p>This is a Murmur3-like 64-bit variant.
      * The method does not produce the same result as either half of the hash bytes from
      * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code short}.
      * This method will be removed in a future release.</p>
      *
      * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
      * this result as the default seed is positive.</p>
      *
      * <p>This is a helper method that will produce the same result as:</p>
      *
      * <pre>
      * int offset = 0;
      * int seed = 104729;
      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(2)
      *                                          .putShort(data)
      *                                          .array(), offset, 2, seed);
      * </pre>
      *
      * @param data The short to hash
      * @return The 64-bit hash
      * @see #hash64(byte[], int, int, int)
      * @deprecated Not part of the MurmurHash3 implementation.
      * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code short}.
      */
     @Deprecated
     public static long hash64(final short data) {
         long hash = DEFAULT_SEED;
         long k1 = 0;
         k1 ^= ((long) data & 0xff) << 8;
         k1 ^= ((long) ((data & 0xFF00) >> 8) & 0xff);
         k1 *= C1;
         k1 = Long.rotateLeft(k1, R1);
         k1 *= C2;
         hash ^= k1;

         // finalization
         hash ^= SHORT_BYTES;
         hash = fmix64(hash);
         return hash;
     }

     /**
      * Generates 64-bit hash from a byte array with a default seed.
      *
      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
      *
      * <p>This is a Murmur3-like 64-bit variant.
      * The method does not produce the same result as either half of the hash bytes from
      * {@linkplain #hash128x64(byte[])} with the same byte data.
      * This method will be removed in a future release.</p>
      *
      * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
      * this result as the default seed is positive.</p>
      *
      * <p>This is a helper method that will produce the same result as:</p>
      *
      * <pre>
      * int offset = 0;
      * int seed = 104729;
      * long hash = MurmurHash3.hash64(data, offset, data.length, seed);
      * </pre>
      *
      * @param data The input byte array
      * @return The 64-bit hash
      * @see #hash64(byte[], int, int, int)
      * @deprecated Not part of the MurmurHash3 implementation.
      * Use half of the hash bytes from {@link #hash128x64(byte[])}.
      */
     @Deprecated
     public static long hash64(final byte[] data) {
         return hash64(data, 0, data.length, DEFAULT_SEED);
     }

     /**
      * Generates 64-bit hash from a byte array with the given offset and length and a default seed.
      *
      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
      *
      * <p>This is a Murmur3-like 64-bit variant.
      * The method does not produce the same result as either half of the hash bytes from
      * {@linkplain #hash128x64(byte[])} with the same byte data.
      * This method will be removed in a future release.</p>
      *
      * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
      * this result as the default seed is positive.</p>
      *
      * <p>This is a helper method that will produce the same result as:</p>
      *
      * <pre>
      * int seed = 104729;
      * long hash = MurmurHash3.hash64(data, offset, length, seed);
      * </pre>
      *
      * @param data The input byte array
      * @param offset The offset of data
      * @param length The length of array
      * @return The 64-bit hash
      * @see #hash64(byte[], int, int, int)
      * @deprecated Not part of the MurmurHash3 implementation.
      * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
      */
     @Deprecated
     public static long hash64(final byte[] data, final int offset, final int length) {
         return hash64(data, offset, length, DEFAULT_SEED);
     }

     /**
      * Generates 64-bit hash from a byte array with the given offset, length and seed.
      *
      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
      *
      * <p>This is a Murmur3-like 64-bit variant.
      * This method will be removed in a future release.</p>
      *
      * <p>This implementation contains a sign-extension bug in the seed initialization.
      * This manifests if the seed is negative.</p>
      *
      * <p>This algorithm processes 8 bytes chunks of data in a manner similar to the 16 byte chunks
      * of data processed in the MurmurHash3 {@code MurmurHash3_x64_128} method. However the hash
      * is not mixed with a hash chunk from the next 8 bytes of data. The method will not return
      * the same value as the first or second 64-bits of the function
      * {@link #hash128(byte[], int, int, int)}.</p>
      *
      * <p>Use of this method is not advised. Use the first long returned from
      * {@link #hash128x64(byte[], int, int, int)}.<p>
      *
      * @param data The input byte array
      * @param offset The offset of data
      * @param length The length of array
      * @param seed The initial seed value
      * @return The 64-bit hash
      * @deprecated Not part of the MurmurHash3 implementation.
      * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
      */
     @Deprecated
     public static long hash64(final byte[] data, final int offset, final int length, final int seed) {
         // ************
         // Note: This fails to apply masking using 0xffffffffL to the seed.
         // ************
         long hash = seed;
         final int nblocks = length >> 3;

         // body
         for (int i = 0; i < nblocks; i++) {
             final int index = offset + (i << 3);
             long k = getLittleEndianLong(data, index);

             // mix functions
             k *= C1;
             k = Long.rotateLeft(k, R1);
             k *= C2;
             hash ^= k;
             hash = Long.rotateLeft(hash, R2) * M + N1;
         }

         // tail
         long k1 = 0;
         final int index = offset + (nblocks << 3);
         switch (offset + length - index) {
         case 7:
             k1 ^= ((long) data[index + 6] & 0xff) << 48;
         case 6:
             k1 ^= ((long) data[index + 5] & 0xff) << 40;
         case 5:
             k1 ^= ((long) data[index + 4] & 0xff) << 32;
         case 4:
             k1 ^= ((long) data[index + 3] & 0xff) << 24;
         case 3:
             k1 ^= ((long) data[index + 2] & 0xff) << 16;
         case 2:
             k1 ^= ((long) data[index + 1] & 0xff) << 8;
         case 1:
             k1 ^= ((long) data[index] & 0xff);
             k1 *= C1;
             k1 = Long.rotateLeft(k1, R1);
             k1 *= C2;
             hash ^= k1;
         }

         // finalization
         hash ^= length;
         hash = fmix64(hash);

         return hash;
     }

     /**
      * Generates 128-bit hash from the byte array with a default seed.
      * This is a helper method that will produce the same result as:
      *
      * <pre>
      * int offset = 0;
      * int seed = 104729;
      * int hash = MurmurHash3.hash128(data, offset, data.length, seed);
      * </pre>
      *
      * <p>Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
      * this result as the default seed is positive.</p>
      *
      * @param data The input byte array
      * @return The 128-bit hash (2 longs)
      * @see #hash128(byte[], int, int, int)
      */
     public static long[] hash128(final byte[] data) {
         return hash128(data, 0, data.length, DEFAULT_SEED);
     }

     /**
      * Generates 128-bit hash from the byte array with a seed of zero.
      * This is a helper method that will produce the same result as:
      *
      * <pre>
      * int offset = 0;
      * int seed = 0;
      * int hash = MurmurHash3.hash128x64(data, offset, data.length, seed);
      * </pre>
      *
      * @param data The input byte array
      * @return The 128-bit hash (2 longs)
      * @see #hash128x64(byte[], int, int, int)
      * @since 1.14
      */
     public static long[] hash128x64(final byte[] data) {
         return hash128x64(data, 0, data.length, 0);
     }

     /**
      * Generates 128-bit hash from a string with a default seed.
      * <p>
      * Before 1.14 the string was converted using default encoding.
      * Since 1.14 the string is converted to bytes using UTF-8 encoding.
      * </p>
      * This is a helper method that will produce the same result as:
      *
      * <pre>
      * int offset = 0;
      * int seed = 104729;
      * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
      * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed);
      * </pre>
      *
      * <p>Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
      * this result as the default seed is positive.</p>
      *
      * @param data The input String
      * @return The 128-bit hash (2 longs)
      * @see #hash128(byte[], int, int, int)
      * @deprecated Use {@link #hash128x64(byte[])} using the bytes returned from
      * {@link String#getBytes(java.nio.charset.Charset)}.
      */
     @Deprecated
     public static long[] hash128(final String data) {
         final byte[] bytes = StringUtils.getBytesUtf8(data);
         return hash128(bytes, 0, bytes.length, DEFAULT_SEED);
     }

     /**
      * Generates 128-bit hash from the byte array with the given offset, length and seed.
      *
      * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
      * from Austin Applyby's original MurmurHash3 {@code c++} code in SMHasher.</p>
      *
      * <p>This implementation contains a sign-extension bug in the seed initialization.
      * This manifests if the seed is negative.<p>
      *
      * @param data The input byte array
      * @param offset The first element of array
      * @param length The length of array
      * @param seed The initial seed value
      * @return The 128-bit hash (2 longs)
      * @deprecated Use {@link #hash128x64(byte[], int, int, int)}. This corrects the seed initialization.
      */
     @Deprecated
     public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) {
         // ************
         // Note: This deliberately fails to apply masking using 0xffffffffL to the seed
         // to maintain behavioral compatibility with the original version.
         // The implicit conversion to a long will extend a negative sign
         // bit through the upper 32-bits of the long seed. These should be zero.
         // ************
         return hash128x64Internal(data, offset, length, seed);
     }

     /**
      * Generates 128-bit hash from the byte array with the given offset, length and seed.
      *
      * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
      * from Austin Applyby's original MurmurHash3 {@code c++} code in SMHasher.</p>
      *
      * @param data The input byte array
      * @param offset The first element of array
      * @param length The length of array
      * @param seed The initial seed value
      * @return The 128-bit hash (2 longs)
      * @since 1.14
      */
     public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) {
         // Use an unsigned 32-bit integer as the seed
         return hash128x64Internal(data, offset, length, seed & 0xffffffffL);
     }

     /**
      * Generates 128-bit hash from the byte array with the given offset, length and seed.
      *
      * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
      * from Austin Applyby's original MurmurHash3 {@code c++} code in SMHasher.</p>
      *
      * @param data The input byte array
      * @param offset The first element of array
      * @param length The length of array
      * @param seed The initial seed value
      * @return The 128-bit hash (2 longs)
      */
     private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) {
         long h1 = seed;
         long h2 = seed;
         final int nblocks = length >> 4;

         // body
         for (int i = 0; i < nblocks; i++) {
             final int index = offset + (i << 4);
             long k1 = getLittleEndianLong(data, index);
             long k2 = getLittleEndianLong(data, index + 8);

             // mix functions for k1
             k1 *= C1;
             k1 = Long.rotateLeft(k1, R1);
             k1 *= C2;
             h1 ^= k1;
             h1 = Long.rotateLeft(h1, R2);
             h1 += h2;
             h1 = h1 * M + N1;

             // mix functions for k2
             k2 *= C2;
             k2 = Long.rotateLeft(k2, R3);
             k2 *= C1;
             h2 ^= k2;
             h2 = Long.rotateLeft(h2, R1);
             h2 += h1;
             h2 = h2 * M + N2;
         }

         // tail
         long k1 = 0;
         long k2 = 0;
         final int index = offset + (nblocks << 4);
         switch (offset + length - index) {
         case 15:
             k2 ^= ((long) data[index + 14] & 0xff) << 48;
         case 14:
             k2 ^= ((long) data[index + 13] & 0xff) << 40;
         case 13:
             k2 ^= ((long) data[index + 12] & 0xff) << 32;
         case 12:
             k2 ^= ((long) data[index + 11] & 0xff) << 24;
         case 11:
             k2 ^= ((long) data[index + 10] & 0xff) << 16;
         case 10:
             k2 ^= ((long) data[index + 9] & 0xff) << 8;
         case 9:
             k2 ^= data[index + 8] & 0xff;
             k2 *= C2;
             k2 = Long.rotateLeft(k2, R3);
             k2 *= C1;
             h2 ^= k2;

         case 8:
             k1 ^= ((long) data[index + 7] & 0xff) << 56;
         case 7:
             k1 ^= ((long) data[index + 6] & 0xff) << 48;
         case 6:
             k1 ^= ((long) data[index + 5] & 0xff) << 40;
         case 5:
             k1 ^= ((long) data[index + 4] & 0xff) << 32;
         case 4:
             k1 ^= ((long) data[index + 3] & 0xff) << 24;
         case 3:
             k1 ^= ((long) data[index + 2] & 0xff) << 16;
         case 2:
             k1 ^= ((long) data[index + 1] & 0xff) << 8;
         case 1:
             k1 ^= data[index] & 0xff;
             k1 *= C1;
             k1 = Long.rotateLeft(k1, R1);
             k1 *= C2;
             h1 ^= k1;
         }

         // finalization
         h1 ^= length;
         h2 ^= length;

         h1 += h2;
         h2 += h1;

         h1 = fmix64(h1);
         h2 = fmix64(h2);

         h1 += h2;
         h2 += h1;

         return new long[] { h1, h2 };
     }

     /**
      * Gets the little-endian long from 8 bytes starting at the specified index.
      *
      * @param data The data
      * @param index The index
      * @return The little-endian long
      */
     private static long getLittleEndianLong(final byte[] data, final int index) {
         return (((long) data[index    ] & 0xff)      ) |
                (((long) data[index + 1] & 0xff) <<  8) |
                (((long) data[index + 2] & 0xff) << 16) |
                (((long) data[index + 3] & 0xff) << 24) |
                (((long) data[index + 4] & 0xff) << 32) |
                (((long) data[index + 5] & 0xff) << 40) |
                (((long) data[index + 6] & 0xff) << 48) |
                (((long) data[index + 7] & 0xff) << 56);
     }

     /**
      * Gets the little-endian int from 4 bytes starting at the specified index.
      *
      * @param data The data
      * @param index The index
      * @return The little-endian int
      */
     private static int getLittleEndianInt(final byte[] data, final int index) {
         return ((data[index    ] & 0xff)      ) |
                ((data[index + 1] & 0xff) <<  8) |
                ((data[index + 2] & 0xff) << 16) |
                ((data[index + 3] & 0xff) << 24);
     }

     /**
      * Performs the intermediate mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
      *
      * @param k The data to add to the hash
      * @param hash The current hash
      * @return The new hash
      */
     private static int mix32(int k, int hash) {
         k *= C1_32;
         k = Integer.rotateLeft(k, R1_32);
         k *= C2_32;
         hash ^= k;
         return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
     }

     /**
      * Performs the final avalanche mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
      *
      * @param hash The current hash
      * @return The final hash
      */
     private static int fmix32(int hash) {
         hash ^= (hash >>> 16);
         hash *= 0x85ebca6b;
         hash ^= (hash >>> 13);
         hash *= 0xc2b2ae35;
         hash ^= (hash >>> 16);
         return hash;
     }

     /**
      * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}.
      *
      * @param hash The current hash
      * @return The final hash
      */
     private static long fmix64(long hash) {
         hash ^= (hash >>> 33);
         hash *= 0xff51afd7ed558ccdL;
         hash ^= (hash >>> 33);
         hash *= 0xc4ceb9fe1a85ec53L;
         hash ^= (hash >>> 33);
         return hash;
     }

     /**
      * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
      * hash computed.
      *
      * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
      * from Austin Applyby's original MurmurHash3 {@code c++} code in SMHasher.</p>
      *
      * @since 1.14
      */
     public static class IncrementalHash32x86 {

         /** The size of byte blocks that are processed together. */
         private static final int BLOCK_SIZE = 4;

         /** Up to 3 unprocessed bytes from input data. */
         private final byte[] unprocessed = new byte[3];

         /** The number of unprocessed bytes in the tail data. */
         private int unprocessedLength;

         /** The total number of input bytes added since the start. */
         private int totalLen;

         /**
          * The current running hash.
          * This must be finalised to generate the 32-bit hash value.
          */
         private int hash;

         /**
          * Starts a new incremental hash.
          *
          * @param seed The initial seed value
          */
         public final void start(final int seed) {
             // Reset
             unprocessedLength = totalLen = 0;
             this.hash = seed;
         }

         /**
          * Adds the byte array to the current incremental hash.
          *
          * @param data The input byte array
          * @param offset The offset of data
          * @param length The length of array
          */
         public final void add(final byte[] data, final int offset, final int length) {
             if (length <= 0) {
                 // Nothing to add
                 return;
             }
             totalLen += length;

             // Process the bytes in blocks of 4.
             // New bytes must be added to any current unprocessed bytes,
             // then processed in blocks of 4 and the remaining bytes saved:
             //
             //    |--|---------------------------|--|
             // unprocessed
             //                main block
             //                                remaining

             // Check if the unprocessed bytes and new bytes can fill a block of 4.
             // Make this overflow safe in the event that length is Integer.MAX_VALUE.
             // Equivalent to: (unprocessedLength + length < BLOCK_SIZE)
             if (unprocessedLength + length - BLOCK_SIZE < 0) {
                 // Not enough so add to the unprocessed bytes
                 System.arraycopy(data, offset, unprocessed, unprocessedLength, length);
                 unprocessedLength += length;
                 return;
             }

             // Combine unprocessed bytes with new bytes.
             final int newOffset;
             final int newLength;
             if (unprocessedLength > 0) {
                 int k = -1;
                 switch (unprocessedLength) {
                 case 1:
                     k = orBytes(unprocessed[0], data[offset], data[offset + 1], data[offset + 2]);
                     break;
                 case 2:
                     k = orBytes(unprocessed[0], unprocessed[1], data[offset], data[offset + 1]);
                     break;
                 case 3:
                     k = orBytes(unprocessed[0], unprocessed[1], unprocessed[2], data[offset]);
                     break;
                 default:
                     throw new IllegalStateException("Unprocessed length should be 1, 2, or 3: " + unprocessedLength);
                 }
                 hash = mix32(k, hash);
                 // Update the offset and length
                 final int consumed = BLOCK_SIZE - unprocessedLength;
                 newOffset = offset + consumed;
                 newLength = length - consumed;
             } else {
                 newOffset = offset;
                 newLength = length;
             }

             // Main processing of blocks of 4 bytes
             final int nblocks = newLength >> 2;

             for (int i = 0; i < nblocks; i++) {
                 final int index = newOffset + (i << 2);
                 final int k = getLittleEndianInt(data, index);
                 hash = mix32(k, hash);
             }

             // Save left-over unprocessed bytes
             final int consumed = (nblocks << 2);
             unprocessedLength = newLength - consumed;
             if (unprocessedLength != 0) {
                 System.arraycopy(data, newOffset + consumed, unprocessed, 0, unprocessedLength);
             }
         }

         /**
          * Generate the 32-bit hash value. Repeat calls to this method with no additional data
          * will generate the same hash value.
          *
          * @return The 32-bit hash
          */
         public final int end() {
             // Allow calling end() again after adding no data to return the same result.
             return finalise(hash, unprocessedLength, unprocessed, totalLen);
         }

         /**
          * Finalize the running hash to the output 32-bit hash by processing remaining bytes
          * and performing final mixing.
          *
          * @param hash The running hash
          * @param unprocessedLength The number of unprocessed bytes in the tail data.
          * @param unprocessed Up to 3 unprocessed bytes from input data.
          * @param totalLen The total number of input bytes added since the start.
          * @return The 32-bit hash
          */
         int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
             int result = hash;
             int k1 = 0;
             switch (unprocessedLength) {
             case 3:
                 k1 ^= (unprocessed[2] & 0xff) << 16;
             case 2:
                 k1 ^= (unprocessed[1] & 0xff) << 8;
             case 1:
                 k1 ^= (unprocessed[0] & 0xff);

                 // mix functions
                 k1 *= C1_32;
                 k1 = Integer.rotateLeft(k1, R1_32);
                 k1 *= C2_32;
                 result ^= k1;
             }

             // finalization
             result ^= totalLen;
             return fmix32(result);
         }

         /**
          * Combines the bytes using an Or operation ({@code | } in a little-endian representation
          * of a 32-bit integer; byte 1 will be the least significant byte, byte 4 the most
          * significant.
          *
          * @param b1 The first byte
          * @param b2 The second byte
          * @param b3 The third byte
          * @param b4 The fourth byte
          * @return The 32-bit integer
          */
         private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) {
             return (b1 & 0xff) | ((b2 & 0xff) << 8) | ((b3 & 0xff) << 16) | ((b4 & 0xff) << 24);
         }
     }

     /**
      * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
      * hash computed.
      *
      * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
      * from Austin Applyby's original MurmurHash3 {@code c++} code in SMHasher.</p>
      *
      * <p>This implementation contains a sign-extension bug in the finalization step of
      * any bytes left over from dividing the length by 4. This manifests if any of these
      * bytes are negative.<p>
      *
      * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
      */
     @Deprecated
     public static class IncrementalHash32 extends IncrementalHash32x86 {

         /**
          * {@inheritDoc}
          *
          * <p>This implementation contains a sign-extension bug in the finalization step of
          * any bytes left over from dividing the length by 4. This manifests if any of these
          * bytes are negative.<p>
          *
          * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
          */
         @Override
         @Deprecated
         int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
             int result = hash;
             // ************
             // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
             // ************
             int k1 = 0;
             switch (unprocessedLength) {
             case 3:
                 k1 ^= unprocessed[2] << 16;
             case 2:
                 k1 ^= unprocessed[1] << 8;
             case 1:
                 k1 ^= unprocessed[0];

                 // mix functions
                 k1 *= C1_32;
                 k1 = Integer.rotateLeft(k1, R1_32);
                 k1 *= C2_32;
                 result ^= k1;
             }

             // finalization
             result ^= totalLen;
             return fmix32(result);
         }
     }
 }