hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/FastByteComparisons.java - hadoop - Git at Google

 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.hadoop.io;

 import java.lang.reflect.Field;
 import java.nio.ByteOrder;
 import java.security.AccessController;
 import java.security.PrivilegedAction;

 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import sun.misc.Unsafe;

 import com.google.common.primitives.Longs;
 import com.google.common.primitives.UnsignedBytes;

 /**
  * Utility code to do optimized byte-array comparison.
  * This is borrowed and slightly modified from Guava's {@link UnsignedBytes}
  * class to be able to compare arrays that start at non-zero offsets.
  */
 abstract class FastByteComparisons {
   static final Logger LOG = LoggerFactory.getLogger(FastByteComparisons.class);

   /**
    * Lexicographically compare two byte arrays.
    */
   public static int compareTo(byte[] b1, int s1, int l1, byte[] b2, int s2,
       int l2) {
     return LexicographicalComparerHolder.BEST_COMPARER.compareTo(
         b1, s1, l1, b2, s2, l2);
   }


   private interface Comparer<T> {
     abstract public int compareTo(T buffer1, int offset1, int length1,
         T buffer2, int offset2, int length2);
   }

   private static Comparer<byte[]> lexicographicalComparerJavaImpl() {
     return LexicographicalComparerHolder.PureJavaComparer.INSTANCE;
   }


   /**
    * Provides a lexicographical comparer implementation; either a Java
    * implementation or a faster implementation based on {@link Unsafe}.
    *
    * <p>Uses reflection to gracefully fall back to the Java implementation if
    * {@code Unsafe} isn't available.
    */
   private static class LexicographicalComparerHolder {
     static final String UNSAFE_COMPARER_NAME =
         LexicographicalComparerHolder.class.getName() + "$UnsafeComparer";

     static final Comparer<byte[]> BEST_COMPARER = getBestComparer();
     /**
      * Returns the Unsafe-using Comparer, or falls back to the pure-Java
      * implementation if unable to do so.
      */
     static Comparer<byte[]> getBestComparer() {
       if (System.getProperty("os.arch").equals("sparc")) {
         if (LOG.isTraceEnabled()) {
           LOG.trace("Lexicographical comparer selected for "
               + "byte aligned system architecture");
         }
         return lexicographicalComparerJavaImpl();
       }
       try {
         Class<?> theClass = Class.forName(UNSAFE_COMPARER_NAME);

         // yes, UnsafeComparer does implement Comparer<byte[]>
         @SuppressWarnings("unchecked")
         Comparer<byte[]> comparer =
           (Comparer<byte[]>) theClass.getEnumConstants()[0];
         if (LOG.isTraceEnabled()) {
           LOG.trace("Unsafe comparer selected for "
               + "byte unaligned system architecture");
         }
         return comparer;
       } catch (Throwable t) { // ensure we really catch *everything*
         if (LOG.isTraceEnabled()) {
           LOG.trace(t.getMessage());
           LOG.trace("Lexicographical comparer selected");
         }
         return lexicographicalComparerJavaImpl();
       }
     }

     private enum PureJavaComparer implements Comparer<byte[]> {
       INSTANCE;

       @Override
       public int compareTo(byte[] buffer1, int offset1, int length1,
           byte[] buffer2, int offset2, int length2) {
         // Short circuit equal case
         if (buffer1 == buffer2 &&
             offset1 == offset2 &&
             length1 == length2) {
           return 0;
         }
         // Bring WritableComparator code local
         int end1 = offset1 + length1;
         int end2 = offset2 + length2;
         for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
           int a = (buffer1[i] & 0xff);
           int b = (buffer2[j] & 0xff);
           if (a != b) {
             return a - b;
           }
         }
         return length1 - length2;
       }
     }

     @SuppressWarnings("unused") // used via reflection
     private enum UnsafeComparer implements Comparer<byte[]> {
       INSTANCE;

       static final Unsafe theUnsafe;

       /** The offset to the first element in a byte array. */
       static final int BYTE_ARRAY_BASE_OFFSET;

       static {
         theUnsafe = (Unsafe) AccessController.doPrivileged(
             new PrivilegedAction<Object>() {
               @Override
               public Object run() {
                 try {
                   Field f = Unsafe.class.getDeclaredField("theUnsafe");
                   f.setAccessible(true);
                   return f.get(null);
                 } catch (NoSuchFieldException e) {
                   // It doesn't matter what we throw;
                   // it's swallowed in getBestComparer().
                   throw new Error();
                 } catch (IllegalAccessException e) {
                   throw new Error();
                 }
               }
             });

         BYTE_ARRAY_BASE_OFFSET = theUnsafe.arrayBaseOffset(byte[].class);

         // sanity check - this should never fail
         if (theUnsafe.arrayIndexScale(byte[].class) != 1) {
           throw new AssertionError();
         }
       }

       static final boolean littleEndian =
         ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN);

       /**
        * Returns true if x1 is less than x2, when both values are treated as
        * unsigned.
        */
       static boolean lessThanUnsigned(long x1, long x2) {
         return (x1 + Long.MIN_VALUE) < (x2 + Long.MIN_VALUE);
       }

       /**
        * Lexicographically compare two arrays.
        *
        * @param buffer1 left operand
        * @param buffer2 right operand
        * @param offset1 Where to start comparing in the left buffer
        * @param offset2 Where to start comparing in the right buffer
        * @param length1 How much to compare from the left buffer
        * @param length2 How much to compare from the right buffer
        * @return 0 if equal, < 0 if left is less than right, etc.
        */
       @Override
       public int compareTo(byte[] buffer1, int offset1, int length1,
           byte[] buffer2, int offset2, int length2) {
         // Short circuit equal case
         if (buffer1 == buffer2 &&
             offset1 == offset2 &&
             length1 == length2) {
           return 0;
         }
         int minLength = Math.min(length1, length2);
         int minWords = minLength / Longs.BYTES;
         int offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET;
         int offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET;

         /*
          * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a
          * time is no slower than comparing 4 bytes at a time even on 32-bit.
          * On the other hand, it is substantially faster on 64-bit.
          */
         for (int i = 0; i < minWords * Longs.BYTES; i += Longs.BYTES) {
           long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i);
           long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i);
           long diff = lw ^ rw;

           if (diff != 0) {
             if (!littleEndian) {
               return lessThanUnsigned(lw, rw) ? -1 : 1;
             }

             // Use binary search
             int n = 0;
             int y;
             int x = (int) diff;
             if (x == 0) {
               x = (int) (diff >>> 32);
               n = 32;
             }

             y = x << 16;
             if (y == 0) {
               n += 16;
             } else {
               x = y;
             }

             y = x << 8;
             if (y == 0) {
               n += 8;
             }
             return (int) (((lw >>> n) & 0xFFL) - ((rw >>> n) & 0xFFL));
           }
         }

         // The epilogue to cover the last (minLength % 8) elements.
         for (int i = minWords * Longs.BYTES; i < minLength; i++) {
           int result = UnsignedBytes.compare(
               buffer1[offset1 + i],
               buffer2[offset2 + i]);
           if (result != 0) {
             return result;
           }
         }
         return length1 - length2;
       }
     }
   }
 }
	/**
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.hadoop.io;

	import java.lang.reflect.Field;
	import java.nio.ByteOrder;
	import java.security.AccessController;
	import java.security.PrivilegedAction;

	import org.slf4j.Logger;
	import org.slf4j.LoggerFactory;
	import sun.misc.Unsafe;

	import com.google.common.primitives.Longs;
	import com.google.common.primitives.UnsignedBytes;

	/**
	* Utility code to do optimized byte-array comparison.
	* This is borrowed and slightly modified from Guava's {@link UnsignedBytes}
	* class to be able to compare arrays that start at non-zero offsets.
	*/
	abstract class FastByteComparisons {
	static final Logger LOG = LoggerFactory.getLogger(FastByteComparisons.class);

	/**
	* Lexicographically compare two byte arrays.
	*/
	public static int compareTo(byte[] b1, int s1, int l1, byte[] b2, int s2,
	int l2) {
	return LexicographicalComparerHolder.BEST_COMPARER.compareTo(
	b1, s1, l1, b2, s2, l2);
	}


	private interface Comparer<T> {
	abstract public int compareTo(T buffer1, int offset1, int length1,
	T buffer2, int offset2, int length2);
	}

	private static Comparer<byte[]> lexicographicalComparerJavaImpl() {
	return LexicographicalComparerHolder.PureJavaComparer.INSTANCE;
	}


	/**
	* Provides a lexicographical comparer implementation; either a Java
	* implementation or a faster implementation based on {@link Unsafe}.
	*
	* <p>Uses reflection to gracefully fall back to the Java implementation if
	* {@code Unsafe} isn't available.
	*/
	private static class LexicographicalComparerHolder {
	static final String UNSAFE_COMPARER_NAME =
	LexicographicalComparerHolder.class.getName() + "$UnsafeComparer";

	static final Comparer<byte[]> BEST_COMPARER = getBestComparer();
	/**
	* Returns the Unsafe-using Comparer, or falls back to the pure-Java
	* implementation if unable to do so.
	*/
	static Comparer<byte[]> getBestComparer() {
	if (System.getProperty("os.arch").equals("sparc")) {
	if (LOG.isTraceEnabled()) {
	LOG.trace("Lexicographical comparer selected for "
	+ "byte aligned system architecture");
	}
	return lexicographicalComparerJavaImpl();
	}
	try {
	Class<?> theClass = Class.forName(UNSAFE_COMPARER_NAME);

	// yes, UnsafeComparer does implement Comparer<byte[]>
	@SuppressWarnings("unchecked")
	Comparer<byte[]> comparer =
	(Comparer<byte[]>) theClass.getEnumConstants()[0];
	if (LOG.isTraceEnabled()) {
	LOG.trace("Unsafe comparer selected for "
	+ "byte unaligned system architecture");
	}
	return comparer;
	} catch (Throwable t) { // ensure we really catch everything
	if (LOG.isTraceEnabled()) {
	LOG.trace(t.getMessage());
	LOG.trace("Lexicographical comparer selected");
	}
	return lexicographicalComparerJavaImpl();
	}
	}

	private enum PureJavaComparer implements Comparer<byte[]> {
	INSTANCE;

	@Override
	public int compareTo(byte[] buffer1, int offset1, int length1,
	byte[] buffer2, int offset2, int length2) {
	// Short circuit equal case
	if (buffer1 == buffer2 &&
	offset1 == offset2 &&
	length1 == length2) {
	return 0;
	}
	// Bring WritableComparator code local
	int end1 = offset1 + length1;
	int end2 = offset2 + length2;
	for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
	int a = (buffer1[i] & 0xff);
	int b = (buffer2[j] & 0xff);
	if (a != b) {
	return a - b;
	}
	}
	return length1 - length2;
	}
	}

	@SuppressWarnings("unused") // used via reflection
	private enum UnsafeComparer implements Comparer<byte[]> {
	INSTANCE;

	static final Unsafe theUnsafe;

	/** The offset to the first element in a byte array. */
	static final int BYTE_ARRAY_BASE_OFFSET;

	static {
	theUnsafe = (Unsafe) AccessController.doPrivileged(
	new PrivilegedAction<Object>() {
	@Override
	public Object run() {
	try {
	Field f = Unsafe.class.getDeclaredField("theUnsafe");
	f.setAccessible(true);
	return f.get(null);
	} catch (NoSuchFieldException e) {
	// It doesn't matter what we throw;
	// it's swallowed in getBestComparer().
	throw new Error();
	} catch (IllegalAccessException e) {
	throw new Error();
	}
	}
	});

	BYTE_ARRAY_BASE_OFFSET = theUnsafe.arrayBaseOffset(byte[].class);

	// sanity check - this should never fail
	if (theUnsafe.arrayIndexScale(byte[].class) != 1) {
	throw new AssertionError();
	}
	}

	static final boolean littleEndian =
	ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN);

	/**
	* Returns true if x1 is less than x2, when both values are treated as
	* unsigned.
	*/
	static boolean lessThanUnsigned(long x1, long x2) {
	return (x1 + Long.MIN_VALUE) < (x2 + Long.MIN_VALUE);
	}

	/**
	* Lexicographically compare two arrays.
	*
	* @param buffer1 left operand
	* @param buffer2 right operand
	* @param offset1 Where to start comparing in the left buffer
	* @param offset2 Where to start comparing in the right buffer
	* @param length1 How much to compare from the left buffer
	* @param length2 How much to compare from the right buffer
	* @return 0 if equal, < 0 if left is less than right, etc.
	*/
	@Override
	public int compareTo(byte[] buffer1, int offset1, int length1,
	byte[] buffer2, int offset2, int length2) {
	// Short circuit equal case
	if (buffer1 == buffer2 &&
	offset1 == offset2 &&
	length1 == length2) {
	return 0;
	}
	int minLength = Math.min(length1, length2);
	int minWords = minLength / Longs.BYTES;
	int offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET;
	int offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET;

	/*
	* Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a
	* time is no slower than comparing 4 bytes at a time even on 32-bit.
	* On the other hand, it is substantially faster on 64-bit.
	*/
	for (int i = 0; i < minWords * Longs.BYTES; i += Longs.BYTES) {
	long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i);
	long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i);
	long diff = lw ^ rw;

	if (diff != 0) {
	if (!littleEndian) {
	return lessThanUnsigned(lw, rw) ? -1 : 1;
	}

	// Use binary search
	int n = 0;
	int y;
	int x = (int) diff;
	if (x == 0) {
	x = (int) (diff >>> 32);
	n = 32;
	}

	y = x << 16;
	if (y == 0) {
	n += 16;
	} else {
	x = y;
	}

	y = x << 8;
	if (y == 0) {
	n += 8;
	}
	return (int) (((lw >>> n) & 0xFFL) - ((rw >>> n) & 0xFFL));
	}
	}

	// The epilogue to cover the last (minLength % 8) elements.
	for (int i = minWords * Longs.BYTES; i < minLength; i++) {
	int result = UnsignedBytes.compare(
	buffer1[offset1 + i],
	buffer2[offset2 + i]);
	if (result != 0) {
	return result;
	}
	}
	return length1 - length2;
	}
	}
	}
	}