src/java/org/apache/cassandra/utils/bytecomparable/ByteSourceInverse.java - cassandra - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.cassandra.utils.bytecomparable;

 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;

 import com.google.common.base.Preconditions;

 import org.apache.cassandra.db.marshal.ValueAccessor;

 /**
  * Contains inverse transformation utilities for {@link ByteSource}s.
  *
  * See ByteComparable.md for details about the encoding scheme.
  */
 public final class ByteSourceInverse
 {
     private static final int INITIAL_BUFFER_CAPACITY = 32;
     private static final int BYTE_ALL_BITS = 0xFF;
     private static final int BYTE_NO_BITS = 0x00;
     private static final int BYTE_SIGN_BIT = 1 << 7;
     private static final int SHORT_SIGN_BIT = 1 << 15;
     private static final int INT_SIGN_BIT = 1 << 31;
     private static final long LONG_SIGN_BIT = 1L << 63;

     /**
      * Consume the given number of bytes and produce a long from them, effectively treating the bytes as a big-endian
      * unsigned encoding of the number.
      */
     public static long getUnsignedFixedLengthAsLong(ByteSource byteSource, int length)
     {
         Preconditions.checkNotNull(byteSource);
         Preconditions.checkArgument(length >= 1 && length <= 8, "Between 1 and 8 bytes can be read at a time");

         long result = 0;
         for (int i = 0; i < length; ++i)
             result = (result << 8) | getAndCheckByte(byteSource, i, length);  // note: this must use the unsigned byte value

         return result;
     }

     /**
      * Produce the bytes for an encoded signed fixed-length number.
      * The first byte has its sign bit inverted, and the rest are passed unchanged.
      */
     public static <V> V getSignedFixedLength(ValueAccessor<V> accessor, ByteSource byteSource, int length)
     {
         Preconditions.checkNotNull(byteSource);
         Preconditions.checkArgument(length >= 1, "At least 1 byte should be read");

         V result = accessor.allocate(length);
         // The first byte needs to have its sign flipped
         accessor.putByte(result, 0, (byte) (getAndCheckByte(byteSource, 0, length) ^ BYTE_SIGN_BIT));
         // and the rest can be retrieved unchanged.
         for (int i = 1; i < length; ++i)
             accessor.putByte(result, i, (byte) getAndCheckByte(byteSource, i, length));
         return result;
     }

     /**
      * Produce the bytes for an encoded signed fixed-length number, also translating null to empty buffer.
      * The first byte has its sign bit inverted, and the rest are passed unchanged.
      */
     public static <V> V getOptionalSignedFixedLength(ValueAccessor<V> accessor, ByteSource byteSource, int length)
     {
         return byteSource == null ? accessor.empty() : getSignedFixedLength(accessor, byteSource, length);
     }

     /**
      * Produce the bytes for an encoded signed fixed-length floating-point number.
      * If sign bit is on, returns negated bytes. If not, clears the sign bit and passes the rest of the bytes unchanged.
      */
     public static <V> V getSignedFixedLengthFloat(ValueAccessor<V> accessor, ByteSource byteSource, int length)
     {
         Preconditions.checkNotNull(byteSource);
         Preconditions.checkArgument(length >= 1, "At least 1 byte should be read");

         V result = accessor.allocate(length);

         int xor;
         int first = getAndCheckByte(byteSource, 0, length);
         if (first < 0x80)
         {
             // Negative number. Invert all bits.
             xor = BYTE_ALL_BITS;
             first ^= xor;
         }
         else
         {
             // Positive number. Invert only the sign bit.
             xor = BYTE_NO_BITS;
             first ^= BYTE_SIGN_BIT;
         }
         accessor.putByte(result, 0, (byte) first);

         // xor is now applied to the rest of the bytes to flip their bits if necessary.
         for (int i = 1; i < length; ++i)
             accessor.putByte(result, i, (byte) (getAndCheckByte(byteSource, i, length) ^ xor));

         return result;
     }

     /**
      * Produce the bytes for an encoded signed fixed-length floating-point number, also translating null to an empty
      * buffer.
      * If sign bit is on, returns negated bytes. If not, clears the sign bit and passes the rest of the bytes unchanged.
      */
     public static <V> V getOptionalSignedFixedLengthFloat(ValueAccessor<V> accessor, ByteSource byteSource, int length)
     {
         return byteSource == null ? accessor.empty() : getSignedFixedLengthFloat(accessor, byteSource, length);
     }

     /**
      * Consume the next length bytes from the source unchanged.
      */
     public static <V> V getFixedLength(ValueAccessor<V> accessor, ByteSource byteSource, int length)
     {
         Preconditions.checkNotNull(byteSource);
         Preconditions.checkArgument(length >= 1, "At least 1 byte should be read");

         V result = accessor.allocate(length);
         for (int i = 0; i < length; ++i)
             accessor.putByte(result, i, (byte) getAndCheckByte(byteSource, i, length));
         return result;
     }

     /**
      * Consume the next length bytes from the source unchanged, also translating null to an empty buffer.
      */
     public static <V> V getOptionalFixedLength(ValueAccessor<V> accessor, ByteSource byteSource, int length)
     {
         return byteSource == null ? accessor.empty() : getFixedLength(accessor, byteSource, length);
     }

     /**
      * Consume the next {@code int} from the current position of the given {@link ByteSource}. The source position is
      * modified accordingly (moved 4 bytes forward).
      * <p>
      * The source is not strictly required to represent just the encoding of an {@code int} value, so theoretically
      * this API could be used for reading data in 4-byte strides. Nevertheless its usage is fairly limited because:
      * <ol>
      *     <li>...it presupposes signed fixed-length encoding for the encoding of the original value</li>
      *     <li>...it decodes the data returned on each stride as an {@code int} (i.e. it inverts its leading bit)</li>
      *     <li>...it doesn't provide any meaningful guarantees (with regard to throwing) in case there are not enough
      *     bytes to read, in case a special escape value was not interpreted as such, etc.</li>
      * </ol>
      * </p>
      *
      * @param byteSource A non-null byte source, containing at least 4 bytes.
      */
     public static int getSignedInt(ByteSource byteSource)
     {
         return (int) getUnsignedFixedLengthAsLong(byteSource, 4) ^ INT_SIGN_BIT;
     }

     /**
      * Consume the next {@code long} from the current position of the given {@link ByteSource}. The source position is
      * modified accordingly (moved 8 bytes forward).
      * <p>
      * The source is not strictly required to represent just the encoding of a {@code long} value, so theoretically
      * this API could be used for reading data in 8-byte strides. Nevertheless its usage is fairly limited because:
      * <ol>
      *     <li>...it presupposes signed fixed-length encoding for the encoding of the original value</li>
      *     <li>...it decodes the data returned on each stride as a {@code long} (i.e. it inverts its leading bit)</li>
      *     <li>...it doesn't provide any meaningful guarantees (with regard to throwing) in case there are not enough
      *     bytes to read, in case a special escape value was not interpreted as such, etc.</li>
      * </ol>
      * </p>
      *
      * @param byteSource A non-null byte source, containing at least 8 bytes.
      */
     public static long getSignedLong(ByteSource byteSource)
     {
         return getUnsignedFixedLengthAsLong(byteSource, 8) ^ LONG_SIGN_BIT;
     }

     /**
      * Converts the given {@link ByteSource} to a {@code byte}.
      *
      * @param byteSource A non-null byte source, containing at least 1 byte.
      */
     public static byte getSignedByte(ByteSource byteSource)
     {
         return (byte) (getAndCheckByte(Preconditions.checkNotNull(byteSource), 0, 1) ^ BYTE_SIGN_BIT);
     }

     /**
      * Converts the given {@link ByteSource} to a {@code short}. All terms and conditions valid for
      * {@link #getSignedInt(ByteSource)} and {@link #getSignedLong(ByteSource)} translate to this as well.
      *
      * @param byteSource A non-null byte source, containing at least 2 bytes.
      *
      * @see #getSignedInt(ByteSource)
      * @see #getSignedLong(ByteSource)
      */
     public static short getSignedShort(ByteSource byteSource)
     {
         return (short) (getUnsignedFixedLengthAsLong(byteSource, 2) ^ SHORT_SIGN_BIT);
     }

     /**
      * Decode a variable-length signed integer.
      */
     public static long getVariableLengthInteger(ByteSource byteSource)
     {
         int signAndMask = getAndCheckByte(byteSource);

         long sum = 0;
         int bytes;
         // For every bit after the sign that matches the sign, read one more byte.
         for (bytes = 0; bytes < 7 && sameByteSign(signAndMask << (bytes + 1), signAndMask); ++bytes)
             sum = (sum << 8) | getAndCheckByte(byteSource);

         // The eighth length bit is stored in the second byte.
         if (bytes == 7 && sameByteSign((int) (sum >> 48), signAndMask))
             return ((sum << 8) | getAndCheckByte(byteSource)) ^ LONG_SIGN_BIT;    // 9-byte encoding, use bytes 2-9 with inverted sign
         else
         {
             sum |= (((long) signAndMask) << bytes * 8);     // add the rest of the bits
             long signMask = -0x40L << bytes * 7;            // mask of the bits that should be replaced by the sign
             long sign = (byte) (signAndMask ^ 0x80) >> 7;   // -1 if negative (0 leading bit), 0 otherwise
             return sum & ~signMask | sign & signMask;
         }
     }

     /**
      * Decode a variable-length unsigned integer, passing all bytes read through XOR with the given xorWith parameter.
      *
      * Used in BigInteger encoding to read number length, where negative numbers have their length negated
      * (i.e. xorWith = 0xFF) to ensure correct ordering.
      */
     public static long getVariableLengthUnsignedIntegerXoring(ByteSource byteSource, int xorWith)
     {
         int signAndMask = getAndCheckByte(byteSource) ^ xorWith;

         long sum = 0;
         int bytes;
         // Read an extra byte while the next most significant bit is 1.
         for (bytes = 0; bytes <= 7 && ((signAndMask << bytes) & 0x80) != 0; ++bytes)
             sum = (sum << 8) | getAndCheckByte(byteSource) ^ xorWith;

         // Strip the length bits from the leading byte.
         signAndMask &= ~(-256 >> bytes);
         return sum | (((long) signAndMask) << bytes * 8);     // Add the rest of the bits of the leading byte.
     }

     /** Returns true if the two parameters treated as bytes have the same sign. */
     private static boolean sameByteSign(int a, int b)
     {
         return ((a ^ b) & 0x80) == 0;
     }


     private static int getAndCheckByte(ByteSource byteSource)
     {
         return getAndCheckByte(byteSource, -1, -1);
     }

     private static int getAndCheckByte(ByteSource byteSource, int pos, int length)
     {
         int data = byteSource.next();
         if (data == ByteSource.END_OF_STREAM)
             throw new IllegalArgumentException(
                 length > 0 ? String.format("Unexpected end of stream reached after %d bytes (expected >= %d)", pos, length)
                            : "Unexpected end of stream");
         assert data >= BYTE_NO_BITS && data <= BYTE_ALL_BITS
             : "A ByteSource must produce unsigned bytes and end in END_OF_STREAM";
         return data;
     }

     /**
      * Reads a single variable-length byte sequence (blob, string, ...) encoded according to the scheme described
      * in ByteComparable.md, decoding it back to its original, unescaped form.
      *
      * @param byteSource The source of the variable-length bytes sequence.
      * @return A byte array containing the original, unescaped bytes of the given source. Unescaped here means
      * not including any of the escape sequences of the encoding scheme used for variable-length byte sequences.
      */
     public static byte[] getUnescapedBytes(ByteSource.Peekable byteSource)
     {
         return byteSource == null ? null : readBytes(unescape(byteSource));
     }

     /**
      * As above, but converts the result to a ByteSource.
      */
     public static ByteSource unescape(ByteSource.Peekable byteSource)
     {
         return new ByteSource() {
             boolean escaped = false;

             @Override
             public int next()
             {
                 if (!escaped)
                 {
                     int data = byteSource.next(); // we consume this byte no matter what it is
                     if (data > ByteSource.ESCAPE)
                         return data;        // most used path leads here

                     assert data != ByteSource.END_OF_STREAM : "Invalid escaped byte sequence";
                     escaped = true;
                 }

                 int next = byteSource.peek();
                 switch (next)
                 {
                     case END_OF_STREAM:
                         // The end of a byte-comparable outside of a multi-component sequence. No matter what we have
                         // seen or peeked before, we should stop now.
                         byteSource.next();
                         return END_OF_STREAM;
                     case ESCAPED_0_DONE:
                         // The end of 1 or more consecutive 0x00 value bytes.
                         escaped = false;
                         byteSource.next();
                         return ESCAPE;
                     case ESCAPED_0_CONT:
                         // Escaped sequence continues
                         byteSource.next();
                         return ESCAPE;
                     default:
                         // An ESCAPE or ESCAPED_0_CONT won't be followed by either another ESCAPED_0_CONT, an
                         // ESCAPED_0_DONE, or an END_OF_STREAM only when the byte-comparable is part of a multi-component
                         // sequence and we have reached the end of the encoded byte-comparable. In this case, the byte
                         // we have just peeked is the separator or terminator byte between or at the end of components
                         // (which by contact must be 0x10 - 0xFE, which cannot conflict with our special bytes).
                         assert next >= ByteSource.MIN_SEPARATOR && next <= ByteSource.MAX_SEPARATOR : next;
                         // Unlike above, we don't consume this byte (the sequence decoding needs it).
                         return END_OF_STREAM;
                 }
             }
         };
     }

     /**
      * Reads the bytes of the given source into a byte array. Doesn't do any transformation on the bytes, just reads
      * them until it reads an {@link ByteSource#END_OF_STREAM} byte, after which it returns an array of all the read
      * bytes, <strong>excluding the {@link ByteSource#END_OF_STREAM}</strong>.
      * <p>
      * This method sizes a tentative internal buffer array at {@code initialBufferCapacity}.  However, if
      * {@code byteSource} exceeds this size, the buffer array is recreated with doubled capacity as many times as
      * necessary.  If, after {@code byteSource} is fully exhausted, the number of bytes read from it does not exactly
      * match the current size of the tentative buffer array, then it is copied into another array sized to fit the
      * number of bytes read; otherwise, it is returned without that final copy step.
      *
      * @param byteSource The source which bytes we're interested in.
      * @param initialBufferCapacity The initial size of the internal buffer.
      * @return A byte array containing exactly all the read bytes. In case of a {@code null} source, the returned byte
      * array will be empty.
      */
     public static byte[] readBytes(ByteSource byteSource, final int initialBufferCapacity)
     {
         Preconditions.checkNotNull(byteSource);

         int readBytes = 0;
         byte[] buf = new byte[initialBufferCapacity];
         int data;
         while ((data = byteSource.next()) != ByteSource.END_OF_STREAM)
         {
             buf = ensureCapacity(buf, readBytes);
             buf[readBytes++] = (byte) data;
         }

         if (readBytes != buf.length)
         {
             buf = Arrays.copyOf(buf, readBytes);
         }
         return buf;
     }

     /**
      * Reads the bytes of the given source into a byte array. Doesn't do any transformation on the bytes, just reads
      * them until it reads an {@link ByteSource#END_OF_STREAM} byte, after which it returns an array of all the read
      * bytes, <strong>excluding the {@link ByteSource#END_OF_STREAM}</strong>.
      * <p>
      * This is equivalent to {@link #readBytes(ByteSource, int)} where the second actual parameter is
      * {@linkplain #INITIAL_BUFFER_CAPACITY} ({@value INITIAL_BUFFER_CAPACITY}).
      *
      * @param byteSource The source which bytes we're interested in.
      * @return A byte array containing exactly all the read bytes. In case of a {@code null} source, the returned byte
      * array will be empty.
      */
     public static byte[] readBytes(ByteSource byteSource)
     {
         return readBytes(byteSource, INITIAL_BUFFER_CAPACITY);
     }

     /**
      * Ensures the given buffer has capacity for taking data with the given length - if it doesn't, it returns a copy
      * of the buffer, but with double the capacity.
      */
     private static byte[] ensureCapacity(byte[] buf, int dataLengthInBytes)
     {
         if (dataLengthInBytes == buf.length)
             // We won't gain much with guarding against overflow. We'll overflow when dataLengthInBytes >= 1 << 30,
             // and if we do guard, we'll be able to extend the capacity to Integer.MAX_VALUE (which is 1 << 31 - 1).
             // Controlling the exception that will be thrown shouldn't matter that much, and  in practice, we almost
             // surely won't be reading gigabytes of ByteSource data at once.
             return Arrays.copyOf(buf, dataLengthInBytes * 2);
         else
             return buf;
     }

     /**
      * Converts the given {@link ByteSource} to a UTF-8 {@link String}.
      *
      * @param byteSource The source we're interested in.
      * @return A UTF-8 string corresponding to the given source.
      */
     public static String getString(ByteSource.Peekable byteSource)
     {
         if (byteSource == null)
             return null;

         byte[] data = getUnescapedBytes(byteSource);

         return new String(data, StandardCharsets.UTF_8);
     }

     /*
      * Multi-component sequence utilities.
      */

     /**
      * A utility for consuming components from a peekable multi-component sequence.
      * It uses the component separators, so the given sequence needs to have its last component fully consumed, in
      * order for the next consumable byte to be a separator. Identifying the end of the component that will then be
      * consumed is the responsibility of the consumer (the user of this method).
      * @param source A peekable multi-component sequence, which next byte is a component separator.
      * @return the given multi-component sequence if its next component is not null, or {@code null} if it is.
      */
     public static ByteSource.Peekable nextComponentSource(ByteSource.Peekable source)
     {
         return nextComponentSource(source, source.next());
     }

     /**
      * A utility for consuming components from a peekable multi-component sequence, very similar to
      * {@link #nextComponentSource(ByteSource.Peekable)} - the difference being that here the separator can be passed
      * in case it had to be consumed beforehand.
      */
     public static ByteSource.Peekable nextComponentSource(ByteSource.Peekable source, int separator)
     {
         return nextComponentNull(separator)
                ? null
                : source;
     }

     public static boolean nextComponentNull(int separator)
     {
         return separator == ByteSource.NEXT_COMPONENT_NULL || separator == ByteSource.NEXT_COMPONENT_EMPTY
                || separator == ByteSource.NEXT_COMPONENT_EMPTY_REVERSED;
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.cassandra.utils.bytecomparable;

	import java.nio.charset.StandardCharsets;
	import java.util.Arrays;

	import com.google.common.base.Preconditions;

	import org.apache.cassandra.db.marshal.ValueAccessor;

	/**
	* Contains inverse transformation utilities for {@link ByteSource}s.
	*
	* See ByteComparable.md for details about the encoding scheme.
	*/
	public final class ByteSourceInverse
	{
	private static final int INITIAL_BUFFER_CAPACITY = 32;
	private static final int BYTE_ALL_BITS = 0xFF;
	private static final int BYTE_NO_BITS = 0x00;
	private static final int BYTE_SIGN_BIT = 1 << 7;
	private static final int SHORT_SIGN_BIT = 1 << 15;
	private static final int INT_SIGN_BIT = 1 << 31;
	private static final long LONG_SIGN_BIT = 1L << 63;

	/**
	* Consume the given number of bytes and produce a long from them, effectively treating the bytes as a big-endian
	* unsigned encoding of the number.
	*/
	public static long getUnsignedFixedLengthAsLong(ByteSource byteSource, int length)
	{
	Preconditions.checkNotNull(byteSource);
	Preconditions.checkArgument(length >= 1 && length <= 8, "Between 1 and 8 bytes can be read at a time");

	long result = 0;
	for (int i = 0; i < length; ++i)
	result = (result << 8) \| getAndCheckByte(byteSource, i, length); // note: this must use the unsigned byte value

	return result;
	}

	/**
	* Produce the bytes for an encoded signed fixed-length number.
	* The first byte has its sign bit inverted, and the rest are passed unchanged.
	*/
	public static <V> V getSignedFixedLength(ValueAccessor<V> accessor, ByteSource byteSource, int length)
	{
	Preconditions.checkNotNull(byteSource);
	Preconditions.checkArgument(length >= 1, "At least 1 byte should be read");

	V result = accessor.allocate(length);
	// The first byte needs to have its sign flipped
	accessor.putByte(result, 0, (byte) (getAndCheckByte(byteSource, 0, length) ^ BYTE_SIGN_BIT));
	// and the rest can be retrieved unchanged.
	for (int i = 1; i < length; ++i)
	accessor.putByte(result, i, (byte) getAndCheckByte(byteSource, i, length));
	return result;
	}

	/**
	* Produce the bytes for an encoded signed fixed-length number, also translating null to empty buffer.
	* The first byte has its sign bit inverted, and the rest are passed unchanged.
	*/
	public static <V> V getOptionalSignedFixedLength(ValueAccessor<V> accessor, ByteSource byteSource, int length)
	{
	return byteSource == null ? accessor.empty() : getSignedFixedLength(accessor, byteSource, length);
	}

	/**
	* Produce the bytes for an encoded signed fixed-length floating-point number.
	* If sign bit is on, returns negated bytes. If not, clears the sign bit and passes the rest of the bytes unchanged.
	*/
	public static <V> V getSignedFixedLengthFloat(ValueAccessor<V> accessor, ByteSource byteSource, int length)
	{
	Preconditions.checkNotNull(byteSource);
	Preconditions.checkArgument(length >= 1, "At least 1 byte should be read");

	V result = accessor.allocate(length);

	int xor;
	int first = getAndCheckByte(byteSource, 0, length);
	if (first < 0x80)
	{
	// Negative number. Invert all bits.
	xor = BYTE_ALL_BITS;
	first ^= xor;
	}
	else
	{
	// Positive number. Invert only the sign bit.
	xor = BYTE_NO_BITS;
	first ^= BYTE_SIGN_BIT;
	}
	accessor.putByte(result, 0, (byte) first);

	// xor is now applied to the rest of the bytes to flip their bits if necessary.
	for (int i = 1; i < length; ++i)
	accessor.putByte(result, i, (byte) (getAndCheckByte(byteSource, i, length) ^ xor));

	return result;
	}

	/**
	* Produce the bytes for an encoded signed fixed-length floating-point number, also translating null to an empty
	* buffer.
	* If sign bit is on, returns negated bytes. If not, clears the sign bit and passes the rest of the bytes unchanged.
	*/
	public static <V> V getOptionalSignedFixedLengthFloat(ValueAccessor<V> accessor, ByteSource byteSource, int length)
	{
	return byteSource == null ? accessor.empty() : getSignedFixedLengthFloat(accessor, byteSource, length);
	}

	/**
	* Consume the next length bytes from the source unchanged.
	*/
	public static <V> V getFixedLength(ValueAccessor<V> accessor, ByteSource byteSource, int length)
	{
	Preconditions.checkNotNull(byteSource);
	Preconditions.checkArgument(length >= 1, "At least 1 byte should be read");

	V result = accessor.allocate(length);
	for (int i = 0; i < length; ++i)
	accessor.putByte(result, i, (byte) getAndCheckByte(byteSource, i, length));
	return result;
	}

	/**
	* Consume the next length bytes from the source unchanged, also translating null to an empty buffer.
	*/
	public static <V> V getOptionalFixedLength(ValueAccessor<V> accessor, ByteSource byteSource, int length)
	{
	return byteSource == null ? accessor.empty() : getFixedLength(accessor, byteSource, length);
	}

	/**
	* Consume the next {@code int} from the current position of the given {@link ByteSource}. The source position is
	* modified accordingly (moved 4 bytes forward).
	* <p>
	* The source is not strictly required to represent just the encoding of an {@code int} value, so theoretically
	* this API could be used for reading data in 4-byte strides. Nevertheless its usage is fairly limited because:
	* <ol>
	* <li>...it presupposes signed fixed-length encoding for the encoding of the original value</li>
	* <li>...it decodes the data returned on each stride as an {@code int} (i.e. it inverts its leading bit)</li>
	* <li>...it doesn't provide any meaningful guarantees (with regard to throwing) in case there are not enough
	* bytes to read, in case a special escape value was not interpreted as such, etc.</li>
	* </ol>
	* </p>
	*
	* @param byteSource A non-null byte source, containing at least 4 bytes.
	*/
	public static int getSignedInt(ByteSource byteSource)
	{
	return (int) getUnsignedFixedLengthAsLong(byteSource, 4) ^ INT_SIGN_BIT;
	}

	/**
	* Consume the next {@code long} from the current position of the given {@link ByteSource}. The source position is
	* modified accordingly (moved 8 bytes forward).
	* <p>
	* The source is not strictly required to represent just the encoding of a {@code long} value, so theoretically
	* this API could be used for reading data in 8-byte strides. Nevertheless its usage is fairly limited because:
	* <ol>
	* <li>...it presupposes signed fixed-length encoding for the encoding of the original value</li>
	* <li>...it decodes the data returned on each stride as a {@code long} (i.e. it inverts its leading bit)</li>
	* <li>...it doesn't provide any meaningful guarantees (with regard to throwing) in case there are not enough
	* bytes to read, in case a special escape value was not interpreted as such, etc.</li>
	* </ol>
	* </p>
	*
	* @param byteSource A non-null byte source, containing at least 8 bytes.
	*/
	public static long getSignedLong(ByteSource byteSource)
	{
	return getUnsignedFixedLengthAsLong(byteSource, 8) ^ LONG_SIGN_BIT;
	}

	/**
	* Converts the given {@link ByteSource} to a {@code byte}.
	*
	* @param byteSource A non-null byte source, containing at least 1 byte.
	*/
	public static byte getSignedByte(ByteSource byteSource)
	{
	return (byte) (getAndCheckByte(Preconditions.checkNotNull(byteSource), 0, 1) ^ BYTE_SIGN_BIT);
	}

	/**
	* Converts the given {@link ByteSource} to a {@code short}. All terms and conditions valid for
	* {@link #getSignedInt(ByteSource)} and {@link #getSignedLong(ByteSource)} translate to this as well.
	*
	* @param byteSource A non-null byte source, containing at least 2 bytes.
	*
	* @see #getSignedInt(ByteSource)
	* @see #getSignedLong(ByteSource)
	*/
	public static short getSignedShort(ByteSource byteSource)
	{
	return (short) (getUnsignedFixedLengthAsLong(byteSource, 2) ^ SHORT_SIGN_BIT);
	}

	/**
	* Decode a variable-length signed integer.
	*/
	public static long getVariableLengthInteger(ByteSource byteSource)
	{
	int signAndMask = getAndCheckByte(byteSource);

	long sum = 0;
	int bytes;
	// For every bit after the sign that matches the sign, read one more byte.
	for (bytes = 0; bytes < 7 && sameByteSign(signAndMask << (bytes + 1), signAndMask); ++bytes)
	sum = (sum << 8) \| getAndCheckByte(byteSource);

	// The eighth length bit is stored in the second byte.
	if (bytes == 7 && sameByteSign((int) (sum >> 48), signAndMask))
	return ((sum << 8) \| getAndCheckByte(byteSource)) ^ LONG_SIGN_BIT; // 9-byte encoding, use bytes 2-9 with inverted sign
	else
	{
	sum \|= (((long) signAndMask) << bytes * 8); // add the rest of the bits
	long signMask = -0x40L << bytes * 7; // mask of the bits that should be replaced by the sign
	long sign = (byte) (signAndMask ^ 0x80) >> 7; // -1 if negative (0 leading bit), 0 otherwise
	return sum & ~signMask \| sign & signMask;
	}
	}

	/**
	* Decode a variable-length unsigned integer, passing all bytes read through XOR with the given xorWith parameter.
	*
	* Used in BigInteger encoding to read number length, where negative numbers have their length negated
	* (i.e. xorWith = 0xFF) to ensure correct ordering.
	*/
	public static long getVariableLengthUnsignedIntegerXoring(ByteSource byteSource, int xorWith)
	{
	int signAndMask = getAndCheckByte(byteSource) ^ xorWith;

	long sum = 0;
	int bytes;
	// Read an extra byte while the next most significant bit is 1.
	for (bytes = 0; bytes <= 7 && ((signAndMask << bytes) & 0x80) != 0; ++bytes)
	sum = (sum << 8) \| getAndCheckByte(byteSource) ^ xorWith;

	// Strip the length bits from the leading byte.
	signAndMask &= ~(-256 >> bytes);
	return sum \| (((long) signAndMask) << bytes * 8); // Add the rest of the bits of the leading byte.
	}

	/** Returns true if the two parameters treated as bytes have the same sign. */
	private static boolean sameByteSign(int a, int b)
	{
	return ((a ^ b) & 0x80) == 0;
	}


	private static int getAndCheckByte(ByteSource byteSource)
	{
	return getAndCheckByte(byteSource, -1, -1);
	}

	private static int getAndCheckByte(ByteSource byteSource, int pos, int length)
	{
	int data = byteSource.next();
	if (data == ByteSource.END_OF_STREAM)
	throw new IllegalArgumentException(
	length > 0 ? String.format("Unexpected end of stream reached after %d bytes (expected >= %d)", pos, length)
	: "Unexpected end of stream");
	assert data >= BYTE_NO_BITS && data <= BYTE_ALL_BITS
	: "A ByteSource must produce unsigned bytes and end in END_OF_STREAM";
	return data;
	}

	/**
	* Reads a single variable-length byte sequence (blob, string, ...) encoded according to the scheme described
	* in ByteComparable.md, decoding it back to its original, unescaped form.
	*
	* @param byteSource The source of the variable-length bytes sequence.
	* @return A byte array containing the original, unescaped bytes of the given source. Unescaped here means
	* not including any of the escape sequences of the encoding scheme used for variable-length byte sequences.
	*/
	public static byte[] getUnescapedBytes(ByteSource.Peekable byteSource)
	{
	return byteSource == null ? null : readBytes(unescape(byteSource));
	}

	/**
	* As above, but converts the result to a ByteSource.
	*/
	public static ByteSource unescape(ByteSource.Peekable byteSource)
	{
	return new ByteSource() {
	boolean escaped = false;

	@Override
	public int next()
	{
	if (!escaped)
	{
	int data = byteSource.next(); // we consume this byte no matter what it is
	if (data > ByteSource.ESCAPE)
	return data; // most used path leads here

	assert data != ByteSource.END_OF_STREAM : "Invalid escaped byte sequence";
	escaped = true;
	}

	int next = byteSource.peek();
	switch (next)
	{
	case END_OF_STREAM:
	// The end of a byte-comparable outside of a multi-component sequence. No matter what we have
	// seen or peeked before, we should stop now.
	byteSource.next();
	return END_OF_STREAM;
	case ESCAPED_0_DONE:
	// The end of 1 or more consecutive 0x00 value bytes.
	escaped = false;
	byteSource.next();
	return ESCAPE;
	case ESCAPED_0_CONT:
	// Escaped sequence continues
	byteSource.next();
	return ESCAPE;
	default:
	// An ESCAPE or ESCAPED_0_CONT won't be followed by either another ESCAPED_0_CONT, an
	// ESCAPED_0_DONE, or an END_OF_STREAM only when the byte-comparable is part of a multi-component
	// sequence and we have reached the end of the encoded byte-comparable. In this case, the byte
	// we have just peeked is the separator or terminator byte between or at the end of components
	// (which by contact must be 0x10 - 0xFE, which cannot conflict with our special bytes).
	assert next >= ByteSource.MIN_SEPARATOR && next <= ByteSource.MAX_SEPARATOR : next;
	// Unlike above, we don't consume this byte (the sequence decoding needs it).
	return END_OF_STREAM;
	}
	}
	};
	}

	/**
	* Reads the bytes of the given source into a byte array. Doesn't do any transformation on the bytes, just reads
	* them until it reads an {@link ByteSource#END_OF_STREAM} byte, after which it returns an array of all the read
	* bytes, <strong>excluding the {@link ByteSource#END_OF_STREAM}</strong>.
	* <p>
	* This method sizes a tentative internal buffer array at {@code initialBufferCapacity}. However, if
	* {@code byteSource} exceeds this size, the buffer array is recreated with doubled capacity as many times as
	* necessary. If, after {@code byteSource} is fully exhausted, the number of bytes read from it does not exactly
	* match the current size of the tentative buffer array, then it is copied into another array sized to fit the
	* number of bytes read; otherwise, it is returned without that final copy step.
	*
	* @param byteSource The source which bytes we're interested in.
	* @param initialBufferCapacity The initial size of the internal buffer.
	* @return A byte array containing exactly all the read bytes. In case of a {@code null} source, the returned byte
	* array will be empty.
	*/
	public static byte[] readBytes(ByteSource byteSource, final int initialBufferCapacity)
	{
	Preconditions.checkNotNull(byteSource);

	int readBytes = 0;
	byte[] buf = new byte[initialBufferCapacity];
	int data;
	while ((data = byteSource.next()) != ByteSource.END_OF_STREAM)
	{
	buf = ensureCapacity(buf, readBytes);
	buf[readBytes++] = (byte) data;
	}

	if (readBytes != buf.length)
	{
	buf = Arrays.copyOf(buf, readBytes);
	}
	return buf;
	}

	/**
	* Reads the bytes of the given source into a byte array. Doesn't do any transformation on the bytes, just reads
	* them until it reads an {@link ByteSource#END_OF_STREAM} byte, after which it returns an array of all the read
	* bytes, <strong>excluding the {@link ByteSource#END_OF_STREAM}</strong>.
	* <p>
	* This is equivalent to {@link #readBytes(ByteSource, int)} where the second actual parameter is
	* {@linkplain #INITIAL_BUFFER_CAPACITY} ({@value INITIAL_BUFFER_CAPACITY}).
	*
	* @param byteSource The source which bytes we're interested in.
	* @return A byte array containing exactly all the read bytes. In case of a {@code null} source, the returned byte
	* array will be empty.
	*/
	public static byte[] readBytes(ByteSource byteSource)
	{
	return readBytes(byteSource, INITIAL_BUFFER_CAPACITY);
	}

	/**
	* Ensures the given buffer has capacity for taking data with the given length - if it doesn't, it returns a copy
	* of the buffer, but with double the capacity.
	*/
	private static byte[] ensureCapacity(byte[] buf, int dataLengthInBytes)
	{
	if (dataLengthInBytes == buf.length)
	// We won't gain much with guarding against overflow. We'll overflow when dataLengthInBytes >= 1 << 30,
	// and if we do guard, we'll be able to extend the capacity to Integer.MAX_VALUE (which is 1 << 31 - 1).
	// Controlling the exception that will be thrown shouldn't matter that much, and in practice, we almost
	// surely won't be reading gigabytes of ByteSource data at once.
	return Arrays.copyOf(buf, dataLengthInBytes * 2);
	else
	return buf;
	}

	/**
	* Converts the given {@link ByteSource} to a UTF-8 {@link String}.
	*
	* @param byteSource The source we're interested in.
	* @return A UTF-8 string corresponding to the given source.
	*/
	public static String getString(ByteSource.Peekable byteSource)
	{
	if (byteSource == null)
	return null;

	byte[] data = getUnescapedBytes(byteSource);

	return new String(data, StandardCharsets.UTF_8);
	}

	/*
	* Multi-component sequence utilities.
	*/

	/**
	* A utility for consuming components from a peekable multi-component sequence.
	* It uses the component separators, so the given sequence needs to have its last component fully consumed, in
	* order for the next consumable byte to be a separator. Identifying the end of the component that will then be
	* consumed is the responsibility of the consumer (the user of this method).
	* @param source A peekable multi-component sequence, which next byte is a component separator.
	* @return the given multi-component sequence if its next component is not null, or {@code null} if it is.
	*/
	public static ByteSource.Peekable nextComponentSource(ByteSource.Peekable source)
	{
	return nextComponentSource(source, source.next());
	}

	/**
	* A utility for consuming components from a peekable multi-component sequence, very similar to
	* {@link #nextComponentSource(ByteSource.Peekable)} - the difference being that here the separator can be passed
	* in case it had to be consumed beforehand.
	*/
	public static ByteSource.Peekable nextComponentSource(ByteSource.Peekable source, int separator)
	{
	return nextComponentNull(separator)
	? null
	: source;
	}

	public static boolean nextComponentNull(int separator)
	{
	return separator == ByteSource.NEXT_COMPONENT_NULL \|\| separator == ByteSource.NEXT_COMPONENT_EMPTY
	\|\| separator == ByteSource.NEXT_COMPONENT_EMPTY_REVERSED;
	}
	}