src/main/java/org/apache/commons/codec/binary/Base16.java - commons-codec - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.commons.codec.binary;

 import org.apache.commons.codec.CodecPolicy;

 /**
  * Provides Base16 encoding and decoding.
  *
  * <p>
  * This class is thread-safe.
  * </p>
  * <p>
  * This implementation strictly follows RFC 4648, and as such unlike
  * the {@link Base32} and {@link Base64} implementations,
  * it does not ignore invalid alphabet characters or whitespace,
  * neither does it offer chunking or padding characters.
  * </p>
  * <p>
  * The only additional feature above those specified in RFC 4648
  * is support for working with a lower-case alphabet in addition
  * to the default upper-case alphabet.
  * </p>
  *
  * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
  *
  * @since 1.15
  */
 public class Base16 extends BaseNCodec {

     /**
      * BASE16 characters are 4 bits in length.
      * They are formed by taking an 8-bit group,
      * which is converted into two BASE16 characters.
      */
     private static final int BITS_PER_ENCODED_BYTE = 4;

     /** Number of encoded characters that represent one unencoded byte. */
     private static final int BYTES_PER_ENCODED_BLOCK = 2;

     /** Number of unencoded bytes represented by one encoded block. */
     private static final int BYTES_PER_UNENCODED_BLOCK = 1;

     /**
      * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified
      * in Table 5 of RFC 4648) into their 4-bit positive integer equivalents. Characters that are not in the Base16
      * alphabet but fall within the bounds of the array are translated to -1.
      */
     private static final byte[] UPPER_CASE_DECODE_TABLE = {
             //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
              0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
             -1, 10, 11, 12, 13, 14, 15                                      // 40-46 A-F
     };

     /**
      * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
      * equivalents as specified in Table 5 of RFC 4648.
      */
     private static final byte[] UPPER_CASE_ENCODE_TABLE = {
             '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
             'A', 'B', 'C', 'D', 'E', 'F'
     };

     /**
      * This array is a lookup table that translates Unicode characters drawn from a lower-case "Base16 Alphabet"
      * into their 4-bit positive integer equivalents. Characters that are not in the Base16
      * alphabet but fall within the bounds of the array are translated to -1.
      */
     private static final byte[] LOWER_CASE_DECODE_TABLE = {
             //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
              0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
             -1, 10, 11, 12, 13, 14, 15                                      // 60-66 a-f
     };

     /**
      * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
      * lower-case equivalents.
      */
     private static final byte[] LOWER_CASE_ENCODE_TABLE = {
             '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
             'a', 'b', 'c', 'd', 'e', 'f'
     };

     /** Mask used to extract 4 bits, used when splitting a byte into its two encoded characters. */
     private static final int MASK_4BITS = 0x0f;

     /**
      * Decode table to use.
      */
     private final byte[] decodeTable;

     /**
      * Encode table to use.
      */
     private final byte[] encodeTable;

     /**
      * Creates a Base16 codec used for decoding and encoding, using the upper-case alphabet.
      */
     public Base16() {
         this(false);
     }

     /**
      * Creates a Base16 codec used for decoding and encoding.
      *
      * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
      */
     public Base16(final boolean lowerCase) {
         this(lowerCase, DECODING_POLICY_DEFAULT);
     }

     /**
      * Creates a Base16 codec used for decoding and encoding.
      *
      * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
      * @param decodingPolicy Decoding policy.
      */
     public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
         super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0,
                 PAD_DEFAULT, decodingPolicy);
         if (lowerCase) {
             this.encodeTable = LOWER_CASE_ENCODE_TABLE;
             this.decodeTable = LOWER_CASE_DECODE_TABLE;
         } else {
             this.encodeTable = UPPER_CASE_ENCODE_TABLE;
             this.decodeTable = UPPER_CASE_DECODE_TABLE;
         }
     }

     /**
      * Decodes up to {@code length} Base16 characters from {@code data}, starting at {@code offset}, appending the
      * resulting bytes to the context buffer.
      *
      * <p>
      * Two characters are needed for each decoded byte. When a call ends on an unpaired character, its 4-bit value
      * is kept in {@code context.ibitWorkArea} (stored as value + 1, so that 0 means "nothing pending") and is
      * combined with the first character of the next call.
      * </p>
      * <p>
      * A negative {@code length} signals end of input; a character still pending at that point is passed to
      * {@link #validateTrailingCharacter()}.
      * </p>
      *
      * @param data the encoded characters to decode.
      * @param offset index of the first character to read.
      * @param length number of characters to read, or a negative value to signal end of input.
      * @param context the context holding the output buffer and the pending half-byte.
      * @throws IllegalArgumentException if a character is not in the active Base16 alphabet, or if strict decoding
      *         is enabled and input ends on an unpaired character.
      */
     @Override
     void decode(final byte[] data, int offset, final int length, final Context context) {
         if (context.eof || length < 0) {
             context.eof = true;
             if (context.ibitWorkArea != 0) {
                 validateTrailingCharacter();
             }
             return;
         }

         final int dataLen = Math.min(data.length - offset, length);
         if (dataLen <= 0) {
             // nothing to read; without this guard a pending half-byte would make us read data[offset]
             // even though the caller supplied no input
             return;
         }

         final boolean hasPending = context.ibitWorkArea != 0;
         final int availableChars = (hasPending ? 1 : 0) + dataLen;

         // small optimisation to short-cut the rest of this method when it is fed byte-by-byte
         if (availableChars == 1) {
             // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
             context.ibitWorkArea = decodeOctet(data[offset]) + 1;
             return;
         }

         // we must have an even number of chars to decode
         final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;

         // exclusive end index of the readable input; all bounds below are expressed against the real offset
         final int end = offset + dataLen;

         final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);

         int result;
         if (hasPending) {
             // we have 1/2 byte from previous invocation to decode
             result = (context.ibitWorkArea - 1) << BITS_PER_ENCODED_BYTE;
             result |= decodeOctet(data[offset++]);
             buffer[context.pos++] = (byte) result;

             // reset to empty-value for next invocation!
             context.ibitWorkArea = 0;
         }

         // decode complete pairs only; stop while at least two characters remain
         final int lastPairStart = end - 1;
         while (offset < lastPairStart) {
             result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE;
             result |= decodeOctet(data[offset++]);
             buffer[context.pos++] = (byte) result;
         }

         // we have one char of a hex-pair left over
         if (offset < end) {
             // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
             context.ibitWorkArea = decodeOctet(data[offset]) + 1;
         }
     }

     /**
      * Translates a single encoded character into its 4-bit value using the active decode table.
      *
      * @param octet the encoded character.
      * @return the value of the character, in the range 0-15.
      * @throws IllegalArgumentException if the character is outside the decode table or maps to -1.
      */
     private int decodeOctet(final byte octet) {
         // treat the byte as unsigned so that negative values fall outside the table instead of indexing it
         final int index = octet & 0xff;
         final int decoded = index < decodeTable.length ? decodeTable[index] : -1;

         if (decoded == -1) {
             throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet);
         }

         return decoded;
     }

     /**
      * Encodes {@code length} bytes from {@code data}, starting at {@code offset}, appending two Base16 characters
      * per input byte to the context buffer. A negative {@code length} signals end of input.
      *
      * @param data the bytes to encode.
      * @param offset index of the first byte to read.
      * @param length number of bytes to read, or a negative value to signal end of input.
      * @param context the context holding the output buffer.
      * @throws IllegalArgumentException if the encoded size ({@code length * 2}) overflows an {@code int}.
      */
     @Override
     void encode(final byte[] data, final int offset, final int length, final Context context) {
         if (context.eof) {
             return;
         }

         if (length < 0) {
             context.eof = true;
             return;
         }

         // a negative product means the doubled length overflowed int
         final int size = length * BYTES_PER_ENCODED_BLOCK;
         if (size < 0) {
             throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length);
         }

         final byte[] buffer = ensureBufferSize(size, context);

         final int end = offset + length;
         for (int i = offset; i < end; i++) {
             final int value = data[i];
             // masking after the (sign-extending) shift keeps only the upper nibble
             final int high = (value >> BITS_PER_ENCODED_BYTE) & MASK_4BITS;
             final int low = value & MASK_4BITS;
             buffer[context.pos++] = encodeTable[high];
             buffer[context.pos++] = encodeTable[low];
         }
     }

     /**
      * Returns whether or not the {@code octet} is in the Base16 alphabet.
      *
      * @param octet The value to test.
      *
      * @return {@code true} if the value is defined in the Base16 alphabet {@code false} otherwise.
      */
     @Override
     public boolean isInAlphabet(final byte octet) {
         // unsigned index: negative bytes map to 128-255, which lies beyond both decode tables
         final int index = octet & 0xff;
         return index < decodeTable.length && decodeTable[index] != -1;
     }

     /**
      * Validates whether decoding allows an entire final trailing character that cannot be
      * used for a complete byte.
      *
      * @throws IllegalArgumentException if strict decoding is enabled
      */
     private void validateTrailingCharacter() {
         if (isStrictDecoding()) {
             throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid base 16 alphabet " +
                     "character but not a possible encoding. " +
                     "Decoding requires at least two characters to create one byte.");
         }
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.commons.codec.binary;

	import org.apache.commons.codec.CodecPolicy;

	/**
	* Provides Base16 encoding and decoding.
	*
	* <p>
	* This class is thread-safe.
	* </p>
	* <p>
	* This implementation strictly follows RFC 4648, and as such unlike
	* the {@link Base32} and {@link Base64} implementations,
	* it does not ignore invalid alphabet characters or whitespace,
	* neither does it offer chunking or padding characters.
	* </p>
	* <p>
	* The only additional feature above those specified in RFC 4648
	* is support for working with a lower-case alphabet in addition
	* to the default upper-case alphabet.
	* </p>
	*
	* @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
	*
	* @since 1.15
	*/
	public class Base16 extends BaseNCodec {

	/**
	* BASE16 characters are 4 bits in length.
	* They are formed by taking an 8-bit group,
	* which is converted into two BASE16 characters.
	*/
	private static final int BITS_PER_ENCODED_BYTE = 4;
	private static final int BYTES_PER_ENCODED_BLOCK = 2;
	private static final int BYTES_PER_UNENCODED_BLOCK = 1;

	/**
	* This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified
	* in Table 5 of RFC 4648) into their 4-bit positive integer equivalents. Characters that are not in the Base16
	* alphabet but fall within the bounds of the array are translated to -1.
	*/
	private static final byte[] UPPER_CASE_DECODE_TABLE = {
	// 0 1 2 3 4 5 6 7 8 9 A B C D E F
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
	-1, 10, 11, 12, 13, 14, 15 // 40-46 A-F
	};

	/**
	* This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
	* equivalents as specified in Table 5 of RFC 4648.
	*/
	private static final byte[] UPPER_CASE_ENCODE_TABLE = {
	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	'A', 'B', 'C', 'D', 'E', 'F'
	};

	/**
	* This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet"
	* into their 4-bit positive integer equivalents. Characters that are not in the Base16
	* alphabet but fall within the bounds of the array are translated to -1.
	*/
	private static final byte[] LOWER_CASE_DECODE_TABLE = {
	// 0 1 2 3 4 5 6 7 8 9 A B C D E F
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
	-1, 10, 11, 12, 13, 14, 15 // 60-66 a-f
	};

	/**
	* This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
	* lower-case equivalents.
	*/
	private static final byte[] LOWER_CASE_ENCODE_TABLE = {
	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	'a', 'b', 'c', 'd', 'e', 'f'
	};

	/** Mask used to extract 4 bits, used when decoding character. */
	private static final int MASK_4BITS = 0x0f;

	/**
	* Decode table to use.
	*/
	private final byte[] decodeTable;

	/**
	* Encode table to use.
	*/
	private final byte[] encodeTable;

	/**
	* Creates a Base16 codec used for decoding and encoding.
	*/
	public Base16() {
	this(false);
	}

	/**
	* Creates a Base16 codec used for decoding and encoding.
	*
	* @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
	*/
	public Base16(final boolean lowerCase) {
	this(lowerCase, DECODING_POLICY_DEFAULT);
	}

	/**
	* Creates a Base16 codec used for decoding and encoding.
	*
	* @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
	* @param decodingPolicy Decoding policy.
	*/
	public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
	super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0,
	PAD_DEFAULT, decodingPolicy);
	if (lowerCase) {
	this.encodeTable = LOWER_CASE_ENCODE_TABLE;
	this.decodeTable = LOWER_CASE_DECODE_TABLE;
	} else {
	this.encodeTable = UPPER_CASE_ENCODE_TABLE;
	this.decodeTable = UPPER_CASE_DECODE_TABLE;
	}
	}

	@Override
	void decode(final byte[] data, int offset, final int length, final Context context) {
	if (context.eof \|\| length < 0) {
	context.eof = true;
	if (context.ibitWorkArea != 0) {
	validateTrailingCharacter();
	}
	return;
	}

	final int dataLen = Math.min(data.length - offset, length);
	final int availableChars = (context.ibitWorkArea != 0 ? 1 : 0) + dataLen;

	// small optimisation to short-cut the rest of this method when it is fed byte-by-byte
	if (availableChars == 1 && availableChars == dataLen) {
	// store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
	context.ibitWorkArea = decodeOctet(data[offset]) + 1;
	return;
	}

	// we must have an even number of chars to decode
	final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;

	final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);

	int result;
	int i = 0;
	if (dataLen < availableChars) {
	// we have 1/2 byte from previous invocation to decode
	result = (context.ibitWorkArea - 1) << BITS_PER_ENCODED_BYTE;
	result \|= decodeOctet(data[offset++]);
	i = 2;

	buffer[context.pos++] = (byte)result;

	// reset to empty-value for next invocation!
	context.ibitWorkArea = 0;
	}

	while (i < charsToProcess) {
	result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE;
	result \|= decodeOctet(data[offset++]);
	i += 2;
	buffer[context.pos++] = (byte)result;
	}

	// we have one char of a hex-pair left over
	if (i < dataLen) {
	// store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
	context.ibitWorkArea = decodeOctet(data[i]) + 1;
	}
	}

	private int decodeOctet(final byte octet) {
	int decoded = -1;
	if ((octet & 0xff) < decodeTable.length) {
	decoded = decodeTable[octet];
	}

	if (decoded == -1) {
	throw new IllegalArgumentException("Invalid octet in encoded value: " + (int)octet);
	}

	return decoded;
	}

	@Override
	void encode(final byte[] data, final int offset, final int length, final Context context) {
	if (context.eof) {
	return;
	}

	if (length < 0) {
	context.eof = true;
	return;
	}

	final int size = length * BYTES_PER_ENCODED_BLOCK;
	if (size < 0) {
	throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length);
	}

	final byte[] buffer = ensureBufferSize(size, context);

	final int end = offset + length;
	for (int i = offset; i < end; i++) {
	final int value = data[i];
	final int high = (value >> BITS_PER_ENCODED_BYTE) & MASK_4BITS;
	final int low = value & MASK_4BITS;
	buffer[context.pos++] = encodeTable[high];
	buffer[context.pos++] = encodeTable[low];
	}
	}

	/**
	* Returns whether or not the {@code octet} is in the Base16 alphabet.
	*
	* @param octet The value to test.
	*
	* @return {@code true} if the value is defined in the the Base16 alphabet {@code false} otherwise.
	*/
	@Override
	public boolean isInAlphabet(final byte octet) {
	return (octet & 0xff) < decodeTable.length && decodeTable[octet] != -1;
	}

	/**
	* Validates whether decoding allows an entire final trailing character that cannot be
	* used for a complete byte.
	*
	* @throws IllegalArgumentException if strict decoding is enabled
	*/
	private void validateTrailingCharacter() {
	if (isStrictDecoding()) {
	throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid base 16 alphabet" +
	"character but not a possible encoding. " +
	"Decoding requires at least two characters to create one byte.");
	}
	}
	}