blob: df6f2524d5ff9d9eca32ba887f954877fd19fdd1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.binary;
import org.apache.commons.codec.CodecPolicy;
/**
* Provides Base16 encoding and decoding.
*
* <p>
* This class is thread-safe.
* </p>
* <p>
* This implementation strictly follows RFC 4648, and as such unlike
* the {@link Base32} and {@link Base64} implementations,
* it does not ignore invalid alphabet characters or whitespace,
* neither does it offer chunking or padding characters.
* </p>
* <p>
* The only additional feature above those specified in RFC 4648
* is support for working with a lower-case alphabet in addition
* to the default upper-case alphabet.
* </p>
*
* @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
*
* @since 1.15
*/
public class Base16 extends BaseNCodec {
/**
* BASE16 characters are 4 bits in length.
* They are formed by taking an 8-bit group,
* which is converted into two BASE16 characters.
*/
private static final int BITS_PER_ENCODED_BYTE = 4;
private static final int BYTES_PER_ENCODED_BLOCK = 2;
private static final int BYTES_PER_UNENCODED_BLOCK = 1;
/**
* This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified
* in Table 5 of RFC 4648) into their 4-bit positive integer equivalents. Characters that are not in the Base16
* alphabet but fall within the bounds of the array are translated to -1.
*/
private static final byte[] UPPER_CASE_DECODE_TABLE = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
-1, 10, 11, 12, 13, 14, 15 // 40-46 A-F
};
/**
* This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
* equivalents as specified in Table 5 of RFC 4648.
*/
private static final byte[] UPPER_CASE_ENCODE_TABLE = {
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'A', 'B', 'C', 'D', 'E', 'F'
};
/**
* This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet"
* into their 4-bit positive integer equivalents. Characters that are not in the Base16
* alphabet but fall within the bounds of the array are translated to -1.
*/
private static final byte[] LOWER_CASE_DECODE_TABLE = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
-1, 10, 11, 12, 13, 14, 15 // 60-66 a-f
};
/**
* This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
* lower-case equivalents.
*/
private static final byte[] LOWER_CASE_ENCODE_TABLE = {
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'a', 'b', 'c', 'd', 'e', 'f'
};
/** Mask used to extract 4 bits, used when decoding character. */
private static final int MASK_4BITS = 0x0f;
/**
* Decode table to use.
*/
private final byte[] decodeTable;
/**
* Encode table to use.
*/
private final byte[] encodeTable;
/**
* Creates a Base16 codec used for decoding and encoding.
*/
public Base16() {
this(false);
}
/**
* Creates a Base16 codec used for decoding and encoding.
*
* @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
*/
public Base16(final boolean lowerCase) {
this(lowerCase, DECODING_POLICY_DEFAULT);
}
/**
* Creates a Base16 codec used for decoding and encoding.
*
* @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
* @param decodingPolicy Decoding policy.
*/
public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0,
PAD_DEFAULT, decodingPolicy);
if (lowerCase) {
this.encodeTable = LOWER_CASE_ENCODE_TABLE;
this.decodeTable = LOWER_CASE_DECODE_TABLE;
} else {
this.encodeTable = UPPER_CASE_ENCODE_TABLE;
this.decodeTable = UPPER_CASE_DECODE_TABLE;
}
}
@Override
void decode(final byte[] data, int offset, final int length, final Context context) {
if (context.eof || length < 0) {
context.eof = true;
if (context.ibitWorkArea != 0) {
validateTrailingCharacter();
}
return;
}
final int dataLen = Math.min(data.length - offset, length);
final int availableChars = (context.ibitWorkArea != 0 ? 1 : 0) + dataLen;
// small optimisation to short-cut the rest of this method when it is fed byte-by-byte
if (availableChars == 1 && availableChars == dataLen) {
// store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
context.ibitWorkArea = decodeOctet(data[offset]) + 1;
return;
}
// we must have an even number of chars to decode
final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;
final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);
int result;
int i = 0;
if (dataLen < availableChars) {
// we have 1/2 byte from previous invocation to decode
result = (context.ibitWorkArea - 1) << BITS_PER_ENCODED_BYTE;
result |= decodeOctet(data[offset++]);
i = 2;
buffer[context.pos++] = (byte)result;
// reset to empty-value for next invocation!
context.ibitWorkArea = 0;
}
while (i < charsToProcess) {
result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE;
result |= decodeOctet(data[offset++]);
i += 2;
buffer[context.pos++] = (byte)result;
}
// we have one char of a hex-pair left over
if (i < dataLen) {
// store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
context.ibitWorkArea = decodeOctet(data[i]) + 1;
}
}
private int decodeOctet(final byte octet) {
int decoded = -1;
if ((octet & 0xff) < decodeTable.length) {
decoded = decodeTable[octet];
}
if (decoded == -1) {
throw new IllegalArgumentException("Invalid octet in encoded value: " + (int)octet);
}
return decoded;
}
@Override
void encode(final byte[] data, final int offset, final int length, final Context context) {
if (context.eof) {
return;
}
if (length < 0) {
context.eof = true;
return;
}
final int size = length * BYTES_PER_ENCODED_BLOCK;
if (size < 0) {
throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length);
}
final byte[] buffer = ensureBufferSize(size, context);
final int end = offset + length;
for (int i = offset; i < end; i++) {
final int value = data[i];
final int high = (value >> BITS_PER_ENCODED_BYTE) & MASK_4BITS;
final int low = value & MASK_4BITS;
buffer[context.pos++] = encodeTable[high];
buffer[context.pos++] = encodeTable[low];
}
}
/**
* Returns whether or not the {@code octet} is in the Base16 alphabet.
*
* @param octet The value to test.
*
* @return {@code true} if the value is defined in the the Base16 alphabet {@code false} otherwise.
*/
@Override
public boolean isInAlphabet(final byte octet) {
return (octet & 0xff) < decodeTable.length && decodeTable[octet] != -1;
}
/**
* Validates whether decoding allows an entire final trailing character that cannot be
* used for a complete byte.
*
* @throws IllegalArgumentException if strict decoding is enabled
*/
private void validateTrailingCharacter() {
if (isStrictDecoding()) {
throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid base 16 alphabet" +
"character but not a possible encoding. " +
"Decoding requires at least two characters to create one byte.");
}
}
}