blob: 82335f1252262b57af272b2b27f77fca888237f7 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.fileupload2.util.mime;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
/**
* Utility class to decode/encode character set on HTTP Header fields based on RFC 2231.
* This implementation adheres to RFC 5987 in particular, which was defined for HTTP headers
*
* RFC 5987 builds on RFC 2231, but has lesser scope like <a href="https://tools.ietf.org/html/rfc5987#section-3.2">mandatory charset definition</a>
* and <a href="https://tools.ietf.org/html/rfc5987#section-4">no parameter continuation</a>
*
* <p>
* @see <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a>
* @see <a href="https://tools.ietf.org/html/rfc5987">RFC 5987</a>
*/
public final class RFC2231Utility {
private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();
private static final byte[] HEX_DECODE = new byte[0x80];
// create a ASCII decoded array of Hexadecimal values
static {
for (int i = 0; i < HEX_DIGITS.length; i++) {
HEX_DECODE[HEX_DIGITS[i]] = (byte) i;
HEX_DECODE[Character.toLowerCase(HEX_DIGITS[i])] = (byte) i;
}
}
/**
* Checks if Asterisk (*) at the end of parameter name to indicate,
* if it has charset and language information to decode the value
* @param paramName The parameter, which is being checked.
* @return {@code true}, if encoded as per RFC 2231, {@code false} otherwise
*/
public static boolean hasEncodedValue(final String paramName) {
if (paramName != null) {
return paramName.lastIndexOf("*") == (paramName.length() - 1);
}
return false;
}
/**
* If {@code paramName} has Asterisk (*) at the end, it will be stripped off,
* else the passed value will be returned
* @param paramName The parameter, which is being inspected.
* @return stripped {@code paramName} of Asterisk (*), if RFC2231 encoded
*/
public static String stripDelimiter(final String paramName) {
if (hasEncodedValue(paramName)) {
final StringBuilder paramBuilder = new StringBuilder(paramName);
paramBuilder.deleteCharAt(paramName.lastIndexOf("*"));
return paramBuilder.toString();
}
return paramName;
}
/**
* Decode a string of text obtained from a HTTP header as per RFC 2231
*
* <b>Eg 1.</b> {@code us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A}
* will be decoded to {@code This is ***fun***}
*
* <b>Eg 2.</b> {@code iso-8859-1'en'%A3%20rate}
* will be decoded to {@code £ rate}.
*
* <b>Eg 3.</b> {@code UTF-8''%c2%a3%20and%20%e2%82%ac%20rates}
* will be decoded to {@code £ and € rates}.
*
* @param encodedText - Text to be decoded has a format of {@code <charset>'<language>'<encoded_value>} and ASCII only
* @return Decoded text based on charset encoding
* @throws UnsupportedEncodingException The requested character set wasn't found.
*/
public static String decodeText(final String encodedText) throws UnsupportedEncodingException {
final int langDelimitStart = encodedText.indexOf('\'');
if (langDelimitStart == -1) {
// missing charset
return encodedText;
}
final String mimeCharset = encodedText.substring(0, langDelimitStart);
final int langDelimitEnd = encodedText.indexOf('\'', langDelimitStart + 1);
if (langDelimitEnd == -1) {
// missing language
return encodedText;
}
final byte[] bytes = fromHex(encodedText.substring(langDelimitEnd + 1));
return new String(bytes, getJavaCharset(mimeCharset));
}
/**
* Convert {@code text} to their corresponding Hex value
* @param text - ASCII text input
* @return Byte array of characters decoded from ASCII table
*/
private static byte[] fromHex(final String text) {
final ByteArrayOutputStream out = new ByteArrayOutputStream(text.length());
for (int i = 0; i < text.length();) {
final char c = text.charAt(i++);
if (c == '%') {
if (i > text.length() - 2) {
break; // unterminated sequence
}
final byte b1 = HEX_DECODE[text.charAt(i++) & 0x7f];
final byte b2 = HEX_DECODE[text.charAt(i++) & 0x7f];
out.write((b1 << 4) | b2);
} else {
out.write((byte) c);
}
}
return out.toByteArray();
}
private static String getJavaCharset(final String mimeCharset) {
// good enough for standard values
return mimeCharset;
}
}