src/main/java/org/apache/commons/fileupload2/util/mime/RFC2231Utility.java - commons-fileupload - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.commons.fileupload2.util.mime;

 import java.io.ByteArrayOutputStream;
 import java.io.UnsupportedEncodingException;
 /**
  * Utility class to decode/encode character set on HTTP Header fields based on RFC 2231.
  * This implementation adheres to RFC 5987 in particular, which was defined for HTTP headers
  *
  * RFC 5987 builds on RFC 2231, but has lesser scope like <a href="https://tools.ietf.org/html/rfc5987#section-3.2">mandatory charset definition</a>
  * and <a href="https://tools.ietf.org/html/rfc5987#section-4">no parameter continuation</a>
  *
  * <p>
  * @see <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a>
  * @see <a href="https://tools.ietf.org/html/rfc5987">RFC 5987</a>
  */
 public final class RFC2231Utility {

     private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

     private static final byte[] HEX_DECODE = new byte[0x80];

     // create a ASCII decoded array of Hexadecimal values
     static {
         for (int i = 0; i < HEX_DIGITS.length; i++) {
             HEX_DECODE[HEX_DIGITS[i]] = (byte) i;
             HEX_DECODE[Character.toLowerCase(HEX_DIGITS[i])] = (byte) i;
         }
     }

     /**
      * Checks if Asterisk (*) at the end of parameter name to indicate,
      * if it has charset and language information to decode the value
      * @param paramName The parameter, which is being checked.
      * @return {@code true}, if encoded as per RFC 2231, {@code false} otherwise
      */
     public static boolean hasEncodedValue(final String paramName) {
         if (paramName != null) {
             return paramName.lastIndexOf("*") == (paramName.length() - 1);
         }
         return false;
     }

     /**
      * If {@code paramName} has Asterisk (*) at the end, it will be stripped off,
      * else the passed value will be returned
      * @param paramName The parameter, which is being inspected.
      * @return stripped {@code paramName} of Asterisk (*), if RFC2231 encoded
      */
     public static String stripDelimiter(final String paramName) {
         if (hasEncodedValue(paramName)) {
             final StringBuilder paramBuilder = new StringBuilder(paramName);
             paramBuilder.deleteCharAt(paramName.lastIndexOf("*"));
             return paramBuilder.toString();
         }
         return paramName;
     }

     /**
      * Decode a string of text obtained from a HTTP header as per RFC 2231
      *
      * <b>Eg 1.</b> {@code us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A}
      * will be decoded to {@code This is ***fun***}
      *
      * <b>Eg 2.</b> {@code iso-8859-1'en'%A3%20rate}
      * will be decoded to {@code £ rate}.
      *
      * <b>Eg 3.</b> {@code UTF-8''%c2%a3%20and%20%e2%82%ac%20rates}
      * will be decoded to {@code £ and € rates}.
      *
      * @param encodedText - Text to be decoded has a format of {@code <charset>'<language>'<encoded_value>} and ASCII only
      * @return Decoded text based on charset encoding
      * @throws UnsupportedEncodingException The requested character set wasn't found.
      */
     public static String decodeText(final String encodedText) throws UnsupportedEncodingException {
         final int langDelimitStart = encodedText.indexOf('\'');
         if (langDelimitStart == -1) {
             // missing charset
             return encodedText;
         }
         final String mimeCharset = encodedText.substring(0, langDelimitStart);
         final int langDelimitEnd = encodedText.indexOf('\'', langDelimitStart + 1);
         if (langDelimitEnd == -1) {
             // missing language
             return encodedText;
         }
         final byte[] bytes = fromHex(encodedText.substring(langDelimitEnd + 1));
         return new String(bytes, getJavaCharset(mimeCharset));
     }

     /**
      * Convert {@code text} to their corresponding Hex value
      * @param text - ASCII text input
      * @return Byte array of characters decoded from ASCII table
      */
     private static byte[] fromHex(final String text) {
         final ByteArrayOutputStream out = new ByteArrayOutputStream(text.length());
         for (int i = 0; i < text.length();) {
             final char c = text.charAt(i++);
             if (c == '%') {
                 if (i > text.length() - 2) {
                     break; // unterminated sequence
                 }
                 final byte b1 = HEX_DECODE[text.charAt(i++) & 0x7f];
                 final byte b2 = HEX_DECODE[text.charAt(i++) & 0x7f];
                 out.write((b1 << 4) | b2);
             } else {
                 out.write((byte) c);
             }
         }
         return out.toByteArray();
     }

     private static String getJavaCharset(final String mimeCharset) {
         // good enough for standard values
         return mimeCharset;
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.commons.fileupload2.util.mime;

	import java.io.ByteArrayOutputStream;
	import java.io.UnsupportedEncodingException;
	/**
	* Utility class to decode/encode character set on HTTP Header fields based on RFC 2231.
	* This implementation adheres to RFC 5987 in particular, which was defined for HTTP headers
	*
	* RFC 5987 builds on RFC 2231, but has lesser scope like <a href="https://tools.ietf.org/html/rfc5987#section-3.2">mandatory charset definition</a>
	* and <a href="https://tools.ietf.org/html/rfc5987#section-4">no parameter continuation</a>
	*
	* <p>
	* @see <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a>
	* @see <a href="https://tools.ietf.org/html/rfc5987">RFC 5987</a>
	*/
	public final class RFC2231Utility {

	private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

	private static final byte[] HEX_DECODE = new byte[0x80];

	// create a ASCII decoded array of Hexadecimal values
	static {
	for (int i = 0; i < HEX_DIGITS.length; i++) {
	HEX_DECODE[HEX_DIGITS[i]] = (byte) i;
	HEX_DECODE[Character.toLowerCase(HEX_DIGITS[i])] = (byte) i;
	}
	}

	/**
	* Checks if Asterisk (*) at the end of parameter name to indicate,
	* if it has charset and language information to decode the value
	* @param paramName The parameter, which is being checked.
	* @return {@code true}, if encoded as per RFC 2231, {@code false} otherwise
	*/
	public static boolean hasEncodedValue(final String paramName) {
	if (paramName != null) {
	return paramName.lastIndexOf("*") == (paramName.length() - 1);
	}
	return false;
	}

	/**
	* If {@code paramName} has Asterisk (*) at the end, it will be stripped off,
	* else the passed value will be returned
	* @param paramName The parameter, which is being inspected.
	* @return stripped {@code paramName} of Asterisk (*), if RFC2231 encoded
	*/
	public static String stripDelimiter(final String paramName) {
	if (hasEncodedValue(paramName)) {
	final StringBuilder paramBuilder = new StringBuilder(paramName);
	paramBuilder.deleteCharAt(paramName.lastIndexOf("*"));
	return paramBuilder.toString();
	}
	return paramName;
	}

	/**
	* Decode a string of text obtained from a HTTP header as per RFC 2231
	*
	* <b>Eg 1.</b> {@code us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A}
	* will be decoded to {@code This is *fun*}
	*
	* <b>Eg 2.</b> {@code iso-8859-1'en'%A3%20rate}
	* will be decoded to {@code £ rate}.
	*
	* <b>Eg 3.</b> {@code UTF-8''%c2%a3%20and%20%e2%82%ac%20rates}
	* will be decoded to {@code £ and € rates}.
	*
	* @param encodedText - Text to be decoded has a format of {@code <charset>'<language>'<encoded_value>} and ASCII only
	* @return Decoded text based on charset encoding
	* @throws UnsupportedEncodingException The requested character set wasn't found.
	*/
	public static String decodeText(final String encodedText) throws UnsupportedEncodingException {
	final int langDelimitStart = encodedText.indexOf('\'');
	if (langDelimitStart == -1) {
	// missing charset
	return encodedText;
	}
	final String mimeCharset = encodedText.substring(0, langDelimitStart);
	final int langDelimitEnd = encodedText.indexOf('\'', langDelimitStart + 1);
	if (langDelimitEnd == -1) {
	// missing language
	return encodedText;
	}
	final byte[] bytes = fromHex(encodedText.substring(langDelimitEnd + 1));
	return new String(bytes, getJavaCharset(mimeCharset));
	}

	/**
	* Convert {@code text} to their corresponding Hex value
	* @param text - ASCII text input
	* @return Byte array of characters decoded from ASCII table
	*/
	private static byte[] fromHex(final String text) {
	final ByteArrayOutputStream out = new ByteArrayOutputStream(text.length());
	for (int i = 0; i < text.length();) {
	final char c = text.charAt(i++);
	if (c == '%') {
	if (i > text.length() - 2) {
	break; // unterminated sequence
	}
	final byte b1 = HEX_DECODE[text.charAt(i++) & 0x7f];
	final byte b2 = HEX_DECODE[text.charAt(i++) & 0x7f];
	out.write((b1 << 4) \| b2);
	} else {
	out.write((byte) c);
	}
	}
	return out.toByteArray();
	}

	private static String getJavaCharset(final String mimeCharset) {
	// good enough for standard values
	return mimeCharset;
	}
	}