modules/tomcat-lite/java/org/apache/tomcat/lite/io/UrlEncoding.java - tomcat80 - Git at Google

 /*
  *  Licensed to the Apache Software Foundation (ASF) under one or more
  *  contributor license agreements.  See the NOTICE file distributed with
  *  this work for additional information regarding copyright ownership.
  *  The ASF licenses this file to You under the Apache License, Version 2.0
  *  (the "License"); you may not use this file except in compliance with
  *  the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  *  Unless required by applicable law or agreed to in writing, software
  *  distributed under the License is distributed on an "AS IS" BASIS,
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  *  See the License for the specific language governing permissions and
  *  limitations under the License.
  */

 package org.apache.tomcat.lite.io;

 import java.io.CharConversionException;
 import java.io.IOException;
 import java.nio.charset.CharsetEncoder;
 import java.util.BitSet;


 /**
  * Support for %xx URL encoding.
  *
  * @author Costin Manolache
  */
 public final class UrlEncoding {

     /**
      * Value of the system property
      * {@code org.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH},
      * {@code false} when the property is not set. Nothing in this class
      * consults the flag.
      */
     protected static final boolean ALLOW_ENCODED_SLASH =
         Boolean.parseBoolean(
                 System.getProperty(
                         "org.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH",
                         "false"));

     public UrlEncoding() {
     }

     // Utilities for URL encoding.

     /** Bytes copied unescaped by the URL encoder: {@link #SAFE_CHARS} plus '/'. */
     static BitSet SAFE_CHARS_URL = new BitSet(128);

     /** Bytes considered safe to emit without %xx escaping. */
     static BitSet SAFE_CHARS = new BitSet(128);

     /**
      * Scratch byte buffer reused by the encode and decode helpers. It is
      * shared instance state and is accessed without synchronization.
      */
     BBuffer tmpBuffer = BBuffer.allocate(1024);

     /** Scratch char buffer used to wrap String input for encoding. */
     CBuffer tmpCharBuffer = CBuffer.newInstance();

     /**
      * URL-encodes {@code url} into {@code encoded} using the UTF-8 encoder
      * obtained from {@code enc} and the {@link #SAFE_CHARS_URL} set, so '/'
      * is left unescaped.
      *
      * @param url     characters to encode
      * @param encoded destination the escaped text is appended to
      * @param enc     supplies the charset encoder and performs the
      *                char-to-byte conversion
      */
     public void urlEncode(CBuffer url, CBuffer encoded, IOWriter enc) {
         tmpBuffer.recycle();
         urlEncode(url, tmpBuffer, encoded, enc.getEncoder("UTF-8"),
                 SAFE_CHARS_URL, true, enc);
     }

     /**
      * Convenience overload: copies {@code url} into the internal char
      * scratch buffer and encodes it with
      * {@link #urlEncode(CBuffer, CBuffer, IOWriter)}.
      *
      * @param url     text to encode
      * @param encoded destination the escaped text is appended to
      * @param enc     supplies the charset encoder and performs the
      *                char-to-byte conversion
      */
     public void urlEncode(String url, CBuffer encoded, IOWriter enc) {
         tmpCharBuffer.recycle();
         tmpCharBuffer.append(url);
         urlEncode(tmpCharBuffer, encoded, enc);
     }

     /**
      * Charset-encodes {@code url} into {@code tmpBuffer}, then appends every
      * resulting byte to {@code encoded}: bytes in {@code safeChars} are
      * copied as characters, all others are written as '%' followed by two
      * lower-case hex digits.
      *
      * Only works for UTF-8 or charsets preserving ascii.
      *
      * @param url       characters to encode
      * @param tmpBuffer byte buffer that receives the charset-encoded form
      * @param encoded   destination the escaped text is appended to
      * @param utf8Enc   charset encoder handed to {@code enc.encodeAll}
      * @param safeChars byte values that are emitted without escaping
      * @param last      passed through to {@code enc.encodeAll}
      *                  (presumably marks the final chunk of input - confirm
      *                  against IOWriter)
      * @param enc       performs the char-to-byte conversion
      */
     public void urlEncode(CBuffer url,
             BBuffer tmpBuffer,
             CBuffer encoded,
             CharsetEncoder utf8Enc,
             BitSet safeChars, boolean last, IOWriter enc) {
         // tomcat charset-encoded each character first. I don't think
         // this is needed.

         // TODO: space to +
         enc.encodeAll(url, tmpBuffer, utf8Enc, last);
         byte[] array = tmpBuffer.array();
         for (int i = tmpBuffer.position(); i < tmpBuffer.limit(); i++) {
             // Java bytes are signed: mask to 0..255 so that non-ASCII bytes
             // do not become a negative index, which BitSet.get rejects with
             // IndexOutOfBoundsException.
             int c = array[i] & 0xFF;
             if (safeChars.get(c)) {
                 encoded.append((char) c);
             } else {
                 encoded.append('%');
                 encoded.append(Character.forDigit((c >> 4) & 0xF, 16));
                 encoded.append(Character.forDigit(c & 0xF, 16));
             }
         }
     }

     static {
         initSafeChars(SAFE_CHARS);
         initSafeChars(SAFE_CHARS_URL);
         SAFE_CHARS_URL.set('/');
     }

     /**
      * Marks the characters that never need escaping in {@code safeChars}:
      * ASCII letters, digits and a small set of punctuation.
      */
     private static void initSafeChars(BitSet safeChars) {
         int i;
         for (i = 'a'; i <= 'z'; i++) {
             safeChars.set(i);
         }
         for (i = 'A'; i <= 'Z'; i++) {
             safeChars.set(i);
         }
         for (i = '0'; i <= '9'; i++) {
             safeChars.set(i);
         }
         // safe
         safeChars.set('-');
         safeChars.set('_');
         safeChars.set('.');

         // Dangerous: someone may treat this as " "
         // RFC1738 does allow it, it's not reserved
         // safeChars.set('+');
         // extra
         safeChars.set('*');
         // tomcat has them - not sure if this is correct
         safeChars.set('$'); // ?
         safeChars.set('!'); // ?
         safeChars.set('\''); // ?
         safeChars.set('('); // ?
         safeChars.set(')'); // ?
         safeChars.set(','); // ?
     }

     /**
      * Unescapes a copy of {@code bb} and converts the resulting bytes to
      * characters appended to {@code dest}. The copy lives in the internal
      * scratch buffer; the unescaping itself never writes to {@code bb}.
      *
      * @param bb      bytes containing %xx escapes
      * @param dest    receives the decoded characters
      * @param q       when true, '+' is also converted to ' '
      * @param charDec performs the byte-to-char conversion
      * @throws IOException if an escape sequence is truncated or is not
      *                     followed by two hex digits
      */
     public void urlDecode(BBuffer bb, CBuffer dest, boolean q,
             IOReader charDec) throws IOException {
         // Reset the scratch buffer first (as urlEncode does) so bytes left
         // over from an earlier call are not decoded again.
         tmpBuffer.recycle();
         tmpBuffer.append(bb);
         // Replace %xx in the scratch copy ...
         urlDecode(tmpBuffer, q);
         // ... and convert the unescaped copy, not the still-escaped input.
         charDec.decodeAll(tmpBuffer, dest);
     }


     /**
      * Same as {@link #urlDecode(BBuffer, CBuffer, boolean, IOReader)} with
      * query handling enabled, i.e. '+' is converted to ' '.
      *
      * @param bb      bytes containing %xx escapes
      * @param dest    receives the decoded characters
      * @param charDec performs the byte-to-char conversion
      * @throws IOException if an escape sequence is truncated or is not
      *                     followed by two hex digits
      */
     public void urlDecode(BBuffer bb, CBuffer dest,
             IOReader charDec) throws IOException {
         urlDecode(bb, dest, true, charDec);
     }


     /**
      * URLDecode, will modify the source. This is only at byte level -
      * it needs conversion to chars using the right charset.
      *
      * The buffer is compacted in place: decoded bytes are written back over
      * the escaped input and the buffer end is moved to the new length.
      *
      * @param mb    buffer whose content is unescaped in place
      * @param query when true, '+' is converted to ' '
      * @throws IOException a {@link CharConversionException} if a '%' is not
      *                     followed by two hex digits inside the buffer
      */
     public void urlDecode(BBuffer mb, boolean query) throws IOException {
         int start = mb.getOffset();
         byte[] buff = mb.array();
         int end = mb.getEnd();

         int idx = BBuffer.indexOf(buff, start, end, '%');
         int idx2 = -1;
         if (query) {
             idx2 = BBuffer.indexOf(buff, start, end, '+');
         }
         if (idx < 0 && idx2 < 0) {
             // Nothing to unescape.
             return;
         }

         // idx will be the smallest non-negative index (first % or +)
         if (idx2 >= 0 && idx2 < idx) {
             idx = idx2;
         }
         if (idx < 0) {
             idx = idx2;
         }

         // j reads the escaped input, idx writes the decoded output; idx
         // never gets ahead of j, so the copy can be done in place.
         for (int j = idx; j < end; j++, idx++) {
             if (buff[j] == '+' && query) {
                 buff[idx] = (byte) ' ';
             } else if (buff[j] != '%') {
                 buff[idx] = buff[j];
             } else {
                 // read next 2 digits
                 if (j + 2 >= end) {
                     throw new CharConversionException("EOF");
                 }
                 byte b1 = buff[j + 1];
                 byte b2 = buff[j + 2];
                 if (!isHexDigit(b1) || !isHexDigit(b2)) {
                     throw new CharConversionException("isHexDigit");
                 }

                 j += 2;
                 buff[idx] = (byte) x2c(b1, b2);
             }
         }

         mb.setEnd(idx);
     }


     /** Returns true when {@code c} is an ASCII hex digit (0-9, a-f, A-F). */
     private static boolean isHexDigit(int c) {
         return ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
     }

     /**
      * Combines two ASCII hex digits into the value they encode (0..255).
      * Callers must have validated both bytes with {@link #isHexDigit(int)}.
      */
     private static int x2c(byte b1, byte b2) {
         // Clearing bit 0x20 folds lower-case letters onto upper-case ones.
         int digit = (b1 >= 'A') ? ((b1 & 0xDF) - 'A') + 10 : (b1 - '0');
         digit *= 16;
         digit += (b2 >= 'A') ? ((b2 & 0xDF) - 'A') + 10 : (b2 - '0');
         return digit;
     }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.tomcat.lite.io;

	import java.io.CharConversionException;
	import java.io.IOException;
	import java.nio.charset.CharsetEncoder;
	import java.util.BitSet;


	/**
	* Support for %xx URL encoding.
	*
	* @author Costin Manolache
	*/
	public final class UrlEncoding {

	protected static final boolean ALLOW_ENCODED_SLASH =
	Boolean.valueOf(
	System.getProperty(
	"org.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH",
	"false")).booleanValue();

	public UrlEncoding() {
	}

	// Utilities for URL encoding.
	static BitSet SAFE_CHARS_URL = new BitSet(128);
	static BitSet SAFE_CHARS = new BitSet(128);
	BBuffer tmpBuffer = BBuffer.allocate(1024);
	CBuffer tmpCharBuffer = CBuffer.newInstance();

	public void urlEncode(CBuffer url, CBuffer encoded, IOWriter enc) {
	tmpBuffer.recycle();
	urlEncode(url, tmpBuffer, encoded, enc.getEncoder("UTF-8"),
	SAFE_CHARS_URL, true, enc);
	}

	public void urlEncode(String url, CBuffer encoded, IOWriter enc) {
	tmpCharBuffer.recycle();
	tmpCharBuffer.append(url);
	urlEncode(tmpCharBuffer, encoded, enc);
	}

	/** Only works for UTF-8 or charsets preserving ascii.
	*
	* @param url
	* @param tmpBuffer
	* @param encoded
	* @param utf8Enc
	* @param safeChars
	*/
	public void urlEncode(CBuffer url,
	BBuffer tmpBuffer,
	CBuffer encoded,
	CharsetEncoder utf8Enc,
	BitSet safeChars, boolean last, IOWriter enc) {
	// tomcat charset-encoded each character first. I don't think
	// this is needed.

	// TODO: space to +
	enc.encodeAll(url, tmpBuffer, utf8Enc, last);
	byte[] array = tmpBuffer.array();
	for (int i = tmpBuffer.position(); i < tmpBuffer.limit(); i++) {
	int c = array[i];
	if (safeChars.get(c)) {
	encoded.append((char) c);
	} else {
	encoded.append('%');
	char ch = Character.forDigit((c >> 4) & 0xF, 16);
	encoded.append(ch);
	ch = Character.forDigit(c & 0xF, 16);
	encoded.append(ch);
	}
	}
	}

	static {
	initSafeChars(SAFE_CHARS);
	initSafeChars(SAFE_CHARS_URL);
	SAFE_CHARS_URL.set('/');
	}

	private static void initSafeChars(BitSet safeChars) {
	int i;
	for (i = 'a'; i <= 'z'; i++) {
	safeChars.set(i);
	}
	for (i = 'A'; i <= 'Z'; i++) {
	safeChars.set(i);
	}
	for (i = '0'; i <= '9'; i++) {
	safeChars.set(i);
	}
	// safe
	safeChars.set('-');
	safeChars.set('_');
	safeChars.set('.');

	// Dangerous: someone may treat this as " "
	// RFC1738 does allow it, it's not reserved
	// safeChars.set('+');
	// extra
	safeChars.set('*');
	// tomcat has them - not sure if this is correct
	safeChars.set('$'); // ?
	safeChars.set('!'); // ?
	safeChars.set('\''); // ?
	safeChars.set('('); // ?
	safeChars.set(')'); // ?
	safeChars.set(','); // ?
	}

	public void urlDecode(BBuffer bb, CBuffer dest, boolean q,
	IOReader charDec) throws IOException {
	// Replace %xx
	tmpBuffer.append(bb);
	urlDecode(tmpBuffer, q);
	charDec.decodeAll(bb, dest);
	}


	public void urlDecode(BBuffer bb, CBuffer dest,
	IOReader charDec) throws IOException {
	// Replace %xx
	tmpBuffer.append(bb);
	urlDecode(tmpBuffer, true);
	charDec.decodeAll(bb, dest);
	}


	/**
	* URLDecode, will modify the source. This is only at byte level -
	* it needs conversion to chars using the right charset.
	*
	* @param query Converts '+' to ' ' and allow '/'
	*/
	public void urlDecode(BBuffer mb, boolean query) throws IOException {
	int start = mb.getOffset();
	byte buff[] = mb.array();
	int end = mb.getEnd();

	int idx = BBuffer.indexOf(buff, start, end, '%');
	int idx2 = -1;
	if (query)
	idx2 = BBuffer.indexOf(buff, start, end, '+');
	if (idx < 0 && idx2 < 0) {
	return;
	}

	// idx will be the smallest positive inxes ( first % or + )
	if (idx2 >= 0 && idx2 < idx)
	idx = idx2;
	if (idx < 0)
	idx = idx2;

	//boolean noSlash = !query;

	for (int j = idx; j < end; j++, idx++) {
	if (buff[j] == '+' && query) {
	buff[idx] = (byte) ' ';
	} else if (buff[j] != '%') {
	buff[idx] = buff[j];
	} else {
	// read next 2 digits
	if (j + 2 >= end) {
	throw new CharConversionException("EOF");
	}
	byte b1 = buff[j + 1];
	byte b2 = buff[j + 2];
	if (!isHexDigit(b1) \|\| !isHexDigit(b2))
	throw new CharConversionException("isHexDigit");

	j += 2;
	int res = x2c(b1, b2);
	// if (noSlash && (res == '/')) {
	// throw new CharConversionException("noSlash " + mb);
	// }
	buff[idx] = (byte) res;
	}
	}

	mb.setEnd(idx);

	return;
	}


	private static boolean isHexDigit(int c) {
	return ((c >= '0' && c <= '9') \|\| (c >= 'a' && c <= 'f') \|\| (c >= 'A' && c <= 'F'));
	}

	private static int x2c(byte b1, byte b2) {
	int digit = (b1 >= 'A') ? ((b1 & 0xDF) - 'A') + 10 : (b1 - '0');
	digit *= 16;
	digit += (b2 >= 'A') ? ((b2 & 0xDF) - 'A') + 10 : (b2 - '0');
	return digit;
	}

	}