/* | |
* Licensed to the Apache Software Foundation (ASF) under one | |
* or more contributor license agreements. See the NOTICE file | |
* distributed with this work for additional information | |
* regarding copyright ownership. The ASF licenses this file | |
* to you under the Apache License, Version 2.0 (the | |
* "License"); you may not use this file except in compliance | |
* with the License. You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, | |
* software distributed under the License is distributed on an | |
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
* KIND, either express or implied. See the License for the | |
* specific language governing permissions and limitations | |
* under the License. | |
*/ | |
package org.apache.axis2.transport.http.util; | |
import java.io.ByteArrayOutputStream; | |
import java.io.UnsupportedEncodingException; | |
import java.net.URISyntaxException; | |
public class URIEncoderDecoder { | |
static final String digits = "0123456789ABCDEF"; //$NON-NLS-1$ | |
static final String encoding = "UTF8"; //$NON-NLS-1$ | |
/** | |
* Validate a string by checking if it contains any characters other than: | |
* <p/> | |
* 1. letters ('a'..'z', 'A'..'Z') 2. numbers ('0'..'9') 3. characters in | |
* the legalset parameter 4. others (Unicode characters that are not in | |
* US-ASCII set, and are not ISO Control or are not ISO Space characters) | |
* <p/> | |
* called from URI.Helper.parseURI() to validate each component | |
* <p/> | |
* | |
* @param s java.lang.String the string to be validated | |
* @param legal java.lang.String the characters allowed in the String s | |
*/ | |
static void validate(String s, String legal) throws URISyntaxException { | |
for (int i = 0; i < s.length();) { | |
char ch = s.charAt(i); | |
if (ch == '%') { | |
do { | |
if (i + 2 >= s.length()) { | |
throw new URISyntaxException(s, "Incomplete % sequence"); | |
} | |
int d1 = Character.digit(s.charAt(i + 1), 16); | |
int d2 = Character.digit(s.charAt(i + 2), 16); | |
if (d1 == -1 || d2 == -1) { | |
throw new URISyntaxException(s, | |
"Invalid % sequence " + s.substring(i, i + 3) | |
, i); | |
} | |
i += 3; | |
} while (i < s.length() && s.charAt(i) == '%'); | |
continue; | |
} | |
if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') | |
|| (ch >= '0' && ch <= '9') || legal.indexOf(ch) > -1 || (ch > 127 | |
&& !Character.isSpaceChar(ch) && !Character | |
.isISOControl(ch)))) { | |
throw new URISyntaxException(s, "Illegal character", i); | |
} | |
i++; | |
} | |
} | |
static void validateSimple(String s, String legal) | |
throws URISyntaxException { | |
for (int i = 0; i < s.length();) { | |
char ch = s.charAt(i); | |
if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') | |
|| (ch >= '0' && ch <= '9') || legal.indexOf(ch) > -1)) { | |
throw new URISyntaxException(s, "Illegal character", i); //$NON-NLS-1$ | |
} | |
i++; | |
} | |
} | |
/** | |
* All characters except letters ('a'..'z', 'A'..'Z') and numbers ('0'..'9') | |
* and legal characters are converted into their hexidecimal value prepended | |
* by '%'. | |
* <p/> | |
* For example: '#' -> %23 | |
* <p/> | |
* Other characters, which are Unicode chars that are not US-ASCII, and are | |
* not ISO Control or are not ISO Space chars, are preserved. | |
* <p/> | |
* Called from URI.quoteComponent() (for multiple argument constructors) | |
* <p/> | |
* | |
* @param s java.lang.String the string to be converted | |
* @param legal java.lang.String the characters allowed to be preserved in the | |
* string s | |
* @return java.lang.String the converted string | |
*/ | |
public static String quoteIllegal(String s, String legal) | |
throws UnsupportedEncodingException { | |
StringBuffer buf = new StringBuffer(); | |
for (int i = 0; i < s.length(); i++) { | |
char ch = s.charAt(i); | |
if ((ch >= 'a' && ch <= 'z') | |
|| (ch >= 'A' && ch <= 'Z') | |
|| (ch >= '0' && ch <= '9') | |
|| legal.indexOf(ch) > -1) { | |
buf.append(ch); | |
} else { | |
byte[] bytes = new String(new char[]{ch}).getBytes(encoding); | |
for (int j = 0; j < bytes.length; j++) { | |
buf.append('%'); | |
buf.append(digits.charAt((bytes[j] & 0xf0) >> 4)); | |
buf.append(digits.charAt(bytes[j] & 0xf)); | |
} | |
} | |
} | |
return buf.toString(); | |
} | |
/** | |
* Other characters, which are Unicode chars that are not US-ASCII, and are | |
* not ISO Control or are not ISO Space chars are not preserved. They are | |
* converted into their hexidecimal value prepended by '%'. | |
* <p/> | |
* For example: Euro currency symbol -> "%E2%82%AC". | |
* <p/> | |
* Called from URI.toASCIIString() | |
* <p/> | |
* | |
* @param s java.lang.String the string to be converted | |
* @return java.lang.String the converted string | |
*/ | |
static String encodeOthers(String s) throws UnsupportedEncodingException { | |
StringBuffer buf = new StringBuffer(); | |
for (int i = 0; i < s.length(); i++) { | |
char ch = s.charAt(i); | |
if (ch <= 127) { | |
buf.append(ch); | |
} else { | |
byte[] bytes = new String(new char[]{ch}).getBytes(encoding); | |
for (int j = 0; j < bytes.length; j++) { | |
buf.append('%'); | |
buf.append(digits.charAt((bytes[j] & 0xf0) >> 4)); | |
buf.append(digits.charAt(bytes[j] & 0xf)); | |
} | |
} | |
} | |
return buf.toString(); | |
} | |
/** | |
* Decodes the string argument which is assumed to be encoded in the | |
* <code>x-www-form-urlencoded</code> MIME content type using the UTF-8 | |
* encoding scheme. | |
* <p/> | |
* '%' and two following hex digit characters are converted to the | |
* equivalent byte value. All other characters are passed through | |
* unmodified. | |
* <p/> | |
* <p/> | |
* e.g. "A%20B%20C %24%25" -> "A B C $%" | |
* <p/> | |
* Called from URI.getXYZ() methods | |
* <p/> | |
* | |
* @param s java.lang.String The encoded string. | |
* @return java.lang.String The decoded version. | |
*/ | |
public static String decode(String s) throws UnsupportedEncodingException { | |
StringBuffer result = new StringBuffer(); | |
ByteArrayOutputStream out = new ByteArrayOutputStream(); | |
for (int i = 0; i < s.length();) { | |
char c = s.charAt(i); | |
if (c == '%') { | |
out.reset(); | |
do { | |
if (i + 2 >= s.length()) { | |
throw new IllegalArgumentException("Incomplete % sequence at " + i); | |
} | |
int d1 = Character.digit(s.charAt(i + 1), 16); | |
int d2 = Character.digit(s.charAt(i + 2), 16); | |
if (d1 == -1 || d2 == -1) { | |
throw new IllegalArgumentException( | |
"Invalid % sequence" + s.substring(i, i + 3) + "at " + | |
String.valueOf(i)); | |
} | |
out.write((byte) ((d1 << 4) + d2)); | |
i += 3; | |
} while (i < s.length() && s.charAt(i) == '%'); | |
result.append(out.toString(encoding)); | |
continue; | |
} | |
result.append(c); | |
i++; | |
} | |
return result.toString(); | |
} | |
} |