| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| package org.apache.sling.resourceresolver.impl.helper; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.IOException; |
| import java.io.ObjectInputStream; |
| import java.io.ObjectOutputStream; |
| import java.io.Serializable; |
| import java.io.UnsupportedEncodingException; |
| import java.util.Arrays; |
| import java.util.BitSet; |
| import java.util.HashMap; |
| import java.util.Locale; |
| |
| import org.apache.sling.api.SlingException; |
| |
| /** |
| * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396. |
| * This class has the purpose of supportting of parsing a URI reference to |
| * extend any specific protocols, the character encoding of the protocol to be |
| * transported and the charset of the document. |
| * |
| * A URI is always in an "escaped" form, since escaping or unescaping a |
| * completed URI might change its semantics. |
| * |
| * Implementers should be careful not to escape or unescape the same string more |
| * than once, since unescaping an already unescaped string might lead to |
| * misinterpreting a percent data character as another escaped character, or |
| * vice versa in the case of escaping an already escaped string. |
| * |
| * In order to avoid these problems, data types used as follows: |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * URI character sequence: char |
| * octet sequence: byte |
| * original character sequence: String |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * So, a URI is a sequence of characters as an array of a char type, which is |
| * not always represented as a sequence of octets as an array of byte. |
| * |
| * URI Syntactic Components |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * - In general, written as follows: |
| * Absolute URI = <scheme>:<scheme-specific-part> |
| * Generic URI = <scheme>://<authority><path>?<query> |
| * - Syntax |
| * absoluteURI = scheme ":" ( hier_part | opaque_part ) |
| * hier_part = ( net_path | abs_path ) [ "?" query ] |
| * net_path = "//" authority [ abs_path ] |
| * abs_path = "/" path_segments |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * The following examples illustrate URI that are in common use. |
| * |
| * <pre> |
| * ftp://ftp.is.co.za/rfc/rfc1808.txt |
| * -- ftp scheme for File Transfer Protocol services |
| * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles |
| * -- gopher scheme for Gopher and Gopher+ Protocol services |
| * http://www.math.uio.no/faq/compression-faq/part1.html |
| * -- http scheme for Hypertext Transfer Protocol services |
| * mailto:mduerst@ifi.unizh.ch |
| * -- mailto scheme for electronic mail addresses |
| * news:comp.infosystems.www.servers.unix |
| * -- news scheme for USENET news groups and articles |
| * telnet://melvyl.ucop.edu/ |
| * -- telnet scheme for interactive services via the TELNET Protocol |
| * </pre> |
| * |
| * Please, notice that there are many modifications from URL(RFC 1738) and |
| * relative URL(RFC 1808). |
| * |
| * <b>The expressions for a URI</b> |
| * |
| * |
| * <pre> |
| * For escaped URI forms |
| * - URI(char[]) // constructor |
| * - char[] getRawXxx() // method |
| * - String getEscapedXxx() // method |
| * - String toString() // method |
| * |
| * For unescaped URI forms |
| * - URI(String) // constructor |
| * - String getXXX() // method |
| * </pre> |
| * |
| * This class is a slightly modified version of the URI class distributed with |
| * Http Client 3.1. The changes involve removing dependencies to other Http |
| * Client classes and the Commons Codec library. To this avail the following |
| * methods have been added to this class: |
| * <ul> |
| * <li>getBytes, getAsciiString, getString, getAsciiBytes has been copied from |
| * the Http Client 3.1 EncodingUtils class.</li> |
| * <li>encodeUrl and decodeUrl have been copied from the Commons Codec URLCodec |
| * class.</li> |
| * </ul> |
| * The signatures have been simplified and adapted to the use in this class. |
| * Also the exception thrown has been changed to be {@link URIException}. |
| */ |
| public class URI implements Cloneable, Comparable<URI>, Serializable { |
| |
| // ----------------------------------------------------------- Constructors |
| |
| /** Create an instance as an internal use */ |
| protected URI() { |
| } |
| |
| /** |
| * Construct a URI from a string with the given charset. The input string |
| * can be either in escaped or unescaped form. |
| * |
| * @param s URI character sequence |
| * @param escaped <tt>true</tt> if URI character sequence is in escaped |
| * form. <tt>false</tt> otherwise. |
| * @param charset the charset string to do escape encoding, if required |
| * @throws URIException If the URI cannot be created. |
| * @throws NullPointerException if input string is <code>null</code> |
| * @see #getProtocolCharset |
| * @since 3.0 |
| */ |
| public URI(String s, boolean escaped, String charset) throws URIException, |
| NullPointerException { |
| protocolCharset = charset; |
| parseUriReference(s, escaped); |
| } |
| |
| /** |
| * Construct a URI from a string with the given charset. The input string |
| * can be either in escaped or unescaped form. |
| * |
| * @param s URI character sequence |
| * @param escaped <tt>true</tt> if URI character sequence is in escaped |
| * form. <tt>false</tt> otherwise. |
| * @throws URIException If the URI cannot be created. |
| * @throws NullPointerException if input string is <code>null</code> |
| * @see #getProtocolCharset |
| * @since 3.0 |
| */ |
| public URI(String s, boolean escaped) throws URIException, |
| NullPointerException { |
| parseUriReference(s, escaped); |
| } |
| |
| /** |
| * Construct a general URI from the given components. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] |
| * absoluteURI = scheme ":" ( hier_part | opaque_part ) |
| * opaque_part = uric_no_slash *uric |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * It's for absolute URI = <scheme>:<scheme-specific-part># |
| * <fragment>. |
| * |
| * @param scheme the scheme string |
| * @param schemeSpecificPart scheme_specific_part |
| * @param fragment the fragment string |
| * @throws URIException If the URI cannot be created. |
| * @see #getDefaultProtocolCharset |
| */ |
| public URI(String scheme, String schemeSpecificPart, String fragment) |
| throws URIException { |
| |
| // validate and contruct the URI character sequence |
| if (scheme == null) { |
| throw new URIException(URIException.PARSING, "scheme required"); |
| } |
| char[] s = scheme.toLowerCase().toCharArray(); |
| if (validate(s, URI.scheme)) { |
| _scheme = s; // is_absoluteURI |
| } else { |
| throw new URIException(URIException.PARSING, "incorrect scheme"); |
| } |
| _opaque = encode(schemeSpecificPart, allowed_opaque_part, |
| getProtocolCharset()); |
| // Set flag |
| _is_opaque_part = true; |
| _fragment = fragment == null ? null : fragment.toCharArray(); |
| setURI(); |
| } |
| |
| /** |
| * Construct a general URI from the given components. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] |
| * absoluteURI = scheme ":" ( hier_part | opaque_part ) |
| * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] |
| * hier_part = ( net_path | abs_path ) [ "?" query ] |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * It's for absolute URI = <scheme>:<path>?<query>#< |
| * fragment> and relative URI = <path>?<query>#<fragment |
| * >. |
| * |
| * @param scheme the scheme string |
| * @param authority the authority string |
| * @param path the path string |
| * @param query the query string |
| * @param fragment the fragment string |
| * @throws URIException If the new URI cannot be created. |
| * @see #getDefaultProtocolCharset |
| */ |
| public URI(String scheme, String authority, String path, String query, |
| String fragment) throws URIException { |
| |
| // validate and contruct the URI character sequence |
| StringBuilder buff = new StringBuilder(); |
| if (scheme != null) { |
| buff.append(scheme); |
| buff.append(':'); |
| } |
| if (authority != null) { |
| buff.append("//"); |
| buff.append(authority); |
| } |
| if (path != null) { // accept empty path |
| if ((scheme != null || authority != null) && !path.startsWith("/")) { |
| throw new URIException(URIException.PARSING, |
| "abs_path requested"); |
| } |
| buff.append(path); |
| } |
| if (query != null) { |
| buff.append('?'); |
| buff.append(query); |
| } |
| if (fragment != null) { |
| buff.append('#'); |
| buff.append(fragment); |
| } |
| parseUriReference(buff.toString(), false); |
| } |
| |
| /** |
| * Construct a general URI from the given components. |
| * |
| * @param scheme the scheme string |
| * @param userinfo the userinfo string |
| * @param host the host string |
| * @param port the port number |
| * @throws URIException If the new URI cannot be created. |
| * @see #getDefaultProtocolCharset |
| */ |
| public URI(String scheme, String userinfo, String host, int port) |
| throws URIException { |
| |
| this(scheme, userinfo, host, port, null, null, null); |
| } |
| |
| /** |
| * Construct a general URI from the given components. |
| * |
| * @param scheme the scheme string |
| * @param userinfo the userinfo string |
| * @param host the host string |
| * @param port the port number |
| * @param path the path string |
| * @throws URIException If the new URI cannot be created. |
| * @see #getDefaultProtocolCharset |
| */ |
| public URI(String scheme, String userinfo, String host, int port, |
| String path) throws URIException { |
| |
| this(scheme, userinfo, host, port, path, null, null); |
| } |
| |
| /** |
| * Construct a general URI from the given components. |
| * |
| * @param scheme the scheme string |
| * @param userinfo the userinfo string |
| * @param host the host string |
| * @param port the port number |
| * @param path the path string |
| * @param query the query string |
| * @throws URIException If the new URI cannot be created. |
| * @see #getDefaultProtocolCharset |
| */ |
| public URI(String scheme, String userinfo, String host, int port, |
| String path, String query) throws URIException { |
| |
| this(scheme, userinfo, host, port, path, query, null); |
| } |
| |
| /** |
| * Construct a general URI from the given components. |
| * |
| * @param scheme the scheme string |
| * @param userinfo the userinfo string |
| * @param host the host string |
| * @param port the port number |
| * @param path the path string |
| * @param query the query string |
| * @param fragment the fragment string |
| * @throws URIException If the new URI cannot be created. |
| * @see #getDefaultProtocolCharset |
| */ |
| public URI(String scheme, String userinfo, String host, int port, |
| String path, String query, String fragment) throws URIException { |
| |
| this(scheme, (host == null) ? null : ((userinfo != null) |
| ? userinfo + '@' |
| : "") |
| + host + ((port != -1) ? ":" + port : ""), path, query, fragment); |
| } |
| |
| /** |
| * Construct a general URI from the given components. |
| * |
| * @param scheme the scheme string |
| * @param host the host string |
| * @param path the path string |
| * @param fragment the fragment string |
| * @throws URIException If the new URI cannot be created. |
| * @see #getDefaultProtocolCharset |
| */ |
| public URI(String scheme, String host, String path, String fragment) |
| throws URIException { |
| |
| this(scheme, host, path, null, fragment); |
| } |
| |
| /** |
| * Construct a general URI with the given relative URI string. |
| * |
| * @param base the base URI |
| * @param relative the relative URI string |
| * @param escaped <tt>true</tt> if URI character sequence is in escaped |
| * form. <tt>false</tt> otherwise. |
| * @throws URIException If the new URI cannot be created. |
| * @since 3.0 |
| */ |
| public URI(URI base, String relative, boolean escaped) throws URIException { |
| this(base, new URI(relative, escaped)); |
| } |
| |
| /** |
| * Construct a general URI with the given relative URI. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] |
| * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * Resolving Relative References to Absolute Form. <strong>Examples of |
| * Resolving Relative URI References</strong> Within an object with a |
| * well-defined base URI of |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * http://a/b/c/d;p?q |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * the relative URI would be resolved as follows: Normal Examples |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * g:h = g:h |
| * g = http://a/b/c/g |
| * ./g = http://a/b/c/g |
| * g/ = http://a/b/c/g/ |
| * /g = http://a/g |
| * //g = http://g |
| * ?y = http://a/b/c/?y |
| * g?y = http://a/b/c/g?y |
| * #s = (current document)#s |
| * g#s = http://a/b/c/g#s |
| * g?y#s = http://a/b/c/g?y#s |
| * ;x = http://a/b/c/;x |
| * g;x = http://a/b/c/g;x |
| * g;x?y#s = http://a/b/c/g;x?y#s |
| * . = http://a/b/c/ |
| * ./ = http://a/b/c/ |
| * .. = http://a/b/ |
| * ../ = http://a/b/ |
| * ../g = http://a/b/g |
| * ../.. = http://a/ |
| * ../../ = http://a/ |
| * ../../g = http://a/g |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * Some URI schemes do not allow a hierarchical syntax matching the |
| * <hier_part> syntax, and thus cannot use relative references. |
| * |
| * @param base the base URI |
| * @param relative the relative URI |
| * @throws URIException If the new URI cannot be created. |
| */ |
| public URI(URI base, URI relative) throws URIException { |
| |
| if (base._scheme == null) { |
| throw new URIException(URIException.PARSING, "base URI required"); |
| } |
| if (base._scheme != null) { |
| this._scheme = base._scheme; |
| this._authority = base._authority; |
| this._is_net_path = base._is_net_path; |
| } |
| if (base._is_opaque_part || relative._is_opaque_part) { |
| this._scheme = base._scheme; |
| this._is_opaque_part = base._is_opaque_part |
| || relative._is_opaque_part; |
| this._opaque = relative._opaque; |
| this._fragment = relative._fragment; |
| this.setURI(); |
| return; |
| } |
| boolean schemesEqual = Arrays.equals(base._scheme, relative._scheme); |
| if (relative._scheme != null |
| && (!schemesEqual || relative._authority != null)) { |
| this._scheme = relative._scheme; |
| this._is_net_path = relative._is_net_path; |
| this._authority = relative._authority; |
| if (relative._is_server) { |
| this._is_server = relative._is_server; |
| this._userinfo = relative._userinfo; |
| this._host = relative._host; |
| this._port = relative._port; |
| } else if (relative._is_reg_name) { |
| this._is_reg_name = relative._is_reg_name; |
| } |
| this._is_abs_path = relative._is_abs_path; |
| this._is_rel_path = relative._is_rel_path; |
| this._path = relative._path; |
| } else if (base._authority != null && relative._scheme == null) { |
| this._is_net_path = base._is_net_path; |
| this._authority = base._authority; |
| if (base._is_server) { |
| this._is_server = base._is_server; |
| this._userinfo = base._userinfo; |
| this._host = base._host; |
| this._port = base._port; |
| } else if (base._is_reg_name) { |
| this._is_reg_name = base._is_reg_name; |
| } |
| } |
| if (relative._authority != null) { |
| this._is_net_path = relative._is_net_path; |
| this._authority = relative._authority; |
| if (relative._is_server) { |
| this._is_server = relative._is_server; |
| this._userinfo = relative._userinfo; |
| this._host = relative._host; |
| this._port = relative._port; |
| } else if (relative._is_reg_name) { |
| this._is_reg_name = relative._is_reg_name; |
| } |
| this._is_abs_path = relative._is_abs_path; |
| this._is_rel_path = relative._is_rel_path; |
| this._path = relative._path; |
| } |
| // resolve the path and query if necessary |
| if (relative._authority == null |
| && (relative._scheme == null || schemesEqual)) { |
| if ((relative._path == null || relative._path.length == 0) |
| && relative._query == null) { |
| // handle a reference to the current document, see RFC 2396 |
| // section 5.2 step 2 |
| this._path = base._path; |
| this._query = base._query; |
| } else { |
| this._path = resolvePath(base._path, relative._path); |
| } |
| } |
| // base._query removed |
| if (relative._query != null) { |
| this._query = relative._query; |
| } |
| // base._fragment removed |
| if (relative._fragment != null) { |
| this._fragment = relative._fragment; |
| } |
| this.setURI(); |
| // reparse the newly built URI, this will ensure that all flags are set |
| // correctly. |
| // TODO there must be a better way to do this |
| parseUriReference(new String(_uri), true); |
| } |
| |
| // --------------------------------------------------- Instance Variables |
| |
| /** Version ID for serialization */ |
| static final long serialVersionUID = 604752400577948726L; |
| |
| /** |
| * Cache the hash code for this URI. |
| */ |
| protected int hash = 0; |
| |
| /** |
| * This Uniform Resource Identifier (URI). The URI is always in an "escaped" |
| * form, since escaping or unescaping a completed URI might change its |
| * semantics. |
| */ |
| protected char[] _uri = null; |
| |
| /** |
| * The charset of the protocol used by this URI instance. |
| */ |
| protected String protocolCharset = null; |
| |
| /** |
| * The default charset of the protocol. RFC 2277, 2396 |
| */ |
| protected static String defaultProtocolCharset = "UTF-8"; |
| |
| /** |
| * The default charset of the document. RFC 2277, 2396 The platform's |
| * charset is used for the document by default. |
| */ |
| protected static String defaultDocumentCharset = null; |
| |
| protected static String defaultDocumentCharsetByLocale = null; |
| |
| protected static String defaultDocumentCharsetByPlatform = null; |
| // Static initializer for defaultDocumentCharset |
| static { |
| Locale locale = Locale.getDefault(); |
| // in order to support backward compatiblity |
| if (locale != null) { |
| defaultDocumentCharsetByLocale = LocaleToCharsetMap.getCharset(locale); |
| // set the default document charset |
| defaultDocumentCharset = defaultDocumentCharsetByLocale; |
| } |
| // in order to support platform encoding |
| try { |
| defaultDocumentCharsetByPlatform = System.getProperty("file.encoding"); |
| } catch (SecurityException ignore) { |
| } |
| if (defaultDocumentCharset == null) { |
| // set the default document charset |
| defaultDocumentCharset = defaultDocumentCharsetByPlatform; |
| } |
| } |
| |
| /** |
| * The scheme. |
| */ |
| protected char[] _scheme = null; |
| |
| /** |
| * The opaque. |
| */ |
| protected char[] _opaque = null; |
| |
| /** |
| * The authority. |
| */ |
| protected char[] _authority = null; |
| |
| /** |
| * The userinfo. |
| */ |
| protected char[] _userinfo = null; |
| |
| /** |
| * The host. |
| */ |
| protected char[] _host = null; |
| |
| /** |
| * The port. |
| */ |
| protected int _port = -1; |
| |
| /** |
| * The path. |
| */ |
| protected char[] _path = null; |
| |
| /** |
| * The query. |
| */ |
| protected char[] _query = null; |
| |
| /** |
| * The fragment. |
| */ |
| protected char[] _fragment = null; |
| |
| /** |
| * The root path. |
| */ |
| protected static final char[] rootPath = { '/' }; |
| |
| // ---------------------- Generous characters for each component validation |
| |
| /** |
| * The percent "%" character always has the reserved purpose of being the |
| * escape indicator, it must be escaped as "%25" in order to be used as data |
| * within a URI. |
| */ |
| protected static final BitSet percent = new BitSet(256); |
| // Static initializer for percent |
| static { |
| percent.set('%'); |
| } |
| |
| /** |
| * BitSet for digit. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet digit = new BitSet(256); |
| // Static initializer for digit |
| static { |
| for (int i = '0'; i <= '9'; i++) { |
| digit.set(i); |
| } |
| } |
| |
| /** |
| * BitSet for alpha. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * alpha = lowalpha | upalpha |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet alpha = new BitSet(256); |
| // Static initializer for alpha |
| static { |
| for (int i = 'a'; i <= 'z'; i++) { |
| alpha.set(i); |
| } |
| for (int i = 'A'; i <= 'Z'; i++) { |
| alpha.set(i); |
| } |
| } |
| |
| /** |
| * BitSet for alphanum (join of alpha & digit). |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * alphanum = alpha | digit |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet alphanum = new BitSet(256); |
| // Static initializer for alphanum |
| static { |
| alphanum.or(alpha); |
| alphanum.or(digit); |
| } |
| |
| /** |
| * BitSet for hex. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | "a" | "b" | "c" | "d" | "e" |
| * | "f" |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet hex = new BitSet(256); |
| // Static initializer for hex |
| static { |
| hex.or(digit); |
| for (int i = 'a'; i <= 'f'; i++) { |
| hex.set(i); |
| } |
| for (int i = 'A'; i <= 'F'; i++) { |
| hex.set(i); |
| } |
| } |
| |
| /** |
| * BitSet for escaped. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * escaped = "%" hex hex |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet escaped = new BitSet(256); |
| // Static initializer for escaped |
| static { |
| escaped.or(percent); |
| escaped.or(hex); |
| } |
| |
| /** |
| * BitSet for mark. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * mark = "-" | "_" | "." | "!" | "˜" | "*" | "'" | "(" | ")" |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet mark = new BitSet(256); |
| // Static initializer for mark |
| static { |
| mark.set('-'); |
| mark.set('_'); |
| mark.set('.'); |
| mark.set('!'); |
| mark.set('~'); |
| mark.set('*'); |
| mark.set('\''); |
| mark.set('('); |
| mark.set(')'); |
| } |
| |
| /** |
| * Data characters that are allowed in a URI but do not have a reserved |
| * purpose are called unreserved. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * unreserved = alphanum | mark |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet unreserved = new BitSet(256); |
| // Static initializer for unreserved |
| static { |
| unreserved.or(alphanum); |
| unreserved.or(mark); |
| } |
| |
| /** |
| * BitSet for reserved. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet reserved = new BitSet(256); |
| // Static initializer for reserved |
| static { |
| reserved.set(';'); |
| reserved.set('/'); |
| reserved.set('?'); |
| reserved.set(':'); |
| reserved.set('@'); |
| reserved.set('&'); |
| reserved.set('='); |
| reserved.set('+'); |
| reserved.set('$'); |
| reserved.set(','); |
| } |
| |
| /** |
| * BitSet for uric. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * uric = reserved | unreserved | escaped |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet uric = new BitSet(256); |
| // Static initializer for uric |
| static { |
| uric.or(reserved); |
| uric.or(unreserved); |
| uric.or(escaped); |
| } |
| |
| /** |
| * BitSet for fragment (alias for uric). |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * fragment = *uric |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet fragment = uric; |
| |
| /** |
| * BitSet for query (alias for uric). |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * query = *uric |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet query = uric; |
| |
| /** |
| * BitSet for pchar. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | "," |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet pchar = new BitSet(256); |
| // Static initializer for pchar |
| static { |
| pchar.or(unreserved); |
| pchar.or(escaped); |
| pchar.set(':'); |
| pchar.set('@'); |
| pchar.set('&'); |
| pchar.set('='); |
| pchar.set('+'); |
| pchar.set('$'); |
| pchar.set(','); |
| } |
| |
| /** |
| * BitSet for param (alias for pchar). |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * param = *pchar |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet param = pchar; |
| |
| /** |
| * BitSet for segment. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * segment = *pchar *( ";" param ) |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet segment = new BitSet(256); |
| // Static initializer for segment |
| static { |
| segment.or(pchar); |
| segment.set(';'); |
| segment.or(param); |
| } |
| |
| /** |
| * BitSet for path segments. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * path_segments = segment *( "/" segment ) |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet path_segments = new BitSet(256); |
| // Static initializer for path_segments |
| static { |
| path_segments.set('/'); |
| path_segments.or(segment); |
| } |
| |
| /** |
| * URI absolute path. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * abs_path = "/" path_segments |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet abs_path = new BitSet(256); |
| // Static initializer for abs_path |
| static { |
| abs_path.set('/'); |
| abs_path.or(path_segments); |
| } |
| |
| /** |
| * URI bitset for encoding typical non-slash characters. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" | "&" | "=" | "+" |
| * | "$" | "," |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet uric_no_slash = new BitSet(256); |
| // Static initializer for uric_no_slash |
| static { |
| uric_no_slash.or(unreserved); |
| uric_no_slash.or(escaped); |
| uric_no_slash.set(';'); |
| uric_no_slash.set('?'); |
| uric_no_slash.set(';'); |
| uric_no_slash.set('@'); |
| uric_no_slash.set('&'); |
| uric_no_slash.set('='); |
| uric_no_slash.set('+'); |
| uric_no_slash.set('$'); |
| uric_no_slash.set(','); |
| } |
| |
| /** |
| * URI bitset that combines uric_no_slash and uric. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * opaque_part = uric_no_slash * uric |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet opaque_part = new BitSet(256); |
| // Static initializer for opaque_part |
| static { |
| // it's generous. because first character must not include a slash |
| opaque_part.or(uric_no_slash); |
| opaque_part.or(uric); |
| } |
| |
| /** |
| * URI bitset that combines absolute path and opaque part. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * path = [ abs_path | opaque_part ] |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet path = new BitSet(256); |
| // Static initializer for path |
| static { |
| path.or(abs_path); |
| path.or(opaque_part); |
| } |
| |
| /** |
| * Port, a logical alias for digit. |
| */ |
| protected static final BitSet port = digit; |
| |
| /** |
| * Bitset that combines digit and dot fo IPv$address. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet IPv4address = new BitSet(256); |
| // Static initializer for IPv4address |
| static { |
| IPv4address.or(digit); |
| IPv4address.set('.'); |
| } |
| |
| /** |
| * RFC 2373. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * IPv6address = hexpart [ ":" IPv4address ] |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet IPv6address = new BitSet(256); |
| // Static initializer for IPv6address reference |
| static { |
| IPv6address.or(hex); // hexpart |
| IPv6address.set(':'); |
| IPv6address.or(IPv4address); |
| } |
| |
| /** |
| * RFC 2732, 2373. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * IPv6reference = "[" IPv6address "]" |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet IPv6reference = new BitSet(256); |
| // Static initializer for IPv6reference |
| static { |
| IPv6reference.set('['); |
| IPv6reference.or(IPv6address); |
| IPv6reference.set(']'); |
| } |
| |
| /** |
| * BitSet for toplabel. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * toplabel = alpha | alpha *( alphanum | "-" ) alphanum |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet toplabel = new BitSet(256); |
| // Static initializer for toplabel |
| static { |
| toplabel.or(alphanum); |
| toplabel.set('-'); |
| } |
| |
| /** |
| * BitSet for domainlabel. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet domainlabel = toplabel; |
| |
| /** |
| * BitSet for hostname. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * hostname = *( domainlabel "." ) toplabel [ "." ] |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet hostname = new BitSet(256); |
| // Static initializer for hostname |
| static { |
| hostname.or(toplabel); |
| // hostname.or(domainlabel); |
| hostname.set('.'); |
| } |
| |
| /** |
| * BitSet for host. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * host = hostname | IPv4address | IPv6reference |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet host = new BitSet(256); |
| // Static initializer for host |
| static { |
| host.or(hostname); |
| // host.or(IPv4address); |
| host.or(IPv6reference); // IPv4address |
| } |
| |
| /** |
| * BitSet for hostport. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * hostport = host [ ":" port ] |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet hostport = new BitSet(256); |
| // Static initializer for hostport |
| static { |
| hostport.or(host); |
| hostport.set(':'); |
| hostport.or(port); |
| } |
| |
| /** |
| * Bitset for userinfo. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * userinfo = *( unreserved | escaped | |
| * ";" | ":" | "&" | "=" | "+" | "$" | "," ) |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet userinfo = new BitSet(256); |
| // Static initializer for userinfo |
| static { |
| userinfo.or(unreserved); |
| userinfo.or(escaped); |
| userinfo.set(';'); |
| userinfo.set(':'); |
| userinfo.set('&'); |
| userinfo.set('='); |
| userinfo.set('+'); |
| userinfo.set('$'); |
| userinfo.set(','); |
| } |
| |
| /** |
| * BitSet for within the userinfo component like user and password. |
| */ |
| public static final BitSet within_userinfo = new BitSet(256); |
| // Static initializer for within_userinfo |
| static { |
| within_userinfo.or(userinfo); |
| within_userinfo.clear(';'); // reserved within authority |
| within_userinfo.clear(':'); |
| within_userinfo.clear('@'); |
| within_userinfo.clear('?'); |
| within_userinfo.clear('/'); |
| } |
| |
| /** |
| * Bitset for server. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * server = [ [ userinfo "@" ] hostport ] |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet server = new BitSet(256); |
| // Static initializer for server |
| static { |
| server.or(userinfo); |
| server.set('@'); |
| server.or(hostport); |
| } |
| |
| /** |
| * BitSet for reg_name. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * reg_name = 1 * (unreserved | escaped | "$" | "," | ";" | ":" | "@" | "&" | "=" | "+") |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet reg_name = new BitSet(256); |
| // Static initializer for reg_name |
| static { |
| reg_name.or(unreserved); |
| reg_name.or(escaped); |
| reg_name.set('$'); |
| reg_name.set(','); |
| reg_name.set(';'); |
| reg_name.set(':'); |
| reg_name.set('@'); |
| reg_name.set('&'); |
| reg_name.set('='); |
| reg_name.set('+'); |
| } |
| |
| /** |
| * BitSet for authority. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * authority = server | reg_name |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet authority = new BitSet(256); |
| // Static initializer for authority |
| static { |
| authority.or(server); |
| authority.or(reg_name); |
| } |
| |
| /** |
| * BitSet for scheme. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * scheme = alpha * (alpha | digit | "+" | "-" | ".") |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet scheme = new BitSet(256); |
| // Static initializer for scheme |
| static { |
| scheme.or(alpha); |
| scheme.or(digit); |
| scheme.set('+'); |
| scheme.set('-'); |
| scheme.set('.'); |
| } |
| |
| /** |
| * BitSet for rel_segment. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * rel_segment = 1 * (unreserved | escaped | ";" | "@" | "&" | "=" | "+" | "$" | ",") |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet rel_segment = new BitSet(256); |
| // Static initializer for rel_segment |
| static { |
| rel_segment.or(unreserved); |
| rel_segment.or(escaped); |
| rel_segment.set(';'); |
| rel_segment.set('@'); |
| rel_segment.set('&'); |
| rel_segment.set('='); |
| rel_segment.set('+'); |
| rel_segment.set('$'); |
| rel_segment.set(','); |
| } |
| |
| /** |
| * BitSet for rel_path. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * rel_path = rel_segment[abs_path] |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet rel_path = new BitSet(256); |
| // Static initializer for rel_path |
| static { |
| rel_path.or(rel_segment); |
| rel_path.or(abs_path); |
| } |
| |
| /** |
| * BitSet for net_path. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * net_path = "//" authority [ abs_path ] |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet net_path = new BitSet(256); |
| // Static initializer for net_path |
| static { |
| net_path.set('/'); |
| net_path.or(authority); |
| net_path.or(abs_path); |
| } |
| |
| /** |
| * BitSet for hier_part. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * hier_part = ( net_path | abs_path ) [ "?" query ] |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet hier_part = new BitSet(256); |
| // Static initializer for hier_part |
| static { |
| hier_part.or(net_path); |
| hier_part.or(abs_path); |
| // hier_part.set('?'); aleady included |
| hier_part.or(query); |
| } |
| |
| /** |
| * BitSet for relativeURI. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet relativeURI = new BitSet(256); |
| // Static initializer for relativeURI |
| static { |
| relativeURI.or(net_path); |
| relativeURI.or(abs_path); |
| relativeURI.or(rel_path); |
| // relativeURI.set('?'); aleady included |
| relativeURI.or(query); |
| } |
| |
| /** |
| * BitSet for absoluteURI. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * absoluteURI = scheme ":" ( hier_part | opaque_part ) |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet absoluteURI = new BitSet(256); |
| // Static initializer for absoluteURI |
| static { |
| absoluteURI.or(scheme); |
| absoluteURI.set(':'); |
| absoluteURI.or(hier_part); |
| absoluteURI.or(opaque_part); |
| } |
| |
| /** |
| * BitSet for URI-reference. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] |
| * </pre> |
| * |
| * </blockquote> |
| * |
| */ |
| protected static final BitSet URI_reference = new BitSet(256); |
| // Static initializer for URI_reference |
| static { |
| URI_reference.or(absoluteURI); |
| URI_reference.or(relativeURI); |
| URI_reference.set('#'); |
| URI_reference.or(fragment); |
| } |
| |
| // ---------------------------- Characters disallowed within the URI syntax |
| // Excluded US-ASCII Characters are like control, space, delims and unwise |
| |
| /** |
| * BitSet for control. |
| */ |
| public static final BitSet control = new BitSet(256); |
| // Static initializer for control |
| static { |
| for (int i = 0; i <= 0x1F; i++) { |
| control.set(i); |
| } |
| control.set(0x7F); |
| } |
| |
| /** |
| * BitSet for space. |
| */ |
| public static final BitSet space = new BitSet(256); |
| // Static initializer for space |
| static { |
| space.set(0x20); |
| } |
| |
| /** |
| * BitSet for delims. |
| */ |
| public static final BitSet delims = new BitSet(256); |
| // Static initializer for delims |
| static { |
| delims.set('<'); |
| delims.set('>'); |
| delims.set('#'); |
| delims.set('%'); |
| delims.set('"'); |
| } |
| |
| /** |
| * BitSet for unwise. |
| */ |
| public static final BitSet unwise = new BitSet(256); |
| // Static initializer for unwise |
| static { |
| unwise.set('{'); |
| unwise.set('}'); |
| unwise.set('|'); |
| unwise.set('\\'); |
| unwise.set('^'); |
| unwise.set('['); |
| unwise.set(']'); |
| unwise.set('`'); |
| } |
| |
| /** |
| * Disallowed rel_path before escaping. |
| */ |
| public static final BitSet disallowed_rel_path = new BitSet(256); |
| // Static initializer for disallowed_rel_path |
| static { |
| disallowed_rel_path.or(uric); |
| disallowed_rel_path.andNot(rel_path); |
| } |
| |
| /** |
| * Disallowed opaque_part before escaping. |
| */ |
| public static final BitSet disallowed_opaque_part = new BitSet(256); |
| // Static initializer for disallowed_opaque_part |
| static { |
| disallowed_opaque_part.or(uric); |
| disallowed_opaque_part.andNot(opaque_part); |
| } |
| |
| // ----------------------- Characters allowed within and for each component |
| |
| /** |
| * Those characters that are allowed for the authority component. |
| */ |
| public static final BitSet allowed_authority = new BitSet(256); |
| // Static initializer for allowed_authority |
| static { |
| allowed_authority.or(authority); |
| allowed_authority.clear('%'); |
| } |
| |
| /** |
| * Those characters that are allowed for the opaque_part. |
| */ |
| public static final BitSet allowed_opaque_part = new BitSet(256); |
| // Static initializer for allowed_opaque_part |
| static { |
| allowed_opaque_part.or(opaque_part); |
| allowed_opaque_part.clear('%'); |
| } |
| |
| /** |
| * Those characters that are allowed for the reg_name. |
| */ |
| public static final BitSet allowed_reg_name = new BitSet(256); |
| // Static initializer for allowed_reg_name |
| static { |
| allowed_reg_name.or(reg_name); |
| // allowed_reg_name.andNot(percent); |
| allowed_reg_name.clear('%'); |
| } |
| |
| /** |
| * Those characters that are allowed for the userinfo component. |
| */ |
| public static final BitSet allowed_userinfo = new BitSet(256); |
| // Static initializer for allowed_userinfo |
| static { |
| allowed_userinfo.or(userinfo); |
| // allowed_userinfo.andNot(percent); |
| allowed_userinfo.clear('%'); |
| } |
| |
| /** |
| * Those characters that are allowed for within the userinfo component. |
| */ |
| public static final BitSet allowed_within_userinfo = new BitSet(256); |
| // Static initializer for allowed_within_userinfo |
| static { |
| allowed_within_userinfo.or(within_userinfo); |
| allowed_within_userinfo.clear('%'); |
| } |
| |
| /** |
| * Those characters that are allowed for the IPv6reference component. The |
| * characters '[', ']' in IPv6reference should be excluded. |
| */ |
| public static final BitSet allowed_IPv6reference = new BitSet(256); |
| // Static initializer for allowed_IPv6reference |
| static { |
| allowed_IPv6reference.or(IPv6reference); |
| // allowed_IPv6reference.andNot(unwise); |
| allowed_IPv6reference.clear('['); |
| allowed_IPv6reference.clear(']'); |
| } |
| |
| /** |
| * Those characters that are allowed for the host component. The characters |
| * '[', ']' in IPv6reference should be excluded. |
| */ |
| public static final BitSet allowed_host = new BitSet(256); |
| // Static initializer for allowed_host |
| static { |
| allowed_host.or(hostname); |
| allowed_host.or(allowed_IPv6reference); |
| } |
| |
| /** |
| * Those characters that are allowed for the authority component. |
| */ |
| public static final BitSet allowed_within_authority = new BitSet(256); |
| // Static initializer for allowed_within_authority |
| static { |
| allowed_within_authority.or(server); |
| allowed_within_authority.or(reg_name); |
| allowed_within_authority.clear(';'); |
| allowed_within_authority.clear(':'); |
| allowed_within_authority.clear('@'); |
| allowed_within_authority.clear('?'); |
| allowed_within_authority.clear('/'); |
| } |
| |
| /** |
| * Those characters that are allowed for the abs_path. |
| */ |
| public static final BitSet allowed_abs_path = new BitSet(256); |
| // Static initializer for allowed_abs_path |
| static { |
| allowed_abs_path.or(abs_path); |
| // allowed_abs_path.set('/'); // aleady included |
| allowed_abs_path.andNot(percent); |
| allowed_abs_path.clear('+'); |
| } |
| |
| /** |
| * Those characters that are allowed for the rel_path. |
| */ |
| public static final BitSet allowed_rel_path = new BitSet(256); |
| // Static initializer for allowed_rel_path |
| static { |
| allowed_rel_path.or(rel_path); |
| allowed_rel_path.clear('%'); |
| allowed_rel_path.clear('+'); |
| } |
| |
| /** |
| * Those characters that are allowed within the path. |
| */ |
| public static final BitSet allowed_within_path = new BitSet(256); |
| // Static initializer for allowed_within_path |
| static { |
| allowed_within_path.or(abs_path); |
| allowed_within_path.clear('/'); |
| allowed_within_path.clear(';'); |
| allowed_within_path.clear('='); |
| allowed_within_path.clear('?'); |
| } |
| |
| /** |
| * Those characters that are allowed for the query component. |
| */ |
| public static final BitSet allowed_query = new BitSet(256); |
| // Static initializer for allowed_query |
| static { |
| allowed_query.or(uric); |
| allowed_query.clear('%'); |
| } |
| |
| /** |
| * Those characters that are allowed within the query component. |
| */ |
| public static final BitSet allowed_within_query = new BitSet(256); |
| // Static initializer for allowed_within_query |
| static { |
| allowed_within_query.or(allowed_query); |
| allowed_within_query.andNot(reserved); // excluded 'reserved' |
| } |
| |
| /** |
| * Those characters that are allowed for the fragment component. |
| */ |
| public static final BitSet allowed_fragment = new BitSet(256); |
| // Static initializer for allowed_fragment |
| static { |
| allowed_fragment.or(uric); |
| allowed_fragment.clear('%'); |
| } |
| |
| // ------------------------------------------- Flags for this URI-reference |
| |
| // TODO: Figure out what all these variables are for and provide javadoc |
| |
| // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] |
| // absoluteURI = scheme ":" ( hier_part | opaque_part ) |
| protected boolean _is_hier_part; |
| |
| protected boolean _is_opaque_part; |
| |
| // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] |
| // hier_part = ( net_path | abs_path ) [ "?" query ] |
| protected boolean _is_net_path; |
| |
| protected boolean _is_abs_path; |
| |
| protected boolean _is_rel_path; |
| |
| // net_path = "//" authority [ abs_path ] |
| // authority = server | reg_name |
| protected boolean _is_reg_name; |
| |
| protected boolean _is_server; // = _has_server |
| |
| // server = [ [ userinfo "@" ] hostport ] |
| // host = hostname | IPv4address | IPv6reference |
| protected boolean _is_hostname; |
| |
| protected boolean _is_IPv4address; |
| |
| protected boolean _is_IPv6reference; |
| |
| // ------------------------------------------ Character and escape encoding |
| |
| /** |
| * Encodes URI string. This is a two mapping, one from original characters |
| * to octets, and subsequently a second from octets to URI characters: |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * original character sequence->octet sequence->URI character sequence |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * An escaped octet is encoded as a character triplet, consisting of the |
| * percent character "%" followed by the two hexadecimal digits representing |
| * the octet code. For example, "%20" is the escaped encoding for the |
| * US-ASCII space character. |
| * |
| * Conversion from the local filesystem character set to UTF-8 will normally |
| * involve a two step process. First convert the local character set to the |
| * UCS; then convert the UCS to UTF-8. The first step in the process can be |
| * performed by maintaining a mapping table that includes the local |
| * character set code and the corresponding UCS code. The next step is to |
| * convert the UCS character code to the UTF-8 encoding. |
| * |
| * Mapping between vendor codepages can be done in a very similar manner as |
| * described above. |
| * |
| * The only time escape encodings can allowedly be made is when a URI is |
| * being created from its component parts. The escape and validate methods |
| * are internally performed within this method. |
| * |
| * @param original the original character sequence |
| * @param allowed those characters that are allowed within a component |
| * @param charset the protocol charset |
| * @return URI character sequence |
| * @throws URIException null component or unsupported character encoding |
| */ |
| |
| protected static char[] encode(String original, BitSet allowed, |
| String charset) throws URIException { |
| if (original == null) { |
| throw new IllegalArgumentException( |
| "Original string may not be null"); |
| } |
| if (allowed == null) { |
| throw new IllegalArgumentException("Allowed bitset may not be null"); |
| } |
| byte[] rawdata = encodeUrl(allowed, getBytes(original, charset)); |
| return getAsciiString(rawdata).toCharArray(); |
| } |
| |
| /** |
| * Decodes URI encoded string. This is a two mapping, one from URI |
| * characters to octets, and subsequently a second from octets to original |
| * characters: |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * URI character sequence->octet sequence->original character sequence |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * A URI must be separated into its components before the escaped characters |
| * within those components can be allowedly decoded. |
| * |
| * Notice that there is a chance that URI characters that are non UTF-8 may |
| * be parsed as valid UTF-8. A recent non-scientific analysis found that EUC |
| * encoded Japanese words had a 2.7% false reading; SJIS had a 0.0005% false |
| * reading; other encoding such as ASCII or KOI-8 have a 0% false reading. |
| * |
| * The percent "%" character always has the reserved purpose of being the |
| * escape indicator, it must be escaped as "%25" in order to be used as data |
| * within a URI. |
| * |
| * The unescape method is internally performed within this method. |
| * |
| * @param component the URI character sequence |
| * @param charset the protocol charset |
| * @return original character sequence |
| * @throws URIException incomplete trailing escape pattern or unsupported |
| * character encoding |
| */ |
| protected static String decode(char[] component, String charset) |
| throws URIException { |
| if (component == null) { |
| throw new IllegalArgumentException( |
| "Component array of chars may not be null"); |
| } |
| return decode(new String(component), charset); |
| } |
| |
| /** |
| * Decodes URI encoded string. This is a two mapping, one from URI |
| * characters to octets, and subsequently a second from octets to original |
| * characters: |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * URI character sequence->octet sequence->original character sequence |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * A URI must be separated into its components before the escaped characters |
| * within those components can be allowedly decoded. |
| * |
| * Notice that there is a chance that URI characters that are non UTF-8 may |
| * be parsed as valid UTF-8. A recent non-scientific analysis found that EUC |
| * encoded Japanese words had a 2.7% false reading; SJIS had a 0.0005% false |
| * reading; other encoding such as ASCII or KOI-8 have a 0% false reading. |
| * |
| * The percent "%" character always has the reserved purpose of being the |
| * escape indicator, it must be escaped as "%25" in order to be used as data |
| * within a URI. |
| * |
| * The unescape method is internally performed within this method. |
| * |
| * @param component the URI character sequence |
| * @param charset the protocol charset |
| * @return original character sequence |
| * @throws URIException incomplete trailing escape pattern or unsupported |
| * character encoding |
| * @since 3.0 |
| */ |
| protected static String decode(String component, String charset) |
| throws URIException { |
| if (component == null) { |
| throw new IllegalArgumentException( |
| "Component array of chars may not be null"); |
| } |
| byte[] rawdata = decodeUrl(getAsciiBytes(component)); |
| return getString(rawdata, charset); |
| } |
| |
| /** |
| * Pre-validate the unescaped URI string within a specific component. |
| * |
| * @param component the component string within the component |
| * @param disallowed those characters disallowed within the component |
| * @return if true, it doesn't have the disallowed characters if false, the |
| * component is undefined or an incorrect one |
| */ |
| protected boolean prevalidate(String component, BitSet disallowed) { |
| // prevalidate the given component by disallowed characters |
| if (component == null) { |
| return false; // undefined |
| } |
| char[] target = component.toCharArray(); |
| for (int i = 0; i < target.length; i++) { |
| if (disallowed.get(target[i])) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| /** |
| * Validate the URI characters within a specific component. The component |
| * must be performed after escape encoding. Or it doesn't include escaped |
| * characters. |
| * |
| * @param component the characters sequence within the component |
| * @param generous those characters that are allowed within a component |
| * @return if true, it's the correct URI character sequence |
| */ |
| protected boolean validate(char[] component, BitSet generous) { |
| // validate each component by generous characters |
| return validate(component, 0, -1, generous); |
| } |
| |
| /** |
| * Validate the URI characters within a specific component. The component |
| * must be performed after escape encoding. Or it doesn't include escaped |
| * characters. |
| * |
| * It's not that much strict, generous. The strict validation might be |
| * performed before being called this method. |
| * |
| * @param component the characters sequence within the component |
| * @param soffset the starting offset of the given component |
| * @param eoffset the ending offset of the given component if -1, it means |
| * the length of the component |
| * @param generous those characters that are allowed within a component |
| * @return if true, it's the correct URI character sequence |
| */ |
| protected boolean validate(char[] component, int soffset, int eoffset, |
| BitSet generous) { |
| // validate each component by generous characters |
| if (eoffset == -1) { |
| eoffset = component.length - 1; |
| } |
| for (int i = soffset; i <= eoffset; i++) { |
| if (!generous.get(component[i])) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| /** |
| * In order to avoid any possilbity of conflict with non-ASCII characters, |
| * Parse a URI reference as a <code>String</code> with the character |
| * encoding of the local system or the document. |
| * |
| * The following line is the regular expression for breaking-down a URI |
| * reference into its components. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
| * 12 3 4 5 6 7 8 9 |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * For example, matching the above expression to |
| * http://jakarta.apache.org/ietf/uri/#Related results in the following |
| * subexpression matches: |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * $1 = http: |
| * scheme = $2 = http |
| * $3 = //jakarta.apache.org |
| * authority = $4 = jakarta.apache.org |
| * path = $5 = /ietf/uri/ |
| * $6 = <undefined> |
| * query = $7 = <undefined> |
| * $8 = #Related |
| * fragment = $9 = Related |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * |
| * @param original the original character sequence |
| * @param escaped <code>true</code> if <code>original</code> is escaped |
| * @throws URIException If an error occurs. |
| */ |
| protected void parseUriReference(String original, boolean escaped) |
| throws URIException { |
| |
| // validate and contruct the URI character sequence |
| if (original == null) { |
| throw new URIException("URI-Reference required"); |
| } |
| |
| /* |
| * @ ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
| */ |
| String tmp = original.trim(); |
| |
| /* |
| * The length of the string sequence of characters. It may not be equal |
| * to the length of the byte array. |
| */ |
| int length = tmp.length(); |
| |
| /* |
| * Remove the delimiters like angle brackets around an URI. |
| */ |
| boolean delim = false; |
| if (length > 0) { |
| char[] firstDelimiter = { tmp.charAt(0) }; |
| if (validate(firstDelimiter, delims)) { |
| if (length >= 2) { |
| char[] lastDelimiter = { tmp.charAt(length - 1) }; |
| if (validate(lastDelimiter, delims)) { |
| delim = true; |
| } |
| } |
| } |
| } |
| if (delim) { |
| tmp = tmp.substring(1, length - 1); |
| length = length - 2; |
| } |
| else { |
| tmp = original; |
| length = original.length(); |
| int idx = 0; |
| while (idx < length && tmp.charAt(idx) <= ' ') { |
| idx++; |
| } |
| if (idx > 0) { |
| if (idx < length) { |
| tmp = tmp.substring(idx); |
| length -= idx; |
| } |
| else { |
| tmp = ""; |
| length = 0; |
| } |
| } |
| } |
| |
| /* |
| * The starting index |
| */ |
| int from = 0; |
| |
| /* |
| * The test flag whether the URI is started from the path component. |
| */ |
| boolean isStartedFromPath = false; |
| int atColon = tmp.indexOf(':'); |
| int atSlash = tmp.indexOf('/'); |
| if ((atColon <= 0 && !tmp.startsWith("//")) |
| || (atSlash >= 0 && atSlash < atColon)) { |
| isStartedFromPath = true; |
| } |
| |
| /* |
| * <blockquote><pre> |
| * @@@@@@@@ ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
| * </pre></blockquote> |
| */ |
| int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from); |
| if (at == -1) { |
| at = 0; |
| } |
| |
| /* |
| * Parse the scheme. <blockquote><pre> scheme = $2 = http |
| * @ ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
| * </pre></blockquote> |
| */ |
| if (at > 0 && at < length && tmp.charAt(at) == ':') { |
| char[] target = tmp.substring(0, at).toLowerCase().toCharArray(); |
| if (validate(target, scheme)) { |
| _scheme = target; |
| } else { |
| throw new URIException("incorrect scheme"); |
| } |
| from = ++at; |
| } |
| |
| /* |
| * Parse the authority component. <blockquote><pre> authority = $4 = |
| * jakarta.apache.org |
| * @@ ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
| * </pre></blockquote> |
| */ |
| // Reset flags |
| _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false; |
| if (0 <= at && at < length && tmp.charAt(at) == '/') { |
| // Set flag |
| _is_hier_part = true; |
| if (at + 2 < length && tmp.charAt(at + 1) == '/' |
| && !isStartedFromPath) { |
| // the temporary index to start the search from |
| int next = indexFirstOf(tmp, "/?#", at + 2); |
| if (next == -1) { |
| next = (tmp.substring(at + 2).length() == 0) |
| ? at + 2 |
| : tmp.length(); |
| } |
| parseAuthority(tmp.substring(at + 2, next), escaped); |
| from = at = next; |
| // Set flag |
| _is_net_path = true; |
| } |
| if (from == at) { |
| // Set flag |
| _is_abs_path = true; |
| } |
| } |
| |
| /* |
| * Parse the path component. <blockquote><pre> path = $5 = /ietf/uri/ |
| * @@@@@@ ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
| * </pre></blockquote> |
| */ |
| if (from < length) { |
| // rel_path = rel_segment [ abs_path ] |
| int next = indexFirstOf(tmp, "?#", from); |
| if (next == -1) { |
| next = tmp.length(); |
| } |
| if (!_is_abs_path) { |
| if (!escaped |
| && prevalidate(tmp.substring(from, next), |
| disallowed_rel_path) |
| || escaped |
| && validate(tmp.substring(from, next).toCharArray(), |
| rel_path)) { |
| // Set flag |
| _is_rel_path = true; |
| } else if (!escaped |
| && prevalidate(tmp.substring(from, next), |
| disallowed_opaque_part) |
| || escaped |
| && validate(tmp.substring(from, next).toCharArray(), |
| opaque_part)) { |
| // Set flag |
| _is_opaque_part = true; |
| } else { |
| // the path component may be empty |
| _path = null; |
| } |
| } |
| String s = tmp.substring(from, next); |
| if (escaped) { |
| setRawPath(s.toCharArray()); |
| } else { |
| setPath(s); |
| } |
| at = next; |
| } |
| |
| // set the charset to do escape encoding |
| String charset = getProtocolCharset(); |
| |
| /* |
| * Parse the query component. <blockquote><pre> query = $7 = |
| * <undefined> |
| * @@@@@@@@@ ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
| * </pre></blockquote> |
| */ |
| if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') { |
| int next = tmp.indexOf('#', at + 1); |
| if (next == -1) { |
| next = tmp.length(); |
| } |
| if (escaped) { |
| _query = tmp.substring(at + 1, next).toCharArray(); |
| if (!validate(_query, uric)) { |
| throw new URIException("Invalid query"); |
| } |
| } else { |
| _query = encode(tmp.substring(at + 1, next), allowed_query, |
| charset); |
| } |
| at = next; |
| } |
| |
| /* |
| * Parse the fragment component. <blockquote><pre> fragment = $9 = |
| * Related |
| * @@@@@@@@ ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
| * </pre></blockquote> |
| */ |
| if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') { |
| if (at + 1 == length) { // empty fragment |
| _fragment = "".toCharArray(); |
| } else { |
| _fragment = (escaped) |
| ? tmp.substring(at + 1).toCharArray() |
| : encode(tmp.substring(at + 1), allowed_fragment, |
| charset); |
| } |
| } |
| |
| // set this URI. |
| setURI(); |
| } |
| |
| /** |
| * Get the earlier index that to be searched for the first occurrance in one |
| * of any of the given string. |
| * |
| * @param s the string to be indexed |
| * @param delims the delimiters used to index |
| * @return the earlier index if there are delimiters |
| */ |
| protected int indexFirstOf(String s, String delims) { |
| return indexFirstOf(s, delims, -1); |
| } |
| |
| /** |
| * Get the earlier index that to be searched for the first occurrance in one |
| * of any of the given string. |
| * |
| * @param s the string to be indexed |
| * @param delims the delimiters used to index |
| * @param offset the from index |
| * @return the earlier index if there are delimiters |
| */ |
| protected int indexFirstOf(String s, String delims, int offset) { |
| if (s == null || s.length() == 0) { |
| return -1; |
| } |
| if (delims == null || delims.length() == 0) { |
| return -1; |
| } |
| // check boundaries |
| if (offset < 0) { |
| offset = 0; |
| } else if (offset > s.length()) { |
| return -1; |
| } |
| // s is never null |
| int min = s.length(); |
| char[] delim = delims.toCharArray(); |
| for (int i = 0; i < delim.length; i++) { |
| int at = s.indexOf(delim[i], offset); |
| if (at >= 0 && at < min) { |
| min = at; |
| } |
| } |
| return (min == s.length()) ? -1 : min; |
| } |
| |
| /** |
| * Get the earlier index that to be searched for the first occurrance in one |
| * of any of the given array. |
| * |
| * @param s the character array to be indexed |
| * @param delim the delimiter used to index |
| * @return the ealier index if there are a delimiter |
| */ |
| protected int indexFirstOf(char[] s, char delim) { |
| return indexFirstOf(s, delim, 0); |
| } |
| |
| /** |
| * Get the earlier index that to be searched for the first occurrance in one |
| * of any of the given array. |
| * |
| * @param s the character array to be indexed |
| * @param delim the delimiter used to index |
| * @param offset The offset. |
| * @return the ealier index if there is a delimiter |
| */ |
| protected int indexFirstOf(char[] s, char delim, int offset) { |
| if (s == null || s.length == 0) { |
| return -1; |
| } |
| // check boundaries |
| if (offset < 0) { |
| offset = 0; |
| } else if (offset > s.length) { |
| return -1; |
| } |
| for (int i = offset; i < s.length; i++) { |
| if (s[i] == delim) { |
| return i; |
| } |
| } |
| return -1; |
| } |
| |
| /** |
| * Parse the authority component. |
| * |
| * @param original the original character sequence of authority component |
| * @param escaped <code>true</code> if <code>original</code> is escaped |
| * @throws URIException If an error occurs. |
| */ |
| protected void parseAuthority(String original, boolean escaped) |
| throws URIException { |
| |
| // Reset flags |
| _is_reg_name = _is_server = _is_hostname = _is_IPv4address = _is_IPv6reference = false; |
| |
| // set the charset to do escape encoding |
| String charset = getProtocolCharset(); |
| |
| boolean hasPort = true; |
| int from = 0; |
| int next = original.indexOf('@'); |
| if (next != -1) { // neither -1 and 0 |
| // each protocol extented from URI supports the specific userinfo |
| _userinfo = (escaped) |
| ? original.substring(0, next).toCharArray() |
| : encode(original.substring(0, next), allowed_userinfo, |
| charset); |
| from = next + 1; |
| } |
| next = original.indexOf('[', from); |
| if (next >= from) { |
| next = original.indexOf(']', from); |
| if (next == -1) { |
| throw new URIException(URIException.PARSING, "IPv6reference"); |
| } |
| next++; |
| // In IPv6reference, '[', ']' should be excluded |
| _host = (escaped) |
| ? original.substring(from, next).toCharArray() |
| : encode(original.substring(from, next), |
| allowed_IPv6reference, charset); |
| // Set flag |
| _is_IPv6reference = true; |
| } else { // only for !_is_IPv6reference |
| next = original.indexOf(':', from); |
| if (next == -1) { |
| next = original.length(); |
| hasPort = false; |
| } |
| // REMINDME: it doesn't need the pre-validation |
| _host = original.substring(from, next).toCharArray(); |
| if (validate(_host, IPv4address)) { |
| // Set flag |
| _is_IPv4address = true; |
| } else if (validate(_host, hostname)) { |
| // Set flag |
| _is_hostname = true; |
| } else { |
| // Set flag |
| _is_reg_name = true; |
| } |
| } |
| if (_is_reg_name) { |
| // Reset flags for a server-based naming authority |
| _is_server = _is_hostname = _is_IPv4address = _is_IPv6reference = false; |
| // set a registry-based naming authority |
| if (escaped) { |
| _authority = original.toCharArray(); |
| if (!validate(_authority, reg_name)) { |
| throw new URIException("Invalid authority"); |
| } |
| } else { |
| _authority = encode(original, allowed_reg_name, charset); |
| } |
| } else { |
| if (original.length() - 1 > next && hasPort |
| && original.charAt(next) == ':') { // not empty |
| from = next + 1; |
| try { |
| _port = Integer.parseInt(original.substring(from)); |
| } catch (NumberFormatException error) { |
| throw new URIException(URIException.PARSING, |
| "invalid port number"); |
| } |
| } |
| // set a server-based naming authority |
| StringBuilder buf = new StringBuilder(); |
| if (_userinfo != null) { // has_userinfo |
| buf.append(_userinfo); |
| buf.append('@'); |
| } |
| if (_host != null) { |
| buf.append(_host); |
| if (_port != -1) { |
| buf.append(':'); |
| buf.append(_port); |
| } |
| } |
| _authority = buf.toString().toCharArray(); |
| // Set flag |
| _is_server = true; |
| } |
| } |
| |
| /** |
| * Once it's parsed successfully, set this URI. |
| * |
| * @see #getRawURI |
| */ |
| protected void setURI() { |
| // set _uri |
| StringBuilder buf = new StringBuilder(); |
| // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
| if (_scheme != null) { |
| buf.append(_scheme); |
| buf.append(':'); |
| } |
| if (_is_net_path) { |
| buf.append("//"); |
| if (_authority != null) { // has_authority |
| buf.append(_authority); |
| } |
| } |
| if (_opaque != null && _is_opaque_part) { |
| buf.append(_opaque); |
| } else if (_path != null) { |
| // _is_hier_part or _is_relativeURI |
| if (_path.length != 0) { |
| buf.append(_path); |
| } |
| } |
| if (_query != null) { // has_query |
| buf.append('?'); |
| buf.append(_query); |
| } |
| // ignore the fragment identifier |
| _uri = buf.toString().toCharArray(); |
| hash = 0; |
| } |
| |
| // ----------------------------------------------------------- Test methods |
| |
| /** |
| * Tell whether or not this URI is absolute. |
| * |
| * @return true iif this URI is absoluteURI |
| */ |
| public boolean isAbsoluteURI() { |
| return (_scheme != null); |
| } |
| |
| /** |
| * Tell whether or not this URI is relative. |
| * |
| * @return true iif this URI is relativeURI |
| */ |
| public boolean isRelativeURI() { |
| return (_scheme == null); |
| } |
| |
| /** |
| * Tell whether or not the absoluteURI of this URI is hier_part. |
| * |
| * @return true iif the absoluteURI is hier_part |
| */ |
| public boolean isHierPart() { |
| return _is_hier_part; |
| } |
| |
| /** |
| * Tell whether or not the absoluteURI of this URI is opaque_part. |
| * |
| * @return true iif the absoluteURI is opaque_part |
| */ |
| public boolean isOpaquePart() { |
| return _is_opaque_part; |
| } |
| |
| /** |
| * Tell whether or not the relativeURI or heir_part of this URI is net_path. |
| * It's the same function as the has_authority() method. |
| * |
| * @return true iif the relativeURI or heir_part is net_path |
| * @see #hasAuthority |
| */ |
| public boolean isNetPath() { |
| return _is_net_path || (_authority != null); |
| } |
| |
| /** |
| * Tell whether or not the relativeURI or hier_part of this URI is abs_path. |
| * |
| * @return true iif the relativeURI or hier_part is abs_path |
| */ |
| public boolean isAbsPath() { |
| return _is_abs_path; |
| } |
| |
| /** |
| * Tell whether or not the relativeURI of this URI is rel_path. |
| * |
| * @return true iif the relativeURI is rel_path |
| */ |
| public boolean isRelPath() { |
| return _is_rel_path; |
| } |
| |
| /** |
| * Tell whether or not this URI has authority. It's the same function as the |
| * is_net_path() method. |
| * |
| * @return true iif this URI has authority |
| * @see #isNetPath |
| */ |
| public boolean hasAuthority() { |
| return (_authority != null) || _is_net_path; |
| } |
| |
| /** |
| * Tell whether or not the authority component of this URI is reg_name. |
| * |
| * @return true iif the authority component is reg_name |
| */ |
| public boolean isRegName() { |
| return _is_reg_name; |
| } |
| |
| /** |
| * Tell whether or not the authority component of this URI is server. |
| * |
| * @return true iif the authority component is server |
| */ |
| public boolean isServer() { |
| return _is_server; |
| } |
| |
| /** |
| * Tell whether or not this URI has userinfo. |
| * |
| * @return true iif this URI has userinfo |
| */ |
| public boolean hasUserinfo() { |
| return (_userinfo != null); |
| } |
| |
| /** |
| * Tell whether or not the host part of this URI is hostname. |
| * |
| * @return true iif the host part is hostname |
| */ |
| public boolean isHostname() { |
| return _is_hostname; |
| } |
| |
| /** |
| * Tell whether or not the host part of this URI is IPv4address. |
| * |
| * @return true iif the host part is IPv4address |
| */ |
| public boolean isIPv4address() { |
| return _is_IPv4address; |
| } |
| |
| /** |
| * Tell whether or not the host part of this URI is IPv6reference. |
| * |
| * @return true iif the host part is IPv6reference |
| */ |
| public boolean isIPv6reference() { |
| return _is_IPv6reference; |
| } |
| |
| /** |
| * Tell whether or not this URI has query. |
| * |
| * @return true iif this URI has query |
| */ |
| public boolean hasQuery() { |
| return (_query != null); |
| } |
| |
| /** |
| * Tell whether or not this URI has fragment. |
| * |
| * @return true iif this URI has fragment |
| */ |
| public boolean hasFragment() { |
| return (_fragment != null); |
| } |
| |
| // ---------------------------------------------------------------- Charset |
| |
| /** |
| * Set the default charset of the protocol. |
| * |
| * The character set used to store files SHALL remain a local decision and |
| * MAY depend on the capability of local operating systems. Prior to the |
| * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format and |
| * UTF-8 encoded. This approach, while allowing international exchange of |
| * URIs, will still allow backward compatibility with older systems because |
| * the code set positions for ASCII characters are identical to the one byte |
| * sequence in UTF-8. |
| * |
| * An individual URI scheme may require a single charset, define a default |
| * charset, or provide a way to indicate the charset used. |
| * |
| * Always all the time, the setter method is always succeeded and throws |
| * <code>DefaultCharsetChanged</code> exception. So API programmer must |
| * follow the following way: <code><pre> |
| * import org.apache.util.URI$DefaultCharsetChanged; |
| * . |
| * . |
| * . |
| * try { |
| * URI.setDefaultProtocolCharset("UTF-8"); |
| * } catch (DefaultCharsetChanged cc) { |
| * // CASE 1: the exception could be ignored, when it is set by user |
| * if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) { |
| * // CASE 2: let user know the default protocol charset changed |
| * } else { |
| * // CASE 2: let user know the default document charset changed |
| * } |
| * } |
| * </pre></code> The API programmer is responsible to set the correct |
| * charset. And each application should remember its own charset to support. |
| * |
| * @param charset the default charset for each protocol |
| * @throws DefaultCharsetChanged default charset changed |
| */ |
| public static void setDefaultProtocolCharset(String charset) |
| throws DefaultCharsetChanged { |
| |
| defaultProtocolCharset = charset; |
| throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET, |
| "the default protocol charset changed"); |
| } |
| |
| /** |
| * Get the default charset of the protocol. |
| * |
| * An individual URI scheme may require a single charset, define a default |
| * charset, or provide a way to indicate the charset used. |
| * |
| * To work globally either requires support of a number of character sets |
| * and to be able to convert between them, or the use of a single preferred |
| * character set. For support of global compatibility it is STRONGLY |
| * RECOMMENDED that clients and servers use UTF-8 encoding when exchanging |
| * URIs. |
| * |
| * @return the default charset string |
| */ |
| public static String getDefaultProtocolCharset() { |
| return defaultProtocolCharset; |
| } |
| |
| /** |
| * Get the protocol charset used by this current URI instance. It was set by |
| * the constructor for this instance. If it was not set by contructor, it |
| * will return the default protocol charset. |
| * |
| * @return the protocol charset string |
| * @see #getDefaultProtocolCharset |
| */ |
| public String getProtocolCharset() { |
| return (protocolCharset != null) |
| ? protocolCharset |
| : defaultProtocolCharset; |
| } |
| |
| /** |
| * Set the default charset of the document. |
| * |
| * Notice that it will be possible to contain mixed characters (e.g. |
| * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional |
| * display of these character sets, the protocol charset could be simply |
| * used again. Because it's not yet implemented that the insertion of BIDI |
| * control characters at different points during composition is extracted. |
| * |
| * Always all the time, the setter method is always succeeded and throws |
| * <code>DefaultCharsetChanged</code> exception. So API programmer must |
| * follow the following way: <code><pre> |
| * import org.apache.util.URI$DefaultCharsetChanged; |
| * . |
| * . |
| * . |
| * try { |
| * URI.setDefaultDocumentCharset("EUC-KR"); |
| * } catch (DefaultCharsetChanged cc) { |
| * // CASE 1: the exception could be ignored, when it is set by user |
| * if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) { |
| * // CASE 2: let user know the default document charset changed |
| * } else { |
| * // CASE 2: let user know the default protocol charset changed |
| * } |
| * } |
| * </pre></code> The API programmer is responsible to set the correct |
| * charset. And each application should remember its own charset to support. |
| * |
| * @param charset the default charset for the document |
| * @throws DefaultCharsetChanged default charset changed |
| */ |
| public static void setDefaultDocumentCharset(String charset) |
| throws DefaultCharsetChanged { |
| |
| defaultDocumentCharset = charset; |
| throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET, |
| "the default document charset changed"); |
| } |
| |
| /** |
| * Get the recommended default charset of the document. |
| * |
| * @return the default charset string |
| */ |
| public static String getDefaultDocumentCharset() { |
| return defaultDocumentCharset; |
| } |
| |
| /** |
| * Get the default charset of the document by locale. |
| * |
| * @return the default charset string by locale |
| */ |
| public static String getDefaultDocumentCharsetByLocale() { |
| return defaultDocumentCharsetByLocale; |
| } |
| |
| /** |
| * Get the default charset of the document by platform. |
| * |
| * @return the default charset string by platform |
| */ |
| public static String getDefaultDocumentCharsetByPlatform() { |
| return defaultDocumentCharsetByPlatform; |
| } |
| |
| // ------------------------------------------------------------- The scheme |
| |
| /** |
| * Get the scheme. |
| * |
| * @return the scheme |
| */ |
| public char[] getRawScheme() { |
| return _scheme; |
| } |
| |
| /** |
| * Get the scheme. |
| * |
| * @return the scheme null if undefined scheme |
| */ |
| public String getScheme() { |
| return (_scheme == null) ? null : new String(_scheme); |
| } |
| |
| // ---------------------------------------------------------- The authority |
| |
| /** |
| * Set the authority. It can be one type of server, hostport, hostname, |
| * IPv4address, IPv6reference and reg_name. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * authority = server | reg_name |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * |
| * @param escapedAuthority the raw escaped authority |
| * @throws URIException If {@link #parseAuthority(java.lang.String,boolean)} |
| * fails |
| * @throws NullPointerException null authority |
| */ |
| public void setRawAuthority(char[] escapedAuthority) throws URIException, |
| NullPointerException { |
| |
| parseAuthority(new String(escapedAuthority), true); |
| setURI(); |
| } |
| |
| /** |
| * Set the authority. It can be one type of server, hostport, hostname, |
| * IPv4address, IPv6reference and reg_name. Note that there is no |
| * setAuthority method by the escape encoding reason. |
| * |
| * @param escapedAuthority the escaped authority string |
| * @throws URIException If {@link #parseAuthority(java.lang.String,boolean)} |
| * fails |
| */ |
| public void setEscapedAuthority(String escapedAuthority) |
| throws URIException { |
| |
| parseAuthority(escapedAuthority, true); |
| setURI(); |
| } |
| |
| /** |
| * Get the raw-escaped authority. |
| * |
| * @return the raw-escaped authority |
| */ |
| public char[] getRawAuthority() { |
| return _authority; |
| } |
| |
| /** |
| * Get the escaped authority. |
| * |
| * @return the escaped authority |
| */ |
| public String getEscapedAuthority() { |
| return (_authority == null) ? null : new String(_authority); |
| } |
| |
| /** |
| * Get the authority. |
| * |
| * @return the authority |
| * @throws URIException If {@link #decode} fails |
| */ |
| public String getAuthority() throws URIException { |
| return (_authority == null) ? null : decode(_authority, |
| getProtocolCharset()); |
| } |
| |
| // ----------------------------------------------------------- The userinfo |
| |
| /** |
| * Get the raw-escaped userinfo. |
| * |
| * @return the raw-escaped userinfo |
| * @see #getAuthority |
| */ |
| public char[] getRawUserinfo() { |
| return _userinfo; |
| } |
| |
| /** |
| * Get the escaped userinfo. |
| * |
| * @return the escaped userinfo |
| * @see #getAuthority |
| */ |
| public String getEscapedUserinfo() { |
| return (_userinfo == null) ? null : new String(_userinfo); |
| } |
| |
| /** |
| * Get the userinfo. |
| * |
| * @return the userinfo |
| * @throws URIException If {@link #decode} fails |
| * @see #getAuthority |
| */ |
| public String getUserinfo() throws URIException { |
| return (_userinfo == null) ? null : decode(_userinfo, |
| getProtocolCharset()); |
| } |
| |
| // --------------------------------------------------------------- The host |
| |
| /** |
| * Get the host. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * host = hostname | IPv4address | IPv6reference |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * |
| * @return the host |
| * @see #getAuthority |
| */ |
| public char[] getRawHost() { |
| return _host; |
| } |
| |
| /** |
| * Get the host. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * host = hostname | IPv4address | IPv6reference |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * |
| * @return the host |
| * @throws URIException If {@link #decode} fails |
| * @see #getAuthority |
| */ |
| public String getHost() throws URIException { |
| if (_host != null) { |
| return decode(_host, getProtocolCharset()); |
| } |
| return null; |
| } |
| |
| // --------------------------------------------------------------- The port |
| |
| /** |
| * Get the port. In order to get the specfic default port, the specific |
| * protocol-supported class extended from the URI class should be used. It |
| * has the server-based naming authority. |
| * |
| * @return the port if -1, it has the default port for the scheme or the |
| * server-based naming authority is not supported in the specific |
| * URI. |
| */ |
| public int getPort() { |
| return _port; |
| } |
| |
| // --------------------------------------------------------------- The path |
| |
| /** |
| * Set the raw-escaped path. |
| * |
| * @param escapedPath the path character sequence |
| * @throws URIException encoding error or not proper for initial instance |
| * @see #encode |
| */ |
| public void setRawPath(char[] escapedPath) throws URIException { |
| if (escapedPath == null || escapedPath.length == 0) { |
| _path = _opaque = escapedPath; |
| setURI(); |
| return; |
| } |
| // remove the fragment identifier |
| escapedPath = removeFragmentIdentifier(escapedPath); |
| if (_is_net_path || _is_abs_path) { |
| if (escapedPath[0] != '/') { |
| throw new URIException(URIException.PARSING, |
| "not absolute path"); |
| } |
| if (!validate(escapedPath, abs_path)) { |
| throw new URIException(URIException.ESCAPING, |
| "escaped absolute path not valid"); |
| } |
| _path = escapedPath; |
| } else if (_is_rel_path) { |
| int at = indexFirstOf(escapedPath, '/'); |
| if (at == 0) { |
| throw new URIException(URIException.PARSING, "incorrect path"); |
| } |
| if (at > 0 && !validate(escapedPath, 0, at - 1, rel_segment) |
| && !validate(escapedPath, at, -1, abs_path) || at < 0 |
| && !validate(escapedPath, 0, -1, rel_segment)) { |
| |
| throw new URIException(URIException.ESCAPING, |
| "escaped relative path not valid"); |
| } |
| _path = escapedPath; |
| } else if (_is_opaque_part) { |
| if (!uric_no_slash.get(escapedPath[0]) |
| && !validate(escapedPath, 1, -1, uric)) { |
| throw new URIException(URIException.ESCAPING, |
| "escaped opaque part not valid"); |
| } |
| _opaque = escapedPath; |
| } else { |
| throw new URIException(URIException.PARSING, "incorrect path"); |
| } |
| setURI(); |
| } |
| |
| /** |
| * Set the escaped path. |
| * |
| * @param escapedPath the escaped path string |
| * @throws URIException encoding error or not proper for initial instance |
| * @see #encode |
| */ |
| public void setEscapedPath(String escapedPath) throws URIException { |
| if (escapedPath == null) { |
| _path = _opaque = null; |
| setURI(); |
| return; |
| } |
| setRawPath(escapedPath.toCharArray()); |
| } |
| |
| /** |
| * Set the path. |
| * |
| * @param path the path string |
| * @throws URIException set incorrectly or fragment only |
| * @see #encode |
| */ |
| public void setPath(String path) throws URIException { |
| |
| if (path == null || path.length() == 0) { |
| _path = _opaque = (path == null) ? null : path.toCharArray(); |
| setURI(); |
| return; |
| } |
| // set the charset to do escape encoding |
| String charset = getProtocolCharset(); |
| |
| if (_is_net_path || _is_abs_path) { |
| _path = encode(path, allowed_abs_path, charset); |
| } else if (_is_rel_path) { |
| StringBuilder buff = new StringBuilder(path.length()); |
| int at = path.indexOf('/'); |
| if (at == 0) { // never 0 |
| throw new URIException(URIException.PARSING, |
| "incorrect relative path"); |
| } |
| if (at > 0) { |
| buff.append(encode(path.substring(0, at), allowed_rel_path, |
| charset)); |
| buff.append(encode(path.substring(at), allowed_abs_path, |
| charset)); |
| } else { |
| buff.append(encode(path, allowed_rel_path, charset)); |
| } |
| _path = buff.toString().toCharArray(); |
| } else if (_is_opaque_part) { |
| StringBuilder buf = new StringBuilder(); |
| buf.insert(0, encode(path.substring(0, 1), uric_no_slash, charset)); |
| buf.insert(1, encode(path.substring(1), uric, charset)); |
| _opaque = buf.toString().toCharArray(); |
| } else { |
| throw new URIException(URIException.PARSING, "incorrect path"); |
| } |
| setURI(); |
| } |
| |
| /** |
| * Resolve the base and relative path. |
| * |
| * @param basePath a character array of the basePath |
| * @param relPath a character array of the relPath |
| * @return the resolved path |
| * @throws URIException no more higher path level to be resolved |
| */ |
| protected char[] resolvePath(char[] basePath, char[] relPath) |
| throws URIException { |
| |
| // REMINDME: paths are never null |
| String base = (basePath == null) ? "" : new String(basePath); |
| |
| // _path could be empty |
| if (relPath == null || relPath.length == 0) { |
| return normalize(basePath); |
| } else if (relPath[0] == '/') { |
| return normalize(relPath); |
| } else { |
| int at = base.lastIndexOf('/'); |
| if (at != -1) { |
| basePath = base.substring(0, at + 1).toCharArray(); |
| } |
| StringBuilder buff = new StringBuilder(base.length() + relPath.length); |
| buff.append((at != -1) ? base.substring(0, at + 1) : "/"); |
| buff.append(relPath); |
| return normalize(buff.toString().toCharArray()); |
| } |
| } |
| |
| /** |
| * Get the raw-escaped current hierarchy level in the given path. If the |
| * last namespace is a collection, the slash mark ('/') should be ended with |
| * at the last character of the path string. |
| * |
| * @param path the path |
| * @return the current hierarchy level |
| * @throws URIException no hierarchy level |
| */ |
| protected char[] getRawCurrentHierPath(char[] path) throws URIException { |
| |
| if (_is_opaque_part) { |
| throw new URIException(URIException.PARSING, "no hierarchy level"); |
| } |
| if (path == null) { |
| throw new URIException(URIException.PARSING, "empty path"); |
| } |
| String buff = new String(path); |
| int first = buff.indexOf('/'); |
| int last = buff.lastIndexOf('/'); |
| if (last == 0) { |
| return rootPath; |
| } else if (first != last && last != -1) { |
| return buff.substring(0, last).toCharArray(); |
| } |
| // FIXME: it could be a document on the server side |
| return path; |
| } |
| |
| /** |
| * Get the raw-escaped current hierarchy level. |
| * |
| * @return the raw-escaped current hierarchy level |
| * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. |
| */ |
| public char[] getRawCurrentHierPath() throws URIException { |
| return (_path == null) ? null : getRawCurrentHierPath(_path); |
| } |
| |
| /** |
| * Get the escaped current hierarchy level. |
| * |
| * @return the escaped current hierarchy level |
| * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. |
| */ |
| public String getEscapedCurrentHierPath() throws URIException { |
| char[] path = getRawCurrentHierPath(); |
| return (path == null) ? null : new String(path); |
| } |
| |
| /** |
| * Get the current hierarchy level. |
| * |
| * @return the current hierarchy level |
| * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. |
| * @see #decode |
| */ |
| public String getCurrentHierPath() throws URIException { |
| char[] path = getRawCurrentHierPath(); |
| return (path == null) ? null : decode(path, getProtocolCharset()); |
| } |
| |
| /** |
| * Get the level above the this hierarchy level. |
| * |
| * @return the raw above hierarchy level |
| * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. |
| */ |
| public char[] getRawAboveHierPath() throws URIException { |
| char[] path = getRawCurrentHierPath(); |
| return (path == null) ? null : getRawCurrentHierPath(path); |
| } |
| |
| /** |
| * Get the level above the this hierarchy level. |
| * |
| * @return the raw above hierarchy level |
| * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. |
| */ |
| public String getEscapedAboveHierPath() throws URIException { |
| char[] path = getRawAboveHierPath(); |
| return (path == null) ? null : new String(path); |
| } |
| |
| /** |
| * Get the level above the this hierarchy level. |
| * |
| * @return the above hierarchy level |
| * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. |
| * @see #decode |
| */ |
| public String getAboveHierPath() throws URIException { |
| char[] path = getRawAboveHierPath(); |
| return (path == null) ? null : decode(path, getProtocolCharset()); |
| } |
| |
| /** |
| * Get the raw-escaped path. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * path = [ abs_path | opaque_part ] |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * |
| * @return the raw-escaped path |
| */ |
| public char[] getRawPath() { |
| return _is_opaque_part ? _opaque : _path; |
| } |
| |
| /** |
| * Get the escaped path. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * path = [ abs_path | opaque_part ] |
| * abs_path = "/" path_segments |
| * opaque_part = uric_no_slash *uric |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * |
| * @return the escaped path string |
| */ |
| public String getEscapedPath() { |
| char[] path = getRawPath(); |
| return (path == null) ? null : new String(path); |
| } |
| |
| /** |
| * Get the path. |
| * |
| * <blockquote> |
| * |
| * <pre> |
| * path = [ abs_path | opaque_part ] |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * |
| * @return the path string |
| * @throws URIException If {@link #decode} fails. |
| * @see #decode |
| */ |
| public String getPath() throws URIException { |
| char[] path = getRawPath(); |
| return (path == null) ? null : decode(path, getProtocolCharset()); |
| } |
| |
| /** |
| * Get the raw-escaped basename of the path. |
| * |
| * @return the raw-escaped basename |
| */ |
| public char[] getRawName() { |
| if (_path == null) { |
| return null; |
| } |
| |
| int at = 0; |
| for (int i = _path.length - 1; i >= 0; i--) { |
| if (_path[i] == '/') { |
| at = i + 1; |
| break; |
| } |
| } |
| int len = _path.length - at; |
| char[] basename = new char[len]; |
| System.arraycopy(_path, at, basename, 0, len); |
| return basename; |
| } |
| |
| /** |
| * Get the escaped basename of the path. |
| * |
| * @return the escaped basename string |
| */ |
| public String getEscapedName() { |
| char[] basename = getRawName(); |
| return (basename == null) ? null : new String(basename); |
| } |
| |
| /** |
| * Get the basename of the path. |
| * |
| * @return the basename string |
| * @throws URIException incomplete trailing escape pattern or unsupported |
| * character encoding |
| * @see #decode |
| */ |
| public String getName() throws URIException { |
| char[] basename = getRawName(); |
| return (basename == null) ? null : decode(getRawName(), |
| getProtocolCharset()); |
| } |
| |
| // ----------------------------------------------------- The path and query |
| |
| /** |
| * Get the raw-escaped path and query. |
| * |
| * @return the raw-escaped path and query |
| */ |
| public char[] getRawPathQuery() { |
| |
| if (_path == null && _query == null) { |
| return null; |
| } |
| StringBuilder buff = new StringBuilder(); |
| if (_path != null) { |
| buff.append(_path); |
| } |
| if (_query != null) { |
| buff.append('?'); |
| buff.append(_query); |
| } |
| return buff.toString().toCharArray(); |
| } |
| |
| /** |
| * Get the escaped query. |
| * |
| * @return the escaped path and query string |
| */ |
| public String getEscapedPathQuery() { |
| char[] rawPathQuery = getRawPathQuery(); |
| return (rawPathQuery == null) ? null : new String(rawPathQuery); |
| } |
| |
| /** |
| * Get the path and query. |
| * |
| * @return the path and query string. |
| * @throws URIException incomplete trailing escape pattern or unsupported |
| * character encoding |
| * @see #decode |
| */ |
| public String getPathQuery() throws URIException { |
| char[] rawPathQuery = getRawPathQuery(); |
| return (rawPathQuery == null) ? null : decode(rawPathQuery, |
| getProtocolCharset()); |
| } |
| |
| // -------------------------------------------------------------- The query |
| |
| /** |
| * Set the raw-escaped query. |
| * |
| * @param escapedQuery the raw-escaped query |
| * @throws URIException escaped query not valid |
| */ |
| public void setRawQuery(char[] escapedQuery) throws URIException { |
| if (escapedQuery == null || escapedQuery.length == 0) { |
| _query = escapedQuery; |
| setURI(); |
| return; |
| } |
| // remove the fragment identifier |
| escapedQuery = removeFragmentIdentifier(escapedQuery); |
| if (!validate(escapedQuery, query)) { |
| throw new URIException(URIException.ESCAPING, |
| "escaped query not valid"); |
| } |
| _query = escapedQuery; |
| setURI(); |
| } |
| |
| /** |
| * Set the escaped query string. |
| * |
| * @param escapedQuery the escaped query string |
| * @throws URIException escaped query not valid |
| */ |
| public void setEscapedQuery(String escapedQuery) throws URIException { |
| if (escapedQuery == null) { |
| _query = null; |
| setURI(); |
| return; |
| } |
| setRawQuery(escapedQuery.toCharArray()); |
| } |
| |
| /** |
| * Set the query. |
| * |
| * When a query string is not misunderstood the reserved special characters |
| * ("&", "=", "+", ",", and "$") within a query component, it is |
| * recommended to use in encoding the whole query with this method. |
| * |
| * The additional APIs for the special purpose using by the reserved special |
| * characters used in each protocol are implemented in each protocol classes |
| * inherited from <code>URI</code>. So refer to the same-named APIs |
| * implemented in each specific protocol instance. |
| * |
| * @param query the query string. |
| * @throws URIException incomplete trailing escape pattern or unsupported |
| * character encoding |
| * @see #encode |
| */ |
| public void setQuery(String query) throws URIException { |
| if (query == null || query.length() == 0) { |
| _query = (query == null) ? null : query.toCharArray(); |
| setURI(); |
| return; |
| } |
| setRawQuery(encode(query, allowed_query, getProtocolCharset())); |
| } |
| |
| /** |
| * Get the raw-escaped query. |
| * |
| * @return the raw-escaped query |
| */ |
| public char[] getRawQuery() { |
| return _query; |
| } |
| |
| /** |
| * Get the escaped query. |
| * |
| * @return the escaped query string |
| */ |
| public String getEscapedQuery() { |
| return (_query == null) ? null : new String(_query); |
| } |
| |
| /** |
| * Get the query. |
| * |
| * @return the query string. |
| * @throws URIException incomplete trailing escape pattern or unsupported |
| * character encoding |
| * @see #decode |
| */ |
| public String getQuery() throws URIException { |
| return (_query == null) ? null : decode(_query, getProtocolCharset()); |
| } |
| |
| // ----------------------------------------------------------- The fragment |
| |
| /** |
| * Set the raw-escaped fragment. |
| * |
| * @param escapedFragment the raw-escaped fragment |
| * @throws URIException escaped fragment not valid |
| */ |
| public void setRawFragment(char[] escapedFragment) throws URIException { |
| if (escapedFragment == null || escapedFragment.length == 0) { |
| _fragment = escapedFragment; |
| hash = 0; |
| return; |
| } |
| if (!validate(escapedFragment, fragment)) { |
| throw new URIException(URIException.ESCAPING, |
| "escaped fragment not valid"); |
| } |
| _fragment = escapedFragment; |
| hash = 0; |
| } |
| |
| /** |
| * Set the escaped fragment string. |
| * |
| * @param escapedFragment the escaped fragment string |
| * @throws URIException escaped fragment not valid |
| */ |
| public void setEscapedFragment(String escapedFragment) throws URIException { |
| if (escapedFragment == null) { |
| _fragment = null; |
| hash = 0; |
| return; |
| } |
| setRawFragment(escapedFragment.toCharArray()); |
| } |
| |
| /** |
| * Set the fragment. |
| * |
| * @param fragment the fragment string. |
| * @throws URIException If an error occurs. |
| */ |
| public void setFragment(String fragment) throws URIException { |
| if (fragment == null || fragment.length() == 0) { |
| _fragment = (fragment == null) ? null : fragment.toCharArray(); |
| hash = 0; |
| return; |
| } |
| _fragment = encode(fragment, allowed_fragment, getProtocolCharset()); |
| hash = 0; |
| } |
| |
| /** |
| * Get the raw-escaped fragment. |
| * |
| * The optional fragment identifier is not part of a URI, but is often used |
| * in conjunction with a URI. |
| * |
| * The format and interpretation of fragment identifiers is dependent on the |
| * media type [RFC2046] of the retrieval result. |
| * |
| * A fragment identifier is only meaningful when a URI reference is intended |
| * for retrieval and the result of that retrieval is a document for which |
| * the identified fragment is consistently defined. |
| * |
| * @return the raw-escaped fragment |
| */ |
| public char[] getRawFragment() { |
| return _fragment; |
| } |
| |
| /** |
| * Get the escaped fragment. |
| * |
| * @return the escaped fragment string |
| */ |
| public String getEscapedFragment() { |
| return (_fragment == null) ? null : new String(_fragment); |
| } |
| |
| /** |
| * Get the fragment. |
| * |
| * @return the fragment string |
| * @throws URIException incomplete trailing escape pattern or unsupported |
| * character encoding |
| * @see #decode |
| */ |
| public String getFragment() throws URIException { |
| return (_fragment == null) ? null : decode(_fragment, |
| getProtocolCharset()); |
| } |
| |
| // ------------------------------------------------------------- Utilities |
| |
| /** |
| * Remove the fragment identifier of the given component. |
| * |
| * @param component the component that a fragment may be included |
| * @return the component that the fragment identifier is removed |
| */ |
| protected char[] removeFragmentIdentifier(char[] component) { |
| if (component == null) { |
| return null; |
| } |
| int lastIndex = new String(component).indexOf('#'); |
| if (lastIndex != -1) { |
| component = new String(component).substring(0, lastIndex).toCharArray(); |
| } |
| return component; |
| } |
| |
| /** |
| * Normalize the given hier path part. |
| * |
| * Algorithm taken from URI reference parser at |
| * http://www.apache.org/~fielding/uri/rev-2002/issues.html. |
| * |
| * @param path the path to normalize |
| * @return the normalized path |
| * @throws URIException no more higher path level to be normalized |
| */ |
| protected char[] normalize(char[] path) throws URIException { |
| |
| if (path == null) { |
| return null; |
| } |
| |
| String normalized = new String(path); |
| |
| // If the buffer begins with "./" or "../", the "." or ".." is removed. |
| if (normalized.startsWith("./")) { |
| normalized = normalized.substring(1); |
| } else if (normalized.startsWith("../")) { |
| normalized = normalized.substring(2); |
| } else if (normalized.startsWith("..")) { |
| normalized = normalized.substring(2); |
| } |
| |
| // All occurrences of "/./" in the buffer are replaced with "/" |
| int index = -1; |
| while ((index = normalized.indexOf("/./")) != -1) { |
| normalized = normalized.substring(0, index) |
| + normalized.substring(index + 2); |
| } |
| |
| // If the buffer ends with "/.", the "." is removed. |
| if (normalized.endsWith("/.")) { |
| normalized = normalized.substring(0, normalized.length() - 1); |
| } |
| |
| int startIndex = 0; |
| |
| // All occurrences of "/<segment>/../" in the buffer, where ".." |
| // and <segment> are complete path segments, are iteratively replaced |
| // with "/" in order from left to right until no matching pattern |
| // remains. |
| // If the buffer ends with "/<segment>/..", that is also replaced |
| // with "/". Note that <segment> may be empty. |
| while ((index = normalized.indexOf("/../", startIndex)) != -1) { |
| int slashIndex = normalized.lastIndexOf('/', index - 1); |
| if (slashIndex >= 0) { |
| normalized = normalized.substring(0, slashIndex) |
| + normalized.substring(index + 3); |
| } else { |
| startIndex = index + 3; |
| } |
| } |
| if (normalized.endsWith("/..")) { |
| int slashIndex = normalized.lastIndexOf('/', |
| normalized.length() - 4); |
| if (slashIndex >= 0) { |
| normalized = normalized.substring(0, slashIndex + 1); |
| } |
| } |
| |
| // All prefixes of "<segment>/../" in the buffer, where ".." |
| // and <segment> are complete path segments, are iteratively replaced |
| // with "/" in order from left to right until no matching pattern |
| // remains. |
| // If the buffer ends with "<segment>/..", that is also replaced |
| // with "/". Note that <segment> may be empty. |
| while ((index = normalized.indexOf("/../")) != -1) { |
| int slashIndex = normalized.lastIndexOf('/', index - 1); |
| if (slashIndex >= 0) { |
| break; |
| } |
| normalized = normalized.substring(index + 3); |
| } |
| if (normalized.endsWith("/..")) { |
| int slashIndex = normalized.lastIndexOf('/', |
| normalized.length() - 4); |
| if (slashIndex < 0) { |
| normalized = "/"; |
| } |
| } |
| |
| return normalized.toCharArray(); |
| } |
| |
| /** |
| * Normalizes the path part of this URI. Normalization is only meant to be |
| * performed on URIs with an absolute path. Calling this method on a |
| * relative path URI will have no effect. |
| * |
| * @throws URIException no more higher path level to be normalized |
| * @see #isAbsPath() |
| */ |
| public void normalize() throws URIException { |
| if (isAbsPath()) { |
| _path = normalize(_path); |
| setURI(); |
| } |
| } |
| |
| /** |
| * Test if the first array is equal to the second array. |
| * |
| * @param first the first character array |
| * @param second the second character array |
| * @return true if they're equal |
| */ |
| protected boolean equals(char[] first, char[] second) { |
| |
| if (first == null && second == null) { |
| return true; |
| } |
| if (first == null || second == null) { |
| return false; |
| } |
| if (first.length != second.length) { |
| return false; |
| } |
| for (int i = 0; i < first.length; i++) { |
| if (first[i] != second[i]) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| /** |
| * Test an object if this URI is equal to another. |
| * |
| * @param obj an object to compare |
| * @return true if two URI objects are equal |
| */ |
| public boolean equals(Object obj) { |
| |
| // normalize and test each components |
| if (obj == this) { |
| return true; |
| } |
| if (!(obj instanceof URI)) { |
| return false; |
| } |
| URI another = (URI) obj; |
| // scheme |
| if (!equals(_scheme, another._scheme)) { |
| return false; |
| } |
| // is_opaque_part or is_hier_part? and opaque |
| if (!equals(_opaque, another._opaque)) { |
| return false; |
| } |
| // is_hier_part |
| // has_authority |
| if (!equals(_authority, another._authority)) { |
| return false; |
| } |
| // path |
| if (!equals(_path, another._path)) { |
| return false; |
| } |
| // has_query |
| if (!equals(_query, another._query)) { |
| return false; |
| } |
| // has_fragment? should be careful of the only fragment case. |
| if (!equals(_fragment, another._fragment)) { |
| return false; |
| } |
| return true; |
| } |
| |
| // ---------------------------------------------------------- Serialization |
| |
| /** |
| * Write the content of this URI. |
| * |
| * @param oos the object-output stream |
| * @throws IOException If an IO problem occurs. |
| */ |
| private void writeObject(ObjectOutputStream oos) throws IOException { |
| |
| oos.defaultWriteObject(); |
| } |
| |
| /** |
| * Read a URI. |
| * |
| * @param ois the object-input stream |
| * @throws ClassNotFoundException If one of the classes specified in the |
| * input stream cannot be found. |
| * @throws IOException If an IO problem occurs. |
| */ |
| private void readObject(ObjectInputStream ois) |
| throws ClassNotFoundException, IOException { |
| |
| ois.defaultReadObject(); |
| } |
| |
| // -------------------------------------------------------------- Hash code |
| |
| /** |
| * Return a hash code for this URI. |
| * |
| * @return a has code value for this URI |
| */ |
| public int hashCode() { |
| if (hash == 0) { |
| char[] c = _uri; |
| if (c != null) { |
| for (int i = 0, len = c.length; i < len; i++) { |
| hash = 31 * hash + c[i]; |
| } |
| } |
| c = _fragment; |
| if (c != null) { |
| for (int i = 0, len = c.length; i < len; i++) { |
| hash = 31 * hash + c[i]; |
| } |
| } |
| } |
| return hash; |
| } |
| |
| // ------------------------------------------------------------- Comparison |
| |
| /** |
| * Compare this URI to another object. |
| * |
| * @param another the object to be compared. |
| * @return 0, if it's same, -1, if failed, first being compared with in the |
| * authority component |
| * @throws ClassCastException not URI argument |
| */ |
| public int compareTo(URI another) { |
| |
| if (!equals(_authority, another.getRawAuthority())) { |
| return -1; |
| } |
| return toString().compareTo(another.toString()); |
| } |
| |
| // ------------------------------------------------------------------ Clone |
| |
| /** |
| * Create and return a copy of this object, the URI-reference containing the |
| * userinfo component. Notice that the whole URI-reference including the |
| * userinfo component counld not be gotten as a <code>String</code>. |
| * |
| * To copy the identical <code>URI</code> object including the userinfo |
| * component, it should be used. |
| * |
| * @return a clone of this instance |
| */ |
| public synchronized Object clone() throws CloneNotSupportedException { |
| |
| URI instance = (URI) super.clone(); |
| |
| instance._uri = _uri; |
| instance._scheme = _scheme; |
| instance._opaque = _opaque; |
| instance._authority = _authority; |
| instance._userinfo = _userinfo; |
| instance._host = _host; |
| instance._port = _port; |
| instance._path = _path; |
| instance._query = _query; |
| instance._fragment = _fragment; |
| // the charset to do escape encoding for this instance |
| instance.protocolCharset = protocolCharset; |
| // flags |
| instance._is_hier_part = _is_hier_part; |
| instance._is_opaque_part = _is_opaque_part; |
| instance._is_net_path = _is_net_path; |
| instance._is_abs_path = _is_abs_path; |
| instance._is_rel_path = _is_rel_path; |
| instance._is_reg_name = _is_reg_name; |
| instance._is_server = _is_server; |
| instance._is_hostname = _is_hostname; |
| instance._is_IPv4address = _is_IPv4address; |
| instance._is_IPv6reference = _is_IPv6reference; |
| |
| return instance; |
| } |
| |
| // ------------------------------------------------------------ Get the URI |
| |
| /** |
| * It can be gotten the URI character sequence. It's raw-escaped. For the |
| * purpose of the protocol to be transported, it will be useful. |
| * |
| * It is clearly unwise to use a URL that contains a password which is |
| * intended to be secret. In particular, the use of a password within the |
| * 'userinfo' component of a URL is strongly disrecommended except in those |
| * rare cases where the 'password' parameter is intended to be public. |
| * |
| * When you want to get each part of the userinfo, you need to use the |
| * specific methods in the specific URL. It depends on the specific URL. |
| * |
| * @return the URI character sequence |
| */ |
| public char[] getRawURI() { |
| return _uri; |
| } |
| |
| /** |
| * It can be gotten the URI character sequence. It's escaped. For the |
| * purpose of the protocol to be transported, it will be useful. |
| * |
| * @return the escaped URI string |
| */ |
| public String getEscapedURI() { |
| return (_uri == null) ? null : new String(_uri); |
| } |
| |
| /** |
| * It can be gotten the URI character sequence. |
| * |
| * @return the original URI string |
| * @throws URIException incomplete trailing escape pattern or unsupported |
| * character encoding |
| * @see #decode |
| */ |
| public String getURI() throws URIException { |
| return (_uri == null) ? null : decode(_uri, getProtocolCharset()); |
| } |
| |
| /** |
| * Get the URI reference character sequence. |
| * |
| * @return the URI reference character sequence |
| */ |
| public char[] getRawURIReference() { |
| if (_fragment == null) { |
| return _uri; |
| } |
| if (_uri == null) { |
| return _fragment; |
| } |
| // if _uri != null && _fragment != null |
| String uriReference = new String(_uri) + "#" + new String(_fragment); |
| return uriReference.toCharArray(); |
| } |
| |
| /** |
| * Get the escaped URI reference string. |
| * |
| * @return the escaped URI reference string |
| */ |
| public String getEscapedURIReference() { |
| char[] uriReference = getRawURIReference(); |
| return (uriReference == null) ? null : new String(uriReference); |
| } |
| |
| /** |
| * Get the original URI reference string. |
| * |
| * @return the original URI reference string |
| * @throws URIException If {@link #decode} fails. |
| */ |
| public String getURIReference() throws URIException { |
| char[] uriReference = getRawURIReference(); |
| return (uriReference == null) ? null : decode(uriReference, |
| getProtocolCharset()); |
| } |
| |
| /** |
| * Get the escaped URI string. |
| * |
| * On the document, the URI-reference form is only used without the userinfo |
| * component like http://jakarta.apache.org/ by the security reason. But the |
| * URI-reference form with the userinfo component could be parsed. |
| * |
| * In other words, this URI and any its subclasses must not expose the |
| * URI-reference expression with the userinfo component like |
| * http://user:password@hostport/restricted_zone.<br> |
| * It means that the API client programmer should extract each user and |
| * password to access manually. Probably it will be supported in the each |
| * subclass, however, not a whole URI-reference expression. |
| * |
| * @return the escaped URI string |
| * @see #clone() |
| */ |
| public String toString() { |
| return getEscapedURI(); |
| } |
| |
| // ------------------------------------------------------------ Inner class |
| |
| /** |
| * The charset-changed normal operation to represent to be required to alert |
| * to user the fact the default charset is changed. |
| */ |
| @SuppressWarnings("serial") |
| public static class DefaultCharsetChanged extends SlingException { |
| |
| // ------------------------------------------------------- constructors |
| |
| /** |
| * The constructor with a reason string and its code arguments. |
| * |
| * @param reasonCode the reason code |
| * @param reason the reason |
| */ |
| public DefaultCharsetChanged(int reasonCode, String reason) { |
| super(reason); |
| this.reason = reason; |
| this.reasonCode = reasonCode; |
| } |
| |
| // ---------------------------------------------------------- constants |
| |
| /** No specified reason code. */ |
| public static final int UNKNOWN = 0; |
| |
| /** Protocol charset changed. */ |
| public static final int PROTOCOL_CHARSET = 1; |
| |
| /** Document charset changed. */ |
| public static final int DOCUMENT_CHARSET = 2; |
| |
| // ------------------------------------------------- instance variables |
| |
| /** The reason code. */ |
| private int reasonCode; |
| |
| /** The reason message. */ |
| private String reason; |
| |
| // ------------------------------------------------------------ methods |
| |
| /** |
| * Get the reason code. |
| * |
| * @return the reason code |
| */ |
| public int getReasonCode() { |
| return reasonCode; |
| } |
| |
| /** |
| * Get the reason message. |
| * |
| * @return the reason message |
| */ |
| public String getReason() { |
| return reason; |
| } |
| |
| } |
| |
| /** |
| * A mapping to determine the (somewhat arbitrarily) preferred charset for a |
| * given locale. Supports all locales recognized in JDK 1.1. |
| * |
| * The distribution of this class is Servlets.com. It was originally written |
| * by Jason Hunter [jhunter at acm.org] and used by with permission. |
| */ |
| public static class LocaleToCharsetMap { |
| |
| /** A mapping of language code to charset */ |
| private static final HashMap<String, String> LOCALE_TO_CHARSET_MAP; |
| static { |
| LOCALE_TO_CHARSET_MAP = new HashMap<String, String>(); |
| LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6"); |
| LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5"); |
| LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5"); |
| LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1"); |
| LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2"); |
| LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1"); |
| LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1"); |
| LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7"); |
| LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1"); |
| LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1"); |
| LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1"); |
| LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1"); |
| LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1"); |
| LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2"); |
| LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2"); |
| LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1"); |
| LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1"); |
| LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8"); |
| LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS"); |
| LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR"); |
| LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2"); |
| LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2"); |
| LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5"); |
| LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1"); |
| LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1"); |
| LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2"); |
| LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1"); |
| LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2"); |
| LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5"); |
| LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5"); |
| LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2"); |
| LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2"); |
| LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2"); |
| LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5"); |
| LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1"); |
| LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9"); |
| LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5"); |
| LOCALE_TO_CHARSET_MAP.put("zh", "GB2312"); |
| LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5"); |
| } |
| |
| /** |
| * Get the preferred charset for the given locale. |
| * |
| * @param locale the locale |
| * @return the preferred charset or null if the locale is not |
| * recognized. |
| */ |
| public static String getCharset(Locale locale) { |
| // try for an full name match (may include country) |
| String charset = LOCALE_TO_CHARSET_MAP.get(locale.toString()); |
| if (charset != null) { |
| return charset; |
| } |
| |
| // if a full name didn't match, try just the language |
| charset = LOCALE_TO_CHARSET_MAP.get(locale.getLanguage()); |
| return charset; // may be null |
| } |
| |
| } |
| |
| // from EncodingUtils... |
| |
| /** |
| * Converts the specified string to a byte array. If the charset is not |
| * supported the default system charset is used. |
| * |
| * @param data the string to be encoded |
| * @param charset the desired character encoding |
| * @return The resulting byte array. |
| * @since 3.0 |
| */ |
| private static byte[] getBytes(final String data, String charset) { |
| |
| if (data == null) { |
| throw new IllegalArgumentException("data may not be null"); |
| } |
| |
| if (charset == null || charset.length() == 0) { |
| throw new IllegalArgumentException( |
| "charset may not be null or empty"); |
| } |
| |
| try { |
| return data.getBytes(charset); |
| } catch (UnsupportedEncodingException e) { |
| |
| // if (LOG.isWarnEnabled()) { |
| // LOG.warn("Unsupported encoding: " + charset + |
| // ". System encoding used."); |
| // } |
| |
| return data.getBytes(); |
| } |
| } |
| |
| /** |
| * Converts the byte array of ASCII characters to a string. This method is |
| * to be used when decoding content of HTTP elements (such as response |
| * headers) |
| * |
| * @param data the byte array to be encoded |
| * @param offset the index of the first byte to encode |
| * @param length the number of bytes to encode |
| * @return The string representation of the byte array |
| * @since 3.0 |
| */ |
| private static String getAsciiString(final byte[] data) { |
| |
| if (data == null) { |
| throw new IllegalArgumentException("Parameter may not be null"); |
| } |
| |
| try { |
| return new String(data, "US-ASCII"); |
| } catch (UnsupportedEncodingException e) { |
| throw new URIException("HttpClient requires ASCII support"); |
| } |
| } |
| |
| /** |
| * Converts the byte array of HTTP content characters to a string. If the |
| * specified charset is not supported, default system encoding is used. |
| * |
| * @param data the byte array to be encoded |
| * @param charset the desired character encoding |
| * @return The result of the conversion. |
| * @since 3.0 |
| */ |
| public static String getString(final byte[] data, String charset) { |
| |
| if (data == null) { |
| throw new IllegalArgumentException("Parameter may not be null"); |
| } |
| |
| if (charset == null || charset.length() == 0) { |
| throw new IllegalArgumentException( |
| "charset may not be null or empty"); |
| } |
| |
| try { |
| return new String(data, charset); |
| } catch (UnsupportedEncodingException e) { |
| |
| // if (LOG.isWarnEnabled()) { |
| // LOG.warn("Unsupported encoding: " + charset + |
| // ". System encoding used"); |
| // } |
| return new String(data); |
| } |
| } |
| |
| /** |
| * Converts the specified string to byte array of ASCII characters. |
| * |
| * @param data the string to be encoded |
| * @return The string as a byte array. |
| * @since 3.0 |
| */ |
| public static byte[] getAsciiBytes(final String data) { |
| |
| if (data == null) { |
| throw new IllegalArgumentException("Parameter may not be null"); |
| } |
| |
| try { |
| return data.getBytes("US-ASCII"); |
| } catch (UnsupportedEncodingException e) { |
| throw new URIException("HttpClient requires ASCII support"); |
| } |
| } |
| |
| /** |
| * Encodes an array of bytes into an array of URL safe 7-bit characters. |
| * Unsafe characters are escaped. |
| * |
| * @param urlsafe bitset of characters deemed URL safe |
| * @param bytes array of bytes to convert to URL safe characters |
| * @return array of bytes containing URL safe characters |
| */ |
| private static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes) { |
| if (bytes == null) { |
| return null; |
| } |
| |
| ByteArrayOutputStream buffer = new ByteArrayOutputStream(); |
| for (int i = 0; i < bytes.length; i++) { |
| int b = bytes[i]; |
| if (b < 0) { |
| b = 256 + b; |
| } |
| if (urlsafe.get(b)) { |
| if (b == ' ') { |
| b = '+'; |
| } |
| buffer.write(b); |
| } else { |
| buffer.write('%'); |
| char hex1 = Character.toUpperCase(Character.forDigit( |
| (b >> 4) & 0xF, 16)); |
| char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, |
| 16)); |
| buffer.write(hex1); |
| buffer.write(hex2); |
| } |
| } |
| return buffer.toByteArray(); |
| } |
| |
| /** |
| * Decodes an array of URL safe 7-bit characters into an array of original |
| * bytes. Escaped characters are converted back to their original |
| * representation. |
| * |
| * @param bytes array of URL safe characters |
| * @return array of original bytes |
| * @throws URIException Thrown if URL decoding is unsuccessful |
| */ |
| private static final byte[] decodeUrl(byte[] bytes) { |
| if (bytes == null) { |
| return null; |
| } |
| ByteArrayOutputStream buffer = new ByteArrayOutputStream(); |
| for (int i = 0; i < bytes.length; i++) { |
| int b = bytes[i]; |
| if (b == '+') { |
| buffer.write(' '); |
| } else if (b == '%') { |
| try { |
| int u = Character.digit((char) bytes[++i], 16); |
| int l = Character.digit((char) bytes[++i], 16); |
| if (u == -1 || l == -1) { |
| throw new URIException("Invalid URL encoding"); |
| } |
| buffer.write((char) ((u << 4) + l)); |
| } catch (ArrayIndexOutOfBoundsException e) { |
| throw new URIException("Invalid URL encoding"); |
| } |
| } else { |
| buffer.write(b); |
| } |
| } |
| return buffer.toByteArray(); |
| } |
| } |