// src/main/java/org/apache/sling/resourceresolver/impl/helper/URI.java - sling-org-apache-sling-resourceresolver - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 package org.apache.sling.resourceresolver.impl.helper;

 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
 import java.io.Serializable;
 import java.io.UnsupportedEncodingException;
 import java.util.Arrays;
 import java.util.BitSet;
 import java.util.HashMap;
 import java.util.Locale;

 import org.apache.sling.api.SlingException;

 /**
  * The interface for the URI (Uniform Resource Identifier) version of RFC 2396.
  * This class supports parsing a URI reference so that specific protocols can
  * extend it, and handles the character encoding of the protocol to be
  * transported as well as the charset of the document.
  *
  * A URI is always in an "escaped" form, since escaping or unescaping a
  * completed URI might change its semantics.
  *
  * Implementers should be careful not to escape or unescape the same string more
  * than once, since unescaping an already unescaped string might lead to
  * misinterpreting a percent data character as another escaped character, or
  * vice versa in the case of escaping an already escaped string.
  *
  * In order to avoid these problems, data types are used as follows:
  *
  * <blockquote>
  *
  * <pre>
  *   URI character sequence: char
  *   octet sequence: byte
  *   original character sequence: String
  * </pre>
  *
  * </blockquote>
  *
  * So, a URI is a sequence of characters as an array of a char type, which is
  * not always represented as a sequence of octets as an array of byte.
  *
  * URI Syntactic Components
  *
  * <blockquote>
  *
  * <pre>
  * - In general, written as follows:
  *   Absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;
  *   Generic URI = &lt;scheme&gt;://&lt;authority&gt;&lt;path&gt;?&lt;query&gt;
  * - Syntax
  *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
  *   hier_part     = ( net_path | abs_path ) [ "?" query ]
  *   net_path      = "//" authority [ abs_path ]
  *   abs_path      = "/"  path_segments
  * </pre>
  *
  * </blockquote>
  *
  * The following examples illustrate URI that are in common use.
  *
  * <pre>
  * ftp://ftp.is.co.za/rfc/rfc1808.txt
  *    -- ftp scheme for File Transfer Protocol services
  * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
  *    -- gopher scheme for Gopher and Gopher+ Protocol services
  * http://www.math.uio.no/faq/compression-faq/part1.html
  *    -- http scheme for Hypertext Transfer Protocol services
  * mailto:mduerst@ifi.unizh.ch
  *    -- mailto scheme for electronic mail addresses
  * news:comp.infosystems.www.servers.unix
  *    -- news scheme for USENET news groups and articles
  * telnet://melvyl.ucop.edu/
  *    -- telnet scheme for interactive services via the TELNET Protocol
  * </pre>
  *
  * Please, notice that there are many modifications from URL(RFC 1738) and
  * relative URL(RFC 1808).
  *
  * <b>The expressions for a URI</b>
  *
  *
  * <pre>
  * For escaped URI forms
  *  - URI(char[]) // constructor
  *  - char[] getRawXxx() // method
  *  - String getEscapedXxx() // method
  *  - String toString() // method
  *
  * For unescaped URI forms
  *  - URI(String) // constructor
  *  - String getXXX() // method
  * </pre>
  *
  * This class is a slightly modified version of the URI class distributed with
  * Http Client 3.1. The changes involve removing dependencies on other Http
  * Client classes and the Commons Codec library. To this end the following
  * methods have been added to this class:
  * <ul>
  * <li>getBytes, getAsciiString, getString and getAsciiBytes have been copied
  * from the Http Client 3.1 EncodingUtils class.</li>
  * <li>encodeUrl and decodeUrl have been copied from the Commons Codec URLCodec
  * class.</li>
  * </ul>
  * The signatures have been simplified and adapted to the use in this class.
  * Also the exception thrown has been changed to be {@link URIException}.
  */
 public class URI implements Cloneable, Comparable<URI>, Serializable {

     // ----------------------------------------------------------- Constructors

     /**
      * Creates an empty instance for internal use. No field is assigned here,
      * so every field keeps its declared initial value.
      */
     protected URI() {
     }

     /**
      * Creates a URI by parsing the given string, using the supplied charset as
      * the protocol charset of this instance.
      *
      * @param s the URI character sequence, in escaped or unescaped form
      * @param escaped {@code true} if {@code s} is already in escaped form,
      *            {@code false} otherwise
      * @param charset the charset name used for escape encoding, if required
      * @throws URIException if the URI cannot be created
      * @throws NullPointerException if the input string is {@code null}
      * @see #getProtocolCharset
      * @since 3.0
      */
     public URI(String s, boolean escaped, String charset) throws URIException,
             NullPointerException {
         // record the protocol charset first, then parse the reference
         this.protocolCharset = charset;
         this.parseUriReference(s, escaped);
     }

     /**
      * Construct a URI from a string. The input string can be either in escaped
      * or unescaped form. This constructor takes no charset argument and does
      * not assign {@code protocolCharset} before parsing.
      *
      * @param s URI character sequence
      * @param escaped {@code true} if URI character sequence is in escaped
      *            form. {@code false} otherwise.
      * @throws URIException If the URI cannot be created.
      * @throws NullPointerException if input string is <code>null</code>
      * @see #getProtocolCharset
      * @since 3.0
      */
     public URI(String s, boolean escaped) throws URIException,
             NullPointerException {
         parseUriReference(s, escaped);
     }

     /**
      * Construct an opaque absolute URI from the given components.
      *
      * <blockquote>
      *
      * <pre>
      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
      *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
      *   opaque_part   = uric_no_slash *uric
      * </pre>
      *
      * </blockquote>
      *
      * It's for absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;#
      * &lt;fragment&gt;.
      *
      * The scheme is lower-cased with {@link Locale#ROOT} before validation, so
      * the result does not depend on the JVM's default locale. The
      * scheme-specific part is encoded with the protocol charset; the fragment
      * is stored as given.
      *
      * @param scheme the scheme string, must not be {@code null}
      * @param schemeSpecificPart scheme_specific_part
      * @param fragment the fragment string, may be {@code null}
      * @throws URIException If the scheme is {@code null} or not a valid
      *             scheme, or the URI cannot be created.
      * @see #getDefaultProtocolCharset
      */
     public URI(String scheme, String schemeSpecificPart, String fragment)
             throws URIException {

         // validate and construct the URI character sequence
         if (scheme == null) {
             throw new URIException(URIException.PARSING, "scheme required");
         }
         // Locale.ROOT keeps the case mapping locale-independent; with the
         // default locale a Turkish JVM maps 'I' to a dotless i, which would
         // then fail the scheme validation below.
         char[] s = scheme.toLowerCase(Locale.ROOT).toCharArray();
         if (validate(s, URI.scheme)) {
             _scheme = s; // is_absoluteURI
         } else {
             throw new URIException(URIException.PARSING, "incorrect scheme");
         }
         _opaque = encode(schemeSpecificPart, allowed_opaque_part,
             getProtocolCharset());
         // Set flag
         _is_opaque_part = true;
         _fragment = fragment == null ? null : fragment.toCharArray();
         setURI();
     }

     /**
      * Construct a general URI from the given components.
      *
      * <blockquote>
      *
      * <pre>
      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
      *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
      *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
      *   hier_part     = ( net_path | abs_path ) [ "?" query ]
      * </pre>
      *
      * </blockquote>
      *
      * It's for absolute URI = &lt;scheme&gt;:&lt;path&gt;?&lt;query&gt;#&lt;
      * fragment&gt; and relative URI = &lt;path&gt;?&lt;query&gt;#&lt;fragment
      * &gt;.
      *
      * @param scheme the scheme string
      * @param authority the authority string
      * @param path the path string
      * @param query the query string
      * @param fragment the fragment string
      * @throws URIException If the new URI cannot be created.
      * @see #getDefaultProtocolCharset
      */
     public URI(String scheme, String authority, String path, String query,
             String fragment) throws URIException {

         // validate and contruct the URI character sequence
         StringBuilder buff = new StringBuilder();
         if (scheme != null) {
             buff.append(scheme);
             buff.append(':');
         }
         if (authority != null) {
             buff.append("//");
             buff.append(authority);
         }
         if (path != null) { // accept empty path
             if ((scheme != null || authority != null) && !path.startsWith("/")) {
                 throw new URIException(URIException.PARSING,
                     "abs_path requested");
             }
             buff.append(path);
         }
         if (query != null) {
             buff.append('?');
             buff.append(query);
         }
         if (fragment != null) {
             buff.append('#');
             buff.append(fragment);
         }
         parseUriReference(buff.toString(), false);
     }

     /**
      * Construct a general URI from the given components.
      *
      * @param scheme the scheme string
      * @param userinfo the userinfo string
      * @param host the host string
      * @param port the port number
      * @throws URIException If the new URI cannot be created.
      * @see #getDefaultProtocolCharset
      */
     public URI(String scheme, String userinfo, String host, int port)
             throws URIException {

         // path, query and fragment are all passed on as null
         this(scheme, userinfo, host, port, null, null, null);
     }

     /**
      * Construct a general URI from the given components.
      *
      * @param scheme the scheme string
      * @param userinfo the userinfo string
      * @param host the host string
      * @param port the port number
      * @param path the path string
      * @throws URIException If the new URI cannot be created.
      * @see #getDefaultProtocolCharset
      */
     public URI(String scheme, String userinfo, String host, int port,
             String path) throws URIException {

         // query and fragment are passed on as null
         this(scheme, userinfo, host, port, path, null, null);
     }

     /**
      * Construct a general URI from the given components.
      *
      * @param scheme the scheme string
      * @param userinfo the userinfo string
      * @param host the host string
      * @param port the port number
      * @param path the path string
      * @param query the query string
      * @throws URIException If the new URI cannot be created.
      * @see #getDefaultProtocolCharset
      */
     public URI(String scheme, String userinfo, String host, int port,
             String path, String query) throws URIException {

         // the fragment is passed on as null
         this(scheme, userinfo, host, port, path, query, null);
     }

     /**
      * Construct a general URI from the given components.
      *
      * @param scheme the scheme string
      * @param userinfo the userinfo string
      * @param host the host string
      * @param port the port number
      * @param path the path string
      * @param query the query string
      * @param fragment the fragment string
      * @throws URIException If the new URI cannot be created.
      * @see #getDefaultProtocolCharset
      */
     public URI(String scheme, String userinfo, String host, int port,
             String path, String query, String fragment) throws URIException {

         // The authority argument is null when host is null (userinfo and port
         // are then not used). Otherwise it is "[userinfo@]host[:port]", where
         // the ":port" suffix is appended only when port is not -1.
         this(scheme, (host == null) ? null : ((userinfo != null)
                 ? userinfo + '@'
                 : "")
             + host + ((port != -1) ? ":" + port : ""), path, query, fragment);
     }

     /**
      * Construct a general URI from the given components.
      *
      * @param scheme the scheme string
      * @param host the host string
      * @param path the path string
      * @param fragment the fragment string
      * @throws URIException If the new URI cannot be created.
      * @see #getDefaultProtocolCharset
      */
     public URI(String scheme, String host, String path, String fragment)
             throws URIException {

         // host is passed in the authority position; the query is null
         this(scheme, host, path, null, fragment);
     }

     /**
      * Construct a general URI with the given relative URI string.
      *
      * @param base the base URI
      * @param relative the relative URI string
      * @param escaped <tt>true</tt> if URI character sequence is in escaped
      *            form. <tt>false</tt> otherwise.
      * @throws URIException If the new URI cannot be created.
      * @since 3.0
      */
     public URI(URI base, String relative, boolean escaped) throws URIException {
         // parse the relative string into a URI first, then resolve it against base
         this(base, new URI(relative, escaped));
     }

     /**
      * Construct a general URI with the given relative URI.
      *
      * <blockquote>
      *
      * <pre>
      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
      *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
      * </pre>
      *
      * </blockquote>
      *
      * Resolving Relative References to Absolute Form. <strong>Examples of
      * Resolving Relative URI References</strong> Within an object with a
      * well-defined base URI of
      *
      * <blockquote>
      *
      * <pre>
      *   http://a/b/c/d;p?q
      * </pre>
      *
      * </blockquote>
      *
      * the relative URI would be resolved as follows: Normal Examples
      *
      * <blockquote>
      *
      * <pre>
      *   g:h           =  g:h
      *   g             =  http://a/b/c/g
      *   ./g           =  http://a/b/c/g
      *   g/            =  http://a/b/c/g/
      *   /g            =  http://a/g
      *   //g           =  http://g
      *   ?y            =  http://a/b/c/?y
      *   g?y           =  http://a/b/c/g?y
      *   #s            =  (current document)#s
      *   g#s           =  http://a/b/c/g#s
      *   g?y#s         =  http://a/b/c/g?y#s
      *   ;x            =  http://a/b/c/;x
      *   g;x           =  http://a/b/c/g;x
      *   g;x?y#s       =  http://a/b/c/g;x?y#s
      *   .             =  http://a/b/c/
      *   ./            =  http://a/b/c/
      *   ..            =  http://a/b/
      *   ../           =  http://a/b/
      *   ../g          =  http://a/b/g
      *   ../..         =  http://a/
      *   ../../        =  http://a/
      *   ../../g       =  http://a/g
      * </pre>
      *
      * </blockquote>
      *
      * Some URI schemes do not allow a hierarchical syntax matching the
      * &lt;hier_part&gt; syntax, and thus cannot use relative references.
      *
      * @param base the base URI
      * @param relative the relative URI
      * @throws URIException If the new URI cannot be created.
      */
     public URI(URI base, URI relative) throws URIException {

         // Resolves "relative" against "base": components are copied from one
         // or the other, the URI string is rebuilt, and (for the non-opaque
         // case) the result is re-parsed to set the flags.

         // the base must be absolute, i.e. carry a scheme
         if (base._scheme == null) {
             throw new URIException(URIException.PARSING, "base URI required");
         }
         // NOTE(review): this condition is always true here, because a null
         // base scheme was rejected just above.
         if (base._scheme != null) {
             this._scheme = base._scheme;
             this._authority = base._authority;
             this._is_net_path = base._is_net_path;
         }
         // If either side is opaque no path resolution is done: the scheme
         // comes from the base, the opaque part and fragment from the relative
         // URI, and the constructor returns early.
         if (base._is_opaque_part || relative._is_opaque_part) {
             this._scheme = base._scheme;
             this._is_opaque_part = base._is_opaque_part
                 || relative._is_opaque_part;
             this._opaque = relative._opaque;
             this._fragment = relative._fragment;
             this.setURI();
             return;
         }
         // element-wise comparison of the two scheme arrays
         boolean schemesEqual = Arrays.equals(base._scheme, relative._scheme);
         if (relative._scheme != null
             && (!schemesEqual || relative._authority != null)) {
             // the relative URI has its own scheme, and that scheme differs
             // from the base or comes with an authority: take scheme,
             // authority and path from the relative URI
             this._scheme = relative._scheme;
             this._is_net_path = relative._is_net_path;
             this._authority = relative._authority;
             if (relative._is_server) {
                 this._is_server = relative._is_server;
                 this._userinfo = relative._userinfo;
                 this._host = relative._host;
                 this._port = relative._port;
             } else if (relative._is_reg_name) {
                 this._is_reg_name = relative._is_reg_name;
             }
             this._is_abs_path = relative._is_abs_path;
             this._is_rel_path = relative._is_rel_path;
             this._path = relative._path;
         } else if (base._authority != null && relative._scheme == null) {
             // scheme-less relative URI: inherit the authority from the base
             this._is_net_path = base._is_net_path;
             this._authority = base._authority;
             if (base._is_server) {
                 this._is_server = base._is_server;
                 this._userinfo = base._userinfo;
                 this._host = base._host;
                 this._port = base._port;
             } else if (base._is_reg_name) {
                 this._is_reg_name = base._is_reg_name;
             }
         }
         // an authority on the relative URI replaces whatever authority and
         // path were chosen above
         if (relative._authority != null) {
             this._is_net_path = relative._is_net_path;
             this._authority = relative._authority;
             if (relative._is_server) {
                 this._is_server = relative._is_server;
                 this._userinfo = relative._userinfo;
                 this._host = relative._host;
                 this._port = relative._port;
             } else if (relative._is_reg_name) {
                 this._is_reg_name = relative._is_reg_name;
             }
             this._is_abs_path = relative._is_abs_path;
             this._is_rel_path = relative._is_rel_path;
             this._path = relative._path;
         }
         // resolve the path and query if necessary
         if (relative._authority == null
             && (relative._scheme == null || schemesEqual)) {
             if ((relative._path == null || relative._path.length == 0)
                 && relative._query == null) {
                 // handle a reference to the current document, see RFC 2396
                 // section 5.2 step 2
                 this._path = base._path;
                 this._query = base._query;
             } else {
                 this._path = resolvePath(base._path, relative._path);
             }
         }
         // base._query removed
         if (relative._query != null) {
             this._query = relative._query;
         }
         // base._fragment removed
         if (relative._fragment != null) {
             this._fragment = relative._fragment;
         }
         this.setURI();
         // reparse the newly built URI, this will ensure that all flags are set
         // correctly.
         // TODO there must be a better way to do this
         parseUriReference(new String(_uri), true);
     }

     // --------------------------------------------------- Instance Variables

     /** Version ID for serialization */
     static final long serialVersionUID = 604752400577948726L;

     /**
      * Cache the hash code for this URI. Initially 0.
      */
     protected int hash = 0;

     /**
      * This Uniform Resource Identifier (URI). The URI is always in an "escaped"
      * form, since escaping or unescaping a completed URI might change its
      * semantics.
      */
     protected char[] _uri = null;

     /**
      * The charset of the protocol used by this URI instance. Initially
      * {@code null}; assigned by the constructor that takes a charset.
      */
     protected String protocolCharset = null;

     /**
      * The default charset of the protocol. RFC 2277, 2396
      */
     protected static String defaultProtocolCharset = "UTF-8";

     /**
      * The default charset of the document. RFC 2277, 2396 The static
      * initializer sets it to the charset derived from the default locale and
      * falls back to the platform's charset when that is {@code null}.
      */
     protected static String defaultDocumentCharset = null;

     /**
      * The charset that {@code LocaleToCharsetMap.getCharset} returns for the
      * default locale; assigned by the static initializer.
      */
     protected static String defaultDocumentCharsetByLocale = null;

     /**
      * The value of the {@code file.encoding} system property; stays
      * {@code null} when reading the property raises a SecurityException.
      */
     protected static String defaultDocumentCharsetByPlatform = null;
     // Static initializer for defaultDocumentCharset
     static {
         Locale locale = Locale.getDefault();
         // in order to support backward compatiblity
         if (locale != null) {
             defaultDocumentCharsetByLocale = LocaleToCharsetMap.getCharset(locale);
             // set the default document charset
             defaultDocumentCharset = defaultDocumentCharsetByLocale;
         }
         // in order to support platform encoding
         try {
             defaultDocumentCharsetByPlatform = System.getProperty("file.encoding");
         } catch (SecurityException ignore) {
         }
         if (defaultDocumentCharset == null) {
             // set the default document charset
             defaultDocumentCharset = defaultDocumentCharsetByPlatform;
         }
     }

     /**
      * The scheme, or {@code null} if none has been set.
      */
     protected char[] _scheme = null;

     /**
      * The opaque part, or {@code null} if none has been set.
      */
     protected char[] _opaque = null;

     /**
      * The authority, or {@code null} if none has been set.
      */
     protected char[] _authority = null;

     /**
      * The userinfo, or {@code null} if none has been set.
      */
     protected char[] _userinfo = null;

     /**
      * The host, or {@code null} if none has been set.
      */
     protected char[] _host = null;

     /**
      * The port. Initially -1.
      */
     protected int _port = -1;

     /**
      * The path, or {@code null} if none has been set.
      */
     protected char[] _path = null;

     /**
      * The query, or {@code null} if none has been set.
      */
     protected char[] _query = null;

     /**
      * The fragment, or {@code null} if none has been set.
      */
     protected char[] _fragment = null;

     /**
      * The root path, a single '/'. Only the reference is final; the array
      * contents themselves are not protected against modification.
      */
     protected static final char[] rootPath = { '/' };

     // ---------------------- Generous characters for each component validation

     /**
      * The percent "%" character always has the reserved purpose of being the
      * escape indicator, it must be escaped as "%25" in order to be used as data
      * within a URI.
      */
     protected static final BitSet percent = new BitSet(256);
     // Static initializer for percent: the set contains only the '%' character
     static {
         percent.set('%');
     }

     /**
      * BitSet for digit.
      *
      * <blockquote>
      *
      * <pre>
      * digit = &quot;0&quot; | &quot;1&quot; | &quot;2&quot; | &quot;3&quot; | &quot;4&quot; | &quot;5&quot; | &quot;6&quot; | &quot;7&quot; | &quot;8&quot; | &quot;9&quot;
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet digit = new BitSet(256);
     // Static initializer for digit
     static {
         // range form of set(): the upper bound is exclusive, hence the + 1
         digit.set('0', '9' + 1);
     }

     /**
      * BitSet for alpha.
      *
      * <blockquote>
      *
      * <pre>
      * alpha = lowalpha | upalpha
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet alpha = new BitSet(256);
     // Static initializer for alpha
     static {
         // upalpha and lowalpha; set(from, to) excludes "to", hence the + 1
         alpha.set('A', 'Z' + 1);
         alpha.set('a', 'z' + 1);
     }

     /**
      * BitSet for alphanum (join of alpha &amp; digit).
      *
      * <blockquote>
      *
      * <pre>
      * alphanum = alpha | digit
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet alphanum = new BitSet(256);
     // Static initializer for alphanum: union of the digit and alpha sets
     static {
         alphanum.or(digit);
         alphanum.or(alpha);
     }

     /**
      * BitSet for hex.
      *
      * <blockquote>
      *
      * <pre>
      * hex = digit | &quot;A&quot; | &quot;B&quot; | &quot;C&quot; | &quot;D&quot; | &quot;E&quot; | &quot;F&quot; | &quot;a&quot; | &quot;b&quot; | &quot;c&quot; | &quot;d&quot; | &quot;e&quot;
      *     | &quot;f&quot;
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet hex = new BitSet(256);
     // Static initializer for hex
     static {
         hex.or(digit);
         // the letters A-F in both cases; the upper bound of set() is exclusive
         hex.set('A', 'F' + 1);
         hex.set('a', 'f' + 1);
     }

     /**
      * BitSet for escaped.
      *
      * <blockquote>
      *
      * <pre>
      * escaped       = "%" hex hex
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet escaped = new BitSet(256);
     // Static initializer for escaped
     static {
         // every character that can occur in "%" hex hex: hex digits and '%'
         escaped.or(hex);
         escaped.or(percent);
     }

     /**
      * BitSet for mark.
      *
      * <blockquote>
      *
      * <pre>
      * mark = &quot;-&quot; | &quot;_&quot; | &quot;.&quot; | &quot;!&quot; | &quot;~&quot; | &quot;*&quot; | &quot;'&quot; | &quot;(&quot; | &quot;)&quot;
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet mark = new BitSet(256);
     // Static initializer for mark
     static {
         // the nine mark characters, one bit each
         for (char c : "-_.!~*'()".toCharArray()) {
             mark.set(c);
         }
     }

     /**
      * Data characters that are allowed in a URI but do not have a reserved
      * purpose are called unreserved.
      *
      * <blockquote>
      *
      * <pre>
      * unreserved = alphanum | mark
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet unreserved = new BitSet(256);
     // Static initializer for unreserved: union of the mark and alphanum sets
     static {
         unreserved.or(mark);
         unreserved.or(alphanum);
     }

     /**
      * BitSet for reserved.
      *
      * <blockquote>
      *
      * <pre>
      * reserved = &quot;;&quot; | &quot;/&quot; | &quot;?&quot; | &quot;:&quot; | &quot;@&quot; | &quot;&amp;&quot; | &quot;=&quot; | &quot;+&quot; | &quot;$&quot; | &quot;,&quot;
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet reserved = new BitSet(256);
     // Static initializer for reserved
     static {
         // the ten reserved characters, one bit each
         for (char c : ";/?:@&=+$,".toCharArray()) {
             reserved.set(c);
         }
     }

     /**
      * BitSet for uric.
      *
      * <blockquote>
      *
      * <pre>
      * uric = reserved | unreserved | escaped
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet uric = new BitSet(256);
     // Static initializer for uric
     static {
         // union of the three character classes named in the grammar
         for (BitSet part : new BitSet[] { reserved, unreserved, escaped }) {
             uric.or(part);
         }
     }

     /**
      * BitSet for fragment (alias for uric).
      *
      * <blockquote>
      *
      * <pre>
      * fragment      = *uric
      * </pre>
      *
      * </blockquote>
      *
      */
     // Refers to the same BitSet instance as uric; it is not a copy.
     protected static final BitSet fragment = uric;

     /**
      * BitSet for query (alias for uric).
      *
      * <blockquote>
      *
      * <pre>
      * query         = *uric
      * </pre>
      *
      * </blockquote>
      *
      */
     // Refers to the same BitSet instance as uric; it is not a copy.
     protected static final BitSet query = uric;

     /**
      * BitSet for pchar.
      *
      * <blockquote>
      *
      * <pre>
      * pchar = unreserved | escaped | &quot;:&quot; | &quot;@&quot; | &quot;&amp;&quot; | &quot;=&quot; | &quot;+&quot; | &quot;$&quot; | &quot;,&quot;
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet pchar = new BitSet(256);
     // Static initializer for pchar
     static {
         pchar.or(unreserved);
         pchar.or(escaped);
         // the additional punctuation allowed inside a path character
         for (char c : ":@&=+$,".toCharArray()) {
             pchar.set(c);
         }
     }

     /**
      * BitSet for param (alias for pchar).
      *
      * <blockquote>
      *
      * <pre>
      * param         = *pchar
      * </pre>
      *
      * </blockquote>
      *
      */
     // Refers to the same BitSet instance as pchar; it is not a copy.
     protected static final BitSet param = pchar;

     /**
      * BitSet for segment.
      *
      * <blockquote>
      *
      * <pre>
      * segment       = *pchar *( ";" param )
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet segment = new BitSet(256);
     // Static initializer for segment
     static {
         // ';' introduces each param of a segment
         segment.set(';');
         segment.or(pchar);
         segment.or(param);
     }

     /**
      * BitSet for path segments.
      *
      * <blockquote>
      *
      * <pre>
      * path_segments = segment *( "/" segment )
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet path_segments = new BitSet(256);
     // Static initializer for path_segments: segment characters plus the '/' separator
     static {
         path_segments.or(segment);
         path_segments.set('/');
     }

     /**
      * URI absolute path.
      *
      * <blockquote>
      *
      * <pre>
      * abs_path      = "/"  path_segments
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet abs_path = new BitSet(256);
     // Static initializer for abs_path: path_segments characters plus the leading '/'
     static {
         abs_path.or(path_segments);
         abs_path.set('/');
     }

     /**
      * URI bitset for encoding typical non-slash characters.
      *
      * <blockquote>
      *
      * <pre>
      * uric_no_slash = unreserved | escaped | &quot;;&quot; | &quot;?&quot; | &quot;:&quot; | &quot;@&quot; | &quot;&amp;&quot; | &quot;=&quot; | &quot;+&quot;
      *     | &quot;$&quot; | &quot;,&quot;
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet uric_no_slash = new BitSet(256);
     // Static initializer for uric_no_slash
     static {
         uric_no_slash.or(unreserved);
         uric_no_slash.or(escaped);
         uric_no_slash.set(';');
         uric_no_slash.set('?');
         // ':' belongs to the uric_no_slash production; it used to be missing
         // from this set because ';' was set a second time in its place
         uric_no_slash.set(':');
         uric_no_slash.set('@');
         uric_no_slash.set('&');
         uric_no_slash.set('=');
         uric_no_slash.set('+');
         uric_no_slash.set('$');
         uric_no_slash.set(',');
     }

     /**
      * URI bitset that combines uric_no_slash and uric.
      *
      * <blockquote>
      *
      * <pre>
      * opaque_part = uric_no_slash *uric
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet opaque_part = new BitSet(256);
     // Static initializer for opaque_part
     static {
         // Deliberately generous: the grammar forbids a slash only as the
         // first character, but this set accepts it at any position.
         opaque_part.or(uric);
         opaque_part.or(uric_no_slash);
     }

     /**
      * URI bitset that combines absolute path and opaque part.
      *
      * <blockquote>
      *
      * <pre>
      * path          = [ abs_path | opaque_part ]
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet path = new BitSet(256);
     // Static initializer for path: union of the opaque_part and abs_path sets
     static {
         path.or(opaque_part);
         path.or(abs_path);
     }

     /**
      * Port, a logical alias for digit.
      */
     // Refers to the same BitSet instance as digit; it is not a copy.
     protected static final BitSet port = digit;

     /**
      * Bitset that combines digit and dot for IPv4address.
      *
      * <blockquote>
      *
      * <pre>
      * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet IPv4address = new BitSet(256);
     // IPv4address characters: the dot separator plus every digit
     static {
         IPv4address.set('.');
         IPv4address.or(digit);
     }

     /**
      * RFC 2373.
      *
      * <blockquote>
      *
      * <pre>
      * IPv6address = hexpart [ ":" IPv4address ]
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet IPv6address = new BitSet(256);
     // IPv6address characters: IPv4address, the colon and the hex digits
     static {
         IPv6address.or(IPv4address);
         IPv6address.set(':');
         IPv6address.or(hex); // hexpart
     }

     /**
      * RFC 2732, 2373.
      *
      * <blockquote>
      *
      * <pre>
      * IPv6reference   = "[" IPv6address "]"
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet IPv6reference = new BitSet(256);
     // IPv6reference characters: IPv6address plus the enclosing brackets
     static {
         IPv6reference.or(IPv6address);
         for (char bracket : new char[] { '[', ']' }) {
             IPv6reference.set(bracket);
         }
     }

     /**
      * BitSet for toplabel.
      *
      * <blockquote>
      *
      * <pre>
      * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet toplabel = new BitSet(256);
     // toplabel characters: the hyphen plus everything in alphanum
     static {
         toplabel.set('-');
         toplabel.or(alphanum);
     }

     /**
      * BitSet for domainlabel. This field refers to the very same BitSet
      * instance as <code>toplabel</code>; it is not a copy.
      *
      * <blockquote>
      *
      * <pre>
      * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet domainlabel = toplabel;

     /**
      * BitSet for hostname.
      *
      * <blockquote>
      *
      * <pre>
      * hostname      = *( domainlabel "." ) toplabel [ "." ]
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet hostname = new BitSet(256);
     // hostname characters: the label separator plus the label characters
     static {
         hostname.set('.');
         // domainlabel is declared as the same BitSet as toplabel, so a single
         // union covers both kinds of label.
         hostname.or(toplabel);
     }

     /**
      * BitSet for host.
      *
      * <blockquote>
      *
      * <pre>
      * host = hostname | IPv4address | IPv6reference
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet host = new BitSet(256);
     // host characters: IPv6reference and hostname
     static {
         // No separate union with IPv4address: IPv6reference is built from
         // IPv6address, which already includes the IPv4address characters.
         host.or(IPv6reference);
         host.or(hostname);
     }

     /**
      * BitSet for hostport.
      *
      * <blockquote>
      *
      * <pre>
      * hostport      = host [ ":" port ]
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet hostport = new BitSet(256);
     // hostport characters: port, the ':' separator and host
     static {
         hostport.or(port);
         hostport.set(':');
         hostport.or(host);
     }

     /**
      * Bitset for userinfo.
      *
      * <blockquote>
      *
      * <pre>
      * userinfo      = *( unreserved | escaped |
      *                    ";" | ":" | "&amp;" | "=" | "+" | "$" | "," )
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet userinfo = new BitSet(256);
     // userinfo characters: unreserved, escaped and the listed punctuation
     static {
         userinfo.or(unreserved);
         userinfo.or(escaped);
         for (char sym : ";:&=+$,".toCharArray()) {
             userinfo.set(sym);
         }
     }

     /**
      * BitSet for within the userinfo component like user and password.
      */
     public static final BitSet within_userinfo = new BitSet(256);
     // Start from userinfo, then drop the delimiter characters
     static {
         within_userinfo.or(userinfo);
         // ';' is reserved within authority; the other four are cleared too
         for (char sym : ";:@?/".toCharArray()) {
             within_userinfo.clear(sym);
         }
     }

     /**
      * Bitset for server.
      *
      * <blockquote>
      *
      * <pre>
      * server        = [ [ userinfo "@" ] hostport ]
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet server = new BitSet(256);
     // server characters: hostport, the '@' separator and userinfo
     static {
         server.or(hostport);
         server.set('@');
         server.or(userinfo);
     }

     /**
      * BitSet for reg_name.
      *
      * <blockquote>
      *
      * <pre>
      * reg_name = 1 * (unreserved | escaped | &quot;$&quot; | &quot;,&quot; | &quot;;&quot; | &quot;:&quot; | &quot;@&quot; | &quot;&amp;&quot; | &quot;=&quot; | &quot;+&quot;)
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet reg_name = new BitSet(256);
     // reg_name characters: unreserved, escaped and the listed punctuation
     static {
         reg_name.or(unreserved);
         reg_name.or(escaped);
         for (char sym : "$,;:@&=+".toCharArray()) {
             reg_name.set(sym);
         }
     }

     /**
      * BitSet for authority.
      *
      * <blockquote>
      *
      * <pre>
      * authority = server | reg_name
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet authority = new BitSet(256);
     // authority accepts every character of reg_name or server
     static {
         authority.or(reg_name);
         authority.or(server);
     }

     /**
      * BitSet for scheme.
      *
      * <blockquote>
      *
      * <pre>
      * scheme = alpha * (alpha | digit | &quot;+&quot; | &quot;-&quot; | &quot;.&quot;)
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet scheme = new BitSet(256);
     // scheme characters: digits, letters and "+", "-", "."
     static {
         scheme.or(digit);
         scheme.or(alpha);
         for (char sym : "+-.".toCharArray()) {
             scheme.set(sym);
         }
     }

     /**
      * BitSet for rel_segment.
      *
      * <blockquote>
      *
      * <pre>
      * rel_segment = 1 * (unreserved | escaped | &quot;;&quot; | &quot;@&quot; | &quot;&amp;&quot; | &quot;=&quot; | &quot;+&quot; | &quot;$&quot; | &quot;,&quot;)
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet rel_segment = new BitSet(256);
     // rel_segment characters: unreserved, escaped and the listed punctuation
     static {
         rel_segment.or(unreserved);
         rel_segment.or(escaped);
         for (char sym : ";@&=+$,".toCharArray()) {
             rel_segment.set(sym);
         }
     }

     /**
      * BitSet for rel_path.
      *
      * <blockquote>
      *
      * <pre>
      * rel_path = rel_segment[abs_path]
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet rel_path = new BitSet(256);
     // rel_path accepts every character of abs_path or rel_segment
     static {
         rel_path.or(abs_path);
         rel_path.or(rel_segment);
     }

     /**
      * BitSet for net_path.
      *
      * <blockquote>
      *
      * <pre>
      * net_path      = "//" authority [ abs_path ]
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet net_path = new BitSet(256);
     // net_path characters: abs_path, authority and the slash of the "//" prefix
     static {
         net_path.or(abs_path);
         net_path.or(authority);
         net_path.set('/');
     }

     /**
      * BitSet for hier_part.
      *
      * <blockquote>
      *
      * <pre>
      * hier_part     = ( net_path | abs_path ) [ "?" query ]
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet hier_part = new BitSet(256);
     // hier_part characters: query, abs_path and net_path
     static {
         // '?' is not set explicitly; it was noted as already included.
         hier_part.or(query);
         hier_part.or(abs_path);
         hier_part.or(net_path);
     }

     /**
      * BitSet for relativeURI.
      *
      * <blockquote>
      *
      * <pre>
      * relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet relativeURI = new BitSet(256);
     // relativeURI characters: query plus the three path forms
     static {
         // '?' is not set explicitly; it was noted as already included.
         relativeURI.or(query);
         relativeURI.or(rel_path);
         relativeURI.or(abs_path);
         relativeURI.or(net_path);
     }

     /**
      * BitSet for absoluteURI.
      *
      * <blockquote>
      *
      * <pre>
      * absoluteURI   = scheme ":" ( hier_part | opaque_part )
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet absoluteURI = new BitSet(256);
     // absoluteURI characters: opaque_part, hier_part, ':' and scheme
     static {
         absoluteURI.or(opaque_part);
         absoluteURI.or(hier_part);
         absoluteURI.set(':');
         absoluteURI.or(scheme);
     }

     /**
      * BitSet for URI-reference.
      *
      * <blockquote>
      *
      * <pre>
      * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
      * </pre>
      *
      * </blockquote>
      *
      */
     protected static final BitSet URI_reference = new BitSet(256);
     // URI_reference characters: fragment, '#', relativeURI and absoluteURI
     static {
         URI_reference.or(fragment);
         URI_reference.set('#');
         URI_reference.or(relativeURI);
         URI_reference.or(absoluteURI);
     }

     // ---------------------------- Characters disallowed within the URI syntax
     // Excluded US-ASCII Characters are like control, space, delims and unwise

     /**
      * BitSet for control.
      */
     public static final BitSet control = new BitSet(256);
     // control characters: 0x00 through 0x1F, plus 0x7F
     static {
         // range form of set(): start inclusive, end exclusive
         control.set(0x00, 0x20);
         control.set(0x7F);
     }

     /**
      * BitSet for space.
      */
     public static final BitSet space = new BitSet(256);
     // space holds exactly one character: the US-ASCII space (0x20)
     static {
         space.set(' ');
     }

     /**
      * BitSet for delims.
      */
     public static final BitSet delims = new BitSet(256);
     // delims characters: < > # % "
     static {
         for (char sym : "<>#%\"".toCharArray()) {
             delims.set(sym);
         }
     }

     /**
      * BitSet for unwise.
      */
     public static final BitSet unwise = new BitSet(256);
     // unwise characters: { } | \ ^ [ ] `
     static {
         for (char sym : "{}|\\^[]`".toCharArray()) {
             unwise.set(sym);
         }
     }

     /**
      * Disallowed rel_path before escaping.
      */
     public static final BitSet disallowed_rel_path = new BitSet(256);
     // disallowed_rel_path = every uric character that rel_path does not contain
     static {
         for (int ch = uric.nextSetBit(0); ch >= 0; ch = uric.nextSetBit(ch + 1)) {
             if (!rel_path.get(ch)) {
                 disallowed_rel_path.set(ch);
             }
         }
     }

     /**
      * Disallowed opaque_part before escaping.
      */
     public static final BitSet disallowed_opaque_part = new BitSet(256);
     // disallowed_opaque_part = every uric character that opaque_part does not contain
     static {
         for (int ch = uric.nextSetBit(0); ch >= 0; ch = uric.nextSetBit(ch + 1)) {
             if (!opaque_part.get(ch)) {
                 disallowed_opaque_part.set(ch);
             }
         }
     }

     // ----------------------- Characters allowed within and for each component

     /**
      * Those characters that are allowed for the authority component.
      */
     public static final BitSet allowed_authority = new BitSet(256);
     // allowed_authority = authority without the percent sign
     static {
         allowed_authority.or(authority);
         allowed_authority.set('%', false);
     }

     /**
      * Those characters that are allowed for the opaque_part.
      */
     public static final BitSet allowed_opaque_part = new BitSet(256);
     // allowed_opaque_part = opaque_part without the percent sign
     static {
         allowed_opaque_part.or(opaque_part);
         allowed_opaque_part.set('%', false);
     }

     /**
      * Those characters that are allowed for the reg_name.
      */
     public static final BitSet allowed_reg_name = new BitSet(256);
     // allowed_reg_name = reg_name without the percent sign
     static {
         allowed_reg_name.or(reg_name);
         // only the single '%' bit is removed here
         allowed_reg_name.set('%', false);
     }

     /**
      * Those characters that are allowed for the userinfo component.
      */
     public static final BitSet allowed_userinfo = new BitSet(256);
     // allowed_userinfo = userinfo without the percent sign
     static {
         allowed_userinfo.or(userinfo);
         // only the single '%' bit is removed here
         allowed_userinfo.set('%', false);
     }

     /**
      * Those characters that are allowed for within the userinfo component.
      */
     public static final BitSet allowed_within_userinfo = new BitSet(256);
     // allowed_within_userinfo = within_userinfo without the percent sign
     static {
         allowed_within_userinfo.or(within_userinfo);
         allowed_within_userinfo.set('%', false);
     }

     /**
      * Those characters that are allowed for the IPv6reference component. The
      * characters '[', ']' in IPv6reference should be excluded.
      */
     public static final BitSet allowed_IPv6reference = new BitSet(256);
     // allowed_IPv6reference = IPv6reference without the two brackets
     static {
         allowed_IPv6reference.or(IPv6reference);
         // only '[' and ']' are removed, not the whole unwise set
         for (char bracket : new char[] { '[', ']' }) {
             allowed_IPv6reference.clear(bracket);
         }
     }

     /**
      * Those characters that are allowed for the host component. The characters
      * '[', ']' in IPv6reference should be excluded.
      */
     public static final BitSet allowed_host = new BitSet(256);
     // allowed_host accepts every character of allowed_IPv6reference or hostname
     static {
         allowed_host.or(allowed_IPv6reference);
         allowed_host.or(hostname);
     }

     /**
      * Those characters that are allowed for the authority component.
      */
     public static final BitSet allowed_within_authority = new BitSet(256);
     // Union of reg_name and server, minus five delimiter characters
     static {
         allowed_within_authority.or(reg_name);
         allowed_within_authority.or(server);
         for (char sym : ";:@?/".toCharArray()) {
             allowed_within_authority.clear(sym);
         }
     }

     /**
      * Those characters that are allowed for the abs_path.
      */
     public static final BitSet allowed_abs_path = new BitSet(256);
     // allowed_abs_path = abs_path minus '+' and minus the percent set
     static {
         // '/' is not set explicitly; it was noted as already included.
         allowed_abs_path.or(abs_path);
         allowed_abs_path.set('+', false);
         allowed_abs_path.andNot(percent);
     }

     /**
      * Those characters that are allowed for the rel_path.
      */
     public static final BitSet allowed_rel_path = new BitSet(256);
     // allowed_rel_path = rel_path without '%' and '+'
     static {
         allowed_rel_path.or(rel_path);
         for (char sym : "%+".toCharArray()) {
             allowed_rel_path.clear(sym);
         }
     }

     /**
      * Those characters that are allowed within the path.
      */
     public static final BitSet allowed_within_path = new BitSet(256);
     // allowed_within_path = abs_path without '/', ';', '=' and '?'
     static {
         allowed_within_path.or(abs_path);
         for (char sym : "/;=?".toCharArray()) {
             allowed_within_path.clear(sym);
         }
     }

     /**
      * Those characters that are allowed for the query component.
      */
     public static final BitSet allowed_query = new BitSet(256);
     // allowed_query = uric without the percent sign
     static {
         allowed_query.or(uric);
         allowed_query.set('%', false);
     }

     /**
      * Those characters that are allowed within the query component.
      */
     public static final BitSet allowed_within_query = new BitSet(256);
     // allowed_within_query = every allowed_query character that is not reserved
     static {
         for (int ch = allowed_query.nextSetBit(0); ch >= 0;
                 ch = allowed_query.nextSetBit(ch + 1)) {
             if (!reserved.get(ch)) {
                 allowed_within_query.set(ch);
             }
         }
     }

     /**
      * Those characters that are allowed for the fragment component.
      */
     public static final BitSet allowed_fragment = new BitSet(256);
     // allowed_fragment = uric without the percent sign
     static {
         allowed_fragment.or(uric);
         allowed_fragment.set('%', false);
     }

     // ------------------------------------------- Flags for this URI-reference

     // TODO: Figure out what all these variables are for and provide javadoc

     // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
     // absoluteURI = scheme ":" ( hier_part | opaque_part )
     /**
      * Cleared by parseUriReference before the authority is examined, then set
      * to true when the character at its current scan position is '/'.
      */
     protected boolean _is_hier_part;

     /**
      * Set to true by parseUriReference when a non-absolute path fails the
      * rel_path check but passes the opaque_part check.
      * NOTE(review): this flag is not part of the reset statement in
      * parseUriReference - confirm whether it is cleared elsewhere.
      */
     protected boolean _is_opaque_part;

     // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
     // hier_part = ( net_path | abs_path ) [ "?" query ]
     /**
      * Cleared by parseUriReference, then set to true once the text following
      * a "//" prefix has been handed to parseAuthority.
      */
     protected boolean _is_net_path;

     /**
      * Cleared by parseUriReference, then set to true in the '/' case when the
      * path begins at the current scan position.
      */
     protected boolean _is_abs_path;

     /**
      * Cleared by parseUriReference, then set to true when a non-absolute path
      * passes the rel_path check.
      */
     protected boolean _is_rel_path;

     // net_path = "//" authority [ abs_path ]
     // authority = server | reg_name
     /**
      * Presumably true when the authority was recognised as a reg_name; the
      * assignment is not visible in the reviewed section - TODO confirm.
      */
     protected boolean _is_reg_name;

     /**
      * Presumably true when the authority was recognised as a server; the
      * assignment is not visible in the reviewed section - TODO confirm.
      */
     protected boolean _is_server; // = _has_server

     // server = [ [ userinfo "@" ] hostport ]
     // host = hostname | IPv4address | IPv6reference
     /**
      * Presumably true when the host is a hostname; the assignment is not
      * visible in the reviewed section - TODO confirm.
      */
     protected boolean _is_hostname;

     /**
      * Presumably true when the host is an IPv4address; the assignment is not
      * visible in the reviewed section - TODO confirm.
      */
     protected boolean _is_IPv4address;

     /**
      * Presumably true when the host is an IPv6reference; the assignment is
      * not visible in the reviewed section - TODO confirm.
      */
     protected boolean _is_IPv6reference;

     // ------------------------------------------ Character and escape encoding

     /**
      * Encodes URI string. This is a two-step mapping: first from original
      * characters to octets, and then from octets to URI characters:
      *
      * <blockquote>
      *
      * <pre>
      *   original character sequence->octet sequence->URI character sequence
      * </pre>
      *
      * </blockquote>
      *
      * An escaped octet is encoded as a character triplet, consisting of the
      * percent character "%" followed by the two hexadecimal digits representing
      * the octet code. For example, "%20" is the escaped encoding for the
      * US-ASCII space character.
      *
      * Conversion from the local filesystem character set to UTF-8 will normally
      * involve a two step process. First convert the local character set to the
      * UCS; then convert the UCS to UTF-8. The first step in the process can be
      * performed by maintaining a mapping table that includes the local
      * character set code and the corresponding UCS code. The next step is to
      * convert the UCS character code to the UTF-8 encoding.
      *
      * Mapping between vendor codepages can be done in a very similar manner as
      * described above.
      *
      * The only time escape encodings can allowedly be made is when a URI is
      * being created from its component parts. The escape and validate methods
      * are internally performed within this method.
      *
      * @param original the original character sequence
      * @param allowed those characters that are allowed within a component
      * @param charset the protocol charset
      * @return URI character sequence
      * @throws URIException null component or unsupported character encoding
      */

     protected static char[] encode(String original, BitSet allowed,
             String charset) throws URIException {
         // Both arguments are mandatory; the original string is checked first.
         if (null == original) {
             throw new IllegalArgumentException("Original string may not be null");
         }
         if (null == allowed) {
             throw new IllegalArgumentException("Allowed bitset may not be null");
         }
         // original character sequence -> octet sequence
         final byte[] octets = getBytes(original, charset);
         // octet sequence -> URI character sequence
         final byte[] escapedOctets = encodeUrl(allowed, octets);
         final String uriChars = getAsciiString(escapedOctets);
         return uriChars.toCharArray();
     }

     /**
      * Decodes URI encoded string. This is a two-step mapping: first from URI
      * characters to octets, and then from octets to original
      * characters:
      *
      * <blockquote>
      *
      * <pre>
      *   URI character sequence->octet sequence->original character sequence
      * </pre>
      *
      * </blockquote>
      *
      * A URI must be separated into its components before the escaped characters
      * within those components can be allowedly decoded.
      *
      * Notice that there is a chance that URI characters that are non UTF-8 may
      * be parsed as valid UTF-8. A recent non-scientific analysis found that EUC
      * encoded Japanese words had a 2.7% false reading; SJIS had a 0.0005% false
      * reading; other encoding such as ASCII or KOI-8 have a 0% false reading.
      *
      * The percent "%" character always has the reserved purpose of being the
      * escape indicator, it must be escaped as "%25" in order to be used as data
      * within a URI.
      *
      * The unescape method is internally performed within this method.
      *
      * @param component the URI character sequence
      * @param charset the protocol charset
      * @return original character sequence
      * @throws URIException incomplete trailing escape pattern or unsupported
      *             character encoding
      */
     protected static String decode(char[] component, String charset)
             throws URIException {
         if (component != null) {
             // Delegate to the String overload with the characters copied
             // into a String.
             final String text = String.valueOf(component);
             return decode(text, charset);
         }
         throw new IllegalArgumentException(
             "Component array of chars may not be null");
     }

     /**
      * Decodes URI encoded string. This is a two-step mapping: first from URI
      * characters to octets, and then from octets to original
      * characters:
      *
      * <blockquote>
      *
      * <pre>
      *   URI character sequence->octet sequence->original character sequence
      * </pre>
      *
      * </blockquote>
      *
      * A URI must be separated into its components before the escaped characters
      * within those components can be allowedly decoded.
      *
      * Notice that there is a chance that URI characters that are non UTF-8 may
      * be parsed as valid UTF-8. A recent non-scientific analysis found that EUC
      * encoded Japanese words had a 2.7% false reading; SJIS had a 0.0005% false
      * reading; other encoding such as ASCII or KOI-8 have a 0% false reading.
      *
      * The percent "%" character always has the reserved purpose of being the
      * escape indicator, it must be escaped as "%25" in order to be used as data
      * within a URI.
      *
      * The unescape method is internally performed within this method.
      *
      * @param component the URI character sequence
      * @param charset the protocol charset
      * @return original character sequence
      * @throws URIException incomplete trailing escape pattern or unsupported
      *             character encoding
      * @since 3.0
      */
     protected static String decode(String component, String charset)
             throws URIException {
         if (component == null) {
             // This overload takes a String, so the message must not speak of
             // an "array of chars" the way the char[] overload does.
             throw new IllegalArgumentException(
                 "Component string may not be null");
         }
         // URI character sequence -> octet sequence -> original characters
         byte[] rawdata = decodeUrl(getAsciiBytes(component));
         return getString(rawdata, charset);
     }

     /**
      * Pre-validate the unescaped URI string within a specific component.
      *
      * @param component the component string within the component
      * @param disallowed those characters disallowed within the component
      * @return <code>true</code> if the component contains none of the
      *         disallowed characters; <code>false</code> if the component is
      *         undefined (null) or contains a disallowed character
      */
     protected boolean prevalidate(String component, BitSet disallowed) {
         // an undefined component never passes
         if (component == null) {
             return false;
         }
         // advance until the end or the first disallowed character
         final int len = component.length();
         int pos = 0;
         while (pos < len && !disallowed.get(component.charAt(pos))) {
             pos++;
         }
         // reaching the end means nothing disallowed was found
         return pos == len;
     }

     /**
      * Validate the URI characters within a specific component. The component
      * must be performed after escape encoding. Or it doesn't include escaped
      * characters.
      *
      * @param component the characters sequence within the component
      * @param generous those characters that are allowed within a component
      * @return if true, it's the correct URI character sequence
      */
     protected boolean validate(char[] component, BitSet generous) {
         // -1 as the end offset means "up to the last character"
         final int firstOffset = 0;
         final int throughLastChar = -1;
         return validate(component, firstOffset, throughLastChar, generous);
     }

     /**
      * Validate the URI characters within a specific component. The component
      * must be performed after escape encoding. Or it doesn't include escaped
      * characters.
      *
      * It's not that much strict, generous. The strict validation might be
      * performed before being called this method.
      *
      * @param component the characters sequence within the component
      * @param soffset the starting offset of the given component
      * @param eoffset the ending offset of the given component if -1, it means
      *            the length of the component
      * @param generous those characters that are allowed within a component
      * @return if true, it's the correct URI character sequence
      */
     protected boolean validate(char[] component, int soffset, int eoffset,
             BitSet generous) {
         // -1 stands for the index of the last character; the end is inclusive
         final int last = (eoffset == -1) ? component.length - 1 : eoffset;
         int idx = soffset;
         while (idx <= last) {
             if (!generous.get(component[idx])) {
                 return false; // found a character outside the generous set
             }
             idx++;
         }
         return true;
     }

     /**
      * In order to avoid any possibility of conflict with non-ASCII characters,
      * Parse a URI reference as a <code>String</code> with the character
      * encoding of the local system or the document.
      *
      * The following line is the regular expression for breaking-down a URI
      * reference into its components.
      *
      * <blockquote>
      *
      * <pre>
      *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
      *    12            3  4          5       6  7        8 9
      * </pre>
      *
      * </blockquote>
      *
      * For example, matching the above expression to
      * http://jakarta.apache.org/ietf/uri/#Related results in the following
      * subexpression matches:
      *
      * <blockquote>
      *
      * <pre>
      *               $1 = http:
      *  scheme    =  $2 = http
      *               $3 = //jakarta.apache.org
      *  authority =  $4 = jakarta.apache.org
      *  path      =  $5 = /ietf/uri/
      *               $6 = <undefined>
      *  query     =  $7 = <undefined>
      *               $8 = #Related
      *  fragment  =  $9 = Related
      * </pre>
      *
      * </blockquote>
      *
      *
      * @param original the original character sequence
      * @param escaped <code>true</code> if <code>original</code> is escaped
      * @throws URIException If an error occurs.
      */
     protected void parseUriReference(String original, boolean escaped)
             throws URIException {

         // validate and contruct the URI character sequence
         if (original == null) {
             throw new URIException("URI-Reference required");
         }

         /*
          * @ ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
          */
         String tmp = original.trim();

         /*
          * The length of the string sequence of characters. It may not be equal
          * to the length of the byte array.
          */
         int length = tmp.length();

         /*
          * Remove the delimiters like angle brackets around an URI.
          */
         boolean delim = false;
         if (length > 0) {
             char[] firstDelimiter = { tmp.charAt(0) };
             if (validate(firstDelimiter, delims)) {
                 if (length >= 2) {
                     char[] lastDelimiter = { tmp.charAt(length - 1) };
                     if (validate(lastDelimiter, delims)) {
                         delim = true;
                     }
                 }
             }
         }
         if (delim) {
             tmp = tmp.substring(1, length - 1);
             length = length - 2;
         }
         else {
             tmp = original;
             length = original.length();
             int idx = 0;
             while (idx < length  && tmp.charAt(idx) <= ' ') {
                 idx++;
             }
             if (idx > 0) {
                 if (idx < length) {
                     tmp = tmp.substring(idx);
                     length -= idx;
                 }
                 else {
                     tmp = "";
                     length = 0;
                 }
             }
         }

         /*
          * The starting index
          */
         int from = 0;

         /*
          * The test flag whether the URI is started from the path component.
          */
         boolean isStartedFromPath = false;
         int atColon = tmp.indexOf(':');
         int atSlash = tmp.indexOf('/');
         if ((atColon <= 0 && !tmp.startsWith("//"))
             || (atSlash >= 0 && atSlash < atColon)) {
             isStartedFromPath = true;
         }

         /*
          * <blockquote><pre>
          * @@@@@@@@ ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
          * </pre></blockquote>
          */
         int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
         if (at == -1) {
             at = 0;
         }

         /*
          * Parse the scheme. <blockquote><pre> scheme = $2 = http
          * @ ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
          * </pre></blockquote>
          */
         if (at > 0 && at < length && tmp.charAt(at) == ':') {
             char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
             if (validate(target, scheme)) {
                 _scheme = target;
             } else {
                 throw new URIException("incorrect scheme");
             }
             from = ++at;
         }

         /*
          * Parse the authority component. <blockquote><pre> authority = $4 =
          * jakarta.apache.org
          * @@ ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
          * </pre></blockquote>
          */
         // Reset flags
         _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
         if (0 <= at && at < length && tmp.charAt(at) == '/') {
             // Set flag
             _is_hier_part = true;
             if (at + 2 < length && tmp.charAt(at + 1) == '/'
                 && !isStartedFromPath) {
                 // the temporary index to start the search from
                 int next = indexFirstOf(tmp, "/?#", at + 2);
                 if (next == -1) {
                     next = (tmp.substring(at + 2).length() == 0)
                             ? at + 2
                             : tmp.length();
                 }
                 parseAuthority(tmp.substring(at + 2, next), escaped);
                 from = at = next;
                 // Set flag
                 _is_net_path = true;
             }
             if (from == at) {
                 // Set flag
                 _is_abs_path = true;
             }
         }

         /*
          * Parse the path component. <blockquote><pre> path = $5 = /ietf/uri/
          * @@@@@@ ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
          * </pre></blockquote>
          */
         if (from < length) {
             // rel_path = rel_segment [ abs_path ]
             int next = indexFirstOf(tmp, "?#", from);
             if (next == -1) {
                 next = tmp.length();
             }
             if (!_is_abs_path) {
                 if (!escaped
                     && prevalidate(tmp.substring(from, next),
                         disallowed_rel_path)
                     || escaped
                     && validate(tmp.substring(from, next).toCharArray(),
                         rel_path)) {
                     // Set flag
                     _is_rel_path = true;
                 } else if (!escaped
                     && prevalidate(tmp.substring(from, next),
                         disallowed_opaque_part)
                     || escaped
                     && validate(tmp.substring(from, next).toCharArray(),
                         opaque_part)) {
                     // Set flag
                     _is_opaque_part = true;
                 } else {
                     // the path component may be empty
                     _path = null;
                 }
             }
             String s = tmp.substring(from, next);
             if (escaped) {
                 setRawPath(s.toCharArray());
             } else {
                 setPath(s);
             }
             at = next;
         }

         // set the charset to do escape encoding
         String charset = getProtocolCharset();

         /*
          * Parse the query component. <blockquote><pre> query = $7 =
          * <undefined>
          * @@@@@@@@@ ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
          * </pre></blockquote>
          */
         if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
             int next = tmp.indexOf('#', at + 1);
             if (next == -1) {
                 next = tmp.length();
             }
             if (escaped) {
                 _query = tmp.substring(at + 1, next).toCharArray();
                 if (!validate(_query, uric)) {
                     throw new URIException("Invalid query");
                 }
             } else {
                 _query = encode(tmp.substring(at + 1, next), allowed_query,
                     charset);
             }
             at = next;
         }

         /*
          * Parse the fragment component. <blockquote><pre> fragment = $9 =
          * Related
          * @@@@@@@@ ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
          * </pre></blockquote>
          */
         if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
             if (at + 1 == length) { // empty fragment
                 _fragment = "".toCharArray();
             } else {
                 _fragment = (escaped)
                         ? tmp.substring(at + 1).toCharArray()
                         : encode(tmp.substring(at + 1), allowed_fragment,
                             charset);
             }
         }

         // set this URI.
         setURI();
     }

     /**
      * Get the lowest index at which any of the given delimiter characters
      * first occurs in the given string.
      *
      * @param s the string to be indexed
      * @param delims the delimiters used to index
      * @return the earlier index if there are delimiters
      */
     protected int indexFirstOf(String s, String delims) {
         // A negative offset is clamped to the start of the string by the
         // three-argument overload, so this searches the whole string.
         final int fromBeginning = -1;
         return indexFirstOf(s, delims, fromBeginning);
     }

     /**
      * Get the lowest index, at or after the given offset, at which any of the
      * given delimiter characters occurs in the given string.
      *
      * @param s the string to be indexed
      * @param delims the delimiters used to index
      * @param offset the from index
      * @return the earlier index if there are delimiters
      */
     protected int indexFirstOf(String s, String delims, int offset) {
         // Nothing to search in, or nothing to search for.
         if (s == null || s.length() == 0) {
             return -1;
         }
         if (delims == null || delims.length() == 0) {
             return -1;
         }
         final int end = s.length();
         // An offset past the end can never match.
         if (offset > end) {
             return -1;
         }
         // Negative offsets start the scan at the first character.
         int pos = (offset < 0) ? 0 : offset;
         // Walk forward and stop at the first character that is a delimiter.
         while (pos < end) {
             if (delims.indexOf(s.charAt(pos)) >= 0) {
                 return pos;
             }
             pos++;
         }
         return -1;
     }

     /**
      * Get the index of the first occurrence of the given delimiter in the
      * given array.
      *
      * @param s the character array to be indexed
      * @param delim the delimiter used to index
      * @return the index of the first occurrence, or -1 if there is none
      */
     protected int indexFirstOf(char[] s, char delim) {
         // Search the whole array, i.e. start at index 0.
         final int start = 0;
         return indexFirstOf(s, delim, start);
     }

     /**
      * Get the index of the first occurrence of the given delimiter in the
      * given array, at or after the given offset.
      *
      * @param s the character array to be indexed
      * @param delim the delimiter used to index
      * @param offset The offset.
      * @return the index of the first occurrence, or -1 if there is none
      */
     protected int indexFirstOf(char[] s, char delim, int offset) {
         // No array, an empty array, or an offset past the end: no match.
         if (s == null || s.length == 0 || offset > s.length) {
             return -1;
         }
         // Negative offsets start the scan at the first element.
         int cursor = (offset < 0) ? 0 : offset;
         while (cursor < s.length) {
             if (s[cursor] == delim) {
                 return cursor;
             }
             cursor++;
         }
         return -1;
     }

     /**
      * Parse the authority component.
      *
      * @param original the original character sequence of authority component
      * @param escaped <code>true</code> if <code>original</code> is escaped
      * @throws URIException If an error occurs.
      */
     protected void parseAuthority(String original, boolean escaped)
             throws URIException {

         // Reset the classification flags; they are recomputed below.
         _is_reg_name = _is_server = _is_hostname = _is_IPv4address = _is_IPv6reference = false;

         // Forget userinfo and port left over from a previously parsed
         // authority. Without this, a later call (e.g. through
         // setRawAuthority) with an authority that lacks them would leak the
         // stale values into the _authority rebuilt below. -1 is the
         // "no port" marker tested when the authority is reassembled.
         _userinfo = null;
         _port = -1;

         // set the charset to do escape encoding
         String charset = getProtocolCharset();

         boolean hasPort = true;
         int from = 0;
         int next = original.indexOf('@');
         if (next != -1) { // an '@' is present (possibly at index 0)
             // everything before the '@' is the userinfo
             _userinfo = (escaped)
                     ? original.substring(0, next).toCharArray()
                     : encode(original.substring(0, next), allowed_userinfo,
                         charset);
             from = next + 1;
         }
         next = original.indexOf('[', from);
         if (next >= from) {
             // a '[' after the userinfo marks an IPv6 reference
             next = original.indexOf(']', from);
             if (next == -1) {
                 throw new URIException(URIException.PARSING, "IPv6reference");
             }
             next++;
             // The stored host runs from the start of the host part through
             // the closing ']', i.e. the brackets are kept.
             _host = (escaped)
                     ? original.substring(from, next).toCharArray()
                     : encode(original.substring(from, next),
                         allowed_IPv6reference, charset);
             // Set flag
             _is_IPv6reference = true;
         } else { // only for !_is_IPv6reference
             // the host ends at the first ':' or at the end of the input
             next = original.indexOf(':', from);
             if (next == -1) {
                 next = original.length();
                 hasPort = false;
             }
             // REMINDME: it doesn't need the pre-validation
             _host = original.substring(from, next).toCharArray();
             if (validate(_host, IPv4address)) {
                 // Set flag
                 _is_IPv4address = true;
             } else if (validate(_host, hostname)) {
                 // Set flag
                 _is_hostname = true;
             } else {
                 // neither an IPv4 address nor a hostname
                 _is_reg_name = true;
             }
         }
         if (_is_reg_name) {
             // Reset flags for a server-based naming authority
             _is_server = _is_hostname = _is_IPv4address = _is_IPv6reference = false;
             // set a registry-based naming authority: the whole input is
             // taken as the authority
             if (escaped) {
                 _authority = original.toCharArray();
                 if (!validate(_authority, reg_name)) {
                     throw new URIException("Invalid authority");
                 }
             } else {
                 _authority = encode(original, allowed_reg_name, charset);
             }
         } else {
             // a port is parsed only if at least one character follows the ':'
             if (original.length() - 1 > next && hasPort
                 && original.charAt(next) == ':') { // not empty
                 from = next + 1;
                 try {
                     _port = Integer.parseInt(original.substring(from));
                 } catch (NumberFormatException error) {
                     throw new URIException(URIException.PARSING,
                         "invalid port number");
                 }
             }
             // set a server-based naming authority:
             // [userinfo '@'] host [':' port]
             StringBuilder buf = new StringBuilder();
             if (_userinfo != null) { // has_userinfo
                 buf.append(_userinfo);
                 buf.append('@');
             }
             if (_host != null) {
                 buf.append(_host);
                 if (_port != -1) {
                     buf.append(':');
                     buf.append(_port);
                 }
             }
             _authority = buf.toString().toCharArray();
             // Set flag
             _is_server = true;
         }
     }

     /**
      * Once it's parsed successfully, set this URI.
      *
      * @see #getRawURI
      */
     protected void setURI() {
         // Layout being assembled:
         // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
         final StringBuilder assembled = new StringBuilder();
         if (_scheme != null) {
             assembled.append(_scheme).append(':');
         }
         if (_is_net_path) {
             assembled.append("//");
             if (_authority != null) { // has_authority
                 assembled.append(_authority);
             }
         }
         // The opaque part is used when it is both flagged and present;
         // otherwise a non-empty hierarchical/relative path is used.
         final boolean useOpaque = _opaque != null && _is_opaque_part;
         if (useOpaque) {
             assembled.append(_opaque);
         } else if (_path != null && _path.length != 0) {
             assembled.append(_path);
         }
         if (_query != null) { // has_query
             assembled.append('?').append(_query);
         }
         // The fragment identifier is deliberately left out of _uri.
         _uri = assembled.toString().toCharArray();
         // NOTE(review): presumably invalidates a cached hash code - confirm.
         hash = 0;
     }

     // ----------------------------------------------------------- Test methods

     /**
      * Tell whether or not this URI is absolute.
      *
      * @return true if and only if this URI is absoluteURI
      */
     public boolean isAbsoluteURI() {
         // Absolute means a scheme component is present.
         final boolean schemePresent = _scheme != null;
         return schemePresent;
     }

     /**
      * Tell whether or not this URI is relative.
      *
      * @return true if and only if this URI is relativeURI
      */
     public boolean isRelativeURI() {
         // Relative means no scheme component is present.
         final boolean schemeMissing = _scheme == null;
         return schemeMissing;
     }

     /**
      * Tell whether or not the absoluteURI of this URI is hier_part.
      *
      * @return true if and only if the absoluteURI is hier_part
      */
     public boolean isHierPart() {
         // Plain accessor for the hier_part flag.
         return this._is_hier_part;
     }

     /**
      * Tell whether or not the absoluteURI of this URI is opaque_part.
      *
      * @return true if and only if the absoluteURI is opaque_part
      */
     public boolean isOpaquePart() {
         // Plain accessor for the opaque_part flag.
         return this._is_opaque_part;
     }

     /**
      * Tell whether or not the relativeURI or hier_part of this URI is net_path.
      * It performs the same check as the {@link #hasAuthority()} method.
      *
      * @return true if and only if the relativeURI or hier_part is net_path
      * @see #hasAuthority
      */
     public boolean isNetPath() {
         // Either the net_path flag is set or an authority is present.
         if (_is_net_path) {
             return true;
         }
         return _authority != null;
     }

     /**
      * Tell whether or not the relativeURI or hier_part of this URI is abs_path.
      *
      * @return true if and only if the relativeURI or hier_part is abs_path
      */
     public boolean isAbsPath() {
         // Plain accessor for the abs_path flag.
         return this._is_abs_path;
     }

     /**
      * Tell whether or not the relativeURI of this URI is rel_path.
      *
      * @return true if and only if the relativeURI is rel_path
      */
     public boolean isRelPath() {
         // Plain accessor for the rel_path flag.
         return this._is_rel_path;
     }

     /**
      * Tell whether or not this URI has authority. It performs the same check
      * as the {@link #isNetPath()} method.
      *
      * @return true if and only if this URI has authority
      * @see #isNetPath
      */
     public boolean hasAuthority() {
         // Either an authority is present or the net_path flag is set.
         if (_authority != null) {
             return true;
         }
         return _is_net_path;
     }

     /**
      * Tell whether or not the authority component of this URI is reg_name.
      *
      * @return true if and only if the authority component is reg_name
      */
     public boolean isRegName() {
         // Plain accessor for the reg_name flag.
         return this._is_reg_name;
     }

     /**
      * Tell whether or not the authority component of this URI is server.
      *
      * @return true if and only if the authority component is server
      */
     public boolean isServer() {
         // Plain accessor for the server flag.
         return this._is_server;
     }

     /**
      * Tell whether or not this URI has userinfo.
      *
      * @return true if and only if this URI has userinfo
      */
     public boolean hasUserinfo() {
         // Userinfo counts as present whenever the field is non-null.
         final boolean userinfoPresent = _userinfo != null;
         return userinfoPresent;
     }

     /**
      * Tell whether or not the host part of this URI is hostname.
      *
      * @return true if and only if the host part is hostname
      */
     public boolean isHostname() {
         // Plain accessor for the hostname flag.
         return this._is_hostname;
     }

     /**
      * Tell whether or not the host part of this URI is IPv4address.
      *
      * @return true if and only if the host part is IPv4address
      */
     public boolean isIPv4address() {
         // Plain accessor for the IPv4address flag.
         return this._is_IPv4address;
     }

     /**
      * Tell whether or not the host part of this URI is IPv6reference.
      *
      * @return true if and only if the host part is IPv6reference
      */
     public boolean isIPv6reference() {
         // Plain accessor for the IPv6reference flag.
         return this._is_IPv6reference;
     }

     /**
      * Tell whether or not this URI has query.
      *
      * @return true if and only if this URI has query
      */
     public boolean hasQuery() {
         // A query counts as present whenever the field is non-null.
         final boolean queryPresent = _query != null;
         return queryPresent;
     }

     /**
      * Tell whether or not this URI has fragment.
      *
      * @return true if and only if this URI has fragment
      */
     public boolean hasFragment() {
         // A fragment counts as present whenever the field is non-null.
         final boolean fragmentPresent = _fragment != null;
         return fragmentPresent;
     }

     // ---------------------------------------------------------------- Charset

     /**
      * Set the default charset of the protocol.
      *
      * The character set used to store files SHALL remain a local decision and
      * MAY depend on the capability of local operating systems. Prior to the
      * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format and
      * UTF-8 encoded. This approach, while allowing international exchange of
      * URIs, will still allow backward compatibility with older systems because
      * the code set positions for ASCII characters are identical to the one byte
      * sequence in UTF-8.
      *
      * An individual URI scheme may require a single charset, define a default
      * charset, or provide a way to indicate the charset used.
      *
      * The setter always succeeds and then always throws a
      * <code>DefaultCharsetChanged</code> exception, so API programmers must
      * call it in the following way: <code><pre>
      *  import org.apache.util.URI$DefaultCharsetChanged;
      *      .
      *      .
      *      .
      *  try {
      *      URI.setDefaultProtocolCharset("UTF-8");
      *  } catch (DefaultCharsetChanged cc) {
      *      // CASE 1: the exception could be ignored, when it is set by user
      *      if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) {
      *      // CASE 2: let user know the default protocol charset changed
      *      } else {
      *      // CASE 2: let user know the default document charset changed
      *      }
      *  }
      *  </pre></code> The API programmer is responsible to set the correct
      * charset. And each application should remember its own charset to support.
      *
      * @param charset the default charset for each protocol
      * @throws DefaultCharsetChanged default charset changed
      */
     public static void setDefaultProtocolCharset(String charset)
             throws DefaultCharsetChanged {

         // The new default is stored first; the exception is then raised
         // unconditionally to notify the caller of the change.
         defaultProtocolCharset = charset;
         final String reason = "the default protocol charset changed";
         throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET,
             reason);
     }

     /**
      * Get the default charset of the protocol.
      *
      * An individual URI scheme may require a single charset, define a default
      * charset, or provide a way to indicate the charset used.
      *
      * To work globally either requires support of a number of character sets
      * and to be able to convert between them, or the use of a single preferred
      * character set. For support of global compatibility it is STRONGLY
      * RECOMMENDED that clients and servers use UTF-8 encoding when exchanging
      * URIs.
      *
      * @return the default charset string
      */
     public static String getDefaultProtocolCharset() {
         // Class-wide default protocol charset.
         final String current = defaultProtocolCharset;
         return current;
     }

     /**
      * Get the protocol charset used by this URI instance. It is set by the
      * constructor for this instance. If it was not set by the constructor,
      * the default protocol charset is returned.
      *
      * @return the protocol charset string
      * @see #getDefaultProtocolCharset
      */
     public String getProtocolCharset() {
         // Fall back to the class-wide default when this instance has no
         // charset of its own.
         if (protocolCharset == null) {
             return defaultProtocolCharset;
         }
         return protocolCharset;
     }

     /**
      * Set the default charset of the document.
      *
      * Notice that it will be possible to contain mixed characters (e.g.
      * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional
      * display of these character sets, the protocol charset could be simply
      * used again. Because it's not yet implemented that the insertion of BIDI
      * control characters at different points during composition is extracted.
      *
      * The setter always succeeds and then always throws a
      * <code>DefaultCharsetChanged</code> exception, so API programmers must
      * call it in the following way: <code><pre>
      *  import org.apache.util.URI$DefaultCharsetChanged;
      *      .
      *      .
      *      .
      *  try {
      *      URI.setDefaultDocumentCharset("EUC-KR");
      *  } catch (DefaultCharsetChanged cc) {
      *      // CASE 1: the exception could be ignored, when it is set by user
      *      if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) {
      *      // CASE 2: let user know the default document charset changed
      *      } else {
      *      // CASE 2: let user know the default protocol charset changed
      *      }
      *  }
      *  </pre></code> The API programmer is responsible to set the correct
      * charset. And each application should remember its own charset to support.
      *
      * @param charset the default charset for the document
      * @throws DefaultCharsetChanged default charset changed
      */
     public static void setDefaultDocumentCharset(String charset)
             throws DefaultCharsetChanged {

         // The new default is stored first; the exception is then raised
         // unconditionally to notify the caller of the change.
         defaultDocumentCharset = charset;
         final String reason = "the default document charset changed";
         throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET,
             reason);
     }

     /**
      * Get the recommended default charset of the document.
      *
      * @return the default charset string
      */
     public static String getDefaultDocumentCharset() {
         // Class-wide default document charset.
         final String current = defaultDocumentCharset;
         return current;
     }

     /**
      * Get the default charset of the document by locale.
      *
      * @return the default charset string by locale
      */
     public static String getDefaultDocumentCharsetByLocale() {
         // Class-wide document charset associated with the locale.
         final String byLocale = defaultDocumentCharsetByLocale;
         return byLocale;
     }

     /**
      * Get the default charset of the document by platform.
      *
      * @return the default charset string by platform
      */
     public static String getDefaultDocumentCharsetByPlatform() {
         // Class-wide document charset associated with the platform.
         final String byPlatform = defaultDocumentCharsetByPlatform;
         return byPlatform;
     }

     // ------------------------------------------------------------- The scheme

     /**
      * Get the scheme.
      *
      * @return the scheme
      */
     public char[] getRawScheme() {
         // Hands out the internal array itself, not a copy.
         return this._scheme;
     }

     /**
      * Get the scheme.
      *
      * @return the scheme null if undefined scheme
      */
     public String getScheme() {
         // No scheme set: report null rather than an empty string.
         if (_scheme == null) {
             return null;
         }
         return new String(_scheme);
     }

     // ---------------------------------------------------------- The authority

     /**
      * Set the authority. It can be one type of server, hostport, hostname,
      * IPv4address, IPv6reference and reg_name.
      *
      * <blockquote>
      *
      * <pre>
      * authority = server | reg_name
      * </pre>
      *
      * </blockquote>
      *
      *
      * @param escapedAuthority the raw escaped authority
      * @throws URIException If {@link #parseAuthority(java.lang.String,boolean)}
      *             fails
      * @throws NullPointerException null authority
      */
     public void setRawAuthority(char[] escapedAuthority) throws URIException,
             NullPointerException {

         // new String(char[]) throws NullPointerException for a null array.
         final String authority = new String(escapedAuthority);
         // The input is already escaped, so it is parsed without encoding.
         parseAuthority(authority, true);
         // Rebuild the cached URI from the updated components.
         setURI();
     }

     /**
      * Set the authority. It can be one type of server, hostport, hostname,
      * IPv4address, IPv6reference and reg_name. Note that there is no
      * setAuthority method by the escape encoding reason.
      *
      * @param escapedAuthority the escaped authority string
      * @throws URIException If {@link #parseAuthority(java.lang.String,boolean)}
      *             fails
      */
     public void setEscapedAuthority(String escapedAuthority)
             throws URIException {

         // The input is already escaped, so it is parsed without encoding.
         final boolean alreadyEscaped = true;
         parseAuthority(escapedAuthority, alreadyEscaped);
         // Rebuild the cached URI from the updated components.
         setURI();
     }

     /**
      * Get the raw-escaped authority.
      *
      * @return the raw-escaped authority
      */
     public char[] getRawAuthority() {
         // Hands out the internal array itself, not a copy.
         return this._authority;
     }

     /**
      * Get the escaped authority.
      *
      * @return the escaped authority
      */
     public String getEscapedAuthority() {
         // No authority set: report null.
         if (_authority == null) {
             return null;
         }
         return new String(_authority);
     }

     /**
      * Get the authority.
      *
      * @return the authority
      * @throws URIException If {@link #decode} fails
      */
     public String getAuthority() throws URIException {
         // No authority set: nothing to decode.
         if (_authority == null) {
             return null;
         }
         // Decode the escaped form with this instance's protocol charset.
         return decode(_authority, getProtocolCharset());
     }

     // ----------------------------------------------------------- The userinfo

     /**
      * Get the raw-escaped userinfo.
      *
      * @return the raw-escaped userinfo
      * @see #getAuthority
      */
     public char[] getRawUserinfo() {
         // Hands out the internal array itself, not a copy.
         return this._userinfo;
     }

     /**
      * Get the escaped userinfo.
      *
      * @return the escaped userinfo
      * @see #getAuthority
      */
     public String getEscapedUserinfo() {
         // No userinfo set: report null.
         if (_userinfo == null) {
             return null;
         }
         return new String(_userinfo);
     }

     /**
      * Get the userinfo.
      *
      * @return the userinfo
      * @throws URIException If {@link #decode} fails
      * @see #getAuthority
      */
     public String getUserinfo() throws URIException {
         // No userinfo set: nothing to decode.
         if (_userinfo == null) {
             return null;
         }
         // Decode the escaped form with this instance's protocol charset.
         return decode(_userinfo, getProtocolCharset());
     }

     // --------------------------------------------------------------- The host

     /**
      * Get the host.
      *
      * <blockquote>
      *
      * <pre>
      * host = hostname | IPv4address | IPv6reference
      * </pre>
      *
      * </blockquote>
      *
      *
      * @return the host
      * @see #getAuthority
      */
     public char[] getRawHost() {
         // Hands out the internal array itself, not a copy.
         return this._host;
     }

     /**
      * Get the host.
      *
      * <blockquote>
      *
      * <pre>
      * host = hostname | IPv4address | IPv6reference
      * </pre>
      *
      * </blockquote>
      *
      *
      * @return the host
      * @throws URIException If {@link #decode} fails
      * @see #getAuthority
      */
     public String getHost() throws URIException {
         // No host set: nothing to decode.
         if (_host == null) {
             return null;
         }
         // Decode the escaped form with this instance's protocol charset.
         return decode(_host, getProtocolCharset());
     }

     // --------------------------------------------------------------- The port

     /**
      * Get the port. In order to get the specific default port, the specific
      * protocol-supported class extended from the URI class should be used. It
      * has the server-based naming authority.
      *
      * @return the port; -1 means that the default port for the scheme applies
      *         or that the server-based naming authority is not supported in
      *         the specific URI.
      */
     public int getPort() {
         // Plain accessor for the stored port number.
         return this._port;
     }

     // --------------------------------------------------------------- The path

     /**
      * Set the raw-escaped path.
      *
      * @param escapedPath the path character sequence
      * @throws URIException encoding error or not proper for initial instance
      * @see #encode
      */
     public void setRawPath(char[] escapedPath) throws URIException {
         // A null or empty path is stored as-is for both the path and the
         // opaque part; no validation is needed.
         if (escapedPath == null || escapedPath.length == 0) {
             _path = _opaque = escapedPath;
             setURI();
             return;
         }
         // remove the fragment identifier
         final char[] raw = removeFragmentIdentifier(escapedPath);

         if (_is_net_path || _is_abs_path) {
             // net_path / abs_path: must start with '/' and be a valid
             // abs_path
             if (raw[0] != '/') {
                 throw new URIException(URIException.PARSING,
                     "not absolute path");
             }
             if (!validate(raw, abs_path)) {
                 throw new URIException(URIException.ESCAPING,
                     "escaped absolute path not valid");
             }
             _path = raw;
         } else if (_is_rel_path) {
             // rel_path: must not start with '/'
             final int slash = indexFirstOf(raw, '/');
             if (slash == 0) {
                 throw new URIException(URIException.PARSING, "incorrect path");
             }
             // NOTE(review): with a slash present the path is rejected only
             // when BOTH the leading segment and the remainder fail
             // validation - confirm whether either failure should suffice.
             final boolean rejected;
             if (slash > 0) {
                 rejected = !validate(raw, 0, slash - 1, rel_segment)
                     && !validate(raw, slash, -1, abs_path);
             } else {
                 // no slash at all: the whole input is a single rel_segment
                 rejected = !validate(raw, 0, -1, rel_segment);
             }
             if (rejected) {
                 throw new URIException(URIException.ESCAPING,
                     "escaped relative path not valid");
             }
             _path = raw;
         } else if (_is_opaque_part) {
             // opaque_part: first character checked against uric_no_slash,
             // the remainder against uric
             final boolean firstCharAllowed = uric_no_slash.get(raw[0]);
             if (!firstCharAllowed && !validate(raw, 1, -1, uric)) {
                 throw new URIException(URIException.ESCAPING,
                     "escaped opaque part not valid");
             }
             _opaque = raw;
         } else {
             // none of the path kinds is flagged
             throw new URIException(URIException.PARSING, "incorrect path");
         }
         setURI();
     }

     /**
      * Set the escaped path.
      *
      * @param escapedPath the escaped path string
      * @throws URIException encoding error or not proper for initial instance
      * @see #encode
      */
     public void setEscapedPath(String escapedPath) throws URIException {
         if (escapedPath != null) {
             // Non-null input is validated and stored by setRawPath.
             setRawPath(escapedPath.toCharArray());
             return;
         }
         // null clears both the path and the opaque part
         _opaque = null;
         _path = null;
         setURI();
     }

     /**
      * Set the path.
      *
      * @param path the path string
      * @throws URIException set incorrectly or fragment only
      * @see #encode
      */
     public void setPath(String path) throws URIException {

         // A null or empty path is stored as-is (null stays null, "" becomes
         // an empty array) for both the path and the opaque part.
         if (path == null || path.length() == 0) {
             _path = _opaque = (path == null) ? null : path.toCharArray();
             setURI();
             return;
         }
         // set the charset to do escape encoding
         String charset = getProtocolCharset();

         if (_is_net_path || _is_abs_path) {
             _path = encode(path, allowed_abs_path, charset);
         } else if (_is_rel_path) {
             StringBuilder buff = new StringBuilder(path.length());
             int at = path.indexOf('/');
             if (at == 0) { // a relative path must not start with '/'
                 throw new URIException(URIException.PARSING,
                     "incorrect relative path");
             }
             if (at > 0) {
                 // the first segment and the rest use different allowed sets
                 buff.append(encode(path.substring(0, at), allowed_rel_path,
                     charset));
                 buff.append(encode(path.substring(at), allowed_abs_path,
                     charset));
             } else {
                 buff.append(encode(path, allowed_rel_path, charset));
             }
             _path = buff.toString().toCharArray();
         } else if (_is_opaque_part) {
             // The first character and the remainder use different allowed
             // sets. The pieces are appended in order: inserting the remainder
             // at index 1 would split the first piece whenever the first
             // character is percent-escaped into more than one char.
             StringBuilder buf = new StringBuilder();
             buf.append(encode(path.substring(0, 1), uric_no_slash, charset));
             buf.append(encode(path.substring(1), uric, charset));
             _opaque = buf.toString().toCharArray();
         } else {
             // none of the path kinds is flagged
             throw new URIException(URIException.PARSING, "incorrect path");
         }
         setURI();
     }

     /**
      * Resolve the base and relative path.
      *
      * @param basePath a character array of the basePath
      * @param relPath a character array of the relPath
      * @return the resolved path
      * @throws URIException no more higher path level to be resolved
      */
     protected char[] resolvePath(char[] basePath, char[] relPath)
             throws URIException {

         // An empty relative path resolves to the normalized base path.
         if (relPath == null || relPath.length == 0) {
             return normalize(basePath);
         }
         // A relative path starting with '/' replaces the base entirely.
         if (relPath[0] == '/') {
             return normalize(relPath);
         }
         // Otherwise merge: keep the base up to and including its last '/'
         // (or use "/" when the base has no slash) and append the relative
         // path. A null base is treated as empty.
         String base = (basePath == null) ? "" : new String(basePath);
         int at = base.lastIndexOf('/');
         StringBuilder buff = new StringBuilder(base.length() + relPath.length);
         buff.append((at != -1) ? base.substring(0, at + 1) : "/");
         buff.append(relPath);
         return normalize(buff.toString().toCharArray());
     }

     /**
      * Get the raw-escaped current hierarchy level in the given path. If the
      * last namespace is a collection, the slash mark ('/') should be ended with
      * at the last character of the path string.
      *
      * @param path the path
      * @return the current hierarchy level
      * @throws URIException no hierarchy level
      */
     protected char[] getRawCurrentHierPath(char[] path) throws URIException {

         // An opaque URI has no hierarchy to walk.
         if (_is_opaque_part) {
             throw new URIException(URIException.PARSING, "no hierarchy level");
         }
         if (path == null) {
             throw new URIException(URIException.PARSING, "empty path");
         }
         final String text = new String(path);
         final int lastSlash = text.lastIndexOf('/');
         if (lastSlash == 0) {
             // the only slash is the leading one
             return rootPath;
         }
         if (lastSlash > 0 && text.indexOf('/') != lastSlash) {
             // several slashes: drop the last slash and what follows it
             return text.substring(0, lastSlash).toCharArray();
         }
         // No slash, or a single non-leading one: the input is returned
         // unchanged.
         // FIXME: it could be a document on the server side
         return path;
     }

     /**
      * Get the raw-escaped current hierarchy level of this URI's path.
      *
      * @return the raw-escaped current hierarchy level, or <code>null</code>
      *         if this URI has no path
      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
      */
     public char[] getRawCurrentHierPath() throws URIException {
         if (_path == null) {
             return null;
         }
         return getRawCurrentHierPath(_path);
     }

     /**
      * Get the escaped current hierarchy level as a string.
      *
      * @return the escaped current hierarchy level, or <code>null</code> if
      *         this URI has no path
      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
      */
     public String getEscapedCurrentHierPath() throws URIException {
         char[] rawLevel = getRawCurrentHierPath();
         if (rawLevel == null) {
             return null;
         }
         return new String(rawLevel);
     }

     /**
      * Get the current hierarchy level, decoded with the protocol charset.
      *
      * @return the current hierarchy level, or <code>null</code> if this URI
      *         has no path
      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
      * @see #decode
      */
     public String getCurrentHierPath() throws URIException {
         char[] rawLevel = getRawCurrentHierPath();
         if (rawLevel == null) {
             return null;
         }
         return decode(rawLevel, getProtocolCharset());
     }

     /**
      * Get the level above this hierarchy level, obtained by applying
      * {@link #getRawCurrentHierPath(char[])} to the current hierarchy level.
      *
      * @return the raw above hierarchy level, or <code>null</code> if this
      *         URI has no path
      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
      */
     public char[] getRawAboveHierPath() throws URIException {
         char[] currentLevel = getRawCurrentHierPath();
         if (currentLevel == null) {
             return null;
         }
         return getRawCurrentHierPath(currentLevel);
     }

     /**
      * Get the escaped level above this hierarchy level as a string.
      *
      * @return the escaped above hierarchy level, or <code>null</code> if this
      *         URI has no path
      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
      */
     public String getEscapedAboveHierPath() throws URIException {
         char[] rawAbove = getRawAboveHierPath();
         if (rawAbove == null) {
             return null;
         }
         return new String(rawAbove);
     }

     /**
      * Get the level above this hierarchy level, decoded with the protocol
      * charset.
      *
      * @return the above hierarchy level, or <code>null</code> if this URI has
      *         no path
      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
      * @see #decode
      */
     public String getAboveHierPath() throws URIException {
         char[] rawAbove = getRawAboveHierPath();
         if (rawAbove == null) {
             return null;
         }
         return decode(rawAbove, getProtocolCharset());
     }

     /**
      * Get the raw-escaped path. For an opaque URI this is the opaque part,
      * otherwise the hierarchical path.
      *
      * <blockquote>
      *
      * <pre>
      *   path          = [ abs_path | opaque_part ]
      * </pre>
      *
      * </blockquote>
      *
      * @return the raw-escaped path
      */
     public char[] getRawPath() {
         if (_is_opaque_part) {
             return _opaque;
         }
         return _path;
     }

     /**
      * Get the escaped path as a string.
      *
      * <blockquote>
      *
      * <pre>
      *   path          = [ abs_path | opaque_part ]
      *   abs_path      = "/"  path_segments
      *   opaque_part   = uric_no_slash *uric
      * </pre>
      *
      * </blockquote>
      *
      * @return the escaped path string, or <code>null</code> if there is no
      *         path
      */
     public String getEscapedPath() {
         char[] rawPath = getRawPath();
         if (rawPath == null) {
             return null;
         }
         return new String(rawPath);
     }

     /**
      * Get the path, decoded with the protocol charset.
      *
      * <blockquote>
      *
      * <pre>
      *   path          = [ abs_path | opaque_part ]
      * </pre>
      *
      * </blockquote>
      *
      * @return the path string, or <code>null</code> if there is no path
      * @throws URIException If {@link #decode} fails.
      * @see #decode
      */
     public String getPath() throws URIException {
         char[] rawPath = getRawPath();
         if (rawPath == null) {
             return null;
         }
         return decode(rawPath, getProtocolCharset());
     }

     /**
      * Get the raw-escaped basename of the path: the characters following the
      * last '/', or the whole path if it contains no '/'.
      *
      * @return a new array holding the raw-escaped basename, or
      *         <code>null</code> if this URI has no path
      */
     public char[] getRawName() {
         if (_path == null) {
             return null;
         }

         // walk back from the end until just after the last '/'
         int start = _path.length;
         while (start > 0 && _path[start - 1] != '/') {
             start--;
         }

         char[] basename = new char[_path.length - start];
         System.arraycopy(_path, start, basename, 0, basename.length);
         return basename;
     }

     /**
      * Get the escaped basename of the path as a string.
      *
      * @return the escaped basename string, or <code>null</code> if this URI
      *         has no path
      */
     public String getEscapedName() {
         char[] rawName = getRawName();
         if (rawName == null) {
             return null;
         }
         return new String(rawName);
     }

     /**
      * Get the basename of the path, decoded with the protocol charset.
      *
      * @return the basename string, or <code>null</code> if this URI has no
      *         path
      * @throws URIException incomplete trailing escape pattern or unsupported
      *             character encoding
      * @see #decode
      */
     public String getName() throws URIException {
         // getRawName() allocates a fresh array on every call, so fetch it once
         char[] basename = getRawName();
         return (basename == null) ? null : decode(basename,
             getProtocolCharset());
     }

     // ----------------------------------------------------- The path and query

     /**
      * Get the raw-escaped path and query, joined by '?' when a query is
      * present.
      *
      * @return the raw-escaped path and query, or <code>null</code> if both
      *         the path and the query are absent
      */
     public char[] getRawPathQuery() {

         boolean hasPath = _path != null;
         boolean hasQuery = _query != null;
         if (!hasPath && !hasQuery) {
             return null;
         }

         StringBuilder pathQuery = new StringBuilder();
         if (hasPath) {
             pathQuery.append(_path);
         }
         if (hasQuery) {
             pathQuery.append('?').append(_query);
         }
         return pathQuery.toString().toCharArray();
     }

     /**
      * Get the escaped path and query as a string.
      *
      * @return the escaped path and query string, or <code>null</code> if
      *         both the path and the query are absent
      */
     public String getEscapedPathQuery() {
         char[] raw = getRawPathQuery();
         if (raw == null) {
             return null;
         }
         return new String(raw);
     }

     /**
      * Get the path and query, decoded with the protocol charset.
      *
      * @return the path and query string, or <code>null</code> if both the
      *         path and the query are absent
      * @throws URIException incomplete trailing escape pattern or unsupported
      *             character encoding
      * @see #decode
      */
     public String getPathQuery() throws URIException {
         char[] raw = getRawPathQuery();
         if (raw == null) {
             return null;
         }
         return decode(raw, getProtocolCharset());
     }

     // -------------------------------------------------------------- The query

     /**
      * Set the raw-escaped query.
      *
      * A <code>null</code> or empty query is stored as given. Otherwise any
      * fragment identifier is stripped, the remainder is validated against the
      * query character set and stored. The URI is rebuilt afterwards.
      *
      * @param escapedQuery the raw-escaped query
      * @throws URIException escaped query not valid
      */
     public void setRawQuery(char[] escapedQuery) throws URIException {
         char[] accepted = escapedQuery;
         if (escapedQuery != null && escapedQuery.length != 0) {
             // remove the fragment identifier
             accepted = removeFragmentIdentifier(escapedQuery);
             if (!validate(accepted, query)) {
                 throw new URIException(URIException.ESCAPING,
                     "escaped query not valid");
             }
         }
         _query = accepted;
         setURI();
     }

     /**
      * Set the escaped query string. A <code>null</code> argument clears the
      * query and rebuilds the URI.
      *
      * @param escapedQuery the escaped query string
      * @throws URIException escaped query not valid
      */
     public void setEscapedQuery(String escapedQuery) throws URIException {
         if (escapedQuery != null) {
             setRawQuery(escapedQuery.toCharArray());
         } else {
             _query = null;
             setURI();
         }
     }

     /**
      * Set the query from an unescaped string.
      *
      * The whole query is encoded with this method, so it is the recommended
      * choice when the reserved special characters ("&amp;", "=", "+", ",",
      * and "$") within a query component cannot be misinterpreted.
      *
      * Additional APIs that give the reserved special characters a
      * protocol-specific meaning are implemented in the protocol classes
      * inherited from <code>URI</code>; refer to the same-named APIs of the
      * specific protocol instance.
      *
      * A <code>null</code> or empty query is stored without encoding.
      *
      * @param query the query string.
      * @throws URIException incomplete trailing escape pattern or unsupported
      *             character encoding
      * @see #encode
      */
     public void setQuery(String query) throws URIException {
         if (query != null && query.length() != 0) {
             setRawQuery(encode(query, allowed_query, getProtocolCharset()));
             return;
         }
         _query = (query == null) ? null : query.toCharArray();
         setURI();
     }

     /**
      * Get the raw-escaped query.
      *
      * @return the raw-escaped query, or <code>null</code> if none is set
      */
     public char[] getRawQuery() {
         return this._query;
     }

     /**
      * Get the escaped query as a string.
      *
      * @return the escaped query string, or <code>null</code> if none is set
      */
     public String getEscapedQuery() {
         if (_query == null) {
             return null;
         }
         return new String(_query);
     }

     /**
      * Get the query, decoded with the protocol charset.
      *
      * @return the query string, or <code>null</code> if none is set
      * @throws URIException incomplete trailing escape pattern or unsupported
      *             character encoding
      * @see #decode
      */
     public String getQuery() throws URIException {
         if (_query == null) {
             return null;
         }
         return decode(_query, getProtocolCharset());
     }

     // ----------------------------------------------------------- The fragment

     /**
      * Set the raw-escaped fragment.
      *
      * A <code>null</code> or empty fragment is stored as given; any other
      * value must validate against the fragment character set. Storing the
      * fragment clears the cached hash code.
      *
      * @param escapedFragment the raw-escaped fragment
      * @throws URIException escaped fragment not valid
      */
     public void setRawFragment(char[] escapedFragment) throws URIException {
         boolean isEmpty =
             escapedFragment == null || escapedFragment.length == 0;
         if (!isEmpty && !validate(escapedFragment, fragment)) {
             throw new URIException(URIException.ESCAPING,
                 "escaped fragment not valid");
         }
         _fragment = escapedFragment;
         hash = 0;
     }

     /**
      * Set the escaped fragment string. A <code>null</code> argument clears
      * the fragment and the cached hash code.
      *
      * @param escapedFragment the escaped fragment string
      * @throws URIException escaped fragment not valid
      */
     public void setEscapedFragment(String escapedFragment) throws URIException {
         if (escapedFragment != null) {
             setRawFragment(escapedFragment.toCharArray());
         } else {
             _fragment = null;
             hash = 0;
         }
     }

     /**
      * Set the fragment from an unescaped string. A non-empty fragment is
      * encoded with the protocol charset; a <code>null</code> or empty one is
      * stored without encoding. The cached hash code is cleared.
      *
      * @param fragment the fragment string.
      * @throws URIException If an error occurs.
      */
     public void setFragment(String fragment) throws URIException {
         if (fragment == null) {
             _fragment = null;
         } else if (fragment.length() == 0) {
             _fragment = fragment.toCharArray();
         } else {
             _fragment = encode(fragment, allowed_fragment, getProtocolCharset());
         }
         hash = 0;
     }

     /**
      * Get the raw-escaped fragment.
      *
      * The optional fragment identifier is not part of a URI, but is often used
      * in conjunction with one.
      *
      * The format and interpretation of fragment identifiers depends on the
      * media type [RFC2046] of the retrieval result.
      *
      * A fragment identifier is only meaningful when a URI reference is intended
      * for retrieval and the result of that retrieval is a document for which
      * the identified fragment is consistently defined.
      *
      * @return the raw-escaped fragment, or <code>null</code> if none is set
      */
     public char[] getRawFragment() {
         return this._fragment;
     }

     /**
      * Get the escaped fragment as a string.
      *
      * @return the escaped fragment string, or <code>null</code> if none is
      *         set
      */
     public String getEscapedFragment() {
         if (_fragment == null) {
             return null;
         }
         return new String(_fragment);
     }

     /**
      * Get the fragment, decoded with the protocol charset.
      *
      * @return the fragment string, or <code>null</code> if none is set
      * @throws URIException incomplete trailing escape pattern or unsupported
      *             character encoding
      * @see #decode
      */
     public String getFragment() throws URIException {
         if (_fragment == null) {
             return null;
         }
         return decode(_fragment, getProtocolCharset());
     }

     // ------------------------------------------------------------- Utilities

     /**
      * Remove the fragment identifier of the given component: everything from
      * the first '#' onwards is dropped.
      *
      * @param component the component that may include a fragment
      * @return <code>null</code> for a <code>null</code> component; the same
      *         array instance if it contains no '#'; otherwise a new array
      *         holding the characters before the first '#'
      */
     protected char[] removeFragmentIdentifier(char[] component) {
         if (component == null) {
             return null;
         }
         // scan the array directly instead of copying it into Strings
         for (int i = 0; i < component.length; i++) {
             if (component[i] == '#') {
                 char[] withoutFragment = new char[i];
                 System.arraycopy(component, 0, withoutFragment, 0, i);
                 return withoutFragment;
             }
         }
         return component;
     }

     /**
      * Normalize the given hier path part by removing "." and ".." segments.
      *
      * Algorithm taken from URI reference parser at
      * http://www.apache.org/~fielding/uri/rev-2002/issues.html.
      *
      * The steps are applied in a fixed order: strip a leading "." or "..",
      * collapse "/./", strip a trailing "/.", collapse "/&lt;segment&gt;/../"
      * and a trailing "/&lt;segment&gt;/..", and finally drop any remaining
      * leading "&lt;segment&gt;/.." prefix. ".." segments that would climb
      * above the start of the path are dropped rather than reported: the body
      * of this method contains no <code>throw</code> statement.
      *
      * @param path the path to normalize
      * @return the normalized path, or <code>null</code> if <code>path</code>
      *         is <code>null</code>
      * @throws URIException declared by the signature ("no more higher path
      *             level to be normalized"); not thrown explicitly by this
      *             implementation
      */
     protected char[] normalize(char[] path) throws URIException {

         if (path == null) {
             return null;
         }

         String normalized = new String(path);

         // If the buffer begins with "./" or "../", the "." or ".." is removed,
         // leaving the '/' that followed it as the new first character.
         if (normalized.startsWith("./")) {
             normalized = normalized.substring(1);
         } else if (normalized.startsWith("../")) {
             normalized = normalized.substring(2);
         } else if (normalized.startsWith("..")) {
             // NOTE(review): this also matches a first segment that merely
             // starts with two dots (e.g. "..foo" becomes "foo") - confirm
             // that is intended.
             normalized = normalized.substring(2);
         }

         // All occurrences of "/./" in the buffer are replaced with "/"
         // (the text before the match is joined with the match's second '/').
         int index = -1;
         while ((index = normalized.indexOf("/./")) != -1) {
             normalized = normalized.substring(0, index)
                 + normalized.substring(index + 2);
         }

         // If the buffer ends with "/.", the "." is removed.
         if (normalized.endsWith("/.")) {
             normalized = normalized.substring(0, normalized.length() - 1);
         }

         // Occurrences of "/../" that have no preceding segment to cancel are
         // skipped by moving startIndex past them.
         int startIndex = 0;

         // All occurrences of "/<segment>/../" in the buffer, where ".."
         // and <segment> are complete path segments, are iteratively replaced
         // with "/" in order from left to right until no matching pattern
         // remains.
         // If the buffer ends with "/<segment>/..", that is also replaced
         // with "/". Note that <segment> may be empty.
         while ((index = normalized.indexOf("/../", startIndex)) != -1) {
             // slashIndex is the '/' that opens the segment preceding "/../"
             int slashIndex = normalized.lastIndexOf('/', index - 1);
             if (slashIndex >= 0) {
                 // cut "/<segment>/.." and keep the '/' that followed ".."
                 normalized = normalized.substring(0, slashIndex)
                     + normalized.substring(index + 3);
             } else {
                 startIndex = index + 3;
             }
         }
         if (normalized.endsWith("/..")) {
             // look for the '/' opening the segment in front of the final "/.."
             int slashIndex = normalized.lastIndexOf('/',
                 normalized.length() - 4);
             if (slashIndex >= 0) {
                 // keep that '/' so the result still ends with a slash
                 normalized = normalized.substring(0, slashIndex + 1);
             }
         }

         // All prefixes of "<segment>/../" in the buffer, where ".."
         // and <segment> are complete path segments, are iteratively replaced
         // with "/" in order from left to right until no matching pattern
         // remains.
         // If the buffer ends with "<segment>/..", that is also replaced
         // with "/". Note that <segment> may be empty.
         while ((index = normalized.indexOf("/../")) != -1) {
             int slashIndex = normalized.lastIndexOf('/', index - 1);
             if (slashIndex >= 0) {
                 // the first "/../" is not at the very front: stop here
                 break;
             }
             // drop the leading "<segment>/..", keeping the '/' after it
             normalized = normalized.substring(index + 3);
         }
         if (normalized.endsWith("/..")) {
             int slashIndex = normalized.lastIndexOf('/',
                 normalized.length() - 4);
             if (slashIndex < 0) {
                 // the whole buffer is "<segment>/..": it collapses to "/"
                 normalized = "/";
             }
         }

         return normalized.toCharArray();
     }

     /**
      * Normalizes the path part of this URI and rebuilds the URI.
      * Normalization is only performed on URIs with an absolute path; calling
      * this method on any other URI has no effect.
      *
      * @throws URIException no more higher path level to be normalized
      * @see #isAbsPath()
      */
     public void normalize() throws URIException {
         if (!isAbsPath()) {
             return;
         }
         _path = normalize(_path);
         setURI();
     }

     /**
      * Test if the first array is equal to the second array. Two
      * <code>null</code> arrays are equal; a <code>null</code> array never
      * equals a non-<code>null</code> one; otherwise the arrays must have the
      * same length and the same characters in the same order.
      *
      * @param first the first character array
      * @param second the second character array
      * @return true if they're equal
      */
     protected boolean equals(char[] first, char[] second) {
         // Arrays.equals implements exactly the null/length/element contract
         return Arrays.equals(first, second);
     }

     /**
      * Test an object if this URI is equal to another.
      *
      * Two URIs are equal when their scheme, opaque part, authority, path,
      * query and fragment arrays are pairwise equal. The components are
      * compared as stored; no normalization is applied here.
      *
      * @param obj an object to compare
      * @return true if two URI objects are equal
      */
     public boolean equals(Object obj) {

         if (obj == this) {
             return true;
         }
         if (!(obj instanceof URI)) {
             return false;
         }
         URI other = (URI) obj;
         // compared in order; the first mismatch short-circuits the rest
         return equals(_scheme, other._scheme)
             && equals(_opaque, other._opaque)
             && equals(_authority, other._authority)
             && equals(_path, other._path)
             && equals(_query, other._query)
             // be careful of the fragment-only case
             && equals(_fragment, other._fragment);
     }

     // ---------------------------------------------------------- Serialization

     /**
      * Write the content of this URI using default serialization.
      *
      * @param oos the object-output stream
      * @throws IOException If an IO problem occurs.
      */
     private void writeObject(ObjectOutputStream oos) throws IOException {
         // no custom fields: the default mechanism writes everything
         oos.defaultWriteObject();
     }

     /**
      * Read a URI using default deserialization.
      *
      * @param ois the object-input stream
      * @throws ClassNotFoundException If one of the classes specified in the
      *             input stream cannot be found.
      * @throws IOException If an IO problem occurs.
      */
     private void readObject(ObjectInputStream ois)
             throws ClassNotFoundException, IOException {
         // no custom fields: the default mechanism restores everything
         ois.defaultReadObject();
     }

     // -------------------------------------------------------------- Hash code

     /**
      * Return a hash code for this URI, computed from the characters of the
      * raw URI followed by those of the raw fragment and cached in
      * <code>hash</code>. A cached value of 0 means "not computed yet".
      *
      * @return a hash code value for this URI
      */
     public int hashCode() {
         int h = hash;
         if (h == 0) {
             // accumulate in a local so the field never holds a partial value
             char[] c = _uri;
             if (c != null) {
                 for (int i = 0, len = c.length; i < len; i++) {
                     h = 31 * h + c[i];
                 }
             }
             c = _fragment;
             if (c != null) {
                 for (int i = 0, len = c.length; i < len; i++) {
                     h = 31 * h + c[i];
                 }
             }
             hash = h;
         }
         return h;
     }

     // ------------------------------------------------------------- Comparison

     /**
      * Compare this URI to another URI.
      *
      * The authority components are compared first; if they differ, -1 is
      * returned. Otherwise the result is the comparison of the two
      * {@link #toString()} values.
      *
      * NOTE(review): when the authorities differ, both
      * <code>a.compareTo(b)</code> and <code>b.compareTo(a)</code> return -1,
      * so this does not define a consistent ordering - confirm no caller
      * sorts with it.
      *
      * @param another the URI to be compared.
      * @return 0, if it's same, -1, if failed, first being compared with in the
      *         authority component
      */
     public int compareTo(URI another) {
         boolean sameAuthority = equals(_authority, another.getRawAuthority());
         if (sameAuthority) {
             return toString().compareTo(another.toString());
         }
         return -1;
     }

     // ------------------------------------------------------------------ Clone

     /**
      * Create and return a copy of this object, the URI-reference containing the
      * userinfo component. Notice that the whole URI-reference including the
      * userinfo component cannot be obtained as a <code>String</code>.
      *
      * Use this method to obtain an identical <code>URI</code> object that
      * includes the userinfo component. The component arrays are assigned by
      * reference, so the copy refers to the same array instances as this
      * object.
      *
      * @return a clone of this instance
      */
     public synchronized Object clone() throws CloneNotSupportedException {

         URI copy = (URI) super.clone();

         // flags
         copy._is_hier_part = _is_hier_part;
         copy._is_opaque_part = _is_opaque_part;
         copy._is_net_path = _is_net_path;
         copy._is_abs_path = _is_abs_path;
         copy._is_rel_path = _is_rel_path;
         copy._is_reg_name = _is_reg_name;
         copy._is_server = _is_server;
         copy._is_hostname = _is_hostname;
         copy._is_IPv4address = _is_IPv4address;
         copy._is_IPv6reference = _is_IPv6reference;

         // the charset to do escape encoding for this instance
         copy.protocolCharset = protocolCharset;

         // the URI and its components
         copy._uri = _uri;
         copy._scheme = _scheme;
         copy._opaque = _opaque;
         copy._authority = _authority;
         copy._userinfo = _userinfo;
         copy._host = _host;
         copy._port = _port;
         copy._path = _path;
         copy._query = _query;
         copy._fragment = _fragment;

         return copy;
     }

     // ------------------------------------------------------------ Get the URI

     /**
      * Get the raw-escaped URI character sequence. This form is useful for
      * sending the URI over the transport protocol.
      *
      * It is clearly unwise to use a URL that contains a password which is
      * intended to be secret. In particular, the use of a password within the
      * 'userinfo' component of a URL is strongly disrecommended except in those
      * rare cases where the 'password' parameter is intended to be public.
      *
      * To get the individual parts of the userinfo, use the specific methods
      * of the specific URL class; they depend on that URL type.
      *
      * @return the URI character sequence
      */
     public char[] getRawURI() {
         return this._uri;
     }

     /**
      * Get the escaped URI as a string. This form is useful for sending the
      * URI over the transport protocol.
      *
      * @return the escaped URI string, or <code>null</code> if no URI is set
      */
     public String getEscapedURI() {
         if (_uri == null) {
             return null;
         }
         return new String(_uri);
     }

     /**
      * Get the URI, decoded with the protocol charset.
      *
      * @return the original URI string, or <code>null</code> if no URI is set
      * @throws URIException incomplete trailing escape pattern or unsupported
      *             character encoding
      * @see #decode
      */
     public String getURI() throws URIException {
         if (_uri == null) {
             return null;
         }
         return decode(_uri, getProtocolCharset());
     }

     /**
      * Get the URI reference character sequence: the raw URI, followed by '#'
      * and the raw fragment when both are present. If only one of them is
      * present, that array itself is returned.
      *
      * @return the URI reference character sequence
      */
     public char[] getRawURIReference() {
         if (_fragment == null) {
             return _uri;
         }
         if (_uri == null) {
             return _fragment;
         }
         // both present: assemble "<uri>#<fragment>" in a fresh array
         int uriLength = _uri.length;
         char[] reference = new char[uriLength + 1 + _fragment.length];
         System.arraycopy(_uri, 0, reference, 0, uriLength);
         reference[uriLength] = '#';
         System.arraycopy(_fragment, 0, reference, uriLength + 1,
             _fragment.length);
         return reference;
     }

     /**
      * Get the escaped URI reference as a string.
      *
      * @return the escaped URI reference string, or <code>null</code> if
      *         neither a URI nor a fragment is set
      */
     public String getEscapedURIReference() {
         char[] rawReference = getRawURIReference();
         if (rawReference == null) {
             return null;
         }
         return new String(rawReference);
     }

     /**
      * Get the URI reference, decoded with the protocol charset.
      *
      * @return the original URI reference string, or <code>null</code> if
      *         neither a URI nor a fragment is set
      * @throws URIException If {@link #decode} fails.
      */
     public String getURIReference() throws URIException {
         char[] rawReference = getRawURIReference();
         if (rawReference == null) {
             return null;
         }
         return decode(rawReference, getProtocolCharset());
     }

     /**
      * Get the escaped URI string; this simply returns
      * {@link #getEscapedURI()}.
      *
      * In documents, the URI-reference form is only used without the userinfo
      * component, like http://jakarta.apache.org/, for security reasons. A
      * URI-reference form with the userinfo component can still be parsed.
      *
      * In other words, this URI and any of its subclasses must not expose the
      * URI-reference expression with the userinfo component like
      * http://user:password@hostport/restricted_zone.<br>
      * API clients should extract the user and password individually;
      * subclasses may support that, but not as a whole URI-reference
      * expression.
      *
      * @return the escaped URI string
      * @see #clone()
      */
     public String toString() {
         String escaped = getEscapedURI();
         return escaped;
     }

     // ------------------------------------------------------------ Inner class

     /**
      * Exception used to alert the user that the default charset has been
      * changed. It carries a reason code and a reason message.
      */
     @SuppressWarnings("serial")
     public static class DefaultCharsetChanged extends SlingException {

         // ---------------------------------------------------------- constants

         /** No specified reason code. */
         public static final int UNKNOWN = 0;

         /** Protocol charset changed. */
         public static final int PROTOCOL_CHARSET = 1;

         /** Document charset changed. */
         public static final int DOCUMENT_CHARSET = 2;

         // ------------------------------------------------- instance variables

         /** The reason code. */
         private int reasonCode;

         /** The reason message. */
         private String reason;

         // ------------------------------------------------------- constructors

         /**
          * Create the exception from a reason code and a reason message. The
          * message is also passed to the superclass constructor.
          *
          * @param reasonCode the reason code
          * @param reason the reason
          */
         public DefaultCharsetChanged(int reasonCode, String reason) {
             super(reason);
             this.reasonCode = reasonCode;
             this.reason = reason;
         }

         // ------------------------------------------------------------ methods

         /**
          * Get the reason code.
          *
          * @return the reason code
          */
         public int getReasonCode() {
             return this.reasonCode;
         }

         /**
          * Get the reason message.
          *
          * @return the reason message
          */
         public String getReason() {
             return this.reason;
         }

     }

     /**
      * A mapping to determine the (somewhat arbitrarily) preferred charset for a
      * given locale. Supports all locales recognized in JDK 1.1.
      *
      * The distribution of this class is Servlets.com. It was originally written
      * by Jason Hunter [jhunter at acm.org] and is used with permission.
      */
     public static class LocaleToCharsetMap {

         /**
          * A mapping of locale code (language, or language_COUNTRY) to charset.
          */
         private static final HashMap<String, String> LOCALE_TO_CHARSET_MAP;
         static {
             LOCALE_TO_CHARSET_MAP = new HashMap<String, String>();
             LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6");
             LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5");
             LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5");
             LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1");
             LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2");
             LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1");
             LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1");
             LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7");
             LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1");
             LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1");
             LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1");
             LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1");
             LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1");
             LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2");
             LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2");
             LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1");
             LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1");
             // Hebrew: Locale.getLanguage() reports either the legacy code
             // "iw" or the current code "he" depending on the JDK, so both
             // are registered.
             LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8");
             LOCALE_TO_CHARSET_MAP.put("he", "ISO-8859-8");
             LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS");
             LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR");
             LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2");
             LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2");
             LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5");
             LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1");
             LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1");
             LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2");
             LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1");
             LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2");
             LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5");
             LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5");
             LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2");
             LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2");
             LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2");
             LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5");
             LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1");
             LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9");
             LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5");
             LOCALE_TO_CHARSET_MAP.put("zh", "GB2312");
             LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5");
         }

         /**
          * Get the preferred charset for the given locale.
          *
          * The lookup tries, in order: the full locale name as returned by
          * {@link Locale#toString()}, then <code>language_COUNTRY</code>, then
          * the language alone. The middle step lets a locale that carries a
          * script or variant (for example <code>zh_TW_#Hant</code>) still
          * match its country-specific entry.
          *
          * @param locale the locale
          * @return the preferred charset or null if the locale is not
          *         recognized.
          */
         public static String getCharset(Locale locale) {
             // try for a full name match (may include country, variant, script)
             String charset = LOCALE_TO_CHARSET_MAP.get(locale.toString());
             if (charset != null) {
                 return charset;
             }

             String language = locale.getLanguage();

             // next try language_COUNTRY, ignoring script/variant/extensions
             String country = locale.getCountry();
             if (country.length() > 0) {
                 charset = LOCALE_TO_CHARSET_MAP.get(language + "_" + country);
                 if (charset != null) {
                     return charset;
                 }
             }

             // finally try just the language
             return LOCALE_TO_CHARSET_MAP.get(language); // may be null
         }

     }

     // from EncodingUtils...

     /**
      * Encodes a string into a byte array using the named charset. When the
      * named charset is not supported, the platform default charset is used
      * instead and no error is reported.
      *
      * @param data the string to be encoded
      * @param charset the name of the desired character encoding
      * @return The resulting byte array.
      * @throws IllegalArgumentException if <code>data</code> is null, or if
      *         <code>charset</code> is null or empty
      * @since 3.0
      */
     private static byte[] getBytes(final String data, String charset) {
         if (data == null) {
             throw new IllegalArgumentException("data may not be null");
         }

         final boolean charsetMissing = (charset == null) || charset.isEmpty();
         if (charsetMissing) {
             throw new IllegalArgumentException(
                 "charset may not be null or empty");
         }

         byte[] encoded;
         try {
             encoded = data.getBytes(charset);
         } catch (UnsupportedEncodingException unsupported) {
             // Deliberate best effort: silently fall back to the platform
             // default charset rather than failing the caller.
             encoded = data.getBytes();
         }
         return encoded;
     }

     /**
      * Converts the byte array of ASCII characters to a string. This method is
      * to be used when decoding content of HTTP elements (such as response
      * headers).
      *
      * The whole array is decoded as US-ASCII.
      *
      * @param data the byte array to be decoded
      * @return The string representation of the byte array
      * @throws IllegalArgumentException if <code>data</code> is null
      * @since 3.0
      */
     private static String getAsciiString(final byte[] data) {

         if (data == null) {
             throw new IllegalArgumentException("Parameter may not be null");
         }

         // US-ASCII is one of the charsets every Java platform must provide,
         // so the Charset overload is used: it cannot fail with an
         // UnsupportedEncodingException and needs no unreachable error branch.
         return new String(data, java.nio.charset.StandardCharsets.US_ASCII);
     }

     /**
      * Decodes a byte array of HTTP content characters into a string using the
      * named charset. When the named charset is not supported, the platform
      * default charset is used instead and no error is reported.
      *
      * @param data the byte array to be decoded
      * @param charset the name of the desired character encoding
      * @return The result of the conversion.
      * @throws IllegalArgumentException if <code>data</code> is null, or if
      *         <code>charset</code> is null or empty
      * @since 3.0
      */
     public static String getString(final byte[] data, String charset) {
         if (data == null) {
             throw new IllegalArgumentException("Parameter may not be null");
         }

         final boolean charsetMissing = (charset == null) || charset.isEmpty();
         if (charsetMissing) {
             throw new IllegalArgumentException(
                 "charset may not be null or empty");
         }

         String decoded;
         try {
             decoded = new String(data, charset);
         } catch (UnsupportedEncodingException unsupported) {
             // Deliberate best effort: silently fall back to the platform
             // default charset rather than failing the caller.
             decoded = new String(data);
         }
         return decoded;
     }

     /**
      * Converts the specified string to byte array of ASCII characters.
      *
      * The string is encoded as US-ASCII.
      *
      * @param data the string to be encoded
      * @return The string as a byte array.
      * @throws IllegalArgumentException if <code>data</code> is null
      * @since 3.0
      */
     public static byte[] getAsciiBytes(final String data) {

         if (data == null) {
             throw new IllegalArgumentException("Parameter may not be null");
         }

         // US-ASCII is one of the charsets every Java platform must provide,
         // so the Charset overload is used: it cannot fail with an
         // UnsupportedEncodingException and needs no unreachable error branch.
         return data.getBytes(java.nio.charset.StandardCharsets.US_ASCII);
     }

     /**
      * Encodes an array of bytes into an array of URL safe 7-bit characters.
      * Every byte whose unsigned value is not flagged in the given bitset is
      * written as <code>%XX</code> with upper case hexadecimal digits. A space
      * that is flagged as safe is written as <code>+</code>.
      *
      * @param urlsafe bitset of characters deemed URL safe
      * @param bytes array of bytes to convert to URL safe characters
      * @return array of bytes containing URL safe characters, or null if
      *         <code>bytes</code> is null
      */
     private static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes) {
         if (bytes == null) {
             return null;
         }

         final String hexDigits = "0123456789ABCDEF";
         final ByteArrayOutputStream out = new ByteArrayOutputStream();
         for (final byte raw : bytes) {
             // treat the byte as an unsigned value in the range 0..255
             final int octet = raw & 0xFF;
             if (!urlsafe.get(octet)) {
                 out.write('%');
                 out.write(hexDigits.charAt(octet >> 4));
                 out.write(hexDigits.charAt(octet & 0xF));
                 continue;
             }
             out.write(octet == ' ' ? '+' : octet);
         }
         return out.toByteArray();
     }

     /**
      * Decodes an array of URL safe 7-bit characters into an array of original
      * bytes. Escaped characters (<code>%XX</code>) are converted back to
      * their original representation and <code>+</code> is decoded to a
      * space; all other bytes are copied unchanged.
      *
      * @param bytes array of URL safe characters
      * @return array of original bytes, or null if <code>bytes</code> is null
      * @throws URIException Thrown if URL decoding is unsuccessful, that is
      *         if a <code>%</code> is not followed by two hexadecimal digits
      */
     private static final byte[] decodeUrl(byte[] bytes) {
         if (bytes == null) {
             return null;
         }
         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
         for (int i = 0; i < bytes.length; i++) {
             final int b = bytes[i];
             if (b == '+') {
                 buffer.write(' ');
             } else if (b == '%') {
                 // An escape needs the '%' plus two more bytes. Check the
                 // remaining length explicitly instead of relying on an
                 // ArrayIndexOutOfBoundsException to detect truncation.
                 if (bytes.length - i < 3) {
                     throw new URIException("Invalid URL encoding");
                 }
                 final int u = Character.digit((char) bytes[i + 1], 16);
                 final int l = Character.digit((char) bytes[i + 2], 16);
                 if (u == -1 || l == -1) {
                     throw new URIException("Invalid URL encoding");
                 }
                 buffer.write((u << 4) + l);
                 // skip the two hex digits just consumed
                 i += 2;
             } else {
                 buffer.write(b);
             }
         }
         return buffer.toByteArray();
     }
 }