| /************************************************************************ |
| * |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER |
| * |
| * Copyright 2008, 2010 Oracle and/or its affiliates. All rights reserved. |
| * |
| * Use is subject to license terms. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| * use this file except in compliance with the License. You may obtain a copy |
| * of the License at http://www.apache.org/licenses/LICENSE-2.0. You can also |
| * obtain a copy of the License at http://odftoolkit.org/docs/license.txt |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| ************************************************************************/ |
| package org.odftoolkit.odfdom.type; |
| |
| import java.io.UnsupportedEncodingException; |
| import java.util.BitSet; |
| import java.io.ByteArrayOutputStream; |
| import java.util.logging.Level; |
| import java.util.logging.Logger; |
| |
| /** |
| * Transformations for transporting URIs in URLs. |
| * |
| * <h4> URIs, URLs, and URNs </h4> |
| * |
| * A URI is a uniform resource <i>identifier</i> while a URL is a uniform |
| * resource <i>locator</i>. Hence every URL is a URI, abstractly speaking, but |
| * not every URI is a URL. This is because there is another subcategory of |
| * URIs, uniform resource <i>names</i> (URNs), which name resources but do not |
| * specify how to locate them. The <tt>mailto</tt>, <tt>news</tt>, and |
| * <tt>isbn</tt> URIs shown below are examples of URNs. |
| * |
| * |
| * <h4>URI syntax and components</h4> |
| * |
| * At the highest level a URI reference (hereinafter simply "URI") in string |
| * form has the syntax |
| * |
| * <blockquote> |
| * [<i>scheme</i><tt><b>:</b></tt><i></i>]<i>scheme-specific-part</i>[<tt><b>#</b></tt><i>fragment</i>] |
| * </blockquote> |
| * |
| * where square brackets [...] delineate optional components and the characters |
| * <tt><b>:</b></tt> and <tt><b>#</b></tt> stand for themselves. |
| * |
| * <p> An <i>absolute</i> URI specifies a scheme; a URI that is not absolute is |
| * said to be <i>relative</i>. URIs are also classified according to whether |
| * they are <i>opaque</i> or <i>hierarchical</i>. |
| * |
| * <p> An <i>opaque</i> URI is an absolute URI whose scheme-specific part does |
| * not begin with a slash character (<tt>'/'</tt>). Opaque URIs are not |
| * subject to further parsing. Some examples of opaque URIs are: |
| * |
| * <blockquote><table cellpadding=0 cellspacing=0> |
| * <tr><td><tt>mailto:java-net@java.sun.com</tt></td></tr> |
| * <tr><td><tt>news:comp.lang.java</tt></td></tr> |
| * <tr><td><tt>urn:isbn:096139210x</tt></td></tr> |
| * </table></blockquote> |
| * |
| * <p> A <i>hierarchical</i> URI is either an absolute URI whose |
| * scheme-specific part begins with a slash character, or a relative URI, that |
| * is, a URI that does not specify a scheme. Some examples of hierarchical |
| * URIs are: |
| * |
| * <blockquote> |
| * <tt>http://java.sun.com/j2se/1.3/</tt><br> |
| * <tt>docs/guide/collections/designfaq.html#28</tt><br> |
| * <tt>../../../demo/jfc/SwingSet2/src/SwingSet2.java</tt><br> |
| * <tt>file:///~/calendar</tt> |
| * </blockquote> |
| * |
| * <p> A hierarchical URI is subject to further parsing according to the syntax |
| * |
| * <blockquote> |
| * [<i>scheme</i><tt><b>:</b></tt>][<tt><b>//</b></tt><i>authority</i>][<i>path</i>][<tt><b>?</b></tt><i>query</i>][<tt><b>#</b></tt><i>fragment</i>] |
| * </blockquote> |
| * |
| * where the characters <tt><b>:</b></tt>, <tt><b>/</b></tt>, |
| * <tt><b>?</b></tt>, and <tt><b>#</b></tt> stand for themselves. The |
| * scheme-specific part of a hierarchical URI consists of the characters |
| * between the scheme and fragment components. |
| * |
| * <p> The authority component of a hierarchical URI is, if specified, either |
| * <i>server-based</i> or <i>registry-based</i>. A server-based authority |
| * parses according to the familiar syntax |
| * |
| * <blockquote> |
| * [<i>user-info</i><tt><b>@</b></tt>]<i>host</i>[<tt><b>:</b></tt><i>port</i>] |
| * </blockquote> |
| * |
| * where the characters <tt><b>@</b></tt> and <tt><b>:</b></tt> stand for |
| * themselves. Nearly all URI schemes currently in use are server-based. An |
| * authority component that does not parse in this way is considered to be |
| * registry-based. |
| * |
| * <p> The path component of a hierarchical URI is itself said to be absolute |
| * if it begins with a slash character (<tt>'/'</tt>); otherwise it is |
| * relative. The path of a hierarchical URI that is either absolute or |
| * specifies an authority is always absolute. |
| * |
| * <p> All told, then, a URI instance has the following nine components: |
| * |
| * <blockquote><table> |
| * <tr><td><i>Component</i></td><td><i>Type</i></td></tr> |
| * <tr><td>scheme</td><td><tt>String</tt></td></tr> |
| * <tr><td>scheme-specific-part </td><td><tt>String</tt></td></tr> |
| * <tr><td>authority</td><td><tt>String</tt></td></tr> |
| * <tr><td>user-info</td><td><tt>String</tt></td></tr> |
| * <tr><td>host</td><td><tt>String</tt></td></tr> |
| * <tr><td>port</td><td><tt>int</tt></td></tr> |
| * <tr><td>path</td><td><tt>String</tt></td></tr> |
| * <tr><td>query</td><td><tt>String</tt></td></tr> |
| * <tr><td>fragment</td><td><tt>String</tt></td></tr> |
| * </table></blockquote> |
| * |
| * In a given instance any particular component is either <i>undefined</i> or |
| * <i>defined</i> with a distinct value. Undefined string components are |
| * represented by <tt>null</tt>, while undefined integer components are |
| * represented by <tt>-1</tt>. A string component may be defined to have the |
| * empty string as its value; this is not equivalent to that component being |
| * undefined. |
| * |
| * <p> Whether a particular component is or is not defined in an instance |
| * depends upon the type of the URI being represented. An absolute URI has a |
| * scheme component. An opaque URI has a scheme, a scheme-specific part, and |
| * possibly a fragment, but has no other components. A hierarchical URI always |
| * has a path (though it may be empty) and a scheme-specific-part (which at |
| * least contains the path), and may have any of the other components. If the |
| * authority component is present and is server-based then the host component |
| * will be defined and the user-information and port components may be defined. |
| * |
| * See <a href="http://www.isi.edu/in-notes/rfc2396.txt"><i>RFC 2396: |
| * Uniform Resource Identifiers (URI): Generic Syntax</i></a> |
| * |
| */ |
class URITransformer {

    /**
     * Set of octets that are copied verbatim when encoding.
     * <p>
     * Only the following characters are not encoded:<br>
     * A-Z a-z 0-9 = : @ &amp; $ - _ . + ! * ' ( ) ,
     * </p>
     * <p>
     * Note that {@link #uri2path(String)} uses <b>=</b> as its escape marker
     * and therefore always escapes <b>=</b> itself, even though it is a
     * member of this set.
     * </p>
     */
    protected static BitSet safeCharacters;

    /** Name of the only charset used by this class. */
    private static final String UTF8 = "UTF-8";

    /** Lower-case hex digits, matching the output of Integer.toHexString. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    static {
        safeCharacters = new BitSet(256);
        safeCharacters.set('a', 'z' + 1);
        safeCharacters.set('A', 'Z' + 1);
        safeCharacters.set('0', '9' + 1);
        for (char c : "=:@&$-_.+!*'(),".toCharArray()) {
            safeCharacters.set(c);
        }
    }

    /**
     * Encode path to be used as path component segments in URI.
     *
     * <p>Creates a String that can be used as a sequence of one or more
     * path components in an URI from a path that uses a slash
     * character as a path separator and where the segments do not use
     * any URI encoding rules.</p>
     *
     * <p>The path is converted to UTF-8; every octet that is neither in
     * {@link #safeCharacters} nor a <b>/</b> is replaced by <b>%</b>
     * followed by exactly two lower-case hex digits. The <b>/</b>
     * characters (delimiting the individual path_segments) are left
     * unchanged.</p>
     *
     * @param path A path that is not using URI encoding rules, may be <tt>null</tt>.
     * @return A path that is using URI encoding rules, or <tt>null</tt> if
     *         <tt>path</tt> was <tt>null</tt>.
     *
     * @see #decodePath(String)
     */
    public static String encodePath(String path) {
        if (path == null) {
            return null;
        }
        return escapeOctets(path, '%', true);
    }

    /**
     * Decode path component segments in URI.
     *
     * <p>Creates a path that uses a slash character as a path separator
     * and where the segments do not use any URI encoding
     * from a String that is used as a sequence of one or more
     * path components in an URI where the path segments do use
     * URI encoding rules.</p>
     *
     * <p>Each <b>%</b> followed by two hex digits is replaced by the octet
     * it denotes and the resulting octets are interpreted as UTF-8. A
     * <b>%</b> that is not followed by two hex digits is copied unchanged
     * together with the two characters after it. The <b>/</b> characters
     * (delimiting the individual path_segments) are left unchanged.</p>
     *
     * @param path A path that is using URI encoding rules, may be <tt>null</tt>.
     * @return A path that is not using URI encoding rules, or <tt>null</tt> if
     *         <tt>path</tt> was <tt>null</tt>.
     *
     * @see #encodePath(String)
     */
    public static String decodePath(String path) {
        if (path == null) {
            return null;
        }
        return unescapeOctets(path, '%');
    }

    /**
     * Extract URI from a path.
     *
     * <p>Transforms a path that was created with the
     * {@link #uri2path(String)} method back to an URI. A single leading
     * slash of the path is ignored.</p>
     *
     * <p>This method does try to cope with an erroneous
     * input parameter but the result returned in such a case is not
     * guaranteed to be a valid URI.</p>
     *
     * @param path the path that contains the URI information
     * @return a String representing a URI, or <tt>null</tt> if
     *         <tt>path</tt> was <tt>null</tt>
     *
     * @see #uri2path(String)
     */
    public static String path2uri(String path) {
        if (path == null) {
            return null;
        }
        // ignore leading slash
        String npath = path.startsWith("/") ? path.substring(1) : path;
        int len = npath.length();

        int slash = npath.indexOf('/');
        if (slash == -1) {
            // Only a scheme (or nothing at all) is present.
            return len == 0 ? npath : npath + "://";
        }

        StringBuilder uri = new StringBuilder();
        boolean hasAuthority = true;
        int i = slash;

        if (slash == 0) {
            uri.append("/");
        } else {
            uri.append(npath, 0, slash).append(':');
            // The segment after the scheme may be one of the "==N" markers
            // written by uri2path; anything else is a regular authority.
            if (npath.startsWith("==0", slash + 1)) {
                // opaque URI: skip the marker, the opaque part follows its slash
                i += 4;
            } else if (npath.startsWith("==1", slash + 1)) {
                // hierarchical URI without authority ("scheme:/path")
                hasAuthority = false;
                i += 3;
            } else if (npath.startsWith("==2", slash + 1)) {
                // hierarchical URI with empty authority ("scheme:///path")
                uri.append("//");
                hasAuthority = false;
                i += 3;
            } else {
                // Regular authority. "//" is appended regardless of how many
                // characters follow, so that short authorities round-trip.
                uri.append("//");
            }
        }

        String rpath = "";
        if (hasAuthority) {
            String auth = "";
            int start = i + 1;
            if (start <= len) {
                int end = npath.indexOf('/', start);
                if (end == -1) {
                    end = len;
                } else {
                    rpath = npath.substring(end);
                }
                auth = npath.substring(start, end);
            }
            uri.append(unescapeOctets(auth, '='));
        } else if (i + 1 <= len) {
            rpath = npath.substring(i + 1); // empty authority
        }

        uri.append(encodePath(rpath));
        return uri.toString();
    }

    /**
     * Embed URI into path.
     *
     * <p>A String without a <b>:</b> is returned unchanged. Otherwise the
     * text before the first <b>:</b> is taken as the <i>scheme</i>.</p>
     *
     * <h4>Opaque URIs</h4>
     *
     * <p>Opaque URIs are mapped to a <i>path</i> of the form
     * &lt;<i>scheme</i> <b>/==0/</b> <i>opaque_part'</i>&gt;.</p>
     *
     * <p>The mapping from <i>opaque_part</i> to <i>opaque_part'</i>
     * works as follows:</p>
     *
     * <p>Octets from the set <b>A-Z a-z 0-9 : @ &amp; $ - _ . + ! * ' ( ) ,</b>
     * are left unchanged.</p>
     *
     * <p>Other octets (including <b>=</b> itself) are replaced with <b>=</b>
     * followed by two hex digits that represent the octet's numerical
     * value.</p>
     *
     * <h4>Hierarchical URIs without an <i>authority</i> component</h4>
     *
     * <p>Hierarchical URIs without an <i>authority</i> component
     * are mapped to a <i>path</i> of the form
     * &lt;<i>scheme</i> <b>/==1</b> <i>abs_path'</i>&gt;.</p>
     *
     * <h4>Hierarchical URIs with an <i>authority</i> component</h4>
     *
     * <p>Hierarchical URIs with an <i>authority</i> component
     * are mapped to a <i>path</i> of the form
     * &lt;<i>scheme</i> <b>/</b> <i>authority'</i> <i>abs_path'</i>&gt;.</p>
     *
     * <p>If <i>authority</i> is empty, it is mapped to
     * <b>==2</b>. This eliminates
     * problems if the servlet container drops final slashes
     * from <i>paths</i> or cannot handle empty segments
     * within <i>paths.</i></p>
     *
     * <p>If <i>authority</i> is non-empty, it is mapped as described
     * for the <i>opaque_part</i> above.</p>
     *
     * <h4>Mapping of <i>abs_path</i></h4>
     *
     * <p>If <i>abs_path</i> is empty, it is left unchanged.</p>
     *
     * <p>If <i>abs_path</i> is non-empty, it is decoded with
     * the {@link #decodePath(String)} method.</p>
     *
     * @param uri the URI to embed, may be <tt>null</tt>
     * @return the path representing the URI, or <tt>null</tt> if
     *         <tt>uri</tt> was <tt>null</tt>
     *
     * @see #path2uri(String)
     */
    public static String uri2path(String uri) {
        if (uri == null) {
            return null;
        }
        int colon = uri.indexOf(':');
        if (colon == -1) {
            return uri;
        }

        StringBuilder path = new StringBuilder();
        path.append(uri, 0, colon).append('/');

        // Count the slashes that directly follow the scheme (at most two).
        int slashes;
        int start;
        if (uri.startsWith("//", colon + 1)) {
            slashes = 2;
            start = colon + 3;
        } else if (uri.startsWith("/", colon + 1)) {
            slashes = 1;
            start = colon + 2;
        } else {
            slashes = 0;
            start = colon + 1;
        }

        // Everything up to the next slash is the authority (or opaque part),
        // the remainder is the path.
        int end = uri.indexOf('/', start);
        String rest;
        if (end == -1) {
            end = uri.length();
            rest = "";
        } else {
            rest = uri.substring(end);
        }
        String authority = uri.substring(start, end);

        if (slashes == 2 && authority.length() == 0) {
            path.append("==2");
        } else if (slashes == 1) {
            path.append("==1/");
        } else if (slashes == 0) {
            path.append("==0/");
        }

        path.append(escapeOctets(authority, '=', false));
        path.append(decodePath(rest));
        return path.toString();
    }

    /**
     * Replaces every UTF-8 octet of <tt>text</tt> that may not appear
     * literally by <tt>marker</tt> followed by two lower-case hex digits.
     * An octet is literal if it is in {@link #safeCharacters} and differs
     * from <tt>marker</tt>, or if it is a slash and <tt>keepSlash</tt> is set.
     */
    private static String escapeOctets(String text, char marker, boolean keepSlash) {
        byte[] octets = utf8Bytes(text);
        StringBuilder out = new StringBuilder(octets.length + 16);
        for (byte octet : octets) {
            int v = octet & 0xFF;
            boolean literal = (v != marker && safeCharacters.get(v))
                    || (keepSlash && v == '/');
            if (literal) {
                out.append((char) v);
            } else {
                out.append(marker)
                        .append(HEX_DIGITS[v >>> 4])
                        .append(HEX_DIGITS[v & 0x0F]);
            }
        }
        return out.toString();
    }

    /**
     * Replaces every <tt>marker</tt> + two hex digits sequence in
     * <tt>text</tt> by the octet it denotes and interprets the decoded
     * region as UTF-8. Malformed sequences are copied unchanged; scanning
     * resumes three characters after the marker. Text after the last
     * decoded sequence is appended as is.
     */
    private static String unescapeOctets(String text, char marker) {
        int next = text.indexOf(marker);
        if (next == -1) {
            return text;
        }
        int len = text.length();
        ByteArrayOutputStream decoded = new ByteArrayOutputStream(len);
        int pos = 0; // start of the text that has not been copied yet
        while (next != -1 && next + 3 <= len) {
            int hi = hexValue(text.charAt(next + 1));
            int lo = hexValue(text.charAt(next + 2));
            if (hi >= 0 && lo >= 0) {
                byte[] literal = utf8Bytes(text.substring(pos, next));
                decoded.write(literal, 0, literal.length);
                decoded.write((hi << 4) | lo);
                pos = next + 3;
            }
            // A malformed sequence stays part of the pending literal text.
            next = text.indexOf(marker, next + 3);
        }
        return utf8String(decoded.toByteArray()) + text.substring(pos);
    }

    /** Returns the value of an ASCII hex digit, or -1 if <tt>c</tt> is none. */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /** Converts <tt>text</tt> to its UTF-8 octets. */
    private static byte[] utf8Bytes(String text) {
        try {
            return text.getBytes(UTF8);
        } catch (UnsupportedEncodingException ex) {
            // Every Java platform is required to support UTF-8.
            throw new IllegalStateException("UTF-8 is not supported", ex);
        }
    }

    /** Interprets <tt>octets</tt> as UTF-8 text. */
    private static String utf8String(byte[] octets) {
        try {
            return new String(octets, UTF8);
        } catch (UnsupportedEncodingException ex) {
            // Every Java platform is required to support UTF-8.
            throw new IllegalStateException("UTF-8 is not supported", ex);
        }
    }

    private URITransformer() {
    }
}