| /* |
| |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| |
| */ |
| package org.apache.batik.util; |
| |
| import java.net.MalformedURLException; |
| import java.net.URL; |
| |
| |
| /** |
| * The default protocol handler. It handles the most common |
| * protocols, such as 'file', 'http' and 'ftp'. |
| * The parsing should be general enough to support most |
| * 'normal' URL formats, so in many cases subclasses can reuse it as is. |
| * |
| * @author <a href="mailto:deweese@apache.org">Thomas DeWeese</a> |
| * @version $Id$ |
| */ |
| public class ParsedURLDefaultProtocolHandler |
| extends AbstractParsedURLProtocolHandler { |
| |
/**
 * Creates a handler that is not bound to any particular protocol:
 * <code>null</code> is handed to the superclass as the protocol, which
 * is what makes this instance the default handler.
 */
public ParsedURLDefaultProtocolHandler() {
    super(null);
}
| |
/**
 * Constructor for subclasses that want to reuse this parsing logic
 * for one specific protocol.
 *
 * @param protocol the protocol to be handled; passed unchanged to the
 *                 superclass constructor.
 */
protected ParsedURLDefaultProtocolHandler(String protocol) {
    super(protocol);
}
| |
| /** |
| * Subclasses can override these method to construct alternate |
| * subclasses of ParsedURLData. |
| */ |
| protected ParsedURLData constructParsedURLData() { |
| return new ParsedURLData(); |
| } |
| |
| /** |
| * Subclasses can override these method to construct alternate |
| * subclasses of ParsedURLData. |
| * @param url the java.net.URL class we reference. |
| */ |
| protected ParsedURLData constructParsedURLData(URL url) { |
| return new ParsedURLData(url); |
| } |
| |
| /** |
| * Parses the string and returns the results of parsing in the |
| * ParsedURLData object. |
| * @param urlStr the string to parse as a URL. |
| */ |
| public ParsedURLData parseURL(String urlStr) { |
| try { |
| URL url = new URL(urlStr); |
| // System.err.println("System Parse: " + urlStr); |
| return constructParsedURLData(url); |
| } catch (MalformedURLException mue) { |
| // Built in URL wouldn't take it... |
| // mue.printStackTrace(); |
| } |
| |
| // new Exception("Custom Parse: " + urlStr).printStackTrace(); |
| // System.err.println("Custom Parse: " + urlStr); |
| |
| ParsedURLData ret = constructParsedURLData(); |
| |
| if (urlStr == null) return ret; |
| |
| int pidx=0, idx; |
| int len = urlStr.length(); |
| |
| // Pull fragment id off first... |
| idx = urlStr.indexOf('#'); |
| ret.ref = null; |
| if (idx != -1) { |
| if (idx+1 < len) |
| ret.ref = urlStr.substring(idx+1); |
| urlStr = urlStr.substring(0,idx); |
| len = urlStr.length(); |
| } |
| |
| if (len == 0) |
| return ret; |
| |
| // Protocol is only allowed to include -+.a-zA-Z |
| // So as soon as we hit something else we know we |
| // are done (if it is a ':' then we have protocol otherwise |
| // we don't. |
| idx = 0; |
| char ch = urlStr.charAt(idx); |
| while ((ch == '-') || |
| (ch == '+') || |
| (ch == '.') || |
| ((ch >= 'a') && (ch <= 'z')) || |
| ((ch >= 'A') && (ch <= 'Z'))) { |
| idx++; |
| if (idx == len) { |
| ch=0; |
| break; |
| } |
| ch = urlStr.charAt(idx); |
| } |
| |
| if (ch == ':') { |
| // Has a protocol spec... |
| ret.protocol = urlStr.substring(pidx, idx).toLowerCase(); |
| pidx = idx+1; // Skip ':' |
| } |
| |
| // See if we have host/port spec. |
| idx = urlStr.indexOf('/'); |
| if ((idx == -1) || ((pidx+2<len) && |
| (urlStr.charAt(pidx) == '/') && |
| (urlStr.charAt(pidx+1) == '/'))) { |
| // No slashes (apache.org) or a double slash |
| // (//apache.org/....) so |
| // we should have host[:port] before next slash. |
| if (idx != -1) |
| pidx+=2; // Skip double slash... |
| |
| idx = urlStr.indexOf('/', pidx); // find end of host:Port spec |
| String hostPort; |
| if (idx == -1) |
| // Just host and port nothing following... |
| hostPort = urlStr.substring(pidx); |
| else |
| // Path spec follows... |
| hostPort = urlStr.substring(pidx, idx); |
| |
| int hidx = idx; // Remember location of '/' |
| |
| // pull apart host and port number... |
| idx = hostPort.indexOf(':'); |
| ret.port = -1; |
| if (idx == -1) { |
| // Just Host... |
| if (hostPort.length() == 0) |
| ret.host = null; |
| else |
| ret.host = hostPort; |
| } else { |
| // Host and port |
| if (idx == 0) ret.host = null; |
| else ret.host = hostPort.substring(0,idx); |
| |
| if (idx+1 < hostPort.length()) { |
| String portStr = hostPort.substring(idx+1); |
| try { |
| ret.port = Integer.parseInt(portStr); |
| } catch (NumberFormatException nfe) { |
| // bad port leave as '-1' |
| } |
| } |
| } |
| if (((ret.host == null) || (ret.host.indexOf('.') == -1)) && |
| (ret.port == -1)) |
| // no '.' in a host spec??? and no port, probably |
| // just a path. |
| ret.host = null; |
| else |
| pidx = hidx; |
| } |
| |
| if ((pidx == -1) || (pidx >= len)) return ret; // Nothing follows |
| |
| ret.path = urlStr.substring(pidx); |
| return ret; |
| } |
| |
| public static String unescapeStr(String str) { |
| int idx = str.indexOf('%'); |
| if (idx == -1) return str; // quick out.. |
| |
| int prev=0; |
| StringBuffer ret = new StringBuffer(); |
| while (idx != -1) { |
| if (idx != prev) |
| ret.append(str.substring(prev, idx)); |
| |
| if (idx+2 >= str.length()) break; |
| prev = idx+3; |
| idx = str.indexOf('%', prev); |
| |
| int ch1 = charToHex(str.charAt(idx+1)); |
| int ch2 = charToHex(str.charAt(idx+1)); |
| if ((ch1 == -1) || (ch2==-1)) continue; |
| ret.append((char)(ch1<<4 | ch2)); |
| } |
| |
| return ret.toString(); |
| } |
| |
| public static int charToHex(int ch) { |
| switch(ch) { |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': case '8': case '9': |
| return ch-'0'; |
| case 'a': case 'A': return 10; |
| case 'b': case 'B': return 11; |
| case 'c': case 'C': return 12; |
| case 'd': case 'D': return 13; |
| case 'e': case 'E': return 14; |
| case 'f': case 'F': return 15; |
| default: return -1; |
| } |
| } |
| |
| /** |
| * Parses the string as a sub URL of baseURL, and returns the |
| * results of parsing in the ParsedURLData object. |
| * @param baseURL the base url for parsing. |
| * @param urlStr the string to parse as a URL. |
| */ |
| public ParsedURLData parseURL(ParsedURL baseURL, String urlStr) { |
| // Reference to same document (including fragment, and query). |
| if (urlStr.length() == 0) |
| return baseURL.data; |
| |
| // System.err.println("Base: " + baseURL + "\n" + |
| // "Sub: " + urlStr); |
| |
| int idx = 0, len = urlStr.length(); |
| if (len == 0) return baseURL.data; |
| |
| // Protocol is only allowed to include -+.a-zA-Z |
| // So as soon as we hit something else we know we |
| // are done (if it is a ':' then we have protocol otherwise |
| // we don't. |
| char ch = urlStr.charAt(idx); |
| while ((ch == '-') || |
| (ch == '+') || |
| (ch == '.') || |
| ((ch >= 'a') && (ch <= 'z')) || |
| ((ch >= 'A') && (ch <= 'Z'))) { |
| idx++; |
| if (idx == len) { |
| ch=0; |
| break; |
| } |
| ch = urlStr.charAt(idx); |
| } |
| String protocol = null; |
| if (ch == ':') { |
| // Has a protocol spec... |
| protocol = urlStr.substring(0, idx).toLowerCase(); |
| } |
| |
| if (protocol != null) { |
| // Temporary if we have a protocol then assume absolute |
| // URL. Technically this is the correct handling but much |
| // software supports relative URLs with a protocol that |
| // matches the base URL's protocol. |
| // if (true) |
| // return parseURL(urlStr); |
| if (!protocol.equals(baseURL.getProtocol())) |
| // Different protocols, assume absolute URL ignore base... |
| return parseURL(urlStr); |
| |
| // Same protocols, if char after ':' is a '/' then it's |
| // still absolute... |
| idx++; |
| if (idx == urlStr.length()) |
| // Just a Protocol??? |
| return parseURL(urlStr); |
| |
| if (urlStr.charAt(idx) == '/') |
| // Absolute URL... |
| return parseURL(urlStr); |
| |
| // Still relative just drop the protocol (we will pick it |
| // back up from the baseURL later...). |
| urlStr = urlStr.substring(idx); |
| } |
| |
| if (urlStr.startsWith("/")) { |
| if ((urlStr.length() > 1) && |
| (urlStr.charAt(1) == '/')) { |
| // Relative but only uses protocol from base |
| return parseURL(baseURL.getProtocol() + ":" + urlStr); |
| } |
| // Relative 'absolute' path, uses protocol and authority |
| // (host) from base |
| return parseURL(baseURL.getPortStr() + urlStr); |
| } |
| |
| if (urlStr.startsWith("#")) { |
| String base = baseURL.getPortStr(); |
| if (baseURL.getPath() != null) base += baseURL.getPath(); |
| return parseURL(base + urlStr); |
| } |
| |
| String path = baseURL.getPath(); |
| // No path? well we will treat this as being relative to it's self. |
| if (path == null) path = ""; |
| idx = path.lastIndexOf('/'); |
| if (idx == -1) |
| // baseURL is just a filename (in current dir) so use current dir |
| // as base of new URL. |
| path = ""; |
| else |
| path = path.substring(0,idx+1); |
| |
| // System.err.println("Base Path: " + path); |
| // System.err.println("Base PortStr: " + baseURL.getPortStr()); |
| return parseURL(baseURL.getPortStr() + path + urlStr); |
| } |
| } |
| |