/*

   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

 */
package org.apache.batik.util;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.zip.DataFormatException;
import java.util.zip.GZIPInputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import java.util.zip.ZipException;

| /** |
| * Holds the data for more URLs. |
| * |
| * @author <a href="mailto:deweese@apache.org">Thomas DeWeese</a> |
| * @version $Id$ |
| */ |
| public class ParsedURLData { |
| |
| protected static final String HTTP_USER_AGENT_HEADER = "User-Agent"; |
| |
| protected static final String HTTP_ACCEPT_HEADER = "Accept"; |
| protected static final String HTTP_ACCEPT_LANGUAGE_HEADER = "Accept-Language"; |
| protected static final String HTTP_ACCEPT_ENCODING_HEADER = "Accept-Encoding"; |
| |
| protected static List acceptedEncodings = new LinkedList(); |
| static { |
| acceptedEncodings.add("gzip"); |
| } |
| |
| /** |
| * GZIP header magic number bytes, like found in a gzipped |
| * files, which are encoded in Intel format (i.e. little indian). |
| */ |
| public static final byte[] GZIP_MAGIC = {(byte)0x1f, (byte)0x8b}; |
| |
| /** |
| * This is a utility function others can call that checks if |
| * is is a GZIP stream if so it returns a GZIPInputStream that |
| * will decode the contents, otherwise it returns (or a |
| * buffered version of is) untouched. |
| * @param is Stream that may potentially be a GZIP stream. |
| */ |
| public static InputStream checkGZIP(InputStream is) |
| throws IOException { |
| |
| if (!is.markSupported()) |
| is = new BufferedInputStream(is); |
| byte[] data = new byte[2]; |
| try { |
| is.mark(2); |
| is.read(data); |
| is.reset(); |
| } catch (Exception ex) { |
| is.reset(); |
| return is; |
| } |
| if ((data[0] == GZIP_MAGIC[0]) && |
| (data[1] == GZIP_MAGIC[1])) |
| return new GZIPInputStream(is); |
| |
| if (((data[0]&0x0F) == 8) && |
| ((data[0]>>>4) <= 7)) { |
| // Check for a zlib (deflate) stream |
| int chk = ((((int)data[0])&0xFF)*256+ |
| (((int)data[1])&0xFF)); |
| if ((chk %31) == 0) { |
| try { |
| // I'm not really as certain of this check |
| // as I would like so I want to force it |
| // to decode part of the stream. |
| is.mark(100); |
| InputStream ret = new InflaterInputStream(is); |
| if (!ret.markSupported()) |
| ret = new BufferedInputStream(ret); |
| ret.mark(2); |
| ret.read(data); |
| is.reset(); |
| ret = new InflaterInputStream(is); |
| return ret; |
| } catch (ZipException ze) { |
| is.reset(); |
| return is; |
| } |
| } |
| } |
| |
| return is; |
| } |
| |
| /** |
| * Since the Data instance is 'hidden' in the ParsedURL |
| * instance we make all our methods public. This makes it |
| * easy for the various Protocol Handlers to update an |
| * instance as parsing proceeds. |
| */ |
| public String protocol = null; |
| public String host = null; |
| public int port = -1; |
| public String path = null; |
| public String ref = null; |
| public String contentType = null; |
| public String contentEncoding = null; |
| |
| public InputStream stream = null; |
| public boolean hasBeenOpened = false; |
| |
| /** |
| * The extracted type/subtype from the Content-Type header. |
| */ |
| protected String contentTypeMediaType; |
| |
| /** |
| * The extracted charset parameter from the Content-Type header. |
| */ |
| protected String contentTypeCharset; |
| |
| /** |
| * The URL that was ultimately used to fetch the resource. |
| */ |
| protected URL postConnectionURL; |
| |
| /** |
| * Void constructor |
| */ |
| public ParsedURLData() { |
| } |
| |
| /** |
| * Build from an existing URL. |
| */ |
| public ParsedURLData(URL url) { |
| protocol = url.getProtocol(); |
| if ((protocol != null) && (protocol.length() == 0)) |
| protocol = null; |
| |
| host = url.getHost(); |
| if ((host != null) && (host.length() == 0)) |
| host = null; |
| |
| port = url.getPort(); |
| |
| path = url.getFile(); |
| if ((path != null) && (path.length() == 0)) |
| path = null; |
| |
| ref = url.getRef(); |
| if ((ref != null) && (ref.length() == 0)) |
| ref = null; |
| } |
| |
| /** |
| * Attempts to build a normal java.net.URL instance from this |
| * URL. |
| */ |
| protected URL buildURL() throws MalformedURLException { |
| |
| // System.out.println("File: " + file); |
| // if (ref != null) |
| // file += "#" + ref; |
| // System.err.println("Building: " + protocol + " - " + |
| // host + " - " + path); |
| |
| if ((protocol != null) && (host != null)) { |
| String file = ""; |
| if (path != null) |
| file = path; |
| if (port == -1) |
| return new URL(protocol, host, file); |
| |
| return new URL(protocol, host, port, file); |
| } |
| |
| return new URL(toString()); |
| } |
| |
| /** |
| * Implement Object.hashCode. |
| */ |
| public int hashCode() { |
| int hc = port; |
| if (protocol != null) |
| hc ^= protocol.hashCode(); |
| if (host != null) |
| hc ^= host.hashCode(); |
| |
| // For some URLs path and ref can get fairly long |
| // and the most unique part is towards the end |
| // so we grab that part for HC purposes |
| if (path != null) { |
| int len = path.length(); |
| if (len > 20) |
| hc ^= path.substring(len-20).hashCode(); |
| else |
| hc ^= path.hashCode(); |
| } |
| if (ref != null) { |
| int len = ref.length(); |
| if (len > 20) |
| hc ^= ref.substring(len-20).hashCode(); |
| else |
| hc ^= ref.hashCode(); |
| } |
| |
| return hc; |
| } |
| |
| /** |
| * Implement Object.equals for ParsedURLData. |
| */ |
| public boolean equals(Object obj) { |
| if (obj == null) return false; |
| if (! (obj instanceof ParsedURLData)) |
| return false; |
| |
| ParsedURLData ud = (ParsedURLData)obj; |
| if (ud.port != port) |
| return false; |
| |
| if (ud.protocol==null) { |
| if (protocol != null) |
| return false; |
| } else if (protocol == null) |
| return false; |
| else if (!ud.protocol.equals(protocol)) |
| return false; |
| |
| if (ud.host==null) { |
| if (host !=null) |
| return false; |
| } else if (host == null) |
| return false; |
| else if (!ud.host.equals(host)) |
| return false; |
| |
| if (ud.ref==null) { |
| if (ref !=null) |
| return false; |
| } else if (ref == null) |
| return false; |
| else if (!ud.ref.equals(ref)) |
| return false; |
| |
| if (ud.path==null) { |
| if (path !=null) |
| return false; |
| } else if (path == null) |
| return false; |
| else if (!ud.path.equals(path)) |
| return false; |
| |
| return true; |
| } |
| |
| /** |
| * Returns the content type if available. This is only available |
| * for some protocols. |
| */ |
| public String getContentType(String userAgent) { |
| if (contentType != null) |
| return contentType; |
| |
| if (!hasBeenOpened) { |
| try { |
| openStreamInternal(userAgent, null, null); |
| } catch (IOException ioe) { /* nothing */ } |
| } |
| |
| return contentType; |
| } |
| |
| /** |
| * Returns the content type's type/subtype, if available. This is |
| * only available for some protocols. |
| */ |
| public String getContentTypeMediaType(String userAgent) { |
| if (contentTypeMediaType != null) { |
| return contentTypeMediaType; |
| } |
| |
| extractContentTypeParts(userAgent); |
| |
| return contentTypeMediaType; |
| } |
| |
| /** |
| * Returns the content type's charset parameter, if available. This is |
| * only available for some protocols. |
| */ |
| public String getContentTypeCharset(String userAgent) { |
| if (contentTypeMediaType != null) { |
| return contentTypeCharset; |
| } |
| |
| extractContentTypeParts(userAgent); |
| |
| return contentTypeCharset; |
| } |
| |
| /** |
| * Returns whether the Content-Type header has the given parameter. |
| */ |
| public boolean hasContentTypeParameter(String userAgent, String param) { |
| getContentType(userAgent); |
| if (contentType == null) { |
| return false; |
| } |
| int i = 0; |
| int len = contentType.length(); |
| int plen = param.length(); |
| loop1: while (i < len) { |
| switch (contentType.charAt(i)) { |
| case ' ': |
| case ';': |
| break loop1; |
| } |
| i++; |
| } |
| if (i == len) { |
| contentTypeMediaType = contentType; |
| } else { |
| contentTypeMediaType = contentType.substring(0, i); |
| } |
| loop2: for (;;) { |
| while (i < len && contentType.charAt(i) != ';') { |
| i++; |
| } |
| if (i == len) { |
| return false; |
| } |
| i++; |
| while (i < len && contentType.charAt(i) == ' ') { |
| i++; |
| } |
| if (i >= len - plen - 1) { |
| return false; |
| } |
| for (int j = 0; j < plen; j++) { |
| if (!(contentType.charAt(i++) == param.charAt(j))) { |
| continue loop2; |
| } |
| } |
| if (contentType.charAt(i) == '=') { |
| return true; |
| } |
| } |
| } |
| |
| /** |
| * Extracts the type/subtype and charset parameter from the Content-Type |
| * header. |
| */ |
| protected void extractContentTypeParts(String userAgent) { |
| getContentType(userAgent); |
| if (contentType == null) { |
| return; |
| } |
| int i = 0; |
| int len = contentType.length(); |
| loop1: while (i < len) { |
| switch (contentType.charAt(i)) { |
| case ' ': |
| case ';': |
| break loop1; |
| } |
| i++; |
| } |
| if (i == len) { |
| contentTypeMediaType = contentType; |
| } else { |
| contentTypeMediaType = contentType.substring(0, i); |
| } |
| for (;;) { |
| while (i < len && contentType.charAt(i) != ';') { |
| i++; |
| } |
| if (i == len) { |
| return; |
| } |
| i++; |
| while (i < len && contentType.charAt(i) == ' ') { |
| i++; |
| } |
| if (i >= len - 8) { |
| return; |
| } |
| if (contentType.charAt(i++) == 'c') { |
| if (contentType.charAt(i++) != 'h') continue; |
| if (contentType.charAt(i++) != 'a') continue; |
| if (contentType.charAt(i++) != 'r') continue; |
| if (contentType.charAt(i++) != 's') continue; |
| if (contentType.charAt(i++) != 'e') continue; |
| if (contentType.charAt(i++) != 't') continue; |
| if (contentType.charAt(i++) != '=') continue; |
| int j = i; |
| loop2: while (i < len) { |
| switch (contentType.charAt(i)) { |
| case ' ': |
| case ';': |
| break loop2; |
| } |
| i++; |
| } |
| contentTypeCharset = contentType.substring(j, i); |
| return; |
| } |
| } |
| } |
| |
| /** |
| * Returns the content encoding if available. This is only available |
| * for some protocols. |
| */ |
| public String getContentEncoding(String userAgent) { |
| if (contentEncoding != null) |
| return contentEncoding; |
| |
| if (!hasBeenOpened) { |
| try { |
| openStreamInternal(userAgent, null, null); |
| } catch (IOException ioe) { /* nothing */ } |
| } |
| |
| return contentEncoding; |
| } |
| |
| /** |
| * Returns true if the URL looks well formed and complete. |
| * This does not garuntee that the stream can be opened but |
| * is a good indication that things aren't totally messed up. |
| */ |
| public boolean complete() { |
| try { |
| buildURL(); |
| } catch (MalformedURLException mue) { |
| return false; |
| } |
| return true; |
| } |
| |
| /** |
| * Open the stream and check for common compression types. If |
| * the stream is found to be compressed with a standard |
| * compression type it is automatically decompressed. |
| * @param userAgent The user agent opening the stream (may be null). |
| * @param mimeTypes The expected mime types of the content |
| * in the returned InputStream (mapped to Http accept |
| * header among other possability). The elements of |
| * the iterator must be strings (may be null) |
| */ |
| public InputStream openStream(String userAgent, Iterator mimeTypes) |
| throws IOException { |
| InputStream raw = openStreamInternal(userAgent, mimeTypes, |
| acceptedEncodings.iterator()); |
| if (raw == null) |
| return null; |
| stream = null; |
| |
| return checkGZIP(raw); |
| } |
| |
| /** |
| * Open the stream and returns it. No checks are made to see |
| * if the stream is compressed or encoded in any way. |
| * @param userAgent The user agent opening the stream (may be null). |
| * @param mimeTypes The expected mime types of the content |
| * in the returned InputStream (mapped to Http accept |
| * header among other possability). The elements of |
| * the iterator must be strings (may be null) |
| */ |
| public InputStream openStreamRaw(String userAgent, Iterator mimeTypes) |
| throws IOException { |
| |
| InputStream ret = openStreamInternal(userAgent, mimeTypes, null); |
| stream = null; |
| return ret; |
| } |
| |
| protected InputStream openStreamInternal(String userAgent, |
| Iterator mimeTypes, |
| Iterator encodingTypes) |
| throws IOException { |
| if (stream != null) |
| return stream; |
| |
| hasBeenOpened = true; |
| |
| URL url = null; |
| try { |
| url = buildURL(); |
| } catch (MalformedURLException mue) { |
| throw new IOException |
| ("Unable to make sense of URL for connection"); |
| } |
| |
| if (url == null) |
| return null; |
| |
| URLConnection urlC = url.openConnection(); |
| if (urlC instanceof HttpURLConnection) { |
| if (userAgent != null) |
| urlC.setRequestProperty(HTTP_USER_AGENT_HEADER, userAgent); |
| |
| if (mimeTypes != null) { |
| String acceptHeader = ""; |
| while (mimeTypes.hasNext()) { |
| acceptHeader += mimeTypes.next(); |
| if (mimeTypes.hasNext()) |
| acceptHeader += ","; |
| } |
| urlC.setRequestProperty(HTTP_ACCEPT_HEADER, acceptHeader); |
| } |
| |
| if (encodingTypes != null) { |
| String encodingHeader = ""; |
| while (encodingTypes.hasNext()) { |
| encodingHeader += encodingTypes.next(); |
| if (encodingTypes.hasNext()) |
| encodingHeader += ","; |
| } |
| urlC.setRequestProperty(HTTP_ACCEPT_ENCODING_HEADER, |
| encodingHeader); |
| } |
| |
| contentType = urlC.getContentType(); |
| contentEncoding = urlC.getContentEncoding(); |
| postConnectionURL = urlC.getURL(); |
| } |
| |
| try { |
| return (stream = urlC.getInputStream()); |
| } catch (IOException e) { |
| if (urlC instanceof HttpURLConnection) { |
| // bug 49889: if available, return the error stream |
| // (allow interpretation of content in the HTTP error response) |
| stream = ((HttpURLConnection) urlC).getErrorStream(); |
| if (stream == null) { |
| throw e; |
| } |
| return stream; |
| } else { |
| throw e; |
| } |
| } |
| |
| } |
| |
| /** |
| * Returns the URL up to and include the port number on |
| * the host. Does not include the path or fragment pieces. |
| */ |
| public String getPortStr() { |
| String portStr =""; |
| if (protocol != null) |
| portStr += protocol + ":"; |
| |
| if ((host != null) || (port != -1)) { |
| portStr += "//"; |
| if (host != null) portStr += host; |
| if (port != -1) portStr += ":" + port; |
| } |
| |
| return portStr; |
| } |
| |
| protected boolean sameFile(ParsedURLData other) { |
| if (this == other) return true; |
| |
| // Check if the rest of the two PURLs matche other than |
| // the 'ref' |
| if ((port == other.port) && |
| ((path == other.path) |
| || ((path!=null) && path.equals(other.path))) && |
| ((host == other.host) |
| || ((host!=null) && host.equals(other.host))) && |
| ((protocol == other.protocol) |
| || ((protocol!=null) && protocol.equals(other.protocol)))) |
| return true; |
| |
| return false; |
| } |
| |
| |
| /** |
| * Return a string representation of the data. |
| */ |
| public String toString() { |
| String ret = getPortStr(); |
| if (path != null) |
| ret += path; |
| |
| if (ref != null) |
| ret += "#" + ref; |
| |
| return ret; |
| } |
| |
| /** |
| * Returns the URL that was ultimately used to fetch the resource |
| * represented by the <code>ParsedURL</code>. |
| */ |
| public String getPostConnectionURL() { |
| if (postConnectionURL != null) { |
| if (ref != null) { |
| return postConnectionURL.toString() + '#' + ref; |
| } |
| return postConnectionURL.toString(); |
| } |
| return toString(); |
| } |
| } |
| |