src/whiteboard/org/apache/xerces/tree/Resolver.java - xerces2-j - Git at Google

 /*
  * $Id$
  *
  * The Apache Software License, Version 1.1
  *
  *
  * Copyright (c) 2000 The Apache Software Foundation.  All rights
  * reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the
  *    distribution.
  *
  * 3. The end-user documentation included with the redistribution,
  *    if any, must include the following acknowledgment:
  *       "This product includes software developed by the
  *        Apache Software Foundation (http://www.apache.org/)."
  *    Alternately, this acknowledgment may appear in the software itself,
  *    if and wherever such third-party acknowledgments normally appear.
  *
  * 4. The names "Crimson" and "Apache Software Foundation" must
  *    not be used to endorse or promote products derived from this
  *    software without prior written permission. For written
  *    permission, please contact apache@apache.org.
  *
  * 5. Products derived from this software may not be called "Apache",
  *    nor may "Apache" appear in their name, without prior written
  *    permission of the Apache Software Foundation.
  *
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * ====================================================================
  *
  * This software consists of voluntary contributions made by many
  * individuals on behalf of the Apache Software Foundation and was
  * originally based on software copyright (c) 1999, Sun Microsystems, Inc.,
  * http://www.sun.com.  For more information on the Apache Software
  * Foundation, please see <http://www.apache.org/>.
  */


 package org.apache.xerces.tree;

 import java.io.File;
 import java.io.FileInputStream;
 import java.io.InputStream;
 import java.io.IOException;
 import java.net.URL;
 import java.net.URLConnection;
 import java.net.HttpURLConnection;
 import java.util.Hashtable;

 import org.xml.sax.*;


 /**
  * This entity resolver class provides a number of utilities which can help
  * managment of external parsed entities in XML.  These are commonly used
  * to hold markup declarations that are to be used as part of a Document
  * Type Declaration (DTD), or to hold text marked up with XML.
  *
  * <P> Features include: <UL>
  *
  * <LI> Static factory methods are provided for constructing SAX InputSource
  * objects from Files, URLs, or MIME objects.  This eliminates a class of
  * error-prone coding in applications.
  *
  * <LI> Character encodings for XML documents are correctly supported: <UL>
  *
  *	<LI> The encodings defined in the RFCs for MIME content types
  *	(2046 for general MIME, and 2376 for XML in particular), are
  *	supported, handling <em>charset=...</em> attributes and accepting
  *	content types which are known to be safe for use with XML;
  *
  *	<LI> The character encoding autodetection algorithm identified
  *	in the XML specification is used, and leverages all of
  *	the JDK 1.1 (and later) character encoding support.
  *
  *	<LI> The use of MIME typing may optionally be disabled, forcing the
  *	use of autodetection, to support web servers which don't correctly
  *	report MIME types for XML.  For example, they may report text that
  *	is encoded in EUC-JP as being US-ASCII text, leading to fatal
  *	errors during parsing.
  *
  *	<LI> The InputSource objects returned by this class always
  *	have a <code>java.io.Reader</code> available as the "character
  *	stream" property.
  *
  *	</UL>
  *
  * <LI> Catalog entries can map public identifiers to Java resources or
  * to local URLs.  These are used to reduce network dependencies and loads,
  * and will often be used for external DTD components.  For example, packages
  * shipping DTD files as resources in JAR files can eliminate network traffic
  * when accessing them, and sites may provide local caches of common DTDs.
  * Note that no particular catalog syntax is supported by this class, only
  * the notion of a set of entries.
  *
  * </UL>
  *
  * <P> Subclasses can perform tasks such as supporting new URI schemes for
  * URIs which are not URLs, such as URNs (see RFC 2396) or for accessing
  * MIME entities which are part of a <em>multipart/related</em> group
  * (see RFC 2387).  They may also be used to support particular catalog
  * syntaxes, such as the <a href="http://www.oasis-open.org/html/a401.htm">
  * SGML/Open Catalog (SOCAT)</a> which supports the SGML notion of "Formal
  * Public Identifiers (FPIs).
  *
  * @author David Brownell
  * @author Rajiv Mordani
  * @version $Revision$
  */
 public class Resolver implements EntityResolver
 {
     private boolean		ignoringMIME;

     // table mapping public IDs to (local) URIs
     private Hashtable		id2uri;

     // tables mapping public IDs to resources and classloaders
     private Hashtable		id2resource;
     private Hashtable		id2loader;

     //
     // table of MIME content types (less attributes!) known
     // to be mostly "OK" to use with XML MIME entities.  the
     // idea is to rule out obvious braindamage ("image/jpg")
     // not the subtle stuff ("text/html") that might actually
     // be (or become) safe.
     //
     private static final String types [] = {
 	"application/xml",
 	"text/xml",
 	"text/plain",
 	"text/html",			// commonly mis-inferred
 	"application/x-netcdf",		// this is often illegal XML
 	"content/unknown"
     };

     /** Constructs a resolver. */
     public			Resolver () { }

     /**
      * Returns an input source, using the MIME type information and URL
      * scheme to statically determine the correct character encoding if
      * possible and otherwise autodetecting it.  MIME carefully specifies
      * the character encoding defaults, and how attributes of the content
      * type can change it.  XML further specifies two mandatory encodings
      * (UTF-8 and UTF-16), and includes an XML declaration which can be
      * used to internally label most documents encoded using US-ASCII
      * supersets (such as Shift_JIS, EUC-JP, ISO-2022-*, ISO-8859-*, and
      * more).
      *
      * <P> This method can be used to access XML documents which do not
      * have URIs (such as servlet input streams, or most JavaMail message
      * entities) and to support access methods such as HTTP POST or PUT.
      * (URLs normally return content using the GET method.)
      *
      * <P> <em> The caller should set the system ID in order for relative URIs
      * found in this document to be interpreted correctly.</em> In some cases,
      * a custom resolver will need to be used; for example, documents
      * may be grouped in a single MIME "multipart/related" bundle, and
      * relative URLs would refer to other documents in that bundle.
      *
      * @param contentType The MIME content type for the source for which
      *	an InputSource is desired, such as <em>text/xml;charset=utf-8</em>.
      * @param stream The input byte stream for the input source.
      * @param checkType If true, this verifies that the content type is known
      *	to support XML documents, such as <em>application/xml</em>.
      * @param scheme Unless this is "file", unspecified MIME types
      *	default to US-ASCII.  Files are always autodetected since most
      *	file systems discard character encoding information.
      */
     public static InputSource createInputSource (
 	String		contentType,
 	InputStream	stream,
 	boolean		checkType,
 	String		scheme
     ) throws IOException
     {
         InputSource     retval;
 	String		charset = null;

 	if (contentType != null) {
 	    int		index;

 	    contentType = contentType.toLowerCase ();
 	    index = contentType.indexOf (';');
 	    if (index != -1) {
 		String	attributes;

 		attributes = contentType.substring (index + 1);
 		contentType = contentType.substring (0, index);

 		// use "charset=..." if it's available
 		index = attributes.indexOf ("charset");
 		if (index != -1) {
 		    attributes = attributes.substring (index + 7);
 		    // strip out subsequent attributes
 		    if ((index = attributes.indexOf (';')) != -1)
 			attributes = attributes.substring (0, index);
 		    // find start of value
 		    if ((index = attributes.indexOf ('=')) != -1) {
 			attributes = attributes.substring (index + 1);
 			// strip out rfc822 comments
 			if ((index = attributes.indexOf ('(')) != -1)
 			    attributes = attributes.substring (0, index);
 			// double quotes are optional
 			if ((index = attributes.indexOf ('"')) != -1) {
 			    attributes = attributes.substring (index + 1);
 			    attributes = attributes.substring (0,
 				    attributes.indexOf ('"'));
 			}
 			charset = attributes.trim ();
 			// XXX "\;", "\)" etc were mishandled above
 		    }
 		}
 	    }

 	    //
 	    // Check MIME type.
 	    //
 	    if (checkType) {
 		boolean isOK = false;
 		for (int i = 0; i < types.length; i++)
 		    if (types [i].equals (contentType)) {
 			isOK = true;
 			break;
 		    }
 		if (!isOK)
 		    throw new IOException ("Not XML: " + contentType);
 	    }

 	    //
 	    // "text/*" MIME types have hard-wired character set
 	    // defaults, as specified in the RFCs.  For XML, we
 	    // ignore the system "file.encoding" property since
 	    // autodetection is more correct.
 	    //
 	    if (charset == null) {
 		contentType = contentType.trim ();
 		if (contentType.startsWith ("text/")) {
 		    if (!"file".equalsIgnoreCase (scheme))
 			charset = "US-ASCII";
 		}
 		// "application/*" has no default
 	    }
 	}

 	retval = new InputSource (XmlReader.createReader (stream, charset));
 	retval.setByteStream (stream);
 	retval.setEncoding (charset);
 	return retval;
     }


     /**
      * Creates an input source from a given URI.
      *
      * @param uri the URI (system ID) for the entity
      * @param checkType if true, the MIME content type for the entity
      *	is checked for document type and character set encoding.
      */
     static public InputSource createInputSource (URL uri, boolean checkType)
     throws IOException
     {
 	URLConnection	conn = uri.openConnection ();
 	if (conn instanceof HttpURLConnection) {
 	    int status = ((HttpURLConnection)conn).getResponseCode ();
 	    if ((status >= 400 && status <= 417) ||
 		(status >=500 && status <=505))
 	    {
 	   	throw new IOException ("Error in opening uri " + uri +
 					"status code=" + status);
 	    }
 	}
 	InputSource	retval;

 	if (checkType) {
 	    String	contentType = conn.getContentType ();
 	    retval = createInputSource (contentType, conn.getInputStream (),
 		    false, uri.getProtocol ());
 	} else {
 	    retval = new InputSource (
 		    XmlReader.createReader (conn.getInputStream ()));
 	}
 	retval.setSystemId (conn.getURL ().toString ());
 	return retval;
     }


     /**
      * Creates an input source from a given file, autodetecting
      * the character encoding.
      *
      * @param uri the URI (system ID) for the entity
      */
     static public InputSource createInputSource (File file)
     throws IOException
     {
 	InputSource	retval;
 	String		path;

 	retval = new InputSource (
 		    XmlReader.createReader (new FileInputStream (file)));

 	// On JDK 1.2 and later, simplify this:
 	//	"path = file.toURL ().toString ()".
 	path = file.getAbsolutePath ();
 	if (File.separatorChar != '/')
 	    path = path.replace (File.separatorChar, '/');
 	if (!path.startsWith ("/"))
 	    path = "/" + path;
 	if (!path.endsWith ("/") && file.isDirectory ())
 	    path = path + "/";

 	retval.setSystemId ("file:" + path);
 	return retval;
     }


     /**
      * <b>SAX:</b>
      * Resolve the given entity into an input source.  If the name can't
      * be mapped to a preferred form of the entity, the URI is used.  To
      * resolve the entity, first a local catalog mapping names to URIs is
      * consulted.  If no mapping is found there, a catalog mapping names
      * to java resources is consulted.  Finally, if neither mapping found
      * a copy of the entity, the specified URI is used.
      *
      * <P> When a URI is used, <a href="#createInputSource">
      * createInputSource</a> is used to correctly deduce the character
      * encoding used by this entity.  No MIME type checking is done.
      *
      * @param name Used to find alternate copies of the entity, when
      *	this value is non-null; this is the XML "public ID".
      * @param uri Used when no alternate copy of the entity is found;
      *	this is the XML "system ID", normally a URI.
      */
     public InputSource 	resolveEntity (String name, String uri)
     throws IOException, SAXException
     {
 	InputSource     retval;
 	String		mappedURI = name2uri (name);
 	InputStream	stream;

 	// prefer explicit URI mappings, then bundled resources...
 	if (mappedURI == null && (stream = mapResource (name)) != null) {
 	    uri = "java:resource:" + (String) id2resource.get (name);
 	    retval = new InputSource (XmlReader.createReader (stream));

 	// ...and treat all URIs the same (as URLs for now).
 	} else {
 	    URL			url;
 	    URLConnection	conn;

 	    if (mappedURI != null)
 		uri = mappedURI;
 	    else if (uri == null)
 		return null;

 	    url = new URL (uri);
 	    conn = url.openConnection ();
 	    uri = conn.getURL ().toString ();
 	    // System.out.println ("++ URI: " + url);
 	    if (ignoringMIME)
 		retval = new InputSource (
 			XmlReader.createReader (conn.getInputStream ()));
 	    else {
 		String		contentType = conn.getContentType ();
 		retval = createInputSource (contentType,
 			conn.getInputStream (),
 			false, url.getProtocol ());
 	    }
 	}
 	retval.setSystemId (uri);
 	retval.setPublicId (name);
 	return retval;
     }


     /**
      * Returns true if this resolver is ignoring MIME types in the documents
      * it returns, to work around bugs in how servers have reported the
      * documents' MIME types.
      */
     public boolean isIgnoringMIME ()
 	{ return ignoringMIME; }

     /**
      * Tells the resolver whether to ignore MIME types in the documents it
      * retrieves.  Many web servers incorrectly assign text documents a
      * default character encoding, even when that is incorrect.  For example,
      * all HTTP text documents default to use ISO-8859-1 (used for Western
      * European languages), and other MIME sources default text documents
      * to use US-ASCII (a seven bit encoding).  For XML documents which
      * include text encoding declarations (as most should do), these server
      * bugs can be worked around by ignoring the MIME type entirely.
      */
     public void setIgnoringMIME (boolean value)
 	{ ignoringMIME = value; }


     // maps the public ID to an alternate URI, if one is registered
     private String name2uri (String publicId)
     {
 	if (publicId == null || id2uri == null)
 	    return null;
 	return (String) id2uri.get (publicId);
     }


     /**
      * Registers the given public ID as corresponding to a particular
      * URI, typically a local copy.  This URI will be used in preference
      * to ones provided as system IDs in XML entity declarations.  This
      * mechanism would most typically be used for Document Type Definitions
      * (DTDs), where the public IDs are formally managed and versioned.
      *
      * @param publicId The managed public ID being mapped
      * @param uri The URI of the preferred copy of that entity
      */
     public void registerCatalogEntry (
 	String		publicId,
 	String		uri
     )
     {
 	if (id2uri == null)
 	    id2uri = new Hashtable (17);
 	id2uri.put (publicId, uri);
     }


     // return the resource as a stream
     private InputStream mapResource (String publicId)
     {
 	// System.out.println ("++ PUBLIC: " + publicId);
 	if (publicId == null || id2resource == null)
 	    return null;

 	String		resourceName = (String) id2resource.get (publicId);
 	ClassLoader	loader = null;

 	if (resourceName == null)
 	    return null;
 	// System.out.println ("++ Resource: " + resourceName);

 	if (id2loader != null)
 	    loader = (ClassLoader) id2loader.get (publicId);
 	// System.out.println ("++ Loader: " + loader);
 	if (loader == null)
 	    return ClassLoader.getSystemResourceAsStream (resourceName);
 	return loader.getResourceAsStream (resourceName);
     }

     /**
      * Registers a given public ID as corresponding to a particular Java
      * resource in a given class loader, typically distributed with a
      * software package.  This resource will be preferred over system IDs
      * included in XML documents.  This mechanism should most typically be
      * used for Document Type Definitions (DTDs), where the public IDs are
      * formally managed and versioned.
      *
      * <P> If a mapping to a URI has been provided, that mapping takes
      * precedence over this one.
      *
      * @param publicId The managed public ID being mapped
      * @param resourceName The name of the Java resource
      * @param loader The class loader holding the resource, or null if
      *	it is a system resource.
      */
     public void registerCatalogEntry (
 	String		publicId,
 	String		resourceName,
 	ClassLoader	loader
     )
     {
 	if (id2resource == null)
 	    id2resource = new Hashtable (17);
 	id2resource.put (publicId, resourceName);

 	if (loader != null) {
 	    if (id2loader == null)
 		id2loader = new Hashtable (17);
 	    id2loader.put (publicId, loader);
 	}
     }
 }
	/*
	* $Id$
	*
	* The Apache Software License, Version 1.1
	*
	*
	* Copyright (c) 2000 The Apache Software Foundation. All rights
	* reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	*
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	*
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in
	* the documentation and/or other materials provided with the
	* distribution.
	*
	* 3. The end-user documentation included with the redistribution,
	* if any, must include the following acknowledgment:
	* "This product includes software developed by the
	* Apache Software Foundation (http://www.apache.org/)."
	* Alternately, this acknowledgment may appear in the software itself,
	* if and wherever such third-party acknowledgments normally appear.
	*
	* 4. The names "Crimson" and "Apache Software Foundation" must
	* not be used to endorse or promote products derived from this
	* software without prior written permission. For written
	* permission, please contact apache@apache.org.
	*
	* 5. Products derived from this software may not be called "Apache",
	* nor may "Apache" appear in their name, without prior written
	* permission of the Apache Software Foundation.
	*
	* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
	* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
	* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
	* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
	* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
	* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	* ====================================================================
	*
	* This software consists of voluntary contributions made by many
	* individuals on behalf of the Apache Software Foundation and was
	* originally based on software copyright (c) 1999, Sun Microsystems, Inc.,
	* http://www.sun.com. For more information on the Apache Software
	* Foundation, please see <http://www.apache.org/>.
	*/


	package org.apache.xerces.tree;

	import java.io.File;
	import java.io.FileInputStream;
	import java.io.InputStream;
	import java.io.IOException;
	import java.net.URL;
	import java.net.URLConnection;
	import java.net.HttpURLConnection;
	import java.util.Hashtable;

	import org.xml.sax.*;


	/**
	* This entity resolver class provides a number of utilities which can help
	* managment of external parsed entities in XML. These are commonly used
	* to hold markup declarations that are to be used as part of a Document
	* Type Declaration (DTD), or to hold text marked up with XML.
	*
	* <P> Features include: <UL>
	*
	* <LI> Static factory methods are provided for constructing SAX InputSource
	* objects from Files, URLs, or MIME objects. This eliminates a class of
	* error-prone coding in applications.
	*
	* <LI> Character encodings for XML documents are correctly supported: <UL>
	*
	* <LI> The encodings defined in the RFCs for MIME content types
	* (2046 for general MIME, and 2376 for XML in particular), are
	* supported, handling <em>charset=...</em> attributes and accepting
	* content types which are known to be safe for use with XML;
	*
	* <LI> The character encoding autodetection algorithm identified
	* in the XML specification is used, and leverages all of
	* the JDK 1.1 (and later) character encoding support.
	*
	* <LI> The use of MIME typing may optionally be disabled, forcing the
	* use of autodetection, to support web servers which don't correctly
	* report MIME types for XML. For example, they may report text that
	* is encoded in EUC-JP as being US-ASCII text, leading to fatal
	* errors during parsing.
	*
	* <LI> The InputSource objects returned by this class always
	* have a <code>java.io.Reader</code> available as the "character
	* stream" property.
	*
	* </UL>
	*
	* <LI> Catalog entries can map public identifiers to Java resources or
	* to local URLs. These are used to reduce network dependencies and loads,
	* and will often be used for external DTD components. For example, packages
	* shipping DTD files as resources in JAR files can eliminate network traffic
	* when accessing them, and sites may provide local caches of common DTDs.
	* Note that no particular catalog syntax is supported by this class, only
	* the notion of a set of entries.
	*
	* </UL>
	*
	* <P> Subclasses can perform tasks such as supporting new URI schemes for
	* URIs which are not URLs, such as URNs (see RFC 2396) or for accessing
	* MIME entities which are part of a <em>multipart/related</em> group
	* (see RFC 2387). They may also be used to support particular catalog
	* syntaxes, such as the <a href="http://www.oasis-open.org/html/a401.htm">
	* SGML/Open Catalog (SOCAT)</a> which supports the SGML notion of "Formal
	* Public Identifiers (FPIs).
	*
	* @author David Brownell
	* @author Rajiv Mordani
	* @version $Revision$
	*/
	public class Resolver implements EntityResolver
	{
	private boolean ignoringMIME;

	// table mapping public IDs to (local) URIs
	private Hashtable id2uri;

	// tables mapping public IDs to resources and classloaders
	private Hashtable id2resource;
	private Hashtable id2loader;

	//
	// table of MIME content types (less attributes!) known
	// to be mostly "OK" to use with XML MIME entities. the
	// idea is to rule out obvious braindamage ("image/jpg")
	// not the subtle stuff ("text/html") that might actually
	// be (or become) safe.
	//
	private static final String types [] = {
	"application/xml",
	"text/xml",
	"text/plain",
	"text/html", // commonly mis-inferred
	"application/x-netcdf", // this is often illegal XML
	"content/unknown"
	};

	/** Constructs a resolver. */
	public Resolver () { }

	/**
	* Returns an input source, using the MIME type information and URL
	* scheme to statically determine the correct character encoding if
	* possible and otherwise autodetecting it. MIME carefully specifies
	* the character encoding defaults, and how attributes of the content
	* type can change it. XML further specifies two mandatory encodings
	* (UTF-8 and UTF-16), and includes an XML declaration which can be
	* used to internally label most documents encoded using US-ASCII
	* supersets (such as Shift_JIS, EUC-JP, ISO-2022-, ISO-8859-, and
	* more).
	*
	* <P> This method can be used to access XML documents which do not
	* have URIs (such as servlet input streams, or most JavaMail message
	* entities) and to support access methods such as HTTP POST or PUT.
	* (URLs normally return content using the GET method.)
	*
	* <P> <em> The caller should set the system ID in order for relative URIs
	* found in this document to be interpreted correctly.</em> In some cases,
	* a custom resolver will need to be used; for example, documents
	* may be grouped in a single MIME "multipart/related" bundle, and
	* relative URLs would refer to other documents in that bundle.
	*
	* @param contentType The MIME content type for the source for which
	* an InputSource is desired, such as <em>text/xml;charset=utf-8</em>.
	* @param stream The input byte stream for the input source.
	* @param checkType If true, this verifies that the content type is known
	* to support XML documents, such as <em>application/xml</em>.
	* @param scheme Unless this is "file", unspecified MIME types
	* default to US-ASCII. Files are always autodetected since most
	* file systems discard character encoding information.
	*/
	public static InputSource createInputSource (
	String contentType,
	InputStream stream,
	boolean checkType,
	String scheme
	) throws IOException
	{
	InputSource retval;
	String charset = null;

	if (contentType != null) {
	int index;

	contentType = contentType.toLowerCase ();
	index = contentType.indexOf (';');
	if (index != -1) {
	String attributes;

	attributes = contentType.substring (index + 1);
	contentType = contentType.substring (0, index);

	// use "charset=..." if it's available
	index = attributes.indexOf ("charset");
	if (index != -1) {
	attributes = attributes.substring (index + 7);
	// strip out subsequent attributes
	if ((index = attributes.indexOf (';')) != -1)
	attributes = attributes.substring (0, index);
	// find start of value
	if ((index = attributes.indexOf ('=')) != -1) {
	attributes = attributes.substring (index + 1);
	// strip out rfc822 comments
	if ((index = attributes.indexOf ('(')) != -1)
	attributes = attributes.substring (0, index);
	// double quotes are optional
	if ((index = attributes.indexOf ('"')) != -1) {
	attributes = attributes.substring (index + 1);
	attributes = attributes.substring (0,
	attributes.indexOf ('"'));
	}
	charset = attributes.trim ();
	// XXX "\;", "\)" etc were mishandled above
	}
	}
	}

	//
	// Check MIME type.
	//
	if (checkType) {
	boolean isOK = false;
	for (int i = 0; i < types.length; i++)
	if (types [i].equals (contentType)) {
	isOK = true;
	break;
	}
	if (!isOK)
	throw new IOException ("Not XML: " + contentType);
	}

	//
	// "text/*" MIME types have hard-wired character set
	// defaults, as specified in the RFCs. For XML, we
	// ignore the system "file.encoding" property since
	// autodetection is more correct.
	//
	if (charset == null) {
	contentType = contentType.trim ();
	if (contentType.startsWith ("text/")) {
	if (!"file".equalsIgnoreCase (scheme))
	charset = "US-ASCII";
	}
	// "application/*" has no default
	}
	}

	retval = new InputSource (XmlReader.createReader (stream, charset));
	retval.setByteStream (stream);
	retval.setEncoding (charset);
	return retval;
	}


	/**
	* Creates an input source from a given URI.
	*
	* @param uri the URI (system ID) for the entity
	* @param checkType if true, the MIME content type for the entity
	* is checked for document type and character set encoding.
	*/
	static public InputSource createInputSource (URL uri, boolean checkType)
	throws IOException
	{
	URLConnection conn = uri.openConnection ();
	if (conn instanceof HttpURLConnection) {
	int status = ((HttpURLConnection)conn).getResponseCode ();
	if ((status >= 400 && status <= 417) \|\|
	(status >=500 && status <=505))
	{
	throw new IOException ("Error in opening uri " + uri +
	"status code=" + status);
	}
	}
	InputSource retval;

	if (checkType) {
	String contentType = conn.getContentType ();
	retval = createInputSource (contentType, conn.getInputStream (),
	false, uri.getProtocol ());
	} else {
	retval = new InputSource (
	XmlReader.createReader (conn.getInputStream ()));
	}
	retval.setSystemId (conn.getURL ().toString ());
	return retval;
	}


	/**
	* Creates an input source from a given file, autodetecting
	* the character encoding.
	*
	* @param uri the URI (system ID) for the entity
	*/
	static public InputSource createInputSource (File file)
	throws IOException
	{
	InputSource retval;
	String path;

	retval = new InputSource (
	XmlReader.createReader (new FileInputStream (file)));

	// On JDK 1.2 and later, simplify this:
	// "path = file.toURL ().toString ()".
	path = file.getAbsolutePath ();
	if (File.separatorChar != '/')
	path = path.replace (File.separatorChar, '/');
	if (!path.startsWith ("/"))
	path = "/" + path;
	if (!path.endsWith ("/") && file.isDirectory ())
	path = path + "/";

	retval.setSystemId ("file:" + path);
	return retval;
	}


	/**
	* <b>SAX:</b>
	* Resolve the given entity into an input source. If the name can't
	* be mapped to a preferred form of the entity, the URI is used. To
	* resolve the entity, first a local catalog mapping names to URIs is
	* consulted. If no mapping is found there, a catalog mapping names
	* to java resources is consulted. Finally, if neither mapping found
	* a copy of the entity, the specified URI is used.
	*
	* <P> When a URI is used, <a href="#createInputSource">
	* createInputSource</a> is used to correctly deduce the character
	* encoding used by this entity. No MIME type checking is done.
	*
	* @param name Used to find alternate copies of the entity, when
	* this value is non-null; this is the XML "public ID".
	* @param uri Used when no alternate copy of the entity is found;
	* this is the XML "system ID", normally a URI.
	*/
	public InputSource resolveEntity (String name, String uri)
	throws IOException, SAXException
	{
	InputSource retval;
	String mappedURI = name2uri (name);
	InputStream stream;

	// prefer explicit URI mappings, then bundled resources...
	if (mappedURI == null && (stream = mapResource (name)) != null) {
	uri = "java:resource:" + (String) id2resource.get (name);
	retval = new InputSource (XmlReader.createReader (stream));

	// ...and treat all URIs the same (as URLs for now).
	} else {
	URL url;
	URLConnection conn;

	if (mappedURI != null)
	uri = mappedURI;
	else if (uri == null)
	return null;

	url = new URL (uri);
	conn = url.openConnection ();
	uri = conn.getURL ().toString ();
	// System.out.println ("++ URI: " + url);
	if (ignoringMIME)
	retval = new InputSource (
	XmlReader.createReader (conn.getInputStream ()));
	else {
	String contentType = conn.getContentType ();
	retval = createInputSource (contentType,
	conn.getInputStream (),
	false, url.getProtocol ());
	}
	}
	retval.setSystemId (uri);
	retval.setPublicId (name);
	return retval;
	}


	/**
	* Returns true if this resolver is ignoring MIME types in the documents
	* it returns, to work around bugs in how servers have reported the
	* documents' MIME types.
	*/
	public boolean isIgnoringMIME ()
	{ return ignoringMIME; }

	/**
	* Tells the resolver whether to ignore MIME types in the documents it
	* retrieves. Many web servers incorrectly assign text documents a
	* default character encoding, even when that is incorrect. For example,
	* all HTTP text documents default to use ISO-8859-1 (used for Western
	* European languages), and other MIME sources default text documents
	* to use US-ASCII (a seven bit encoding). For XML documents which
	* include text encoding declarations (as most should do), these server
	* bugs can be worked around by ignoring the MIME type entirely.
	*/
	public void setIgnoringMIME (boolean value)
	{ ignoringMIME = value; }


	// maps the public ID to an alternate URI, if one is registered
	private String name2uri (String publicId)
	{
	if (publicId == null \|\| id2uri == null)
	return null;
	return (String) id2uri.get (publicId);
	}


	/**
	* Registers the given public ID as corresponding to a particular
	* URI, typically a local copy. This URI will be used in preference
	* to ones provided as system IDs in XML entity declarations. This
	* mechanism would most typically be used for Document Type Definitions
	* (DTDs), where the public IDs are formally managed and versioned.
	*
	* @param publicId The managed public ID being mapped
	* @param uri The URI of the preferred copy of that entity
	*/
	public void registerCatalogEntry (
	String publicId,
	String uri
	)
	{
	if (id2uri == null)
	id2uri = new Hashtable (17);
	id2uri.put (publicId, uri);
	}


	// return the resource as a stream
	private InputStream mapResource (String publicId)
	{
	// System.out.println ("++ PUBLIC: " + publicId);
	if (publicId == null \|\| id2resource == null)
	return null;

	String resourceName = (String) id2resource.get (publicId);
	ClassLoader loader = null;

	if (resourceName == null)
	return null;
	// System.out.println ("++ Resource: " + resourceName);

	if (id2loader != null)
	loader = (ClassLoader) id2loader.get (publicId);
	// System.out.println ("++ Loader: " + loader);
	if (loader == null)
	return ClassLoader.getSystemResourceAsStream (resourceName);
	return loader.getResourceAsStream (resourceName);
	}

	/**
	* Registers a given public ID as corresponding to a particular Java
	* resource in a given class loader, typically distributed with a
	* software package. This resource will be preferred over system IDs
	* included in XML documents. This mechanism should most typically be
	* used for Document Type Definitions (DTDs), where the public IDs are
	* formally managed and versioned.
	*
	* <P> If a mapping to a URI has been provided, that mapping takes
	* precedence over this one.
	*
	* @param publicId The managed public ID being mapped
	* @param resourceName The name of the Java resource
	* @param loader The class loader holding the resource, or null if
	* it is a system resource.
	*/
	public void registerCatalogEntry (
	String publicId,
	String resourceName,
	ClassLoader loader
	)
	{
	if (id2resource == null)
	id2resource = new Hashtable (17);
	id2resource.put (publicId, resourceName);

	if (loader != null) {
	if (id2loader == null)
	id2loader = new Hashtable (17);
	id2loader.put (publicId, loader);
	}
	}
	}