src/whiteboard/org/apache/xerces/tree/XmlReader.java - xerces2-j - Git at Google

 /*
  * $Id$
  *
  * The Apache Software License, Version 1.1
  *
  *
  * Copyright (c) 2000 The Apache Software Foundation.  All rights
  * reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the
  *    distribution.
  *
  * 3. The end-user documentation included with the redistribution,
  *    if any, must include the following acknowledgment:
  *       "This product includes software developed by the
  *        Apache Software Foundation (http://www.apache.org/)."
  *    Alternately, this acknowledgment may appear in the software itself,
  *    if and wherever such third-party acknowledgments normally appear.
  *
  * 4. The names "Crimson" and "Apache Software Foundation" must
  *    not be used to endorse or promote products derived from this
  *    software without prior written permission. For written
  *    permission, please contact apache@apache.org.
  *
  * 5. Products derived from this software may not be called "Apache",
  *    nor may "Apache" appear in their name, without prior written
  *    permission of the Apache Software Foundation.
  *
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * ====================================================================
  *
  * This software consists of voluntary contributions made by many
  * individuals on behalf of the Apache Software Foundation and was
  * originally based on software copyright (c) 1999, Sun Microsystems, Inc.,
  * http://www.sun.com.  For more information on the Apache Software
  * Foundation, please see <http://www.apache.org/>.
  */

 package org.apache.xerces.tree;

 import java.io.*;
 import java.util.Hashtable;


 // NOTE:  Add I18N support to this class when JDK gets the ability to
 // defer selection of locale for exception messages ... use the same
 // technique for both.


 /**
  * This handles several XML-related tasks that normal java.io Readers
  * don't support, including use of IETF standard encoding names and
  * automatic detection of most XML encodings.  The former is needed
  * for interoperability; the latter is needed to conform with the XML
  * spec.  This class also optimizes reading some common encodings by
  * providing low-overhead unsynchronized Reader support.
  *
  * <P> Note that the autodetection facility should be used only on
  * data streams which have an unknown character encoding.  For example,
  * it should never be used on MIME text/xml entities.
  *
  * <P> Note that XML processors are only required to support UTF-8 and
  * UTF-16 character encodings.  Autodetection permits the underlying Java
  * implementation to provide support for many other encodings, such as
  * US-ASCII, ISO-8859-5, Shift_JIS, EUC-JP, and ISO-2022-JP.
  *
  * @author David Brownell
  * @version $Revision$
  */
 // package private
 final class XmlReader extends Reader
 {
     /** Size of the pushback buffer, and of the XML declaration look-ahead. */
     private static final int MAXPUSHBACK = 512;

     /** Reader all I/O is delegated to; null once this reader is closed. */
     private Reader      in;
     /** Name of the encoding that was detected or declared. */
     private String      assignedEncoding;
     /** True once close() has run (explicitly, or implicitly at EOF). */
     private boolean     closed;

     //
     // This class always delegates I/O to a reader, which gets
     // its data from the very beginning of the XML text.  It needs
     // to use a pushback stream since (a) autodetection can read
     // partial UTF-8 characters which need to be fully processed,
     // (b) the "Unicode" readers swallow characters that they think
     // are byte order marks, so tests fail if they don't see the
     // real byte order mark.
     //
     // It's got to do this efficiently:  character I/O is solidly on the
     // critical path.  (So keep buffer length over 2 Kbytes to avoid
     // excess buffering. Many URL handlers stuff a BufferedInputStream
     // between here and the real data source, and larger buffers keep
     // that from slowing you down.)
     //

     /**
      * Constructs the reader from an input stream, autodetecting
      * the encoding to use according to the heuristic specified
      * in the XML 1.0 recommendation.
      *
      * @param in the input stream from which the reader is constructed
      * @exception IOException on error, such as unrecognized encoding
      */
     public static Reader createReader (InputStream in) throws IOException
     {
         return new XmlReader (in);
     }

     /**
      * Creates a reader supporting the given encoding, mapping
      * from standard encoding names to ones that are understood by
      * Java where necessary.
      *
      * @param in the input stream from which the reader is constructed
      * @param encoding the IETF standard name of the encoding to use;
      *  if null, autodetection is used.
      * @exception IOException on error, including unrecognized encoding
      */
     public static Reader createReader (InputStream in, String encoding)
     throws IOException
     {
         if (encoding == null)
             return new XmlReader (in);
         if ("UTF-8".equalsIgnoreCase (encoding)
                 || "UTF8".equalsIgnoreCase (encoding))
             return new Utf8Reader (in);
         if ("US-ASCII".equalsIgnoreCase (encoding)
                 || "ASCII".equalsIgnoreCase (encoding))
             return new AsciiReader (in);
         if ("ISO-8859-1".equalsIgnoreCase (encoding)
                 // plus numerous aliases ...
                 )
             return new Iso8859_1Reader (in);

         //
         // What we really want is an administerable resource mapping
         // encoding names/aliases to classnames.  For example a property
         // file resource, "readers/mapping.props", holding and a set
         // of readers in that (sub)package... defaulting to this call
         // only if no better choice is available.
         //
         return new InputStreamReader (in, std2java (encoding));
     }

     //
     // JDK doesn't know all of the standard encoding names, and
     // in particular none of the EBCDIC ones IANA defines (and
     // which IBM encourages).
     //
     static private final Hashtable charsets = new Hashtable (31);

     static {
         charsets.put ("UTF-16", "Unicode");
         charsets.put ("ISO-10646-UCS-2", "Unicode");

         // NOTE: no support for ISO-10646-UCS-4 yet.

         charsets.put ("EBCDIC-CP-US", "cp037");
         charsets.put ("EBCDIC-CP-CA", "cp037");
         charsets.put ("EBCDIC-CP-NL", "cp037");
         charsets.put ("EBCDIC-CP-WT", "cp037");

         charsets.put ("EBCDIC-CP-DK", "cp277");
         charsets.put ("EBCDIC-CP-NO", "cp277");
         charsets.put ("EBCDIC-CP-FI", "cp278");
         charsets.put ("EBCDIC-CP-SE", "cp278");

         charsets.put ("EBCDIC-CP-IT", "cp280");
         charsets.put ("EBCDIC-CP-ES", "cp284");
         charsets.put ("EBCDIC-CP-GB", "cp285");
         charsets.put ("EBCDIC-CP-FR", "cp297");

         charsets.put ("EBCDIC-CP-AR1", "cp420");
         charsets.put ("EBCDIC-CP-HE", "cp424");
         charsets.put ("EBCDIC-CP-BE", "cp500");
         charsets.put ("EBCDIC-CP-CH", "cp500");

         charsets.put ("EBCDIC-CP-ROECE", "cp870");
         charsets.put ("EBCDIC-CP-YU", "cp870");
         charsets.put ("EBCDIC-CP-IS", "cp871");
         charsets.put ("EBCDIC-CP-AR2", "cp918");

         // IANA also defines two that JDK 1.2 doesn't handle:
         //      EBCDIC-CP-GR            --> CP423
         //      EBCDIC-CP-TR            --> CP905
     }

     // returns an encoding name supported by JDK >= 1.1.6
     // for some cases required by the XML spec
     private static String std2java (String encoding)
     {
         // The table keys are upper-case ASCII.  Fold case with a fixed
         // locale:  the default-locale toUpperCase() maps 'i' to a dotted
         // capital I under e.g. a Turkish locale, and every "ISO-..." or
         // "EBCDIC-..." lookup would then miss.
         String temp = encoding.toUpperCase (java.util.Locale.ENGLISH);
         temp = (String) charsets.get (temp);
         return temp != null ? temp : encoding;
     }

     /** Returns the standard name of the encoding in use */
     public String getEncoding ()
     {
         return assignedEncoding;
     }

     /**
      * Peeks at the first four bytes of the stream to pick an encoding,
      * pushes them back, and sets up the delegate reader.  Falls back
      * to UTF-8 when nothing more specific is recognized.
      *
      * @param stream the raw XML byte stream, positioned at its start
      * @exception IOException on I/O error or unsupported encoding
      */
     private XmlReader (InputStream stream) throws IOException
     {
         super (stream);

         PushbackInputStream     pb;
         byte                    buf [];
         int                     len;

         //
         // Always wrap the stream, even if it already is a
         // PushbackInputStream:  URL connections hand out pushback
         // streams with a tiny (7 byte) buffer, and pushing back up to
         // MAXPUSHBACK bytes into one of those fails.
         //
         pb = new PushbackInputStream (stream, MAXPUSHBACK);

         //
         // See if we can figure out the character encoding used
         // in this file by peeking at the first few bytes.  A single
         // read() may legally return fewer bytes than requested (for
         // example from a network stream), so keep reading until we
         // have all four bytes or hit EOF; otherwise detection would
         // silently be skipped.
         //
         buf = new byte [4];
         len = 0;
         while (len < buf.length) {
             int got = pb.read (buf, len, buf.length - len);

             if (got <= 0)
                 break;
             len += got;
         }
         if (len > 0)
             pb.unread (buf, 0, len);

         if (len == 4) switch (buf [0] & 0x0ff) {
             case 0:
               // 00 3c 00 3f == illegal UTF-16 big-endian
               if (buf [1] == 0x3c && buf [2] == 0x00 && buf [3] == 0x3f) {
                   setEncoding (pb, "UnicodeBig");
                   return;
               }
               // else it's probably UCS-4
               break;

             case '<':      // 0x3c: the most common cases!
               switch (buf [1] & 0x0ff) {
                 // First character is '<'; could be XML without
                 // an XML directive such as "<hello>", "<!-- ...",
                 // and so on.
                 default:
                   break;

                 // 3c 00 3f 00 == illegal UTF-16 little endian
                 case 0x00:
                   if (buf [2] == 0x3f && buf [3] == 0x00) {
                       setEncoding (pb, "UnicodeLittle");
                       return;
                   }
                   // else probably UCS-4
                   break;

                 // 3c 3f 78 6d == ASCII and supersets '<?xm'
                 case '?':
                   if (buf [2] != 'x' || buf [3] != 'm')
                       break;
                   //
                   // One of several encodings could be used:
                   // Shift-JIS, ASCII, UTF-8, ISO-8859-*, etc
                   //
                   useEncodingDecl (pb, "UTF8");
                   return;
               }
               break;

             // 4c 6f a7 94 ... some EBCDIC code page
             case 0x4c:
               if (buf [1] == 0x6f
                     && (0x0ff & buf [2]) == 0x0a7
                     && (0x0ff & buf [3]) == 0x094) {
                   useEncodingDecl (pb, "CP037");
                   return;
               }
               // whoops, treat as UTF-8
               break;

             // UTF-16 big-endian
             case 0xfe:
               if ((buf [1] & 0x0ff) != 0xff)
                   break;
               setEncoding (pb, "UTF-16");
               return;

             // UTF-16 little-endian
             case 0xff:
               if ((buf [1] & 0x0ff) != 0xfe)
                   break;
               setEncoding (pb, "UTF-16");
               return;

             // default ... no XML declaration
             default:
               break;
         }

         //
         // If all else fails, assume XML without a declaration, and
         // using UTF-8 encoding.
         //
         setEncoding (pb, "UTF-8");
     }

     /*
      * Read the encoding decl on the stream, knowing that it should
      * be readable using the specified encoding (basically, ASCII or
      * EBCDIC).  The body of the document may use a wider range of
      * characters than the XML/Text decl itself, so we switch to use
      * the specified encoding as soon as we can.  (ASCII is a subset
      * of UTF-8, ISO-8859-*, ISO-2022-JP, EUC-JP, and more; EBCDIC
      * has a variety of "code pages" that have these characters as
      * a common subset.)
      *
      * If no legal encoding declaration is found within the look-ahead,
      * UTF-8 is used.
      */
     private void useEncodingDecl (PushbackInputStream pb, String encoding)
     throws IOException
     {
         byte                    buffer [] = new byte [MAXPUSHBACK];
         int                     len;
         Reader                  r;
         int                     c;

         //
         // Buffer up a bunch of input, and set up to read it in
         // the specified encoding ... we can skip the first four
         // bytes since we know that "<?xm" was read to determine
         // what encoding to use!
         //
         // NOTE(review): only one read is issued here, so a short read
         // from the underlying stream truncates the look-ahead and the
         // declaration may be missed.  Reading until the buffer is full
         // could block on interactive streams, so this is left as is.
         //
         len = pb.read (buffer, 0, buffer.length);
         if (len > 0)
             pb.unread (buffer, 0, len);

         // The third argument is a LENGTH, not an end index:  only
         // (len - 4) bytes follow the four that are skipped.
         r = new InputStreamReader (
                 new ByteArrayInputStream (buffer, 4, Math.max (len - 4, 0)),
                 encoding);

         //
         // Next must be "l" (and whitespace) else we conclude
         // error and choose UTF-8.
         //
         if ((c = r.read ()) != 'l') {
             setEncoding (pb, "UTF-8");
             return;
         }

         //
         // Then, we'll skip any
         //      S version="..."         [or single quotes]
         // bit and get any subsequent
         //      S encoding="..."        [or single quotes]
         //
         // We put an arbitrary size limit on how far we read; lots
         // of space will break this algorithm.
         //
         StringBuffer    buf = new StringBuffer ();
         StringBuffer    keyBuf = null;
         String          key = null;
         boolean         sawEq = false;
         char            quoteChar = 0;
         boolean         sawQuestion = false;

         for (int i = 0; i < MAXPUSHBACK - 5; ++i) {
             if ((c = r.read ()) == -1)
                 break;

             // ignore whitespace before/between "key = 'value'"
             if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
                 continue;

             // ... but require at least a little!
             if (i == 0)
                 break;

             // terminate the loop ASAP
             if (c == '?')
                 sawQuestion = true;
             else if (sawQuestion) {
                 if (c == '>')
                     break;
                 sawQuestion = false;
             }

             // did we get the "key =" bit yet?
             if (key == null || !sawEq) {
                 if (keyBuf == null) {
                     if (Character.isWhitespace ((char) c))
                         continue;
                     keyBuf = buf;
                     buf.setLength (0);
                     buf.append ((char)c);
                     sawEq = false;
                 } else if (Character.isWhitespace ((char) c)) {
                     key = keyBuf.toString ();
                 } else if (c == '=') {
                     if (key == null)
                         key = keyBuf.toString ();
                     sawEq = true;
                     keyBuf = null;
                     quoteChar = 0;
                 } else
                     keyBuf.append ((char)c);
                 continue;
             }

             // space before quoted value
             if (Character.isWhitespace ((char) c))
                 continue;
             if (c == '"' || c == '\'') {
                 if (quoteChar == 0) {
                     // opening quote:  start collecting the value
                     quoteChar = (char) c;
                     buf.setLength (0);
                     continue;
                 } else if (c == quoteChar) {
                     // closing quote:  the value is complete
                     if ("encoding".equals (key)) {
                         String  name = buf.toString ();

                         // map illegal (including empty) names to
                         // the UTF-8 default
                         if (!isLegalEncodingName (name))
                             break;

                         setEncoding (pb, name);
                         return;

                     } else {
                         // some other pseudo-attribute, e.g. "version"
                         key = null;
                         continue;
                     }
                 }
             }
             buf.append ((char) c);
         }

         setEncoding (pb, "UTF-8");
     }

     // Checks a name against the XML 1.0 production
     // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._]|'-')*
     private static boolean isLegalEncodingName (String name)
     {
         int     length = name.length ();

         if (length == 0)
             return false;
         for (int i = 0; i < length; i++) {
             char        ch = name.charAt (i);

             if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'))
                 continue;
             if (i > 0 && (ch == '-'
                     || (ch >= '0' && ch <= '9')
                     || ch == '.' || ch == '_'))
                 continue;
             return false;
         }
         return true;
     }

     // Records the encoding name and creates the delegate reader for it.
     private void setEncoding (InputStream stream, String encoding)
     throws IOException
     {
         assignedEncoding = encoding;
         in = createReader (stream, encoding);
     }

     /**
      * Reads characters into a portion of the buffer.  The reader is
      * closed automatically when the end of input is reached.
      *
      * @return the number of characters read, or -1 on EOF or if the
      *  reader has been closed
      */
     public int read (char buf [], int off, int len) throws IOException
     {
         int     val;

         if (closed)
             return -1;          // throw new IOException ("closed");
         val = in.read (buf, off, len);
         if (val == -1)
             close ();
         return val;
     }

     /**
      * Reads a single character.  The reader is closed automatically
      * when the end of input is reached.
      *
      * @return the character read, or -1 on EOF or if the reader has
      *  been closed (consistent with the bulk read method)
      */
     public int read () throws IOException
     {
         int     val;

         // Reaching EOF closes this reader, so a further call must keep
         // reporting EOF rather than fail with "closed".
         if (closed)
             return -1;
         val = in.read ();
         if (val == -1)
             close ();
         return val;
     }

     /**
      * Returns true iff the reader supports mark/reset.
      */
     public boolean markSupported ()
     {
         return in == null ? false : in.markSupported ();
     }

     /**
      * Sets a mark allowing a limited number of characters to
      * be "peeked", by reading and then resetting.
      * @param value how many characters may be "peeked".
      */
     public void mark (int value) throws IOException
     {
         if (in != null) in.mark (value);
     }

     /**
      * Resets the current position to the last marked position.
      */
     public void reset () throws IOException
     {
         if (in != null) in.reset ();
     }

     /**
      * Skips a specified number of characters.
      */
     public long skip (long value) throws IOException
     {
         return in == null ? 0 : in.skip (value);
     }

     /**
      * Returns true iff input characters are known to be ready.
      */
     public boolean ready () throws IOException
     {
         return in == null ? false : in.ready ();
     }

     /**
      * Closes the reader.  Closing an already closed reader has no
      * effect.
      */
     public void close () throws IOException
     {
         if (closed)
             return;

         // Mark as closed before closing the delegate, so that a failure
         // in its close() cannot leave this reader half-open.
         Reader  delegate = in;

         closed = true;
         in = null;
         if (delegate != null)
             delegate.close ();
     }

     //
     // Delegating to a converter module will always be slower than
     // direct conversion.  Use a similar approach for any other
     // readers that need to be particularly fast; only block I/O
     // speed matters to this package.  For UTF-16, separate readers
     // for big and little endian streams make a difference, too;
     // fewer conditionals in the critical path!
     //
     static abstract class BaseReader extends Reader
     {
         // byte source; null once closed
         protected InputStream   instream;
         // undecoded input; valid bytes are buffer [start .. finish-1]
         protected byte          buffer [];
         protected int           start, finish;

         BaseReader (InputStream stream)
         {
             super (stream);

             instream = stream;
             buffer = new byte [8192];
         }

         // True when closed, when undecoded bytes are buffered, or
         // when the stream reports bytes available.
         public boolean ready () throws IOException
         {
             return instream == null
                 || (finish - start) > 0
                 ||  instream.available () != 0;
         }

         // caller shouldn't read again
         public void close () throws IOException
         {
             if (instream != null) {
                 InputStream     stream = instream;

                 // Drop our state first so that a failing close()
                 // still leaves this reader closed.
                 start = finish = 0;
                 buffer = null;
                 instream = null;
                 stream.close ();
             }
         }
     }

     //
     // We want this reader, to make the default encoding be as fast
     // as we can make it.  JDK's "UTF8" (not "UTF-8" till JDK 1.2)
     // InputStreamReader works, but 20+% slower speed isn't OK for
     // the default/primary encoding.
     //
     static final class Utf8Reader extends BaseReader
     {
         // 2nd half of UTF-8 surrogate pair
         private char            nextChar;

         Utf8Reader (InputStream stream)
         {
             super (stream);
         }

         //
         // Decodes up to "len" chars of UTF-8 into buf.  Returns the
         // number of chars stored, or -1 at EOF; the reader closes
         // itself at EOF.  Characters above 0xffff are delivered as
         // a surrogate pair, whose second half may be deferred to the
         // next call when buf is full.
         //
         public int read (char buf [], int offset, int len) throws IOException
         {
             int i = 0, c = 0;

             if (len <= 0)
                 return 0;

             // avoid many runtime bounds checks ... a good optimizer
             // (static or JIT) will now remove checks from the loop.
             if ((offset + len) > buf.length || offset < 0)
                 throw new ArrayIndexOutOfBoundsException ();

             // Consume remaining half of any surrogate pair immediately
             if (nextChar != 0) {
                 buf [offset + i++] = nextChar;
                 nextChar = 0;
             }

             while (i < len) {
                 // stop or read data if needed
                 if (finish <= start) {
                     if (instream == null) {
                         c = -1;
                         break;
                     }
                     start = 0;
                     finish = instream.read (buffer, 0, buffer.length);
                     if (finish <= 0) {
                         this.close ();
                         c = -1;
                         break;
                     }
                 }

                 //
                 // RFC 2279 describes UTF-8; there are six encodings.
                 // Each encoding takes a fixed number of characters
                 // (1-6 bytes) and is flagged by a bit pattern in the
                 // first byte.  The five and six byte-per-character
                 // encodings address characters which are disallowed
                 // in XML documents, as do some four byte ones.
                 //

                 //
                 // Single byte == ASCII.  Common; optimize.
                 //
                 c = buffer [start] & 0x0ff;
                 if ((c & 0x80) == 0x00) {
                     // 0x0000 <= c <= 0x007f
                     start++;
                     buf [offset + i++] = (char) c;
                     continue;
                 }

                 //
                 // Multibyte chars -- the first byte tells how many
                 // bytes the character occupies.
                 //
                 int             need;

                 if ((c & 0x0E0) == 0x0C0)
                     need = 2;
                 else if ((c & 0x0F0) == 0x0E0)
                     need = 3;
                 else if ((c & 0x0F8) == 0x0F0)
                     need = 4;
                 else
                     // 5 and 6 byte versions are XML WF errors, but
                     // typically come from mislabeled encodings
                     throw new CharConversionException (
                         "Unconvertible UTF-8 character"
                         + " beginning with 0x"
                         + Integer.toHexString (c)
                     );

                 //
                 // if the buffer held only a partial character,
                 // compact it and try to read the rest of the
                 // character.  worst case involves three
                 // single-byte reads -- quite rare.
                 //
                 // This must happen BEFORE decoding:  the bytes past
                 // "finish" are stale leftovers from an earlier fill,
                 // and decoding them could raise a bogus range error
                 // or leave a bogus pending surrogate behind.
                 //
                 if (start + need > finish) {
                     System.arraycopy (buffer, start,
                             buffer, 0, finish - start);
                     finish -= start;
                     start = 0;

                     int         got = instream.read (buffer, finish,
                                         buffer.length - finish);

                     if (got < 0) {
                         this.close ();
                         throw new CharConversionException (
                             "Partial UTF-8 char");
                     }
                     finish += got;
                     continue;
                 }

                 //
                 // All bytes are present; assemble the code point.
                 //
                 int             off = start;

                 if (need == 2) {
                     c  = (buffer [off++] & 0x1f) << 6;
                     c +=  buffer [off++] & 0x3f;

                     // 0x0080 <= c <= 0x07ff

                 } else if (need == 3) {
                     c  = (buffer [off++] & 0x0f) << 12;
                     c += (buffer [off++] & 0x3f) << 6;
                     c +=  buffer [off++] & 0x3f;

                     // 0x0800 <= c <= 0xffff

                 } else {
                     c  = (buffer [off++] & 0x07) << 18;
                     c += (buffer [off++] & 0x3f) << 12;
                     c += (buffer [off++] & 0x3f) << 6;
                     c +=  buffer [off++] & 0x3f;

                     // 0x0001 0000  <= c  <= 0x001f ffff

                     // Unicode supports c <= 0x0010 ffff ...
                     if (c > 0x0010ffff)
                         throw new CharConversionException (
                             "UTF-8 encoding of character 0x00"
                             + Integer.toHexString (c)
                             + " can't be converted to Unicode."
                             );

                     else if (c > 0xffff) {
                         // Convert UCS-4 char to surrogate pair (UTF-16)
                         c -= 0x10000;
                         nextChar = (char) (0xDC00 + (c & 0x03ff));
                         c = 0xD800 + (c >> 10);
                     }
                 }

                 //
                 // check the format of the non-initial bytes
                 // ("10xx xxxx"); this also advances "start" past
                 // the character.
                 //
                 for (start++; start < off; start++) {
                     if ((buffer [start] & 0xC0) != 0x80) {
                         this.close ();
                         throw new CharConversionException (
                             "Malformed UTF-8 char -- "
                             + "is an XML encoding declaration missing?"
                             );
                     }
                 }

                 //
                 // If this needed a surrogate pair, consume ASAP
                 //
                 buf [offset + i++] = (char) c;
                 if (nextChar != 0 && i < len) {
                     buf [offset + i++] = nextChar;
                     nextChar = 0;
                 }
             }
             if (i > 0)
                 return i;
             return (c == -1) ? -1 : 0;
         }
     }

     //
     // We want ASCII and ISO-8859 Readers since they're the most common
     // encodings in the US and Europe, and we don't want performance
     // regressions for them.  They're also easy to implement efficiently,
     // since they're bitmask subsets of UNICODE.
     //
     // XXX haven't benchmarked these readers vs what we get out of JDK.
     //
     static final class AsciiReader extends BaseReader
     {
         AsciiReader (InputStream in) { super (in); }

         //
         // Copies up to "len" 7-bit characters into buf; any byte with
         // the high bit set is rejected.  Returns the number of chars
         // stored, or -1 at EOF (the reader closes itself at EOF).
         //
         public int read (char buf [], int offset, int len) throws IOException
         {
             int         i, c;

             // A request for nothing is not EOF.  Without this, a fresh
             // reader (finish == 0) would answer -1 below.  Same check
             // as in Utf8Reader.
             if (len <= 0)
                 return 0;

             if (instream == null)
                 return -1;

             // avoid many runtime bounds checks ... a good optimizer
             // (static or JIT) will now remove checks from the loop.
             if ((offset + len) > buf.length || offset < 0)
                 throw new ArrayIndexOutOfBoundsException ();

             for (i = 0; i < len; i++) {
                 if (start >= finish) {
                     start = 0;
                     finish = instream.read (buffer, 0, buffer.length);
                     if (finish <= 0) {
                         // close() resets finish to zero
                         this.close ();
                         break;
                     }
                 }
                 c = buffer [start++];
                 if ((c & 0x80) != 0)
                     throw new CharConversionException (
                         "Illegal ASCII character, 0x"
                         + Integer.toHexString (c & 0xff)
                     );
                 buf [offset + i] = (char) c;
             }
             if (i == 0 && finish <= 0)
                 return -1;
             return i;
         }
     }

     static final class Iso8859_1Reader extends BaseReader
     {
         Iso8859_1Reader (InputStream in) { super (in); }

         //
         // Copies up to "len" bytes into buf, each widened to the char
         // with the same value (0x00 - 0xff).  Returns the number of
         // chars stored, or -1 at EOF (the reader closes itself at EOF).
         //
         public int read (char buf [], int offset, int len) throws IOException
         {
             int         i;

             // A request for nothing is not EOF.  Without this, a fresh
             // reader (finish == 0) would answer -1 below.  Same check
             // as in Utf8Reader.
             if (len <= 0)
                 return 0;

             if (instream == null)
                 return -1;

             // avoid many runtime bounds checks ... a good optimizer
             // (static or JIT) will now remove checks from the loop.
             if ((offset + len) > buf.length || offset < 0)
                 throw new ArrayIndexOutOfBoundsException ();

             for (i = 0; i < len; i++) {
                 if (start >= finish) {
                     start = 0;
                     finish = instream.read (buffer, 0, buffer.length);
                     if (finish <= 0) {
                         // close() resets finish to zero
                         this.close ();
                         break;
                     }
                 }
                 buf [offset + i] = (char) (0x0ff & buffer [start++]);
             }
             if (i == 0 && finish <= 0)
                 return -1;
             return i;
         }
     }
 }