src/org/apache/xerces/readers/UTF8CharReader.java - xerces2-j - Git at Google

 /*
  * The Apache Software License, Version 1.1
  *
  *
  * Copyright (c) 1999 The Apache Software Foundation.  All rights
  * reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the
  *    distribution.
  *
  * 3. The end-user documentation included with the redistribution,
  *    if any, must include the following acknowledgment:
  *       "This product includes software developed by the
  *        Apache Software Foundation (http://www.apache.org/)."
  *    Alternately, this acknowledgment may appear in the software itself,
  *    if and wherever such third-party acknowledgments normally appear.
  *
  * 4. The names "Xerces" and "Apache Software Foundation" must
  *    not be used to endorse or promote products derived from this
  *    software without prior written permission. For written
  *    permission, please contact apache@apache.org.
  *
  * 5. Products derived from this software may not be called "Apache",
  *    nor may "Apache" appear in their name, without prior written
  *    permission of the Apache Software Foundation.
  *
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * ====================================================================
  *
  * This software consists of voluntary contributions made by many
  * individuals on behalf of the Apache Software Foundation and was
  * originally based on software copyright (c) 1999, International
  * Business Machines, Inc., http://www.apache.org.  For more
  * information on the Apache Software Foundation, please see
  * <http://www.apache.org/>.
  */

 package org.apache.xerces.readers;

 import org.apache.xerces.framework.XMLErrorReporter;
 import org.apache.xerces.utils.CharDataChunk;
 import org.apache.xerces.utils.StringPool;
 import org.apache.xerces.utils.ImplementationMessages;
 import java.io.InputStream;

 /**
  * Simple character-based version of a UTF8 reader.
  *
  * This class is not commonly used, but is provided as a much simplified
  * example of the UTF8Reader class that uses the AbstractCharReader to
  * perform all of the reader functions except for filling each buffer
  * of the character data when needed (fillCurrentChunk).  We read the
  * input data from an InputStream and perform end-of-line normalization
  * as we process that data.
  *
  * @version
  */
 final class UTF8CharReader extends AbstractCharReader {
     //
     //
     //
     /**
      * Creates a reader over a UTF-8 encoded byte stream and immediately
      * loads the first chunk of character data.
      *
      * @param entityHandler           passed through to the AbstractCharReader constructor
      * @param errorReporter           passed through to the AbstractCharReader constructor
      * @param sendCharDataAsCharArray passed through to the AbstractCharReader constructor
      * @param dataStream              the stream the UTF-8 bytes are read from
      * @param stringPool              passed through to the AbstractCharReader constructor
      * @throws Exception if filling the first chunk fails
      */
     UTF8CharReader(XMLEntityHandler entityHandler,
                    XMLErrorReporter errorReporter,
                    boolean sendCharDataAsCharArray,
                    InputStream dataStream,
                    StringPool stringPool) throws Exception {
         super(entityHandler, errorReporter, sendCharDataAsCharArray, stringPool);
         // Remember the source first; fillCurrentChunk() reads from it.
         this.fInputStream = dataStream;
         this.fillCurrentChunk();
     }
     //
     //
     //
     // Source of the UTF-8 bytes.  Closed and set to null by fillCurrentChunk()
     // once read() reports end of stream.
     private InputStream fInputStream = null;
     //
     // When we fill a chunk there may be data that was read from the
     // input stream that has not been "processed".  We need to save
     // that data, and any in-progress state, between the calls to
     // fillCurrentChunk() in these instance variables.
     //
     // True when the next fillCurrentChunk() call must first deal with what is
     // left in fOverflow (or with an end-of-input marker that did not fit).
     private boolean fCheckOverflow = false;
     // Raw bytes read from the stream; allocated with CharDataChunk.CHUNK_SIZE entries.
     private byte[] fOverflow = null;
     // Index in fOverflow of the next byte to process (written by exitNormalize()).
     private int fOverflowOffset = 0;
     // Number of valid bytes currently held in fOverflow.
     private int fOverflowEnd = 0;
     // Index in the output char array of the next char to write (written by exitNormalize()).
     private int fOutputOffset = 0;
     // Set when a 0x0D was the last byte of an input buffer, so that a 0x0A at
     // the start of the next buffer is dropped rather than emitted again.
     private boolean fSkipLinefeed = false;
     // Number of bytes of an incomplete multi-byte sequence held in
     // fPartialMultiByteChar (0 when there is none).  handlePartialMultiByteChar()
     // additionally tests for the value 4 to emit fPartialSurrogatePair.
     private int fPartialMultiByteIn = 0;
     // The saved bytes of an incomplete multi-byte sequence, lead byte at index 0.
     private byte[] fPartialMultiByteChar = new byte[3];
     // Low surrogate that could not be written because the output buffer was full.
     private int fPartialSurrogatePair = 0;
     // Value copyNormalize() must return when one of the multi-byte handlers
     // returns false.
     private boolean fPartialMultiByteResult = false;
     //
     //
     //
     /**
      * Produces the next chunk of character data.
      *
      * Bytes are read from fInputStream into fOverflow and converted by
      * copyNormalize() into a char array, which is then installed on
      * fCurrentChunk with setCharArray().  Input left unprocessed when the
      * output array fills up stays in fOverflow and is picked up by the next
      * call (signalled by fCheckOverflow).  When the end of the stream is
      * reached, a 0 char is stored after the last converted char; if there is
      * no room for it, it is delivered in a chunk of its own on the next call.
      *
      * An IOException thrown by read() is not propagated; it is handled
      * exactly like end of stream.
      *
      * On return fLength has been increased by the number of chars produced
      * and fCurrentIndex is 0.
      *
      * @return the first char of the new chunk data (also stored in fMostRecentChar)
      * @throws Exception if closing the stream or converting the data fails
      */
     protected int fillCurrentChunk() throws Exception {
         //
         // See if we can find a way to reuse the buffer that may have been returned
         // with a recycled data chunk.
         //
         char[] recycledData = fCurrentChunk.toCharArray();
         //
         // If we have overflow from the last call, normalize from where
         // we left off, copying into the front of the output buffer.
         //
         fOutputOffset = 0;
         if (fCheckOverflow) {
             //
             // The fOverflowEnd should always be equal to CHUNK_SIZE, unless we hit
             // EOF during the previous call.  Copy the remaining data to the front
             // of the buffer and return it as the final chunk.
             //
             fMostRecentData = recycledData;
             if (fOverflowEnd < CharDataChunk.CHUNK_SIZE) {
                 recycledData = null;
                 if (fOverflowEnd > 0) {
                     // One extra slot is reserved for the trailing 0 char.
                     if (fMostRecentData == null || fMostRecentData.length < 1 + fOverflowEnd - fOverflowOffset)
                         fMostRecentData = new char[1 + fOverflowEnd - fOverflowOffset];
                     copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
                 } else {
                     // Nothing is left but the end-of-input marker itself.
                     if (fMostRecentData == null)
                         fMostRecentData = new char[1];
                 }
                 // Terminate the data with the 0 char that marks end of input.
                 fMostRecentData[fOutputOffset] = 0;
                 //
                 // Update our instance variables
                 //
                 fOverflow = null;
                 fLength += fOutputOffset;
                 fCurrentIndex = 0;
                 fCurrentChunk.setCharArray(fMostRecentData);
                 return (fMostRecentChar = fMostRecentData[0]);
             }
             if (fMostRecentData == null || fMostRecentData.length < CharDataChunk.CHUNK_SIZE)
                 fMostRecentData = new char[CharDataChunk.CHUNK_SIZE];
             else
                 recycledData = null;
             // Drain the left-over input into the front of the new output buffer.
             copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
             fCheckOverflow = false;
         } else {
             if (fOverflow == null)
                 fOverflow = new byte[CharDataChunk.CHUNK_SIZE];
             // The output buffer is allocated only after the first read below.
             fMostRecentData = null;
         }
         while (true) {
             // Start a fresh input buffer.
             fOverflowOffset = 0;
             fOverflowEnd = 0;
             int capacity = CharDataChunk.CHUNK_SIZE;
             int result = 0;
             // Keep reading until the input buffer is full or the stream ends.
             do {
                 try {
                     result = fInputStream.read(fOverflow, fOverflowEnd, capacity);
                 } catch (java.io.IOException ex) {
                     // A read failure is treated the same way as end of stream.
                     result = -1;
                 }
                 if (result == -1) {
                     //
                     // We have reached the end of the stream.
                     //
                     fInputStream.close();
                     fInputStream = null;
                     if (fMostRecentData == null) {
                         //
                         // There is no previous output data, so we know that all of the
                         // new input data will fit.
                         //
                         fMostRecentData = recycledData;
                         if (fMostRecentData == null || fMostRecentData.length < 1 + fOverflowEnd)
                             fMostRecentData = new char[1 + fOverflowEnd];
                         else
                             recycledData = null;
                         copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
                         fOverflow = null;
                         fMostRecentData[fOutputOffset] = 0;
                     } else {
                         //
                         // Copy the input data to the end of the output buffer.
                         //
                         boolean alldone = copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
                         if (alldone) {
                             if (fOverflowEnd == CharDataChunk.CHUNK_SIZE) {
                                 //
                                 // Special case - everything fit into the overflow buffer,
                                 // except that there is no room for the nul char we use to
                                 // indicate EOF.  Set the overflow buffer length to zero.
                                 // On the next call to this method we will detect this
                                 // case and handle it above.
                                 //
                                 fCheckOverflow = true;
                                 fOverflowOffset = 0;
                                 fOverflowEnd = 0;
                             } else {
                                 //
                                 // It all fit into the output buffer.
                                 //
                                 fOverflow = null;
                                 fMostRecentData[fOutputOffset] = 0;
                             }
                         } else {
                             //
                             // There is still input data left over, save the remaining data as
                             // the overflow buffer for the next call.
                             //
                             fCheckOverflow = true;
                         }
                     }
                     break;
                 }
                 if (result > 0) {
                     fOverflowEnd += result;
                     capacity -= result;
                 }
             } while (capacity > 0);
             //
             // End of stream was handled inside the read loop; leave the outer loop too.
             //
             if (result == -1)
                 break;
             if (fMostRecentData != null) {
                 // Append the new input after the chars already in the output buffer.
                 boolean alldone = copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
                 if (fOutputOffset == CharDataChunk.CHUNK_SIZE) {
                     //
                     // We filled the output buffer.
                     //
                     if (!alldone) {
                         //
                         // The input buffer will become the next overflow buffer.
                         //
                         fCheckOverflow = true;
                     }
                     break;
                 }
             } else {
                 //
                 // Now normalize the end-of-line characters and see if we need to read more
                 // bytes to fill up the buffer.
                 //
                 fMostRecentData = recycledData;
                 if (fMostRecentData == null || fMostRecentData.length < CharDataChunk.CHUNK_SIZE)
                     fMostRecentData = new char[CharDataChunk.CHUNK_SIZE];
                 else
                     recycledData = null;
                 copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
                 if (fOutputOffset == CharDataChunk.CHUNK_SIZE) {
                     //
                     // The output buffer is full.  We can return now.
                     //
                     break;
                 }
             }
             //
             // We will need to get another input buffer to be able to fill the
             // output buffer completely.
             //
         }
         //
         // Update our instance variables
         //
         fLength += fOutputOffset;
         fCurrentIndex = 0;
         fCurrentChunk.setCharArray(fMostRecentData);
         return (fMostRecentChar = fMostRecentData[0]);
     }
     //
     // Copy and normalize bytes from the overflow buffer into chars in our data buffer.
     //
     /**
      * Converts bytes from in, starting at inOffset and stopping at
      * fOverflowEnd, into chars stored in out from outOffset up to out.length.
      *
      * A 0x0D byte is written as 0x0A, and a 0x0A directly following a 0x0D is
      * dropped (also across buffer boundaries, via fSkipLinefeed).  Bytes with
      * the high bit set are passed to handleMultiByteChar(); a sequence left
      * incomplete by an earlier call is first finished by
      * handlePartialMultiByteChar().  All other bytes are cast to char as is.
      *
      * Except for the immediate return on empty input, every exit records the
      * reached positions in fOverflowOffset and fOutputOffset through
      * exitNormalize().
      *
      * @param in        the input bytes
      * @param inOffset  index of the first byte to process
      * @param out       the array receiving the chars
      * @param outOffset index of the first char to write
      * @return true when the input was consumed up to fOverflowEnd, false when
      *         the output array filled up first
      * @throws Exception propagated from the multi-byte handlers
      */
     private boolean copyNormalize(byte[] in, int inOffset, char[] out, int outOffset) throws Exception {
         //
         // Handle all edge cases before dropping into the inner loop.
         //
         int inEnd = fOverflowEnd;
         int outEnd = out.length;
         // Nothing to process; the saved offsets are left untouched.
         if (inOffset == inEnd)
             return true;
         byte b = in[inOffset];
         if (fSkipLinefeed) {
             // The previous buffer ended with 0x0D; swallow a leading 0x0A.
             fSkipLinefeed = false;
             if (b == 0x0A) {
                 if (++inOffset == inEnd)
                     return exitNormalize(inOffset, outOffset, true);
                 b = in[inOffset];
             }
         } else if (fPartialMultiByteIn > 0) {
             // Finish the multi-byte sequence that was cut off by the previous buffer.
             if (!handlePartialMultiByteChar(b, in, inOffset, inEnd, out, outOffset, outEnd))
                 return fPartialMultiByteResult;
             // The handler published the new positions through exitNormalize().
             inOffset = fOverflowOffset;
             outOffset = fOutputOffset;
             b = in[inOffset];
         }
         while (outOffset < outEnd) {
             //
             // Find the longest run that we can guarantee will not exceed the
             // bounds of the outer loop.
             //
             int inCount = inEnd - inOffset;
             int outCount = outEnd - outOffset;
             if (inCount > outCount)
                 inCount = outCount;
             // b already holds in[inOffset]; from here on inOffset points one past b.
             inOffset++;
             while (true) {
                 // Slow path: carriage returns and bytes with the high bit set.
                 while (b == 0x0D || b < 0) {
                     if (b == 0x0D) {
                         out[outOffset++] = 0x0A;
                         if (inOffset == inEnd) {
                             // Cannot tell yet whether a 0x0A follows; decide on the next call.
                             fSkipLinefeed = true;
                             return exitNormalize(inOffset, outOffset, true);
                         }
                         b = in[inOffset];
                         if (b == 0x0A) {
                             // 0x0D 0x0A collapses to the single 0x0A already written.
                             if (++inOffset == inEnd)
                                 return exitNormalize(inOffset, outOffset, true);
                             b = in[inOffset];
                         }
                         if (outOffset == outEnd)
                             return exitNormalize(inOffset, outOffset, false);
                     } else {
                         if (!handleMultiByteChar(b, in, inOffset, inEnd, out, outOffset, outEnd))
                             return fPartialMultiByteResult;
                         inOffset = fOverflowOffset;
                         outOffset = fOutputOffset;
                         b = in[inOffset];
                     }
                     // Positions moved; recompute the safe run length.
                     inCount = inEnd - inOffset;
                     outCount = outEnd - outOffset;
                     if (inCount > outCount)
                         inCount = outCount;
                     inOffset++;
                 }
                 // Fast path: copy plain single-byte chars until the run is used
                 // up or a byte needing the slow path shows up.
                 while (true) {
                     out[outOffset++] = (char)b;
                     if (--inCount == 0)
                         break;
                     b = in[inOffset++];
                     if (b == 0x0D || b < 0)
                         break;
                 }
                 if (inCount == 0)
                     break;
             }
             if (inOffset == inEnd)
                 break;
         }
         return exitNormalize(inOffset, outOffset, inOffset == inEnd);
     }
     //
     //
     //
     /**
      * Publishes the positions reached by the conversion code in the instance
      * variables and passes the given result straight back, so that callers
      * can write {@code return exitNormalize(...)}.
      *
      * @param inOffset  value to store in fOverflowOffset
      * @param outOffset value to store in fOutputOffset
      * @param result    value to return unchanged
      * @return result
      */
     private boolean exitNormalize(int inOffset, int outOffset, boolean result) {
         fOutputOffset = outOffset;
         fOverflowOffset = inOffset;
         return result;
     }
     //
     //
     //
     /**
      * Records that inCount bytes of an unfinished multi-byte sequence are
      * pending and stores the three most recent of them: bz at index
      * inCount - 1, by at inCount - 2 and bx at inCount - 3 of
      * fPartialMultiByteChar.
      */
     private void savePartialMultiByte(int inCount, byte bz, byte by, byte bx) {
         fPartialMultiByteIn = inCount;
         final int newest = inCount - 1;
         fPartialMultiByteChar[newest] = bz;
         fPartialMultiByteChar[newest - 1] = by;
         fPartialMultiByteChar[newest - 2] = bx;
     }
     /**
      * Records that inCount bytes of an unfinished multi-byte sequence are
      * pending and stores the two most recent of them: bz at index
      * inCount - 1 and by at inCount - 2 of fPartialMultiByteChar.
      */
     private void savePartialMultiByte(int inCount, byte bz, byte by) {
         fPartialMultiByteIn = inCount;
         final int newest = inCount - 1;
         fPartialMultiByteChar[newest] = bz;
         fPartialMultiByteChar[newest - 1] = by;
     }
     /**
      * Records that inCount bytes of an unfinished multi-byte sequence are
      * pending and stores the most recent one, bz, at index inCount - 1 of
      * fPartialMultiByteChar.
      */
     private void savePartialMultiByte(int inCount, byte bz) {
         fPartialMultiByteIn = inCount;
         fPartialMultiByteChar[inCount - 1] = bz;
     }
     /**
      * Decodes one multi-byte UTF-8 sequence whose lead byte b has already been
      * taken from the input (inOffset points at the byte after it) and writes
      * the resulting char(s) to out.
      *
      * If the input ends in the middle of the sequence, the bytes seen so far
      * are saved with savePartialMultiByte() for the next buffer.  A malformed
      * sequence is reported through deferException() and a 0 char is written
      * in its place.
      *
      * The positions reached are always published through exitNormalize().
      * When this method returns true, both inOffset &lt; inEnd and
      * outOffset &lt; outEnd hold for the published positions, so the caller
      * may read the next input byte and write the next output char.  When it
      * returns false, the caller must stop and return fPartialMultiByteResult.
      * This now also holds after a malformed sequence; previously those paths
      * returned true without checking the buffer ends, which let the caller
      * index past the input or output array.
      *
      * NOTE(review): when the output fills up between the two halves of a
      * surrogate pair, the low surrogate is stored in fPartialSurrogatePair,
      * but nothing in this class sets fPartialMultiByteIn to 4, the value
      * handlePartialMultiByteChar() tests before emitting it - confirm whether
      * that low surrogate is ever written.
      *
      * @param b         the lead byte of the sequence
      * @param in        the input bytes
      * @param inOffset  index of the byte following the lead byte
      * @param inEnd     index one past the last valid input byte
      * @param out       the array receiving the chars
      * @param outOffset index of the next char to write
      * @param outEnd    index one past the last writable output slot
      * @return true if the caller may continue converting, false if it must
      *         return fPartialMultiByteResult
      * @throws Exception propagated from deferException()
      */
     private boolean handleMultiByteChar(byte b, byte[] in, int inOffset, int inEnd, char[] out, int outOffset, int outEnd) throws Exception {
         if (inOffset == inEnd) {
             // Only the lead byte is available in this buffer.
             savePartialMultiByte(1, b);
             fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
             return false;
         }
         byte b1 = in[inOffset++];
         if ((b1 & 0xc0) != 0x80) {
             Object[] args = {
                 Integer.toHexString(b & 0xff),
                 Integer.toHexString(b1 & 0xff)
             };
             deferException(ImplementationMessages.ENC5, args, outOffset);
             out[outOffset++] = 0;
             return finishMultiByteChar(inOffset, inEnd, outOffset, outEnd);
         }
         if ((b & 0xe0) == 0xc0) { // 110yyyyy 10xxxxxx
             int ch = ((0x1f & b)<<6) + (0x3f & b1);
             out[outOffset++] = (char)ch;
             return finishMultiByteChar(inOffset, inEnd, outOffset, outEnd);
         }
         if (inOffset == inEnd) {
             // Lead byte and first continuation byte seen so far.
             savePartialMultiByte(2, b1, b);
             fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
             return false;
         }
         byte b2 = in[inOffset++];
         if ((b2 & 0xc0) != 0x80) {
             Object[] args = {
                 Integer.toHexString(b & 0xff),
                 Integer.toHexString(b1 & 0xff),
                 Integer.toHexString(b2 & 0xff)
             };
             deferException(ImplementationMessages.ENC6, args, outOffset);
             out[outOffset++] = 0;
             return finishMultiByteChar(inOffset, inEnd, outOffset, outEnd);
         }
         if ((b & 0xf0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
             int ch = ((0x0f & b)<<12) + ((0x3f & b1)<<6) + (0x3f & b2);
             out[outOffset++] = (char)ch;
             return finishMultiByteChar(inOffset, inEnd, outOffset, outEnd);
         }
         if ((b & 0xf8) != 0xf0) {
             // Not a 2-, 3- or 4-byte lead byte.
             Object[] args = { Integer.toHexString(b & 0xff) };
             deferException(ImplementationMessages.ENC4, args, outOffset);
             out[outOffset++] = 0;
             return finishMultiByteChar(inOffset, inEnd, outOffset, outEnd);
         }
         if (inOffset == inEnd) {
             // Lead byte and two continuation bytes seen so far.
             savePartialMultiByte(3, b2, b1, b);
             fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
             return false;
         }
         byte b3 = in[inOffset++];
         if ((b3 & 0xc0) != 0x80) {
             Object[] args = {
                 Integer.toHexString(b & 0xff),
                 Integer.toHexString(b1 & 0xff),
                 Integer.toHexString(b2 & 0xff),
                 Integer.toHexString(b3 & 0xff)
             };
             deferException(ImplementationMessages.ENC7, args, outOffset);
             out[outOffset++] = 0;
             return finishMultiByteChar(inOffset, inEnd, outOffset, outEnd);
         }
         // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
         int ch = ((0x0f & b)<<18) + ((0x3f & b1)<<12) + ((0x3f & b2)<<6) + (0x3f & b3);
         if (ch >= 0x10000) {
             // Split into a surrogate pair: high half now, low half below.
             out[outOffset++] = (char)(((ch-0x00010000)>>10)+0xd800);
             ch = (((ch-0x00010000)&0x3ff)+0xdc00);
             if (outOffset == outEnd) {
                 fPartialSurrogatePair = ch;
                 fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
                 return false;
             }
         }
         out[outOffset++] = (char)ch;
         return finishMultiByteChar(inOffset, inEnd, outOffset, outEnd);
     }
     /**
      * Common exit of handleMultiByteChar() after a char (or the 0 placeholder
      * of a malformed sequence) has been written: publishes the positions and
      * tells the caller to stop when either buffer end has been reached.
      *
      * @return true if there is both more input and more output room, false
      *         (with fPartialMultiByteResult set) otherwise
      */
     private boolean finishMultiByteChar(int inOffset, int inEnd, int outOffset, int outEnd) {
         if (inOffset == inEnd || outOffset == outEnd) {
             fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
             return false;
         }
         return exitNormalize(inOffset, outOffset, true);
     }
     /**
      * Finishes a multi-byte UTF-8 sequence that was cut off at the end of the
      * previous input buffer.  The bytes already seen are taken from
      * fPartialMultiByteChar (fPartialMultiByteIn of them), and b is the first
      * byte of the new buffer, located at in[inOffset].
      *
      * If the new buffer also ends before the sequence is complete, the state
      * is saved again with savePartialMultiByte().  A malformed sequence is
      * reported through deferException() and a 0 char is written in its place;
      * the offending byte is not consumed in that case.
      *
      * The byte values passed to deferException() are masked with 0xff so that
      * they are formatted as two hex digits, the same way handleMultiByteChar()
      * formats them (an unmasked negative byte would be printed sign-extended,
      * e.g. "ffffffc3").
      *
      * @param b         the byte at in[inOffset]
      * @param in        the input bytes
      * @param inOffset  index of b
      * @param inEnd     index one past the last valid input byte
      * @param out       the array receiving the chars
      * @param outOffset index of the next char to write
      * @param outEnd    index one past the last writable output slot
      * @return true if the caller may continue converting, false if it must
      *         return fPartialMultiByteResult
      * @throws Exception propagated from deferException()
      */
     private boolean handlePartialMultiByteChar(byte b, byte[] in, int inOffset, int inEnd, char[] out, int outOffset, int outEnd) throws Exception {
         if (outOffset == outEnd) {
             // No room at all; keep the partial state for the next call.
             fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
             return false;
         }
         if (fPartialMultiByteIn == 4) {
             // A low surrogate is pending.  Clear the state now that it is
             // being written, so that it cannot be emitted a second time.
             fPartialMultiByteIn = 0;
             out[outOffset++] = (char)fPartialSurrogatePair;
             if (outOffset == outEnd) {
                 fPartialMultiByteResult = exitNormalize(inOffset, outOffset, false);
                 return false;
             }
             fOutputOffset = outOffset;
             return true;
         }
         int byteIn = fPartialMultiByteIn;
         fPartialMultiByteIn = 0;
         byte b1 = 0;
         byte b2 = 0;
         byte b3 = 0;
         // The current byte continues the sequence at the position given by
         // the number of bytes already saved.
         switch (byteIn) {
         case 1: b1 = b; break;
         case 2: b2 = b; break;
         case 3: b3 = b; break;
         }
         // Restore the saved bytes; b becomes the lead byte again.
         int i = byteIn;
         switch (byteIn) {
         case 3:
             b2 = fPartialMultiByteChar[--i];
         case 2:
             b1 = fPartialMultiByteChar[--i];
         case 1:
             b = fPartialMultiByteChar[--i];
         }
         // Resume decoding at the stage where the previous buffer ended.
         switch (byteIn) {
         case 1:
             if ((b1 & 0xc0) != 0x80) {
                 Object[] args = {
                     Integer.toHexString(b & 0xff),
                     Integer.toHexString(b1 & 0xff)
                 };
                 deferException(ImplementationMessages.ENC5, args, outOffset);
                 out[outOffset++] = 0;
                 break;
             }
             // fall through
         case 2:
             if ((b & 0xe0) == 0xc0) { // 110yyyyy 10xxxxxx
                 int ch = ((0x1f & b)<<6) + (0x3f & b1);
                 out[outOffset++] = (char)ch;
                 if (outOffset == outEnd) {
                     fPartialMultiByteResult = exitNormalize(inOffset, outOffset, false);
                     return false;
                 }
                 if (byteIn < 2 && ++inOffset == inEnd) {
                     fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
                     return false;
                 }
                 break;
             }
             if (byteIn < 2) {
                 if (++inOffset == inEnd) {
                     // Still incomplete: remember the continuation byte just read.
                     savePartialMultiByte(2, b1);
                     fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
                     return false;
                 }
                 b2 = in[inOffset];
             }
             if ((b2 & 0xc0) != 0x80) {
                 Object[] args = {
                     Integer.toHexString(b & 0xff),
                     Integer.toHexString(b1 & 0xff),
                     Integer.toHexString(b2 & 0xff)
                 };
                 deferException(ImplementationMessages.ENC6, args, outOffset);
                 out[outOffset++] = 0;
                 break;
             }
             // fall through
         case 3:
             if ((b & 0xf0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
                 int ch = ((0x0f & b)<<12) + ((0x3f & b1)<<6) + (0x3f & b2);
                 out[outOffset++] = (char)ch;
                 if (outOffset == outEnd) {
                     fPartialMultiByteResult = exitNormalize(inOffset, outOffset, false);
                     return false;
                 }
                 if (byteIn < 3 && ++inOffset == inEnd) {
                     fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
                     return false;
                 }
                 break;
             }
             if (byteIn < 3) {
                 if ((b & 0xf8) != 0xf0) {
                     Object[] args = { Integer.toHexString(b & 0xff) };
                     deferException(ImplementationMessages.ENC4, args, outOffset);
                     out[outOffset++] = 0;
                     break;
                 }
                 if (++inOffset == inEnd) {
                     // Still incomplete: remember both continuation bytes.
                     savePartialMultiByte(3, b2, b1);
                     fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
                     return false;
                 }
                 b3 = in[inOffset];
             }
             if ((b3 & 0xc0) != 0x80) {
                 Object[] args = {
                     Integer.toHexString(b & 0xff),
                     Integer.toHexString(b1 & 0xff),
                     Integer.toHexString(b2 & 0xff),
                     Integer.toHexString(b3 & 0xff)
                 };
                 deferException(ImplementationMessages.ENC7, args, outOffset);
                 out[outOffset++] = 0;
                 break;
             }
             int ch = ((0x0f & b)<<18) + ((0x3f & b1)<<12) + ((0x3f & b2)<<6) + (0x3f & b3);
             if (ch >= 0x10000) {
                 // Split into a surrogate pair: high half now, low half below.
                 out[outOffset++] = (char)(((ch-0x00010000)>>10)+0xd800);
                 ch = (((ch-0x00010000)&0x3ff)+0xdc00);
                 if (outOffset == outEnd) {
                     fPartialSurrogatePair = ch;
                     fPartialMultiByteResult = exitNormalize(inOffset, outOffset, false);
                     return false;
                 }
             }
             out[outOffset++] = (char)ch;
             if (outOffset == outEnd) {
                 fPartialMultiByteResult = exitNormalize(inOffset, outOffset, false);
                 return false;
             }
             if (++inOffset == inEnd) {
                 fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
                 return false;
             }
             break;
         }
         return exitNormalize(inOffset, outOffset, true);
     }
 }
	/*
	* The Apache Software License, Version 1.1
	*
	*
	* Copyright (c) 1999 The Apache Software Foundation. All rights
	* reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	*
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	*
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in
	* the documentation and/or other materials provided with the
	* distribution.
	*
	* 3. The end-user documentation included with the redistribution,
	* if any, must include the following acknowledgment:
	* "This product includes software developed by the
	* Apache Software Foundation (http://www.apache.org/)."
	* Alternately, this acknowledgment may appear in the software itself,
	* if and wherever such third-party acknowledgments normally appear.
	*
	* 4. The names "Xerces" and "Apache Software Foundation" must
	* not be used to endorse or promote products derived from this
	* software without prior written permission. For written
	* permission, please contact apache@apache.org.
	*
	* 5. Products derived from this software may not be called "Apache",
	* nor may "Apache" appear in their name, without prior written
	* permission of the Apache Software Foundation.
	*
	* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
	* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
	* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
	* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
	* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
	* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	* ====================================================================
	*
	* This software consists of voluntary contributions made by many
	* individuals on behalf of the Apache Software Foundation and was
	* originally based on software copyright (c) 1999, International
	* Business Machines, Inc., http://www.apache.org. For more
	* information on the Apache Software Foundation, please see
	* <http://www.apache.org/>.
	*/

	package org.apache.xerces.readers;

	import org.apache.xerces.framework.XMLErrorReporter;
	import org.apache.xerces.utils.CharDataChunk;
	import org.apache.xerces.utils.StringPool;
	import org.apache.xerces.utils.ImplementationMessages;
	import java.io.InputStream;

	/**
	* Simple character-based version of a UTF8 reader.
	*
	* This class is not commonly used, but is provided as a much simplified
	* example of the UTF8Reader class that uses the AbstractCharReader to
	* perform all of the reader functions except for filling each buffer
	* of the character data when needed (fillCurrentChunk). We read the
	* input data from an InputStream and perform end-of-line normalization
	* as we process that data.
	*
	* @version
	*/
	final class UTF8CharReader extends AbstractCharReader {
	//
	//
	//
	UTF8CharReader(XMLEntityHandler entityHandler, XMLErrorReporter errorReporter, boolean sendCharDataAsCharArray, InputStream dataStream, StringPool stringPool) throws Exception {
	super(entityHandler, errorReporter, sendCharDataAsCharArray, stringPool);
	fInputStream = dataStream;
	fillCurrentChunk();
	}
	//
	//
	//
	private InputStream fInputStream = null;
	//
	// When we fill a chunk there may be data that was read from the
	// input stream that has not been "processed". We need to save
	// that data, and any in-progress state, between the calls to
	// fillCurrentChunk() in these instance variables.
	//
	private boolean fCheckOverflow = false;
	private byte[] fOverflow = null;
	private int fOverflowOffset = 0;
	private int fOverflowEnd = 0;
	private int fOutputOffset = 0;
	private boolean fSkipLinefeed = false;
	private int fPartialMultiByteIn = 0;
	private byte[] fPartialMultiByteChar = new byte[3];
	private int fPartialSurrogatePair = 0;
	private boolean fPartialMultiByteResult = false;
	//
	//
	//
	protected int fillCurrentChunk() throws Exception {
	//
	// See if we can find a way to reuse the buffer that may have been returned
	// with a recyled data chunk.
	//
	char[] recycledData = fCurrentChunk.toCharArray();
	//
	// If we have overflow from the last call, normalize from where
	// we left off, copying into the front of the output buffer.
	//
	fOutputOffset = 0;
	if (fCheckOverflow) {
	//
	// The fOverflowEnd should always be equal to CHUNK_SIZE, unless we hit
	// EOF during the previous call. Copy the remaining data to the front
	// of the buffer and return it as the final chunk.
	//
	fMostRecentData = recycledData;
	if (fOverflowEnd < CharDataChunk.CHUNK_SIZE) {
	recycledData = null;
	if (fOverflowEnd > 0) {
	if (fMostRecentData == null \|\| fMostRecentData.length < 1 + fOverflowEnd - fOverflowOffset)
	fMostRecentData = new char[1 + fOverflowEnd - fOverflowOffset];
	copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
	} else {
	if (fMostRecentData == null)
	fMostRecentData = new char[1];
	}
	fMostRecentData[fOutputOffset] = 0;
	//
	// Update our instance variables
	//
	fOverflow = null;
	fLength += fOutputOffset;
	fCurrentIndex = 0;
	fCurrentChunk.setCharArray(fMostRecentData);
	return (fMostRecentChar = fMostRecentData[0]);
	}
	if (fMostRecentData == null \|\| fMostRecentData.length < CharDataChunk.CHUNK_SIZE)
	fMostRecentData = new char[CharDataChunk.CHUNK_SIZE];
	else
	recycledData = null;
	copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
	fCheckOverflow = false;
	} else {
	if (fOverflow == null)
	fOverflow = new byte[CharDataChunk.CHUNK_SIZE];
	fMostRecentData = null;
	}
	while (true) {
	fOverflowOffset = 0;
	fOverflowEnd = 0;
	int capacity = CharDataChunk.CHUNK_SIZE;
	int result = 0;
	do {
	try {
	result = fInputStream.read(fOverflow, fOverflowEnd, capacity);
	} catch (java.io.IOException ex) {
	result = -1;
	}
	if (result == -1) {
	//
	// We have reached the end of the stream.
	//
	fInputStream.close();
	fInputStream = null;
	if (fMostRecentData == null) {
	//
	// There is no previous output data, so we know that all of the
	// new input data will fit.
	//
	fMostRecentData = recycledData;
	if (fMostRecentData == null \|\| fMostRecentData.length < 1 + fOverflowEnd)
	fMostRecentData = new char[1 + fOverflowEnd];
	else
	recycledData = null;
	copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
	fOverflow = null;
	fMostRecentData[fOutputOffset] = 0;
	} else {
	//
	// Copy the input data to the end of the output buffer.
	//
	boolean alldone = copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
	if (alldone) {
	if (fOverflowEnd == CharDataChunk.CHUNK_SIZE) {
	//
	// Special case - everything fit into the overflow buffer,
	// except that there is no room for the nul char we use to
	// indicate EOF. Set the overflow buffer length to zero.
	// On the next call to this method, we will detect this
	// case and which we will handle above .
	//
	fCheckOverflow = true;
	fOverflowOffset = 0;
	fOverflowEnd = 0;
	} else {
	//
	// It all fit into the output buffer.
	//
	fOverflow = null;
	fMostRecentData[fOutputOffset] = 0;
	}
	} else {
	//
	// There is still input data left over, save the remaining data as
	// the overflow buffer for the next call.
	//
	fCheckOverflow = true;
	}
	}
	break;
	}
	if (result > 0) {
	fOverflowEnd += result;
	capacity -= result;
	}
	} while (capacity > 0);
	//
	//
	//
	if (result == -1)
	break;
	if (fMostRecentData != null) {
	boolean alldone = copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
	if (fOutputOffset == CharDataChunk.CHUNK_SIZE) {
	//
	// We filled the output buffer.
	//
	if (!alldone) {
	//
	// The input buffer will become the next overflow buffer.
	//
	fCheckOverflow = true;
	}
	break;
	}
	} else {
	//
	// Now normalize the end-of-line characters and see if we need to read more
	// bytes to fill up the buffer.
	//
	fMostRecentData = recycledData;
	if (fMostRecentData == null \|\| fMostRecentData.length < CharDataChunk.CHUNK_SIZE)
	fMostRecentData = new char[CharDataChunk.CHUNK_SIZE];
	else
	recycledData = null;
	copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
	if (fOutputOffset == CharDataChunk.CHUNK_SIZE) {
	//
	// The output buffer is full. We can return now.
	//
	break;
	}
	}
	//
	// We will need to get another intput buffer to be able to fill the
	// overflow buffer completely.
	//
	}
	//
	// Update our instance variables
	//
	fLength += fOutputOffset;
	fCurrentIndex = 0;
	fCurrentChunk.setCharArray(fMostRecentData);
	return (fMostRecentChar = fMostRecentData[0]);
	}
	//
	// Copy and normalize bytes from the overflow buffer into chars in our data buffer.
	//
	private boolean copyNormalize(byte[] in, int inOffset, char[] out, int outOffset) throws Exception {
	//
	// Handle all edge cases before dropping into the inner loop.
	//
	int inEnd = fOverflowEnd;
	int outEnd = out.length;
	if (inOffset == inEnd)
	return true;
	byte b = in[inOffset];
	if (fSkipLinefeed) {
	fSkipLinefeed = false;
	if (b == 0x0A) {
	if (++inOffset == inEnd)
	return exitNormalize(inOffset, outOffset, true);
	b = in[inOffset];
	}
	} else if (fPartialMultiByteIn > 0) {
	if (!handlePartialMultiByteChar(b, in, inOffset, inEnd, out, outOffset, outEnd))
	return fPartialMultiByteResult;
	inOffset = fOverflowOffset;
	outOffset = fOutputOffset;
	b = in[inOffset];
	}
	while (outOffset < outEnd) {
	//
	// Find the longest run that we can guarantee will not exceed the
	// bounds of the outer loop.
	//
	int inCount = inEnd - inOffset;
	int outCount = outEnd - outOffset;
	if (inCount > outCount)
	inCount = outCount;
	inOffset++;
	while (true) {
	while (b == 0x0D \|\| b < 0) {
	if (b == 0x0D) {
	out[outOffset++] = 0x0A;
	if (inOffset == inEnd) {
	fSkipLinefeed = true;
	return exitNormalize(inOffset, outOffset, true);
	}
	b = in[inOffset];
	if (b == 0x0A) {
	if (++inOffset == inEnd)
	return exitNormalize(inOffset, outOffset, true);
	b = in[inOffset];
	}
	if (outOffset == outEnd)
	return exitNormalize(inOffset, outOffset, false);
	} else {
	if (!handleMultiByteChar(b, in, inOffset, inEnd, out, outOffset, outEnd))
	return fPartialMultiByteResult;
	inOffset = fOverflowOffset;
	outOffset = fOutputOffset;
	b = in[inOffset];
	}
	inCount = inEnd - inOffset;
	outCount = outEnd - outOffset;
	if (inCount > outCount)
	inCount = outCount;
	inOffset++;
	}
	while (true) {
	out[outOffset++] = (char)b;
	if (--inCount == 0)
	break;
	b = in[inOffset++];
	if (b == 0x0D \|\| b < 0)
	break;
	}
	if (inCount == 0)
	break;
	}
	if (inOffset == inEnd)
	break;
	}
	return exitNormalize(inOffset, outOffset, inOffset == inEnd);
	}
	//
	//
	//
	/**
	 * Records where decoding stopped and passes the given result through.
	 *
	 * @param inOffset  input position to store in fOverflowOffset
	 * @param outOffset output position to store in fOutputOffset
	 * @param result    value to return unchanged
	 * @return result
	 */
	private boolean exitNormalize(int inOffset, int outOffset, boolean result) {
	    this.fOutputOffset = outOffset;
	    this.fOverflowOffset = inOffset;
	    return result;
	}
	//
	//
	//
	/**
	 * Records that inCount bytes of an incomplete multi-byte sequence have
	 * been read and stores the three most recent of them.
	 *
	 * @param inCount number of bytes read so far; stored in fPartialMultiByteIn
	 * @param bz      byte stored at index inCount - 1
	 * @param by      byte stored at index inCount - 2
	 * @param bx      byte stored at index inCount - 3
	 */
	private void savePartialMultiByte(int inCount, byte bz, byte by, byte bx) {
	    fPartialMultiByteIn = inCount;
	    fPartialMultiByteChar[inCount - 1] = bz;
	    fPartialMultiByteChar[inCount - 2] = by;
	    fPartialMultiByteChar[inCount - 3] = bx;
	}
	/**
	 * Records that inCount bytes of an incomplete multi-byte sequence have
	 * been read and stores the two most recent of them; bytes saved earlier
	 * at lower indexes are left untouched.
	 *
	 * @param inCount number of bytes read so far; stored in fPartialMultiByteIn
	 * @param bz      byte stored at index inCount - 1
	 * @param by      byte stored at index inCount - 2
	 */
	private void savePartialMultiByte(int inCount, byte bz, byte by) {
	    fPartialMultiByteIn = inCount;
	    fPartialMultiByteChar[inCount - 1] = bz;
	    fPartialMultiByteChar[inCount - 2] = by;
	}
	/**
	 * Records that inCount bytes of an incomplete multi-byte sequence have
	 * been read and stores the most recent one; bytes saved earlier at lower
	 * indexes are left untouched.
	 *
	 * @param inCount number of bytes read so far; stored in fPartialMultiByteIn
	 * @param bz      byte stored at index inCount - 1
	 */
	private void savePartialMultiByte(int inCount, byte bz) {
	    fPartialMultiByteIn = inCount;
	    fPartialMultiByteChar[inCount - 1] = bz;
	}
	private boolean handleMultiByteChar(byte b, byte[] in, int inOffset, int inEnd, char[] out, int outOffset, int outEnd) throws Exception {
	if (inOffset == inEnd) {
	savePartialMultiByte(1, b);
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
	return false;
	}
	byte b1 = in[inOffset++];
	if ((b1 & 0xc0) != 0x80) {
	Object[] args = {
	Integer.toHexString(b & 0xff),
	Integer.toHexString(b1 & 0xff)
	};
	deferException(ImplementationMessages.ENC5, args, outOffset);
	out[outOffset++] = 0;
	return exitNormalize(inOffset, outOffset, true);
	}
	if ((b & 0xe0) == 0xc0) { // 110yyyyy 10xxxxxx
	int ch = ((0x1f & b)<<6) + (0x3f & b1);
	out[outOffset++] = (char)ch;
	if (inOffset == inEnd \|\| outOffset == outEnd) {
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
	return false;
	}
	} else {
	if (inOffset == inEnd) {
	savePartialMultiByte(2, b1, b);
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
	return false;
	}
	byte b2 = in[inOffset++];
	if ((b2 & 0xc0) != 0x80) {
	Object[] args = {
	Integer.toHexString(b & 0xff),
	Integer.toHexString(b1 & 0xff),
	Integer.toHexString(b2 & 0xff)
	};
	deferException(ImplementationMessages.ENC6, args, outOffset);
	out[outOffset++] = 0;
	return exitNormalize(inOffset, outOffset, true);
	}
	if ((b & 0xf0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
	int ch = ((0x0f & b)<<12) + ((0x3f & b1)<<6) + (0x3f & b2);
	out[outOffset++] = (char)ch;
	if (inOffset == inEnd \|\| outOffset == outEnd) {
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
	return false;
	}
	} else {
	if ((b & 0xf8) != 0xf0) {
	Object[] args = { Integer.toHexString(b & 0xff) };
	deferException(ImplementationMessages.ENC4, args, outOffset);
	out[outOffset++] = 0;
	return exitNormalize(inOffset, outOffset, true);
	}
	if (inOffset == inEnd) {
	savePartialMultiByte(3, b2, b1, b);
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
	return false;
	}
	byte b3 = in[inOffset++];
	if ((b3 & 0xc0) != 0x80) {
	Object[] args = {
	Integer.toHexString(b & 0xff),
	Integer.toHexString(b1 & 0xff),
	Integer.toHexString(b2 & 0xff),
	Integer.toHexString(b3 & 0xff)
	};
	deferException(ImplementationMessages.ENC7, args, outOffset);
	out[outOffset++] = 0;
	return exitNormalize(inOffset, outOffset, true);
	}
	int ch = ((0x0f & b)<<18) + ((0x3f & b1)<<12) + ((0x3f & b2)<<6) + (0x3f & b3);
	if (ch >= 0x10000) {
	out[outOffset++] = (char)(((ch-0x00010000)>>10)+0xd800);
	ch = (((ch-0x00010000)&0x3ff)+0xdc00);
	if (outOffset == outEnd) {
	fPartialSurrogatePair = ch;
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
	return false;
	}
	}
	out[outOffset++] = (char)ch;
	if (inOffset == inEnd \|\| outOffset == outEnd) {
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
	return false;
	}
	}
	}
	return exitNormalize(inOffset, outOffset, true);
	}
	private boolean handlePartialMultiByteChar(byte b, byte[] in, int inOffset, int inEnd, char[] out, int outOffset, int outEnd) throws Exception {
	if (outOffset == outEnd) {
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
	return false;
	}
	if (fPartialMultiByteIn == 4) {
	out[outOffset++] = (char)fPartialSurrogatePair;
	if (outOffset == outEnd) {
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, false);
	return false;
	}
	fOutputOffset = outOffset;
	return true;
	}
	int byteIn = fPartialMultiByteIn;
	fPartialMultiByteIn = 0;
	byte b1 = 0;
	byte b2 = 0;
	byte b3 = 0;
	switch (byteIn) {
	case 1: b1 = b; break;
	case 2: b2 = b; break;
	case 3: b3 = b; break;
	}
	int i = byteIn;
	switch (byteIn) {
	case 3:
	b2 = fPartialMultiByteChar[--i];
	case 2:
	b1 = fPartialMultiByteChar[--i];
	case 1:
	b = fPartialMultiByteChar[--i];
	}
	switch (byteIn) {
	case 1:
	if ((b1 & 0xc0) != 0x80) {
	Object[] args = {
	Integer.toHexString(b),
	Integer.toHexString(b1)
	};
	deferException(ImplementationMessages.ENC5, args, outOffset);
	out[outOffset++] = 0;
	break;
	}
	// fall through
	case 2:
	if ((b & 0xe0) == 0xc0) { // 110yyyyy 10xxxxxx
	int ch = ((0x1f & b)<<6) + (0x3f & b1);
	out[outOffset++] = (char)ch;
	if (outOffset == outEnd) {
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, false);
	return false;
	}
	if (byteIn < 2 && ++inOffset == inEnd) {
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
	return false;
	}
	break;
	}
	if (byteIn < 2) {
	if (++inOffset == inEnd) {
	savePartialMultiByte(2, b1);
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
	return false;
	}
	b2 = in[inOffset];
	}
	if ((b2 & 0xc0) != 0x80) {
	Object[] args = {
	Integer.toHexString(b),
	Integer.toHexString(b1),
	Integer.toHexString(b2)
	};
	deferException(ImplementationMessages.ENC6, args, outOffset);
	out[outOffset++] = 0;
	break;
	}
	// fall through
	case 3:
	if ((b & 0xf0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
	int ch = ((0x0f & b)<<12) + ((0x3f & b1)<<6) + (0x3f & b2);
	out[outOffset++] = (char)ch;
	if (outOffset == outEnd) {
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, false);
	return false;
	}
	if (byteIn < 3 && ++inOffset == inEnd) {
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
	return false;
	}
	break;
	}
	if (byteIn < 3) {
	if ((b & 0xf8) != 0xf0) {
	Object[] args = { Integer.toHexString(b) };
	deferException(ImplementationMessages.ENC4, args, outOffset);
	out[outOffset++] = 0;
	break;
	}
	if (++inOffset == inEnd) {
	savePartialMultiByte(3, b2, b1);
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
	return false;
	}
	b3 = in[inOffset];
	}
	if ((b3 & 0xc0) != 0x80) {
	Object[] args = {
	Integer.toHexString(b),
	Integer.toHexString(b1),
	Integer.toHexString(b2),
	Integer.toHexString(b3)
	};
	deferException(ImplementationMessages.ENC7, args, outOffset);
	out[outOffset++] = 0;
	break;
	}
	int ch = ((0x0f & b)<<18) + ((0x3f & b1)<<12) + ((0x3f & b2)<<6) + (0x3f & b3);
	if (ch >= 0x10000) {
	out[outOffset++] = (char)(((ch-0x00010000)>>10)+0xd800);
	ch = (((ch-0x00010000)&0x3ff)+0xdc00);
	if (outOffset == outEnd) {
	fPartialSurrogatePair = ch;
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, false);
	return false;
	}
	}
	out[outOffset++] = (char)ch;
	if (outOffset == outEnd) {
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, false);
	return false;
	}
	if (++inOffset == inEnd) {
	fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
	return false;
	}
	break;
	}
	return exitNormalize(inOffset, outOffset, true);
	}
	}