| /* |
| * The Apache Software License, Version 1.1 |
| * |
| * |
| * Copyright (c) 1999 The Apache Software Foundation. All rights |
| * reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * |
| * 3. The end-user documentation included with the redistribution, |
| * if any, must include the following acknowledgment: |
| * "This product includes software developed by the |
| * Apache Software Foundation (http://www.apache.org/)." |
| * Alternately, this acknowledgment may appear in the software itself, |
| * if and wherever such third-party acknowledgments normally appear. |
| * |
| * 4. The names "Xerces" and "Apache Software Foundation" must |
| * not be used to endorse or promote products derived from this |
| * software without prior written permission. For written |
| * permission, please contact apache@apache.org. |
| * |
| * 5. Products derived from this software may not be called "Apache", |
| * nor may "Apache" appear in their name, without prior written |
| * permission of the Apache Software Foundation. |
| * |
| * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
| * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
| * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
| * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| * ==================================================================== |
| * |
| * This software consists of voluntary contributions made by many |
| * individuals on behalf of the Apache Software Foundation and was |
| * originally based on software copyright (c) 1999, International |
| * Business Machines, Inc., http://www.apache.org. For more |
| * information on the Apache Software Foundation, please see |
| * <http://www.apache.org/>. |
| */ |
| |
| package org.apache.xerces.readers; |
| |
| import org.apache.xerces.framework.XMLErrorReporter; |
| import org.apache.xerces.utils.CharDataChunk; |
| import org.apache.xerces.utils.StringPool; |
| import java.io.Reader; |
| |
| /** |
| * General purpose character stream reader. |
| * |
| * This class is used when the input source for the document entity is |
| * specified using a character stream, when the input source is specified |
| * using a byte stream with an explicit encoding, or when a recognizer |
| * scans the encoding decl from the byte stream and chooses to use this |
| * reader class for that encoding. For the latter two cases, the byte |
| * stream is wrapped in the appropriate InputStreamReader using the |
| * desired encoding. |
| * |
| * @version |
| */ |
| final class CharReader extends AbstractCharReader { |
| // |
| // |
| // |
| CharReader(XMLEntityHandler entityHandler, XMLErrorReporter errorReporter, boolean sendCharDataAsCharArray, Reader reader, StringPool stringPool) throws Exception { |
| super(entityHandler, errorReporter, sendCharDataAsCharArray, stringPool); |
| fCharacterStream = reader; |
| fillCurrentChunk(); |
| } |
| // |
| // |
| // |
| private Reader fCharacterStream = null; |
| // |
| // When we fill a chunk there may be data that was read from the |
| // input stream that has not been "processed". We need to save |
| // that data, and any in-progress state, between the calls to |
| // fillCurrentChunk() in these instance variables. |
| // |
| private boolean fCheckOverflow = false; |
| private char[] fOverflow = null; |
| private int fOverflowOffset = 0; |
| private int fOverflowEnd = 0; |
| private int fOutputOffset = 0; |
| private boolean fSkipLinefeed = false; |
| // |
| // |
| // |
| protected int fillCurrentChunk() throws Exception { |
| // |
| // See if we can find a way to reuse the buffer that may have been returned |
| // with a recyled data chunk. |
| // |
| char[] recycledData = fCurrentChunk.toCharArray(); |
| // |
| // If we have overflow from the last call, normalize from where |
| // we left off, copying into the front of the output buffer. |
| // |
| fOutputOffset = 0; |
| if (fCheckOverflow) { |
| // |
| // The fOverflowEnd should always be equal to CHUNK_SIZE, unless we hit |
| // EOF during the previous call. Copy the remaining data to the front |
| // of the buffer and return it as the final chunk. |
| // |
| fMostRecentData = recycledData; |
| if (fOverflowEnd < CharDataChunk.CHUNK_SIZE) { |
| recycledData = null; |
| if (fOverflowEnd > 0) { |
| if (fMostRecentData == null || fMostRecentData.length < 1 + fOverflowEnd - fOverflowOffset) |
| fMostRecentData = new char[1 + fOverflowEnd - fOverflowOffset]; |
| copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset); |
| } else { |
| if (fMostRecentData == null) |
| fMostRecentData = new char[1]; |
| } |
| fMostRecentData[fOutputOffset] = 0; |
| // |
| // Update our instance variables |
| // |
| fOverflow = null; |
| fLength += fOutputOffset; |
| fCurrentIndex = 0; |
| fCurrentChunk.setCharArray(fMostRecentData); |
| return (fMostRecentChar = fMostRecentData[0]); |
| } |
| if (fMostRecentData == null || fMostRecentData.length < CharDataChunk.CHUNK_SIZE) |
| fMostRecentData = new char[CharDataChunk.CHUNK_SIZE]; |
| else |
| recycledData = null; |
| copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset); |
| fCheckOverflow = false; |
| } else { |
| if (fOverflow == null) { |
| fOverflow = recycledData; |
| if (fOverflow == null || fOverflow.length < CharDataChunk.CHUNK_SIZE) |
| fOverflow = new char[CharDataChunk.CHUNK_SIZE]; |
| else |
| recycledData = null; |
| } |
| fMostRecentData = null; |
| } |
| while (true) { |
| fOverflowOffset = 0; |
| fOverflowEnd = 0; |
| int capacity = CharDataChunk.CHUNK_SIZE; |
| int result = 0; |
| do { |
| try { |
| result = fCharacterStream.read(fOverflow, fOverflowEnd, capacity); |
| } catch (java.io.IOException ex) { |
| result = -1; |
| } |
| if (result == -1) { |
| // |
| // We have reached the end of the stream. |
| // |
| fCharacterStream.close(); |
| fCharacterStream = null; |
| if (fMostRecentData == null) { |
| // |
| // There is no previous output data, so we know that all of the |
| // new input data will fit. |
| // |
| fMostRecentData = recycledData; |
| if (fMostRecentData == null || fMostRecentData.length < 1 + fOverflowEnd) |
| fMostRecentData = new char[1 + fOverflowEnd]; |
| else |
| recycledData = null; |
| copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset); |
| fOverflow = null; |
| fMostRecentData[fOutputOffset] = 0; |
| } else { |
| // |
| // Copy the input data to the end of the output buffer. |
| // |
| boolean alldone = copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset); |
| if (alldone) { |
| if (fOverflowEnd == CharDataChunk.CHUNK_SIZE) { |
| // |
| // Special case - everything fit into the overflow buffer, |
| // except that there is no room for the nul char we use to |
| // indicate EOF. Set the overflow buffer length to zero. |
| // On the next call to this method, we will detect this |
| // case and which we will handle above . |
| // |
| fCheckOverflow = true; |
| fOverflowOffset = 0; |
| fOverflowEnd = 0; |
| } else { |
| // |
| // It all fit into the output buffer. |
| // |
| fOverflow = null; |
| fMostRecentData[fOutputOffset] = 0; |
| } |
| } else { |
| // |
| // There is still input data left over, save the remaining data as |
| // the overflow buffer for the next call. |
| // |
| fCheckOverflow = true; |
| } |
| } |
| break; |
| } |
| if (result > 0) { |
| fOverflowEnd += result; |
| capacity -= result; |
| } |
| } while (capacity > 0); |
| // |
| // |
| // |
| if (result == -1) |
| break; |
| if (fMostRecentData != null) { |
| boolean alldone = copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset); |
| if (fOutputOffset == CharDataChunk.CHUNK_SIZE) { |
| // |
| // We filled the output buffer. |
| // |
| if (!alldone) { |
| // |
| // The input buffer will become the next overflow buffer. |
| // |
| fCheckOverflow = true; |
| } |
| break; |
| } |
| } else { |
| // |
| // Now normalize the end-of-line characters and see if we need to read more |
| // chars to fill up the buffer. |
| // |
| fMostRecentData = recycledData; |
| if (fMostRecentData == null || fMostRecentData.length < CharDataChunk.CHUNK_SIZE) |
| fMostRecentData = new char[CharDataChunk.CHUNK_SIZE]; |
| else |
| recycledData = null; |
| copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset); |
| if (fOutputOffset == CharDataChunk.CHUNK_SIZE) { |
| // |
| // The output buffer is full. We can return now. |
| // |
| break; |
| } |
| } |
| // |
| // We will need to get another intput buffer to be able to fill the |
| // overflow buffer completely. |
| // |
| } |
| // |
| // Update our instance variables |
| // |
| fLength += fOutputOffset; |
| fCurrentIndex = 0; |
| fCurrentChunk.setCharArray(fMostRecentData); |
| return (fMostRecentChar = fMostRecentData[0]); |
| } |
| // |
| // Copy and normalize chars from the overflow buffer into chars in our data buffer. |
| // |
| private boolean copyNormalize(char[] in, int inOffset, char[] out, int outOffset) throws Exception { |
| // |
| // Handle all edge cases before dropping into the inner loop. |
| // |
| int inEnd = fOverflowEnd; |
| int outEnd = out.length; |
| if (inOffset == inEnd) |
| return true; |
| char b = in[inOffset]; |
| if (fSkipLinefeed) { |
| fSkipLinefeed = false; |
| if (b == 0x0A) { |
| if (++inOffset == inEnd) |
| return exitNormalize(inOffset, outOffset, true); |
| b = in[inOffset]; |
| } |
| } |
| while (outOffset < outEnd) { |
| // |
| // Find the longest run that we can guarantee will not exceed the |
| // bounds of the outer loop. |
| // |
| int inCount = inEnd - inOffset; |
| int outCount = outEnd - outOffset; |
| if (inCount > outCount) |
| inCount = outCount; |
| inOffset++; |
| while (true) { |
| while (b == 0x0D) { |
| out[outOffset++] = 0x0A; |
| if (inOffset == inEnd) { |
| fSkipLinefeed = true; |
| return exitNormalize(inOffset, outOffset, true); |
| } |
| b = in[inOffset]; |
| if (b == 0x0A) { |
| if (++inOffset == inEnd) |
| return exitNormalize(inOffset, outOffset, true); |
| b = in[inOffset]; |
| } |
| if (outOffset == outEnd) |
| return exitNormalize(inOffset, outOffset, false); |
| inCount = inEnd - inOffset; |
| outCount = outEnd - outOffset; |
| if (inCount > outCount) |
| inCount = outCount; |
| inOffset++; |
| } |
| while (true) { |
| out[outOffset++] = b; |
| if (--inCount == 0) |
| break; |
| b = in[inOffset++]; |
| if (b == 0x0D) |
| break; |
| } |
| if (inCount == 0) |
| break; |
| } |
| if (inOffset == inEnd) |
| break; |
| } |
| return exitNormalize(inOffset, outOffset, inOffset == inEnd); |
| } |
| // |
| // |
| // |
| private boolean exitNormalize(int inOffset, int outOffset, boolean result) { |
| fOverflowOffset = inOffset; |
| fOutputOffset = outOffset; |
| return result; |
| } |
| } |