| /* |
| * The Apache Software License, Version 1.1 |
| * |
| * |
| * Copyright (c) 1999,2000 The Apache Software Foundation. All rights |
| * reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * |
| * 3. The end-user documentation included with the redistribution, |
| * if any, must include the following acknowledgment: |
| * "This product includes software developed by the |
| * Apache Software Foundation (http://www.apache.org/)." |
| * Alternately, this acknowledgment may appear in the software itself, |
| * if and wherever such third-party acknowledgments normally appear. |
| * |
| * 4. The names "Xerces" and "Apache Software Foundation" must |
| * not be used to endorse or promote products derived from this |
| * software without prior written permission. For written |
| * permission, please contact apache@apache.org. |
| * |
| * 5. Products derived from this software may not be called "Apache", |
| * nor may "Apache" appear in their name, without prior written |
| * permission of the Apache Software Foundation. |
| * |
| * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
| * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
| * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
| * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| * ==================================================================== |
| * |
| * This software consists of voluntary contributions made by many |
| * individuals on behalf of the Apache Software Foundation and was |
| * originally based on software copyright (c) 1999, International |
| * Business Machines, Inc., http://www.apache.org. For more |
| * information on the Apache Software Foundation, please see |
| * <http://www.apache.org/>. |
| */ |
| |
| package org.apache.xerces.readers; |
| |
| import org.apache.xerces.framework.XMLErrorReporter; |
| import org.apache.xerces.utils.ChunkyByteArray; |
| import org.apache.xerces.utils.ChunkyCharArray; |
| import org.apache.xerces.utils.QName; |
| import org.apache.xerces.utils.StringHasher; |
| import org.apache.xerces.utils.StringPool; |
| import org.apache.xerces.utils.XMLCharacterProperties; |
| import java.io.IOException; |
| |
| /** |
| * Reader for UCS-2 and UCS-4 encodings. |
| * <p> |
| * This reader is created by the UCSRecognizer class when it decides that the |
| * byte stream is encoded in a format supported by this class. This class |
| * was intended to be another example of an encoding sensitive reader that |
| * could take advantage of the system design to improve performance and reduce |
| * resource consumption, but the actual performance tuning remains to be done. |
| * |
| * @version $Id$ |
| */ |
| final class UCSReader extends XMLEntityReader implements StringPool.StringProducer { |
| |
| // |
| // Constants |
| // |
| |
| // debugging |
| |
| /** Set to true to debug UTF-16, big-endian. */ |
| private static final boolean DEBUG_UTF16_BIG = false; |
| |
| // |
| // Scanner encoding enumeration |
| // |
| static final int |
| E_UCS4B = 0, // UCS-4 big endian |
| E_UCS4L = 1, // UCS-4 little endian |
| E_UCS2B = 2, // UCS-2 big endian with byte order mark |
| E_UCS2L = 3, // UCS-2 little endian with byte order mark |
| E_UCS2B_NOBOM = 4, // UCS-2 big endian without byte order mark |
| E_UCS2L_NOBOM = 5; // UCS-2 little endian without byte order mark |
| // |
| // |
| // |
| private ChunkyByteArray fData = null; |
| private int fEncoding = -1; |
| private StringPool fStringPool = null; |
| private int fBytesPerChar = -1; |
| private boolean fBigEndian = true; |
| private ChunkyCharArray fStringCharArray = null; |
| private boolean fCalledCharPropInit = false; |
| // |
| // |
| // |
| UCSReader(XMLEntityHandler entityHandler, XMLErrorReporter errorReporter, boolean sendCharDataAsCharArray, ChunkyByteArray data, int encoding, StringPool stringPool) throws Exception { |
| super(entityHandler, errorReporter, sendCharDataAsCharArray); |
| fCurrentOffset = (encoding == E_UCS2B || encoding == E_UCS2L) ? 2 : 0; |
| fData = data; |
| fEncoding = encoding; |
| fStringPool = stringPool; |
| fBytesPerChar = (fEncoding == E_UCS4B || fEncoding == E_UCS4L) ? 4 : 2; |
| fBigEndian = fEncoding == E_UCS4B || fEncoding == E_UCS2B || fEncoding == E_UCS2B_NOBOM; |
| } |
| // |
| // |
| // |
| private int getChar(int offset) throws IOException { |
| int b0 = fData.byteAt(offset++) & 0xff; |
| if (b0 == 0xff && fData.atEOF(offset)) |
| return -1; |
| int b1 = fData.byteAt(offset++) & 0xff; |
| if (fBytesPerChar == 4) { |
| int b2 = fData.byteAt(offset++) & 0xff; |
| int b3 = fData.byteAt(offset++) & 0xff; |
| if (fBigEndian) |
| return (b0<<24)+(b1<<16)+(b2<<8)+b3; |
| else |
| return (b3<<24)+(b2<<16)+(b1<<8)+b0; |
| } else { |
| if (fBigEndian) |
| return (b0<<8)+b1; |
| else |
| return (b1<<8)+b0; |
| } |
| } |
| /** |
| * |
| */ |
| public int addString(int offset, int length) { |
| if (length == 0) |
| return 0; |
| return fStringPool.addString(this, offset, length); |
| } |
| /** |
| * |
| */ |
| public int addSymbol(int offset, int length) { |
| if (length == 0) |
| return 0; |
| return fStringPool.addSymbol(this, offset, length, getHashcode(offset, length)); |
| } |
| // |
| // |
| // |
| public void append(XMLEntityHandler.CharBuffer charBuffer, int offset, int length) { |
| int endOffset = offset + length; |
| while (offset < endOffset) { |
| int ch; |
| try { |
| ch = getChar(offset); |
| } |
| catch (IOException ex) { |
| ch = 0; // REVISIT |
| } |
| charBuffer.append((char)ch); |
| offset += fBytesPerChar; |
| } |
| } |
| // |
| // |
| // |
| public void releaseString(int offset, int length) { |
| // nothing to do... |
| } |
| // |
| // |
| // |
| public String toString(int offset, int length) { |
| // |
| // REVISIT - we need to cache this operation !! |
| // |
| if (fStringCharArray == null) |
| fStringCharArray = new ChunkyCharArray(fStringPool); |
| int newOffset = fStringCharArray.length(); |
| append(fStringCharArray, offset, length); |
| int newLength = fStringCharArray.length() - newOffset; |
| int stringIndex = fStringCharArray.addString(newOffset, newLength); |
| return fStringPool.toString(stringIndex); |
| } |
| // |
| // |
| // |
| private int getHashcode(int offset, int length) { |
| int endOffset = offset + length; |
| int hashcode = 0; |
| while (offset < endOffset) { |
| int ch; |
| try { |
| ch = getChar(offset); |
| } |
| catch (IOException ex) { |
| ch = 0; // REVISIT |
| } |
| hashcode = StringHasher.hashChar(hashcode, ch); |
| offset += fBytesPerChar; |
| } |
| return StringHasher.finishHash(hashcode); |
| } |
| // |
| public boolean equalsString(int offset, int length, char[] strChars, int strOffset, int strLength) { |
| int endOffset = offset + length; |
| int slen = strLength; |
| while (offset < endOffset) { |
| if (slen-- == 0) |
| return false; |
| int ch; |
| try { |
| ch = getChar(offset); |
| } |
| catch (IOException ex) { |
| ch = 0; // REVISIT |
| } |
| if (ch != strChars[strOffset++]) |
| return false; |
| offset += fBytesPerChar; |
| } |
| return slen == 0; |
| } |
| // |
| // |
| // |
| private static char[] fCharacters = new char[256]; |
| private int fCharDataLength = 0; |
| private void appendCharData(int ch) { |
| if (fCharacters.length == fCharDataLength) { |
| char[] newchars = new char[fCharacters.length * 2]; |
| System.arraycopy(fCharacters, 0, newchars, 0, fCharacters.length); |
| fCharacters = newchars; |
| } |
| fCharacters[fCharDataLength++] = (char)ch; |
| } |
| public void callCharDataHandler(int offset, int length, boolean isWhitespace) throws Exception { |
| int endOffset = offset + length; |
| boolean skiplf = false; |
| while (offset < endOffset) { |
| int ch = getChar(offset); |
| // fix for Bug23: Element Data not normalized... |
| if (skiplf) { |
| skiplf = false; |
| if (ch == 0x0A) { |
| offset += fBytesPerChar; |
| continue; |
| } |
| } |
| if (ch == 0x0D) { |
| skiplf = true; |
| ch = 0x0A; |
| } |
| appendCharData(ch); |
| offset += fBytesPerChar; |
| } |
| if (fSendCharDataAsCharArray) { |
| if (isWhitespace) |
| fCharDataHandler.processWhitespace(fCharacters, 0, fCharDataLength); |
| else |
| fCharDataHandler.processCharacters(fCharacters, 0, fCharDataLength); |
| } else { |
| int stringIndex = fStringPool.addString(new String(fCharacters, 0, fCharDataLength)); |
| if (isWhitespace) |
| fCharDataHandler.processWhitespace(stringIndex); |
| else |
| fCharDataHandler.processCharacters(stringIndex); |
| } |
| fCharDataLength = 0; |
| } |
| // |
| // |
| // |
| public boolean lookingAtChar(char ch, boolean skipPastChar) throws Exception { |
| int ch2 = getChar(fCurrentOffset); |
| if (ch2 == ch) { |
| if (skipPastChar) { |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| } |
| return true; |
| } |
| return false; |
| } |
| // |
| // |
| // |
| public boolean lookingAtValidChar(boolean skipPastChar) throws Exception { |
| int ch = getChar(fCurrentOffset); |
| if (ch < 0x20) { |
| if (ch == 0x09) { |
| if (!skipPastChar) |
| return true; |
| fCharacterCounter++; |
| } else if (ch == 0x0A) { |
| if (!skipPastChar) |
| return true; |
| fLinefeedCounter++; |
| fCharacterCounter = 1; |
| } else if (ch == 0x0D) { |
| if (!skipPastChar) |
| return true; |
| fCarriageReturnCounter++; |
| fCharacterCounter = 1; |
| } else { |
| if (ch == -1) { |
| return changeReaders().lookingAtValidChar(skipPastChar); |
| } |
| return false; |
| } |
| fCurrentOffset += fBytesPerChar; |
| return true; |
| } |
| if (ch <= 0xD7FF) { |
| if (skipPastChar) { |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| } |
| return true; |
| } |
| if (ch <= 0xDFFF) { |
| // REVISIT - check that the surrogate pair is valid |
| if (skipPastChar) { |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| } |
| return true; |
| } |
| if (ch <= 0xFFFD) { |
| if (skipPastChar) { |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| } |
| return true; |
| } |
| return false; |
| } |
| // |
| // |
| // |
| public boolean lookingAtSpace(boolean skipPastChar) throws Exception { |
| int ch = getChar(fCurrentOffset); |
| if (ch > 0x20) |
| return false; |
| if (ch == 0x20 || ch == 0x09) { |
| if (!skipPastChar) |
| return true; |
| fCharacterCounter++; |
| } else if (ch == 0x0A) { |
| if (!skipPastChar) |
| return true; |
| fLinefeedCounter++; |
| fCharacterCounter = 1; |
| } else if (ch == 0x0D) { |
| if (!skipPastChar) |
| return true; |
| fCarriageReturnCounter++; |
| fCharacterCounter = 1; |
| } else { |
| if (ch == -1) { // REVISIT - should we be checking this here ? |
| return changeReaders().lookingAtSpace(skipPastChar); |
| } |
| return false; |
| } |
| fCurrentOffset += fBytesPerChar; |
| return true; |
| } |
| // |
| // |
| // |
| public void skipToChar(char chr) throws Exception { |
| while (true) { |
| int ch = getChar(fCurrentOffset); |
| if (ch == chr) |
| return; |
| if (ch == -1) { |
| changeReaders().skipToChar(chr); |
| return; |
| } |
| if (ch == 0x0A) { |
| fLinefeedCounter++; |
| fCharacterCounter = 1; |
| } else if (ch == 0x0D) { |
| fCarriageReturnCounter++; |
| fCharacterCounter = 1; |
| } else if (ch >= 0xD800 && ch < 0xDC00) { |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| ch = getChar(fCurrentOffset); |
| if (ch < 0xDC00 || ch >= 0xE000) |
| continue; |
| } else |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| } |
| } |
| // |
| // |
| // |
| public void skipPastSpaces() throws Exception { |
| while (true) { |
| int ch = getChar(fCurrentOffset); |
| if (ch > 0x20) |
| return; |
| if (ch == 0x20 || ch == 0x09) { |
| fCharacterCounter++; |
| } else if (ch == 0x0A) { |
| fLinefeedCounter++; |
| fCharacterCounter = 1; |
| } else if (ch == 0x0D) { |
| fCarriageReturnCounter++; |
| fCharacterCounter = 1; |
| } else { |
| if (ch == -1) |
| changeReaders().skipPastSpaces(); |
| return; |
| } |
| fCurrentOffset += fBytesPerChar; |
| } |
| } |
| // |
| // |
| // |
| public void skipPastName(char fastcheck) throws Exception { |
| int ch = getChar(fCurrentOffset); |
| if (!fCalledCharPropInit) { |
| XMLCharacterProperties.initCharFlags(); |
| fCalledCharPropInit = true; |
| } |
| if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) |
| return; |
| while (true) { |
| fCurrentOffset += fBytesPerChar; |
| fCharacterCounter++; |
| ch = getChar(fCurrentOffset); |
| if (fastcheck == ch) |
| return; |
| if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0) |
| return; |
| } |
| } |
| // |
| // |
| // |
| public void skipPastNmtoken(char fastcheck) throws Exception { |
| int ch = getChar(fCurrentOffset); |
| if (!fCalledCharPropInit) { |
| XMLCharacterProperties.initCharFlags(); |
| fCalledCharPropInit = true; |
| } |
| while (true) { |
| if (fastcheck == ch) |
| return; |
| if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0) |
| return; |
| fCurrentOffset += fBytesPerChar; |
| fCharacterCounter++; |
| ch = getChar(fCurrentOffset); |
| } |
| } |
| // |
| // |
| // |
| public boolean skippedString(char[] s) throws Exception { |
| int offset = fCurrentOffset; |
| for (int i = 0; i < s.length; i++) { |
| if (getChar(offset) != s[i]) |
| return false; |
| offset += fBytesPerChar; |
| } |
| fCurrentOffset = offset; |
| fCharacterCounter += s.length; |
| return true; |
| } |
| // |
| // |
| // |
| public int scanInvalidChar() throws Exception { |
| int ch = getChar(fCurrentOffset); |
| if (ch == -1) { |
| return changeReaders().scanInvalidChar(); |
| } |
| fCurrentOffset += fBytesPerChar; |
| if (ch == 0x0A) { |
| fLinefeedCounter++; |
| fCharacterCounter = 1; |
| } else if (ch == 0x0D) { |
| fCarriageReturnCounter++; |
| fCharacterCounter = 1; |
| } else { |
| fCharacterCounter++; |
| if (ch >= 0xD800 && ch < 0xDC00) { |
| int ch2 = getChar(fCurrentOffset); |
| if (ch2 >= 0xDC00 && ch2 < 0xE000) { |
| ch = ((ch-0xD800)<<10)+(ch2-0xDC00)+0x10000; |
| fCurrentOffset += fBytesPerChar; |
| } |
| } |
| } |
| return ch; |
| } |
| // |
| // |
| // |
| public int scanCharRef(boolean hex) throws Exception { |
| int ch = getChar(fCurrentOffset); |
| if (ch == -1) { |
| return changeReaders().scanCharRef(hex); |
| } |
| int num = 0; |
| if (hex) { |
| if (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0) |
| return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR; |
| num = ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10); |
| } else { |
| if (ch < '0' || ch > '9') |
| return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR; |
| num = ch - '0'; |
| } |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| boolean toobig = false; |
| while (true) { |
| ch = getChar(fCurrentOffset); |
| if (ch == -1) |
| break; |
| if (hex) { |
| if (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0) |
| break; |
| } else { |
| if (ch < '0' || ch > '9') |
| break; |
| } |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| if (hex) { |
| int dig = ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10); |
| num = (num << 4) + dig; |
| } else { |
| int dig = ch - '0'; |
| num = (num * 10) + dig; |
| } |
| if (num > 0x10FFFF) { |
| toobig = true; |
| num = 0; |
| } |
| } |
| if (ch != ';') |
| return XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED; |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| if (toobig) |
| return XMLEntityHandler.CHARREF_RESULT_OUT_OF_RANGE; |
| return num; |
| } |
| // |
| // |
| // |
| public int scanStringLiteral() throws Exception { |
| boolean single; |
| if (!(single = lookingAtChar('\'', true)) && !lookingAtChar('\"', true)) { |
| return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED; |
| } |
| int offset = fCurrentOffset; |
| char qchar = single ? '\'' : '\"'; |
| while (!lookingAtChar(qchar, false)) { |
| if (!lookingAtValidChar(true)) { |
| return XMLEntityHandler.STRINGLIT_RESULT_INVALID_CHAR; |
| } |
| } |
| int stringIndex = addString(offset, fCurrentOffset - offset); |
| lookingAtChar(qchar, true); // move past qchar |
| return stringIndex; |
| } |
| // |
| // [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
| // | "'" ([^<&'] | Reference)* "'" |
| // |
| public int scanAttValue(char qchar, boolean asSymbol) throws Exception |
| { |
| int offset = fCurrentOffset; |
| while (true) { |
| if (lookingAtChar(qchar, false)) { |
| break; |
| } |
| if (lookingAtChar(' ', true)) { |
| continue; |
| } |
| if (lookingAtSpace(false)) { |
| return XMLEntityHandler.ATTVALUE_RESULT_COMPLEX; |
| } |
| if (lookingAtChar('&', false)) { |
| return XMLEntityHandler.ATTVALUE_RESULT_COMPLEX; |
| } |
| if (lookingAtChar('<', false)) { |
| return XMLEntityHandler.ATTVALUE_RESULT_LESSTHAN; |
| } |
| if (!lookingAtValidChar(true)) { |
| return XMLEntityHandler.ATTVALUE_RESULT_INVALID_CHAR; |
| } |
| } |
| int result = asSymbol ? addSymbol(offset, fCurrentOffset - offset) : addString(offset, fCurrentOffset - offset); |
| lookingAtChar(qchar, true); |
| return result; |
| } |
| // |
| // [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
| // | "'" ([^%&'] | PEReference | Reference)* "'" |
| // |
| public int scanEntityValue(int qchar, boolean createString) throws Exception |
| { |
| int offset = fCurrentOffset; |
| while (true) { |
| if (qchar != -1 && lookingAtChar((char)qchar, false)) { |
| if (!createString) |
| return XMLEntityHandler.ENTITYVALUE_RESULT_FINISHED; |
| break; |
| } |
| if (lookingAtChar('&', false)) { |
| return XMLEntityHandler.ENTITYVALUE_RESULT_REFERENCE; |
| } |
| if (lookingAtChar('%', false)) { |
| return XMLEntityHandler.ENTITYVALUE_RESULT_PEREF; |
| } |
| if (!lookingAtValidChar(true)) { |
| return XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR; |
| } |
| } |
| int result = addString(offset, fCurrentOffset - offset); |
| lookingAtChar((char)qchar, true); |
| return result; |
| } |
| // |
| // |
| // |
| public boolean scanExpectedName(char fastcheck, StringPool.CharArrayRange expectedName) throws Exception { |
| int nameOffset = fCurrentOffset; |
| skipPastName(fastcheck); |
| int nameLength = fCurrentOffset - nameOffset; |
| if (nameLength == 0) |
| return false; |
| int nameIndex = addSymbol(nameOffset, nameLength); |
| // DEFECT !! check name against expected name |
| return true; |
| } |
| |
| public void scanQName(char fastcheck, QName qname) throws Exception { |
| |
| // DEFECT !! no code // Defect #126 |
| int nameOffset = fCurrentOffset; |
| skipPastName(fastcheck); |
| int nameLength = fCurrentOffset - nameOffset; |
| if (nameLength == 0) { |
| qname.clear(); |
| return; |
| } |
| qname.prefix = -1; |
| qname.localpart = -1; |
| qname.rawname = addSymbol(nameOffset, nameLength); |
| qname.uri = -1; |
| |
| } // scanQName(char,QName) |
| |
| public int scanName(char fastcheck) throws Exception { |
| int nameOffset = fCurrentOffset; |
| skipPastName(fastcheck); |
| int nameLength = fCurrentOffset - nameOffset; |
| if (nameLength == 0) |
| return -1; |
| int nameIndex = addSymbol(nameOffset, nameLength); |
| return nameIndex; |
| } |
| // |
| // |
| // |
| private static final char[] cdata_string = { 'C','D','A','T','A','[' }; |
| private int recognizeMarkup() throws Exception { |
| int ch = getChar(fCurrentOffset); |
| switch (ch) { |
| case -1: |
| return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT; |
| case '?': |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| return XMLEntityHandler.CONTENT_RESULT_START_OF_PI; |
| case '!': |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| ch = getChar(fCurrentOffset); |
| if (ch == -1) { |
| fCharacterCounter--; |
| fCurrentOffset -= fBytesPerChar;; |
| return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT; |
| } |
| if (ch == '-') { |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| ch = getChar(fCurrentOffset); |
| if (ch == -1) { |
| fCharacterCounter -= 2; |
| fCurrentOffset -= 2; |
| return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT; |
| } |
| if (ch == '-') { |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| return XMLEntityHandler.CONTENT_RESULT_START_OF_COMMENT; |
| } |
| break; |
| } |
| if (ch == '[') { |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| for (int i = 0; i < 6; i++) { |
| ch = getChar(fCurrentOffset); |
| if (ch == -1) { |
| fCharacterCounter -= (2 + i); |
| fCurrentOffset -= ((2 + i) * fBytesPerChar); |
| return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT; |
| } |
| if (ch != cdata_string[i]) { |
| return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED; |
| } |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| } |
| return XMLEntityHandler.CONTENT_RESULT_START_OF_CDSECT; |
| } |
| break; |
| case '/': |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| return XMLEntityHandler.CONTENT_RESULT_START_OF_ETAG; |
| default: |
| return XMLEntityHandler.CONTENT_RESULT_START_OF_ELEMENT; |
| } |
| return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED; |
| } |
| private int recognizeReference() throws Exception { |
| int ch = getChar(fCurrentOffset); |
| if (ch == -1) { |
| return XMLEntityHandler.CONTENT_RESULT_REFERENCE_END_OF_INPUT; |
| } |
| // |
| // [67] Reference ::= EntityRef | CharRef |
| // [68] EntityRef ::= '&' Name ';' |
| // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' |
| // |
| if (ch == '#') { |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| return XMLEntityHandler.CONTENT_RESULT_START_OF_CHARREF; |
| } else { |
| return XMLEntityHandler.CONTENT_RESULT_START_OF_ENTITYREF; |
| } |
| } |
| public int scanContent(QName element) throws Exception { |
| int offset = fCurrentOffset; |
| int ch = getChar(fCurrentOffset); |
| fCurrentOffset += fBytesPerChar; |
| byte prop; |
| if (!fCalledCharPropInit) { |
| XMLCharacterProperties.initCharFlags(); |
| fCalledCharPropInit = true; |
| } |
| if (ch < 0x80) { |
| if (ch == -1) { |
| fCurrentOffset -= fBytesPerChar; |
| return changeReaders().scanContent(element); // REVISIT - not quite... |
| } |
| prop = XMLCharacterProperties.fgCharFlags[ch]; |
| if ((prop & XMLCharacterProperties.E_CharDataFlag) == 0 && ch != 0x0A && ch != 0x0D) { |
| if (ch == '<') { |
| fCharacterCounter++; |
| if (!fInCDSect) { |
| return recognizeMarkup(); |
| } |
| } else if (ch == '&') { |
| fCharacterCounter++; |
| if (!fInCDSect) { |
| return recognizeReference(); |
| } |
| } else if (ch == ']') { |
| if (getChar(fCurrentOffset) == ']' && getChar(fCurrentOffset + fBytesPerChar) == '>') { |
| fCharacterCounter += 3; |
| fCurrentOffset += (2 * fBytesPerChar); |
| return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT; |
| } |
| } else { |
| fCurrentOffset -= fBytesPerChar; |
| return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; |
| } |
| } else if (ch == 0x20 || ch == 0x09 || ch == 0x0A || ch == 0x0D) { |
| do { |
| if (ch == 0x0A) { |
| fLinefeedCounter++; |
| fCharacterCounter = 1; |
| } else if (ch == 0x0D) { |
| fCarriageReturnCounter++; |
| fCharacterCounter = 1; |
| } else { |
| fCharacterCounter++; |
| } |
| ch = getChar(fCurrentOffset); |
| fCurrentOffset += fBytesPerChar; |
| } while (ch == 0x20 || ch == 0x09 || ch == 0x0A || ch == 0x0D); |
| if (ch < 0x80) { |
| if (ch == -1) { |
| fCurrentOffset -= fBytesPerChar; |
| callCharDataHandler(offset, fCurrentOffset - offset, true); |
| return changeReaders().scanContent(element); // REVISIT - not quite... |
| } |
| prop = XMLCharacterProperties.fgCharFlags[ch]; |
| if ((prop & XMLCharacterProperties.E_CharDataFlag) == 0) { |
| if (ch == '<') { |
| if (!fInCDSect) { |
| callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, true); |
| fCharacterCounter++; |
| return recognizeMarkup(); |
| } |
| fCharacterCounter++; |
| } else if (ch == '&') { |
| if (!fInCDSect) { |
| callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, true); |
| fCharacterCounter++; |
| return recognizeReference(); |
| } |
| fCharacterCounter++; |
| } else if (ch == ']') { |
| if (getChar(fCurrentOffset) == ']' && getChar(fCurrentOffset + fBytesPerChar) == '>') { |
| callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, true); |
| fCharacterCounter += 3; |
| fCurrentOffset += (2 * fBytesPerChar); |
| return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT; |
| } |
| } else { |
| fCurrentOffset -= fBytesPerChar; |
| callCharDataHandler(offset, fCurrentOffset - offset, true); |
| return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; |
| } |
| } |
| } else { |
| if (ch >= 0xD800 && ch <= 0xDFFF) { |
| fCurrentOffset += fBytesPerChar; |
| } else if (ch == 0xFFFE || ch == 0xFFFF) { |
| fCurrentOffset -= fBytesPerChar; |
| callCharDataHandler(offset, fCurrentOffset - offset, true); |
| return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; |
| } |
| } |
| } |
| } else { |
| if (ch >= 0xD800 && ch <= 0xDFFF) { |
| fCurrentOffset += fBytesPerChar; |
| } else if (ch == 0xFFFE || ch == 0xFFFF) { |
| fCurrentOffset -= fBytesPerChar; |
| return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; |
| } |
| } |
| fCharacterCounter++; |
| while (true) { |
| ch = getChar(fCurrentOffset); |
| fCurrentOffset += fBytesPerChar; |
| if (ch >= 0x80 || ch < 0) |
| break; |
| prop = XMLCharacterProperties.fgCharFlags[ch]; |
| if ((prop & XMLCharacterProperties.E_CharDataFlag) == 0) { |
| if (ch == 0x0A) { |
| fLinefeedCounter++; |
| fCharacterCounter = 1; |
| } else if (ch == 0x0D) { |
| fCarriageReturnCounter++; |
| fCharacterCounter = 1; |
| } else |
| break; |
| } else |
| fCharacterCounter++; |
| } |
| while (true) { // REVISIT - EOF check ? |
| if (ch < 0x80) { |
| if (ch == -1) { |
| fCurrentOffset -= fBytesPerChar; |
| callCharDataHandler(offset, fCurrentOffset - offset, false); |
| return changeReaders().scanContent(element); // REVISIT - not quite... |
| } |
| prop = XMLCharacterProperties.fgCharFlags[ch]; |
| if ((prop & XMLCharacterProperties.E_CharDataFlag) == 0) { |
| if (ch == '<') { |
| if (!fInCDSect) { |
| callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, false); |
| fCharacterCounter++; |
| return recognizeMarkup(); |
| } |
| fCharacterCounter++; |
| } else if (ch == '&') { |
| if (!fInCDSect) { |
| callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, false); |
| fCharacterCounter++; |
| return recognizeReference(); |
| } |
| fCharacterCounter++; |
| } else if (ch == 0x0A) { |
| fLinefeedCounter++; |
| fCharacterCounter = 1; |
| } else if (ch == 0x0D) { |
| fCarriageReturnCounter++; |
| fCharacterCounter = 1; |
| } else if (ch == ']') { |
| if (getChar(fCurrentOffset) == ']' && getChar(fCurrentOffset + fBytesPerChar) == '>') { |
| callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, false); |
| fCharacterCounter += 3; |
| fCurrentOffset += (2 * fBytesPerChar); |
| return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT; |
| } |
| fCharacterCounter++; |
| } else { |
| fCurrentOffset -= fBytesPerChar; |
| callCharDataHandler(offset, fCurrentOffset - offset, false); |
| return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; |
| } |
| } else { |
| fCharacterCounter++; |
| } |
| } else { |
| if (ch >= 0xD800 && ch <= 0xDFFF) { |
| fCharacterCounter++; |
| fCurrentOffset += fBytesPerChar; |
| } else if (ch == 0xFFFE || ch == 0xFFFF) { |
| fCurrentOffset -= fBytesPerChar; |
| callCharDataHandler(offset, fCurrentOffset - offset, false); |
| return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; |
| } |
| fCharacterCounter++; |
| } |
| ch = getChar(fCurrentOffset); |
| fCurrentOffset += fBytesPerChar; |
| } |
| } |
| } |