blob: 448fb8ad0608791980c953fe82b4e87d26f1077c [file] [log] [blame]
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999,2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.xerces.readers;
import org.apache.xerces.framework.XMLErrorReporter;
import org.apache.xerces.utils.ChunkyByteArray;
import org.apache.xerces.utils.ChunkyCharArray;
import org.apache.xerces.utils.QName;
import org.apache.xerces.utils.StringHasher;
import org.apache.xerces.utils.StringPool;
import org.apache.xerces.utils.XMLCharacterProperties;
import java.io.IOException;
/**
* Reader for UCS-2 and UCS-4 encodings.
* <p>
* This reader is created by the UCSRecognizer class when it decides that the
* byte stream is encoded in a format supported by this class. This class
* was intended to be another example of an encoding sensitive reader that
* could take advantage of the system design to improve performance and reduce
* resource consumption, but the actual performance tuning remains to be done.
*
* @version $Id$
*/
final class UCSReader extends XMLEntityReader implements StringPool.StringProducer {
//
// Constants
//
// debugging
/**
 * Set to true to debug UTF-16, big-endian.
 * NOTE(review): this flag is not referenced anywhere else in this class.
 */
private static final boolean DEBUG_UTF16_BIG = false;
//
// Scanner encoding enumeration
//
// Identifiers for the byte layouts this reader can decode. The value passed
// to the constructor selects the character width (4 bytes for the UCS-4
// variants, 2 otherwise), the byte order, and whether the first two bytes
// are skipped as a byte order mark (only E_UCS2B and E_UCS2L).
//
static final int
E_UCS4B = 0, // UCS-4 big endian
E_UCS4L = 1, // UCS-4 little endian
E_UCS2B = 2, // UCS-2 big endian with byte order mark
E_UCS2L = 3, // UCS-2 little endian with byte order mark
E_UCS2B_NOBOM = 4, // UCS-2 big endian without byte order mark
E_UCS2L_NOBOM = 5; // UCS-2 little endian without byte order mark
//
//
//
/** Encoded bytes of the entity; decoded on demand by getChar(). */
private ChunkyByteArray fData = null;
/** One of the E_* constants, as passed to the constructor. */
private int fEncoding = -1;
/** Pool that receives the strings and symbols produced by this reader. */
private StringPool fStringPool = null;
/** Width of one encoded character in bytes: 4 for UCS-4, otherwise 2. */
private int fBytesPerChar = -1;
/** True when the most significant byte of a character comes first. */
private boolean fBigEndian = true;
/** Scratch character store used by toString(); created on first use. */
private ChunkyCharArray fStringCharArray = null;
/** Set once this reader has called XMLCharacterProperties.initCharFlags(). */
private boolean fCalledCharPropInit = false;
//
//
//
UCSReader(XMLEntityHandler entityHandler, XMLErrorReporter errorReporter, boolean sendCharDataAsCharArray, ChunkyByteArray data, int encoding, StringPool stringPool) throws Exception {
super(entityHandler, errorReporter, sendCharDataAsCharArray);
fCurrentOffset = (encoding == E_UCS2B || encoding == E_UCS2L) ? 2 : 0;
fData = data;
fEncoding = encoding;
fStringPool = stringPool;
fBytesPerChar = (fEncoding == E_UCS4B || fEncoding == E_UCS4L) ? 4 : 2;
fBigEndian = fEncoding == E_UCS4B || fEncoding == E_UCS2B || fEncoding == E_UCS2B_NOBOM;
}
//
//
//
/**
 * Decodes the character whose first byte is at <code>offset</code>.
 *
 * @param offset byte offset into the underlying data
 * @return the decoded value, or -1 when the first byte is 0xFF and the
 *         data reports end of input for the following offset
 * @throws IOException if the underlying byte array fails to supply a byte
 */
private int getChar(int offset) throws IOException {
    final int first = fData.byteAt(offset) & 0xff;
    if (first == 0xff && fData.atEOF(offset + 1)) {
        return -1;
    }
    final int second = fData.byteAt(offset + 1) & 0xff;

    if (fBytesPerChar != 4) {
        // two-byte character
        return fBigEndian ? (first << 8) + second
                          : (second << 8) + first;
    }

    // four-byte character
    final int third = fData.byteAt(offset + 2) & 0xff;
    final int fourth = fData.byteAt(offset + 3) & 0xff;
    return fBigEndian ? (first << 24) + (second << 16) + (third << 8) + fourth
                      : (fourth << 24) + (third << 16) + (second << 8) + first;
}
/**
 * Adds the byte range to the string pool as a string, with this reader
 * acting as the producer of its characters.
 *
 * @param offset byte offset of the first character
 * @param length length of the range in bytes
 * @return the index returned by the string pool, or 0 for an empty range
 */
public int addString(int offset, int length) {
    return (length == 0) ? 0 : fStringPool.addString(this, offset, length);
}
/**
 * Adds the byte range to the string pool as a symbol, supplying the hash
 * code computed over its decoded characters.
 *
 * @param offset byte offset of the first character
 * @param length length of the range in bytes
 * @return the index returned by the string pool, or 0 for an empty range
 */
public int addSymbol(int offset, int length) {
    if (length != 0) {
        final int hash = getHashcode(offset, length);
        return fStringPool.addSymbol(this, offset, length, hash);
    }
    return 0;
}
//
//
//
/**
 * Decodes the byte range and appends each character to
 * <code>charBuffer</code>. Every decoded value is narrowed to a
 * <code>char</code>; a character that cannot be read because of an
 * IOException is appended as 0.
 *
 * @param charBuffer destination for the decoded characters
 * @param offset byte offset of the first character
 * @param length length of the range in bytes
 */
public void append(XMLEntityHandler.CharBuffer charBuffer, int offset, int length) {
    final int limit = offset + length;
    for (int pos = offset; pos < limit; pos += fBytesPerChar) {
        int decoded;
        try {
            decoded = getChar(pos);
        }
        catch (IOException ex) {
            decoded = 0; // REVISIT
        }
        charBuffer.append((char) decoded);
    }
}
//
//
//
/**
 * Intentionally empty: this reader does nothing when a string range is
 * released.
 *
 * @param offset byte offset of the range (unused)
 * @param length length of the range in bytes (unused)
 */
public void releaseString(int offset, int length) {
// nothing to do...
}
//
//
//
/**
 * Returns the decoded characters of the byte range as a String.
 * <p>
 * The characters are appended to a lazily created scratch
 * ChunkyCharArray, registered there as a string, and the resulting index
 * is resolved through the string pool.
 *
 * @param offset byte offset of the first character
 * @param length length of the range in bytes
 */
public String toString(int offset, int length) {
    //
    // REVISIT - we need to cache this operation !!
    //
    ChunkyCharArray scratch = fStringCharArray;
    if (scratch == null) {
        scratch = new ChunkyCharArray(fStringPool);
        fStringCharArray = scratch;
    }
    final int start = scratch.length();
    append(scratch, offset, length);
    final int count = scratch.length() - start;
    return fStringPool.toString(scratch.addString(start, count));
}
//
//
//
/**
 * Computes the StringHasher hash code of the decoded characters in the
 * byte range. A character that cannot be read because of an IOException
 * is hashed as 0.
 *
 * @param offset byte offset of the first character
 * @param length length of the range in bytes
 */
private int getHashcode(int offset, int length) {
    final int limit = offset + length;
    int running = 0;
    for (int pos = offset; pos < limit; pos += fBytesPerChar) {
        int decoded;
        try {
            decoded = getChar(pos);
        }
        catch (IOException ex) {
            decoded = 0; // REVISIT
        }
        running = StringHasher.hashChar(running, decoded);
    }
    return StringHasher.finishHash(running);
}
//
/**
 * Compares the decoded characters of the byte range with a range of a
 * character array. A character that cannot be read because of an
 * IOException is compared as 0.
 *
 * @param offset byte offset of the first character in this reader
 * @param length length of the reader range in bytes
 * @param strChars characters to compare against
 * @param strOffset index of the first character in <code>strChars</code>
 * @param strLength number of characters to compare against
 * @return true only if both ranges hold the same number of characters
 *         and every pair of characters is equal
 */
public boolean equalsString(int offset, int length, char[] strChars, int strOffset, int strLength) {
    final int limit = offset + length;
    int remaining = strLength;
    int strPos = strOffset;
    for (int pos = offset; pos < limit; pos += fBytesPerChar) {
        if (remaining == 0) {
            // the reader range is longer than the array range
            return false;
        }
        remaining--;
        int decoded;
        try {
            decoded = getChar(pos);
        }
        catch (IOException ex) {
            decoded = 0; // REVISIT
        }
        if (decoded != strChars[strPos++]) {
            return false;
        }
    }
    return remaining == 0;
}
//
//
//
/**
 * Scratch buffer that accumulates normalized character data before it is
 * handed to the character data handler.
 * <p>
 * This buffer is deliberately per-instance: its fill level
 * (fCharDataLength) is per-instance, so a buffer shared between readers
 * would let readers running on different threads overwrite each other's
 * pending characters.
 */
private char[] fCharacters = new char[256];
/** Number of characters currently held in fCharacters. */
private int fCharDataLength = 0;
/**
 * Appends one character to the scratch buffer, doubling the buffer when
 * it is full.
 *
 * @param ch the character to append; narrowed to a <code>char</code>
 */
private void appendCharData(int ch) {
    if (fCharDataLength == fCharacters.length) {
        char[] grown = new char[fCharacters.length * 2];
        System.arraycopy(fCharacters, 0, grown, 0, fCharDataLength);
        fCharacters = grown;
    }
    fCharacters[fCharDataLength++] = (char)ch;
}
/**
 * Decodes the byte range, normalizes line ends, and delivers the result
 * to the character data handler.
 * <p>
 * Each carriage return is replaced by a line feed, and a line feed that
 * immediately follows a carriage return is dropped. The data is delivered
 * either as a character array range or as a string pool index, depending
 * on fSendCharDataAsCharArray.
 *
 * @param offset byte offset of the first character
 * @param length length of the range in bytes
 * @param isWhitespace selects processWhitespace instead of
 *        processCharacters on the handler
 */
public void callCharDataHandler(int offset, int length, boolean isWhitespace) throws Exception {
    final int limit = offset + length;
    boolean previousWasCR = false;
    for (int pos = offset; pos < limit; pos += fBytesPerChar) {
        int ch = getChar(pos);
        // fix for Bug23: Element Data not normalized...
        final boolean followsCR = previousWasCR;
        previousWasCR = false;
        if (followsCR && ch == 0x0A) {
            continue;
        }
        if (ch == 0x0D) {
            previousWasCR = true;
            ch = 0x0A;
        }
        appendCharData(ch);
    }

    if (!fSendCharDataAsCharArray) {
        final int stringIndex = fStringPool.addString(new String(fCharacters, 0, fCharDataLength));
        if (isWhitespace) {
            fCharDataHandler.processWhitespace(stringIndex);
        } else {
            fCharDataHandler.processCharacters(stringIndex);
        }
    } else if (isWhitespace) {
        fCharDataHandler.processWhitespace(fCharacters, 0, fCharDataLength);
    } else {
        fCharDataHandler.processCharacters(fCharacters, 0, fCharDataLength);
    }
    fCharDataLength = 0;
}
//
//
//
/**
 * Tests whether the character at the current offset equals
 * <code>ch</code>.
 *
 * @param ch the character to look for
 * @param skipPastChar when true and the character matches, the character
 *        counter and the current offset are advanced past it
 * @return true if the current character equals <code>ch</code>
 */
public boolean lookingAtChar(char ch, boolean skipPastChar) throws Exception {
    if (getChar(fCurrentOffset) != ch) {
        return false;
    }
    if (skipPastChar) {
        fCharacterCounter++;
        fCurrentOffset += fBytesPerChar;
    }
    return true;
}
//
//
//
/**
 * Tests whether the character at the current offset is accepted by this
 * reader as a valid character.
 * <p>
 * Accepted are tab, line feed, carriage return and every value from 0x20
 * through 0xFFFD. Values in the surrogate range are accepted without
 * checking that they form a valid pair. At end of input (-1) the question
 * is forwarded to the reader returned by changeReaders().
 *
 * @param skipPastChar when true and the character is accepted, the
 *        position counters and the current offset are advanced past it
 * @return true if the current character is accepted
 */
public boolean lookingAtValidChar(boolean skipPastChar) throws Exception {
    final int ch = getChar(fCurrentOffset);

    if (ch >= 0x20) {
        if (ch > 0xFFFD) {
            return false;
        }
        // REVISIT - check that the surrogate pair is valid
        if (skipPastChar) {
            fCharacterCounter++;
            fCurrentOffset += fBytesPerChar;
        }
        return true;
    }

    switch (ch) {
    case 0x09:
        if (!skipPastChar) {
            return true;
        }
        fCharacterCounter++;
        break;
    case 0x0A:
        if (!skipPastChar) {
            return true;
        }
        fLinefeedCounter++;
        fCharacterCounter = 1;
        break;
    case 0x0D:
        if (!skipPastChar) {
            return true;
        }
        fCarriageReturnCounter++;
        fCharacterCounter = 1;
        break;
    case -1:
        return changeReaders().lookingAtValidChar(skipPastChar);
    default:
        return false;
    }
    fCurrentOffset += fBytesPerChar;
    return true;
}
//
//
//
/**
 * Tests whether the character at the current offset is a space, tab,
 * line feed or carriage return. At end of input (-1) the question is
 * forwarded to the reader returned by changeReaders().
 *
 * @param skipPastChar when true and the character is white space, the
 *        position counters and the current offset are advanced past it
 * @return true if the current character is white space
 */
public boolean lookingAtSpace(boolean skipPastChar) throws Exception {
    final int ch = getChar(fCurrentOffset);
    switch (ch) {
    case 0x20:
    case 0x09:
        if (!skipPastChar) {
            return true;
        }
        fCharacterCounter++;
        break;
    case 0x0A:
        if (!skipPastChar) {
            return true;
        }
        fLinefeedCounter++;
        fCharacterCounter = 1;
        break;
    case 0x0D:
        if (!skipPastChar) {
            return true;
        }
        fCarriageReturnCounter++;
        fCharacterCounter = 1;
        break;
    case -1: // REVISIT - should we be checking this here ?
        return changeReaders().lookingAtSpace(skipPastChar);
    default:
        return false;
    }
    fCurrentOffset += fBytesPerChar;
    return true;
}
//
//
//
/**
 * Advances the current offset until it is positioned on
 * <code>chr</code>, updating the line and character counters on the way.
 * <p>
 * A high surrogate followed by a low surrogate is skipped as a pair but
 * counted as one character. At end of input (-1) the search continues in
 * the reader returned by changeReaders().
 *
 * @param chr the character to stop at; it is not consumed
 */
public void skipToChar(char chr) throws Exception {
    for (;;) {
        final int current = getChar(fCurrentOffset);
        if (current == chr) {
            return;
        }
        if (current == -1) {
            changeReaders().skipToChar(chr);
            return;
        }
        if (current == 0x0A) {
            fLinefeedCounter++;
            fCharacterCounter = 1;
        } else if (current == 0x0D) {
            fCarriageReturnCounter++;
            fCharacterCounter = 1;
        } else {
            fCharacterCounter++;
            if (current >= 0xD800 && current < 0xDC00) {
                // high surrogate: look at the unit that follows it
                fCurrentOffset += fBytesPerChar;
                final int following = getChar(fCurrentOffset);
                if (following < 0xDC00 || following >= 0xE000) {
                    // not a low surrogate; examine it on the next pass
                    continue;
                }
            }
        }
        fCurrentOffset += fBytesPerChar;
    }
}
//
//
//
/**
 * Advances the current offset past any run of spaces, tabs, line feeds
 * and carriage returns, updating the line and character counters. When
 * the run ends at end of input (-1), skipping continues in the reader
 * returned by changeReaders().
 */
public void skipPastSpaces() throws Exception {
    for (;;) {
        final int ch = getChar(fCurrentOffset);
        switch (ch) {
        case 0x20:
        case 0x09:
            fCharacterCounter++;
            break;
        case 0x0A:
            fLinefeedCounter++;
            fCharacterCounter = 1;
            break;
        case 0x0D:
            fCarriageReturnCounter++;
            fCharacterCounter = 1;
            break;
        case -1:
            changeReaders().skipPastSpaces();
            return;
        default:
            return;
        }
        fCurrentOffset += fBytesPerChar;
    }
}
//
//
//
/**
 * Advances the current offset past a name, if one starts at the current
 * offset.
 * <p>
 * The first character must carry E_InitialNameCharFlag in
 * XMLCharacterProperties.fgCharFlags; following characters are consumed
 * while they carry E_NameCharFlag and differ from <code>fastcheck</code>.
 * A value that cannot index the flag table (the -1 end-of-input marker
 * returned by getChar, or a value beyond the end of the table) ends the
 * name instead of raising ArrayIndexOutOfBoundsException.
 *
 * @param fastcheck a character that ends the name without a table lookup
 */
public void skipPastName(char fastcheck) throws Exception {
    int ch = getChar(fCurrentOffset);
    if (!fCalledCharPropInit) {
        XMLCharacterProperties.initCharFlags();
        fCalledCharPropInit = true;
    }
    // Guard the table lookup: getChar() yields -1 at end of input.
    if (ch < 0 || ch >= XMLCharacterProperties.fgCharFlags.length)
        return;
    if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0)
        return;
    while (true) {
        fCurrentOffset += fBytesPerChar;
        fCharacterCounter++;
        ch = getChar(fCurrentOffset);
        if (fastcheck == ch)
            return;
        if (ch < 0 || ch >= XMLCharacterProperties.fgCharFlags.length)
            return;
        if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
            return;
    }
}
//
//
//
/**
 * Advances the current offset past a name token.
 * <p>
 * Characters are consumed while they carry E_NameCharFlag in
 * XMLCharacterProperties.fgCharFlags and differ from
 * <code>fastcheck</code>. A value that cannot index the flag table (the
 * -1 end-of-input marker returned by getChar, or a value beyond the end
 * of the table) ends the token instead of raising
 * ArrayIndexOutOfBoundsException.
 *
 * @param fastcheck a character that ends the token without a table lookup
 */
public void skipPastNmtoken(char fastcheck) throws Exception {
    int ch = getChar(fCurrentOffset);
    if (!fCalledCharPropInit) {
        XMLCharacterProperties.initCharFlags();
        fCalledCharPropInit = true;
    }
    while (true) {
        if (fastcheck == ch)
            return;
        // Guard the table lookup: getChar() yields -1 at end of input.
        if (ch < 0 || ch >= XMLCharacterProperties.fgCharFlags.length)
            return;
        if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
            return;
        fCurrentOffset += fBytesPerChar;
        fCharacterCounter++;
        ch = getChar(fCurrentOffset);
    }
}
//
//
//
/**
 * Tests whether the characters starting at the current offset match
 * <code>s</code>, and consumes them if they do.
 *
 * @param s the characters to match
 * @return true if every character matched, in which case the current
 *         offset and character counter have been advanced past the
 *         match; false otherwise, with the position left unchanged
 */
public boolean skippedString(char[] s) throws Exception {
    int probe = fCurrentOffset;
    int matched = 0;
    while (matched < s.length) {
        if (getChar(probe) != s[matched]) {
            return false;
        }
        probe += fBytesPerChar;
        matched++;
    }
    fCurrentOffset = probe;
    fCharacterCounter += s.length;
    return true;
}
//
//
//
/**
 * Consumes and returns the character at the current offset so that it
 * can be reported.
 * <p>
 * The line and character counters are updated. A high surrogate that is
 * followed by a low surrogate is combined with it into a single value,
 * and both units are consumed. At end of input (-1) the call is
 * forwarded to the reader returned by changeReaders().
 *
 * @return the consumed character value
 */
public int scanInvalidChar() throws Exception {
    final int ch = getChar(fCurrentOffset);
    if (ch == -1) {
        return changeReaders().scanInvalidChar();
    }
    fCurrentOffset += fBytesPerChar;

    if (ch == 0x0A) {
        fLinefeedCounter++;
        fCharacterCounter = 1;
        return ch;
    }
    if (ch == 0x0D) {
        fCarriageReturnCounter++;
        fCharacterCounter = 1;
        return ch;
    }

    fCharacterCounter++;
    final boolean highSurrogate = (ch >= 0xD800 && ch < 0xDC00);
    if (!highSurrogate) {
        return ch;
    }
    final int low = getChar(fCurrentOffset);
    if (low < 0xDC00 || low >= 0xE000) {
        // unpaired high surrogate: report it on its own
        return ch;
    }
    fCurrentOffset += fBytesPerChar;
    return ((ch - 0xD800) << 10) + (low - 0xDC00) + 0x10000;
}
//
//
//
/**
 * Scans the digits and terminating semicolon of a character reference
 * starting at the current offset.
 * <p>
 * If no input is available for the first digit (-1), the call is
 * forwarded to the reader returned by changeReaders().
 *
 * @param hex true to scan hexadecimal digits, false for decimal digits
 * @return the referenced value, or one of
 *         XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR (first character
 *         is not a digit), CHARREF_RESULT_SEMICOLON_REQUIRED (digits not
 *         followed by ';') or CHARREF_RESULT_OUT_OF_RANGE (the value
 *         exceeded 0x10FFFF while accumulating)
 */
public int scanCharRef(boolean hex) throws Exception {
    int ch = getChar(fCurrentOffset);
    if (ch == -1) {
        return changeReaders().scanCharRef(hex);
    }

    // the first digit is mandatory
    int value;
    if (hex) {
        if (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0) {
            return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
        }
        value = ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10);
    } else {
        if (ch < '0' || ch > '9') {
            return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
        }
        value = ch - '0';
    }
    fCharacterCounter++;
    fCurrentOffset += fBytesPerChar;

    // remaining digits
    boolean overflowed = false;
    for (ch = getChar(fCurrentOffset); ch != -1; ch = getChar(fCurrentOffset)) {
        final int digit;
        if (hex) {
            if (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0) {
                break;
            }
            digit = ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10);
        } else {
            if (ch < '0' || ch > '9') {
                break;
            }
            digit = ch - '0';
        }
        fCharacterCounter++;
        fCurrentOffset += fBytesPerChar;
        value = hex ? (value << 4) + digit : (value * 10) + digit;
        if (value > 0x10FFFF) {
            // remember the overflow and restart so the int cannot wrap
            overflowed = true;
            value = 0;
        }
    }

    if (ch != ';') {
        return XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED;
    }
    fCharacterCounter++;
    fCurrentOffset += fBytesPerChar;
    return overflowed ? XMLEntityHandler.CHARREF_RESULT_OUT_OF_RANGE : value;
}
//
//
//
/**
 * Scans a literal delimited by single or double quotes starting at the
 * current offset, and consumes the closing quote.
 *
 * @return the string index of the text between the quotes, or
 *         XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED if no opening
 *         quote is present, or STRINGLIT_RESULT_INVALID_CHAR if a
 *         character rejected by lookingAtValidChar is met before the
 *         closing quote
 */
public int scanStringLiteral() throws Exception {
    final char quote;
    if (lookingAtChar('\'', true)) {
        quote = '\'';
    } else if (lookingAtChar('\"', true)) {
        quote = '\"';
    } else {
        return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED;
    }

    final int start = fCurrentOffset;
    while (!lookingAtChar(quote, false)) {
        if (!lookingAtValidChar(true)) {
            return XMLEntityHandler.STRINGLIT_RESULT_INVALID_CHAR;
        }
    }
    final int stringIndex = addString(start, fCurrentOffset - start);
    lookingAtChar(quote, true); // move past the closing quote
    return stringIndex;
}
//
// [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
// | "'" ([^<&'] | Reference)* "'"
//
/**
 * Scans a simple attribute value up to the closing quote and consumes
 * that quote.
 *
 * @param qchar the quote character that ends the value
 * @param asSymbol true to add the value to the pool as a symbol, false
 *        to add it as a string
 * @return the pool index of the value, or
 *         XMLEntityHandler.ATTVALUE_RESULT_COMPLEX when white space other
 *         than a plain space, or an '&amp;', is met;
 *         ATTVALUE_RESULT_LESSTHAN when a '&lt;' is met; or
 *         ATTVALUE_RESULT_INVALID_CHAR when a character rejected by
 *         lookingAtValidChar is met
 */
public int scanAttValue(char qchar, boolean asSymbol) throws Exception
{
    final int start = fCurrentOffset;
    while (!lookingAtChar(qchar, false)) {
        if (lookingAtChar(' ', true)) {
            continue;
        }
        if (lookingAtSpace(false) || lookingAtChar('&', false)) {
            return XMLEntityHandler.ATTVALUE_RESULT_COMPLEX;
        }
        if (lookingAtChar('<', false)) {
            return XMLEntityHandler.ATTVALUE_RESULT_LESSTHAN;
        }
        if (!lookingAtValidChar(true)) {
            return XMLEntityHandler.ATTVALUE_RESULT_INVALID_CHAR;
        }
    }
    final int byteLength = fCurrentOffset - start;
    final int result;
    if (asSymbol) {
        result = addSymbol(start, byteLength);
    } else {
        result = addString(start, byteLength);
    }
    lookingAtChar(qchar, true);
    return result;
}
//
// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
// | "'" ([^%&'] | PEReference | Reference)* "'"
//
/**
 * Scans entity value text until the closing quote, a reference, or an
 * invalid character.
 *
 * @param qchar the quote character that ends the value, or -1 to scan
 *        without looking for a closing quote
 * @param createString when the closing quote is reached: true to add the
 *        scanned text to the pool and consume the quote, false to return
 *        ENTITYVALUE_RESULT_FINISHED with the quote left unconsumed
 * @return the string index of the scanned text, or one of
 *         XMLEntityHandler.ENTITYVALUE_RESULT_FINISHED,
 *         ENTITYVALUE_RESULT_REFERENCE (positioned on '&amp;'),
 *         ENTITYVALUE_RESULT_PEREF (positioned on '%') or
 *         ENTITYVALUE_RESULT_INVALID_CHAR
 */
public int scanEntityValue(int qchar, boolean createString) throws Exception
{
    final int start = fCurrentOffset;
    for (;;) {
        if (qchar != -1 && lookingAtChar((char)qchar, false)) {
            if (createString) {
                break;
            }
            return XMLEntityHandler.ENTITYVALUE_RESULT_FINISHED;
        }
        if (lookingAtChar('&', false)) {
            return XMLEntityHandler.ENTITYVALUE_RESULT_REFERENCE;
        }
        if (lookingAtChar('%', false)) {
            return XMLEntityHandler.ENTITYVALUE_RESULT_PEREF;
        }
        if (!lookingAtValidChar(true)) {
            return XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR;
        }
    }
    final int stringIndex = addString(start, fCurrentOffset - start);
    lookingAtChar((char)qchar, true);
    return stringIndex;
}
//
//
//
/**
 * Scans a name at the current offset and checks it against an expected
 * name.
 * <p>
 * The name is consumed whether or not it matches. Previously any
 * non-empty name was reported as a match; the scanned characters are now
 * compared with <code>expectedName</code>.
 *
 * @param fastcheck a character that ends the name (see skipPastName)
 * @param expectedName the characters the scanned name must equal; when
 *        null, any non-empty name is accepted
 * @return true if a non-empty name was scanned and it equals the
 *         expected name
 */
public boolean scanExpectedName(char fastcheck, StringPool.CharArrayRange expectedName) throws Exception {
    int nameOffset = fCurrentOffset;
    skipPastName(fastcheck);
    int nameLength = fCurrentOffset - nameOffset;
    if (nameLength == 0)
        return false;
    if (expectedName == null)
        return true;
    // NOTE(review): relies on StringPool.CharArrayRange exposing the public
    // fields chars/offset/length; that class is declared outside this file.
    return equalsString(nameOffset, nameLength, expectedName.chars, expectedName.offset, expectedName.length);
}
/**
 * Scans a name at the current offset into <code>qname</code>.
 * <p>
 * Only the raw name is filled in; prefix, local part and URI are set
 * to -1 (the name is not split at a colon). If no name is present,
 * <code>qname</code> is cleared.
 *
 * @param fastcheck a character that ends the name (see skipPastName)
 * @param qname receives the result
 */
public void scanQName(char fastcheck, QName qname) throws Exception {
    // DEFECT !! no code // Defect #126
    final int start = fCurrentOffset;
    skipPastName(fastcheck);
    final int byteLength = fCurrentOffset - start;
    if (byteLength != 0) {
        qname.prefix = -1;
        qname.localpart = -1;
        qname.rawname = addSymbol(start, byteLength);
        qname.uri = -1;
    } else {
        qname.clear();
    }
} // scanQName(char,QName)
/**
 * Scans a name at the current offset and adds it to the pool as a
 * symbol.
 *
 * @param fastcheck a character that ends the name (see skipPastName)
 * @return the symbol index of the name, or -1 if no name is present
 */
public int scanName(char fastcheck) throws Exception {
    final int start = fCurrentOffset;
    skipPastName(fastcheck);
    final int byteLength = fCurrentOffset - start;
    return (byteLength == 0) ? -1 : addSymbol(start, byteLength);
}
//
//
//
/** The characters that must follow "&lt;![" to open a CDATA section. */
private static final char[] cdata_string = { 'C','D','A','T','A','[' };
/**
 * Classifies the markup that follows a '&lt;' which the caller has
 * already consumed.
 * <p>
 * When the input ends in the middle of a "&lt;!..." construct, the
 * characters consumed by this method are un-read again (both the
 * character counter and the byte offset are rewound by the same number
 * of characters) before CONTENT_RESULT_MARKUP_END_OF_INPUT is returned.
 *
 * @return one of the XMLEntityHandler.CONTENT_RESULT_* codes: START_OF_PI
 *         ("?"), START_OF_COMMENT ("!--"), START_OF_CDSECT ("![CDATA["),
 *         START_OF_ETAG ("/"), START_OF_ELEMENT (anything else),
 *         MARKUP_END_OF_INPUT, or MARKUP_NOT_RECOGNIZED
 */
private int recognizeMarkup() throws Exception {
    int ch = getChar(fCurrentOffset);
    switch (ch) {
    case -1:
        return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
    case '?':
        fCharacterCounter++;
        fCurrentOffset += fBytesPerChar;
        return XMLEntityHandler.CONTENT_RESULT_START_OF_PI;
    case '!':
        fCharacterCounter++;
        fCurrentOffset += fBytesPerChar;
        ch = getChar(fCurrentOffset);
        if (ch == -1) {
            // un-read the '!'
            fCharacterCounter--;
            fCurrentOffset -= fBytesPerChar;
            return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
        }
        if (ch == '-') {
            fCharacterCounter++;
            fCurrentOffset += fBytesPerChar;
            ch = getChar(fCurrentOffset);
            if (ch == -1) {
                // un-read "!-": two characters, not two bytes
                fCharacterCounter -= 2;
                fCurrentOffset -= (2 * fBytesPerChar);
                return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
            }
            if (ch == '-') {
                fCharacterCounter++;
                fCurrentOffset += fBytesPerChar;
                return XMLEntityHandler.CONTENT_RESULT_START_OF_COMMENT;
            }
            break;
        }
        if (ch == '[') {
            fCharacterCounter++;
            fCurrentOffset += fBytesPerChar;
            for (int i = 0; i < cdata_string.length; i++) {
                ch = getChar(fCurrentOffset);
                if (ch == -1) {
                    // un-read "![" plus the i matched characters
                    fCharacterCounter -= (2 + i);
                    fCurrentOffset -= ((2 + i) * fBytesPerChar);
                    return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
                }
                if (ch != cdata_string[i]) {
                    return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
                }
                fCharacterCounter++;
                fCurrentOffset += fBytesPerChar;
            }
            return XMLEntityHandler.CONTENT_RESULT_START_OF_CDSECT;
        }
        break;
    case '/':
        fCharacterCounter++;
        fCurrentOffset += fBytesPerChar;
        return XMLEntityHandler.CONTENT_RESULT_START_OF_ETAG;
    default:
        return XMLEntityHandler.CONTENT_RESULT_START_OF_ELEMENT;
    }
    return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
}
/**
 * Classifies the reference that follows an '&amp;' which the caller has
 * already consumed.
 * <pre>
 * [67] Reference ::= EntityRef | CharRef
 * [68] EntityRef ::= '&amp;' Name ';'
 * [66] CharRef   ::= '&amp;#' [0-9]+ ';' | '&amp;#x' [0-9a-fA-F]+ ';'
 * </pre>
 *
 * @return XMLEntityHandler.CONTENT_RESULT_START_OF_CHARREF after consuming
 *         a '#', CONTENT_RESULT_START_OF_ENTITYREF for any other
 *         character (nothing consumed), or
 *         CONTENT_RESULT_REFERENCE_END_OF_INPUT at end of input
 */
private int recognizeReference() throws Exception {
    final int ch = getChar(fCurrentOffset);
    if (ch == -1) {
        return XMLEntityHandler.CONTENT_RESULT_REFERENCE_END_OF_INPUT;
    }
    if (ch != '#') {
        return XMLEntityHandler.CONTENT_RESULT_START_OF_ENTITYREF;
    }
    fCharacterCounter++;
    fCurrentOffset += fBytesPerChar;
    return XMLEntityHandler.CONTENT_RESULT_START_OF_CHARREF;
}
/**
 * Scans element content starting at the current offset, delivering any
 * character data found through callCharDataHandler, and stops at the
 * first construct the caller has to deal with.
 * <p>
 * The scan proceeds in three stages:
 * <ol>
 * <li>The first character is examined on its own. A '&lt;' or '&amp;'
 * outside a CDATA section is classified at once by recognizeMarkup /
 * recognizeReference, "]]&gt;" yields CONTENT_RESULT_END_OF_CDSECT, and
 * a disallowed character yields CONTENT_RESULT_INVALID_CHAR.</li>
 * <li>If the first character is white space, the whole white space run
 * is consumed; when the run is ended by markup, a reference, "]]&gt;",
 * an invalid character or end of input, it is delivered with the
 * isWhitespace argument set to true.</li>
 * <li>Otherwise characters are consumed until one of those terminators
 * is met, and the data is delivered with isWhitespace set to false.</li>
 * </ol>
 * At end of input (-1) scanning continues in the reader returned by
 * changeReaders().
 *
 * @param element only forwarded to the next reader's scanContent
 * @return one of the XMLEntityHandler.CONTENT_RESULT_* codes, or the
 *         result of the next reader's scanContent at end of input
 */
public int scanContent(QName element) throws Exception {
// offset: byte position where the pending character data starts
int offset = fCurrentOffset;
int ch = getChar(fCurrentOffset);
fCurrentOffset += fBytesPerChar;
byte prop;
if (!fCalledCharPropInit) {
XMLCharacterProperties.initCharFlags();
fCalledCharPropInit = true;
}
//
// Stage 1: look at the first character. Note that fCurrentOffset is
// already positioned after it.
//
if (ch < 0x80) {
if (ch == -1) {
fCurrentOffset -= fBytesPerChar;
return changeReaders().scanContent(element); // REVISIT - not quite...
}
prop = XMLCharacterProperties.fgCharFlags[ch];
if ((prop & XMLCharacterProperties.E_CharDataFlag) == 0 && ch != 0x0A && ch != 0x0D) {
if (ch == '<') {
fCharacterCounter++;
if (!fInCDSect) {
return recognizeMarkup();
}
} else if (ch == '&') {
fCharacterCounter++;
if (!fInCDSect) {
return recognizeReference();
}
} else if (ch == ']') {
if (getChar(fCurrentOffset) == ']' && getChar(fCurrentOffset + fBytesPerChar) == '>') {
fCharacterCounter += 3;
fCurrentOffset += (2 * fBytesPerChar);
return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
}
} else {
// un-read the offending character so the caller can report it
fCurrentOffset -= fBytesPerChar;
return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
}
} else if (ch == 0x20 || ch == 0x09 || ch == 0x0A || ch == 0x0D) {
//
// Stage 2: consume the run of white space
//
do {
if (ch == 0x0A) {
fLinefeedCounter++;
fCharacterCounter = 1;
} else if (ch == 0x0D) {
fCarriageReturnCounter++;
fCharacterCounter = 1;
} else {
fCharacterCounter++;
}
ch = getChar(fCurrentOffset);
fCurrentOffset += fBytesPerChar;
} while (ch == 0x20 || ch == 0x09 || ch == 0x0A || ch == 0x0D);
// ch is now the first character after the white space run and
// fCurrentOffset is positioned after it
if (ch < 0x80) {
if (ch == -1) {
fCurrentOffset -= fBytesPerChar;
callCharDataHandler(offset, fCurrentOffset - offset, true);
return changeReaders().scanContent(element); // REVISIT - not quite...
}
prop = XMLCharacterProperties.fgCharFlags[ch];
if ((prop & XMLCharacterProperties.E_CharDataFlag) == 0) {
if (ch == '<') {
if (!fInCDSect) {
callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, true);
fCharacterCounter++;
return recognizeMarkup();
}
fCharacterCounter++;
} else if (ch == '&') {
if (!fInCDSect) {
callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, true);
fCharacterCounter++;
return recognizeReference();
}
fCharacterCounter++;
} else if (ch == ']') {
if (getChar(fCurrentOffset) == ']' && getChar(fCurrentOffset + fBytesPerChar) == '>') {
callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, true);
fCharacterCounter += 3;
fCurrentOffset += (2 * fBytesPerChar);
return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
}
} else {
fCurrentOffset -= fBytesPerChar;
callCharDataHandler(offset, fCurrentOffset - offset, true);
return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
}
}
} else {
if (ch >= 0xD800 && ch <= 0xDFFF) {
// surrogate: also step over the unit that follows it (unchecked)
fCurrentOffset += fBytesPerChar;
} else if (ch == 0xFFFE || ch == 0xFFFF) {
fCurrentOffset -= fBytesPerChar;
callCharDataHandler(offset, fCurrentOffset - offset, true);
return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
}
}
}
} else {
if (ch >= 0xD800 && ch <= 0xDFFF) {
// surrogate: also step over the unit that follows it (unchecked)
fCurrentOffset += fBytesPerChar;
} else if (ch == 0xFFFE || ch == 0xFFFF) {
fCurrentOffset -= fBytesPerChar;
return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
}
}
//
// Stage 3: ordinary character data. The character examined above is
// counted here, then a tight loop consumes characters below 0x80 that
// are flagged as character data (or are line ends).
//
fCharacterCounter++;
while (true) {
ch = getChar(fCurrentOffset);
fCurrentOffset += fBytesPerChar;
if (ch >= 0x80 || ch < 0)
break;
prop = XMLCharacterProperties.fgCharFlags[ch];
if ((prop & XMLCharacterProperties.E_CharDataFlag) == 0) {
if (ch == 0x0A) {
fLinefeedCounter++;
fCharacterCounter = 1;
} else if (ch == 0x0D) {
fCarriageReturnCounter++;
fCharacterCounter = 1;
} else
break;
} else
fCharacterCounter++;
}
// General loop: handles every character, including the one that made
// the tight loop above stop. It only exits through a return.
while (true) { // REVISIT - EOF check ?
if (ch < 0x80) {
if (ch == -1) {
fCurrentOffset -= fBytesPerChar;
callCharDataHandler(offset, fCurrentOffset - offset, false);
return changeReaders().scanContent(element); // REVISIT - not quite...
}
prop = XMLCharacterProperties.fgCharFlags[ch];
if ((prop & XMLCharacterProperties.E_CharDataFlag) == 0) {
if (ch == '<') {
if (!fInCDSect) {
callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, false);
fCharacterCounter++;
return recognizeMarkup();
}
fCharacterCounter++;
} else if (ch == '&') {
if (!fInCDSect) {
callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, false);
fCharacterCounter++;
return recognizeReference();
}
fCharacterCounter++;
} else if (ch == 0x0A) {
fLinefeedCounter++;
fCharacterCounter = 1;
} else if (ch == 0x0D) {
fCarriageReturnCounter++;
fCharacterCounter = 1;
} else if (ch == ']') {
if (getChar(fCurrentOffset) == ']' && getChar(fCurrentOffset + fBytesPerChar) == '>') {
callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, false);
fCharacterCounter += 3;
fCurrentOffset += (2 * fBytesPerChar);
return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
}
fCharacterCounter++;
} else {
fCurrentOffset -= fBytesPerChar;
callCharDataHandler(offset, fCurrentOffset - offset, false);
return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
}
} else {
fCharacterCounter++;
}
} else {
if (ch >= 0xD800 && ch <= 0xDFFF) {
// surrogate: also step over the unit that follows it (unchecked)
fCharacterCounter++;
fCurrentOffset += fBytesPerChar;
} else if (ch == 0xFFFE || ch == 0xFFFF) {
fCurrentOffset -= fBytesPerChar;
callCharDataHandler(offset, fCurrentOffset - offset, false);
return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
}
fCharacterCounter++;
}
ch = getChar(fCurrentOffset);
fCurrentOffset += fBytesPerChar;
}
}
}