// blob: 6083cd4a5c1ff8bc98c5663c3de0d8dac36f2f00 [file] [log] [blame]
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.xerces.readers;
import org.apache.xerces.framework.XMLErrorReporter;
import org.apache.xerces.utils.ChunkyByteArray;
import org.apache.xerces.utils.QName;
import org.apache.xerces.utils.StringPool;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
/**
 * Recognizer for entities whose leading bytes look like an ASCII-compatible
 * XML declaration or text declaration (<code>&lt;?xml</code> followed by
 * white space), optionally preceded by a UTF-8 byte order mark.
 * <p>
 * When such a prefix is found, the declaration is pre-scanned for an encoding
 * name and a reader suited to that encoding is created through the supplied
 * reader factory.
 *
 * @version
 */
final class UTF8Recognizer extends XMLDeclRecognizer {

    /** The three byte UTF-8 byte order mark. Never written to. */
    private static final byte[] fUTF8BOM = {(byte)0xEF, (byte)0xBB, (byte)0xBF};

    /** Set to true to print otherwise swallowed reader-creation failures. */
    private static final boolean DEBUG = false;

    //
    //
    //

    /**
     * Tries to create an entity reader for <code>data</code>.
     *
     * @param readerFactory           factory used to create the resulting reader
     * @param entityHandler           passed through to the created readers
     * @param errorReporter           passed through to the created readers
     * @param sendCharDataAsCharArray passed through to the created readers
     * @param stringPool              pool holding the scanned encoding name
     * @param data                    the raw bytes of the entity
     * @param xmlDecl                 forwarded to <code>prescanXMLDeclOrTextDecl</code>
     * @param allowJavaEncodingName   when true, an encoding name that
     *                                <code>MIME2Java</code> does not know is
     *                                handed to Java as is instead of being rejected
     * @return a reader for the entity, or <code>null</code> when the data does
     *         not start with <code>&lt;?xml</code> plus white space, or when
     *         creating the reader failed with an exception other than
     *         <code>UnsupportedEncodingException</code>
     * @throws UnsupportedEncodingException if the declared encoding is
     *         ISO-10646-UCS-2, ISO-10646-UCS-4 or UTF-16, is unknown while
     *         Java encoding names are not allowed, or is rejected by Java
     * @throws Exception anything else raised while examining the data
     */
    public XMLEntityHandler.EntityReader recognize(XMLEntityReaderFactory readerFactory,
                                                   XMLEntityHandler entityHandler,
                                                   XMLErrorReporter errorReporter,
                                                   boolean sendCharDataAsCharArray,
                                                   StringPool stringPool,
                                                   ChunkyByteArray data,
                                                   boolean xmlDecl,
                                                   boolean allowJavaEncodingName) throws Exception {
        // Check to see if there is a UTF-8 BOM; if there is one, skip past it.
        if (startsWithUTF8BOM(data)) {
            // Consume the three BOM bytes into a throwaway buffer so the
            // shared signature constant is never used as a read target.
            byte[] skipped = new byte[fUTF8BOM.length];
            data.read(skipped, 0, skipped.length);
        }

        // Anything that does not begin with "<?xml" + white space is not ours.
        if (data.byteAt(0) != '<' || data.byteAt(1) != '?') {
            return null;
        }
        if (data.byteAt(2) != 'x' || data.byteAt(3) != 'm' || data.byteAt(4) != 'l') {
            return null;
        }
        if (!isXMLSpace(data.byteAt(5))) {
            return null;
        }

        XMLEntityHandler.EntityReader declReader =
            new XMLDeclReader(entityHandler, errorReporter, sendCharDataAsCharArray, data, stringPool);
        int encoding = prescanXMLDeclOrTextDecl(declReader, xmlDecl);
        if (encoding == -1) {
            // No encoding name was found by the pre-scan: read the data as UTF-8.
            data.rewind();
            return readerFactory.createUTF8Reader(entityHandler, errorReporter, sendCharDataAsCharArray, data, stringPool);
        }

        String encname = stringPool.orphanString(encoding);
        // Upper-case with a fixed locale: with the default locale the result
        // depends on the platform (a Turkish locale maps 'i' to a dotted
        // capital I), which would break the comparisons and the lookup below.
        String enc = encname.toUpperCase(java.util.Locale.ENGLISH);
        if ("ISO-10646-UCS-2".equals(enc)
                || "ISO-10646-UCS-4".equals(enc)
                || "UTF-16".equals(enc)) {
            throw new UnsupportedEncodingException(encname);
        }

        String javaencname = MIME2Java.convert(enc);
        if (javaencname == null) {
            // Not supported
            if (!allowJavaEncodingName) {
                throw new UnsupportedEncodingException(encname);
            }
            javaencname = encname;
        }

        XMLEntityHandler.EntityReader reader = null;
        try {
            data.rewind();
            if ("UTF-8".equalsIgnoreCase(javaencname) || "UTF8".equalsIgnoreCase(javaencname)) {
                reader = readerFactory.createUTF8Reader(entityHandler, errorReporter, sendCharDataAsCharArray, data, stringPool);
            } else {
                reader = readerFactory.createCharReader(entityHandler, errorReporter, sendCharDataAsCharArray,
                                                        new InputStreamReader(data, javaencname), stringPool);
            }
        } catch (UnsupportedEncodingException e) {
            // Report the name as written in the document, not the Java name.
            throw new UnsupportedEncodingException(encname);
        } catch (Exception e) {
            // Deliberately best effort: any other failure leaves the reader
            // null, which is what the caller gets back.
            if (DEBUG) {
                e.printStackTrace(); // Internal Error
            }
        }
        return reader;
    }

    /**
     * Tells whether the first three bytes of <code>data</code> are the UTF-8
     * byte order mark. Later bytes are only looked at when the earlier ones match.
     */
    private static boolean startsWithUTF8BOM(ChunkyByteArray data) throws IOException {
        return data.byteAt(0) == fUTF8BOM[0]
            && data.byteAt(1) == fUTF8BOM[1]
            && data.byteAt(2) == fUTF8BOM[2];
    }

    /** Tells whether <code>ch</code> is a space, tab, line feed or carriage return. */
    private static boolean isXMLSpace(int ch) {
        return ch == 0x20 || ch == 0x09 || ch == 0x0A || ch == 0x0D;
    }

    /**
     * Minimal entity reader that works directly on the raw bytes and supports
     * only the operations implemented below for pre-scanning an XMLDecl or
     * TextDecl; every other operation throws.
     */
    final class XMLDeclReader extends XMLEntityReader {
        //
        //
        //
        private final StringPool fStringPool;
        private final ChunkyByteArray fData;

        //
        //
        //
        XMLDeclReader(XMLEntityHandler entityHandler, XMLErrorReporter errorReporter, boolean sendCharDataAsCharArray, ChunkyByteArray data, StringPool stringPool) {
            super(entityHandler, errorReporter, sendCharDataAsCharArray);
            fStringPool = stringPool;
            fData = data;
        }

        //
        // These methods are used to parse XMLDecl/TextDecl.
        //

        /**
         * Tests whether the byte at the current offset equals <code>ch</code>,
         * advancing past it on a match when <code>skipPastChar</code> is set.
         */
        public boolean lookingAtChar(char ch, boolean skipPastChar) throws IOException {
            if (fData.byteAt(fCurrentOffset) != ch) {
                return false;
            }
            if (skipPastChar) {
                fCurrentOffset++;
            }
            return true;
        }

        /**
         * Tests whether the byte at the current offset is white space,
         * advancing past it on a match when <code>skipPastChar</code> is set.
         */
        public boolean lookingAtSpace(boolean skipPastChar) throws IOException {
            if (!isXMLSpace(fData.byteAt(fCurrentOffset) & 0xff)) {
                return false;
            }
            if (skipPastChar) {
                fCurrentOffset++;
            }
            return true;
        }

        /** Advances the current offset to the first byte that is not white space. */
        public void skipPastSpaces() throws IOException {
            while (isXMLSpace(fData.byteAt(fCurrentOffset) & 0xff)) {
                fCurrentOffset++;
            }
        }

        /**
         * Tests whether the bytes at the current offset spell out <code>s</code>.
         * The offset moves past the string only when all of it matched.
         */
        public boolean skippedString(char[] s) throws IOException {
            int offset = fCurrentOffset;
            for (int i = 0; i < s.length; i++, offset++) {
                if (fData.byteAt(offset) != s[i]) {
                    return false;
                }
            }
            fCurrentOffset = offset;
            return true;
        }

        /**
         * Scans a literal delimited by single or double quotes and adds its
         * content, without the quotes, to the string pool.
         *
         * @return the string pool index of the content, or
         *         <code>XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED</code>
         *         when the opening or the closing quote is missing
         */
        public int scanStringLiteral() throws Exception {
            boolean single = lookingAtChar('\'', true);
            if (!single && !lookingAtChar('\"', true)) {
                return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED;
            }
            char qchar = single ? '\'' : '\"';
            int start = fCurrentOffset;
            for (;;) {
                byte b = fData.byteAt(fCurrentOffset);
                if (b == qchar) {
                    break;
                }
                // NOTE(review): the 0xFF byte is treated as "no closing quote";
                // presumably ChunkyByteArray uses it to mark end of data - confirm.
                if (b == -1) {
                    return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED;
                }
                fCurrentOffset++;
            }
            int length = fCurrentOffset - start;
            char[] chars = new char[length];
            for (int i = 0; i < length; i++) {
                // Mask before widening: a plain (char) cast of a byte sign
                // extends, turning 0x80..0xFF into U+FF80..U+FFFF.
                chars[i] = (char) (fData.byteAt(start + i) & 0xff);
            }
            int stringIndex = fStringPool.addString(new String(chars));
            fCurrentOffset++; // move past qchar
            return stringIndex;
        }

        //
        // The rest of the methods in XMLReader are not used for parsing XMLDecl/TextDecl.
        //
        public void append(XMLEntityHandler.CharBuffer charBuffer, int offset, int length) {
            throw new RuntimeException("RDR002 cannot happen");
        }

        public int addString(int offset, int length) {
            throw new RuntimeException("RDR002 cannot happen");
        }

        public int addSymbol(int offset, int length) {
            throw new RuntimeException("RDR002 cannot happen");
        }

        public void skipToChar(char ch) throws IOException {
            throw new IOException("RDR002 cannot happen");
        }

        public void skipPastName(char fastcheck) throws IOException {
            throw new IOException("RDR002 cannot happen");
        }

        public void skipPastNmtoken(char fastcheck) throws IOException {
            throw new IOException("RDR002 cannot happen");
        }

        public boolean lookingAtValidChar(boolean skipPastChar) throws IOException {
            throw new IOException("RDR002 cannot happen");
        }

        public int scanInvalidChar() throws IOException {
            throw new IOException("RDR002 cannot happen");
        }

        public int scanCharRef(boolean hex) throws IOException {
            throw new IOException("RDR002 cannot happen");
        }

        public int scanAttValue(char qchar, boolean asSymbol) throws IOException {
            throw new IOException("RDR002 cannot happen");
        }

        public int scanEntityValue(int qchar, boolean createString) throws IOException {
            throw new IOException("RDR002 cannot happen");
        }

        public boolean scanExpectedName(char fastcheck, StringPool.CharArrayRange expectedName) throws IOException {
            throw new IOException("RDR002 cannot happen");
        }

        public void scanQName(char fastcheck, QName qname) throws IOException {
            throw new IOException("RDR002 cannot happen");
        }

        public int scanName(char fastcheck) throws IOException {
            throw new IOException("RDR002 cannot happen");
        }

        public int scanContent(QName element) throws IOException {
            throw new IOException("RDR002 cannot happen");
        }
    }
}