blob: 30104fcb14213ad100f75010164384e4018c47e0 [file] [log] [blame]
/*
* Copyright 2000, 2001, 2002, 2003, 2004, 2005 John G. Wilson
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package uk.co.wilson.xml;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Writer;
import java.net.URL;
import java.util.Locale;
import java.util.Stack;
import java.util.Vector;
import org.xml.sax.AttributeList;
import org.xml.sax.DTDHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import uk.org.xml.sax.DocumentHandler;
import uk.org.xml.sax.Parser;
public class MinML implements Parser, Locator, DocumentHandler, ErrorHandler {
// Action codes.
//
// Every character of a state row in the operands table encodes one transition;
// parse() switches on the low byte of that character (transition & 0xFF), which
// is one of the values below. Unless noted otherwise the parser then moves to
// the state named by the high byte of the same transition character.

// the name of a start tag is complete: capture it, and emit the start element
// straight away when the terminating character is '>' or '/'
public static final int endStartName = 0;
// report the start element (with any collected attributes) to the handler
public static final int emitStartElement = 1;
// report the end element; ends the parse when the outermost element closes
public static final int emitEndElement = 2;
// flush the buffered character data
public static final int emitCharacters = 3;
// take the buffered characters as an attribute name
public static final int saveAttributeName = 4;
// take the buffered characters as an attribute value
public static final int saveAttributeValue = 5;
// change state only if the next character is '-' (i.e. "<!--" was found)
public static final int startComment = 6;
// change state only if "-->" follows
public static final int endComment = 7;
// increment the nesting level counter
public static final int incLevel = 8;
// decrement the nesting level counter; change state only when it is already 0
public static final int decLevel = 9;
// change state only if "CDATA[" follows
public static final int startCDATA = 10;
// change state only if "]]>" follows; otherwise the ']' characters are written as data
public static final int endCDATA = 11;
// decode a character or predefined entity reference and write the resulting character
public static final int processCharRef = 12;
// write the current character to the buffer as character data
public static final int writeCdata = 13;
// return from parse()
public static final int exitParser = 14;
// report a fatal error; the transition's operand is the message text
public static final int parseError = 15;
// discard the buffered characters
public static final int discardAndChange = 16;
// discard the buffered characters, then save the current character
public static final int discardSaveAndChange = 17;
// save the current character in the buffer
public static final int saveAndChange = 18;
// no action, just change state
public static final int change = 19;
// State codes.
//
// Each value is the index of that state's transition row in the operands
// table. parse() starts in inSkipping; the high byte of every non-error
// transition is one of these values and selects the next state.
public static final int inSkipping = 0;
public static final int inSTag = 1;
public static final int inPossiblyAttribute = 2;
public static final int inNextAttribute = 3;
public static final int inAttribute = 4;
public static final int inAttribute1 = 5;
public static final int inAttributeValue = 6;
public static final int inAttributeQuoteValue = 7;
public static final int inAttributeQuotesValue = 8;
public static final int inETag = 9;
public static final int inETag1 = 10;
public static final int inMTTag = 11;
public static final int inTag = 12;
public static final int inPI = 13;
public static final int inPI1 = 14;
public static final int inPossiblySkipping = 15;
public static final int inCharData = 16;
public static final int inCDATA = 17;
public static final int inCDATA1 = 18;
public static final int inComment =19;
public static final int inDTD = 20;
/**
 * Creates a parser with explicit sizing for its character buffer.
 *
 * @param initialBufferSize number of chars allocated for the buffer when a parse starts
 * @param bufferIncrement   number of chars added whenever the buffer has to grow
 */
public MinML(final int initialBufferSize, final int bufferIncrement) {
  this.bufferIncrement = bufferIncrement;
  this.initialBufferSize = initialBufferSize;
}

/**
 * Creates a parser whose buffer starts at 256 chars and grows in steps of 128.
 */
public MinML() {
  this(256, 128);
}
/**
 * Parses a document from a character stream, driving the registered handlers.
 *
 * <p>The parser is a table-driven state machine: each input character is mapped to a
 * character class (<code>charClasses</code>), the current state's row in
 * <code>operands</code> yields a transition for that class, and the transition's low
 * byte selects the action executed in the switch below while its high byte selects the
 * next state (or, for errors, the message text).</p>
 *
 * <p>The method returns once the outermost element has been closed (after
 * <code>endDocument</code> has been reported) or when the <code>exitParser</code> action
 * is reached. An <code>IOException</code> raised inside the parse loop is passed to the
 * error handler's <code>fatalError</code> wrapped in a <code>SAXParseException</code>.
 * In every case the handlers are reset to this parser's own no-op defaults and the tag
 * stack is cleared before returning.</p>
 *
 * @param in source of the document's characters
 * @throws SAXException if a handler throws, or a fatal parse error is reported
 * @throws IOException declared by the parser interface
 */
public void parse(final Reader in) throws SAXException, IOException {
  final Vector attributeNames = new Vector();
  final Vector attributeValues = new Vector();

  // Live view over the two vectors above. Every attribute is reported with type "CDATA".
  final AttributeList attrs = new AttributeList() {
    public int getLength() {
      return attributeNames.size();
    }

    public String getName(final int i) {
      return (String)attributeNames.elementAt(i);
    }

    public String getType(final int i) {
      return "CDATA";
    }

    public String getValue(final int i) {
      return (String)attributeValues.elementAt(i);
    }

    public String getType(final String name) {
      return "CDATA";
    }

    public String getValue(final String name) {
      final int index = attributeNames.indexOf(name);

      return (index == -1) ? null : (String)attributeValues.elementAt(index);
    }
  };

  final MinMLBuffer buffer = new MinMLBuffer(in);
  int currentChar = 0, charCount = 0;
  int level = 0;
  String elementName = null;
  String state = operands[inSkipping];

  this.lineNumber = 1;
  this.columnNumber = 0;

  try {
    main: while(true) {
      charCount++;

      //
      // this is to try and make the loop a bit faster
      // currentChar = buffer.read(); is simpler but is a bit slower.
      //
      currentChar = (buffer.nextIn == buffer.lastIn) ? buffer.read() : buffer.chars[buffer.nextIn++];

      final int transition;

      if (currentChar > ']') {
        // everything above ']' is an "ordinary" character (class 14)
        transition = state.charAt(14);
      } else {
        // +1 because slot 0 of the table is the class for EOF (-1)
        final int charClass = charClasses[currentChar + 1];

        if (charClass == -1) fatalError("Document contains illegal control character with value " + currentChar, this.lineNumber, this.columnNumber);

        if (charClass == 12) {
          // whitespace: normalise line ends and keep the line counter up to date
          if (currentChar == '\r') {
            currentChar = '\n';
            charCount = -1; // the increment at the top of the loop makes this 0 for the next character
          }

          if (currentChar == '\n') {
            if (charCount == 0) continue;  // preceded by '\r' so ignore

            if (charCount != -1) charCount = 0;

            this.lineNumber++;
            this.columnNumber = 0;
          }
        }

        transition = state.charAt(charClass);
      }

      this.columnNumber++;

      // high byte: index of the next state's row (or of an error message)
      final String operand = operands[transition >>> 8];

      // low byte: action code
      switch (transition & 0XFF) {
        case endStartName:
          // end of start element name
          elementName = buffer.getString();
          if (currentChar != '>' && currentChar != '/') break;  // change state to operand
          // drop through to emit start element (we have no attributes)

        case emitStartElement:
          // emit start element
          // (an empty tag stack means this is the root element, so the document is started first)
          final Writer newWriter = this.extDocumentHandler.startElement(elementName, attrs,
                                                                         (this.tags.empty()) ?
                                                                           this.extDocumentHandler.startDocument(buffer)
                                                                         :
                                                                           buffer.getWriter());

          // the tag stack holds, per open element, the previous writer followed by the element name
          buffer.pushWriter(newWriter);
          this.tags.push(elementName);

          attributeValues.removeAllElements();
          attributeNames.removeAllElements();

          if (currentChar != '/') break;  // change state to operand

          // <element/> drop through

        case emitEndElement:
          // emit end element
          if (this.tags.empty())
            fatalError("end tag at begining of document", this.lineNumber, this.columnNumber);

          final String begin = (String)this.tags.pop();
          buffer.popWriter();
          elementName = buffer.getString();

          if (currentChar != '/' && !elementName.equals(begin)) {
            fatalError("end tag </" + elementName + "> does not match begin tag <" + begin + ">",
                       this.lineNumber, this.columnNumber);
          } else {
            this.documentHandler.endElement(begin);

            if (this.tags.empty()) {
              // the root element has just been closed
              this.documentHandler.endDocument();
              return;
            }
          }
          break;  // change state to operand

        case emitCharacters:
          // emit characters
          buffer.flush();
          break;  // change state to operand

        case saveAttributeName:
          // save attribute name
          attributeNames.addElement(buffer.getString());
          break;  // change state to operand

        case saveAttributeValue:
          // save attribute value
          attributeValues.addElement(buffer.getString());
          break;  // change state to operand

        case startComment:
          // change state if we have found "<!--"
          if (buffer.read() != '-') continue; // not "<!--"
          break;  // change state to operand

        case endComment:
          // change state if we find "-->"
          if ((currentChar = buffer.read()) == '-') {
            // deal with the case where we might have "------->"
            while ((currentChar = buffer.read()) == '-');

            if (currentChar == '>') break;  // end of comment, change state to operand
          }
          continue;   // not end of comment, don't change state

        case incLevel:
          level++;
          break;

        case decLevel:
          if (level == 0) break; // outer level <> change state
          level--;
          continue; // in nested <>, don't change state

        case startCDATA:
          // change state if we have found "<![CDATA["
          if (buffer.read() != 'C') continue;   // don't change state
          if (buffer.read() != 'D') continue;   // don't change state
          if (buffer.read() != 'A') continue;   // don't change state
          if (buffer.read() != 'T') continue;   // don't change state
          if (buffer.read() != 'A') continue;   // don't change state
          if (buffer.read() != '[') continue;   // don't change state
          break;  // change state to operand

        case endCDATA:
          // change state if we find "]]>"
          if ((currentChar = buffer.read()) == ']') {
            // deal with the case where we might have "]]]]]]]>"
            while ((currentChar = buffer.read()) == ']') buffer.write(']');

            if (currentChar == '>') break;  // end of CDATA section, change state to operand

            buffer.write(']');
          }

          buffer.write(']');
          buffer.write(currentChar);
          continue;   // not end of CDATA section, don't change state

        case processCharRef:
          // process character entity
          //
          // The string literal below is walked with crefState as its index: each entity
          // name is followed by ';' and then by the character it stands for. "amp" and
          // "apos" share their leading 'a', which is why the second name appears as "pos".
          // On a mismatch the second literal gives the index of the next name to try,
          // or 0xff when no candidate is left.
          int crefState = 0;

          currentChar = buffer.read();

          while (true) {
            if ("#amp;&pos;'quot;\"gt;>lt;<".charAt(crefState) == currentChar) {
              crefState++;

              if (currentChar == ';') {
                // whole name matched: the character after ';' is the replacement
                buffer.write("#amp;&pos;'quot;\"gt;>lt;<".charAt(crefState));
                continue main;

              } else if (currentChar == '#') {
                // numeric reference: &#ddd; or &#xhhh;
                final int radix;

                currentChar = buffer.read();

                if (currentChar == 'x') {
                  radix = 16;
                  currentChar = buffer.read();
                } else {
                  radix = 10;
                }

                int charRef = Character.digit((char)currentChar, radix);

                // A reference must start with a valid digit. Without this check the -1
                // would be folded into the following digits and a garbage character
                // written for input such as "&#g5;".
                if (charRef == -1) break;  // bad char reference

                while (true) {
                  currentChar = buffer.read();

                  final int digit = Character.digit((char)currentChar, radix);

                  if (digit == -1) break;

                  // NOTE(review): the (char) cast keeps only the low 16 bits, so
                  // references above 0xFFFF are not represented correctly
                  charRef = (char)((charRef * radix) + digit);
                }

                if (currentChar == ';') {
                  buffer.write(charRef);
                  continue main;
                }

                break;  // bad char reference
              } else {
                currentChar = buffer.read();
              }
            } else {
              crefState = ("\u0001\u000b\u0006\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff" +
              //               #     a     m     p     ;     &     p     o     s     ;     '
              //               0     1     2     3     4     5     6     7     8     9     a
                           "\u0011\u00ff\u00ff\u00ff\u00ff\u00ff\u0015\u00ff\u00ff\u00ff" +
              //               q     u     o     t     ;     "     g     t     ;     >
              //               b     c     d     e     f     10    11    12    13    14
                           "\u00ff\u00ff\u00ff").charAt(crefState);
              //               l     t     ;
              //               15    16    17

              if (crefState == 255) break;  // bad char reference
            }
          }

          // drop through to report error and exit

        case parseError:
          // report fatal error
          fatalError(operand, this.lineNumber, this.columnNumber);
          // drop through to exit parser

        case exitParser:
          // exit parser
          return;

        case writeCdata:
          // write character data
          // this will also write any skipped whitespace
          buffer.write(currentChar);
          break;  // change state to operand

        case discardAndChange:
          // throw saved characters away and change state
          buffer.reset();
          break;  // change state to operand

        case discardSaveAndChange:
          // throw saved characters away, save character and change state
          buffer.reset();
          // drop through to save character and change state

        case saveAndChange:
          // save character and change state
          buffer.saveChar((char)currentChar);
          break;  // change state to operand

        case change:
          // change state to operand
          break;  // change state to operand
      }

      state = operand;
    }
  }
  catch (final IOException e) {
    this.errorHandler.fatalError(new SAXParseException(e.toString(), null, null, this.lineNumber, this.columnNumber, e));
  }
  finally {
    // handlers are single-use: fall back to this parser's own no-op implementations
    this.errorHandler = this;
    this.documentHandler = this.extDocumentHandler = this;
    this.tags.removeAllElements();
  }
}
/**
 * Parses the document described by a SAX input source.
 *
 * <p>The source's character stream is used when present, otherwise its byte stream,
 * otherwise a stream opened from its system id interpreted as a URL. Byte streams are
 * decoded with the encoding declared on the source, or with the platform default when
 * none is declared. A stream opened here from the system id is closed before returning;
 * streams supplied by the caller are left open.</p>
 *
 * @param source where to read the document from
 * @throws SAXException if parsing fails
 * @throws IOException if the declared encoding is unsupported or the system id cannot be opened
 */
public void parse(final InputSource source) throws SAXException, IOException {
  final Reader characterStream = source.getCharacterStream();

  if (characterStream != null) {
    parse(characterStream);
    return;
  }

  final String encoding = source.getEncoding();
  final java.io.InputStream byteStream = source.getByteStream();

  if (byteStream != null) {
    parse(openReader(byteStream, encoding));
    return;
  }

  // This stream is opened here, so it is also closed here.
  final java.io.InputStream urlStream = new URL(source.getSystemId()).openStream();
  boolean parsed = false;

  try {
    parse(openReader(urlStream, encoding));
    parsed = true;
  }
  finally {
    if (parsed) {
      urlStream.close();
    } else {
      try {
        urlStream.close();
      }
      catch (final IOException ignored) {
        // a failure is already propagating; do not let close() replace it
      }
    }
  }
}

// Wraps a byte stream in a Reader, using the given encoding when one is declared.
private static Reader openReader(final java.io.InputStream stream, final String encoding) throws IOException {
  return (encoding == null) ? new InputStreamReader(stream) : new InputStreamReader(stream, encoding);
}
/**
 * Parses the document identified by a system id.
 *
 * @param systemId system identifier of the document, wrapped in an InputSource
 * @throws SAXException if parsing fails
 * @throws IOException if the document cannot be read
 */
public void parse(final String systemId) throws SAXException, IOException {
  final InputSource source = new InputSource(systemId);

  parse(source);
}
/**
 * Locale selection is not supported by this parser.
 *
 * @param locale ignored
 * @throws SAXException always, with the message "Not supported"
 */
public void setLocale(final Locale locale) throws SAXException {
throw new SAXException("Not supported");
}
/**
 * Accepted for interface compatibility only; the resolver is ignored.
 *
 * @param resolver ignored
 */
public void setEntityResolver(final EntityResolver resolver) {
// not supported
}
/**
 * Accepted for interface compatibility only; the handler is ignored.
 *
 * @param handler ignored
 */
public void setDTDHandler(final DTDHandler handler) {
// not supported
}
/**
 * Registers a standard SAX document handler for the next parse.
 *
 * <p>Writer-based events are routed through this parser's own adapter methods, which
 * forward to the given handler. The handler is given this parser as its Locator
 * immediately, matching what the overload taking the extended handler type does.</p>
 *
 * @param handler the handler to notify, or <code>null</code> to discard all events
 */
public void setDocumentHandler(final org.xml.sax.DocumentHandler handler) {
  this.documentHandler = (handler == null) ? this : handler;
  this.extDocumentHandler = this;
  // supply the locator before any other callback reaches the handler
  this.documentHandler.setDocumentLocator(this);
}
/**
 * Registers an extended (Writer-aware) document handler for the next parse and hands
 * it this parser as its Locator.
 *
 * @param handler the handler to notify, or <code>null</code> to discard all events
 */
public void setDocumentHandler(final DocumentHandler handler) {
  final DocumentHandler target;

  if (handler == null) {
    target = this;
  } else {
    target = handler;
  }

  this.extDocumentHandler = target;
  this.documentHandler = target;
  target.setDocumentLocator(this);
}
/**
 * Registers the handler that receives errors for the next parse.
 *
 * @param handler the handler to notify, or <code>null</code> to use this parser's own
 *                implementation
 */
public void setErrorHandler(final ErrorHandler handler) {
  if (handler == null) {
    this.errorHandler = this;
  } else {
    this.errorHandler = handler;
  }
}
/**
 * Default handler implementation: the locator is ignored.
 *
 * @param locator ignored
 */
public void setDocumentLocator(final Locator locator) {
}
/**
 * Default handler implementation: does nothing.
 */
public void startDocument() throws SAXException {
}
/**
 * Writer-aware start-of-document event, used when this parser is its own extended
 * handler: forwards to the registered handler's <code>startDocument()</code>.
 *
 * @param writer the writer offered for character data
 * @return the same writer, unchanged
 * @throws SAXException if the registered handler throws
 */
public Writer startDocument(final Writer writer) throws SAXException {
this.documentHandler.startDocument();
return writer;
}
/**
 * Default handler implementation: does nothing.
 */
public void endDocument() throws SAXException {
}
/**
 * Default handler implementation: does nothing.
 *
 * @param name ignored
 * @param attributes ignored
 */
public void startElement(final String name, final AttributeList attributes) throws SAXException {
}
/**
 * Writer-aware start-of-element event, used when this parser is its own extended
 * handler: forwards to the registered handler's two-argument <code>startElement</code>.
 *
 * @param name element name
 * @param attributes the element's attributes
 * @param writer the writer currently receiving character data
 * @return the same writer, unchanged
 * @throws SAXException if the registered handler throws
 */
public Writer startElement(final String name, final AttributeList attributes, final Writer writer)
throws SAXException
{
this.documentHandler.startElement(name, attributes);
return writer;
}
/**
 * Default handler implementation: does nothing.
 *
 * @param name ignored
 */
public void endElement(final String name) throws SAXException {
}
/**
 * Default handler implementation: the character data is discarded.
 *
 * @param ch ignored
 * @param start ignored
 * @param length ignored
 */
public void characters(final char ch[], final int start, final int length) throws SAXException {
}
/**
 * Default handler implementation: does nothing.
 *
 * @param ch ignored
 * @param start ignored
 * @param length ignored
 */
public void ignorableWhitespace(final char ch[], final int start, final int length) throws SAXException {
}
/**
 * Default handler implementation: does nothing.
 *
 * @param target ignored
 * @param data ignored
 */
public void processingInstruction(final String target, final String data) throws SAXException {
}
/**
 * Default error handler implementation: warnings are ignored.
 *
 * @param e ignored
 */
public void warning(final SAXParseException e) throws SAXException {
}
/**
 * Default error handler implementation: recoverable errors are ignored.
 *
 * @param e ignored
 */
public void error(final SAXParseException e) throws SAXException {
}
/**
 * Default error handler implementation: rethrows the reported exception.
 *
 * @param e the fatal error being reported
 * @throws SAXException always; the exception passed in
 */
public void fatalError(final SAXParseException e) throws SAXException {
throw e;
}
/**
 * Locator implementation.
 *
 * @return always the empty string
 */
public String getPublicId() {
return "";
}
/**
 * Locator implementation.
 *
 * @return always the empty string
 */
public String getSystemId() {
return "";
}
/**
 * Locator implementation.
 *
 * @return the line counter maintained by parse(); it starts at 1 for each parse
 */
public int getLineNumber () {
return this.lineNumber;
}
/**
 * Locator implementation.
 *
 * @return the column counter maintained by parse(); it is reset to 0 at each line end
 */
public int getColumnNumber () {
return this.columnNumber;
}
/**
 * Reports a fatal parse error to the error handler and then aborts the parse.
 *
 * @param msg description of the problem
 * @param lineNumber line to report
 * @param columnNumber column to report
 * @throws SAXException always: either whatever the handler throws, or the
 *                      SAXParseException built here if the handler returns normally
 */
private void fatalError(final String msg, final int lineNumber, final int columnNumber) throws SAXException {
  final SAXParseException failure = new SAXParseException(msg, null, null, lineNumber, columnNumber);

  this.errorHandler.fatalError(failure);

  // reached only when the handler did not throw
  throw failure;
}
/**
 * Combined input buffer and character-data Writer used by parse().
 *
 * A single char array serves both purposes: positions [0, count) hold characters
 * that have been saved or written (a token being collected, or character data
 * waiting to be delivered), while positions [nextIn, lastIn) hold input that has
 * been read from the underlying Reader but not yet consumed.
 */
private class MinMLBuffer extends Writer {
// Creates a buffer that pulls its input from the given reader.
public MinMLBuffer(final Reader in) {
this.in = in;
}
// Closing only flushes; the underlying reader is not closed here.
public void close() throws IOException {
flush();
}
// Delivers any pending characters, then flushes the current writer when that
// writer is not this buffer itself. The flushed flag is set even on failure.
public void flush() throws IOException {
try {
_flush();
if (this.writer != this) this.writer.flush();
}
finally {
this.flushed = true;
}
}
// Appends one character and marks the contents as written data.
// NOTE(review): no capacity check is made here.
public void write(final int c) throws IOException {
this.written = true;
this.chars[this.count++] = (char)c;
}
// Appends a run of characters and marks the contents as written data.
// NOTE(review): no capacity check is made before the copy, and the copy target
// is the same array that holds unread input — confirm callers cannot overrun it.
public void write(final char[] cbuf, final int off, final int len) throws IOException {
this.written = true;
System.arraycopy(cbuf, off, this.chars, this.count, len);
this.count += len;
}
// Appends one character of a token being collected. Clearing the written flag
// makes read() grow the buffer rather than flush it when more input is needed.
public void saveChar(final char c) {
this.written = false;
this.chars[this.count++] = c;
}
// Makes the given writer current (this buffer when null) after saving the
// previous one on the enclosing parser's tag stack.
public void pushWriter(final Writer writer) {
MinML.this.tags.push(this.writer);
this.writer = (writer == null) ? this : writer;
this.flushed = this.written = false;
}
// Returns the writer currently receiving character data.
public Writer getWriter() {
return this.writer;
}
// Flushes the current writer if that has not happened yet, then restores the
// writer saved by the matching pushWriter(), even if the flush fails.
public void popWriter() throws IOException {
try {
if (!this.flushed && this.writer != this) this.writer.flush();
}
finally {
this.writer = (Writer)MinML.this.tags.pop();
this.flushed = this.written = false;
}
}
// Returns the collected characters as a String and empties the collection area.
public String getString() {
final String result = new String(this.chars, 0, this.count);
this.count = 0;
return result;
}
// Discards the collected characters.
public void reset() {
this.count = 0;
}
// Returns the next input character, or -1 at end of input.
public int read() throws IOException {
if (this.nextIn == this.lastIn) {
// all buffered input consumed: make room, then refill after the collected chars
if (this.count != 0) {
if (this.written) {
// pending character data can simply be delivered to free the whole array
_flush();
} else if (this.count >= (this.chars.length - MinML.this.bufferIncrement)) {
// a token is being collected and must be kept: grow the array instead
final char[] newChars = new char[this.chars.length + MinML.this.bufferIncrement];
System.arraycopy(this.chars, 0, newChars, 0, this.count);
this.chars = newChars;
}
}
final int numRead = this.in.read(this.chars, this.count, this.chars.length - this.count);
if (numRead == -1) return -1;
this.nextIn = this.count;
this.lastIn = this.count + numRead;
}
return this.chars[this.nextIn++];
}
// Delivers the pending characters: to the document handler's characters() when
// this buffer is the current writer, otherwise to the current writer. A
// SAXException from the handler is rethrown as an IOException carrying only its
// string form. The pending count is cleared whether or not delivery succeeds.
private void _flush() throws IOException {
if (this.count != 0) {
try {
if (this.writer == this) {
try {
MinML.this.documentHandler.characters(this.chars, 0, this.count);
}
catch (final SAXException e) {
throw new IOException(e.toString());
}
} else {
this.writer.write(this.chars, 0, this.count);
}
}
finally {
this.count = 0;
}
}
}
// bounds of the unread input within chars: [nextIn, lastIn)
private int nextIn = 0, lastIn = 0;
private char[] chars = new char[MinML.this.initialBufferSize];
private final Reader in;
// number of saved/written characters at the start of chars
private int count = 0;
// current destination for character data; "this" means deliver to the document handler
private Writer writer = this;
// set by flush(), cleared by pushWriter()/popWriter()
private boolean flushed = false;
// true when the chars in [0, count) came from write() rather than saveChar()
private boolean written = false;
}
// Receiver of the Writer-aware events; this parser itself when only a standard
// SAX handler (or none) is registered.
private DocumentHandler extDocumentHandler = this;
// Receiver of the standard SAX document events.
private org.xml.sax.DocumentHandler documentHandler = this;
// Receiver of error reports; this parser itself by default.
private ErrorHandler errorHandler = this;
// Per open element: the previously current Writer followed by the element name.
private final Stack tags = new Stack();
// Position reported through the Locator methods.
private int lineNumber = 1;
private int columnNumber = 0;
// Sizing of the parse buffer, fixed at construction.
private final int initialBufferSize;
private final int bufferIncrement;
// Character class of every input value from EOF (-1) up to ']', indexed by
// (character + 1). parse() treats every character above ']' as class 14
// without consulting this table.
//
// Classes: 0 '<'   1 '>'   2 '/'   3 '&'   4 '?'   5 '='   6 '\''   7 '"'
//          8 '!'   9 '['  10 ']'  11 '-'  12 whitespace   13 end of input
//         14 any other character   -1 illegal control character
private static final byte[] charClasses = {
// EOF
13,
// \t \n \r
-1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 12, -1, -1, 12, -1, -1,
//
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
// SP ! " # $ % & ' ( ) * + , - . /
12, 8, 7, 14, 14, 14, 3, 6, 14, 14, 14, 14, 14, 11, 14, 2,
// 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 5, 1, 4,
//
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
// [ \ ]
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 9, 14, 10
};
// State transition table followed by the parser's error messages.
//
// Entries 0-20 hold one 15-character row per parser state, in the order of the
// in* state constants. The character at position c of a row is the transition
// taken when the current input character belongs to class c (see charClasses).
// parse() decodes a transition as:
//   low byte  - the action code to execute
//   high byte - an index into this same array: the row of the next state or,
//               for the parseError action, the message to report
// Entries 21-29 are those messages.
private static final String[] operands = {
// 0: inSkipping
"\u0c13\u150f\u150f\u150f\u150f\u150f\u150f\u150f\u150f\u150f\u150f\u150f\u0013\u000e\u150f",
// 1: inSTag
"\u160f\u0f00\u0b00\u160f\u160f\u160f\u160f\u160f\u160f\u160f\u160f\u0112\u0200\u170f\u0112",
// 2: inPossiblyAttribute
"\u160f\u0f01\u0b01\u160f\u160f\u160f\u160f\u160f\u160f\u160f\u160f\u160f\u0213\u170f\u0412",
// 3: inNextAttribute
"\u160f\u0f01\u0b01\u160f\u180f\u180f\u180f\u180f\u180f\u180f\u180f\u180f\u0313\u170f\u0412",
// 4: inAttribute
"\u180f\u180f\u180f\u180f\u180f\u0604\u180f\u180f\u180f\u180f\u180f\u0412\u0513\u170f\u0412",
// 5: inAttribute1
"\u180f\u180f\u180f\u180f\u180f\u0604\u180f\u180f\u180f\u180f\u180f\u180f\u0513\u170f\u180f",
// 6: inAttributeValue
"\u190f\u190f\u190f\u190f\u190f\u190f\u0713\u0813\u190f\u190f\u190f\u190f\u0613\u170f\u190f",
// 7: inAttributeQuoteValue
"\u0712\u0712\u0712\u1a0c\u0712\u0712\u0305\u0712\u0712\u0712\u0712\u0712\u0712\u170f\u0712",
// 8: inAttributeQuotesValue
"\u0812\u0812\u0812\u1a0c\u0812\u0812\u0812\u0305\u0812\u0812\u0812\u0812\u0812\u170f\u0812",
// 9: inETag
"\u160f\u0002\u160f\u160f\u160f\u160f\u160f\u160f\u160f\u160f\u160f\u0912\u0913\u170f\u0912",
// 10: inETag1
"\u1b0f\u1b0f\u0903\u1b0f\u1b0f\u1b0f\u1b0f\u1b0f\u1113\u1b0f\u1b0f\u1b0f\u1b0f\u170f\u1b0f",
// 11: inMTTag
"\u160f\u0013\u160f\u160f\u160f\u160f\u160f\u160f\u160f\u160f\u160f\u160f\u160f\u170f\u160f",
// 12: inTag
"\u160f\u1c0f\u0913\u160f\u0d13\u160f\u160f\u160f\u1113\u160f\u160f\u160f\u160f\u170f\u0111",
// 13: inPI
"\u0d13\u0d13\u0d13\u0d13\u0e13\u0d13\u0d13\u0d13\u0d13\u0d13\u0d13\u0d13\u0d13\u170f\u0d13",
// 14: inPI1
"\u0d13\u0013\u0d13\u0d13\u0e13\u0d13\u0d13\u0d13\u0d13\u0d13\u0d13\u0d13\u0d13\u170f\u0d13",
// 15: inPossiblySkipping
"\u0c10\u100d\u100d\u1a0c\u100d\u100d\u100d\u100d\u100d\u100d\u100d\u100d\u0f12\u170f\u100d",
// 16: inCharData
"\u0a13\u100d\u100d\u1a0c\u100d\u100d\u100d\u100d\u100d\u100d\u100d\u100d\u100d\u170f\u100d",
// 17: inCDATA
"\u1d0f\u1d0f\u1d0f\u1d0f\u1d0f\u1d0f\u1d0f\u1d0f\u1d0f\u120a\u1d0f\u1306\u1d0f\u170f\u1413",
// 18: inCDATA1
"\u120d\u120d\u120d\u120d\u120d\u120d\u120d\u120d\u120d\u120d\u100b\u120d\u120d\u170f\u120d",
// 19: inComment
"\u1313\u1313\u1313\u1313\u1313\u1313\u1313\u1313\u1313\u1313\u1313\u0007\u1313\u170f\u1313",
// 20: inDTD
"\u1408\u0009\u1413\u1413\u1413\u1413\u1413\u1413\u1413\u1413\u1413\u1413\u1413\u170f\u1413",
// 21-29: error messages, referenced by the high byte of parseError transitions
"expected Element",
"unexpected character in tag",
"unexpected end of file found",
"attribute name not followed by '='",
"invalid attribute value",
"invalid Character Entity",
"expecting end tag",
"empty tag",
"unexpected character after <!"
};
}