blob: 944839d921d33a7b564aafbfc7e77aacc2653c54 [file] [log] [blame]
/*
* Copyright 1999-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cocoon.components.sax;
import java.util.ArrayList;
import org.apache.cocoon.xml.DefaultLexicalHandler;
import org.apache.cocoon.xml.XMLConsumer;
import org.apache.cocoon.xml.XMLProducer;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.DefaultHandler;
/**
* This a simple xml compiler which takes a byte array as input.
* If you want to reuse this interpreter make sure to call first {@link #recycle()}
* and then set the new consumer for the sax events.
*
* @version $Id$
*/
public final class XMLByteStreamInterpreter implements XMLProducer {
private static final int START_DOCUMENT = 0;
private static final int END_DOCUMENT = 1;
private static final int START_PREFIX_MAPPING = 2;
private static final int END_PREFIX_MAPPING = 3;
private static final int START_ELEMENT = 4;
private static final int END_ELEMENT = 5;
private static final int CHARACTERS = 6;
private static final int IGNORABLE_WHITESPACE = 7;
private static final int PROCESSING_INSTRUCTION = 8;
private static final int COMMENT = 9;
private static final int LOCATOR = 10;
private static final int START_DTD = 11;
private static final int END_DTD = 12;
private static final int START_CDATA = 13;
private static final int END_CDATA = 14;
private static final int SKIPPED_ENTITY = 15;
private static final int START_ENTITY = 16;
private static final int END_ENTITY = 17;
private ArrayList list = new ArrayList();
private byte[] input;
private int currentPos;
protected static final ContentHandler EMPTY_CONTENT_HANDLER = new DefaultHandler();
/** The <code>ContentHandler</code> receiving SAX events. */
protected ContentHandler contentHandler = EMPTY_CONTENT_HANDLER;
/** The <code>LexicalHandler</code> receiving SAX events. */
protected LexicalHandler lexicalHandler = DefaultLexicalHandler.NULL_HANDLER;
/**
* Set the <code>XMLConsumer</code> that will receive XML data.
* <br>
* This method will simply call <code>setContentHandler(consumer)</code>
* and <code>setLexicalHandler(consumer)</code>.
*/
public void setConsumer(XMLConsumer consumer) {
setContentHandler(consumer);
setLexicalHandler(consumer);
}
/**
* Set the <code>ContentHandler</code> that will receive XML data.
* <br>
* Subclasses may retrieve this <code>ContentHandler</code> instance
* accessing the protected <code>super.contentHandler</code> field.
*/
public void setContentHandler(ContentHandler handler) {
this.contentHandler = handler;
}
/**
* Set the <code>LexicalHandler</code> that will receive XML data.
* <br>
* Subclasses may retrieve this <code>LexicalHandler</code> instance
* accessing the protected <code>super.lexicalHandler</code> field.
*/
public void setLexicalHandler(LexicalHandler handler) {
this.lexicalHandler = handler;
}
public void recycle() {
this.contentHandler = EMPTY_CONTENT_HANDLER;
this.lexicalHandler = DefaultLexicalHandler.NULL_HANDLER;
this.list.clear();
this.input = null;
}
public void deserialize(Object saxFragment) throws SAXException {
if (!(saxFragment instanceof byte[])) {
throw new SAXException("XMLDeserializer needs byte array for deserialization.");
}
this.list.clear();
this.input = (byte[])saxFragment;
this.currentPos = 0;
this.checkProlog();
this.parse();
}
private void parse() throws SAXException {
while ( currentPos < input.length) {
switch (this.readEvent()) {
case START_DOCUMENT:
contentHandler.startDocument();
break;
case END_DOCUMENT:
contentHandler.endDocument();
break;
case START_PREFIX_MAPPING:
contentHandler.startPrefixMapping(this.readString(), this.readString());
break;
case END_PREFIX_MAPPING:
contentHandler.endPrefixMapping(this.readString());
break;
case START_ELEMENT:
int attributes = this.readAttributes();
AttributesImpl atts = new AttributesImpl();
for (int i = 0; i < attributes; i++) {
atts.addAttribute(this.readString(), this.readString(), this.readString(), this.readString(), this.readString());
}
contentHandler.startElement(this.readString(), this.readString(), this.readString(), atts);
break;
case END_ELEMENT:
contentHandler.endElement(this.readString(), this.readString(), this.readString());
break;
case CHARACTERS:
char[] chars = this.readChars();
int len = chars.length;
while (len > 0 && chars[len-1]==0) len--;
if (len > 0) contentHandler.characters(chars, 0, len);
break;
case IGNORABLE_WHITESPACE:
char[] spaces = this.readChars();
len = spaces.length;
while (len > 0 && spaces[len-1]==0) len--;
if (len > 0) contentHandler.characters(spaces, 0, len);
break;
case PROCESSING_INSTRUCTION:
contentHandler.processingInstruction(this.readString(), this.readString());
break;
case COMMENT:
chars = this.readChars();
len = chars.length;
while (len > 0 && chars[len-1]==0) len--;
if (len > 0) lexicalHandler.comment(chars, 0, len);
break;
case LOCATOR:
{
String publicId = this.readString();
String systemId = this.readString();
int lineNumber = this.read();
int columnNumber = this.read();
org.xml.sax.helpers.LocatorImpl locator = new org.xml.sax.helpers.LocatorImpl();
locator.setPublicId(publicId);
locator.setSystemId(systemId);
locator.setLineNumber(lineNumber);
locator.setColumnNumber(columnNumber);
contentHandler.setDocumentLocator(locator);
}
break;
case START_DTD:
lexicalHandler.startDTD(this.readString(),
this.readString(),
this.readString());
break;
case END_DTD:
lexicalHandler.endDTD();
break;
case START_CDATA:
lexicalHandler.startCDATA();
break;
case END_CDATA:
lexicalHandler.endCDATA();
break;
case SKIPPED_ENTITY:
contentHandler.skippedEntity( this.readString() );
break;
case START_ENTITY:
lexicalHandler.startEntity( this.readString() );
break;
case END_ENTITY:
lexicalHandler.endEntity( this.readString() );
break;
default:
throw new SAXException ("parsing error: event not supported.");
}
}
}
private void checkProlog() throws SAXException {
int valid = 0;
if (this.read() == 'C') valid++;
if (this.read() == 'X') valid++;
if (this.read() == 'M') valid++;
if (this.read() == 'L') valid++;
if (this.read() == 1) valid++;
if (this.read() == 0) valid++;
if (valid != 6) throw new SAXException("Unrecognized file format.");
}
protected int readEvent() throws SAXException {
return this.read();
}
private int readAttributes() throws SAXException {
int ch1 = this.read();
int ch2 = this.read();
return ((ch1 << 8) + (ch2 << 0));
}
private String readString() throws SAXException {
int length = this.readWord();
int index = length & 0x00007FFF;
if (length >= 0x00008000) {
return (String) list.get(index);
}
else {
if (length == 0x00007FFF) {
length = this.readLong();
}
char[] chars = this.readChars(length);
int len = chars.length;
if (len > 0) {
while (chars[len-1]==0) len--;
}
String str;
if (len == 0) {
str = "";
} else {
str = new String(chars, 0, len);
}
list.add(str);
return str;
}
}
/**
* The returned char array might contain any number of zero bytes
* at the end
*/
private char[] readChars() throws SAXException {
int length = this.readWord();
if (length == 0x00007FFF) {
length = this.readLong();
}
return this.readChars(length);
}
private int read() throws SAXException {
if (currentPos >= input.length)
throw new SAXException("Reached end of input.");
return input[currentPos++] & 0xff;
}
/**
* The returned char array might contain any number of zero bytes
* at the end
*/
private char[] readChars(int len) throws SAXException {
char[] str = new char[len];
byte[] bytearr = new byte[len];
int c, char2, char3;
int count = 0;
int i = 0;
this.readBytes(bytearr);
while (count < len) {
c = bytearr[count] & 0xff;
switch (c >> 4) {
case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
// 0xxxxxxx
count++;
str[i++] = (char) c;
break;
case 12: case 13:
// 110x xxxx 10xx xxxx
count += 2;
char2 = bytearr[count-1];
str[i++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F));
break;
case 14:
// 1110 xxxx 10xx xxxx 10xx xxxx
count += 3;
char2 = bytearr[count-2];
char3 = bytearr[count-1];
str[i++] = ((char)(((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | ((char3 & 0x3F) << 0)));
break;
default:
// 10xx xxxx, 1111 xxxx
throw new SAXException("UTFDataFormatException");
}
}
return str;
}
private void readBytes(byte[] b) throws SAXException {
if (this.currentPos + b.length > this.input.length) {
// TC:
// >= prevents getting the last byte
// 0 1 2 3 4 input.length = 5
// |_ currentPos = 2
// b.length = 3
// 2 + 3 > 5 ok
// 2 + 3 >= 5 wrong
// why has this worked before?
throw new SAXException("End of input reached.");
}
System.arraycopy(this.input, this.currentPos, b, 0, b.length);
this.currentPos += b.length;
}
private int readWord() throws SAXException {
int ch1 = this.read();
int ch2 = this.read();
return ((ch1 << 8) + (ch2 << 0));
}
private int readLong() throws SAXException {
int ch1 = this.read();
int ch2 = this.read();
int ch3 = this.read();
int ch4 = this.read();
return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0));
}
}