blob: 7e9be75d6122514fef8f60792d1568e0ce334141 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cocoon.components.sax;
import java.util.ArrayList;
import org.apache.cocoon.xml.DefaultLexicalHandler;
import org.apache.cocoon.xml.XMLConsumer;
import org.apache.cocoon.xml.XMLProducer;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.DefaultHandler;
/**
* This is a simple XML interpreter which takes a compiled byte stream as input
* and replays it as SAX events.
* If you want to reuse this interpreter, make sure to first call {@link #recycle()}
* and then set the new consumer for the SAX events.
*
* @version $Id: AbstractXMLByteStreamInterpreter.java 587751 2007-10-24 02:41:36Z vgritsenko $
*/
public abstract class AbstractXMLByteStreamInterpreter implements XMLProducer, XMLByteStreamConstants {

    /** 16-bit length word announcing that the real value follows as a 32-bit word. */
    private static final int WORD_ESCAPE_TO_LONG = 0x00007FFF;

    /** Bit set in a 16-bit word when it carries a string-table index instead of a length. */
    private static final int WORD_INDEX_FLAG = 0x00008000;

    /**
     * Table of the strings decoded so far by {@link #readString()}, in decoding order.
     * Later occurrences of a string are stored in the stream as an index into this table.
     */
    private final ArrayList list = new ArrayList();

    protected static final ContentHandler EMPTY_CONTENT_HANDLER = new DefaultHandler();

    /** The <code>ContentHandler</code> receiving SAX events. */
    protected ContentHandler contentHandler = EMPTY_CONTENT_HANDLER;

    /** The <code>LexicalHandler</code> receiving SAX events. */
    protected LexicalHandler lexicalHandler = DefaultLexicalHandler.NULL_HANDLER;

    /**
     * Set the <code>XMLConsumer</code> that will receive XML data.
     * <br>
     * This method will simply call <code>setContentHandler(consumer)</code>
     * and <code>setLexicalHandler(consumer)</code>.
     */
    public void setConsumer(XMLConsumer consumer) {
        setContentHandler(consumer);
        setLexicalHandler(consumer);
    }

    /**
     * Set the <code>ContentHandler</code> that will receive XML data.
     * <br>
     * Subclasses may retrieve this <code>ContentHandler</code> instance
     * accessing the protected <code>super.contentHandler</code> field.
     */
    public void setContentHandler(ContentHandler handler) {
        this.contentHandler = handler;
    }

    /**
     * Set the <code>LexicalHandler</code> that will receive XML data.
     * <br>
     * Subclasses may retrieve this <code>LexicalHandler</code> instance
     * accessing the protected <code>super.lexicalHandler</code> field.
     */
    public void setLexicalHandler(LexicalHandler handler) {
        this.lexicalHandler = handler;
    }

    /**
     * Reset this interpreter for reuse: both handlers are set back to their
     * no-op defaults and the string table is emptied.
     */
    public void recycle() {
        this.contentHandler = EMPTY_CONTENT_HANDLER;
        this.lexicalHandler = DefaultLexicalHandler.NULL_HANDLER;
        this.list.clear();
    }

    /**
     * This method needs to be used by sub classes to start the parsing of the byte stream.
     * <br>
     * The prolog is verified first, then events are read until {@link #readEvent()}
     * returns <code>-1</code>; each event is forwarded to the content or lexical handler.
     *
     * @throws SAXException if the prolog is invalid, an unknown event code is found,
     *         the data is malformed, or the stream does not end with an
     *         <code>END_DOCUMENT</code> event
     */
    protected void parse() throws SAXException {
        this.list.clear();
        this.checkProlog();

        int event;
        int lastEvent = -1;
        while ((event = readEvent()) != -1) {
            lastEvent = event;
            switch (event) {
                case START_DOCUMENT:
                    contentHandler.startDocument();
                    break;

                case END_DOCUMENT:
                    contentHandler.endDocument();
                    break;

                case START_PREFIX_MAPPING: {
                    final String prefix = this.readString();
                    final String uri = this.readString();
                    contentHandler.startPrefixMapping(prefix, uri);
                    break;
                }

                case END_PREFIX_MAPPING:
                    contentHandler.endPrefixMapping(this.readString());
                    break;

                case START_ELEMENT: {
                    // Attributes precede the element name in the stream.
                    final int attributeCount = this.readAttributes();
                    final AttributesImpl atts = new AttributesImpl();
                    for (int i = 0; i < attributeCount; i++) {
                        final String attUri = this.readString();
                        final String attLocalName = this.readString();
                        final String attQName = this.readString();
                        final String attType = this.readString();
                        final String attValue = this.readString();
                        atts.addAttribute(attUri, attLocalName, attQName, attType, attValue);
                    }
                    final String uri = this.readString();
                    final String localName = this.readString();
                    final String qName = this.readString();
                    contentHandler.startElement(uri, localName, qName, atts);
                    break;
                }

                case END_ELEMENT: {
                    final String uri = this.readString();
                    final String localName = this.readString();
                    final String qName = this.readString();
                    contentHandler.endElement(uri, localName, qName);
                    break;
                }

                case CHARACTERS: {
                    final char[] chars = this.readChars();
                    final int len = trimmedLength(chars);
                    if (len > 0) {
                        contentHandler.characters(chars, 0, len);
                    }
                    break;
                }

                case IGNORABLE_WHITESPACE: {
                    // NOTE(review): replayed through characters(), not ignorableWhitespace();
                    // kept as is because consumers may rely on it - confirm whether intended.
                    final char[] spaces = this.readChars();
                    final int len = trimmedLength(spaces);
                    if (len > 0) {
                        contentHandler.characters(spaces, 0, len);
                    }
                    break;
                }

                case PROCESSING_INSTRUCTION: {
                    final String target = this.readString();
                    final String data = this.readString();
                    contentHandler.processingInstruction(target, data);
                    break;
                }

                case COMMENT: {
                    final char[] chars = this.readChars();
                    final int len = trimmedLength(chars);
                    if (len > 0) {
                        lexicalHandler.comment(chars, 0, len);
                    }
                    break;
                }

                case LOCATOR: {
                    final String publicId = this.readString();
                    final String systemId = this.readString();
                    // Line and column are stored as a single byte each.
                    final int lineNumber = this.readByte();
                    final int columnNumber = this.readByte();
                    final org.xml.sax.helpers.LocatorImpl locator = new org.xml.sax.helpers.LocatorImpl();
                    locator.setPublicId(publicId);
                    locator.setSystemId(systemId);
                    locator.setLineNumber(lineNumber);
                    locator.setColumnNumber(columnNumber);
                    contentHandler.setDocumentLocator(locator);
                    break;
                }

                case START_DTD: {
                    final String name = this.readString();
                    final String publicId = this.readString();
                    final String systemId = this.readString();
                    lexicalHandler.startDTD(name, publicId, systemId);
                    break;
                }

                case END_DTD:
                    lexicalHandler.endDTD();
                    break;

                case START_CDATA:
                    lexicalHandler.startCDATA();
                    break;

                case END_CDATA:
                    lexicalHandler.endCDATA();
                    break;

                case SKIPPED_ENTITY:
                    contentHandler.skippedEntity(this.readString());
                    break;

                case START_ENTITY:
                    lexicalHandler.startEntity(this.readString());
                    break;

                case END_ENTITY:
                    lexicalHandler.endEntity(this.readString());
                    break;

                default:
                    throw new SAXException("parsing error: event not supported: " + event);
            }
        }

        if (lastEvent != END_DOCUMENT) {
            throw new SAXException("parsing error: premature end of stream (lastEvent was " + lastEvent + ")." );
        }
    }

    /**
     * Read the next event code.
     *
     * @return the event code, or <code>-1</code> when no more events are available
     */
    protected int readEvent() throws SAXException {
        return this.read();
    }

    /**
     * Read the attribute count of a start-element event (a 16-bit big-endian word).
     */
    private int readAttributes() throws SAXException {
        return this.readWord();
    }

    /**
     * Read a string. The stream holds either the encoded characters of a new string,
     * which is then appended to the string table, or an index (16- or 32-bit form,
     * marked by the high bit) referring to a string decoded earlier.
     *
     * @throws SAXException if the stream ends early, the data is malformed or an
     *         index points outside the string table
     */
    private String readString() throws SAXException {
        int length = this.readWord();
        if (length >= WORD_INDEX_FLAG) {
            // index value in 16-bits format
            return this.lookupString(length & 0x00007FFF);
        }
        if (length == WORD_ESCAPE_TO_LONG) {
            length = this.readLong();
            // The high bit marks an index. It must be tested as a sign check:
            // the int literal 0x80000000 is Integer.MIN_VALUE, so a
            // "length >= 0x80000000" comparison is true for every int.
            if (length < 0) {
                // index value in 32-bits format
                return this.lookupString(length & 0x7fffffff);
            }
        }

        final char[] chars = this.readChars(length);
        final int len = trimmedLength(chars);
        final String str = (len == 0) ? "" : new String(chars, 0, len);
        list.add(str);
        return str;
    }

    /**
     * Fetch a previously decoded string from the string table.
     *
     * @param index non-negative table index read from the stream
     * @throws SAXException if no string has been stored under that index
     */
    private String lookupString(int index) throws SAXException {
        if (index >= list.size()) {
            throw new SAXException("parsing error: invalid string index: " + index);
        }
        return (String) list.get(index);
    }

    /**
     * Number of chars in the array once trailing zero chars are ignored.
     * The arrays returned by the <code>readChars</code> methods are sized by
     * encoded byte count, so they may be padded with zeros at the end.
     */
    private static int trimmedLength(char[] chars) {
        int len = chars.length;
        while (len > 0 && chars[len - 1] == 0) {
            len--;
        }
        return len;
    }

    /**
     * The returned char array might contain any number of zero bytes
     * at the end
     */
    private char[] readChars() throws SAXException {
        int length = this.readWord();
        if (length == WORD_ESCAPE_TO_LONG) {
            length = this.readLong();
        }
        return this.readChars(length);
    }

    /**
     * Read a single byte from the underlying source.
     *
     * @return the byte read; <code>-1</code> is treated by this class as end of stream,
     *         and other values are expected to lie in the range 0-255
     */
    protected abstract int read() throws SAXException;

    /**
     * Read bytes from the underlying source into the given array.
     *
     * @return the number of bytes read; a value smaller than <code>b.length</code>
     *         is treated by this class as end of stream
     */
    protected abstract int read(byte[] b) throws SAXException;

    /**
     * Decode <code>len</code> bytes holding 1-, 2- and 3-byte encoded characters.
     * <br>
     * The returned char array might contain any number of zero bytes
     * at the end
     *
     * @param len number of encoded bytes to consume
     * @throws SAXException if <code>len</code> is negative, the stream ends early
     *         or the bytes are not validly encoded
     */
    private char[] readChars(int len) throws SAXException {
        if (len < 0) {
            throw new SAXException("parsing error: invalid length: " + len);
        }
        final char[] str = new char[len];
        final byte[] bytearr = new byte[len];
        this.readBytes(bytearr);

        int count = 0;
        int i = 0;
        while (count < len) {
            final int c = bytearr[count] & 0xff;
            switch (c >> 4) {
                case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                    // 0xxxxxxx
                    count++;
                    str[i++] = (char) c;
                    break;
                case 12: case 13: {
                    // 110x xxxx 10xx xxxx
                    count += 2;
                    if (count > len) {
                        throw new SAXException("UTFDataFormatException: partial character at end");
                    }
                    final int char2 = bytearr[count - 1];
                    str[i++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F));
                    break;
                }
                case 14: {
                    // 1110 xxxx 10xx xxxx 10xx xxxx
                    count += 3;
                    if (count > len) {
                        throw new SAXException("UTFDataFormatException: partial character at end");
                    }
                    final int char2 = bytearr[count - 2];
                    final int char3 = bytearr[count - 1];
                    str[i++] = (char) (((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | (char3 & 0x3F));
                    break;
                }
                default:
                    // 10xx xxxx, 1111 xxxx
                    throw new SAXException("UTFDataFormatException");
            }
        }
        return str;
    }

    /**
     * Fill the given array completely from the underlying source.
     *
     * @throws SAXException if fewer bytes than requested are available
     */
    private void readBytes(byte[] b) throws SAXException {
        final int bytesRead = this.read(b);
        if (bytesRead < b.length) {
            throw new SAXException("End of stream is reached.");
        }
    }

    /**
     * Read one byte that must be present.
     *
     * @throws SAXException if the end of the stream has been reached
     */
    private int readByte() throws SAXException {
        final int value = this.read();
        if (value < 0) {
            throw new SAXException("parsing error: premature end of stream.");
        }
        return value;
    }

    /**
     * Read a 16-bit big-endian value.
     */
    private int readWord() throws SAXException {
        final int high = this.readByte();
        final int low = this.readByte();
        return (high << 8) | low;
    }

    /**
     * Read a 32-bit big-endian value. The result is negative when the
     * high bit of the first byte is set.
     */
    private int readLong() throws SAXException {
        final int b1 = this.readByte();
        final int b2 = this.readByte();
        final int b3 = this.readByte();
        final int b4 = this.readByte();
        return (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
    }

    /**
     * Verify that the stream starts with the bytes 'C', 'X', 'M', 'L', 1, 0.
     *
     * @throws SAXException naming the number of bytes matched so far and the
     *         offending value when the prolog differs
     */
    private void checkProlog() throws SAXException {
        final int[] expected = { 'C', 'X', 'M', 'L', 1, 0 };
        for (int valid = 0; valid < expected.length; valid++) {
            final int ch = this.read();
            if (ch != expected[valid]) {
                throw new SAXException("Unrecognized file format (." + valid + "," + ch + ")");
            }
        }
    }
}