// blob: 31798666ea105d1e37097bd5c2902ab692c08f36 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cocoon.transformation;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.avalon.framework.parameters.Parameters;
import org.apache.cocoon.ProcessingException;
import org.apache.cocoon.environment.SourceResolver;
import org.apache.cocoon.xml.dom.DOMBuilder;
import org.w3c.dom.Document;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
/**
* This transformer sieves an incoming stream of xml
* and feeds a DOMBuilder with it.
*
* @author <a href="mailto:paul@luminas.co.uk">Paul Russell</a>
* @author <a href="mailto:haul@apache.org">Christian Haul</a>
* @version CVS $Id$
*/
public abstract class AbstractExtractionTransformer extends AbstractTransformer {

    /** Builder that receives every event while a fragment is being extracted. */
    protected DOMBuilder currentBuilder;

    /**
     * Namespace declarations that are in scope outside of any extraction.
     * Maps each prefix (String) to a non-empty List of URIs (String) used as
     * a stack: the last element is the innermost, i.e. currently effective,
     * declaration. A stack is needed because a nested element may re-declare
     * a prefix; when that inner declaration ends, the outer one must become
     * visible again instead of the prefix disappearing altogether.
     */
    private Map prefixMap;

    /** Nesting depth of extraction-triggering elements; 0 means "not extracting". */
    protected int extractLevel;

    /**
     * Setup the transformer: resets the extraction depth and starts with an
     * empty set of namespace declarations. The arguments are not used here.
     */
    public void setup(SourceResolver resolver, Map objectModel, String src, Parameters parameters)
    throws ProcessingException, SAXException, IOException {
        extractLevel = 0;
        prefixMap = new HashMap();
    }

    /**
     * Drop all per-request state (extraction depth, builder, namespace
     * declarations) and then let the superclass recycle itself.
     */
    public void recycle() {
        this.extractLevel = 0;
        this.currentBuilder = null;
        this.prefixMap = null;
        super.recycle();
    }

    /**
     * Begin the scope of a prefix-URI Namespace mapping. Outside of an
     * extraction the mapping is passed on and remembered so that it can be
     * re-declared on a later extracted document; during an extraction it is
     * sent to the document builder only.
     *
     * @param prefix The Namespace prefix being declared.
     * @param uri The Namespace URI the prefix is mapped to.
     */
    public void startPrefixMapping(String prefix, String uri)
    throws SAXException {
        if (extractLevel == 0) {
            super.startPrefixMapping(prefix, uri);
            List uris = (List) prefixMap.get(prefix);
            if (uris == null) {
                uris = new ArrayList();
                prefixMap.put(prefix, uris);
            }
            // push: this declaration shadows any outer one for the same prefix
            uris.add(uri);
        } else {
            this.currentBuilder.startPrefixMapping(prefix, uri);
        }
    }

    /**
     * End the scope of a prefix-URI mapping. Outside of an extraction the
     * innermost remembered declaration of the prefix is discarded, which
     * makes an outer declaration of the same prefix (if any) effective again.
     *
     * @param prefix The prefix that was being mapped.
     */
    public void endPrefixMapping(String prefix)
    throws SAXException {
        if (extractLevel == 0) {
            super.endPrefixMapping(prefix);
            List uris = (List) prefixMap.get(prefix);
            if (uris != null) {
                // pop the innermost declaration only
                uris.remove(uris.size() - 1);
                if (uris.isEmpty()) {
                    // keep the invariant that stored stacks are never empty
                    prefixMap.remove(prefix);
                }
            }
        } else {
            this.currentBuilder.endPrefixMapping(prefix);
        }
    }

    /**
     * Receive notification of the beginning of an element. Uses
     * {@link #startExtracting(String, String, String, Attributes)} to
     * determine whether to start extracting. Nested triggering tags result
     * in only one document. When the outermost triggering tag is seen, a new
     * document is set up and
     * {@link #startExtractingDocument(String, String, String, Attributes)}
     * is called with the first node of the extracted document.
     *
     * @param uri The Namespace URI, or the empty string if the element has no
     *            Namespace URI or if Namespace
     *            processing is not being performed.
     * @param loc The local name (without prefix), or the empty string if
     *            Namespace processing is not being performed.
     * @param raw The raw XML 1.0 name (with prefix), or the empty string if
     *            raw names are not available.
     * @param a The attributes attached to the element. If there are no
     *            attributes, it shall be an empty Attributes object.
     */
    public void startElement(String uri, String loc, String raw, Attributes a) throws SAXException {
        if (!startExtracting(uri, loc, raw, a)) {
            // ordinary element: route it to wherever events currently go
            if (extractLevel == 0) {
                super.startElement(uri, loc, raw, a);
            } else {
                this.currentBuilder.startElement(uri, loc, raw, a);
            }
            return;
        }

        extractLevel++;
        if (this.getLogger().isDebugEnabled()) {
            getLogger().debug("extractLevel now " + extractLevel + ".");
        }

        if (extractLevel != 1) {
            // nested trigger: just another element of the running extraction
            this.currentBuilder.startElement(uri, loc, raw, a);
            return;
        }

        // outermost trigger: setup new document
        this.currentBuilder = new DOMBuilder();
        this.currentBuilder.startDocument();
        declareInheritedNamespaces();
        // start root node
        startExtractingDocument(uri, loc, raw, a);
    }

    /**
     * Receive notification of the end of an element. While extracting, uses
     * {@link #endExtracting(String, String, String)} to determine whether to
     * stop extracting or not. When the outermost triggering tag ends, calls
     * {@link #endExtractingDocument(String, String, String)}, finishes the
     * document and hands it to {@link #handleExtractedDocument(Document)}.
     *
     * @param uri The Namespace URI, or the empty string if the element has no
     *            Namespace URI or if Namespace
     *            processing is not being performed.
     * @param loc The local name (without prefix), or the empty string if
     *            Namespace processing is not being performed.
     * @param raw The raw XML 1.0 name (with prefix), or the empty string if
     *            raw names are not available.
     */
    public void endElement(String uri, String loc, String raw)
    throws SAXException {
        if (extractLevel == 0) {
            super.endElement(uri, loc, raw);
            return;
        }

        if (!endExtracting(uri, loc, raw)) {
            // ordinary element inside the fragment
            this.currentBuilder.endElement(uri, loc, raw);
            return;
        }

        extractLevel--;
        if (this.getLogger().isDebugEnabled()) {
            getLogger().debug("extractLevel now " + extractLevel + ".");
        }

        if (extractLevel != 0) {
            // end of a nested trigger: extraction continues
            this.currentBuilder.endElement(uri, loc, raw);
            return;
        }

        // end root element
        endExtractingDocument(uri, loc, raw);
        // finish building the document. remove existing prefix mappings.
        undeclareInheritedNamespaces();
        this.currentBuilder.endDocument();
        handleExtractedDocument(this.currentBuilder.getDocument());
        if (this.getLogger().isDebugEnabled()) {
            getLogger().debug("Stored document.");
        }
    }

    /**
     * Declare on the current builder every namespace prefix that is in scope
     * at the point where the extraction starts, using the innermost URI.
     */
    private void declareInheritedNamespaces() throws SAXException {
        Iterator entries = prefixMap.entrySet().iterator();
        while (entries.hasNext()) {
            Map.Entry entry = (Map.Entry) entries.next();
            List uris = (List) entry.getValue();
            this.currentBuilder.startPrefixMapping(
                (String) entry.getKey(),
                (String) uris.get(uris.size() - 1)
            );
        }
    }

    /**
     * End, on the current builder, the prefix mappings that
     * {@link #declareInheritedNamespaces()} started. The remembered
     * declarations are not modified while extracting, so the same prefixes
     * are visited here.
     */
    private void undeclareInheritedNamespaces() throws SAXException {
        Iterator prefixes = prefixMap.keySet().iterator();
        while (prefixes.hasNext()) {
            this.currentBuilder.endPrefixMapping((String) prefixes.next());
        }
    }

    /**
     * Receive notification of character data.
     *
     * @param c The characters from the XML document.
     * @param start The start position in the array.
     * @param len The number of characters to read from the array.
     */
    public void characters(char c[], int start, int len)
    throws SAXException {
        if (extractLevel == 0) {
            super.characters(c, start, len);
        } else {
            this.currentBuilder.characters(c, start, len);
        }
    }

    /**
     * Receive notification of ignorable whitespace in element content.
     *
     * @param c The characters from the XML document.
     * @param start The start position in the array.
     * @param len The number of characters to read from the array.
     */
    public void ignorableWhitespace(char c[], int start, int len)
    throws SAXException {
        if (extractLevel == 0) {
            super.ignorableWhitespace(c, start, len);
        } else {
            this.currentBuilder.ignorableWhitespace(c, start, len);
        }
    }

    /**
     * Receive notification of a processing instruction.
     *
     * @param target The processing instruction target.
     * @param data The processing instruction data, or null if none was
     *             supplied.
     */
    public void processingInstruction(String target, String data)
    throws SAXException {
        if (extractLevel == 0) {
            super.processingInstruction(target, data);
        } else {
            this.currentBuilder.processingInstruction(target, data);
        }
    }

    /**
     * Receive notification of a skipped entity.
     *
     * @param name The name of the skipped entity. If it is a parameter
     *             entity, the name will begin with '%'.
     */
    public void skippedEntity(String name)
    throws SAXException {
        if (extractLevel == 0) {
            super.skippedEntity(name);
        } else {
            this.currentBuilder.skippedEntity(name);
        }
    }

    /**
     * Report the start of DTD declarations, if any.
     *
     * @param name The document type name.
     * @param publicId The declared public identifier for the external DTD
     *                 subset, or null if none was declared.
     * @param systemId The declared system identifier for the external DTD
     *                 subset, or null if none was declared.
     * @throws SAXException if a fragment is currently being extracted.
     */
    public void startDTD(String name, String publicId, String systemId)
    throws SAXException {
        if (extractLevel == 0) {
            super.startDTD(name, publicId, systemId);
        } else {
            throw new SAXException(
                "Recieved startDTD after beginning fragment extraction process."
            );
        }
    }

    /**
     * Report the end of DTD declarations.
     *
     * @throws SAXException if a fragment is currently being extracted.
     */
    public void endDTD()
    throws SAXException {
        if (extractLevel == 0) {
            super.endDTD();
        } else {
            throw new SAXException(
                "Recieved endDTD after beginning fragment extraction process."
            );
        }
    }

    /**
     * Report the beginning of an entity.
     *
     * @param name The name of the entity. If it is a parameter entity, the
     *             name will begin with '%'.
     */
    public void startEntity(String name)
    throws SAXException {
        if (extractLevel == 0) {
            super.startEntity(name);
        } else {
            this.currentBuilder.startEntity(name);
        }
    }

    /**
     * Report the end of an entity.
     *
     * @param name The name of the entity that is ending.
     */
    public void endEntity(String name)
    throws SAXException {
        if (extractLevel == 0) {
            super.endEntity(name);
        } else {
            this.currentBuilder.endEntity(name);
        }
    }

    /**
     * Report the start of a CDATA section.
     */
    public void startCDATA()
    throws SAXException {
        if (extractLevel == 0) {
            super.startCDATA();
        } else {
            this.currentBuilder.startCDATA();
        }
    }

    /**
     * Report the end of a CDATA section.
     */
    public void endCDATA()
    throws SAXException {
        if (extractLevel == 0) {
            super.endCDATA();
        } else {
            this.currentBuilder.endCDATA();
        }
    }

    /**
     * Report an XML comment anywhere in the document.
     *
     * @param ch An array holding the characters in the comment.
     * @param start The starting position in the array.
     * @param len The number of characters to use from the array.
     */
    public void comment(char ch[], int start, int len)
    throws SAXException {
        if (extractLevel == 0) {
            super.comment(ch, start, len);
        } else {
            this.currentBuilder.comment(ch, start, len);
        }
    }

    /**
     * Receive notification of the beginning of an element and signal extraction start.
     * Called for every element start, whether or not an extraction is
     * already in progress.
     *
     * @param uri The Namespace URI, or the empty string if the element has no
     *            Namespace URI or if Namespace
     *            processing is not being performed.
     * @param loc The local name (without prefix), or the empty string if
     *            Namespace processing is not being performed.
     * @param raw The raw XML 1.0 name (with prefix), or the empty string if
     *            raw names are not available.
     * @param a The attributes attached to the element. If there are no
     *            attributes, it shall be an empty Attributes object.
     * @return a <code>boolean</code> value to signal to start extracting
     */
    abstract boolean startExtracting(String uri, String loc, String raw, Attributes a);

    /**
     * Receive notification of the beginning of the extracted Document. Per default send
     * startElement message to document builder. Override if necessary. Must override
     * {@link #endExtractingDocument(String, String, String)} as well.
     *
     * @param uri The Namespace URI, or the empty string if the element has no
     *            Namespace URI or if Namespace
     *            processing is not being performed.
     * @param loc The local name (without prefix), or the empty string if
     *            Namespace processing is not being performed.
     * @param raw The raw XML 1.0 name (with prefix), or the empty string if
     *            raw names are not available.
     * @param a The attributes attached to the element. If there are no
     *            attributes, it shall be an empty Attributes object.
     */
    public void startExtractingDocument(String uri, String loc, String raw, Attributes a) throws SAXException {
        this.currentBuilder.startElement(uri, loc, raw, a);
    }

    /**
     * Receive notification of the end of an element and signal extraction end.
     * Only called while an extraction is in progress.
     *
     * @param uri The Namespace URI, or the empty string if the element has no
     *            Namespace URI or if Namespace
     *            processing is not being performed.
     * @param loc The local name (without prefix), or the empty string if
     *            Namespace processing is not being performed.
     * @param raw The raw XML 1.0 name (with prefix), or the empty string if
     *            raw names are not available.
     * @return a <code>boolean</code> value to signal to stop extracting
     */
    abstract boolean endExtracting(String uri, String loc, String raw);

    /**
     * Receive notification of the end of the extracted Document. Per default,
     * send endElement message to document builder. Override if necessary.
     * Must override
     * {@link #startExtractingDocument(String, String, String, Attributes)}
     * as well.
     *
     * @param uri The Namespace URI, or the empty string if the element has no
     *            Namespace URI or if Namespace
     *            processing is not being performed.
     * @param loc The local name (without prefix), or the empty string if
     *            Namespace processing is not being performed.
     * @param raw The raw XML 1.0 name (with prefix), or the empty string if
     *            raw names are not available.
     */
    public void endExtractingDocument(String uri, String loc, String raw) throws SAXException {
        this.currentBuilder.endElement(uri, loc, raw);
    }

    /**
     * Receive the finished extracted Document. Called once the outermost
     * triggering element has ended and the builder's document is complete.
     *
     * @param doc a <code>Document</code> value
     */
    abstract void handleExtractedDocument(Document doc);
}