blob: 1b7917242020db6e88c62401559849b04c67584f [file] [log] [blame]
// Copyright 2004 The Apache Software Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package org.apache.tapestry.util.xml;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.tapestry.ApplicationRuntimeException;
import org.apache.tapestry.ILocation;
import org.apache.tapestry.IResourceLocation;
import org.apache.tapestry.Location;
import org.apache.tapestry.Tapestry;
import org.apache.tapestry.util.RegexpMatcher;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;
/**
* A simplified version of {@link org.apache.commons.digester.Digester}.
* This version is without as many bells and whistles but has some key features needed when parsing
* a document (rather than a configuration file):
* <br>
* <ul>
* <li>Notifications for each bit of text</ul>
* <li>Tracking of exact location within the document.</li>
* </ul>
*
* <p>
* Like Digester, there's an object stack and a rule stack. The rules are much
* simpler (more coding), in that there's a one-to-one relationship between
* an element and a rule.
*
* <p>
* Based on SAX2.
*
* @author Howard Lewis Ship
* @version $Id$
* @since 3.0
*/
public class RuleDirectedParser extends DefaultHandler
{
private static final Log LOG = LogFactory.getLog(RuleDirectedParser.class);
private IResourceLocation _documentLocation;
private List _ruleStack = new ArrayList();
private List _objectStack = new ArrayList();
private Object _documentObject;
private Locator _locator;
private int _line = -1;
private int _column = -1;
private ILocation _location;
private static SAXParserFactory _parserFactory;
private SAXParser _parser;
private RegexpMatcher _matcher;
private String _uri;
private String _localName;
private String _qName;
/**
* Map of {@link IRule} keyed on the local name
* of the element.
*/
private Map _ruleMap = new HashMap();
/**
* Used to accumlate content provided by
* {@link org.xml.sax.ContentHandler#characters(char[], int, int)}.
*/
private StringBuffer _contentBuffer = new StringBuffer();
/**
* Map of paths to external entities (such as the DTD) keyed on public id.
*/
private Map _entities = new HashMap();
public Object parse(IResourceLocation documentLocation)
{
if (LOG.isDebugEnabled())
LOG.debug("Parsing: " + documentLocation);
try
{
_documentLocation = documentLocation;
URL url = documentLocation.getResourceURL();
if (url == null)
throw new DocumentParseException(
Tapestry.format("RuleDrivenParser.resource-missing", documentLocation),
documentLocation,
null,
null);
return parse(url);
}
finally
{
_documentLocation = null;
_ruleStack.clear();
_objectStack.clear();
_documentObject = null;
_uri = null;
_localName = null;
_qName = null;
_line = -1;
_column = -1;
_location = null;
_locator = null;
_contentBuffer.setLength(0);
}
}
protected Object parse(URL url)
{
if (_parser == null)
_parser = constructParser();
InputStream stream = null;
try
{
stream = url.openStream();
}
catch (IOException ex)
{
throw new DocumentParseException(
Tapestry.format("RuleDrivenParser.unable-to-open-resource", url),
_documentLocation,
null,
ex);
}
InputSource source = new InputSource(stream);
try
{
_parser.parse(source, this);
stream.close();
}
catch (Exception ex)
{
throw new DocumentParseException(
Tapestry.format("RuleDrivenParser.parse-error", url, ex.getMessage()),
_documentLocation,
getLocation(),
ex);
}
if (LOG.isDebugEnabled())
LOG.debug("Document parsed as: " + _documentObject);
return _documentObject;
}
/**
* Returns an {@link ILocation} representing the
* current position within the document (depending
* on the parser, this may be accurate to
* column number level).
*/
public ILocation getLocation()
{
if (_locator == null)
return null;
int line = _locator.getLineNumber();
int column = _locator.getColumnNumber();
if (_line != line || _column != column)
{
_location = null;
_line = line;
_column = column;
}
if (_location == null)
_location = new Location(_documentLocation, _line, _column);
return _location;
}
/**
* Pushes an object onto the object stack. The first object
* pushed is the "document object", the root object returned
* by the parse.
*/
public void push(Object object)
{
if (_documentObject == null)
_documentObject = object;
push(_objectStack, object, "object stack");
}
/**
* Returns the top object on the object stack.
*/
public Object peek()
{
return peek(_objectStack, 0);
}
/**
* Returns an object within the object stack, at depth.
* Depth 0 is the top object, depth 1 is the next-to-top object,
* etc.
*/
public Object peek(int depth)
{
return peek(_objectStack, depth);
}
/**
* Removes and returns the top object on the object stack.
*/
public Object pop()
{
return pop(_objectStack, "object stack");
}
private Object pop(List list, String name)
{
Object result = list.remove(list.size() - 1);
if (LOG.isDebugEnabled())
LOG.debug("Popped " + result + " off " + name + " (at " + getLocation() + ")");
return result;
}
private Object peek(List list, int depth)
{
return list.get(list.size() - 1 - depth);
}
private void push(List list, Object object, String name)
{
if (LOG.isDebugEnabled())
LOG.debug("Pushing " + object + " onto " + name + " (at " + getLocation() + ")");
list.add(object);
}
/**
* Pushes a new rule onto the rule stack.
*/
protected void pushRule(IRule rule)
{
push(_ruleStack, rule, "rule stack");
}
/**
* Returns the top rule on the stack.
*/
protected IRule peekRule()
{
return (IRule) peek(_ruleStack, 0);
}
protected IRule popRule()
{
return (IRule) pop(_ruleStack, "rule stack");
}
public void addRule(String localElementName, IRule rule)
{
_ruleMap.put(localElementName, rule);
}
/**
* Registers
* a public id and corresponding input source. Generally, the source
* is a wrapper around an input stream to a package resource.
*
* @param publicId the public identifier to be registerred, generally
* the publicId of a DTD related to the document being parsed
* @param entityPath the resource path of the entity, typically a DTD
* file. Relative files names are expected to be stored in the same package
* as the class file, otherwise a leading slash is an absolute pathname
* within the classpath.
*
**/
public void registerEntity(String publicId, String entityPath)
{
if (LOG.isDebugEnabled())
LOG.debug("Registering " + publicId + " as " + entityPath);
if (_entities == null)
_entities = new HashMap();
_entities.put(publicId, entityPath);
}
protected IRule selectRule(String localName, Attributes attributes)
{
IRule rule = (IRule) _ruleMap.get(localName);
if (rule == null)
throw new DocumentParseException(
Tapestry.format("RuleDrivenParser.no-rule-for-element", localName),
_documentLocation,
getLocation(),
null);
return rule;
}
/**
* Uses the {@link Locator} to track the position
* in the document as a {@link ILocation}. This is invoked
* once (before the initial element is parsed) and
* the Locator is retained and queried as to
* the current file location.
*
* @see #getLocation()
*/
public void setDocumentLocator(Locator locator)
{
_locator = locator;
}
/**
* Accumulates the content in a buffer; the concatinated content
* is provided to the top rule just before any start or end tag.
*/
public void characters(char[] ch, int start, int length) throws SAXException
{
_contentBuffer.append(ch, start, length);
}
/**
* Pops the top rule off the stack and
* invokes {@link IRule#endElement(RuleDirectedParser)}.
*/
public void endElement(String uri, String localName, String qName) throws SAXException
{
fireContentRule();
_uri = uri;
_localName = localName;
_qName = qName;
popRule().endElement(this);
}
/**
* Ignorable content is ignored.
*/
public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException
{
}
/**
* Invokes {@link #selectRule(String, Attributes)} to choose a new rule,
* which is pushed onto the rule stack, then invokes
* {@link IRule#startElement(RuleDirectedParser, Attributes)}.
*/
public void startElement(String uri, String localName, String qName, Attributes attributes)
throws SAXException
{
fireContentRule();
_uri = uri;
_localName = localName;
_qName = qName;
String name = extractName(uri, localName, qName);
IRule newRule = selectRule(name, attributes);
pushRule(newRule);
newRule.startElement(this, attributes);
}
private String extractName(String uri, String localName, String qName)
{
return Tapestry.isBlank(localName) ? qName : localName;
}
/**
* Uses {@link javax.xml.parsers.SAXParserFactory} to create a instance
* of a validation SAX2 parser.
*/
protected synchronized SAXParser constructParser()
{
if (_parserFactory == null)
{
_parserFactory = SAXParserFactory.newInstance();
configureParserFactory(_parserFactory);
}
try
{
return _parserFactory.newSAXParser();
}
catch (SAXException ex)
{
throw new ApplicationRuntimeException(ex);
}
catch (ParserConfigurationException ex)
{
throw new ApplicationRuntimeException(ex);
}
}
/**
* Configures a {@link SAXParserFactory} before
* {@link SAXParserFactory#newSAXParser()} is invoked.
* The default implementation sets validating to true
* and namespaceAware to false,
*/
protected void configureParserFactory(SAXParserFactory factory)
{
factory.setValidating(true);
factory.setNamespaceAware(false);
}
/**
* Throws the exception.
*/
public void error(SAXParseException ex) throws SAXException
{
fatalError(ex);
}
/**
* Throws the exception.
*/
public void fatalError(SAXParseException ex) throws SAXException
{
// Sometimes, a bad parse "corrupts" a parser so that it doesn't
// work properly for future parses (of valid documents),
// so discard it here.
_parser = null;
throw ex;
}
/**
* Throws the exception.
*/
public void warning(SAXParseException ex) throws SAXException
{
fatalError(ex);
}
public InputSource resolveEntity(String publicId, String systemId) throws SAXException
{
String entityPath = null;
if (LOG.isDebugEnabled())
LOG.debug(
"Attempting to resolve entity; publicId = " + publicId + " systemId = " + systemId);
if (_entities != null)
entityPath = (String) _entities.get(publicId);
if (entityPath == null)
{
if (LOG.isDebugEnabled())
LOG.debug("Entity not found, using " + systemId);
return null;
}
InputStream stream = getClass().getResourceAsStream(entityPath);
InputSource result = new InputSource(stream);
if (result != null && LOG.isDebugEnabled())
LOG.debug("Resolved " + publicId + " as " + result + " (for " + entityPath + ")");
return result;
}
/**
* Validates that the input value matches against the specified
* Perl5 pattern. If valid, the method simply returns.
* If not a match, then an error message is generated (using the
* errorKey and the input value) and a
* {@link InvalidStringException} is thrown.
*
**/
public void validate(String value, String pattern, String errorKey)
throws DocumentParseException
{
if (_matcher == null)
_matcher = new RegexpMatcher();
if (_matcher.matches(pattern, value))
return;
throw new InvalidStringException(Tapestry.format(errorKey, value), value, getLocation());
}
public IResourceLocation getDocumentLocation()
{
return _documentLocation;
}
/**
* Returns the localName for the current element.
* @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
*/
public String getLocalName()
{
return _localName;
}
/**
* Returns the qualified name for the current element.
* @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
*/
public String getQName()
{
return _qName;
}
/**
* Returns the URI for the current element.
* @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
*/
public String getUri()
{
return _uri;
}
private void fireContentRule()
{
String content = _contentBuffer.toString();
_contentBuffer.setLength(0);
if (!_ruleStack.isEmpty())
peekRule().content(this, content);
}
}