| // Copyright 2004, 2005 The Apache Software Foundation |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package org.apache.tapestry.util.xml; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.hivemind.ApplicationRuntimeException; |
| import org.apache.hivemind.HiveMind; |
| import org.apache.hivemind.Location; |
| import org.apache.hivemind.Resource; |
| import org.apache.hivemind.impl.LocationImpl; |
| import org.apache.tapestry.Tapestry; |
| import org.apache.tapestry.util.RegexpMatcher; |
| import org.xml.sax.*; |
| import org.xml.sax.helpers.DefaultHandler; |
| |
| import javax.xml.parsers.ParserConfigurationException; |
| import javax.xml.parsers.SAXParser; |
| import javax.xml.parsers.SAXParserFactory; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.net.URL; |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| |
| /** |
| * A simplified version of org.apache.commons.digester.Digester. This version is without as |
| * many bells and whistles but has some key features needed when parsing a document (rather than a |
| * configuration file): <br> |
| * <ul> |
| * <li>Notifications for each bit of text</li> |
| * <li>Tracking of exact location within the document.</li> |
| * </ul> |
| * <p> |
| * Like Digester, there's an object stack and a rule stack. The rules are much simpler (more |
| * coding), in that there's a one-to-one relationship between an element and a rule. |
| * <p> |
| * Based on SAX2. |
| * |
| * @author Howard Lewis Ship |
| * @since 3.0 |
| */ |
| |
| public class RuleDirectedParser extends DefaultHandler |
| { |
| private static final Log LOG = LogFactory.getLog(RuleDirectedParser.class); |
| |
| private static SAXParserFactory _parserFactory; |
| |
| private Resource _documentLocation; |
| |
| private List _ruleStack = new ArrayList(); |
| |
| private List _objectStack = new ArrayList(); |
| |
| private Object _documentObject; |
| |
| private Locator _locator; |
| |
| private int _line = -1; |
| |
| private int _column = -1; |
| |
| private Location _location; |
| |
| private SAXParser _parser; |
| |
| private RegexpMatcher _matcher; |
| |
| private String _uri; |
| |
| private String _localName; |
| |
| private String _qName; |
| |
| /** |
| * Map of {@link IRule}keyed on the local name of the element. |
| */ |
| private Map _ruleMap = new HashMap(); |
| |
| /** |
| * Used to accumlate content provided by |
| * {@link org.xml.sax.ContentHandler#characters(char[], int, int)}. |
| */ |
| |
| private StringBuffer _contentBuffer = new StringBuffer(); |
| |
| /** |
| * Map of paths to external entities (such as the DTD) keyed on public id. |
| */ |
| |
| private Map _entities = new HashMap(); |
| |
| public Object parse(Resource documentLocation) |
| { |
| if (LOG.isDebugEnabled()) |
| LOG.debug("Parsing: " + documentLocation); |
| |
| try |
| { |
| _documentLocation = documentLocation; |
| |
| URL url = documentLocation.getResourceURL(); |
| |
| if (url == null) |
| throw new DocumentParseException(Tapestry.format("RuleDrivenParser.resource-missing", documentLocation), documentLocation); |
| |
| return parse(url); |
| } |
| finally |
| { |
| _documentLocation = null; |
| _ruleStack.clear(); |
| _objectStack.clear(); |
| _documentObject = null; |
| |
| _uri = null; |
| _localName = null; |
| _qName = null; |
| |
| _line = -1; |
| _column = -1; |
| _location = null; |
| _locator = null; |
| |
| _contentBuffer.setLength(0); |
| } |
| } |
| |
| protected Object parse(URL url) |
| { |
| if (_parser == null) |
| _parser = constructParser(); |
| |
| InputStream stream = null; |
| |
| try |
| { |
| stream = url.openStream(); |
| } |
| catch (IOException ex) |
| { |
| throw new DocumentParseException(Tapestry.format( |
| "RuleDrivenParser.unable-to-open-resource", |
| url), _documentLocation, ex); |
| } |
| |
| InputSource source = new InputSource(stream); |
| |
| try |
| { |
| _parser.parse(source, this); |
| |
| stream.close(); |
| } |
| catch (Exception ex) |
| { |
| throw new DocumentParseException(Tapestry.format( |
| "RuleDrivenParser.parse-error", |
| url, |
| ex.getMessage()), getLocation(), ex); |
| } |
| |
| if (LOG.isDebugEnabled()) |
| LOG.debug("Document parsed as: " + _documentObject); |
| |
| return _documentObject; |
| } |
| |
| /** |
| * Returns an {@link Location}representing the current position within the document (depending |
| * on the parser, this may be accurate to column number level). |
| */ |
| |
| public Location getLocation() |
| { |
| if (_locator == null) |
| return null; |
| |
| int line = _locator.getLineNumber(); |
| int column = _locator.getColumnNumber(); |
| |
| if (_line != line || _column != column) |
| { |
| _location = null; |
| _line = line; |
| _column = column; |
| } |
| |
| if (_location == null) |
| _location = new LocationImpl(_documentLocation, _line, _column); |
| |
| return _location; |
| } |
| |
| /** |
| * Pushes an object onto the object stack. The first object pushed is the "document object", the |
| * root object returned by the parse. |
| */ |
| public void push(Object object) |
| { |
| if (_documentObject == null) |
| _documentObject = object; |
| |
| push(_objectStack, object, "object stack"); |
| } |
| |
| /** |
| * Returns the top object on the object stack. |
| */ |
| public Object peek() |
| { |
| return peek(_objectStack, 0); |
| } |
| |
| /** |
| * Returns an object within the object stack, at depth. Depth 0 is the top object, depth 1 is |
| * the next-to-top object, etc. |
| */ |
| |
| public Object peek(int depth) |
| { |
| return peek(_objectStack, depth); |
| } |
| |
| /** |
| * Removes and returns the top object on the object stack. |
| */ |
| public Object pop() |
| { |
| return pop(_objectStack, "object stack"); |
| } |
| |
| private Object pop(List list, String name) |
| { |
| Object result = list.remove(list.size() - 1); |
| |
| if (LOG.isDebugEnabled()) |
| LOG.debug("Popped " + result + " off " + name + " (at " + getLocation() + ")"); |
| |
| return result; |
| } |
| |
| private Object peek(List list, int depth) |
| { |
| return list.get(list.size() - 1 - depth); |
| } |
| |
| private void push(List list, Object object, String name) |
| { |
| if (LOG.isDebugEnabled()) |
| LOG.debug("Pushing " + object + " onto " + name + " (at " + getLocation() + ")"); |
| |
| list.add(object); |
| } |
| |
| /** |
| * Pushes a new rule onto the rule stack. |
| */ |
| |
| protected void pushRule(IRule rule) |
| { |
| push(_ruleStack, rule, "rule stack"); |
| } |
| |
| /** |
| * Returns the top rule on the stack. |
| */ |
| |
| protected IRule peekRule() |
| { |
| return (IRule) peek(_ruleStack, 0); |
| } |
| |
| protected IRule popRule() |
| { |
| return (IRule) pop(_ruleStack, "rule stack"); |
| } |
| |
| public void addRule(String localElementName, IRule rule) |
| { |
| _ruleMap.put(localElementName, rule); |
| } |
| |
| /** |
| * Registers a public id and corresponding input source. Generally, the source is a wrapper |
| * around an input stream to a package resource. |
| * |
| * @param publicId |
| * the public identifier to be registerred, generally the publicId of a DTD related |
| * to the document being parsed |
| * @param entityPath |
| * the resource path of the entity, typically a DTD file. Relative files names are |
| * expected to be stored in the same package as the class file, otherwise a leading |
| * slash is an absolute pathname within the classpath. |
| */ |
| |
| public void registerEntity(String publicId, String entityPath) |
| { |
| if (LOG.isDebugEnabled()) |
| LOG.debug("Registering " + publicId + " as " + entityPath); |
| |
| if (_entities == null) |
| _entities = new HashMap(); |
| |
| _entities.put(publicId, entityPath); |
| } |
| |
| protected IRule selectRule(String localName, Attributes attributes) |
| { |
| IRule rule = (IRule) _ruleMap.get(localName); |
| |
| if (rule == null) |
| throw new DocumentParseException(Tapestry.format( |
| "RuleDrivenParser.no-rule-for-element", |
| localName), getLocation()); |
| |
| return rule; |
| } |
| |
| /** |
| * Uses the {@link Locator}to track the position in the document as a {@link Location}. This |
| * is invoked once (before the initial element is parsed) and the Locator is retained and |
| * queried as to the current file location. |
| * |
| * @see #getLocation() |
| */ |
| public void setDocumentLocator(Locator locator) |
| { |
| _locator = locator; |
| } |
| |
| /** |
| * Accumulates the content in a buffer; the concatinated content is provided to the top rule |
| * just before any start or end tag. |
| */ |
| public void characters(char[] ch, int start, int length) throws SAXException |
| { |
| _contentBuffer.append(ch, start, length); |
| } |
| |
| /** |
| * Pops the top rule off the stack and invokes {@link IRule#endElement(RuleDirectedParser)}. |
| */ |
| public void endElement(String uri, String localName, String qName) throws SAXException |
| { |
| fireContentRule(); |
| |
| _uri = uri; |
| _localName = localName; |
| _qName = qName; |
| |
| popRule().endElement(this); |
| } |
| |
| /** |
| * Ignorable content is ignored. |
| */ |
| public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException |
| { |
| } |
| |
| /** |
| * Invokes {@link #selectRule(String, Attributes)}to choose a new rule, which is pushed onto |
| * the rule stack, then invokes {@link IRule#startElement(RuleDirectedParser, Attributes)}. |
| */ |
| public void startElement(String uri, String localName, String qName, Attributes attributes) |
| throws SAXException |
| { |
| fireContentRule(); |
| |
| _uri = uri; |
| _localName = localName; |
| _qName = qName; |
| |
| String name = extractName(uri, localName, qName); |
| |
| IRule newRule = selectRule(name, attributes); |
| |
| pushRule(newRule); |
| |
| newRule.startElement(this, attributes); |
| } |
| |
| private String extractName(String uri, String localName, String qName) |
| { |
| return HiveMind.isBlank(localName) ? qName : localName; |
| } |
| |
| /** |
| * Uses {@link javax.xml.parsers.SAXParserFactory}to create a instance of a validation SAX2 |
| * parser. |
| */ |
| protected synchronized SAXParser constructParser() |
| { |
| if (_parserFactory == null) |
| { |
| _parserFactory = SAXParserFactory.newInstance(); |
| configureParserFactory(_parserFactory); |
| } |
| |
| try |
| { |
| return _parserFactory.newSAXParser(); |
| } |
| catch (SAXException ex) |
| { |
| throw new ApplicationRuntimeException(ex); |
| } |
| catch (ParserConfigurationException ex) |
| { |
| throw new ApplicationRuntimeException(ex); |
| } |
| |
| } |
| |
| /** |
| * Configures a {@link SAXParserFactory}before {@link SAXParserFactory#newSAXParser()}is |
| * invoked. The default implementation sets validating to true and namespaceAware to false, |
| */ |
| |
| protected void configureParserFactory(SAXParserFactory factory) |
| { |
| factory.setValidating(true); |
| factory.setNamespaceAware(false); |
| } |
| |
| /** |
| * Throws the exception. |
| */ |
| public void error(SAXParseException ex) throws SAXException |
| { |
| fatalError(ex); |
| } |
| |
| /** |
| * Throws the exception. |
| */ |
| public void fatalError(SAXParseException ex) throws SAXException |
| { |
| // Sometimes, a bad parse "corrupts" a parser so that it doesn't |
| // work properly for future parses (of valid documents), |
| // so discard it here. |
| |
| _parser = null; |
| |
| throw ex; |
| } |
| |
| /** |
| * Throws the exception. |
| */ |
| public void warning(SAXParseException ex) throws SAXException |
| { |
| fatalError(ex); |
| } |
| |
| public InputSource resolveEntity(String publicId, String systemId) throws SAXException |
| { |
| String entityPath = null; |
| |
| if (LOG.isDebugEnabled()) |
| LOG.debug("Attempting to resolve entity; publicId = " + publicId + " systemId = " |
| + systemId); |
| |
| if (_entities != null) |
| entityPath = (String) _entities.get(publicId); |
| |
| if (entityPath == null) |
| { |
| if (LOG.isDebugEnabled()) |
| LOG.debug("Entity not found, using " + systemId); |
| |
| return null; |
| } |
| |
| InputStream stream = getClass().getResourceAsStream(entityPath); |
| |
| InputSource result = new InputSource(stream); |
| |
| if (result != null && LOG.isDebugEnabled()) |
| LOG.debug("Resolved " + publicId + " as " + result + " (for " + entityPath + ")"); |
| |
| return result; |
| } |
| |
| /** |
| * Validates that the input value matches against the specified Perl5 pattern. If valid, the |
| * method simply returns. If not a match, then an error message is generated (using the errorKey |
| * and the input value) and a {@link InvalidStringException}is thrown. |
| */ |
| |
| public void validate(String value, String pattern, String errorKey) |
| { |
| if (_matcher == null) |
| _matcher = new RegexpMatcher(); |
| |
| if (_matcher.matches(pattern, value)) |
| return; |
| |
| throw new InvalidStringException(Tapestry.format(errorKey, value), value, getLocation()); |
| } |
| |
| public Resource getDocumentLocation() |
| { |
| return _documentLocation; |
| } |
| |
| /** |
| * Returns the localName for the current element. |
| * |
| * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, |
| * java.lang.String, org.xml.sax.Attributes) |
| */ |
| public String getLocalName() |
| { |
| return _localName; |
| } |
| |
| /** |
| * Returns the qualified name for the current element. |
| * |
| * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, |
| * java.lang.String, org.xml.sax.Attributes) |
| */ |
| public String getQName() |
| { |
| return _qName; |
| } |
| |
| /** |
| * Returns the URI for the current element. |
| * |
| * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, |
| * java.lang.String, org.xml.sax.Attributes) |
| */ |
| public String getUri() |
| { |
| return _uri; |
| } |
| |
| private void fireContentRule() |
| { |
| String content = _contentBuffer.toString(); |
| _contentBuffer.setLength(0); |
| |
| if (!_ruleStack.isEmpty()) |
| peekRule().content(this, content); |
| } |
| |
| } |