| /* |
| * ==================================================================== |
| * The Apache Software License, Version 1.1 |
| * |
| * Copyright (c) 2002 The Apache Software Foundation. All rights |
| * reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * |
| * 3. The end-user documentation included with the redistribution, |
| * if any, must include the following acknowledgment: |
| * "This product includes software developed by the |
| * Apache Software Foundation (http://www.apache.org/)." |
| * Alternately, this acknowledgment may appear in the software itself, |
| * if and wherever such third-party acknowledgments normally appear. |
| * |
| * 4. The names "Apache" and "Apache Software Foundation" and |
| * "Apache Tapestry" must not be used to endorse or promote products |
| * derived from this software without prior written permission. For |
| * written permission, please contact apache@apache.org. |
| * |
| * 5. Products derived from this software may not be called "Apache", |
| * "Apache Tapestry", nor may "Apache" appear in their name, without |
| * prior written permission of the Apache Software Foundation. |
| * |
| * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
| * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
| * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
| * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| * ==================================================================== |
| * |
| * This software consists of voluntary contributions made by many |
| * individuals on behalf of the Apache Software Foundation. For more |
| * information on the Apache Software Foundation, please see |
| * <http://www.apache.org/>. |
| */ |
| package net.sf.tapestry.util.xml; |
| |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.util.HashMap; |
| import java.util.Map; |
| |
| import javax.xml.parsers.DocumentBuilder; |
| import javax.xml.parsers.DocumentBuilderFactory; |
| import javax.xml.parsers.ParserConfigurationException; |
| |
| import net.sf.tapestry.ApplicationRuntimeException; |
| import net.sf.tapestry.Tapestry; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.oro.text.regex.MalformedPatternException; |
| import org.apache.oro.text.regex.Pattern; |
| import org.apache.oro.text.regex.PatternCompiler; |
| import org.apache.oro.text.regex.PatternMatcher; |
| import org.apache.oro.text.regex.Perl5Compiler; |
| import org.apache.oro.text.regex.Perl5Matcher; |
| import org.w3c.dom.Document; |
| import org.w3c.dom.Element; |
| import org.w3c.dom.NamedNodeMap; |
| import org.w3c.dom.Node; |
| import org.w3c.dom.Text; |
| import org.xml.sax.EntityResolver; |
| import org.xml.sax.ErrorHandler; |
| import org.xml.sax.InputSource; |
| import org.xml.sax.SAXException; |
| import org.xml.sax.SAXParseException; |
| |
| /** |
| * A wrapper around {@link DocumentBuilder} (itself a wrapper around |
| * some XML parser), this class provides error handling and entity |
| * resolving. |
| * |
| * @version $Id$ |
| * @author Howard Lewis Ship |
| * @since 0.2.10 |
| * |
| **/ |
| |
| public abstract class AbstractDocumentParser implements ErrorHandler, EntityResolver |
| { |
| private static final Log LOG = LogFactory.getLog(AbstractDocumentParser.class); |
| |
| private DocumentBuilder _builder; |
| private String _resourcePath; |
| |
| /** |
| * Map used to resolve public identifiers to corresponding InputSource. |
| * |
| **/ |
| |
| private Map _entities; |
| |
| |
| /** |
| * |
| * Compiler used to convert pattern strings into {@link Pattern} |
| * instances. |
| * |
| * @since 2.2 |
| * |
| **/ |
| |
| protected PatternCompiler _patternCompiler; |
| |
| |
| |
| /** |
| * |
| * Matcher used to match patterns against input strings. |
| * |
| * @since 2.2 |
| * |
| **/ |
| |
| protected PatternMatcher _matcher; |
| |
| |
| |
| /** |
| * |
| * Map of compiled {@link Pattern}s, keyed on pattern |
| * string. Patterns are lazily compiled as needed. |
| * |
| * @since 2.2 |
| * |
| **/ |
| |
| protected Map _compiledPatterns; |
| |
| /** |
| * Simple property names match Java variable names; a leading letter |
| * (or underscore), followed by letters, numbers and underscores. |
| * |
| * @since 2.2 |
| * |
| **/ |
| |
| public static final String SIMPLE_PROPERTY_NAME_PATTERN = "^_?[a-zA-Z]\\w*$"; |
| |
| |
| /** |
| * Invoked by subclasses (usually inside thier constructor) to register |
| * a public id and corresponding input source. Generally, the source |
| * is a wrapper around an input stream to a package resource. |
| * |
| * @param publicId the public identifier to be registerred, generally |
| * the publicId of a DTD related to the document being parsed |
| * @param entityPath the resource path of the entity, typically a DTD |
| * file. Relative files names are expected to be stored in the same package |
| * as the class file, otherwise a leading slash is an absolute pathname |
| * within the classpath. |
| * |
| **/ |
| |
| protected void register(String publicId, String entityPath) |
| { |
| if (LOG.isDebugEnabled()) |
| LOG.debug("Registering " + publicId + " as " + entityPath); |
| |
| if (_entities == null) |
| _entities = new HashMap(); |
| |
| _entities.put(publicId, entityPath); |
| } |
| |
| public String getResourcePath() |
| { |
| return _resourcePath; |
| } |
| |
| public void setResourcePath(String value) |
| { |
| _resourcePath = null; |
| } |
| |
| /** |
| * Invoked by subclasses to parse a document. Obtains (or re-uses) a |
| * {@link DocumentBuilder} and parses the document from the {@link InputSource}. |
| * |
| * @param source source from which to read the document |
| * @param resourcePath a description of the source, used in errors |
| * @param rootElementName the expected root element of the {@link Document}, or |
| * null if the rootElementName isn't known before parsing |
| * |
| * @throws DocumentParseException wrapped around {@link SAXParseException} or |
| * {@link IOException}, or if the root element is wrong. |
| *` |
| **/ |
| |
| protected Document parse(InputSource source, String resourcePath, String rootElementName) |
| throws DocumentParseException |
| { |
| boolean error = true; |
| |
| if (LOG.isDebugEnabled()) |
| LOG.debug( |
| "Parsing " |
| + source |
| + " (" |
| + resourcePath |
| + ") for element " |
| + (rootElementName != null ? rootElementName : "Unknown")); |
| |
| try |
| { |
| if (_builder == null) |
| _builder = constructBuilder(); |
| |
| Document document = _builder.parse(source); |
| |
| error = false; |
| |
| if (rootElementName != null) |
| validateRootElement(document, rootElementName, resourcePath); |
| |
| return document; |
| } |
| catch (SAXParseException ex) |
| { |
| // This constructor captures the line number and column number |
| |
| throw new DocumentParseException( |
| Tapestry.getString("AbstractDocumentParser.unable-to-parse", resourcePath, ex.getMessage()), |
| resourcePath, |
| ex); |
| } |
| catch (SAXException ex) |
| { |
| throw new DocumentParseException( |
| Tapestry.getString("AbstractDocumentParser.unable-to-parse", resourcePath, ex.getMessage()), |
| resourcePath, |
| ex); |
| } |
| catch (IOException ex) |
| { |
| throw new DocumentParseException( |
| Tapestry.getString("AbstractDocumentParser.unable-to-read", resourcePath, ex.getMessage()), |
| resourcePath, |
| ex); |
| } |
| catch (ParserConfigurationException ex) |
| { |
| throw new DocumentParseException( |
| Tapestry.getString("AbstractDocumentParser.unable-to-construct-builder", ex.getMessage()), |
| ex); |
| } |
| finally |
| { |
| // If there was an error, discard the builder --- it may be in |
| // an unknown and unusable state. |
| |
| if (error && _builder != null) |
| { |
| LOG.debug("Discarding builder due to parse error."); |
| _builder = null; |
| } |
| } |
| } |
| |
| /** |
| * Validates that the root element of the specified document matches the expected |
| * root element name. |
| * |
| * @throws DocumentParseException if the root element is not as expected. |
| * |
| * @since 2.2 |
| * |
| **/ |
| |
| protected void validateRootElement(Document document, String rootElementName, String resourcePath) |
| throws DocumentParseException |
| { |
| |
| Element root = document.getDocumentElement(); |
| if (!root.getTagName().equals(rootElementName)) |
| { |
| throw new DocumentParseException( |
| Tapestry.getString( |
| "AbstractDocumentParser.incorrect-document-type", |
| rootElementName, |
| root.getTagName()), |
| resourcePath, |
| null); |
| } |
| } |
| |
| /** |
| * Throws the exception, which is caught and wrapped |
| * in a {@link DocumentParseException} by {@link #parse(InputSource,String,String)}. |
| * |
| **/ |
| |
| public void warning(SAXParseException exception) throws SAXException |
| { |
| throw exception; |
| } |
| |
| /** |
| * Throws the exception, which is caught and wrapped |
| * in a {@link DocumentParseException} by {@link #parse(InputSource,String,String)}. |
| * |
| **/ |
| |
| public void error(SAXParseException exception) throws SAXException |
| { |
| throw exception; |
| } |
| |
| /** |
| * Throws the exception, which is caught and wrapped |
| * in a {@link DocumentParseException} by {@link #parse(InputSource,String,String)}. |
| * |
| **/ |
| |
| public void fatalError(SAXParseException exception) throws SAXException |
| { |
| throw exception; |
| } |
| |
| /** |
| * Checks for a previously registered public ID and returns the corresponding |
| * input source. |
| * |
| **/ |
| |
| public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException |
| { |
| String entityPath = null; |
| |
| if (LOG.isDebugEnabled()) |
| LOG.debug("Attempting to resolve entity; publicId = " + publicId + " systemId = " + systemId); |
| |
| if (_entities != null) |
| entityPath = (String) _entities.get(publicId); |
| |
| if (entityPath == null) |
| { |
| if (LOG.isDebugEnabled()) |
| LOG.debug("Entity not found, using " + systemId); |
| |
| return null; |
| } |
| |
| InputStream stream = getClass().getResourceAsStream(entityPath); |
| |
| InputSource result = new InputSource(stream); |
| |
| if (result != null && LOG.isDebugEnabled()) |
| LOG.debug("Resolved " + publicId + " as " + result + " (for " + entityPath + ")"); |
| |
| return result; |
| } |
| |
| /** |
| * Returns true if the node is an element with the specified |
| * name. |
| * |
| **/ |
| |
| protected boolean isElement(Node node, String elementName) throws DocumentParseException |
| { |
| if (node.getNodeType() != Node.ELEMENT_NODE) |
| return false; |
| |
| // Cast it to Element |
| |
| Element element = (Element) node; |
| |
| // Note: Using Xerces 1.0.3 and deferred DOM loading |
| // (which is explicitly turned off), this sometimes |
| // throws a NullPointerException. |
| |
| return element.getTagName().equals(elementName); |
| |
| } |
| |
| /** |
| * Returns the value of an {@link Element} node. That is, all the {@link TextArea} |
| * nodes appended together. Invokes trim() to remove leading and trailing spaces. |
| * |
| **/ |
| |
| protected String getValue(Node node) |
| { |
| String result; |
| Node child; |
| Text text; |
| StringBuffer buffer; |
| |
| buffer = new StringBuffer(); |
| |
| for (child = node.getFirstChild(); child != null; child = child.getNextSibling()) |
| { |
| text = (Text) child; |
| |
| buffer.append(text.getData()); |
| } |
| |
| result = buffer.toString().trim(); |
| |
| return result; |
| } |
| |
| /** |
| * Returns the value of an {@link Element} node (via {@link #getValue(Node)}), |
| * but then validates that the result is a good identifier (starts with a |
| * letter, contains letters, numbers, dashes, underscore). |
| * |
| **/ |
| |
| protected String getId(Node node) throws DocumentParseException |
| { |
| String result = getValue(node); |
| char[] array = result.toCharArray(); |
| char ch; |
| boolean fail = false; |
| |
| for (int i = 0; i < array.length; i++) |
| { |
| ch = array[i]; |
| |
| if (i == 0) |
| fail = !Character.isLetter(ch); |
| else |
| { |
| fail = !(Character.isLetter(ch) || Character.isDigit(ch) || ch == '-' || ch == '_'); |
| } |
| |
| if (fail) |
| throw new DocumentParseException( |
| Tapestry.getString( |
| "AbstractDocumentParser.invalid-identifier", |
| result, |
| getNodePath(node.getParentNode())), |
| _resourcePath, |
| null); |
| } |
| |
| return result; |
| } |
| |
| /** |
| * Returns a 'path' to the given node, which is a list of enclosing |
| * element names seperated by periods. The root element name is first, |
| * and the node's element is last. This is used when reporting some |
| * parse errors. |
| * |
| **/ |
| |
| protected String getNodePath(Node node) |
| { |
| int count = 0; |
| int length = 0; |
| |
| String[] path = new String[10]; |
| |
| while (node != null) |
| { |
| // Dynamically expand the list before it overflows. |
| |
| if (count == path.length) |
| { |
| String newPath[] = new String[count * 2]; |
| System.arraycopy(path, 0, newPath, 0, count); |
| |
| path = newPath; |
| } |
| |
| String nodeName = node.getNodeName(); |
| |
| path[count++] = nodeName; |
| node = node.getParentNode(); |
| |
| length += nodeName.length() + 1; |
| |
| } |
| |
| StringBuffer buffer = new StringBuffer(length); |
| boolean addDot = false; |
| |
| for (int i = count - 1; i >= 0; i--) |
| { |
| |
| if (addDot) |
| buffer.append('.'); |
| |
| buffer.append(path[i]); |
| |
| addDot = true; |
| } |
| |
| return buffer.toString(); |
| } |
| |
| /** |
| * Constructs a new {@link DocumentBuilder} to be used for parsing. |
| * The builder is used and reused, at least until there is an error |
| * parsing a document (at which point, it is discarded). |
| * |
| * <p>This implementation obtains a builder with the following |
| * characteristics: |
| * <ul> |
| * <li>validating (if {@link #getRequireValidatingParser()} returns true} |
| * <li>ignoringElementContentWhitespace |
| * <li>ignoringComments |
| * <li>coalescing |
| * </ul> |
| * |
| * <p>These characteristics are appropriate to parsing things such |
| * as Tapestry specifications; subclasses with unusual demands |
| * may need to override this method. |
| * |
| * <p>The builder uses this {@link AbstractDocumentParser} |
| * as the entity resolver and error handler. |
| * |
| **/ |
| |
| protected DocumentBuilder constructBuilder() throws ParserConfigurationException |
| { |
| DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); |
| |
| factory.setValidating(getRequireValidatingParser()); |
| factory.setIgnoringElementContentWhitespace(true); |
| factory.setIgnoringComments(true); |
| factory.setCoalescing(true); |
| |
| |
| DocumentBuilder result = factory.newDocumentBuilder(); |
| |
| result.setErrorHandler(this); |
| result.setEntityResolver(this); |
| |
| if (LOG.isDebugEnabled()) |
| LOG.debug("Constructed new builder " + result); |
| |
| return result; |
| } |
| |
| /** |
| * Used by {@link #constructBuilder()} to determine if the a validating |
| * {@link DocumentBuilder} is required. This implementation returns true, |
| * subclasses that don't require a validating builder (such as documents |
| * without a known DTD), may override to return false. |
| * |
| * @since 1.0.1 |
| **/ |
| |
| protected boolean getRequireValidatingParser() |
| { |
| return true; |
| } |
| |
| /** |
| * Returns the value of the named attribute of the node. Returns null |
| * if the node doesn't contain an attribute with the given name. |
| * |
| * @since 1.0.1 |
| * |
| **/ |
| |
| protected String getAttribute(Node node, String attributeName) |
| { |
| NamedNodeMap map = node.getAttributes(); |
| |
| if (map == null) |
| return null; |
| |
| Node attributeNode = map.getNamedItem(attributeName); |
| |
| if (attributeNode == null) |
| return null; |
| |
| return attributeNode.getNodeValue(); |
| } |
| |
| /** |
| * Validates that the input value matches against the specified |
| * Perl5 pattern. If valid, the method simply returns. |
| * If not a match, then an error message is generated (using the |
| * errorKey and the input value) and a |
| * {@link DocumentParseException} is thrown. |
| * |
| * @since 2.2 |
| * |
| **/ |
| |
| protected void validate(String value, String pattern, String errorKey) throws DocumentParseException |
| { |
| if (_compiledPatterns == null) |
| _compiledPatterns = new HashMap(); |
| |
| Pattern compiled = (Pattern) _compiledPatterns.get(pattern); |
| |
| if (compiled == null) |
| { |
| compiled = compilePattern(pattern); |
| |
| _compiledPatterns.put(pattern, compiled); |
| } |
| |
| if (_matcher == null) |
| _matcher = new Perl5Matcher(); |
| |
| if (_matcher.matches(value, compiled)) |
| return; |
| |
| throw new InvalidStringException(Tapestry.getString(errorKey, value), value, getResourcePath()); |
| } |
| |
| |
| |
| /** |
| * |
| * Returns a pattern compiled for single line matching |
| * |
| * @since 2.2 |
| * |
| **/ |
| |
| protected Pattern compilePattern(String pattern) |
| { |
| if (_patternCompiler == null) |
| _patternCompiler = new Perl5Compiler(); |
| |
| try |
| { |
| return _patternCompiler.compile(pattern, Perl5Compiler.SINGLELINE_MASK); |
| } |
| catch (MalformedPatternException ex) |
| { |
| throw new ApplicationRuntimeException(ex); |
| } |
| } |
| |
| |
| |
| } |