// blob: 1a68b91966f1c065a9fd2c8d49239cece8fe8937 [file] [log] [blame]
// Copyright 2006, 2007, 2008 The Apache Software Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import org.apache.tapestry.SymbolConstants;
import org.apache.tapestry.internal.parser.*;
import static org.apache.tapestry.ioc.IOCConstants.PERTHREAD_SCOPE;
import org.apache.tapestry.ioc.Location;
import org.apache.tapestry.ioc.Resource;
import org.apache.tapestry.ioc.annotation.Scope;
import org.apache.tapestry.ioc.annotation.Symbol;
import org.apache.tapestry.ioc.internal.util.CollectionFactory;
import static org.apache.tapestry.ioc.internal.util.CollectionFactory.newList;
import org.apache.tapestry.ioc.internal.util.InternalUtils;
import org.apache.tapestry.ioc.internal.util.LocationImpl;
import org.apache.tapestry.ioc.internal.util.TapestryException;
import org.apache.tapestry.ioc.util.Stack;
import org.slf4j.Logger;
import org.xml.sax.*;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
* Non-threadsafe implementation; the IOC service uses the perthread lifecycle.
public class TemplateParserImpl implements TemplateParser, LexicalHandler, ContentHandler, EntityResolver
private static final String MIXINS_ATTRIBUTE_NAME = "mixins";
private static final String TYPE_ATTRIBUTE_NAME = "type";
private static final String ID_ATTRIBUTE_NAME = "id";
public static final String XML_NAMESPACE_URI = "";
* Used as the namespace URI for Tapestry templates.
public static final String TAPESTRY_SCHEMA_5_0_0 = "";
private static final Pattern ID_PATTERN = Pattern.compile("^[a-z]\\w*$", Pattern.CASE_INSENSITIVE);
* Any amount of mixed simple whitespace (space, tab, form feed) mixed with at least one carriage return or line
* feed, followed by any amount of whitespace. Will be reduced to a single linefeed.
private static final Pattern REDUCE_LINEBREAKS_PATTERN = Pattern.compile("[ \\t\\f]*[\\r\\n]\\s*",
* Used when compressing whitespace, matches any sequence of simple whitespace (space, tab, formfeed). Applied after
private static final Pattern REDUCE_WHITESPACE_PATTERN = Pattern.compile("[ \\t\\f]+", Pattern.MULTILINE);
// Note the use of the non-greedy modifier; this prevents the pattern from merging multiple
// expansions on the same text line into a single large
// but invalid expansion.
private static final String EXPANSION_REGEXP = "\\$\\{\\s*(.*?)\\s*}";
private static final Pattern EXPANSION_PATTERN = Pattern.compile(EXPANSION_REGEXP);
private XMLReader reader;
// Resource being parsed
private Resource templateResource;
private Locator locator;
private final List<TemplateToken> tokens = CollectionFactory.newList();
private final boolean compressWhitespaceDefault;
* Because {@link org.xml.sax.ContentHandler#startPrefixMapping(String, String)} events arrive before the
* corresponding {@link org.xml.sax.ContentHandler#startElement(String, String, String, org.xml.sax.Attributes)}
* events, we need to accumlate the {@link org.apache.tapestry.internal.parser.DefineNamespacePrefixToken}s ahead of
* time to get the correct ordering in the output tokens list.
private final List<DefineNamespacePrefixToken> defineNamespaceTokens = newList();
// Non-blank ids from start component elements
private final Set<String> componentIds = CollectionFactory.newSet();
// Used to accumulate text provided by the characters() method. Even contiguous characters may
// be broken up across multiple invocations due to parser internals. We accumulate those
// together before forming a text token.
private final StringBuilder textBuffer = new StringBuilder();
private Location textStartLocation;
private boolean textIsCData;
private boolean insideBody;
private boolean insideBodyErrorLogged;
private boolean ignoreEvents;
private final Logger logger;
private final Map<String, URL> configuration;
private final Stack<Runnable> endTagHandlerStack = new Stack<Runnable>();
private boolean compressWhitespace;
private final Stack<Boolean> compressWhitespaceStack = new Stack<Boolean>();
private final Runnable endOfElementHandler = new Runnable()
public void run()
tokens.add(new EndElementToken(getCurrentLocation()));
// Restore the flag to how it was before the element was parsed.
compressWhitespace = compressWhitespaceStack.pop();
private final Runnable ignoreEndElement = new Runnable()
public void run()
compressWhitespace = compressWhitespaceStack.pop();
public TemplateParserImpl(Logger logger, Map<String, URL> configuration,
boolean compressWhitespaceDefault)
this.logger = logger;
this.configuration = configuration;
this.compressWhitespaceDefault = compressWhitespaceDefault;
private void reset()
templateResource = null;
locator = null;
textStartLocation = null;
textIsCData = false;
insideBody = false;
insideBodyErrorLogged = false;
ignoreEvents = true;
public ComponentTemplate parseTemplate(Resource templateResource)
compressWhitespace = compressWhitespaceDefault;
if (reader == null)
reader = XMLReaderFactory.createXMLReader();
reader.setFeature("", true);
reader.setProperty("", this);
catch (Exception ex)
throw new RuntimeException(ServicesMessages.newParserError(templateResource, ex), ex);
if (!templateResource.exists())
throw new RuntimeException(ServicesMessages.missingTemplateResource(templateResource));
this.templateResource = templateResource;
InputSource source = new InputSource(templateResource.openStream());
return new ComponentTemplateImpl(this.templateResource, tokens, componentIds);
catch (Exception ex)
// Some parsers get in an unknown state when an error occurs, and are are not
// subsequently useable.
reader = null;
throw new TapestryException(ServicesMessages.templateParseError(templateResource, ex), getCurrentLocation(),
public void setDocumentLocator(Locator locator)
this.locator = locator;
* Accumulates the characters into a text buffer.
public void characters(char[] ch, int start, int length) throws SAXException
if (ignoreEvents) return;
if (insideBody()) return;
if (textBuffer.length() == 0) textStartLocation = getCurrentLocation();
textBuffer.append(ch, start, length);
* Adds tokens corresponding to the content in the text buffer. For a non-CDATA section, we also search for
* expansions (thus we may add more than one token). Clears the text buffer.
private void processTextBuffer()
if (textBuffer.length() == 0) return;
String text = textBuffer.toString();
if (textIsCData)
tokens.add(new CDATAToken(text, textStartLocation));
if (compressWhitespace)
text = compressWhitespaceInText(text);
if (InternalUtils.isBlank(text)) return;
private String compressWhitespaceInText(String text)
String linebreaksReduced = REDUCE_LINEBREAKS_PATTERN.matcher(text).replaceAll("\n");
return REDUCE_WHITESPACE_PATTERN.matcher(linebreaksReduced).replaceAll(" ");
* Scans the text, using a regular expression pattern, for expansion patterns, and adds appropriate tokens for what
* it finds.
* @param text to add as {@link org.apache.tapestry.internal.parser.TextToken}s and {@link
* org.apache.tapestry.internal.parser.ExpansionToken}s
private void addTokensForText(String text)
Matcher matcher = EXPANSION_PATTERN.matcher(text);
int startx = 0;
// The big problem with all this code is that everything gets assigned to the
// start of the text block, even if there are line breaks leading up to it.
// That's going to take a lot more work and there are bigger fish to fry. In addition,
// TAPESTRY-2028 means that the whitespace has likely been stripped out of the text
// already anyway.
while (matcher.find())
int matchStart = matcher.start();
if (matchStart != startx)
String prefix = text.substring(startx, matchStart);
tokens.add(new TextToken(prefix, textStartLocation));
// Group 1 includes the real text of the expansion, with whitespace around the
// expression (but inside the curly braces) excluded.
String expression =;
tokens.add(new ExpansionToken(expression, textStartLocation));
startx = matcher.end();
// Catch anything after the final regexp match.
if (startx < text.length())
tokens.add(new TextToken(text.substring(startx, text.length()), textStartLocation));
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException
ignoreEvents = false;
if (insideBody) throw new IllegalStateException(ServicesMessages
// Add any accumulated text into a text token
if (TAPESTRY_SCHEMA_5_0_0.equals(uri))
startTapestryElement(localName, attributes);
startPossibleComponent(attributes, uri, localName, null);
* Checks to see if currently inside a t:body element (which should always be empty). Content is ignored inside a
* body. If inside a body, then a warning is logged (but only one warning per body element).
* @return true if inside t:body, false otherwise
private boolean insideBody()
if (insideBody)
// Limit to one logged error per infraction.
if (!insideBodyErrorLogged)
insideBodyErrorLogged = true;
return insideBody;
private void startTapestryElement(String localName, Attributes attributes)
if (localName.equalsIgnoreCase("body"))
if (localName.equalsIgnoreCase("parameter"))
if (localName.equalsIgnoreCase("block"))
if (localName.equalsIgnoreCase("container"))
startContainer(localName, attributes);
// The component type is derived from the element name. Since element names may not contain
// slashes, we convert periods to slashes. Later down the pipeline, they'll probably be
// converted back into periods, as part of a fully qualified class name.
String componentType = localName.replace('.', '/');
// With a component type specified, it's not just possibly a component ...
startPossibleComponent(attributes, null, null, componentType);
private void startContainer(String elementName, Attributes attributes)
// Neither the container nor its end tag are considered tokens, just the contents inside.
for (int i = 0; i < attributes.getLength(); i++)
String name = attributes.getLocalName(i);
// The name will be blank for an xmlns: attribute
if (InternalUtils.isBlank(name)) continue;
String uri = attributes.getURI(i);
String value = attributes.getValue(i);
if (isXMLSpaceAttribute(uri, name, value)) continue;
throw new TapestryException(ServicesMessages.attributeNotAllowed(elementName), getCurrentLocation(), null);
private void startBlock(Attributes attributes)
String blockId = findSingleParameter("block", "id", attributes);
validateId(blockId, "invalid-block-id");
// null is ok for blockId
tokens.add(new BlockToken(blockId, getCurrentLocation()));
// TODO: Check for an xml:space attribute
private void startParameter(Attributes attributes)
String parameterName = findSingleParameter("parameter", "name", attributes);
if (InternalUtils.isBlank(parameterName))
throw new TapestryException(ServicesMessages.parameterElementNameRequired(), getCurrentLocation(), null);
tokens.add(new ParameterToken(parameterName, getCurrentLocation()));
* Should be called *before* the _compressWhitespace is changed.
private void addEndOfElementHandler()
// Record how the flag was set at the start of the element
private String findSingleParameter(String elementName, String attributeName, Attributes attributes)
String result = null;
for (int i = 0; i < attributes.getLength(); i++)
String uri = attributes.getURI(i);
String name = attributes.getLocalName(i);
String value = attributes.getValue(i);
if (isXMLSpaceAttribute(uri, name, value)) continue;
if (name.equals(attributeName))
result = value;
// Only the named attribute is allowed.
throw new TapestryException(ServicesMessages.undefinedTapestryAttribute(elementName, name, attributeName),
getCurrentLocation(), null);
return result;
private boolean isXMLSpaceAttribute(String uri, String name, String value)
if (uri.equals(XML_NAMESPACE_URI) && name.equals("space"))
// "preserve" turns off whitespace compression
// "default" (the other option, but we'll accept anything) turns it on (or leaves it on, more likely).
compressWhitespace = !"preserve".equalsIgnoreCase(value);
return true;
return false;
private String nullForBlank(String input)
return InternalUtils.isBlank(input) ? null : input;
* @param attributes the attributes for the element
* @param namespaceURI the namespace URI for the element (or the empty string)
* @param elementName the name of the element (to be assigned to the new token), may be null for a component in
* the Tapestry namespace
* @param identifiedType the type of the element, usually null, but may be the component type derived from
private void startPossibleComponent(Attributes attributes, String namespaceURI, String elementName,
String identifiedType)
// Add an end handler to match this start tag.
String id = null;
String type = identifiedType;
String mixins = null;
int count = attributes.getLength();
Location location = getCurrentLocation();
List<TemplateToken> attributeTokens = newList();
for (int i = 0; i < count; i++)
String name = attributes.getLocalName(i);
// The name will be blank for an xmlns: attribute
if (InternalUtils.isBlank(name)) continue;
String uri = attributes.getURI(i);
String value = attributes.getValue(i);
if (TAPESTRY_SCHEMA_5_0_0.equals(uri))
if (name.equalsIgnoreCase(ID_ATTRIBUTE_NAME))
id = nullForBlank(value);
validateId(id, "invalid-component-id");
if (type == null && name.equalsIgnoreCase(TYPE_ATTRIBUTE_NAME))
type = nullForBlank(value);
if (name.equalsIgnoreCase(MIXINS_ATTRIBUTE_NAME))
mixins = nullForBlank(value);
// Anything else is the name of a Tapestry component parameter that is simply
// not part of the template's doctype for the element being instrumented.
if (isXMLSpaceAttribute(uri, name, value)) continue;
attributeTokens.add(new AttributeToken(uri, name, value, location));
boolean isComponent = (id != null || type != null);
// If provided t:mixins but not t:id or t:type, then its not quite a component
if (mixins != null && !isComponent)
throw new TapestryException(ServicesMessages.mixinsInvalidWithoutIdOrType(elementName), location, null);
if (isComponent)
tokens.add(new StartComponentToken(elementName, id, type, mixins, location));
tokens.add(new StartElementToken(namespaceURI, elementName, location));
if (id != null) componentIds.add(id);
// TODO: Is there value in having different end elements for components vs. ordinary
// elements?
private void validateId(String id, String messageKey)
if (id == null) return;
if (ID_PATTERN.matcher(id).matches()) return;
// Not a match.
throw new TapestryException(ServicesMessages.invalidId(messageKey, id), getCurrentLocation(), null);
private void startBody()
tokens.add(new BodyToken(getCurrentLocation()));
insideBody = true;
insideBodyErrorLogged = false;
endTagHandlerStack.push(new Runnable()
public void run()
insideBody = false;
// And don't add an end element token.
public void endElement(String uri, String localName, String qName) throws SAXException
private Location getCurrentLocation()
if (locator == null) return null;
return new LocationImpl(templateResource, locator.getLineNumber(), locator
* Adds any namespace tokens accumulated from just before the current element. The list of namespace tokens is then
* cleared.
private void addDefineNamespaceTokens()
public void comment(char[] ch, int start, int length) throws SAXException
if (ignoreEvents || insideBody()) return;
// Remove excess whitespace. The Comment DOM node will add a leadig and trailing space.
String comment = new String(ch, start, length).trim();
// TODO: Perhaps comments need to be "aggregated" the same way we aggregate text and CDATA.
// Hm. Probably not. Any whitespace between one comment and the next will become a
// TextToken.
// Unless we trim whitespace between consecutive comments ... and on down the rabbit hole.
// Oops -- unless a single comment may be passed into this method as multiple calls
// (have to check how multiline comments are handled).
// Tests against Sun's built in parser does show that multiline comments are still
// provided as a single call to comment(), so we're good for the meantime (until we find
// out some parsers aren't so compliant).
tokens.add(new CommentToken(comment, getCurrentLocation()));
public void endCDATA() throws SAXException
// Add a token for any accumulated CDATA.
// Again, CDATA doesn't nest, so we know we're back to ordinary markup.
textIsCData = false;
public void startCDATA() throws SAXException
if (ignoreEvents || insideBody()) return;
// Because CDATA doesn't mix with any other SAX/lexical events, we can simply turn on a flag
// here and turn it off when we see the end.
textIsCData = true;
// Empty methods defined by the various interfaces.
public void endDTD() throws SAXException
public void endEntity(String name) throws SAXException
public void startDTD(String name, String publicId, String systemId) throws SAXException
// notes:
// 1) a DTD has to occur at the very start of a document. Since we don't start
// recording characters until we hit the first element of a document (see
// characters and startElement), there should be no text to process.
// It's worth noting that the sax parser will puke if any of the following
// occur:
// 1) a doctype is encountered multiple times in the same document
// 2) a doctype is encountered anywhere other than the very first item
// in a document.
// Hence, the assumption made in 1 should hold.
// Since an exception is thrown for case #1 above, we can just add the DTDToken.
// When we go to process the token (in PageLoaderProcessor), we can make sure
// that the final page has only a single DTDToken (the first one).
tokens.add(new DTDToken(name, publicId, systemId, getCurrentLocation()));
public void startEntity(String name) throws SAXException
public void endDocument() throws SAXException
public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException
public void processingInstruction(String target, String data) throws SAXException
public void skippedEntity(String name) throws SAXException
public void startDocument() throws SAXException
public void startPrefixMapping(String prefix, String uri) throws SAXException
// Not interested in the Tapestry namespace (that is never sent to the client).
if (uri.equals(TAPESTRY_SCHEMA_5_0_0)) return;
// The prefix may be blank, which happens when the xmlns attribute is used to define the
// namespace for the default namespace, and when a document has an explicit DOCTYPE.
DefineNamespacePrefixToken token = new DefineNamespacePrefixToken(uri, prefix, getCurrentLocation());
public void endPrefixMapping(String prefix) throws SAXException
public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException
URL url = configuration.get(publicId);
if (url != null) return new InputSource(url.openStream());
return null;