| /******************************************************************************* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| ******************************************************************************/ |
| package org.apache.sling.scripting.sightly.impl.html.dom; |
| |
| import java.io.CharArrayWriter; |
| import java.util.Iterator; |
| import java.util.LinkedHashMap; |
| import java.util.LinkedHashSet; |
| import java.util.Map; |
| import java.util.Set; |
| |
| /** |
| * Tokenizes a snippet of characters into a structured tag/attribute name list. |
| */ |
| class TagTokenizer { |
| /** Tag name buffer */ |
| private final CharArrayWriter tagName = new CharArrayWriter(30); |
| |
| /** Attribute name buffer */ |
| private final CharArrayWriter attName = new CharArrayWriter(30); |
| |
| /** Attribute value buffer */ |
| private final CharArrayWriter attValue = new CharArrayWriter(30); |
| |
| /** Internal property list */ |
| private final AttributeListImpl attributes = new AttributeListImpl(); |
| |
| /** Parse state constant */ |
| private final static int START = 0; |
| |
| /** Parse state constant */ |
| private final static int TAG = START + 1; |
| |
| /** Parse state constant */ |
| private final static int NAME = TAG + 1; |
| |
| /** Parse state constant */ |
| private final static int INSIDE = NAME + 1; |
| |
| /** Parse state constant */ |
| private final static int ATTNAME = INSIDE + 1; |
| |
| /** Parse state constant */ |
| private final static int EQUAL = ATTNAME + 1; |
| |
| /** Parse state constant */ |
| private final static int ATTVALUE = EQUAL + 1; |
| |
| /** Parse state constant */ |
| private final static int STRING = ATTVALUE + 1; |
| |
| /** Parse state constant */ |
| private final static int ENDSLASH = STRING + 1; |
| |
| /** Parse state constant */ |
| private final static int END = ENDSLASH + 1; |
| |
| /** Parse state constant */ |
| private final static int BETWEEN_ATTNAME = END + 1; |
| |
| /** Quote character */ |
| private char quoteChar = '"'; |
| |
| /** Flag indicating whether the tag scanned is an end tag */ |
| private boolean endTag; |
| |
| /** Flag indicating whether an ending slash was parsed */ |
| private boolean endSlash; |
| |
| /** temporary flag indicating if attribute has a value */ |
| private boolean hasAttributeValue; |
| |
| /** |
| * Scan characters passed to this parser |
| */ |
| public void tokenize(char[] buf, int off, int len) { |
| reset(); |
| |
| int parseState = START; |
| |
| for (int i = 0; i < len; i++) { |
| char c = buf[off + i]; |
| |
| switch (parseState) { |
| case START: |
| if (c == '<') { |
| parseState = TAG; |
| } |
| break; |
| case TAG: |
| if (c == '/') { |
| endTag = true; |
| parseState = NAME; |
| } else if (c == '"' || c == '\'') { |
| quoteChar = c; |
| parseState = STRING; |
| } else if (Character.isWhitespace(c)) { |
| parseState = INSIDE; |
| } else { |
| tagName.write(c); |
| parseState = NAME; |
| } |
| break; |
| case NAME: |
| if (Character.isWhitespace(c)) { |
| parseState = INSIDE; |
| } else if (c == '"' || c == '\'') { |
| quoteChar = c; |
| parseState = STRING; |
| } else if (c == '>') { |
| parseState = END; |
| } else if (c == '/') { |
| parseState = ENDSLASH; |
| } else { |
| tagName.write(c); |
| } |
| break; |
| case INSIDE: |
| if (c == '>') { |
| attributeEnded(); |
| parseState = END; |
| } else if (c == '/') { |
| attributeEnded(); |
| parseState = ENDSLASH; |
| } else if (c == '"' || c == '\'') { |
| attributeValueStarted(); |
| quoteChar = c; |
| parseState = STRING; |
| } else if (c == '=') { |
| parseState = EQUAL; |
| } else if (!Character.isWhitespace(c)) { |
| attName.write(c); |
| parseState = ATTNAME; |
| } |
| break; |
| case ATTNAME: |
| if (c == '>') { |
| attributeEnded(); |
| parseState = END; |
| } else if (c == '/') { |
| attributeEnded(); |
| parseState = ENDSLASH; |
| } else if (c == '=') { |
| parseState = EQUAL; |
| } else if (c == '"' || c == '\'') { |
| quoteChar = c; |
| parseState = STRING; |
| } else if (Character.isWhitespace(c)) { |
| parseState = BETWEEN_ATTNAME; |
| } else { |
| attName.write(c); |
| } |
| break; |
| case BETWEEN_ATTNAME: |
| if (c == '>') { |
| attributeEnded(); |
| parseState = END; |
| } else if (c == '/') { |
| attributeEnded(); |
| parseState = ENDSLASH; |
| } else if (c == '"' || c == '\'') { |
| attributeValueStarted(); |
| quoteChar = c; |
| parseState = STRING; |
| } else if (c == '=') { |
| parseState = EQUAL; |
| } else if (!Character.isWhitespace(c)) { |
| attributeEnded(); |
| attName.write(c); |
| parseState = ATTNAME; |
| } |
| break; |
| case EQUAL: |
| if (c == '>') { |
| attributeEnded(); |
| parseState = END; |
| } else if (c == '"' || c == '\'') { |
| attributeValueStarted(); |
| quoteChar = c; |
| parseState = STRING; |
| } else if (!Character.isWhitespace(c)) { |
| attributeValueStarted(); |
| attValue.write(c); |
| parseState = ATTVALUE; |
| } |
| break; |
| case ATTVALUE: |
| if (Character.isWhitespace(c)) { |
| attributeEnded(); |
| parseState = INSIDE; |
| } else if (c == '"' || c == '\'') { |
| attributeEnded(); |
| quoteChar = c; |
| parseState = STRING; |
| } else if (c == '>') { |
| attributeEnded(); |
| parseState = END; |
| } else { |
| attValue.write(c); |
| } |
| break; |
| case STRING: |
| if (c == quoteChar) { |
| attributeEnded(); |
| parseState = INSIDE; |
| } else { |
| attValue.write(c); |
| } |
| break; |
| case ENDSLASH: |
| if (c == '>') { |
| endSlash = true; |
| parseState = END; |
| } else if (c == '"' || c == '\'') { |
| quoteChar = c; |
| parseState = STRING; |
| } else if (c != '/' && !Character.isWhitespace(c)) { |
| attName.write(c); |
| parseState = ATTNAME; |
| } else { |
| parseState = INSIDE; |
| } |
| break; |
| case END: |
| break; |
| |
| } |
| } |
| } |
| |
| /** |
| * Return a flag indicating whether the tag scanned was an end tag |
| * @return <code>true</code> if it was an end tag, otherwise |
| * <code>false</code> |
| */ |
| public boolean endTag() { |
| return endTag; |
| } |
| |
| /** |
| * Return a flag indicating whether an ending slash was scanned |
| * @return <code>true</code> if an ending slash was scanned, otherwise |
| * <code>false</code> |
| */ |
| public boolean endSlash() { |
| return endSlash; |
| } |
| |
| /** |
| * Return the tagname scanned |
| * @return tag name |
| */ |
| public String tagName() { |
| return tagName.toString(); |
| } |
| |
| /** |
| * Return the list of attributes scanned |
| * @return list of attributes |
| */ |
| public AttributeList attributes() { |
| return attributes; |
| } |
| |
| /** |
| * Reset the internal state of the tokenizer |
| */ |
| private void reset() { |
| tagName.reset(); |
| attributes.reset(); |
| endTag = false; |
| endSlash = false; |
| } |
| |
| /** |
| * Invoked when an attribute ends |
| */ |
| private void attributeEnded() { |
| if (attName.size() > 0) { |
| if (hasAttributeValue) { |
| attributes.addAttribute(attName.toString(), attValue.toString(), |
| quoteChar); |
| } else { |
| attributes.addAttribute(attName.toString(), quoteChar); |
| |
| } |
| attName.reset(); |
| attValue.reset(); |
| hasAttributeValue = false; |
| } |
| } |
| |
| /** |
| * Invoked when an attribute value starts |
| */ |
| private void attributeValueStarted() { |
| hasAttributeValue = true; |
| } |
| |
| /** |
| * Retransfers the tokenized tag data into html again |
| * @return the reassembled html string |
| */ |
| public String toHtmlString() { |
| StringBuffer sb = new StringBuffer(); |
| if (endTag) { |
| sb.append("</" + tagName()); |
| } else { |
| sb.append("<" + tagName()); |
| Iterator<String> attNames = attributes().attributeNames(); |
| while (attNames.hasNext()) { |
| String attName = attNames.next(); |
| String attValue = attributes().getQuotedValue(attName); |
| |
| sb.append(" "); |
| sb.append(attName); |
| if (attValue != null) { |
| sb.append('='); |
| sb.append(attValue); |
| } |
| } |
| if (endSlash) { |
| sb.append(" /"); |
| } |
| } |
| sb.append(">"); |
| return sb.toString(); |
| } |
| } |
| |
| /** |
| * Internal implementation of an <code>AttributeList</code> |
| */ |
| class AttributeListImpl implements AttributeList { |
| |
| /** |
| * Internal Value class |
| */ |
| static class Value { |
| |
| /** |
| * Create a new <code>Value</code> instance |
| */ |
| public Value(char quoteChar, String value) { |
| this.quoteChar = quoteChar; |
| this.value = value; |
| } |
| |
| /** Quote character */ |
| public final char quoteChar; |
| |
| /** Value itself */ |
| public final String value; |
| |
| /** String representation */ |
| private String stringRep; |
| |
| /** |
| * @see Object#toString() |
| */ |
| @Override |
| public String toString() { |
| if (stringRep == null) { |
| stringRep = quoteChar + value + quoteChar; |
| } |
| return stringRep; |
| } |
| } |
| |
| /** Attribute/Value pair map with case insensitives names */ |
| private final Map<String, Value> attributes = new LinkedHashMap<String, Value>(); |
| |
| /** Attribute names, case sensitive */ |
| private final Set<String> attributeNames = new LinkedHashSet<String>(); |
| |
| /** Flag indicating whether this object was modified */ |
| private boolean modified; |
| |
| /** |
| * Add an attribute/value pair to this attribute list |
| */ |
| public void addAttribute(String name, String value, char quoteChar) { |
| attributes.put(name.toUpperCase(), new Value(quoteChar, value)); |
| attributeNames.add(name); |
| } |
| |
| /** |
| * Add an attribute/value pair to this attribute list |
| */ |
| public void addAttribute(String name, char quoteChar) { |
| attributes.put(name.toUpperCase(), null); |
| attributeNames.add(name); |
| } |
| |
| /** |
| * Empty this attribute list |
| */ |
| public void reset() { |
| attributes.clear(); |
| attributeNames.clear(); |
| modified = false; |
| } |
| |
| /** |
| * @see AttributeList#attributeCount |
| */ |
| public int attributeCount() { |
| return attributes.size(); |
| } |
| |
| /** |
| * @see AttributeList#attributeNames |
| */ |
| public Iterator<String> attributeNames() { |
| return attributeNames.iterator(); |
| } |
| |
| /** |
| * @see AttributeList#containsAttribute(String) |
| */ |
| public boolean containsAttribute(String name) { |
| return attributes.containsKey(name.toUpperCase()); |
| } |
| |
| /** |
| * @see AttributeList#getValue(String) |
| */ |
| public String getValue(String name) { |
| Value value = getValueEx(name); |
| if (value != null) { |
| return value.value; |
| } |
| return null; |
| } |
| |
| /** |
| * @see AttributeList#getQuoteChar(java.lang.String) |
| */ |
| public char getQuoteChar(String name) { |
| Value value = getValueEx(name); |
| if (value != null) { |
| return value.quoteChar; |
| } |
| return 0; |
| } |
| |
| /** |
| * @see AttributeList#getQuotedValue(String) |
| */ |
| public String getQuotedValue(String name) { |
| Value value = getValueEx(name); |
| if (value != null) { |
| return value.toString(); |
| } |
| return null; |
| } |
| |
| /** |
| * @see AttributeList#setValue(String, String) |
| */ |
| public void setValue(String name, String value) { |
| if (value == null) { |
| removeValue(name); |
| } else { |
| Value old = getValueEx(name); |
| if (old == null) { |
| addAttribute(name, value, '"'); |
| modified = true; |
| } else if (!old.value.equals(value)) { |
| addAttribute(name, value, old.quoteChar); |
| modified = true; |
| } |
| } |
| } |
| |
| /** |
| * @see AttributeList#removeValue(String) |
| */ |
| public void removeValue(String name) { |
| attributeNames.remove(name); |
| attributes.remove(name.toUpperCase()); |
| modified = true; |
| } |
| |
| /** |
| * @see AttributeList#isModified |
| */ |
| public boolean isModified() { |
| return modified; |
| } |
| |
| /** |
| * Return internal value structure |
| */ |
| protected Value getValueEx(String name) { |
| return attributes.get(name.toUpperCase()); |
| } |
| } |