blob: 3466cf279833fac001b8834ea5155c6f19b5caf6 [file] [log] [blame]
/*******************************************************************************
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
******************************************************************************/
package org.apache.sling.scripting.sightly.impl.html.dom;
import java.io.CharArrayWriter;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
/**
 * Tokenizes the characters of a single HTML tag into a tag name, an
 * end-tag / end-slash flag pair and an ordered list of attributes.
 * <p>
 * The tokenizer is a small hand-written state machine that is lenient
 * towards malformed markup: it never throws on unexpected characters, it
 * simply moves on to the most plausible next state. An instance is reusable;
 * every call to {@link #tokenize(char[], int, int)} starts from a clean
 * state. Instances keep mutable parse state in fields and must not be shared
 * between threads without external synchronization.
 */
class TagTokenizer {

    /** States of the tag parser. */
    private enum ParseState {
        /** Nothing seen yet, waiting for the opening '&lt;'. */
        START,
        /** Directly after '&lt;'. */
        TAG,
        /** Inside the tag name. */
        NAME,
        /** Inside the tag, between attributes. */
        INSIDE,
        /** Inside an attribute name. */
        ATTNAME,
        /** After the '=' of an attribute. */
        EQUAL,
        /** Inside an unquoted attribute value. */
        ATTVALUE,
        /** Inside a quoted string. */
        STRING,
        /** After a '/' that may close the tag. */
        ENDSLASH,
        /** After the closing '&gt;'; remaining input is ignored. */
        END,
        /** Whitespace after an attribute name, value still possible. */
        BETWEEN_ATTNAME
    }

    /** Quote character assumed for attributes that were not quoted. */
    private static final char DEFAULT_QUOTE_CHAR = '"';

    /** Tag name buffer */
    private final CharArrayWriter tagName = new CharArrayWriter(30);

    /** Attribute name buffer */
    private final CharArrayWriter attName = new CharArrayWriter(30);

    /** Attribute value buffer */
    private final CharArrayWriter attValue = new CharArrayWriter(30);

    /** Internal property list */
    private final AttributeListImpl attributes = new AttributeListImpl();

    /** Quote character of the string currently (or most recently) parsed */
    private char quoteChar = DEFAULT_QUOTE_CHAR;

    /** Flag indicating whether the tag scanned is an end tag */
    private boolean endTag;

    /** Flag indicating whether an ending slash was parsed */
    private boolean endSlash;

    /** Temporary flag indicating if the pending attribute has a value */
    private boolean hasAttributeValue;

    /**
     * Scan the characters passed to this parser. Any result of a previous
     * scan is discarded first.
     *
     * @param buf character buffer containing the tag
     * @param off offset of the first character to scan
     * @param len number of characters to scan
     */
    public void tokenize(char[] buf, int off, int len) {
        reset();

        ParseState parseState = ParseState.START;

        for (int i = 0; i < len; i++) {
            char c = buf[off + i];

            switch (parseState) {
                case START:
                    if (c == '<') {
                        parseState = ParseState.TAG;
                    }
                    break;

                case TAG:
                    if (c == '/') {
                        endTag = true;
                        parseState = ParseState.NAME;
                    } else if (isQuote(c)) {
                        quoteChar = c;
                        parseState = ParseState.STRING;
                    } else if (Character.isWhitespace(c)) {
                        parseState = ParseState.INSIDE;
                    } else {
                        tagName.write(c);
                        parseState = ParseState.NAME;
                    }
                    break;

                case NAME:
                    if (Character.isWhitespace(c)) {
                        parseState = ParseState.INSIDE;
                    } else if (isQuote(c)) {
                        quoteChar = c;
                        parseState = ParseState.STRING;
                    } else if (c == '>') {
                        parseState = ParseState.END;
                    } else if (c == '/') {
                        parseState = ParseState.ENDSLASH;
                    } else {
                        tagName.write(c);
                    }
                    break;

                case INSIDE:
                    if (c == '>') {
                        attributeEnded();
                        parseState = ParseState.END;
                    } else if (c == '/') {
                        attributeEnded();
                        parseState = ParseState.ENDSLASH;
                    } else if (isQuote(c)) {
                        attributeValueStarted();
                        quoteChar = c;
                        parseState = ParseState.STRING;
                    } else if (c == '=') {
                        parseState = ParseState.EQUAL;
                    } else if (!Character.isWhitespace(c)) {
                        attName.write(c);
                        parseState = ParseState.ATTNAME;
                    }
                    break;

                case ATTNAME:
                    if (c == '>') {
                        attributeEnded();
                        parseState = ParseState.END;
                    } else if (c == '/') {
                        attributeEnded();
                        parseState = ParseState.ENDSLASH;
                    } else if (c == '=') {
                        parseState = ParseState.EQUAL;
                    } else if (isQuote(c)) {
                        // A quote glued to the name does not mark a value:
                        // the attribute is recorded without one and the
                        // quoted text is dropped when the string closes.
                        quoteChar = c;
                        parseState = ParseState.STRING;
                    } else if (Character.isWhitespace(c)) {
                        parseState = ParseState.BETWEEN_ATTNAME;
                    } else {
                        attName.write(c);
                    }
                    break;

                case BETWEEN_ATTNAME:
                    if (c == '>') {
                        attributeEnded();
                        parseState = ParseState.END;
                    } else if (c == '/') {
                        attributeEnded();
                        parseState = ParseState.ENDSLASH;
                    } else if (isQuote(c)) {
                        attributeValueStarted();
                        quoteChar = c;
                        parseState = ParseState.STRING;
                    } else if (c == '=') {
                        parseState = ParseState.EQUAL;
                    } else if (!Character.isWhitespace(c)) {
                        // A new name starts: the previous attribute had no value.
                        attributeEnded();
                        attName.write(c);
                        parseState = ParseState.ATTNAME;
                    }
                    break;

                case EQUAL:
                    if (c == '>') {
                        attributeEnded();
                        parseState = ParseState.END;
                    } else if (isQuote(c)) {
                        attributeValueStarted();
                        quoteChar = c;
                        parseState = ParseState.STRING;
                    } else if (!Character.isWhitespace(c)) {
                        attributeValueStarted();
                        attValue.write(c);
                        parseState = ParseState.ATTVALUE;
                    }
                    break;

                case ATTVALUE:
                    if (Character.isWhitespace(c)) {
                        attributeEnded();
                        parseState = ParseState.INSIDE;
                    } else if (isQuote(c)) {
                        attributeEnded();
                        quoteChar = c;
                        parseState = ParseState.STRING;
                    } else if (c == '>') {
                        attributeEnded();
                        parseState = ParseState.END;
                    } else {
                        attValue.write(c);
                    }
                    break;

                case STRING:
                    if (c == quoteChar) {
                        attributeEnded();
                        parseState = ParseState.INSIDE;
                    } else {
                        attValue.write(c);
                    }
                    break;

                case ENDSLASH:
                    if (c == '>') {
                        endSlash = true;
                        parseState = ParseState.END;
                    } else if (isQuote(c)) {
                        quoteChar = c;
                        parseState = ParseState.STRING;
                    } else if (c != '/' && !Character.isWhitespace(c)) {
                        // The slash was not the end of the tag after all.
                        attName.write(c);
                        parseState = ParseState.ATTNAME;
                    } else {
                        parseState = ParseState.INSIDE;
                    }
                    break;

                case END:
                    // Everything after the closing '>' is ignored.
                    break;
            }
        }
    }

    /**
     * Return a flag indicating whether the tag scanned was an end tag
     * @return <code>true</code> if it was an end tag, otherwise
     *         <code>false</code>
     */
    public boolean endTag() {
        return endTag;
    }

    /**
     * Return a flag indicating whether an ending slash was scanned
     * @return <code>true</code> if an ending slash was scanned, otherwise
     *         <code>false</code>
     */
    public boolean endSlash() {
        return endSlash;
    }

    /**
     * Return the tagname scanned
     * @return tag name
     */
    public String tagName() {
        return tagName.toString();
    }

    /**
     * Return the list of attributes scanned
     * @return list of attributes
     */
    public AttributeList attributes() {
        return attributes;
    }

    /**
     * Reset the complete internal state of the tokenizer, including any
     * half-parsed attribute left over from a previous, truncated input, so
     * that consecutive scans cannot influence each other.
     */
    private void reset() {
        tagName.reset();
        attName.reset();
        attValue.reset();
        attributes.reset();
        quoteChar = DEFAULT_QUOTE_CHAR;
        hasAttributeValue = false;
        endTag = false;
        endSlash = false;
    }

    /**
     * Invoked when an attribute ends. A pending attribute name is stored in
     * the attribute list, with its value if one was started. The value
     * buffer and value flag are always cleared, even when there is no
     * pending name, so that a stray value cannot leak into the next
     * attribute.
     */
    private void attributeEnded() {
        if (attName.size() > 0) {
            if (hasAttributeValue) {
                attributes.addAttribute(attName.toString(), attValue.toString(),
                        quoteChar);
            } else {
                attributes.addAttribute(attName.toString(), quoteChar);
            }
            attName.reset();
        }
        attValue.reset();
        hasAttributeValue = false;
    }

    /**
     * Invoked when an attribute value starts
     */
    private void attributeValueStarted() {
        hasAttributeValue = true;
    }

    /**
     * Tell whether a character opens or closes a quoted string.
     */
    private static boolean isQuote(char c) {
        return c == '"' || c == '\'';
    }

    /**
     * Retransfers the tokenized tag data into html again. End tags are
     * written without attributes; start tags list their attributes in the
     * order returned by the attribute list, followed by <code> /</code> if
     * an ending slash was scanned.
     *
     * @return the reassembled html string
     */
    public String toHtmlString() {
        StringBuilder html = new StringBuilder();
        if (endTag) {
            html.append("</").append(tagName());
        } else {
            html.append('<').append(tagName());
            Iterator<String> names = attributes().attributeNames();
            while (names.hasNext()) {
                String name = names.next();
                String quotedValue = attributes().getQuotedValue(name);
                html.append(' ').append(name);
                if (quotedValue != null) {
                    html.append('=').append(quotedValue);
                }
            }
            if (endSlash) {
                html.append(" /");
            }
        }
        html.append('>');
        return html.toString();
    }
}
/**
 * Internal implementation of an <code>AttributeList</code>.
 * <p>
 * Attributes are looked up case-insensitively, while the spelling under
 * which an attribute was first added is preserved for iteration. Both
 * internal collections keep insertion order and are kept in sync: every
 * attribute has exactly one entry in each of them.
 */
class AttributeListImpl implements AttributeList {

    /**
     * Internal Value class
     */
    static class Value {

        /**
         * Create a new <code>Value</code> instance
         *
         * @param quoteChar quote character surrounding the value
         * @param value the unquoted value
         */
        public Value(char quoteChar, String value) {
            this.quoteChar = quoteChar;
            this.value = value;
        }

        /** Quote character */
        public final char quoteChar;

        /** Value itself */
        public final String value;

        /** Lazily built string representation */
        private String stringRep;

        /**
         * Return the value surrounded by its quote character.
         *
         * @see Object#toString()
         */
        @Override
        public String toString() {
            if (stringRep == null) {
                stringRep = quoteChar + value + quoteChar;
            }
            return stringRep;
        }
    }

    /** Attribute/Value pair map with case insensitive names (upper-cased keys) */
    private final Map<String, Value> attributes = new LinkedHashMap<String, Value>();

    /** Attribute names, in their original case sensitive spelling */
    private final Set<String> attributeNames = new LinkedHashSet<String>();

    /** Flag indicating whether this object was modified */
    private boolean modified;

    /**
     * Add an attribute/value pair to this attribute list. If an attribute
     * with the same name (ignoring case) already exists, its value is
     * replaced and its original spelling and position are kept.
     *
     * @param name attribute name
     * @param value attribute value
     * @param quoteChar quote character to use when the value is written out
     */
    public void addAttribute(String name, String value, char quoteChar) {
        store(name, new Value(quoteChar, value));
    }

    /**
     * Add an attribute without a value to this attribute list. If an
     * attribute with the same name (ignoring case) already exists, its value
     * is dropped and its original spelling and position are kept.
     *
     * @param name attribute name
     * @param quoteChar not used, since there is no value to quote
     */
    public void addAttribute(String name, char quoteChar) {
        store(name, null);
    }

    /**
     * Empty this attribute list
     */
    public void reset() {
        attributes.clear();
        attributeNames.clear();
        modified = false;
    }

    /**
     * @see AttributeList#attributeCount
     */
    public int attributeCount() {
        return attributes.size();
    }

    /**
     * @see AttributeList#attributeNames
     */
    public Iterator<String> attributeNames() {
        return attributeNames.iterator();
    }

    /**
     * @see AttributeList#containsAttribute(String)
     */
    public boolean containsAttribute(String name) {
        return attributes.containsKey(toKey(name));
    }

    /**
     * @see AttributeList#getValue(String)
     */
    public String getValue(String name) {
        Value value = getValueEx(name);
        return value != null ? value.value : null;
    }

    /**
     * @see AttributeList#getQuoteChar(java.lang.String)
     */
    public char getQuoteChar(String name) {
        Value value = getValueEx(name);
        return value != null ? value.quoteChar : 0;
    }

    /**
     * @see AttributeList#getQuotedValue(String)
     */
    public String getQuotedValue(String name) {
        Value value = getValueEx(name);
        return value != null ? value.toString() : null;
    }

    /**
     * Set the value of an attribute. A <code>null</code> value removes the
     * attribute. The list is only marked as modified if the call actually
     * changed something. An existing value keeps its quote character; new
     * values are double quoted.
     *
     * @see AttributeList#setValue(String, String)
     */
    public void setValue(String name, String value) {
        if (value == null) {
            removeValue(name);
            return;
        }
        Value old = getValueEx(name);
        if (old == null) {
            addAttribute(name, value, '"');
            modified = true;
        } else if (!value.equals(old.value)) {
            // value is known to be non-null here, so this is null-safe
            addAttribute(name, value, old.quoteChar);
            modified = true;
        }
    }

    /**
     * Remove an attribute, regardless of the case it is spelled in. The list
     * is only marked as modified if an attribute was actually removed.
     *
     * @see AttributeList#removeValue(String)
     */
    public void removeValue(String name) {
        String key = toKey(name);
        boolean removed = attributes.containsKey(key);
        attributes.remove(key);

        // The name may have been recorded in a different spelling, so compare
        // by lookup key rather than relying on Set.remove(name).
        Iterator<String> names = attributeNames.iterator();
        while (names.hasNext()) {
            if (key.equals(toKey(names.next()))) {
                names.remove();
                removed = true;
            }
        }
        if (removed) {
            modified = true;
        }
    }

    /**
     * @see AttributeList#isModified
     */
    public boolean isModified() {
        return modified;
    }

    /**
     * Return internal value structure
     *
     * @return the stored value, or <code>null</code> if the attribute is
     *         unknown or has no value
     */
    protected Value getValueEx(String name) {
        return attributes.get(toKey(name));
    }

    /**
     * Store a value (possibly <code>null</code>) under the given name. The
     * name is only recorded if no attribute with the same lookup key exists
     * yet, which keeps both collections in sync.
     */
    private void store(String name, Value value) {
        String key = toKey(name);
        boolean known = attributes.containsKey(key);
        attributes.put(key, value);
        if (!known) {
            attributeNames.add(name);
        }
    }

    /**
     * Build the case insensitive lookup key of an attribute name. The root
     * locale is used so that the result does not depend on the default
     * locale of the JVM.
     */
    private static String toKey(String name) {
        return name.toUpperCase(Locale.ROOT);
    }
}