blob: f868e2bd502ec01c42b0d7c387402156eb015ac0 [file] [log] [blame]
/*
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// JavaCC generation options for this grammar.
options {
// Token matching is case-insensitive.
IGNORE_CASE = true;
// Generate an instantiable parser (non-static members) instead of a static singleton.
STATIC = false;
// Let the generated character stream / token manager handle Unicode input.
UNICODE_INPUT = true;
}
PARSER_BEGIN(TagParser)
package org.apache.sling.commons.html.impl.parser;
import org.apache.sling.commons.html.*;
import org.apache.sling.commons.html.impl.tag.*;
import java.util.*;
public class TagParser {

    /**
     * Rebuilds the text covered by a run of tokens, starting at {@code first}
     * and ending with {@code cur} (inclusive).
     *
     * Special tokens attached in front of a regular token are emitted before
     * that token so that the text they matched is kept. Token images are
     * appended as they currently are, i.e. including any change a lexical
     * action made to {@code image}.
     *
     * @param first the first token to include
     * @param cur the last token to include
     * @return the concatenated images of all tokens in the range
     */
    private static String getTokenHtmlText(Token first, Token cur) {
        final StringBuilder html = new StringBuilder();
        // The walk ends as soon as we step past the last requested token.
        final Token stop = cur.next;
        Token current = first;
        while (current != stop) {
            Token special = current.specialToken;
            if (special != null) {
                // specialToken points at the nearest preceding special token;
                // rewind to the earliest one of the run ...
                while (special.specialToken != null) {
                    special = special.specialToken;
                }
                // ... and replay the run front to back.
                for (; special != null; special = special.next) {
                    html.append(special.image);
                }
            }
            html.append(current.image);
            current = current.next;
        }
        return html.toString();
    }
}
PARSER_END(TagParser)
/*
 * Private (#) regular expressions, declared for all lexical states.
 * They never produce tokens themselves; they are building blocks that the
 * token definitions below refer to by name.
 */
< * >TOKEN :
{
// A single whitespace character (space, tab, LF, CR or form feed).
<#SPACE: ( " " | "\t" | "\n" | "\r" | "\u000C" ) >
// One or more characters that are not whitespace, "=", "/" or ">".
| <#ATTR_IDENTIFIER: (~[" ", "\t", "\n", "\r", "\u000C", "=", "/", ">" ])+ >
// Same as ATTR_IDENTIFIER, except that "=" is allowed.
| <#NAME_IDENTIFIER: (~[" ", "\t", "\n", "\r", "\u000C", "/", ">" ])+ >
// The two ways a tag can be closed.
| <#TAG_END: ">" >
| <#TAG_SLASHEND: "/>" >
}
/*
 * DEFAULT state: text outside of any markup.
 * Each opener switches the token manager into the state that lexes the
 * corresponding construct. Because the longest match wins, "<!--" is
 * preferred over "<!" and "</" over "<".
 */
<DEFAULT> TOKEN :
{
<TAG_START: "<" > : START_TAG
| <ENDTAG_START: "</" > : START_TAG
| <COMMENT_START: "<!--" > : COMMENT
| <DECL_START: "<!" > : DECLARATION
// A run of characters up to (not including) the next "<".
| <RAWTEXT: ( ~["<"] )+ >
}
/*
 * START_TAG state: entered right after "<" or "</"; expects the tag name.
 */
<START_TAG> TOKEN :
{
<TAG_NAME: <NAME_IDENTIFIER> > : IN_TAG
// Any single character that cannot start a name; lexing resumes in DEFAULT.
| <LST_ERROR: ~[]> : DEFAULT
}
/*
 * Whitespace inside tags, before attribute values and inside declarations.
 * Declared as SPECIAL_TOKEN: it is not handed to the parser, but stays
 * reachable through the specialToken field of the following token.
 */
<IN_TAG, ATTR_VALUE, IN_DECLARATION> SPECIAL_TOKEN :
{
< (<SPACE>)+ >
}
/*
 * IN_TAG state: after the tag name, lexing attributes up to the end of the tag.
 */
<IN_TAG> TOKEN :
{
<ATTR_NAME: <ATTR_IDENTIFIER> >
// ">" or "/>" closes the tag and returns to plain text.
| <END_OF_TAG: <TAG_END> | <TAG_SLASHEND> > : DEFAULT
// "=" announces an attribute value.
| <ATTR_EQ: "=" > : ATTR_VALUE
// Any other single character; the lexical state is left unchanged.
| <LIT_ERROR: ~[]>
}
/*
 * ATTR_VALUE state (entered after "="): unquoted attribute values.
 */
<ATTR_VALUE> TOKEN :
{
// A run of characters containing no ">", quote, space, tab, LF or CR.
// NOTE(review): form feed ("\u000C") is part of <SPACE> but is not excluded
// here, so it can end up inside an unquoted value - confirm this is intended.
<ATTR_VAL: ( ~[">", "\"", "'", " ", "\t", "\n", "\r"] )+ > : IN_TAG
// Any single character nothing else in this state matched; the state is
// left unchanged.
| <ERROR: ~[]>
}
/*
 * ATTR_VALUE state: quoted attribute values.
 */
<ATTR_VALUE> TOKEN :
{
// The lexical action drops the first and last matched character (the
// quotes), so the token image holds only the text between them.
<STRING:< SQUOTE >|< DQUOTE >> {matchedToken.image = image.substring(1, lengthOfMatch - 1);} : IN_TAG
// Private helpers: a single-quoted and a double-quoted run. Neither supports
// an escaped quote inside the value.
| <#SQUOTE : "'" ( ~["'"] )* "'" >
| <#DQUOTE : "\"" ( ~["\""] )* "\"" >
}
/*
 * COMMENT state: entered after "<!--".
 */
<COMMENT> TOKEN :
{
< COMMENT_END: "-->" > : DEFAULT
// Every other character of the comment is delivered as its own
// one-character token.
| < COMMENT_WORD: ~[] >
}
/*
 * DECLARATION state: entered right after "<!"; expects the declaration name.
 */
<DECLARATION> TOKEN :
{
<DECL_TAG: <NAME_IDENTIFIER> > : IN_DECLARATION
// Catch-all for a character that cannot start a name (whitespace, "/" or
// ">"). Without it the token manager has nothing to match and throws a
// TokenMgrError, which the ParseException handler in decltag() cannot catch.
// With it, the parser sees an unexpected token, raises a ParseException and
// decltag() falls back to returning the input as text. Must stay last so it
// never wins a tie against DECL_TAG.
| <DECL_TAG_ERROR: ~[]> : DEFAULT
}
/*
 * IN_DECLARATION state: after the declaration name, up to the closing ">".
 */
<IN_DECLARATION> TOKEN :
{
// One declaration part: a quoted string (quotes are kept in the image) or a
// run of characters containing no ">", quote, space, tab, LF or CR.
<DECL_ATTR: < SQUOTE > | < DQUOTE > | ( ~[">", "\"", "'", " ", "\t", "\n", "\r"] )+ >
| <DECL_END: <TAG_END> | <TAG_SLASHEND> > : DEFAULT
// Catch-all for input no other token matches, i.e. a quote that is never
// closed. Without it the token manager throws a TokenMgrError, which the
// ParseException handler in decltag() cannot catch. With it, the parser
// raises a ParseException and decltag() returns the input as text. Must stay
// last so it never wins a tie against the tokens above.
| <DECL_ATTR_ERROR: ~[]> : DEFAULT
}
/**
 * Parses the next piece of the input.
 *
 * @return the result of tag(), endHtmlElement(), commentHtmlElement() or
 *         decltag(); a TextData for raw text or for a "<" that is not
 *         followed by a tag name; or an EndOfFile at the end of the input
 */
HtmlElement element() :
{
    HtmlElement result;
    Token chunk;
}
{
    (
        // Two tokens of lookahead: only commit to tag() when "<" is really
        // followed by a tag name.
        LOOKAHEAD(2)
        result = tag()
      | result = endHtmlElement()
      | result = commentHtmlElement()
      | result = decltag()
      | LOOKAHEAD(2)
        // "<" followed by a character that cannot start a name: plain text.
        <TAG_START> chunk = <LST_ERROR>
        { result = new TextData("<" + chunk.image); }
      | chunk = <RAWTEXT>
        { result = new TextData(chunk.image); }
      | <EOF>
        { result = new EndOfFile(); }
    )
    { return result; }
}
/**
 * Parses a single attribute and stores it in the given map under the
 * attribute name.
 *
 * Accepted forms are a bare name, name="value" / name='value' and
 * name=value. A bare name is stored with a default-constructed AttrValue,
 * the other forms with an AttrValue built from the value text (for quoted
 * values the text between the quotes).
 *
 * @param alist the map receiving the parsed attribute
 */
void attribute(Map<String,AttrValue > alist) :
{
    Token name;
    Token value = null;
}
{
    // The value alternatives are grouped explicitly: "|" binds more loosely
    // than sequencing, so without the parentheses the unquoted form would be
    // an alternative to "=" itself and name=value could never be parsed.
    name = <ATTR_NAME> [ <ATTR_EQ> ( value = <STRING> | value = <ATTR_VAL> ) ]
    {
        if (value == null) {
            alist.put(name.image, new AttrValue());
        } else {
            alist.put(name.image, new AttrValue(value.image));
        }
    }
}
/**
 * Parses zero or more attributes.
 *
 * @return a new map holding every attribute parsed, keyed by attribute name
 */
Map<String,AttrValue> attributeList() :
{
    Map<String,AttrValue> attributes = new HashMap<String,AttrValue>();
}
{
    ( attribute(attributes) )*
    { return attributes; }
}
/**
 * Parses a start tag: "<", the tag name, its attributes and the closing
 * ">" or "/>".
 *
 * @return a StartTag built from the tag name, the attribute map and a flag
 *         that is true when the closing token is exactly ">"; if the tag
 *         cannot be parsed, a TextData holding the text of the tokens read
 *         so far
 */
HtmlElement tag() :
{
    Token name;
    Token closer;
    Map<String,AttrValue> attributes;
    // Remember where the tag begins so its text can be rebuilt on failure.
    Token start = getToken(1);
}
{
    try {
        <TAG_START> name = <TAG_NAME> attributes = attributeList() closer = <END_OF_TAG>
        {
            return new StartTag(name.image, attributes, ">".equals(closer.image));
        }
    } catch (ParseException ex) {
        // Not a well-formed tag: go back to plain-text lexing and hand the
        // tokens from the tag start up to the next token back as text.
        token_source.SwitchTo(DEFAULT);
        return new TextData(getTokenHtmlText(start, getNextToken()));
    }
}
/**
 * Parses an end tag: "</", the tag name and the closing ">" or "/>".
 *
 * @return an EndTag carrying the tag name; if the end tag cannot be parsed,
 *         a TextData holding the text of the tokens read so far
 */
HtmlElement endHtmlElement() :
{
    Token name;
    // Remember where the tag begins so its text can be rebuilt on failure.
    Token start = getToken(1);
}
{
    try {
        <ENDTAG_START> name = <TAG_NAME> <END_OF_TAG>
        {
            return new EndTag(name.image);
        }
    } catch (ParseException ex) {
        // Not a well-formed end tag: go back to plain-text lexing and hand
        // the tokens from the tag start up to the next token back as text.
        token_source.SwitchTo(DEFAULT);
        return new TextData(getTokenHtmlText(start, getNextToken()));
    }
}
/**
 * Parses a comment: "<!--", its content, and either "-->" or the end of the
 * input.
 *
 * @return a Comment holding the text between the delimiters
 */
Comment commentHtmlElement() :
{
    StringBuilder content = new StringBuilder();
    Token piece;
}
{
    <COMMENT_START>
    (
        // The lexer delivers comment content one character per token.
        piece = <COMMENT_WORD> { content.append(piece.image); }
    )*
    ( <EOF> | <COMMENT_END> )
    {
        return new Comment(content.toString());
    }
}
/**
 * Parses a declaration: "<!", its name, any number of parts and the closing
 * ">" or "/>".
 *
 * Each part is stored as a map key (quoted parts keep their quotes) with a
 * default-constructed AttrValue.
 *
 * @return a StartTag built from the declaration name and the collected
 *         parts; if the declaration cannot be parsed, a TextData holding the
 *         text of the tokens read so far
 */
HtmlElement decltag() :
{
    Token name = null;
    Token part;
    Map<String,AttrValue > parts = new HashMap<String,AttrValue>();
    // Remember where the declaration begins so its text can be rebuilt on failure.
    Token start = getToken(1);
}
{
    try {
        <DECL_START> name = <DECL_TAG>
        ( part = <DECL_ATTR> { parts.put(part.image, new AttrValue()); } )*
        <DECL_END>
        {
            return new StartTag(name.image, parts);
        }
    } catch (ParseException ex) {
        // Not a well-formed declaration: go back to plain-text lexing and
        // hand the tokens from the start up to the next token back as text.
        token_source.SwitchTo(DEFAULT);
        return new TextData(getTokenHtmlText(start, getNextToken()));
    }
}