blob: 201f96118429e93c39ce0e2760c345cd435026d2 [file] [log] [blame]
/*
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.royale.compiler.internal.parsing.mxml;
/*
* RawMXMLTokenizer.java is generated from RawMXMLTokenizer.lex.
* DO NOT MAKE EDITS DIRECTLY TO RawMXMLTokenizer.java.
* THEY WILL BE LOST WHEN THE FILE IS GENERATED AGAIN.
*/
import antlr.Token;
import antlr.CommonToken;
import org.apache.royale.compiler.internal.parsing.mxml.BaseRawMXMLTokenizer;
import static org.apache.royale.compiler.parsing.MXMLTokenTypes.*;
@SuppressWarnings("all")
%%
%{
/**
 * Nested '<' bracket level (for <!DOCTYPE et al).
 * It is set to 1 when such a declaration is entered, incremented and
 * decremented on each '<' and '>' seen in the MARKUP_IGNORE state, and
 * cleared to 0 by reset().
 */
protected int bracketLevel;
/**
 * Reports the column at which the tokenizer is currently positioned.
 *
 * @return the current value of the scanner's {@code yycolumn} counter
 */
public final int getColumn()
{
    final int currentColumn = yycolumn;
    return currentColumn;
}
/**
 * Pushes the last character of the current match back so that it is
 * scanned again.
 */
protected final void unget()
{
    // Moving the end-of-match marker back by one shortens the current match by one character.
    zzMarkedPos -= 1;
}
/**
 * Reports the line at which the tokenizer is currently positioned.
 *
 * @return the current value of the scanner's {@code yyline} counter
 */
public final int getLine()
{
    final int currentLine = yyline;
    return currentLine;
}
/**
 * Sets the start offset from which the offsets of subsequent tokens are
 * counted.
 *
 * @param offset the value to store in the scanner's {@code yychar} counter
 */
public final void setOffset(int offset)
{
    this.yychar = offset;
}
/**
 * Reports the tokenizer's current character offset.
 *
 * @return the current value of the scanner's {@code yychar} counter
 */
public final int getOffset()
{
    final int currentOffset = yychar;
    return currentOffset;
}
/**
 * Creates a tokenizer without performing any initialization of its own
 * beyond what the superclass constructor does.
 */
public RawMXMLTokenizer()
{
    super();
}
/**
 * Creates an {@link MXMLToken} from explicit type, position and text
 * information and tags it with this tokenizer's source path.
 *
 * @param type   token type, forwarded to the {@code MXMLToken} constructor
 * @param start  start position, forwarded to the {@code MXMLToken} constructor
 * @param end    end position, forwarded to the {@code MXMLToken} constructor
 * @param line   line, forwarded to the {@code MXMLToken} constructor
 * @param column column, forwarded to the {@code MXMLToken} constructor
 * @param text   token text, forwarded to the {@code MXMLToken} constructor
 * @return the newly created token
 */
protected final Token buildToken(int type, int start, int end, int line, int column, String text)
{
    final MXMLToken result = new MXMLToken(type, start, end, line, column, text);
    result.setSourcePath(sourcePath);
    return result;
}
/**
 * Resets the tokenizer: runs the superclass reset and then clears the
 * DTD bracket nesting level.
 */
public void reset()
{
    super.reset();
    this.bracketLevel = 0;
}
/**
 * Appends the characters of the current match to the given builder.
 *
 * @param builder receives the scanner buffer contents from the start of the
 *                current match up to its marked end
 */
protected final void fillBuffer(StringBuilder builder)
{
    final int matchLength = zzMarkedPos - zzStartRead;
    builder.append(zzBuffer, zzStartRead, matchLength);
}
%}
// Maintain the yychar, yyline and yycolumn counters that getOffset(), getLine()
// and getColumn() return.
%char
%line
%column
// Lexical states used by the rules below, in addition to the default YYINITIAL.
%state CDATA, COMMENT, ASDOC_COMMENT, DIRECTIVE, MARKUP, STRING1, STRING2, MARKUP_IGNORE, STRING1_IGNORE, STRING2_IGNORE, WHITESPACE
// Generate class RawMXMLTokenizer, extending BaseRawMXMLTokenizer, whose scanning
// method is nextToken() and returns a Token.
%class RawMXMLTokenizer
%function nextToken
%type Token
%extends BaseRawMXMLTokenizer
// Scan Unicode input and match the rule text case-insensitively.
%unicode
%ignorecase
// Building blocks for tag and attribute names.
LETTER=[a-zA-Z]
DIGIT=[0-9]
// A name starts with a letter, ':' or '_' ...
ID_FIRST=({LETTER}|":"|"_")
// ... and continues with letters, digits, ':', '_' or '.'. Note that '-' is not included.
ID_FOLLOW=({LETTER}|{DIGIT}|":"|"_"|".")
ID={ID_FIRST}{ID_FOLLOW}*
// Carriage return, line feed, space, tab, backspace (\b) and \012 (octal for line feed,
// so it duplicates \n).
// NOTE(review): \b and \012 look unusual in a whitespace class; form feed may have been
// intended - confirm before changing.
WHITE_SPACE_CHAR=[\r\n\ \t\b\012]
%%
//
// Whitespace has semantic significance only in certain states.
// In the following states, we simply ignore it.
//
// A run of whitespace in any of the listed states is consumed without building a token.
// NOTE(review): in DIRECTIVE this rule is listed before the "([^?])*" rule, so when both
// match the same length (a whitespace-only run directly before a '?') this rule wins and
// that whitespace is not passed to continueAggregate() - confirm this is intended.
<DIRECTIVE, MARKUP_IGNORE, STRING1_IGNORE, STRING2_IGNORE, MARKUP> {WHITE_SPACE_CHAR}+
{
// Intentionally empty: the matched whitespace is dropped.
}
//
// The YYINITIAL state is the starting state of the lexer, and the state to which
// other states eventually return in a well-formed document.
// In this state, it looks for processing instructions, whitespace, comments, DTDs, tags,
// text, and CDATA blocks; it then transitions to other states to handle each of these
// (except for text, which doesn't need a separate state).
// It builds TOKEN_OPEN_TAG_START and TOKEN_CLOSE_TAG_START to represent the beginning
// of tags such as
// <s:Button
// and
// </s:Button
// and also builds TOKEN_TEXT tokens to represent text.
// All other tokens are built by other states.
//
// Start of a processing instruction: begin an aggregate with the "<?" just matched
// and let the DIRECTIVE state collect the rest.
<YYINITIAL> "<?"
{
startAggregate();
yybegin(DIRECTIVE);
}
// A run of whitespace. This rule is listed before the text rule "[^<]+", so it only wins
// when both match the same length, i.e. when the whitespace runs up to a '<' or the end
// of input; whitespace followed by other text is matched by the longer text rule instead.
<YYINITIAL> {WHITE_SPACE_CHAR}+
{
startAggregate();
yybegin(WHITESPACE);
}
// Start of a normal comment: begin an aggregate with the "<!--" just matched
// and let the COMMENT state collect the rest.
<YYINITIAL> "<!--"
{
    startAggregate();
    yybegin(COMMENT);
}
// An empty comment. Without this rule the longest match would be the ASDoc opener
// "<!---", leaving only "->" behind; the ASDOC_COMMENT state would then not see a
// terminator and would keep swallowing input up to the next "-->" in the document.
// The whole comment is matched here, so the token is built right away and the
// tokenizer stays in the initial state.
<YYINITIAL> "<!---->"
{
    startAggregate();
    return buildAggregateToken(TOKEN_COMMENT);
}
// Start of an ASDoc comment: one character longer than the "<!--" rule, so it wins by
// longest match. The ASDOC_COMMENT state collects the rest.
<YYINITIAL> "<!---"
{
    startAggregate();
    yybegin(ASDOC_COMMENT);
}
// Start of a DTD declaration. No token is built; the MARKUP_IGNORE state skips the
// declaration, using bracketLevel to find the '>' that closes it.
<YYINITIAL> ("<!DOCTYPE"|"<!ENTITY"|"<!ELEMENT"|"<!ATTLIST"|"<!NOTATION")
{
yybegin(MARKUP_IGNORE);
// The '<' of the declaration itself is the first open bracket.
bracketLevel = 1;
}
// "<" immediately followed by a name opens a tag: emit TOKEN_OPEN_TAG_START
// and continue in MARKUP for the attributes and the end of the tag.
<YYINITIAL> ("<"{ID})
{
    final Token openTagStart = buildToken(TOKEN_OPEN_TAG_START);
    yybegin(MARKUP);
    return openTagStart;
}
// "</" immediately followed by a name opens a closing tag: emit TOKEN_CLOSE_TAG_START
// and continue in MARKUP for the end of the tag.
<YYINITIAL> ("</"{ID})
{
    yybegin(MARKUP);
    final Token closeTagStart = buildToken(TOKEN_CLOSE_TAG_START);
    return closeTagStart;
}
// Everything up to the next '<' is text and becomes a single TOKEN_TEXT token.
<YYINITIAL> [^<]+
{
return buildToken(TOKEN_TEXT);
}
// Start of a CDATA block: begin an aggregate with the opening delimiter just matched
// and let the CDATA state collect the rest.
<YYINITIAL> "<![CDATA["
{
startAggregate();
yybegin(CDATA);
}
//
// The DIRECTIVE state lexes an XML processing instruction such as
// <?xml version="1.0" encoding="utf-8"?>
// It builds a single TOKEN_PROCESSING_INSTRUCTION token
// containing the text of the entire instruction before
// returning to the initial state.
//
// Any run of characters other than '?' belongs to the instruction body.
<DIRECTIVE> ([^?])*
{
continueAggregate();
}
// One or more '?' followed by a character that is neither '?' nor '>' do not end the
// instruction, so they are kept as part of the body.
<DIRECTIVE> ("?"+[^?>])
{
continueAggregate();
}
// One or more '?' followed by '>' end the instruction: append the terminator, return to
// the initial state and emit everything collected as one token.
<DIRECTIVE> ("?"+">")
{
continueAggregate();
yybegin(YYINITIAL);
return buildAggregateToken(TOKEN_PROCESSING_INSTRUCTION);
}
// Any single character not consumed by the rules above (for example a '?' that is the
// last character of the input) is kept as part of the body as well.
<DIRECTIVE> .
{
continueAggregate();
}
// End of input inside an instruction: return to the initial state and emit what has
// been collected so far.
<DIRECTIVE><<EOF>>
{
continueAggregate();
yybegin(YYINITIAL);
return buildAggregateToken(TOKEN_PROCESSING_INSTRUCTION);
}
//
// The WHITESPACE state lexes whitespace that has semantic significance.
// It builds a single TOKEN_WHITESPACE token before returning to the initial state.
//
// The whitespace run is followed by '<': go back to the initial state, push the '<' back
// so that it is scanned again there, and emit the aggregated whitespace.
<WHITESPACE> [<]
{
yybegin(YYINITIAL);
unget();
return buildAggregateToken(TOKEN_WHITESPACE);
}
// The whitespace run reaches the end of input: emit the aggregated whitespace.
<WHITESPACE><<EOF>>
{
yybegin(YYINITIAL);
return buildAggregateToken(TOKEN_WHITESPACE);
}
//
// The COMMENT and ASDOC_COMMENT states lex comments such as
// <!-- This is a normal comment -->
// and
// <!--- This is an ASDoc comment -->
// They build a single TOKEN_COMMENT or TOKEN_ASDOC_COMMENT token, respectively,
// before returning to the initial state.
//
// Single-character fallback for both comment states. It is only chosen when the
// terminator rules below cannot match, i.e. when no closing "-->" follows.
<COMMENT,ASDOC_COMMENT> [^]
{
    continueAggregate();
}
// Everything up to and including the first run of two or more '-' followed by '>'
// completes a normal comment.
<COMMENT> ~("-""-"+">")
{
    continueAggregate();
    yybegin(YYINITIAL);
    final Token comment = buildAggregateToken(TOKEN_COMMENT);
    return comment;
}
// Same terminator, but completing an ASDoc comment.
<ASDOC_COMMENT> ~("-""-"+">")
{
    continueAggregate();
    yybegin(YYINITIAL);
    final Token asdocComment = buildAggregateToken(TOKEN_ASDOC_COMMENT);
    return asdocComment;
}
// End of input inside a normal comment: emit what was collected and, if that yielded
// a token, report the comment as unclosed.
<COMMENT><<EOF>>
{
    final MXMLToken unclosedComment = (MXMLToken)buildAggregateToken(TOKEN_COMMENT);
    if (unclosedComment != null)
    {
        reportUnclosedComment(unclosedComment);
    }
    return unclosedComment;
}
// End of input inside an ASDoc comment: emit what was collected and, if that yielded
// a token, report the ASDoc comment as unclosed.
<ASDOC_COMMENT><<EOF>>
{
    final MXMLToken unclosedAsdoc = (MXMLToken)buildAggregateToken(TOKEN_ASDOC_COMMENT);
    if (unclosedAsdoc != null)
    {
        reportUnclosedASDocComment(unclosedAsdoc);
    }
    return unclosedAsdoc;
}
//
// The MARKUP_IGNORE, STRING1_IGNORE, and STRING2_IGNORE states
// lex the DTD by ignoring it. They build no tokens.
//
// Skip everything inside the declaration that is not a bracket or a quote.
<MARKUP_IGNORE> ([^<>\"']*)
{
}
// A nested '<' opens another bracket level.
<MARKUP_IGNORE> [<]
{
bracketLevel++;
}
// A '>' closes one bracket level; when the outermost level is closed the declaration
// is finished and the tokenizer returns to the initial state.
<MARKUP_IGNORE> [>]
{
bracketLevel--;
if (bracketLevel == 0)
yybegin(YYINITIAL);
}
// A double quote starts a double-quoted string inside the declaration.
<MARKUP_IGNORE> [\"]
{
yybegin(STRING1_IGNORE);
}
// A single quote starts a single-quoted string inside the declaration.
<MARKUP_IGNORE> [']
{
yybegin(STRING2_IGNORE);
}
// Skip the contents of a double-quoted string.
<STRING1_IGNORE> ([^\"<])*
{
}
// The closing double quote ends the string.
<STRING1_IGNORE> [\"]
{
yybegin(MARKUP_IGNORE);
}
// A '<' inside the string also ends it; the '<' is pushed back so that MARKUP_IGNORE
// scans it again and counts it as a bracket.
<STRING1_IGNORE> [<]
{
yybegin(MARKUP_IGNORE);
unget();
}
// Skip the contents of a single-quoted string.
<STRING2_IGNORE> ([^'<])*
{
}
// The closing single quote ends the string.
<STRING2_IGNORE> [']
{
yybegin(MARKUP_IGNORE);
}
// A '<' inside the string also ends it; the '<' is pushed back so that MARKUP_IGNORE
// scans it again and counts it as a bracket.
<STRING2_IGNORE> [<]
{
yybegin(MARKUP_IGNORE);
unget();
}
//
// The MARKUP state lexes the parts of normal tags such as
// <s:Button xmlns:s="library://ns.adobe.com/flex/spark" id="b">
// or
// </s:Button>
// or
// <s:Button id="b"/>
// that follow the tag name.
//
// It builds a TOKEN_XMLNS or TOKEN_NAME token for each attribute name
// (depending on whether it is a namespace attribute),
// and a TOKEN_EQUALS token for each equals sign in an attribute.
// The attribute values are handled by transitioning to the
// STRING1 and STRING2 states, depending on whether the attribute
// value is delimited by double quotes or single quotes.
// At the end of the tag, it builds a TOKEN_TAG_END token (for >)
// or a TOKEN_EMPTY_TAG_END token (for />) before returning to the
// initial state.
// A namespace attribute name: "xmlns", optionally followed by ':' and a prefix.
// This rule is listed before the general name rule, so it wins when both match the
// same length; a longer name that merely starts with "xmlns" is matched by the name
// rule instead.
<MARKUP> "xmlns"(":"{ID_FOLLOW}*)?
{
return buildToken(TOKEN_XMLNS);
}
// Any other attribute name.
<MARKUP> ({ID})
{
return buildToken(TOKEN_NAME);
}
// The equals sign between an attribute name and its value.
<MARKUP> "="
{
return buildToken(TOKEN_EQUALS);
}
// End of the tag: emit TOKEN_TAG_END and return to the initial state.
<MARKUP> (">")
{
yybegin(YYINITIAL);
return buildToken(TOKEN_TAG_END);
}
// End of an empty tag: emit TOKEN_EMPTY_TAG_END and return to the initial state.
<MARKUP> ("/>")
{
yybegin(YYINITIAL);
return buildToken(TOKEN_EMPTY_TAG_END);
}
// A '<' before the tag was closed: push it back and return to the initial state,
// where it is scanned again. No token is built here.
<MARKUP> "<"
{
unget();
yybegin(YYINITIAL);
}
// Opening double quote of an attribute value: begin an aggregate with the quote
// and let the STRING1 state collect the value.
<MARKUP> [\"]
{
startAggregate();
yybegin(STRING1);
}
// Opening single quote of an attribute value: begin an aggregate with the quote
// and let the STRING2 state collect the value.
<MARKUP> [']
{
startAggregate();
yybegin(STRING2);
}
//
// The STRING1 state handles attribute values within double-quotes
// such as the OK in
// label="OK"
// It builds a single TOKEN_STRING token.
//
// Any run of characters other than '"' and '<' is part of the value. This includes
// whitespace, so no separate whitespace rule is needed: such a rule could never be
// selected, because this one always matches at least as much and is listed first.
<STRING1> ([^\"<])*
{
    continueAggregate();
}
// The closing double quote: append it, return to MARKUP and emit the whole value
// as one TOKEN_STRING.
<STRING1> [\"]
{
    yybegin(MARKUP);
    continueAggregate();
    return buildAggregateToken(TOKEN_STRING);
}
// A '<' inside the value ends it early. The '<' is pushed back so that MARKUP
// scans it again, and it is not appended to the value.
<STRING1> [<]
{
    yybegin(MARKUP);
    unget();
    return buildAggregateToken(TOKEN_STRING);
}
// End of input inside the value: return to MARKUP and emit what was collected.
<STRING1><<EOF>>
{
    yybegin(MARKUP);
    continueAggregate();
    return buildAggregateToken(TOKEN_STRING);
}
//
// The STRING2 state handles attribute values within single-quotes
// such as the OK in
// label='OK'
// It builds a single TOKEN_STRING token.
//
// Any run of characters other than "'" and '<' is part of the value. This includes
// whitespace, so no separate whitespace rule is needed: such a rule could never be
// selected, because this one always matches at least as much and is listed first.
<STRING2> ([^'<])*
{
    continueAggregate();
}
// The closing single quote: append it, return to MARKUP and emit the whole value
// as one TOKEN_STRING.
<STRING2> [']
{
    yybegin(MARKUP);
    continueAggregate();
    return buildAggregateToken(TOKEN_STRING);
}
// A '<' inside the value ends it early. The '<' is pushed back so that MARKUP
// scans it again, and it is not appended to the value.
<STRING2> [<]
{
    yybegin(MARKUP);
    unget();
    return buildAggregateToken(TOKEN_STRING);
}
// End of input inside the value: return to MARKUP and emit what was collected.
<STRING2><<EOF>>
{
    yybegin(MARKUP);
    continueAggregate();
    return buildAggregateToken(TOKEN_STRING);
}
//
// The CDATA state handles a CDATA block such as
// <![CDATA[File > New]]>
// It builds a single TOKEN_CDATA token
// before returning to the initial state.
//
// Single-character fallback. It is only chosen when the terminator rule below cannot
// match, i.e. when no closing "]]>" follows.
<CDATA> [^]
{
    continueAggregate();
}
// Everything up to and including the first run of two or more ']' followed by '>'
// completes the CDATA block.
<CDATA> ~("]""]"+">")
{
    continueAggregate();
    yybegin(YYINITIAL);
    final Token cdata = buildAggregateToken(TOKEN_CDATA);
    return cdata;
}
// End of input inside a CDATA block: emit what was collected and, if that yielded
// a token, report the block as unclosed.
<CDATA><<EOF>>
{
    final MXMLToken unclosedCdata = (MXMLToken)buildAggregateToken(TOKEN_CDATA);
    if (unclosedCdata != null)
    {
        reportUnclosedCDATA(unclosedCdata);
    }
    return unclosedCdata;
}
//
// Report any character that no other rule recognizes; no token is built for it.
//
// Single-character catch-all for the listed states. It is the last rule, so it is only
// chosen when no earlier rule matches at the current position.
<YYINITIAL, MARKUP, STRING1, STRING2, MARKUP_IGNORE, STRING1_IGNORE, STRING2_IGNORE> .
{
// The offending character is reported and then skipped; scanning continues in the same state.
reportBadCharacterProblem(yytext());
}