blob: 8ba92e220f167afe5099d593b507aee2e3d6fd12 [file] [log] [blame]
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
import java.util.regex.Pattern;

import org.apache.royale.compiler.common.ISourceLocation;
import org.apache.royale.compiler.problems.ASDocNotClosedProblem;
import org.apache.royale.compiler.problems.CDataNotClosedProblem;
import org.apache.royale.compiler.problems.CommentNotClosedProblem;
import org.apache.royale.compiler.problems.ICompilerProblem;
import org.apache.royale.compiler.problems.StringLiteralMustBeTerminatedBeforeLineBreakProblem;
import org.apache.royale.compiler.problems.StringLiteralNotClosedProblem;
/**
 * Base class for RawActionScriptTokenizer. Code is pulled out into this Java
 * backing class so it is easier to deal with. Uses a token pool to avoid object
 * creation. The token pool keeps a buffer of 10 tokens in memory, and it is
 * generally unsafe to hold on to tokens past the point they are needed. If
 * tokens are to be preserved, their copy constructor should be used.
 */
public abstract class BaseRawASTokenizer extends BaseRawTokenizer<ASToken>
* Tracks if we are in a close tag. Used in the generated code
protected boolean isInCloseTag = false;
* The depth of open tags, used to determine if we're still in E4X content.
* Each time we see a tag open, this count is incremented. When we see any
* kind of tag close (</ or />) we decrement the counter.
protected int e4xTagDepth = 0;
* The depth of the braces in E4X content. Used to determine when to pop out
* of E4X databindings and back to E4X content
protected int e4xBraceBalance = 0;
* State to return to from E4X. Used in the generated code
protected int e4xReturnState = RawASTokenizer.E4X;
* Depth of typed collection constructs. Used in the generated code
protected int typedDepth = 0;
* Nested '<' bracket level (for <!DOCTYPE et al)
protected int docTypeLevel;
* Flag to indicate we should collect comments
protected boolean collectComments = false;
* token that we may need to return for rules that may return more than one
* token
protected ASToken bufferToken;
// Continues the current aggregation. The only statement visible here tests
// whether aggregate contents already exist.
// NOTE(review): the statements that follow this check are not visible in this
// view - confirm both outcomes of the check against the full source.
protected void continueAggregate()
if (!hasAggregateContents())
/**
 * Convert the unicode escape sequence and append the unicode character to
 * the text buffer. The input <b>must</b> have the leading escape character "\".
 *
 * @param escapeSequence Unicode escape sequence like {@code \u00FF} or
 * {@code \xFF}.
 */
protected final void aggregateEscapedUnicodeChar(final String escapeSequence)
// Decode the escape sequence into its numeric code point.
final int unicode = decodeEscapedUnicode(escapeSequence);
// Check that the decoded value lies in the valid Unicode code point range.
// For input accepted by decodeEscapedUnicode (two or four hex digits) the
// value is at most 0xFFFF, so this check is purely defensive.
// NOTE(review): the statements on either side of this guard are not visible
// in this view - confirm what happens for invalid and valid values.
if (!Character.isValidCodePoint(unicode))
/**
 * Build a token from the aggregation buffer.
 *
 * @param type Type of the new token.
 * @return New token.
 */
public final ASToken buildAggregateToken(final int type)
// Obtain the token through fetchToken.
// NOTE(review): only one argument of this call is visible in this view; the
// remaining arguments and the closing parenthesis must be confirmed against
// the full source.
final ASToken token = fetchToken(
// Visible argument: the tokenizer offset plus the distance between the start
// of the current read and the marked position.
getOffset() + (markedPosition() - readStart()),
// Drop the reference to the aggregation buffer before handing the token back.
aggregateContents = null;
return token;
* Build an e4x text token from a given entity type.
protected final ASToken buildE4XTextToken(final int type)
final ASToken token = buildToken(type);
if (hasAggregateContents())
bufferToken = token;
final int ttype;
if (((RawASTokenizer)this).yystate() == RawASTokenizer.E4XTEXTVALUE)
ttype = ASTokenTypes.TOKEN_E4X_TEXT;
ttype = ASTokenTypes.TOKEN_E4X_STRING;
return buildAggregateToken(ttype);
aggregateContents = null;
return token;
protected final void fillBuffer(StringBuilder builder)
builder.append(buffer(), readStart(), markedPosition() - readStart());
* returns the start of the current read
* @return a non-negative int
protected abstract int readStart();
* returns the marked position in the current read
* @return a non-negative int
protected abstract int markedPosition();
* returns the end of the current read
* @return a non-negative int
protected abstract int readEnd();
* returns the current active buffer. this will change and is not constant
* @return the current buffer
protected abstract char[] buffer();
* True if we want to collect comments, besides ASDoc. ASDoc comments will
* always be returned
* @param collect <code>true</code to collect non-ASDoc comments,
* <code>false</code> to ignore them.
public void setCollectComments(final boolean collect)
collectComments = collect;
* in E4X, <code>&lt;</code> is allowed in char and string literals.
* @param allow true if <code>&lt;</code> is allowed
protected abstract void setAllowLTInE4XStringLiterals(boolean allow);
protected ASToken[] initTokenPool()
return new ASToken[10];
* @return true if we are still parsing XML content
public boolean isInXML()
return e4xTagDepth > 0 && e4xBraceBalance == 0;
* @return true if we are parsing inside of an E4x databinding expression
public boolean isInE4XDatabinding()
return e4xBraceBalance > 0;
* begins the current state in the lexer
* @param state the state to begin
protected abstract void yybegin(int state);
* returns a char at the given offset into the internal char buffer
* @param pos the offset into the buffer
* @return a char
protected abstract char yycharat(int pos);
* returns the length of the current read
* @return a non-negative int
protected abstract int yylength();
protected final ASToken newToken(final int type, final int start, final int end, final int line, final int column, final CharSequence text)
return new ASToken(type, start, end, line, column, text);
* @return true if we are collecting comments
public final boolean isCollectingComments()
return collectComments;
public final ASToken getBufferToken()
final ASToken retVal = bufferToken.clone();
bufferToken = null;
return retVal;
public final boolean hasBufferToken()
return bufferToken != null;
// Resets E4X tracking state.
public void reset()
// Zero both E4X counters; afterwards isInXML() and isInE4XDatabinding()
// both report false.
e4xBraceBalance = 0;
e4xTagDepth = 0;
// NOTE(review): the visible statements leave the other state fields
// (isInCloseTag, e4xReturnState, typedDepth, docTypeLevel, bufferToken)
// untouched, and no superclass reset call is visible - confirm against the
// full source whether lines are missing from this view.
* Matches escaped unicode sequence like {@code \u00FF}.
static final String PATTERN_U4 = "\\\\u[a-fA-F0-9]{4}";
* Matches escaped unicode sequence like {@code \xFF}.
private static final String PATTERN_X2 = "\\\\x[a-fA-F0-9]{2}";
* Matches either {@link #PATTERN_U4} or {@link #PATTERN_X2}.
private static final String PATTERN_UNICODE = String.format("(%s)|(%s)", PATTERN_U4, PATTERN_X2);
* Convert escaped unicode sequence such as {@code \u00FF} and {@code \xFF}
* to HTML entities like {@code &#xFF}.
* @param escapedUnicode Escaped unicode sequence in the form of either
* {@code \u0000} or {@code \xFF}.
* @return Encoded HTML entity string.
protected String escapedUnicodeToHtmlEntity(final String escapedUnicode)
final int unicode = decodeEscapedUnicode(escapedUnicode);
return String.format("&#x%H;", unicode);
/**
 * Report unexpected line terminators in a string literal.
 */
// Builds the problem describing a line break inside a string literal.
protected final void reportInvalidLineTerminatorInStringLiteral()
// Source location obtained from getCurrentSourceLocation with argument 0.
final ISourceLocation location = getCurrentSourceLocation(0);
final ICompilerProblem problem = new StringLiteralMustBeTerminatedBeforeLineBreakProblem(location);
// NOTE(review): no visible statement records `problem`; the call that hands
// it to the problem collection appears to be missing from this view - confirm.
/**
 * Report syntax error: input ended before reaching the closing quotation
 * mark for a string literal.
 */
// Builds the problem describing a string literal that was never closed.
protected final void reportUnclosedStringLiteral()
// Source location obtained from getCurrentSourceLocation with argument 0.
final ISourceLocation location = getCurrentSourceLocation(0);
final ICompilerProblem problem = new StringLiteralNotClosedProblem(location);
// NOTE(review): no visible statement records `problem`; the call that hands
// it to the problem collection appears to be missing from this view - confirm.
/**
 * Report syntax error: input ended before ASDoc is closed.
 */
// Builds the problem describing an ASDoc comment that was never closed.
protected final void reportUnclosedASDoc()
// Source location obtained from getCurrentSourceLocation with argument 0.
final ISourceLocation location = getCurrentSourceLocation(0);
final ICompilerProblem problem = new ASDocNotClosedProblem(location);
// NOTE(review): no visible statement records `problem`; the call that hands
// it to the problem collection appears to be missing from this view - confirm.
/**
 * Report syntax error: input ended before the comment is closed.
 */
// Builds the problem describing a comment that was never closed.
protected final void reportUnclosedComment()
// Source location obtained from getCurrentSourceLocation with argument 0.
final ISourceLocation location = getCurrentSourceLocation(0);
final ICompilerProblem problem = new CommentNotClosedProblem(location);
// NOTE(review): no visible statement records `problem`; the call that hands
// it to the problem collection appears to be missing from this view - confirm.
/**
 * Report syntax error: input ended before CDATA is closed.
 */
// Builds the problem describing a CDATA section that was never closed.
protected final void reportUnclosedCDATA()
// Source location obtained from getCurrentSourceLocation with argument 0.
final ISourceLocation location = getCurrentSourceLocation(0);
final ICompilerProblem problem = new CDataNotClosedProblem(location);
// NOTE(review): no visible statement records `problem`; the call that hands
// it to the problem collection appears to be missing from this view - confirm.
* Convert escaped unicode sequence such as {@code \u00FF} and {@code \xFF}
* to unicode code point.
* @param escapedUnicode Escaped unicode sequence in the form of either
* {@code \u0000} or {@code \xFF}.
* @return Unicode number.
protected static int decodeEscapedUnicode(final String escapedUnicode)
if (escapedUnicode == null)
throw new IllegalArgumentException("Escape sequence can't be null");
if (!escapedUnicode.matches(PATTERN_UNICODE))
throw new IllegalStateException("Only call this method from a lexer rule that matches unicode sequence pattern.");
return Integer.parseInt(escapedUnicode.substring(2), 16);