/*
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.royale.compiler.internal.parsing.as;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.royale.compiler.clients.ASC;
import org.apache.royale.compiler.constants.IASKeywordConstants;
import org.apache.royale.compiler.filespecs.IFileSpecification;
import org.apache.royale.compiler.internal.parsing.ITokenStreamFilter;
import org.apache.royale.compiler.internal.parsing.SourceFragmentsReader;
import org.apache.royale.compiler.internal.parsing.TokenBase;
import org.apache.royale.compiler.internal.units.ASCompilationUnit;
import org.apache.royale.compiler.parsing.IASToken;
import org.apache.royale.compiler.parsing.IASTokenizer;
import org.apache.royale.compiler.parsing.IASToken.ASTokenKind;
import org.apache.royale.compiler.problems.CyclicalIncludesProblem;
import org.apache.royale.compiler.problems.ExpectXmlBeforeNamespaceProblem;
import org.apache.royale.compiler.problems.FileNotFoundProblem;
import org.apache.royale.compiler.problems.ICompilerProblem;
import org.apache.royale.compiler.problems.InternalCompilerProblem2;
import org.apache.royale.compiler.problems.UnexpectedTokenProblem;
import org.apache.royale.utils.ILengthAwareReader;
import org.apache.royale.utils.NonLockingStringReader;
import org.apache.royale.utils.ILengthAwareReader.InputType;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
/**
* This Tokenizer provides tokens to be used by various clients, most notably
* the ASParser. Given the ambiguities in the ActionScript 3 language, this
* tokenizer also serves to disambiguate tokens based on a combination of
* look-behind and lookahead. For all cases of ambiguity, only one token of
* look-behind is needed, and in the worst case, n tokens of lookahead, where
* n is the number of tokens that can be produced. Some other state is kept in
* order to know which type of container we may be in (function, class,
* interface, etc.). We buffer lookahead token results to avoid re-scanning.
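* <p>
* A minimal usage sketch (one of several entry points; error handling
* omitted):
* <pre>{@code
* StreamingASTokenizer tokenizer =
*         StreamingASTokenizer.createForMXMLIndexing("Example.as");
* IASToken[] tokens = tokenizer.getTokens("var x:int = 1;");
* }</pre>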
*/
public class StreamingASTokenizer implements ASTokenTypes, IASTokenizer, Closeable
{
private static final String FOR_EACH = "for each";
private static final String XML = "xml";
private static final String DEFAULT_XML_NAMESPACE = "default xml namespace";
private static final String ZERO = "0";
/**
* Map from keyword text to token type.
* <p>
* We use a HashMap here to avoid slowing down the performance of the
* underlying lexer. We avoid the "longest match" problem, which would require
* a lot of rescanning at the lexer level to distinguish keywords from
* identifiers. And since hash map lookup is constant-time, this is (in theory)
* faster than doing this in the scanner, where we would be bound by i/o and
* state-machine back-tracking.
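* <p>
* A sketch of the lookup this map enables (mirroring the identifier handling
* in {@code nextTokenFromReader()}):
* <pre>{@code
* final Integer keywordType = keywordToTokenMap.get("for");
* if (keywordType != null)
*     token.setType(keywordType); // identifier promoted to TOKEN_KEYWORD_FOR
* }</pre>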
*/
private static final Map<String, Integer> keywordToTokenMap = new ImmutableMap.Builder<String, Integer>()
.put(IASKeywordConstants.AS, TOKEN_KEYWORD_AS)
.put(IASKeywordConstants.IS, TOKEN_KEYWORD_IS)
.put(IASKeywordConstants.INSTANCEOF, TOKEN_KEYWORD_INSTANCEOF)
.put(IASKeywordConstants.IN, TOKEN_KEYWORD_IN)
.put(IASKeywordConstants.DELETE, TOKEN_KEYWORD_DELETE)
.put(IASKeywordConstants.TYPEOF, TOKEN_KEYWORD_TYPEOF)
.put(IASKeywordConstants.CONST, TOKEN_KEYWORD_CONST)
.put(IASKeywordConstants.GET, TOKEN_RESERVED_WORD_GET)
.put(IASKeywordConstants.IMPLEMENTS, TOKEN_RESERVED_WORD_IMPLEMENTS)
.put(IASKeywordConstants.IMPORT, TOKEN_KEYWORD_IMPORT)
.put(IASKeywordConstants.USE, TOKEN_KEYWORD_USE)
.put(IASKeywordConstants.EXTENDS, TOKEN_RESERVED_WORD_EXTENDS)
.put(IASKeywordConstants.NEW, TOKEN_KEYWORD_NEW)
.put(IASKeywordConstants.DYNAMIC, TOKEN_MODIFIER_DYNAMIC)
.put(IASKeywordConstants.FINAL, TOKEN_MODIFIER_FINAL)
.put(IASKeywordConstants.NATIVE, TOKEN_MODIFIER_NATIVE)
.put(IASKeywordConstants.OVERRIDE, TOKEN_MODIFIER_OVERRIDE)
.put(IASKeywordConstants.STATIC, TOKEN_MODIFIER_STATIC)
.put(IASKeywordConstants.VIRTUAL, TOKEN_MODIFIER_VIRTUAL)
.put(IASKeywordConstants.ABSTRACT, TOKEN_MODIFIER_ABSTRACT)
.put(IASKeywordConstants.SET, TOKEN_RESERVED_WORD_SET)
// Keywords with special token types that affect subsequent blocks
.put(IASKeywordConstants.CATCH, TOKEN_KEYWORD_CATCH)
.put(IASKeywordConstants.CLASS, TOKEN_KEYWORD_CLASS)
.put(IASKeywordConstants.FUNCTION, TOKEN_KEYWORD_FUNCTION)
.put(IASKeywordConstants.INTERFACE, TOKEN_KEYWORD_INTERFACE)
.put(IASKeywordConstants.PACKAGE, TOKEN_KEYWORD_PACKAGE)
// #120009: allow "var" inside a parameter list, even though it's not
// valid AS (don't turn the subsequent function block open into a block open)
.put(IASKeywordConstants.VAR, TOKEN_KEYWORD_VAR)
.put(IASKeywordConstants.FALSE, TOKEN_KEYWORD_FALSE)
.put(IASKeywordConstants.NULL, TOKEN_KEYWORD_NULL)
.put(IASKeywordConstants.TRUE, TOKEN_KEYWORD_TRUE)
.put(IASKeywordConstants.PUBLIC, HIDDEN_TOKEN_BUILTIN_NS)
.put(IASKeywordConstants.PRIVATE, HIDDEN_TOKEN_BUILTIN_NS)
.put(IASKeywordConstants.PROTECTED, HIDDEN_TOKEN_BUILTIN_NS)
.put(IASKeywordConstants.INTERNAL, HIDDEN_TOKEN_BUILTIN_NS)
.put(IASKeywordConstants.INCLUDE, TOKEN_KEYWORD_INCLUDE)
// Keywords for statements that affect subsequent blocks
.put(IASKeywordConstants.DO, TOKEN_KEYWORD_DO)
.put(IASKeywordConstants.WHILE, TOKEN_KEYWORD_WHILE)
.put(IASKeywordConstants.BREAK, TOKEN_KEYWORD_BREAK)
.put(IASKeywordConstants.CONTINUE, TOKEN_KEYWORD_CONTINUE)
.put(IASKeywordConstants.GOTO, TOKEN_RESERVED_WORD_GOTO)
.put(IASKeywordConstants.FOR, TOKEN_KEYWORD_FOR)
.put(StreamingASTokenizer.FOR_EACH, TOKEN_KEYWORD_FOR)
.put(IASKeywordConstants.EACH, TOKEN_RESERVED_WORD_EACH)
.put(IASKeywordConstants.WITH, TOKEN_KEYWORD_WITH)
.put(IASKeywordConstants.ELSE, TOKEN_KEYWORD_ELSE)
.put(IASKeywordConstants.IF, TOKEN_KEYWORD_IF)
.put(IASKeywordConstants.SWITCH, TOKEN_KEYWORD_SWITCH)
.put(IASKeywordConstants.CASE, TOKEN_KEYWORD_CASE)
.put(IASKeywordConstants.DEFAULT, TOKEN_KEYWORD_DEFAULT)
.put(IASKeywordConstants.TRY, TOKEN_KEYWORD_TRY)
.put(IASKeywordConstants.FINALLY, TOKEN_KEYWORD_FINALLY)
// Keywords with a generic keyword token type that have no effect
// on subsequent blocks.
.put(IASKeywordConstants.NAMESPACE, TOKEN_RESERVED_WORD_NAMESPACE)
.put(IASKeywordConstants.CONFIG, TOKEN_RESERVED_WORD_CONFIG)
.put(IASKeywordConstants.THROW, TOKEN_KEYWORD_THROW)
.put(IASKeywordConstants.SUPER, TOKEN_KEYWORD_SUPER)
.put(IASKeywordConstants.THIS, TOKEN_KEYWORD_THIS)
.put(IASKeywordConstants.VOID, TOKEN_KEYWORD_VOID)
.put(IASKeywordConstants.RETURN, TOKEN_KEYWORD_RETURN)
.build();
/**
* Configuration for our tokenizer
*/
private static final class TokenizerConfig
{
/**
* Flag that lets us ignore keywords for more general string parsing
*/
public boolean ignoreKeywords = false;
/**
* Flag that lets us be aware of metadata
*/
public boolean findMetadata = true;
/**
* Flag indicating that we are tokenizing full content/files, and not
* segments
*/
public boolean completeContent = true;
/**
* Token stream filter for legacy APIs
*/
public ITokenStreamFilter filter;
/**
* Flag indicating we should collect comments
*/
public boolean collectComments = false;
/**
* Flag indicating we follow include statements, including their tokens
*/
public boolean followIncludes = true;
}
private Reader reader;
//underlying lexer
private RawASTokenizer tokenizer;
//last exception to prevent us from looping forever
private Exception lastException = null;
//LA buffer
private final List<ASToken> lookAheadBuffer;
private int bufferSize = 0; //maintain size ourselves since it's faster
//last token we encountered, used for lookback
private ASToken lastToken;
private int offsetAdjustment; //for offset adjustment
private int lineAdjustment = 0;
private int columnAdjustment = 0;
private IncludeHandler includeHandler;
/**
* The forked tokenizer for included files. If not null, {@link #next()}
* will return a token from this tokenizer.
* <p>
* After all the tokens are returned from the included source file,
* {@link #closeIncludeTokenizer()} closes the tokenizer and sets this field
* to null.
*/
private StreamingASTokenizer forkIncludeTokenizer;
/**
* Flag to indicate if we have followed include statements
*/
private boolean hasEncounteredIncludeStatements = false;
private TokenizerConfig config;
/**
* Path of the source file being tokenized. This is used when resolving
* included file paths. Set by {@link #setPath(String)}.
*/
private String sourcePath;
/**
* Lexer problems.
*/
private final List<ICompilerProblem> problems = new ArrayList<ICompilerProblem>();
/**
* Imaginary tokens generated for {@code asc -in} option.
*/
private Iterator<ASToken> ascIncludeImaginaryTokens;
/**
* You should probably not use this constructor. There is some legacy code
* that uses this constructor, but that code should be updated to use one of
* the static create methods below.
* <p>
* TODO: make this private.
*/
public StreamingASTokenizer(final Reader reader)
{
this();
setReader(reader);
}
/**
* A pool used to avoid creating duplicate string instances
*/
private final HashMap<String, String> stringPool;
/**
* You should probably not use this constructor. There is a lot of code that
* uses this constructor, but that code should be updated to use one of the
* static create methods below.
* <p>
* TODO: make this private.
*/
public StreamingASTokenizer()
{
tokenizer = new RawASTokenizer();
config = new TokenizerConfig();
lookAheadBuffer = new ArrayList<ASToken>(5);
includeHandler = IncludeHandler.creatDefaultIncludeHandler();
stringPool = new HashMap<String, String>();
// Initialize string pool with keyword strings. The keyword strings
// are declared as constants which are automatically "interned".
for (final String keyword : keywordToTokenMap.keySet())
{
stringPool.put(keyword, keyword);
}
}
/**
* Creates a tokenizer suitable for the MXML indexing code.
*
* @param fileName Path of the file whose fragments the new tokenizer will
* tokenize.
* @return A new tokenizer suitable for tokenizing script fragments in an
* MXML document that is being tokenized for the full-text search index.
*/
public static StreamingASTokenizer createForMXMLIndexing(String fileName)
{
StreamingASTokenizer result = new StreamingASTokenizer();
result.setPath(fileName);
result.includeHandler.enterFile(result.sourcePath);
return result;
}
/**
* Fork a new tokenizer when an "include" directive is found. This method
* will pass the {@code StructureTracker} of the current tokenizer down to
* the forked tokenizer.
*
* @param currentTokenizer Current tokenizer.
* @param fileSpec File specification of the included file.
* @param includeHandler Include handler.
* @return A tokenizer for the included file.
* @throws FileNotFoundException Error.
*/
private static StreamingASTokenizer createForIncludeFile(
final StreamingASTokenizer currentTokenizer,
final IFileSpecification fileSpec,
final IncludeHandler includeHandler)
throws FileNotFoundException
{
final StreamingASTokenizer tokenizer = create(fileSpec, includeHandler);
return tokenizer;
}
/**
* Create a tokenizer from a source file. This is the lexer entry-point used
* by {@link ASCompilationUnit}.
*
* @param fileSpec File specification provides the reader and the file path.
* @param includeHandler Include handler.
* @throws FileNotFoundException error
*/
protected static StreamingASTokenizer create(
final IFileSpecification fileSpec,
final IncludeHandler includeHandler)
throws FileNotFoundException
{
assert fileSpec != null : "File specification can't be null.";
assert includeHandler != null : "Include handler can't be null.";
final StreamingASTokenizer tokenizer = new StreamingASTokenizer();
tokenizer.setReader(fileSpec.createReader());
tokenizer.setPath(fileSpec.getPath());
tokenizer.includeHandler = includeHandler;
tokenizer.includeHandler.enterFile(tokenizer.sourcePath);
return tokenizer;
}
/**
* Create a tokenizer for {@code ASParser#parseFile()}.
*
* @param fileSpec File specification provides the reader and the file path.
* @param includeHandler Include handler.
* @param followIncludes True if included files are also parsed.
* @param includedFiles A list of included file paths.
* @return Lexer.
* @throws FileNotFoundException error
*/
protected static StreamingASTokenizer createForASParser(
final IFileSpecification fileSpec,
final IncludeHandler includeHandler,
final boolean followIncludes,
final List<String> includedFiles)
throws FileNotFoundException
{
final StreamingASTokenizer tokenizer = create(fileSpec, includeHandler);
tokenizer.setFollowIncludes(followIncludes);
final ImmutableList.Builder<ASToken> imaginaryTokensBuilder =
new ImmutableList.Builder<ASToken>();
for (final String filename : includedFiles)
{
imaginaryTokensBuilder.add(new ASToken(
ASTokenTypes.TOKEN_KEYWORD_INCLUDE,
0,
0,
0,
0,
"include"));
imaginaryTokensBuilder.add(new ASToken(
ASTokenTypes.TOKEN_LITERAL_STRING,
0,
0,
0,
0,
'"' + filename + '"'));
}
tokenizer.ascIncludeImaginaryTokens = imaginaryTokensBuilder.build().iterator();
return tokenizer;
}
/**
* This creator doesn't "enter file" on creation.
*/
protected static StreamingASTokenizer createForInlineScriptScopeBuilding(
final Reader reader,
final String path,
final IncludeHandler includeHandler,
final int offsetAdjustment,
final int lineAdjustment,
final int columnAdjustment)
{
assert reader != null : "Reader can't be null";
assert path != null : "Path can't be null";
assert includeHandler != null : "IncludeHandler can't be null";
final StreamingASTokenizer tokenizer = new StreamingASTokenizer();
tokenizer.setReader(reader);
tokenizer.setPath(path);
tokenizer.includeHandler = includeHandler;
tokenizer.setSourcePositionAdjustment(
offsetAdjustment, lineAdjustment, columnAdjustment);
return tokenizer;
}
/**
* Create a tokenizer to parse an expression.
*/
protected static StreamingASTokenizer createForInlineExpressionParsing(
final Reader reader,
final String path
)
{
assert reader != null : "Reader can't be null";
assert path != null : "Path can't be null";
final StreamingASTokenizer tokenizer = new StreamingASTokenizer();
tokenizer.setReader(reader);
tokenizer.setPath(path);
tokenizer.includeHandler.enterFile(path);
// Have to do this to get the tokenizer to work right - some things, like function expressions,
// won't tokenize correctly unless the last token is '=' or certain other special tokens.
tokenizer.lastToken = new ASToken(ASTokenTypes.TOKEN_OPERATOR_ASSIGNMENT, -1, -1, -1, -1, "=");
return tokenizer;
}
/**
* This method can create a {@code StreamingASTokenizer} with optional
* "follow includes". If {@code IncludeHandler} is not null, it will follow
* {@code include} directives.
*
* @param reader Input to the tokenizer.
* @param path File path of the input.
* @param includeHandler If not null, the created tokenizer will follow
* {@code include} directives.
* @return A {@code StreamingASTokenizer}.
*/
public static StreamingASTokenizer createForRepairingASTokenizer(
final Reader reader,
final String path,
final IncludeHandler includeHandler)
{
assert path != null || includeHandler == null : "We need a source path to follow includes";
final StreamingASTokenizer tokenizer = new StreamingASTokenizer();
tokenizer.setReader(reader);
tokenizer.setPath(path);
if (includeHandler != null)
{
tokenizer.includeHandler = includeHandler;
includeHandler.enterFile(path);
}
return tokenizer;
}
/**
* Sets the {@link Reader} that supplies the content to this tokenizer. It
* is up to the client to close any previous readers that have been in use.
* It is also up to the client to close the reader once it has been used.
*
* @param reader a {@link Reader}
*/
public void setReader(final Reader reader)
{
setReader(reader, 0, 0, 0);
}
/**
* Sets the {@link Reader} that supplies the content to this tokenizer. It
* is up to the client to close any previous readers that have been in use.
* It is also up to the client to close the reader once it has been used.
*
* @param reader a {@link Reader}
* @param offset Offset adjustment. If the specified reader is reading from
* a string extracted from a source file, this should be the offset of the
* first character read from the reader in the source file.
* @param line Line adjustment.
* @param column Column adjustment.
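* <p>
* A sketch with illustrative values ({@code scriptReader} and the numbers
* are hypothetical):
* <pre>{@code
* // Script body extracted at offset 120, line 4, column 8 of the
* // enclosing document.
* tokenizer.setReader(scriptReader, 120, 4, 8);
* }</pre>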
*/
public void setReader(final Reader reader, int offset, int line, int column)
{
this.reader = reader;
tokenizer = new RawASTokenizer();
tokenizer.yyreset(reader);
tokenizer.setCollectComments(config.collectComments);
setSourcePositionAdjustment(offset, line, column);
}
/**
* Sets the path to the file this tokenizer is scanning
*
* @param path a file path
*/
@Override
public void setPath(String path)
{
assert path != null : "path of tokenizer shouldn't be null";
sourcePath = path;
tokenizer.setSourcePath(path);
}
/**
* Allows for the adjustment of offset, line and column information when
* parsing subsequences of text. This should be called before tokenization
* has started
*
* @param offset The offset where the fragment starts.
* @param line The line where the fragment starts. This should be a
* ZERO-based line number
* @param column The column where the fragment starts. This should be a
* ZERO-based column number
*/
public void setSourcePositionAdjustment(int offset, int line, int column)
{
offsetAdjustment = offset;
lineAdjustment = line;
columnAdjustment = column;
}
/**
* Sets whether comments are collected: single-line and multi-line.
* Default is <code>false</code>
*
* @param collect true if we should collect comments
*/
@Override
public void setCollectComments(final boolean collect)
{
config.collectComments = collect;
if (tokenizer != null)
tokenizer.setCollectComments(collect);
}
/**
* Sets whether we follow include statements, including their tokens.
* Default is <code>true</code>
*
* @param followIncludes true if we should follow includes
*/
@Override
public void setFollowIncludes(final boolean followIncludes)
{
config.followIncludes = followIncludes;
}
/**
* Closes the underlying reader
*/
@Override
public void close() throws IOException
{
if (tokenizer != null)
{
tokenizer.reset();
tokenizer.yyclose(); //close the reader
}
}
/**
* Sets whether we ignore keywords while scanning. Default is
* <code>false</code>
*
* @param ignore true if we should ignore keywords
*/
public void setIgnoreKeywords(final boolean ignore)
{
config.ignoreKeywords = ignore;
}
/**
* Sets whether we are scanning a full file, or a fragment. Default is
* <code>true</code>
*
* @param full true if we are scanning a full file.
*/
public void setScanningFullContent(final boolean full)
{
config.completeContent = full;
}
/**
* Sets whether we will find metadata constructs. Default is
* <code>true</code>
*
* @param aware true if we will find metadata
*/
public void setIsMetadataAware(final boolean aware)
{
config.findMetadata = aware;
}
/**
* Sets the {@link ITokenStreamFilter} used to filter out unwanted tokens
*
* @param filter the token filter to alter the stream returned from the
* tokenizer
*/
public void setTokenFilter(ITokenStreamFilter filter)
{
config.filter = filter;
}
/**
* Sets the include handler used by this tokenizer to get
* {@link IFileSpecification} for included files.
*
* @param handler {@link IncludeHandler} this tokenizer should use.
*/
public void setIncludeHandler(IncludeHandler handler)
{
includeHandler = handler;
}
/**
* Indicates whether we have encountered tokenization problems. Can be called
* once scanning has begun.
*
* @return true if problems have been encountered
*/
public boolean hasTokenizationProblems()
{
return tokenizer.hasProblems() || problems.size() > 0;
}
/**
* Indicates whether this tokenizer has encountered include statements,
* regardless of whether it is set to follow them or not
*
* @return true if we have encountered includes
*/
public boolean hasEncounteredIncludeStatements()
{
return hasEncounteredIncludeStatements;
}
/**
* Returns a collection of problems that have been encountered while
* scanning.
*
* @return a list of problems, never null
*/
public List<ICompilerProblem> getTokenizationProblems()
{
ArrayList<ICompilerProblem> problems = new ArrayList<ICompilerProblem>(this.problems);
problems.addAll(tokenizer.getProblems());
return problems;
}
public ASToken[] getTokens(final Reader reader, ITokenStreamFilter filter)
{
setReader(reader);
List<ASToken> tokenList = initializeTokenList(reader);
ASToken token = null;
do
{
token = next();
if (token != null && filter.accept(token))
tokenList.add(token.clone()); //make a copy because of object pool
}
while (token != null);
return tokenList.toArray(new ASToken[0]);
}
@Override
public ASToken[] getTokens(final Reader reader)
{
if (config.filter != null)
return getTokens(reader, config.filter);
setReader(reader);
List<ASToken> tokenList = initializeTokenList(reader);
ASToken token = null;
do
{
token = next();
if (token != null)
tokenList.add(token.clone()); //copy ctor because of object pool
}
while (token != null);
return tokenList.toArray(new ASToken[0]);
}
/**
* Creates an empty token list pre-sized from an estimate of how many tokens
* the input will produce.
*
* @param reader the reader supplying the content to be tokenized
* @return an empty, appropriately sized token list
*/
private List<ASToken> initializeTokenList(final Reader reader)
{
List<ASToken> tokenList;
int listSize = 8012;
if (reader instanceof NonLockingStringReader)
{
//we know the length of this string. For a string of length x, there are
//roughly x/5 tokens that can be constructed from it. Size the list appropriately.
listSize = 5;
if (((NonLockingStringReader)reader).getLength() > 0)
{
listSize = Math.max((int)((NonLockingStringReader)reader).getLength() / 5, 5);
}
}
else if (reader instanceof ILengthAwareReader && ((ILengthAwareReader)reader).getInputType() == InputType.FILE)
{
listSize = 9;
if (((ILengthAwareReader)reader).getLength() > 0)
{
listSize = Math.max((int)((ILengthAwareReader)reader).getLength() / 9, 9);
}
}
tokenList = new ArrayList<ASToken>(listSize);
return tokenList;
}
@Override
public IASToken[] getTokens(final String range)
{
return getTokens(new NonLockingStringReader(range));
}
/**
* Returns the next token that can be produced from the underlying reader
*
* @param filter an {@link ITokenStreamFilter} to restrict the tokens that
* are returned
* @return an ASToken, or null if no more tokens can be produced
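* <p>
* A sketch, assuming {@link ITokenStreamFilter} exposes a single
* {@code accept} method:
* <pre>{@code
* final ASToken token = tokenizer.next(new ITokenStreamFilter()
* {
*     public boolean accept(IASToken token)
*     {
*         return token.getType() != ASTokenTypes.TOKEN_SEMICOLON;
*     }
* });
* }</pre>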
*/
public final ASToken next(final ITokenStreamFilter filter)
{
ASToken retVal = null;
while (true)
{
retVal = next();
if (retVal == null || filter.accept(retVal))
{
break;
}
}
return retVal;
}
/**
* Returns the next token that can be produced from the underlying reader.
* <p>
* If the forked "include file tokenizer" is open (not null), return the
* next token from it. If the forked tokenizer reaches the end of the
* included file, close (set to null) the forked tokenizer and return token
* from the main source file.
*
* @return an ASToken, or null if no more tokens can be produced
*/
public final ASToken next()
{
ASToken retVal = null;
// If the lexer for the included file is open, read from the included tokenizer.
boolean consumeSemi = false;
try
{
// Return token from the main file.
if (forkIncludeTokenizer != null)
{
retVal = forkIncludeTokenizer.next();
// Check if the forked tokenizer reached EOF.
if (retVal == null)
{
closeIncludeTokenizer();
// We should consume the next semicolon we find.
// Most include statements are terminated with a semicolon,
// and because we read the contents of the included file,
// this could cause problems with a semicolon in a place
// we don't want it.
consumeSemi = true;
}
else
return retVal;
}
if (bufferSize > 0)
{
retVal = lookAheadBuffer.remove(0);
bufferSize--;
}
else
{
retVal = nextTokenFromReader();
}
if (retVal == null)
return null;
final int tokenType = retVal.getType();
switch (tokenType)
{
// if we see "each" at this point, it's not a syntactic keyword,
// because "for each" is recognized via lookahead when we see "for"
case TOKEN_RESERVED_WORD_EACH:
treatKeywordAsIdentifier(retVal);
processUserDefinedNamespace(retVal, 0);
return retVal;
case TOKEN_KEYWORD_INCLUDE:
{
if (lastToken != null)
{
int lastTokenType = lastToken.getType();
switch (lastTokenType)
{
case TOKEN_KEYWORD_VAR:
case TOKEN_KEYWORD_FUNCTION:
case TOKEN_RESERVED_WORD_GET:
case TOKEN_RESERVED_WORD_SET:
case TOKEN_OPERATOR_MEMBER_ACCESS:
{
retVal.setType(TOKEN_IDENTIFIER);
return retVal;
}
}
}
// "followIncludes=false" is usually used for code model
// partitioner. They want the "include" token.
if (!config.followIncludes)
return retVal;
final ASToken token = LT(1);
// "include" at EOF is always a keyword
if (token == null)
return retVal;
if (!matches(token, TOKEN_LITERAL_STRING))
{
treatKeywordAsIdentifier(retVal); // it's an identifier
processUserDefinedNamespace(retVal, 0);
}
else
{
hasEncounteredIncludeStatements = true;
// Consume the file path after the include token.
consume(1);
final String filenameTokenText = token.getText();
final String includeString = filenameTokenText.substring(1, filenameTokenText.length() - 1);
if (sourcePath == null)
throw new NullPointerException("Source file is needed for resolving included file path.");
IFileSpecification includedFileSpec = null;
//respond to problems from our file handler
includedFileSpec = includeHandler.getFileSpecificationForInclude(sourcePath, includeString);
if (includedFileSpec == null)
{
ICompilerProblem problem = new FileNotFoundProblem(token, filenameTokenText); //the text will be the path not found
problems.add(problem);
retVal = next();
return retVal;
}
if (includeHandler.isCyclicInclude(includedFileSpec.getPath()))
{
ICompilerProblem problem = new CyclicalIncludesProblem(token);
problems.add(problem);
retVal = next();
return retVal;
}
else
{
// Fork a tokenizer for the included file
try
{
forkIncludeTokenizer = createForIncludeFile(this, includedFileSpec, includeHandler);
retVal = forkIncludeTokenizer.next();
}
catch (FileNotFoundException fnfe)
{
includeHandler.handleFileNotFound(includedFileSpec);
ICompilerProblem problem = new FileNotFoundProblem(token, includedFileSpec.getPath());
problems.add(problem);
retVal = next();
return retVal;
}
}
}
// Recover from compiler problems and continue.
if (retVal == null)
{
// Included file is empty.
closeIncludeTokenizer();
// Fall back to main source.
retVal = this.next();
}
return retVal;
}
case TOKEN_RESERVED_WORD_CONFIG:
if (matches(LT(1), TOKEN_RESERVED_WORD_NAMESPACE))
{ //we have "config namespace"
retVal.setType(TOKEN_RESERVED_WORD_CONFIG);
return retVal;
}
treatKeywordAsIdentifier(retVal); //identifier
processUserDefinedNamespace(retVal, 0);
return retVal;
case HIDDEN_TOKEN_BUILTIN_NS:
if (matches(LT(1), TOKEN_OPERATOR_NS_QUALIFIER))
{ //we have public:: and this structure is not an annotation but a name ref
retVal.setType(TOKEN_NAMESPACE_NAME);
return retVal;
}
retVal.setType(TOKEN_NAMESPACE_ANNOTATION);
return retVal;
case TOKEN_MODIFIER_DYNAMIC:
case TOKEN_MODIFIER_FINAL:
case TOKEN_MODIFIER_NATIVE:
case TOKEN_MODIFIER_OVERRIDE:
case TOKEN_MODIFIER_STATIC:
case TOKEN_MODIFIER_VIRTUAL:
case TOKEN_MODIFIER_ABSTRACT:
{
// previous token is either a modifier or a namespace, or if
// null, assume keyword
// next token is from a definition or a modifier or a namespace
final ASToken nextToken = LT(1);
if (nextToken != null)
{
switch (nextToken.getType())
{
case TOKEN_KEYWORD_CLASS:
case TOKEN_KEYWORD_FUNCTION:
case TOKEN_KEYWORD_INTERFACE:
case TOKEN_RESERVED_WORD_NAMESPACE:
case TOKEN_KEYWORD_VAR:
case TOKEN_KEYWORD_CONST:
case TOKEN_MODIFIER_DYNAMIC:
case TOKEN_MODIFIER_FINAL:
case TOKEN_MODIFIER_NATIVE:
case TOKEN_MODIFIER_OVERRIDE:
case TOKEN_MODIFIER_STATIC:
case TOKEN_MODIFIER_VIRTUAL:
case TOKEN_MODIFIER_ABSTRACT:
case TOKEN_NAMESPACE_ANNOTATION:
case TOKEN_NAMESPACE_NAME:
case HIDDEN_TOKEN_BUILTIN_NS:
return retVal;
case TOKEN_IDENTIFIER:
if (isUserDefinedNamespace(nextToken, 1)) // we're already looking ahead one so make sure we look ahead one further
return retVal;
default:
// Not applicable to other token types.
break;
}
}
treatKeywordAsIdentifier(retVal);
processUserDefinedNamespace(retVal, 0);
return retVal;
}
//we combine +/- for numeric literals here
case TOKEN_OPERATOR_MINUS:
case TOKEN_OPERATOR_PLUS:
{
if (lastToken == null || !lastToken.canPreceedSignedOperator())
{
final ASToken nextToken = LT(1);
if (nextToken != null)
{
switch (nextToken.getType())
{
case TOKEN_LITERAL_NUMBER:
case TOKEN_LITERAL_HEX_NUMBER:
retVal.setEnd(nextToken.getEnd());
final StringBuilder builder = new StringBuilder(retVal.getText());
builder.append(nextToken.getText());
retVal.setText(poolString(builder.toString()));
consume(1);
retVal.setType(nextToken.getType());
break;
default:
// ignore other tokens
break;
}
}
}
return retVal;
}
//RECOGNIZE: for each
case TOKEN_KEYWORD_FOR:
{
final ASToken token = LT(1);
if (matches(token, TOKEN_RESERVED_WORD_EACH))
{
retVal.setEnd(token.getEnd());
retVal.setText(FOR_EACH);
consume(1);
return retVal;
}
if (lastToken != null)
{
int lastTokenType = lastToken.getType();
switch (lastTokenType)
{
case TOKEN_KEYWORD_VAR:
case TOKEN_KEYWORD_FUNCTION:
case TOKEN_RESERVED_WORD_GET:
case TOKEN_RESERVED_WORD_SET:
case TOKEN_OPERATOR_MEMBER_ACCESS:
retVal.setType(TOKEN_IDENTIFIER);
}
}
return retVal;
}
//RECOGNIZE: default xml namespace
//default xml namespace must exist on the same line
case TOKEN_KEYWORD_DEFAULT:
{
final ASToken maybeNS = LT(2);
final boolean foundTokenNamespace = maybeNS != null &&
maybeNS.getType() == TOKEN_RESERVED_WORD_NAMESPACE;
final ASToken maybeXML = LT(1);
if (foundTokenNamespace)
{
final boolean foundTokenXML = maybeXML != null &&
maybeXML.getType() == TOKEN_IDENTIFIER &&
XML.equals(maybeXML.getText());
if (!foundTokenXML)
{
final ICompilerProblem problem =
new ExpectXmlBeforeNamespaceProblem(maybeNS);
problems.add(problem);
}
//combine all of these tokens together
retVal.setEnd(maybeNS.getEnd());
retVal.setText(DEFAULT_XML_NAMESPACE);
retVal.setType(TOKEN_DIRECTIVE_DEFAULT_XML);
consume(2);
}
// if this isn't "default xml namespace" then
// see if it is the default case in a switch
// otherwise, assume it is an identifierName
else if (maybeXML != null &&
maybeXML.getType() != TOKEN_COLON)
retVal.setType(TOKEN_IDENTIFIER);
else if (lastToken != null)
{
int lastTokenType = lastToken.getType();
switch (lastTokenType)
{
case TOKEN_KEYWORD_VAR:
case TOKEN_KEYWORD_FUNCTION:
case TOKEN_RESERVED_WORD_GET:
case TOKEN_RESERVED_WORD_SET:
case TOKEN_OPERATOR_MEMBER_ACCESS:
retVal.setType(TOKEN_IDENTIFIER);
}
}
return retVal;
}
case TOKEN_KEYWORD_VOID:
{
//check for void 0
final ASToken token = LT(1);
if (matches(token, TOKEN_LITERAL_NUMBER) && ZERO.equals(token.getText()))
{
retVal.setType(TOKEN_VOID_0);
combineText(retVal, token);
consume(1);
}
//check for void(0)
else if (matches(token, TOKEN_PAREN_OPEN))
{
final ASToken zeroT = LT(2);
if (matches(zeroT, TOKEN_LITERAL_NUMBER) && ZERO.equals(zeroT.getText()))
{
final ASToken closeParenT = LT(3);
if (matches(closeParenT, TOKEN_PAREN_CLOSE))
{
combineText(retVal, token);
combineText(retVal, zeroT);
combineText(retVal, closeParenT);
retVal.setType(TOKEN_VOID_0);
consume(3);
}
}
}
return retVal;
}
case TOKEN_IDENTIFIER:
{
//check for user-defined namespace before we return anything
processUserDefinedNamespace(retVal, 0);
return retVal;
}
//this is for metadata processing
case TOKEN_SQUARE_OPEN:
{
retVal = tryParseMetadata(retVal);
return retVal;
}
case HIDDEN_TOKEN_STAR_ASSIGNMENT:
{
//this is to solve an ambiguous case, where we can't tell the difference between
//var foo:*=null and foo *= null;
retVal.setType(TOKEN_OPERATOR_STAR);
retVal.setEnd(retVal.getEnd() - 1);
retVal.setText("*");
//add the equals
final ASToken nextToken = tokenizer.buildToken(TOKEN_OPERATOR_ASSIGNMENT,
retVal.getEnd() + 1, retVal.getEnd() + 2,
retVal.getLine(), retVal.getColumn(), "=");
nextToken.setSourcePath(sourcePath);
addTokenToBuffer(nextToken);
return retVal;
}
case TOKEN_SEMICOLON:
if (consumeSemi)
{
return next();
}
return retVal;
case TOKEN_VOID_0:
case TOKEN_LITERAL_REGEXP:
case TOKEN_COMMA:
case TOKEN_COLON:
case TOKEN_PAREN_OPEN:
case TOKEN_PAREN_CLOSE:
case TOKEN_SQUARE_CLOSE:
case TOKEN_ELLIPSIS:
case TOKEN_OPERATOR_PLUS_ASSIGNMENT:
case TOKEN_OPERATOR_MINUS_ASSIGNMENT:
case TOKEN_OPERATOR_MULTIPLICATION_ASSIGNMENT:
case TOKEN_OPERATOR_DIVISION_ASSIGNMENT:
case TOKEN_OPERATOR_MODULO_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_AND_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_OR_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_XOR_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT_ASSIGNMENT:
case TOKEN_OPERATOR_STAR:
case TOKEN_OPERATOR_NS_QUALIFIER:
case TOKEN_ASDOC_COMMENT:
case TOKEN_OPERATOR_DIVISION:
case TOKEN_OPERATOR_MODULO:
case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT:
case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT:
case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT:
case TOKEN_OPERATOR_LESS_THAN:
case TOKEN_OPERATOR_GREATER_THAN:
case TOKEN_OPERATOR_LESS_THAN_EQUALS:
case TOKEN_OPERATOR_GREATER_THAN_EQUALS:
case TOKEN_OPERATOR_EQUAL:
case TOKEN_OPERATOR_NOT_EQUAL:
case TOKEN_OPERATOR_STRICT_EQUAL:
case TOKEN_OPERATOR_STRICT_NOT_EQUAL:
case TOKEN_OPERATOR_BITWISE_AND:
case TOKEN_OPERATOR_BITWISE_XOR:
case TOKEN_OPERATOR_BITWISE_OR:
case TOKEN_OPERATOR_LOGICAL_AND:
case TOKEN_OPERATOR_LOGICAL_OR:
case TOKEN_OPERATOR_LOGICAL_AND_ASSIGNMENT:
case TOKEN_OPERATOR_LOGICAL_OR_ASSIGNMENT:
case TOKEN_TYPED_COLLECTION_OPEN:
case TOKEN_TYPED_COLLECTION_CLOSE:
case TOKEN_OPERATOR_MEMBER_ACCESS:
case TOKEN_RESERVED_WORD_NAMESPACE:
case TOKEN_RESERVED_WORD_GET:
case TOKEN_RESERVED_WORD_SET:
case TOKEN_OPERATOR_ASSIGNMENT:
case TOKEN_TYPED_LITERAL_CLOSE:
case TOKEN_TYPED_LITERAL_OPEN:
case TOKEN_OPERATOR_TERNARY:
case TOKEN_OPERATOR_DECREMENT:
case TOKEN_OPERATOR_INCREMENT:
case TOKEN_OPERATOR_ATSIGN:
case TOKEN_OPERATOR_BITWISE_NOT:
case TOKEN_OPERATOR_LOGICAL_NOT:
case TOKEN_E4X_BINDING_CLOSE:
case TOKEN_E4X_BINDING_OPEN:
case TOKEN_OPERATOR_DESCENDANT_ACCESS:
case TOKEN_NAMESPACE_ANNOTATION:
case TOKEN_NAMESPACE_NAME:
case TOKEN_BLOCK_OPEN:
case TOKEN_BLOCK_CLOSE:
case TOKEN_KEYWORD_FUNCTION:
return retVal;
case HIDDEN_TOKEN_MULTI_LINE_COMMENT:
case HIDDEN_TOKEN_SINGLE_LINE_COMMENT:
if (tokenizer.isCollectingComments())
{
return retVal;
}
assert (false);
return null;
case TOKEN_KEYWORD_INSTANCEOF:
case TOKEN_KEYWORD_AS:
case TOKEN_KEYWORD_IN:
case TOKEN_KEYWORD_IS:
if (lastToken != null)
{
int lastTokenType = lastToken.getType();
switch (lastTokenType)
{
case TOKEN_SEMICOLON:
case TOKEN_BLOCK_OPEN:
case TOKEN_COMMA:
retVal.setType(TOKEN_IDENTIFIER);
return retVal;
}
}
else
{
// this is the first token, so assume identifier
retVal.setType(TOKEN_IDENTIFIER);
return retVal;
}
// and fall through
case TOKEN_KEYWORD_DELETE:
ASToken nextToken = LT(1);
if (nextToken != null)
{
int nextTokenType = nextToken.getType();
switch (nextTokenType)
{
// if followed by an identifier, assume it is the
// keyword and not an identifierName
case TOKEN_IDENTIFIER:
return retVal;
// followed by a comma or semicolon
// probably being used in an expression
case TOKEN_COMMA:
case TOKEN_SEMICOLON:
retVal.setType(TOKEN_IDENTIFIER);
return retVal;
}
}
// and fall through
case TOKEN_KEYWORD_BREAK:
case TOKEN_KEYWORD_CASE:
case TOKEN_KEYWORD_CATCH:
case TOKEN_KEYWORD_CLASS:
case TOKEN_KEYWORD_CONST:
case TOKEN_KEYWORD_CONTINUE:
case TOKEN_KEYWORD_DO:
case TOKEN_KEYWORD_ELSE:
case TOKEN_KEYWORD_FALSE:
case TOKEN_KEYWORD_FINALLY:
case TOKEN_KEYWORD_IF:
case TOKEN_KEYWORD_IMPORT:
case TOKEN_KEYWORD_INTERFACE:
case TOKEN_KEYWORD_NULL:
case TOKEN_KEYWORD_PACKAGE:
case TOKEN_KEYWORD_SUPER:
case TOKEN_KEYWORD_SWITCH:
case TOKEN_KEYWORD_THIS:
case TOKEN_KEYWORD_TRUE:
case TOKEN_KEYWORD_TRY:
case TOKEN_KEYWORD_TYPEOF:
case TOKEN_KEYWORD_USE:
case TOKEN_KEYWORD_VAR:
case TOKEN_KEYWORD_WHILE:
case TOKEN_KEYWORD_WITH:
case TOKEN_KEYWORD_RETURN:
case TOKEN_KEYWORD_THROW:
case TOKEN_KEYWORD_NEW:
if (lastToken != null)
{
int lastTokenType = lastToken.getType();
switch (lastTokenType)
{
case TOKEN_KEYWORD_VAR:
case TOKEN_KEYWORD_FUNCTION:
case TOKEN_RESERVED_WORD_GET:
case TOKEN_RESERVED_WORD_SET:
case TOKEN_OPERATOR_MEMBER_ACCESS:
retVal.setType(TOKEN_IDENTIFIER);
}
}
return retVal;
default:
if (ASToken.isE4X(tokenType))
return retVal;
if (retVal.isKeywordOrContextualReservedWord() || retVal.isLiteral())
return retVal;
// If we reach here, the token fails to match any processing logic.
final UnexpectedTokenProblem problem = new UnexpectedTokenProblem(
retVal,
ASTokenKind.UNKNOWN);
problems.add(problem);
}
}
catch (final Exception e)
{
if (lastException != null)
{
if (lastException.getClass().isInstance(e))
{
ICompilerProblem problem = new InternalCompilerProblem2(sourcePath, e, "StreamingASTokenizer");
problems.add(problem);
return null;
}
}
else
{
lastException = e;
retVal = null;
return next();
}
}
finally
{
consumeSemi = false;
lastToken = retVal;
}
return null;
}
/**
* Error recovery: convert the given keyword token into an identifier token,
* and log a syntax error.
*
* @param token Keyword token.
*/
private void treatKeywordAsIdentifier(final ASToken token)
{
assert token != null : "token can't be null";
assert token.isKeywordOrContextualReservedWord() : "only transfer reserved words";
if (token.isKeyword())
{
final UnexpectedTokenProblem problem = new UnexpectedTokenProblem(token, ASTokenKind.IDENTIFIER);
problems.add(problem);
}
token.setType(TOKEN_IDENTIFIER);
}
/**
* Decide within the current context whether the following content can be
* parsed as a metadata tag token.
*
* @param nextToken The next token coming from
* {@link #nextTokenFromReader()}.
* @return If the following content can be a metadata tag, the result is a
* token of type {@link ASTokenTypes#TOKEN_ATTRIBUTE}. Otherwise, the
* argument {@code nextToken} is returned.
* @throws Exception Parsing error.
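* <p>
* For example (ActionScript input):
* <pre>
* [Bindable]            // "[" after a semicolon or block open: metadata
* public var selected:Object;
*
* a = b[i];             // "[" after an identifier: array access
* </pre>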
*/
private ASToken tryParseMetadata(ASToken nextToken) throws Exception
{
// Do not initialize this variable so that Java flow-analysis can check if
// the following rules cover all the possibilities.
final boolean isNextMetadata;
if (!config.findMetadata)
{
// The lexer is configured to not recognize metadata.
isNextMetadata = false;
}
else if (lastToken == null)
{
// An "[" at the beginning of a script is always a part of a metadata.
isNextMetadata = true;
}
else
{
switch (lastToken.getType())
{
case TOKEN_ASDOC_COMMENT:
case TOKEN_SEMICOLON:
case TOKEN_ATTRIBUTE:
case TOKEN_BLOCK_OPEN:
// "[" after these tokens are always part of a metadata token.
isNextMetadata = true;
break;
case TOKEN_SQUARE_CLOSE:
case TOKEN_IDENTIFIER:
// "[" following a "]" is an array access.
// "[" following an identifier is an array access.
isNextMetadata = false;
break;
case TOKEN_KEYWORD_INCLUDE:
case TOKEN_BLOCK_CLOSE:
case TOKEN_OPERATOR_STAR:
// "[" after these tokens are part of a metadata token, if
// the "[" is on a new line.
isNextMetadata = !lastToken.matchesLine(nextToken);
break;
default:
// If we are lexing an entire file
// then at this point we "know" that the next token
// is not meta-data.
if (config.completeContent)
{
isNextMetadata = false;
}
else
{
// In "fragment" mode which is used by the syntax coloring code
// in builder, we assume the following list of tokens can not
// precede meta-data because they all start or occur in expressions.
switch (lastToken.getType())
{
case TOKEN_OPERATOR_EQUAL:
case TOKEN_OPERATOR_TERNARY:
case TOKEN_COLON:
case TOKEN_OPERATOR_PLUS:
case TOKEN_OPERATOR_MINUS:
case TOKEN_OPERATOR_STAR:
case TOKEN_OPERATOR_DIVISION:
case TOKEN_OPERATOR_MODULO:
case TOKEN_OPERATOR_BITWISE_AND:
case TOKEN_OPERATOR_BITWISE_OR:
case TOKEN_KEYWORD_AS:
case TOKEN_OPERATOR_BITWISE_XOR:
case TOKEN_OPERATOR_LOGICAL_AND:
case TOKEN_OPERATOR_LOGICAL_OR:
case TOKEN_PAREN_OPEN:
case TOKEN_COMMA:
case TOKEN_OPERATOR_BITWISE_NOT:
case TOKEN_OPERATOR_LOGICAL_NOT:
case TOKEN_OPERATOR_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT:
case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT:
case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT:
case TOKEN_OPERATOR_LESS_THAN:
case TOKEN_OPERATOR_GREATER_THAN:
case TOKEN_OPERATOR_LESS_THAN_EQUALS:
case TOKEN_OPERATOR_GREATER_THAN_EQUALS:
case TOKEN_OPERATOR_NOT_EQUAL:
case TOKEN_OPERATOR_STRICT_EQUAL:
case TOKEN_OPERATOR_STRICT_NOT_EQUAL:
case TOKEN_OPERATOR_PLUS_ASSIGNMENT:
case TOKEN_OPERATOR_MINUS_ASSIGNMENT:
case TOKEN_OPERATOR_MULTIPLICATION_ASSIGNMENT:
case TOKEN_OPERATOR_DIVISION_ASSIGNMENT:
case TOKEN_OPERATOR_MODULO_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_AND_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_OR_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_XOR_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT_ASSIGNMENT:
isNextMetadata = false;
break;
default:
isNextMetadata = true;
break;
}
}
break;
}
}
final ASToken result;
if (isNextMetadata)
result = consumeMetadata(nextToken);
else
result = nextToken;
return result;
}
/**
* Close the forked include file tokenizer, and set it to null.
*/
private void closeIncludeTokenizer()
{
if (forkIncludeTokenizer == null)
return;
try
{
problems.addAll(forkIncludeTokenizer.problems);
forkIncludeTokenizer.close();
}
catch (IOException e)
{
throw new RuntimeException(e);
}
includeHandler.leaveFile(forkIncludeTokenizer.getEndOffset());
forkIncludeTokenizer = null;
}
/**
* Consumes tokens through the end of a (potential) metadata tag, buffering
* them so they can be returned to the lookahead buffer if the guess turns
* out to be wrong.
*
* @param startToken the "[" token that opened the potential metadata tag
* @return a {@link MetaDataPayloadToken} if the content was metadata;
* otherwise the original "[" token
* @throws Exception tokenization error
*/
private final ASToken consumeMetadata(final ASToken startToken) throws Exception
{
final ASToken originalToken = new ASToken(startToken);
MetaDataPayloadToken payload = new MetaDataPayloadToken(originalToken);
final ArrayList<ASToken> safetyNet = new ArrayList<ASToken>(5);
boolean isMetadata = true;
while (true)
{
tokenizer.setReuseLastToken();
final ASToken next = LT(1);
if (next == null)
{
break;
}
safetyNet.add(new ASToken(next)); //sadly, we have to deal with the extra object creation if we're wrong
payload.addToken(next); //here too
if (!next.canExistInMetadata())
{
isMetadata = false;
//consume the last token we saw so that we don't get ourselves into an infinite loop
//it was the last token of the metadata, and this makes "next" the current token.
consume(1);
break;
}
consume(1);
if (next.getType() == TOKEN_SQUARE_CLOSE)
{
break;
}
}
if (!isMetadata)
{ //we're wrong, so let's add back the tokens to our lookahead buffer
lookAheadBuffer.addAll(safetyNet);
bufferSize = lookAheadBuffer.size();
return originalToken;
}
return payload;
}
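/**
* Fills the lookahead buffer with the given number of tokens read from the
* underlying reader.
*
* @param distance number of tokens to add to the buffer
* @throws Exception tokenization error
*/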
private final void fill(final int distance) throws Exception
{
int pos = 0;
while (pos < distance)
{
addTokenToBuffer(nextTokenFromReader());
pos++;
}
}
/**
* Appends a token to the lookahead buffer, locking it first.
*
* @param nextToken token to buffer; can be null at EOF
*/
private final void addTokenToBuffer(final ASToken nextToken)
{
bufferSize++;
lookAheadBuffer.add(nextToken);
// at EOF, nextToken can be null.
if (nextToken != null)
nextToken.lock();
}
/**
* Get the pooled version of a given string.
*
* @param text String literal.
* @return Pooled string.
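* <p>
* A minimal sketch of the pooling behavior:
* <pre>{@code
* final String a = poolString(new String("each"));
* final String b = poolString(new String("each"));
* assert a == b; // both refer to the same pooled instance
* }</pre>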
*/
private final String poolString(final String text)
{
String pooledString = stringPool.get(text);
if (pooledString == null)
{
stringPool.put(text, text);
pooledString = text;
}
return pooledString;
}
/**
* Get the next token from the source input. If this tokenizer is created
* for a source file by {@link ASC}, and there are files included by
* {@code -in} option, the tokenizer will return the
* "injected include tokens" before real tokens coming from the JFlex
* generated tokenizer.
*
* @return next token from the source input
* @throws IOException error
* @see ASCompilationUnit#createMainCompilationUnitForASC()
*/
private final ASToken nextTokenFromReader() throws IOException
{
final ASToken nextToken;
if (ascIncludeImaginaryTokens != null && ascIncludeImaginaryTokens.hasNext())
nextToken = ascIncludeImaginaryTokens.next();
else if (tokenizer.hasBufferToken())
nextToken = tokenizer.getBufferToken();
else
nextToken = tokenizer.nextToken();
if (nextToken != null)
{
// Converting unicode on-the-fly in the lexer is much slower than
// converting it here after the token is made, especially for
// identifiers.
switch (nextToken.getType())
{
case TOKEN_LITERAL_NUMBER:
nextToken.setText(poolString(nextToken.getText()));
break;
case TOKEN_LITERAL_REGEXP:
// Any "backslash-u" entities left after "convertUnicode"
// are invalid unicode escape sequences. According to AS3
// behavior, the backslash character is dropped.
nextToken.setText(poolString(convertUnicode(nextToken.getText()).replaceAll("\\\\u", "u")));
break;
case TOKEN_IDENTIFIER:
// Intern 'identifiers' and 'keywords'.
// ('keywords' are still 'identifiers' until analyzed below.)
final String originalIdentifierName = nextToken.getText();
final String normalizedIdentifierName = poolString(convertUnicode(originalIdentifierName));
nextToken.setText(normalizedIdentifierName);
if (!config.ignoreKeywords)
{
/**
* If the identifier has an escaped unicode sequence, it
* can't be a keyword.
* <p>
* According to ASL syntax spec chapter 3.4:
* <blockquote> Unicode escape sequences may be used to
* spell the names of identifiers that would otherwise
* be keywords. This is in contrast to ECMAScript.
* </blockquote>
*/
if (originalIdentifierName.equals(normalizedIdentifierName))
{
// do keyword analysis here
final Integer info = keywordToTokenMap.get(nextToken.getText());
if (info != null)
nextToken.setType(info);
}
}
break;
default:
// Ignore other tokens.
break;
}
//so we want to adjust all of our offsets here, BUT
//the column is really only valid for the first line, which is line 0.
//if we're not the first line, don't bother
nextToken.adjustLocation(
offsetAdjustment,
lineAdjustment,
nextToken.getLine() == 0 ? columnAdjustment : 0);
nextToken.storeLocalOffset();
if (includeHandler != null)
{
nextToken.setSourcePath(includeHandler.getIncludeStackTop());
includeHandler.onNextToken(nextToken);
}
if (nextToken.getSourcePath() == null)
nextToken.setSourcePath(sourcePath);
if (reader instanceof SourceFragmentsReader)
((SourceFragmentsReader)reader).adjustLocation(nextToken);
}
return nextToken;
}
/**
* Consumes the given number of tokens from the lookahead buffer
*
* @param distance the number of tokens to consume
*/
private final void consume(int distance)
{
if (bufferSize >= distance)
{
for (; distance > 0; distance--)
{
lookAheadBuffer.remove(bufferSize - 1);
bufferSize--;
}
}
}
/**
* Returns a lookahead token from the underlying lexer without consuming it
*
* @param distance distance to look ahead
* @return an {@link ASToken}
* @throws Exception
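* <p>
* A sketch: peek two tokens ahead without consuming either:
* <pre>{@code
* final ASToken first = LT(1);
* final ASToken second = LT(2);
* }</pre>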
*/
private final ASToken LT(final int distance) throws Exception
{
if (bufferSize < distance)
{
fill(distance - bufferSize);
}
return lookAheadBuffer.get(distance - 1);
}
private static final boolean matches(final ASToken token, final int type)
{
return token != null && token.getType() == type;
}
/**
* Retrieve the end offset of the file.
* <p>
* The result is the end offset of the file, not the offset of the last
* token, this allows any trailing space to be included so that the parser
* can span the result {@code FileNode} to the entire file.
*
* @return the end offset of the input file
*/
public final int getEndOffset()
{
return tokenizer.getOffset() + offsetAdjustment;
}
/**
* Computes whether the following token is a user-defined namespace. This
* method calls processUserDefinedNamespace which will change token types
*
* @param token token to start our analysis
* @param lookaheadOffset offset of the tokens to look at
* @return true if we're a user-defined namespace
* @throws Exception
*/
private final boolean isUserDefinedNamespace(final ASToken token, final int lookaheadOffset) throws Exception
{
processUserDefinedNamespace(token, lookaheadOffset);
return token.getType() == TOKEN_NAMESPACE_ANNOTATION || token.getType() == TOKEN_NAMESPACE_NAME;
}
/**
* Because AS3 supports qualified/unqualified namespaces as decorators on
* definitions, we need to detect them before we even make it to the parser.
* These look exactly like names/qnames, and so if they're on the same line
* as a definition they might be a namespace name instead of a standard
* identifier. This method will detect these cases, and change token types
* accordingly
*
* @param token token to start our analysis
* @param lookaheadOffset offset of the tokens to look at
* @throws Exception
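* <p>
* For example (ActionScript input):
* <pre>
* my_ns var x:int;   // "my_ns" becomes TOKEN_NAMESPACE_ANNOTATION
* my_ns::x = 1;      // the "::" makes "my_ns" a TOKEN_NAMESPACE_NAME
* </pre>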
*/
private final void processUserDefinedNamespace(final ASToken token, final int lookaheadOffset) throws Exception
{
token.lock();
//determine if we have a user-defined namespace
//our first token will be an identifier, and the cases we're looking for are:
//1.) user_namespace (function|var|dynamic|static|final|native|override)
//2.) my.pack.user_namespace (function|var|dynamic|static|final|native|override)
//option number 1 is probably the 99% case so optimize for it
ASToken nextToken = LT(1 + lookaheadOffset);
if (token.matchesLine(nextToken))
{
// If the next token is an identifier check to see if it should
// be modified to a TOKEN_NAMESPACE_ANNOTATION
// This is so that code like:
// ns1 ns2 var x;
// gets parsed correctly (2 namespace annotations, which is an error)
if (nextToken.getType() == TOKEN_IDENTIFIER)
processUserDefinedNamespace(nextToken, 1 + lookaheadOffset);
switch (nextToken.getType())
{
case TOKEN_KEYWORD_FUNCTION:
case TOKEN_KEYWORD_VAR:
case TOKEN_KEYWORD_CONST:
case TOKEN_RESERVED_WORD_NAMESPACE:
case TOKEN_MODIFIER_DYNAMIC:
case TOKEN_MODIFIER_FINAL:
case TOKEN_MODIFIER_NATIVE:
case TOKEN_MODIFIER_OVERRIDE:
case TOKEN_MODIFIER_STATIC:
case TOKEN_MODIFIER_VIRTUAL:
case TOKEN_MODIFIER_ABSTRACT:
case TOKEN_KEYWORD_CLASS:
case TOKEN_KEYWORD_INTERFACE:
case TOKEN_NAMESPACE_ANNOTATION:
case HIDDEN_TOKEN_BUILTIN_NS:
token.setType(TOKEN_NAMESPACE_ANNOTATION);
return;
case TOKEN_OPERATOR_NS_QUALIFIER: //simple name with a :: binding after it. has to be a NS
token.setType(TOKEN_NAMESPACE_NAME);
return;
}
if (nextToken.getType() == TOKEN_OPERATOR_MEMBER_ACCESS)
{
int nextValidPart = TOKEN_IDENTIFIER;
final ArrayList<ASToken> toTransform = new ArrayList<ASToken>(3);
toTransform.add(token);
toTransform.add(nextToken);
int laDistance = lookaheadOffset + 1;
while (true)
{
nextToken = LT(++laDistance);
if (token.matchesLine(nextToken))
{
if (nextToken.getType() == nextValidPart)
{
nextValidPart = (nextToken.getType() == TOKEN_IDENTIFIER) ? TOKEN_OPERATOR_MEMBER_ACCESS : TOKEN_IDENTIFIER;
toTransform.add(nextToken);
}
else if (nextValidPart != TOKEN_IDENTIFIER && nextToken.canFollowUserNamespace())
{
// Next token is in the follow set of a namespace,
// so all the buffered tokens need to be converted
// into namespace tokens.
for (final ASToken ttToken : toTransform)
{
if (ttToken.getType() == TOKEN_IDENTIFIER)
ttToken.setType(TOKEN_NAMESPACE_ANNOTATION);
else
ttToken.setType(TOKEN_OPERATOR_MEMBER_ACCESS);
}
break;
}
else
{
break;
}
}
else
{
break;
}
}
}
}
}
/**
* Combines the text of two tokens, adding whitespace between them and
* adjusting offsets appropriately
*
* @param target the base token that we will add the next to
* @param source the source of the text to add
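* <p>
* For example, combining a "void" token ending at offset 4 with a "0" token
* starting at offset 5 yields a single token whose text is "void 0".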
*/
private final void combineText(TokenBase target, TokenBase source)
{
StringBuilder text = new StringBuilder();
text.append(target.getText());
//add whitespace for gaps between tokens
for (int i = 0; i < (source.getStart() - target.getEnd()); i++)
{
text.append(" ");
}
text.append(source.getText());
target.setText(poolString(text.toString()));
target.setEnd(target.getStart() + text.length());
}
/**
* Unicode pattern for {@code \u0000}.
*/
private static final Pattern UNICODE_PATTERN = Pattern.compile(BaseRawASTokenizer.PATTERN_U4);
/**
* Leading characters of a unicode pattern.
*/
private static final String UNICODE_LEADING_CHARS = "\\u";
/**
* Convert escaped unicode sequence in a string. For example:
* {@code foo\u0051bar} is converted into {@code fooQbar}.
*
* @param text input string
* @return converted text
*/
static String convertUnicode(final String text)
{
// Calling Pattern.matcher() is much slower than String.contains(), so
// we need this predicate to skip unnecessary RegEx computation.
if (text.contains(UNICODE_LEADING_CHARS))
{
final StringBuilder result = new StringBuilder();
final Matcher matcher = UNICODE_PATTERN.matcher(text);
int start = 0;
while (matcher.find())
{
result.append(text, start, matcher.start());
result.append(Character.toChars(BaseRawASTokenizer.decodeEscapedUnicode(matcher.group())));
start = matcher.end();
}
result.append(text, start, text.length());
return result.toString();
}
else
{
return text;
}
}
/**
* Gets the source path to the file being tokenized.
*/
public String getSourcePath()
{
return sourcePath;
}
}