/*
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.royale.compiler.internal.parsing.as;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.royale.compiler.clients.ASC;
import org.apache.royale.compiler.constants.IASKeywordConstants;
import org.apache.royale.compiler.filespecs.IFileSpecification;
import org.apache.royale.compiler.internal.parsing.ITokenStreamFilter;
import org.apache.royale.compiler.internal.parsing.SourceFragmentsReader;
import org.apache.royale.compiler.internal.parsing.TokenBase;
import org.apache.royale.compiler.internal.units.ASCompilationUnit;
import org.apache.royale.compiler.parsing.IASToken;
import org.apache.royale.compiler.parsing.IASTokenizer;
import org.apache.royale.compiler.parsing.IASToken.ASTokenKind;
import org.apache.royale.compiler.problems.CyclicalIncludesProblem;
import org.apache.royale.compiler.problems.ExpectXmlBeforeNamespaceProblem;
import org.apache.royale.compiler.problems.FileNotFoundProblem;
import org.apache.royale.compiler.problems.ICompilerProblem;
import org.apache.royale.compiler.problems.InternalCompilerProblem2;
import org.apache.royale.compiler.problems.UnexpectedTokenProblem;
import org.apache.royale.utils.ILengthAwareReader;
import org.apache.royale.utils.NonLockingStringReader;
import org.apache.royale.utils.ILengthAwareReader.InputType;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
/**
* This Tokenizer provides tokens to be used by various clients, most notably
* the ASParser. Given the ambiguities in the ActionScript 3 language, this
* tokenizer also serves to disambiguate tokens based on a combination of
* look-behind and lookahead. For all cases of ambiguity, only one token of
* look-behind is needed, and in the worst case, n tokens of lookahead, where
* n is the number of tokens that can be produced. Some other state is kept in
* order to know which type of container we may be in (function, class,
* interface, etc.). We buffer lookahead token results to avoid re-scanning.
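* <p>
* A minimal usage sketch (one of several entry points; error handling
* omitted):
* <pre>{@code
* StreamingASTokenizer tokenizer =
*         StreamingASTokenizer.createForMXMLIndexing("Example.as");
* IASToken[] tokens = tokenizer.getTokens("var x:int = 1;");
* }</pre>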
*/
public class StreamingASTokenizer implements ASTokenTypes, IASTokenizer, Closeable
{
private static final String FOR_EACH = "for each";
private static final String XML = "xml";
private static final String DEFAULT_XML_NAMESPACE = "default xml namespace";
private static final String ZERO = "0";
/**
* Map from keyword text to token type.
* <p>
* We use a HashMap here to avoid slowing down the performance of the
* underlying lexer. We avoid the "longest match" problem, which would require
* a lot of rescanning at the lexer level to distinguish keywords from
* identifiers. And since hash map lookup is constant-time, this is (in theory)
* faster than doing this in the scanner, where we would be bound by i/o and
* state-machine back-tracking.
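* <p>
* A sketch of the lookup this map enables (mirroring the identifier handling
* in {@code nextTokenFromReader()}):
* <pre>{@code
* final Integer keywordType = keywordToTokenMap.get("for");
* if (keywordType != null)
*     token.setType(keywordType); // identifier promoted to TOKEN_KEYWORD_FOR
* }</pre>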
*/
private static final Map<String, Integer> keywordToTokenMap = new ImmutableMap.Builder<String, Integer>()
.put(IASKeywordConstants.AS, TOKEN_KEYWORD_AS)
.put(IASKeywordConstants.IS, TOKEN_KEYWORD_IS)
.put(IASKeywordConstants.INSTANCEOF, TOKEN_KEYWORD_INSTANCEOF)
.put(IASKeywordConstants.IN, TOKEN_KEYWORD_IN)
.put(IASKeywordConstants.DELETE, TOKEN_KEYWORD_DELETE)
.put(IASKeywordConstants.TYPEOF, TOKEN_KEYWORD_TYPEOF)
.put(IASKeywordConstants.CONST, TOKEN_KEYWORD_CONST)
.put(IASKeywordConstants.GET, TOKEN_RESERVED_WORD_GET)
.put(IASKeywordConstants.IMPLEMENTS, TOKEN_RESERVED_WORD_IMPLEMENTS)
.put(IASKeywordConstants.IMPORT, TOKEN_KEYWORD_IMPORT)
.put(IASKeywordConstants.USE, TOKEN_KEYWORD_USE)
.put(IASKeywordConstants.EXTENDS, TOKEN_RESERVED_WORD_EXTENDS)
.put(IASKeywordConstants.NEW, TOKEN_KEYWORD_NEW)
.put(IASKeywordConstants.DYNAMIC, TOKEN_MODIFIER_DYNAMIC)
.put(IASKeywordConstants.FINAL, TOKEN_MODIFIER_FINAL)
.put(IASKeywordConstants.NATIVE, TOKEN_MODIFIER_NATIVE)
.put(IASKeywordConstants.OVERRIDE, TOKEN_MODIFIER_OVERRIDE)
.put(IASKeywordConstants.STATIC, TOKEN_MODIFIER_STATIC)
.put(IASKeywordConstants.VIRTUAL, TOKEN_MODIFIER_VIRTUAL)
.put(IASKeywordConstants.ABSTRACT, TOKEN_MODIFIER_ABSTRACT)
.put(IASKeywordConstants.SET, TOKEN_RESERVED_WORD_SET)
// Keywords with special token types that affect subsequent blocks
.put(IASKeywordConstants.CATCH, TOKEN_KEYWORD_CATCH)
.put(IASKeywordConstants.CLASS, TOKEN_KEYWORD_CLASS)
.put(IASKeywordConstants.FUNCTION, TOKEN_KEYWORD_FUNCTION)
.put(IASKeywordConstants.INTERFACE, TOKEN_KEYWORD_INTERFACE)
.put(IASKeywordConstants.PACKAGE, TOKEN_KEYWORD_PACKAGE)
// #120009: allow "var" inside a parameter list, even though it's not
// valid AS (don't turn the subsequent function block open into a block open)
.put(IASKeywordConstants.VAR, TOKEN_KEYWORD_VAR)
.put(IASKeywordConstants.FALSE, TOKEN_KEYWORD_FALSE)
.put(IASKeywordConstants.NULL, TOKEN_KEYWORD_NULL)
.put(IASKeywordConstants.TRUE, TOKEN_KEYWORD_TRUE)
.put(IASKeywordConstants.PUBLIC, HIDDEN_TOKEN_BUILTIN_NS)
.put(IASKeywordConstants.PRIVATE, HIDDEN_TOKEN_BUILTIN_NS)
.put(IASKeywordConstants.PROTECTED, HIDDEN_TOKEN_BUILTIN_NS)
.put(IASKeywordConstants.INTERNAL, HIDDEN_TOKEN_BUILTIN_NS)
.put(IASKeywordConstants.INCLUDE, TOKEN_KEYWORD_INCLUDE)
// Keywords for statements that affect subsequent blocks
.put(IASKeywordConstants.DO, TOKEN_KEYWORD_DO)
.put(IASKeywordConstants.WHILE, TOKEN_KEYWORD_WHILE)
.put(IASKeywordConstants.BREAK, TOKEN_KEYWORD_BREAK)
.put(IASKeywordConstants.CONTINUE, TOKEN_KEYWORD_CONTINUE)
.put(IASKeywordConstants.GOTO, TOKEN_RESERVED_WORD_GOTO)
.put(IASKeywordConstants.FOR, TOKEN_KEYWORD_FOR)
.put(StreamingASTokenizer.FOR_EACH, TOKEN_KEYWORD_FOR)
.put(IASKeywordConstants.EACH, TOKEN_RESERVED_WORD_EACH)
.put(IASKeywordConstants.WITH, TOKEN_KEYWORD_WITH)
.put(IASKeywordConstants.ELSE, TOKEN_KEYWORD_ELSE)
.put(IASKeywordConstants.IF, TOKEN_KEYWORD_IF)
.put(IASKeywordConstants.SWITCH, TOKEN_KEYWORD_SWITCH)
.put(IASKeywordConstants.CASE, TOKEN_KEYWORD_CASE)
.put(IASKeywordConstants.DEFAULT, TOKEN_KEYWORD_DEFAULT)
.put(IASKeywordConstants.TRY, TOKEN_KEYWORD_TRY)
.put(IASKeywordConstants.FINALLY, TOKEN_KEYWORD_FINALLY)
// Keywords with a generic keyword token type that have no effect
// on subsequent blocks.
.put(IASKeywordConstants.NAMESPACE, TOKEN_RESERVED_WORD_NAMESPACE)
.put(IASKeywordConstants.CONFIG, TOKEN_RESERVED_WORD_CONFIG)
.put(IASKeywordConstants.THROW, TOKEN_KEYWORD_THROW)
.put(IASKeywordConstants.SUPER, TOKEN_KEYWORD_SUPER)
.put(IASKeywordConstants.THIS, TOKEN_KEYWORD_THIS)
.put(IASKeywordConstants.VOID, TOKEN_KEYWORD_VOID)
.put(IASKeywordConstants.RETURN, TOKEN_KEYWORD_RETURN)
.build();
/**
* Configuration for our tokenizer
*/
private static final class TokenizerConfig
{
/**
* Flag that lets us ignore keywords for more general string parsing
*/
public boolean ignoreKeywords = false;
/**
* Flag that lets us be aware of metadata
*/
public boolean findMetadata = true;
/**
* Flag indicating that we are tokenizing full content/files, and not
* segments
*/
public boolean completeContent = true;
/**
* Token stream filter for legacy APIs
*/
public ITokenStreamFilter filter;
/**
* Flag indicating we should collect comments
*/
public boolean collectComments = false;
/**
* Flag indicating we follow include statements, including their tokens
*/
public boolean followIncludes = true;
}
private Reader reader;
//underlying lexer
private RawASTokenizer tokenizer;
//last exception to prevent us from looping forever
private Exception lastException = null;
//LA buffer
private final List<ASToken> lookAheadBuffer;
private int bufferSize = 0; //maintain size ourselves since it's faster
//last token we encountered, used for lookback
private ASToken lastToken;
private int offsetAdjustment; //for offset adjustment
private int lineAdjustment = 0;
private int columnAdjustment = 0;
private IncludeHandler includeHandler;
/**
* The forked tokenizer for included files. If not null, {@link #next()}
* will return a token from this tokenizer.
* <p>
* After all the tokens are returned from the included source file,
* {@link #closeIncludeTokenizer()} closes the tokenizer and sets this field
* to null.
*/
private StreamingASTokenizer forkIncludeTokenizer;
/**
* Flag to indicate if we have followed include statements
*/
private boolean hasEncounteredIncludeStatements = false;
private TokenizerConfig config;
/**
* Path of the source file being tokenized. This is used when resolving
* included file paths. Set by {@link #setPath(String)}.
*/
private String sourcePath;
/**
* Lexer problems.
*/
private final List<ICompilerProblem> problems = new ArrayList<ICompilerProblem>();
/**
* Imaginary tokens generated for {@code asc -in} option.
*/
private Iterator<ASToken> ascIncludeImaginaryTokens;
/**
* You should probably not use this constructor. There is some legacy code
* that uses this constructor, but that code should be updated to use one of
* the static create methods below.
* <p>
* TODO: make this private.
*/
public StreamingASTokenizer(final Reader reader)
{
this();
setReader(reader);
}
/**
* A pool used to avoid creating duplicate string instances
*/
private final HashMap<String, String> stringPool;
/**
* You should probably not use this constructor. There is a lot of code that
* uses this constructor, but that code should be updated to use one of the
* static create methods below.
* <p>
* TODO: make this private.
*/
public StreamingASTokenizer()
{
tokenizer = new RawASTokenizer();
config = new TokenizerConfig();
lookAheadBuffer = new ArrayList<ASToken>(5);
includeHandler = IncludeHandler.creatDefaultIncludeHandler();
stringPool = new HashMap<String, String>();
// Initialize string pool with keyword strings. The keyword strings
// are declared as constants which are automatically "interned".
for (final String keyword : keywordToTokenMap.keySet())
{
stringPool.put(keyword, keyword);
}
}
/**
* Creates a tokenizer suitable for the MXML indexing code.
*
* @param fileName Path of the file whose fragments the new tokenizer will
* tokenize.
* @return A new tokenizer suitable for tokenizing script fragments in an
* MXML document that is being tokenized for the full-text search index.
*/
public static StreamingASTokenizer createForMXMLIndexing(String fileName)
{
StreamingASTokenizer result = new StreamingASTokenizer();
result.setPath(fileName);
result.includeHandler.enterFile(result.sourcePath);
return result;
}
/**
* Fork a new tokenizer when an "include" directive is found. This method
* will pass the {@code StructureTracker} of the current tokenizer down to
* the forked tokenizer.
*
* @param currentTokenizer Current tokenizer.
* @param fileSpec File specification of the included file.
* @param includeHandler Include handler.
* @return A tokenizer for the included file.
* @throws FileNotFoundException Error.
*/
private static StreamingASTokenizer createForIncludeFile(
final StreamingASTokenizer currentTokenizer,
final IFileSpecification fileSpec,
final IncludeHandler includeHandler)
throws FileNotFoundException
{
final StreamingASTokenizer tokenizer = create(fileSpec, includeHandler);
return tokenizer;
}
/**
* Create a tokenizer from a source file. This is the lexer entry-point used
* by {@link ASCompilationUnit}.
*
* @param fileSpec File specification provides the reader and the file path.
* @param includeHandler Include handler.
* @throws FileNotFoundException error
*/
protected static StreamingASTokenizer create(
final IFileSpecification fileSpec,
final IncludeHandler includeHandler)
throws FileNotFoundException
{
assert fileSpec != null : "File specification can't be null.";
assert includeHandler != null : "Include handler can't be null.";
final StreamingASTokenizer tokenizer = new StreamingASTokenizer();
tokenizer.setReader(fileSpec.createReader());
tokenizer.setPath(fileSpec.getPath());
tokenizer.includeHandler = includeHandler;
tokenizer.includeHandler.enterFile(tokenizer.sourcePath);
return tokenizer;
}
/**
* Create a tokenizer for {@code ASParser#parseFile()}.
*
* @param fileSpec File specification provides the reader and the file path.
* @param includeHandler Include handler.
* @param followIncludes True if included files are also parsed.
* @param includedFiles A list of included file paths.
* @return Lexer.
* @throws FileNotFoundException error
*/
protected static StreamingASTokenizer createForASParser(
final IFileSpecification fileSpec,
final IncludeHandler includeHandler,
final boolean followIncludes,
final List<String> includedFiles)
throws FileNotFoundException
{
final StreamingASTokenizer tokenizer = create(fileSpec, includeHandler);
tokenizer.setFollowIncludes(followIncludes);
final ImmutableList.Builder<ASToken> imaginaryTokensBuilder =
new ImmutableList.Builder<ASToken>();
for (final String filename : includedFiles)
{
imaginaryTokensBuilder.add(new ASToken(
ASTokenTypes.TOKEN_KEYWORD_INCLUDE,
0,
0,
0,
0,
"include"));
imaginaryTokensBuilder.add(new ASToken(
ASTokenTypes.TOKEN_LITERAL_STRING,
0,
0,
0,
0,
'"' + filename + '"'));
}
tokenizer.ascIncludeImaginaryTokens = imaginaryTokensBuilder.build().iterator();
return tokenizer;
}
/**
* This creator doesn't "enter file" on creation.
*/
protected static StreamingASTokenizer createForInlineScriptScopeBuilding(
final Reader reader,
final String path,
final IncludeHandler includeHandler,
final int offsetAdjustment,
final int lineAdjustment,
final int columnAdjustment)
{
assert reader != null : "Reader can't be null";
assert path != null : "Path can't be null";
assert includeHandler != null : "IncludeHandler can't be null";
final StreamingASTokenizer tokenizer = new StreamingASTokenizer();
tokenizer.setReader(reader);
tokenizer.setPath(path);
tokenizer.includeHandler = includeHandler;
tokenizer.setSourcePositionAdjustment(
offsetAdjustment, lineAdjustment, columnAdjustment);
return tokenizer;
}
/**
* Create a tokenizer to parse an expression.
*/
protected static StreamingASTokenizer createForInlineExpressionParsing(
final Reader reader,
final String path
)
{
assert reader != null : "Reader can't be null";
assert path != null : "Path can't be null";
final StreamingASTokenizer tokenizer = new StreamingASTokenizer();
tokenizer.setReader(reader);
tokenizer.setPath(path);
tokenizer.includeHandler.enterFile(path);
// Have to do this to get the tokenizer to work right - some things, like function expressions,
// won't tokenize correctly unless the last token is '=' or certain other special tokens.
tokenizer.lastToken = new ASToken(ASTokenTypes.TOKEN_OPERATOR_ASSIGNMENT, -1, -1, -1, -1, "=");
return tokenizer;
}
/**
* This method can create a {@code StreamingASTokenizer} with optional
* "follow includes". If {@code IncludeHandler} is not null, it will follow
* {@code include} directives.
*
* @param reader Input to the tokenizer.
* @param path File path of the input.
* @param includeHandler If not null, the created tokenizer will follow
* {@code include} directives.
* @return A {@code StreamingASTokenizer}.
*/
public static StreamingASTokenizer createForRepairingASTokenizer(
final Reader reader,
final String path,
final IncludeHandler includeHandler)
{
assert path != null || includeHandler == null : "We need a source path to follow includes";
final StreamingASTokenizer tokenizer = new StreamingASTokenizer();
tokenizer.setReader(reader);
tokenizer.setPath(path);
if (includeHandler != null)
{
tokenizer.includeHandler = includeHandler;
includeHandler.enterFile(path);
}
return tokenizer;
}
/**
* Sets the {@link Reader} that supplies the content to this tokenizer. It
* is up to the client to close any previous readers that have been in use.
* It is also up to the client to close the reader once it has been used.
*
* @param reader a {@link Reader}
*/
public void setReader(final Reader reader)
{
setReader(reader, 0, 0, 0);
}
/**
* Sets the {@link Reader} that supplies the content to this tokenizer. It
* is up to the client to close any previous readers that have been in use.
* It is also up to the client to close the reader once it has been used.
*
* @param reader a {@link Reader}
* @param offset Offset adjustment. If the specified reader is reading from
* a string extracted from a source file, this should be the offset of the
* first character read from the reader in the source file.
* @param line Line adjustment.
* @param column Column adjustment.
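* <p>
* A sketch with illustrative values ({@code scriptReader} and the numbers
* are hypothetical):
* <pre>{@code
* // Script body extracted at offset 120, line 4, column 8 of the
* // enclosing document.
* tokenizer.setReader(scriptReader, 120, 4, 8);
* }</pre>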
*/
public void setReader(final Reader reader, int offset, int line, int column)
{
this.reader = reader;
tokenizer = new RawASTokenizer();
tokenizer.yyreset(reader);
tokenizer.setCollectComments(config.collectComments);
setSourcePositionAdjustment(offset, line, column);
}
/**
* Sets the path to the file this tokenizer is scanning
*
* @param path a file path
*/
@Override
public void setPath(String path)
{
assert path != null : "path of tokenizer shouldn't be null";
sourcePath = path;
tokenizer.setSourcePath(path);
}
/**
* Allows for the adjustment of offset, line and column information when
* parsing subsequences of text. This should be called before tokenization
* has started
*
* @param offset The offset where the fragment starts.
* @param line The line where the fragment starts. This should be a
* ZERO-based line number
* @param column The column where the fragment starts. This should be a
* ZERO-based column number
*/
public void setSourcePositionAdjustment(int offset, int line, int column)
{
offsetAdjustment = offset;
lineAdjustment = line;
columnAdjustment = column;
}
/**
* Sets whether comments are collected: single-line and multi-line.
* Default is <code>false</code>
*
* @param collect true if we should collect comments
*/
@Override
public void setCollectComments(final boolean collect)
{
config.collectComments = collect;
if (tokenizer != null)
tokenizer.setCollectComments(collect);
}
/**
* Sets whether we follow include statements, including their tokens.
* Default is <code>true</code>
*
* @param followIncludes true if we should follow includes
*/
@Override
public void setFollowIncludes(final boolean followIncludes)
{
config.followIncludes = followIncludes;
}
/**
* Closes the underlying reader
*/
@Override
public void close() throws IOException
{
if (tokenizer != null)
{
tokenizer.reset();
tokenizer.yyclose(); //close the reader
}
}
/**
* Sets whether we ignore keywords while scanning. Default is
* <code>false</code>
*
* @param ignore true if we should ignore keywords
*/
public void setIgnoreKeywords(final boolean ignore)
{
config.ignoreKeywords = ignore;
}
/**
* Sets whether we are scanning a full file, or a fragment. Default is
* <code>true</code>
*
* @param full true if we are scanning a full file.
*/
public void setScanningFullContent(final boolean full)
{
config.completeContent = full;
}
/**
* Sets whether we will find metadata constructs. Default is
* <code>true</code>
*
* @param aware true if we will find metadata
*/
public void setIsMetadataAware(final boolean aware)
{
config.findMetadata = aware;
}
/**
* Sets the {@link ITokenStreamFilter} used to filter out unwanted tokens
*
* @param filter the token filter to alter the stream returned from the
* tokenizer
*/
public void setTokenFilter(ITokenStreamFilter filter)
{
config.filter = filter;
}
/**
* Sets the include handler used by this tokenizer to get
* {@link IFileSpecification} for included files.
*
* @param handler {@link IncludeHandler} this tokenizer should use.
*/
public void setIncludeHandler(IncludeHandler handler)
{
includeHandler = handler;
}
/**
* Indicates whether we have encountered tokenization problems. Can be called
* once scanning has begun.
*
* @return true if problems have been encountered
*/
public boolean hasTokenizationProblems()
{
return tokenizer.hasProblems() || problems.size() > 0;
}
/**
* Indicates whether this tokenizer has encountered include statements,
* regardless of whether it is set to follow them or not
*
* @return true if we have encountered includes
*/
public boolean hasEncounteredIncludeStatements()
{
return hasEncounteredIncludeStatements;
}
/**
* Returns a collection of problems that have been encountered while
* scanning.
*
* @return a list of problems, never null
*/
public List<ICompilerProblem> getTokenizationProblems()
{
ArrayList<ICompilerProblem> problems = new ArrayList<ICompilerProblem>(this.problems);
problems.addAll(tokenizer.getProblems());
return problems;
}
public ASToken[] getTokens(final Reader reader, ITokenStreamFilter filter)
{
setReader(reader);
List<ASToken> tokenList = initializeTokenList(reader);
ASToken token = null;
do
{
token = next();
if (token != null && filter.accept(token))
tokenList.add(token.clone()); //make a copy because of object pool
}
while (token != null);
return tokenList.toArray(new ASToken[0]);
}
@Override
public ASToken[] getTokens(final Reader reader)
{
if (config.filter != null)
return getTokens(reader, config.filter);
setReader(reader);
List<ASToken> tokenList = initializeTokenList(reader);
ASToken token = null;
do
{
token = next();
if (token != null)
tokenList.add(token.clone()); //copy ctor because of object pool
}
while (token != null);
return tokenList.toArray(new ASToken[0]);
}
/**
* Creates an empty token list pre-sized from an estimate of how many tokens
* the input will produce.
*
* @param reader the reader supplying the content to be tokenized
* @return an empty, appropriately sized token list
*/
private List<ASToken> initializeTokenList(final Reader reader)
{
List<ASToken> tokenList;
int listSize = 8012;
if (reader instanceof NonLockingStringReader)
{
//we know the length of this string. For a string of length x, there are
//roughly x/5 tokens that can be constructed from it. Size the list appropriately.
listSize = 5;
if (((NonLockingStringReader)reader).getLength() > 0)
{
listSize = Math.max((int)((NonLockingStringReader)reader).getLength() / 5, 5);
}
}
else if (reader instanceof ILengthAwareReader && ((ILengthAwareReader)reader).getInputType() == InputType.FILE)
{
listSize = 9;
if (((ILengthAwareReader)reader).getLength() > 0)
{
listSize = Math.max((int)((ILengthAwareReader)reader).getLength() / 9, 9);
}
}
tokenList = new ArrayList<ASToken>(listSize);
return tokenList;
}
@Override
public IASToken[] getTokens(final String range)
{
return getTokens(new NonLockingStringReader(range));
}
/**
* Returns the next token that can be produced from the underlying reader
*
* @param filter an {@link ITokenStreamFilter} to restrict the tokens that
* are returned
* @return an ASToken, or null if no more tokens can be produced
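* <p>
* A sketch, assuming {@link ITokenStreamFilter} exposes a single
* {@code accept} method:
* <pre>{@code
* final ASToken token = tokenizer.next(new ITokenStreamFilter()
* {
*     public boolean accept(IASToken token)
*     {
*         return token.getType() != ASTokenTypes.TOKEN_SEMICOLON;
*     }
* });
* }</pre>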
*/
public final ASToken next(final ITokenStreamFilter filter)
{
ASToken retVal = null;
while (true)
{
retVal = next();
if (retVal == null || filter.accept(retVal))
{
break;
}
}
return retVal;
}
/**
* Returns the next token that can be produced from the underlying reader.
* <p>
* If the forked "include file tokenizer" is open (not null), return the
* next token from it. If the forked tokenizer reaches the end of the
* included file, close (set to null) the forked tokenizer and return token
* from the main source file.
*
* @return an ASToken, or null if no more tokens can be produced
*/
public final ASToken next()
{
ASToken retVal = null;
// If the lexer for the included file is open, read from the included tokenizer.
boolean consumeSemi = false;
try
{
// Return token from the main file.
if (forkIncludeTokenizer != null)
{
retVal = forkIncludeTokenizer.next();
// Check if the forked tokenizer reached EOF.
if (retVal == null)
{
closeIncludeTokenizer();
// We should consume the next semicolon we find.
// Most include statements are terminated with a semicolon,
// and because we read the contents of the included file,
// this could cause problems with a semicolon in a place
// we don't want it.
consumeSemi = true;
}
else
return retVal;
}
if (bufferSize > 0)
{
retVal = lookAheadBuffer.remove(0);
bufferSize--;
}
else
{
retVal = nextTokenFromReader();
}
if (retVal == null)
return null;
final int tokenType = retVal.getType();
switch (tokenType)
{
// if we see "each" at this point, it's not a syntactic keyword,
// because "for each" is recognized via lookahead when we see "for"
case TOKEN_RESERVED_WORD_EACH:
treatKeywordAsIdentifier(retVal);
processUserDefinedNamespace(retVal, 0);
return retVal;
case TOKEN_KEYWORD_INCLUDE:
{
if (lastToken != null)
{
int lastTokenType = lastToken.getType();
switch (lastTokenType)
{
case TOKEN_KEYWORD_VAR:
case TOKEN_KEYWORD_FUNCTION:
case TOKEN_RESERVED_WORD_GET:
case TOKEN_RESERVED_WORD_SET:
case TOKEN_OPERATOR_MEMBER_ACCESS:
{
retVal.setType(TOKEN_IDENTIFIER);
return retVal;
}
}
}
// "followIncludes=false" is usually used for code model
// partitioner. They want the "include" token.
if (!config.followIncludes)
return retVal;
final ASToken token = LT(1);
// "include" at EOF is always a keyword
if (token == null)
return retVal;
if (!matches(token, TOKEN_LITERAL_STRING))
{
treatKeywordAsIdentifier(retVal); // it's an identifier
processUserDefinedNamespace(retVal, 0);
}
else
{
hasEncounteredIncludeStatements = true;
// Consume the file path after the include token.
consume(1);
final String filenameTokenText = token.getText();
final String includeString = filenameTokenText.substring(1, filenameTokenText.length() - 1);
if (sourcePath == null)
throw new NullPointerException("Source file is needed for resolving included file path.");
IFileSpecification includedFileSpec = null;
//respond to problems from our file handler
includedFileSpec = includeHandler.getFileSpecificationForInclude(sourcePath, includeString);
if (includedFileSpec == null)
{
ICompilerProblem problem = new FileNotFoundProblem(token, filenameTokenText); //the text will be the path not found
problems.add(problem);
retVal = next();
return retVal;
}
if (includeHandler.isCyclicInclude(includedFileSpec.getPath()))
{
ICompilerProblem problem = new CyclicalIncludesProblem(token);
problems.add(problem);
retVal = next();
return retVal;
}
else
{
// Fork a tokenizer for the included file
try
{
forkIncludeTokenizer = createForIncludeFile(this, includedFileSpec, includeHandler);
retVal = forkIncludeTokenizer.next();
}
catch (FileNotFoundException fnfe)
{
includeHandler.handleFileNotFound(includedFileSpec);
ICompilerProblem problem = new FileNotFoundProblem(token, includedFileSpec.getPath());
problems.add(problem);
retVal = next();
return retVal;
}
}
}
// Recover from compiler problems and continue.
if (retVal == null)
{
// Included file is empty.
closeIncludeTokenizer();
// Fall back to main source.
retVal = this.next();
}
return retVal;
}
case TOKEN_RESERVED_WORD_CONFIG:
if (matches(LT(1), TOKEN_RESERVED_WORD_NAMESPACE))
{ //we have "config namespace"
retVal.setType(TOKEN_RESERVED_WORD_CONFIG);
return retVal;
}
treatKeywordAsIdentifier(retVal); //identifier
processUserDefinedNamespace(retVal, 0);
return retVal;
case HIDDEN_TOKEN_BUILTIN_NS:
if (matches(LT(1), TOKEN_OPERATOR_NS_QUALIFIER))
{ //we have public:: and this structure is not an annotation but a name ref
retVal.setType(TOKEN_NAMESPACE_NAME);
return retVal;
}
retVal.setType(TOKEN_NAMESPACE_ANNOTATION);
return retVal;
case TOKEN_MODIFIER_DYNAMIC:
case TOKEN_MODIFIER_FINAL:
case TOKEN_MODIFIER_NATIVE:
case TOKEN_MODIFIER_OVERRIDE:
case TOKEN_MODIFIER_STATIC:
case TOKEN_MODIFIER_VIRTUAL:
case TOKEN_MODIFIER_ABSTRACT:
{
// previous token is either a modifier or a namespace, or if
// null, assume keyword
// next token is from a definition or a modifier or a namespace
final ASToken nextToken = LT(1);
if (nextToken != null)
{
switch (nextToken.getType())
{
case TOKEN_KEYWORD_CLASS:
case TOKEN_KEYWORD_FUNCTION:
case TOKEN_KEYWORD_INTERFACE:
case TOKEN_RESERVED_WORD_NAMESPACE:
case TOKEN_KEYWORD_VAR:
case TOKEN_KEYWORD_CONST:
case TOKEN_MODIFIER_DYNAMIC:
case TOKEN_MODIFIER_FINAL:
case TOKEN_MODIFIER_NATIVE:
case TOKEN_MODIFIER_OVERRIDE:
case TOKEN_MODIFIER_STATIC:
case TOKEN_MODIFIER_VIRTUAL:
case TOKEN_MODIFIER_ABSTRACT:
case TOKEN_NAMESPACE_ANNOTATION:
case TOKEN_NAMESPACE_NAME:
case HIDDEN_TOKEN_BUILTIN_NS:
return retVal;
case TOKEN_IDENTIFIER:
if (isUserDefinedNamespace(nextToken, 1)) // we're already looking ahead one so make sure we look ahead one further
return retVal;
default:
// Not applicable to other token types.
break;
}
}
treatKeywordAsIdentifier(retVal);
processUserDefinedNamespace(retVal, 0);
return retVal;
}
//we combine +/- for numeric literals here
case TOKEN_OPERATOR_MINUS:
case TOKEN_OPERATOR_PLUS:
{
if (lastToken == null || !lastToken.canPreceedSignedOperator())
{
final ASToken nextToken = LT(1);
if (nextToken != null)
{
switch (nextToken.getType())
{
case TOKEN_LITERAL_NUMBER:
case TOKEN_LITERAL_HEX_NUMBER:
retVal.setEnd(nextToken.getEnd());
final StringBuilder builder = new StringBuilder(retVal.getText());
builder.append(nextToken.getText());
retVal.setText(poolString(builder.toString()));
consume(1);
retVal.setType(nextToken.getType());
break;
default:
// ignore other tokens
break;
}
}
}
return retVal;
}
//RECOGNIZE: for each
case TOKEN_KEYWORD_FOR:
{
final ASToken token = LT(1);
if (matches(token, TOKEN_RESERVED_WORD_EACH))
{
retVal.setEnd(token.getEnd());
retVal.setText(FOR_EACH);
consume(1);
return retVal;
}
if (lastToken != null)
{
int lastTokenType = lastToken.getType();
switch (lastTokenType)
{
case TOKEN_KEYWORD_VAR:
case TOKEN_KEYWORD_FUNCTION:
case TOKEN_RESERVED_WORD_GET:
case TOKEN_RESERVED_WORD_SET:
case TOKEN_OPERATOR_MEMBER_ACCESS:
retVal.setType(TOKEN_IDENTIFIER);
}
}
return retVal;
}
//RECOGNIZE: default xml namespace
//default xml namespace must exist on the same line
case TOKEN_KEYWORD_DEFAULT:
{
final ASToken maybeNS = LT(2);
final boolean foundTokenNamespace = maybeNS != null &&
maybeNS.getType() == TOKEN_RESERVED_WORD_NAMESPACE;
final ASToken maybeXML = LT(1);
if (foundTokenNamespace)
{
final boolean foundTokenXML = maybeXML != null &&
maybeXML.getType() == TOKEN_IDENTIFIER &&
XML.equals(maybeXML.getText());
if (!foundTokenXML)
{
final ICompilerProblem problem =
new ExpectXmlBeforeNamespaceProblem(maybeNS);
problems.add(problem);
}
//combine all of these tokens together
retVal.setEnd(maybeNS.getEnd());
retVal.setText(DEFAULT_XML_NAMESPACE);
retVal.setType(TOKEN_DIRECTIVE_DEFAULT_XML);
consume(2);
}
// if this isn't "default xml namespace" then
// see if it is the default case in a switch
// otherwise, assume it is an identifierName
else if (maybeXML != null &&
maybeXML.getType() != TOKEN_COLON)
retVal.setType(TOKEN_IDENTIFIER);
else if (lastToken != null)
{
int lastTokenType = lastToken.getType();
switch (lastTokenType)
{
case TOKEN_KEYWORD_VAR:
case TOKEN_KEYWORD_FUNCTION:
case TOKEN_RESERVED_WORD_GET:
case TOKEN_RESERVED_WORD_SET:
case TOKEN_OPERATOR_MEMBER_ACCESS:
retVal.setType(TOKEN_IDENTIFIER);
}
}
return retVal;
}
case TOKEN_KEYWORD_VOID:
{
//check for void 0
final ASToken token = LT(1);
if (matches(token, TOKEN_LITERAL_NUMBER) && ZERO.equals(token.getText()))
{
retVal.setType(TOKEN_VOID_0);
combineText(retVal, token);
consume(1);
}
//check for void(0)
else if (matches(token, TOKEN_PAREN_OPEN))
{
final ASToken zeroT = LT(2);
if (matches(zeroT, TOKEN_LITERAL_NUMBER) && ZERO.equals(zeroT.getText()))
{
final ASToken closeParenT = LT(3);
if (matches(closeParenT, TOKEN_PAREN_CLOSE))
{
combineText(retVal, token);
combineText(retVal, zeroT);
combineText(retVal, closeParenT);
retVal.setType(TOKEN_VOID_0);
consume(3);
}
}
}
return retVal;
}
case TOKEN_IDENTIFIER:
{
//check for user-defined namespace before we return anything
processUserDefinedNamespace(retVal, 0);
return retVal;
}
//this is for metadata processing
case TOKEN_SQUARE_OPEN:
{
retVal = tryParseMetadata(retVal);
return retVal;
}
case HIDDEN_TOKEN_STAR_ASSIGNMENT:
{
//this is to solve an ambiguous case, where we can't tell the difference between
//var foo:*=null and foo *= null;
retVal.setType(TOKEN_OPERATOR_STAR);
retVal.setEnd(retVal.getEnd() - 1);
retVal.setText("*");
//add the equals
final ASToken nextToken = tokenizer.buildToken(TOKEN_OPERATOR_ASSIGNMENT,
retVal.getEnd() + 1, retVal.getEnd() + 2,
retVal.getLine(), retVal.getColumn(), "=");
nextToken.setSourcePath(sourcePath);
addTokenToBuffer(nextToken);
return retVal;
}
case TOKEN_SEMICOLON:
if (consumeSemi)
{
return next();
}
return retVal;
case TOKEN_VOID_0:
case TOKEN_LITERAL_REGEXP:
case TOKEN_COMMA:
case TOKEN_COLON:
case TOKEN_PAREN_OPEN:
case TOKEN_PAREN_CLOSE:
case TOKEN_SQUARE_CLOSE:
case TOKEN_ELLIPSIS:
case TOKEN_OPERATOR_PLUS_ASSIGNMENT:
case TOKEN_OPERATOR_MINUS_ASSIGNMENT:
case TOKEN_OPERATOR_MULTIPLICATION_ASSIGNMENT:
case TOKEN_OPERATOR_DIVISION_ASSIGNMENT:
case TOKEN_OPERATOR_MODULO_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_AND_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_OR_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_XOR_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT_ASSIGNMENT:
case TOKEN_OPERATOR_STAR:
case TOKEN_OPERATOR_NS_QUALIFIER:
case TOKEN_ASDOC_COMMENT:
case TOKEN_OPERATOR_DIVISION:
case TOKEN_OPERATOR_MODULO:
case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT:
case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT:
case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT:
case TOKEN_OPERATOR_LESS_THAN:
case TOKEN_OPERATOR_GREATER_THAN:
case TOKEN_OPERATOR_LESS_THAN_EQUALS:
case TOKEN_OPERATOR_GREATER_THAN_EQUALS:
case TOKEN_OPERATOR_EQUAL:
case TOKEN_OPERATOR_NOT_EQUAL:
case TOKEN_OPERATOR_STRICT_EQUAL:
case TOKEN_OPERATOR_STRICT_NOT_EQUAL:
case TOKEN_OPERATOR_BITWISE_AND:
case TOKEN_OPERATOR_BITWISE_XOR:
case TOKEN_OPERATOR_BITWISE_OR:
case TOKEN_OPERATOR_LOGICAL_AND:
case TOKEN_OPERATOR_LOGICAL_OR:
case TOKEN_OPERATOR_LOGICAL_AND_ASSIGNMENT:
case TOKEN_OPERATOR_LOGICAL_OR_ASSIGNMENT:
case TOKEN_TYPED_COLLECTION_OPEN:
case TOKEN_TYPED_COLLECTION_CLOSE:
case TOKEN_OPERATOR_MEMBER_ACCESS:
case TOKEN_RESERVED_WORD_NAMESPACE:
case TOKEN_RESERVED_WORD_GET:
case TOKEN_RESERVED_WORD_SET:
case TOKEN_OPERATOR_ASSIGNMENT:
case TOKEN_TYPED_LITERAL_CLOSE:
case TOKEN_TYPED_LITERAL_OPEN:
case TOKEN_OPERATOR_TERNARY:
case TOKEN_OPERATOR_DECREMENT:
case TOKEN_OPERATOR_INCREMENT:
case TOKEN_OPERATOR_ATSIGN:
case TOKEN_OPERATOR_BITWISE_NOT:
case TOKEN_OPERATOR_LOGICAL_NOT:
case TOKEN_E4X_BINDING_CLOSE:
case TOKEN_E4X_BINDING_OPEN:
case TOKEN_OPERATOR_DESCENDANT_ACCESS:
case TOKEN_NAMESPACE_ANNOTATION:
case TOKEN_NAMESPACE_NAME:
case TOKEN_BLOCK_OPEN:
case TOKEN_BLOCK_CLOSE:
case TOKEN_KEYWORD_FUNCTION:
return retVal;
case HIDDEN_TOKEN_MULTI_LINE_COMMENT:
case HIDDEN_TOKEN_SINGLE_LINE_COMMENT:
if (tokenizer.isCollectingComments())
{
return retVal;
}
assert (false);
return null;
case TOKEN_KEYWORD_INSTANCEOF:
case TOKEN_KEYWORD_AS:
case TOKEN_KEYWORD_IN:
case TOKEN_KEYWORD_IS:
if (lastToken != null)
{
int lastTokenType = lastToken.getType();
switch (lastTokenType)
{
case TOKEN_SEMICOLON:
case TOKEN_BLOCK_OPEN:
case TOKEN_COMMA:
retVal.setType(TOKEN_IDENTIFIER);
return retVal;
}
}
else
{
// this is the first token, so assume identifier
retVal.setType(TOKEN_IDENTIFIER);
return retVal;
}
// and fall through
case TOKEN_KEYWORD_DELETE:
ASToken nextToken = LT(1);
if (nextToken != null)
{
int nextTokenType = nextToken.getType();
switch (nextTokenType)
{
// if followed by an identifier, assume it is the
// keyword and not an identifierName
case TOKEN_IDENTIFIER:
return retVal;
// followed by a comma or semicolon
// probably being used in an expression
case TOKEN_COMMA:
case TOKEN_SEMICOLON:
retVal.setType(TOKEN_IDENTIFIER);
return retVal;
}
}
// and fall through
case TOKEN_KEYWORD_BREAK:
case TOKEN_KEYWORD_CASE:
case TOKEN_KEYWORD_CATCH:
case TOKEN_KEYWORD_CLASS:
case TOKEN_KEYWORD_CONST:
case TOKEN_KEYWORD_CONTINUE:
case TOKEN_KEYWORD_DO:
case TOKEN_KEYWORD_ELSE:
case TOKEN_KEYWORD_FALSE:
case TOKEN_KEYWORD_FINALLY:
case TOKEN_KEYWORD_IF:
case TOKEN_KEYWORD_IMPORT:
case TOKEN_KEYWORD_INTERFACE:
case TOKEN_KEYWORD_NULL:
case TOKEN_KEYWORD_PACKAGE:
case TOKEN_KEYWORD_SUPER:
case TOKEN_KEYWORD_SWITCH:
case TOKEN_KEYWORD_THIS:
case TOKEN_KEYWORD_TRUE:
case TOKEN_KEYWORD_TRY:
case TOKEN_KEYWORD_TYPEOF:
case TOKEN_KEYWORD_USE:
case TOKEN_KEYWORD_VAR:
case TOKEN_KEYWORD_WHILE:
case TOKEN_KEYWORD_WITH:
case TOKEN_KEYWORD_RETURN:
case TOKEN_KEYWORD_THROW:
case TOKEN_KEYWORD_NEW:
if (lastToken != null)
{
int lastTokenType = lastToken.getType();
switch (lastTokenType)
{
case TOKEN_KEYWORD_VAR:
case TOKEN_KEYWORD_FUNCTION:
case TOKEN_RESERVED_WORD_GET:
case TOKEN_RESERVED_WORD_SET:
case TOKEN_OPERATOR_MEMBER_ACCESS:
retVal.setType(TOKEN_IDENTIFIER);
}
}
return retVal;
default:
if (ASToken.isE4X(tokenType))
return retVal;
if (retVal.isKeywordOrContextualReservedWord() || retVal.isLiteral())
return retVal;
// If we reach here, the token fails to match any processing logic.
final UnexpectedTokenProblem problem = new UnexpectedTokenProblem(
retVal,
ASTokenKind.UNKNOWN);
problems.add(problem);
}
}
catch (final Exception e)
{
if (lastException != null)
{
if (lastException.getClass().isInstance(e))
{
ICompilerProblem problem = new InternalCompilerProblem2(sourcePath, e, "StreamingASTokenizer");
problems.add(problem);
return null;
}
}
else
{
lastException = e;
retVal = null;
return next();
}
}
finally
{
consumeSemi = false;
lastToken = retVal;
}
return null;
}
/**
* Error recovery: convert the given keyword token into an identifier token,
* and log a syntax error.
*
* @param token Keyword token.
*/
private void treatKeywordAsIdentifier(final ASToken token)
{
assert token != null : "token can't be null";
assert token.isKeywordOrContextualReservedWord() : "only transfer reserved words";
if (token.isKeyword())
{
final UnexpectedTokenProblem problem = new UnexpectedTokenProblem(token, ASTokenKind.IDENTIFIER);
problems.add(problem);
}
token.setType(TOKEN_IDENTIFIER);
}
/**
* Decide within the current context whether the following content can be
* parsed as a metadata tag token.
*
* @param nextToken The next token coming from
* {@link #nextTokenFromReader()}.
* @return If the following content can be a metadata tag, the result is a
* token of type {@link ASTokenTypes#TOKEN_ATTRIBUTE}. Otherwise, the
* argument {@code nextToken} is returned.
* @throws Exception Parsing error.
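* <p>
* For example (ActionScript input):
* <pre>
* [Bindable]            // "[" after a semicolon or block open: metadata
* public var selected:Object;
*
* a = b[i];             // "[" after an identifier: array access
* </pre>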
*/
private ASToken tryParseMetadata(ASToken nextToken) throws Exception
{
// Do not initialize this variable so that Java flow-analysis can check if
// the following rules cover all the possibilities.
final boolean isNextMetadata;
if (!config.findMetadata)
{
// The lexer is configured to not recognize metadata.
isNextMetadata = false;
}
else if (lastToken == null)
{
// An "[" at the beginning of a script is always a part of a metadata.
isNextMetadata = true;
}
else
{
switch (lastToken.getType())
{
case TOKEN_ASDOC_COMMENT:
case TOKEN_SEMICOLON:
case TOKEN_ATTRIBUTE:
case TOKEN_BLOCK_OPEN:
// "[" after these tokens are always part of a metadata token.
isNextMetadata = true;
break;
case TOKEN_SQUARE_CLOSE:
case TOKEN_IDENTIFIER:
// "[" following a "]" is an array access.
// "[" following an identifier is an array access.
isNextMetadata = false;
break;
case TOKEN_KEYWORD_INCLUDE:
case TOKEN_BLOCK_CLOSE:
case TOKEN_OPERATOR_STAR:
// "[" after these tokens are part of a metadata token, if
// the "[" is on a new line.
isNextMetadata = !lastToken.matchesLine(nextToken);
break;
default:
// If we are lexing an entire file
// then at this point we "know" that the next token
// is not meta-data.
if (config.completeContent)
{
isNextMetadata = false;
}
else
{
// In "fragment" mode which is used by the syntax coloring code
// in builder, we assume the following list of tokens can not
// precede meta-data because they all start or occur in expressions.
switch (lastToken.getType())
{
case TOKEN_OPERATOR_EQUAL:
case TOKEN_OPERATOR_TERNARY:
case TOKEN_COLON:
case TOKEN_OPERATOR_PLUS:
case TOKEN_OPERATOR_MINUS:
case TOKEN_OPERATOR_STAR:
case TOKEN_OPERATOR_DIVISION:
case TOKEN_OPERATOR_MODULO:
case TOKEN_OPERATOR_BITWISE_AND:
case TOKEN_OPERATOR_BITWISE_OR:
case TOKEN_KEYWORD_AS:
case TOKEN_OPERATOR_BITWISE_XOR:
case TOKEN_OPERATOR_LOGICAL_AND:
case TOKEN_OPERATOR_LOGICAL_OR:
case TOKEN_PAREN_OPEN:
case TOKEN_COMMA:
case TOKEN_OPERATOR_BITWISE_NOT:
case TOKEN_OPERATOR_LOGICAL_NOT:
case TOKEN_OPERATOR_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT:
case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT:
case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT:
case TOKEN_OPERATOR_LESS_THAN:
case TOKEN_OPERATOR_GREATER_THAN:
case TOKEN_OPERATOR_LESS_THAN_EQUALS:
case TOKEN_OPERATOR_GREATER_THAN_EQUALS:
case TOKEN_OPERATOR_NOT_EQUAL:
case TOKEN_OPERATOR_STRICT_EQUAL:
case TOKEN_OPERATOR_STRICT_NOT_EQUAL:
case TOKEN_OPERATOR_PLUS_ASSIGNMENT:
case TOKEN_OPERATOR_MINUS_ASSIGNMENT:
case TOKEN_OPERATOR_MULTIPLICATION_ASSIGNMENT:
case TOKEN_OPERATOR_DIVISION_ASSIGNMENT:
case TOKEN_OPERATOR_MODULO_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_AND_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_OR_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_XOR_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT_ASSIGNMENT:
case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT_ASSIGNMENT:
isNextMetadata = false;
break;
default:
isNextMetadata = true;
break;
}
}
break;
}
}
final ASToken result;
if (isNextMetadata)
result = consumeMetadata(nextToken);
else
result = nextToken;
return result;
}
/**
* Close the forked include file tokenizer, and set it to null.
*/
private void closeIncludeTokenizer()
{
if (forkIncludeTokenizer == null)
return;
try
{
problems.addAll(forkIncludeTokenizer.problems);
forkIncludeTokenizer.close();
}
catch (IOException e)
{
throw new RuntimeException(e);
}
includeHandler.leaveFile(forkIncludeTokenizer.getEndOffset());
forkIncludeTokenizer = null;
}
/**
* Consumes tokens through the end of a (potential) metadata tag, buffering
* them so they can be returned to the lookahead buffer if the guess turns
* out to be wrong.
*
* @param startToken the "[" token that opened the potential metadata tag
* @return a {@link MetaDataPayloadToken} if the content was metadata;
* otherwise the original "[" token
* @throws Exception tokenization error
*/
private final ASToken consumeMetadata(final ASToken startToken) throws Exception
{
final ASToken originalToken = new ASToken(startToken);
MetaDataPayloadToken payload = new MetaDataPayloadToken(originalToken);
final ArrayList<ASToken> safetyNet = new ArrayList<ASToken>(5);
boolean isMetadata = true;
while (true)
{
tokenizer.setReuseLastToken();
final ASToken next = LT(1);
if (next == null)
{
break;
}
safetyNet.add(new ASToken(next)); //sadly, we have to deal with the extra object creation if we're wrong
payload.addToken(next); //here too
if (!next.canExistInMetadata())
{
isMetadata = false;
//consume the last token we saw so that we don't get ourselves into an infinite loop
//it was the last token of the metadata, and this makes "next" the current token.
consume(1);
break;
}
consume(1);
if (next.getType() == TOKEN_SQUARE_CLOSE)
{
break;
}
}
if (!isMetadata)
{ //we're wrong, so let's add back the tokens to our lookahead buffer
lookAheadBuffer.addAll(safetyNet);
bufferSize = lookAheadBuffer.size();
return originalToken;
}
return payload;
}
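/**
* Fills the lookahead buffer with the given number of tokens read from the
* underlying reader.
*
* @param distance number of tokens to add to the buffer
* @throws Exception tokenization error
*/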
private final void fill(final int distance) throws Exception
{
int pos = 0;
while (pos < distance)
{
addTokenToBuffer(nextTokenFromReader());
pos++;
}
}
/**
* Appends a token to the lookahead buffer, locking it first.
*
* @param nextToken token to buffer; can be null at EOF
*/
private final void addTokenToBuffer(final ASToken nextToken)
{
bufferSize++;
lookAheadBuffer.add(nextToken);
// at EOF, nextToken can be null.
if (nextToken != null)
nextToken.lock();
}
/**
* Get the pooled version of a given string.
*
* @param text String literal.
* @return Pooled string.
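* <p>
* A minimal sketch of the pooling behavior:
* <pre>{@code
* final String a = poolString(new String("each"));
* final String b = poolString(new String("each"));
* assert a == b; // both refer to the same pooled instance
* }</pre>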
*/
private final String poolString(final String text)
{
String pooledString = stringPool.get(text);
if (pooledString == null)
{
stringPool.put(text, text);
pooledString = text;
}
return pooledString;
}
/**
* Get the next token from the source input. If this tokenizer is created
* for a source file by {@link ASC}, and there are files included by
* {@code -in} option, the tokenizer will return the
* "injected include tokens" before real tokens coming from the JFlex
* generated tokenizer.
*
* @return next token from the source input
* @throws IOException error
* @see ASCompilationUnit#createMainCompilationUnitForASC()
*/
private final ASToken nextTokenFromReader() throws IOException
{
final ASToken nextToken;
if (ascIncludeImaginaryTokens != null && ascIncludeImaginaryTokens.hasNext())
nextToken = ascIncludeImaginaryTokens.next();
else if (tokenizer.hasBufferToken())
nextToken = tokenizer.getBufferToken();
else
nextToken = tokenizer.nextToken();
if (nextToken != null)
{
// Converting unicode on-the-fly in the lexer is much slower than
// converting it here after the token is made, especially for
// identifiers.
switch (nextToken.getType())
{
case TOKEN_LITERAL_NUMBER:
nextToken.setText(poolString(nextToken.getText()));
break;
case TOKEN_LITERAL_REGEXP:
// Any "backslash-u" entities left after "convertUnicode"
// are invalid unicode escape sequences. According to AS3
// behavior, the backslash character is dropped.
nextToken.setText(poolString(convertUnicode(nextToken.getText()).replaceAll("\\\\u", "u")));
break;
case TOKEN_IDENTIFIER:
// Intern 'identifiers' and 'keywords'.
// ('keywords' are still 'identifiers' until analyzed below.)
final String originalIdentifierName = nextToken.getText();
final String normalizedIdentifierName = poolString(convertUnicode(originalIdentifierName));
nextToken.setText(normalizedIdentifierName);
if (!config.ignoreKeywords)
{
/**
* If the identifier has an escaped unicode sequence, it
* can't be a keyword.
* <p>
* According to ASL syntax spec chapter 3.4:
* <blockquote> Unicode escape sequences may be used to
* spell the names of identifiers that would otherwise
* be keywords. This is in contrast to ECMAScript.
* </blockquote>
*/
if (originalIdentifierName.equals(normalizedIdentifierName))
{
// do keyword analysis here
final Integer info = keywordToTokenMap.get(nextToken.getText());
if (info != null)
nextToken.setType(info);
}
}
break;
default:
// Ignore other tokens.
break;
}
//so we want to adjust all of our offsets here, BUT
//the column is really only valid for the first line, which is line 0.
//if we're not the first line, don't bother
nextToken.adjustLocation(
offsetAdjustment,
lineAdjustment,
nextToken.getLine() == 0 ? columnAdjustment : 0);
nextToken.storeLocalOffset();
if (includeHandler != null)
{
nextToken.setSourcePath(includeHandler.getIncludeStackTop());
includeHandler.onNextToken(nextToken);
}
if (nextToken.getSourcePath() == null)
nextToken.setSourcePath(sourcePath);
if (reader instanceof SourceFragmentsReader)
((SourceFragmentsReader)reader).adjustLocation(nextToken);
}
return nextToken;
}
/**
* Consumes the given number of tokens from the lookahead buffer
*
* @param distance the number of tokens to consume
*/
private final void consume(int distance)
{
if (bufferSize >= distance)
{
for (; distance > 0; distance--)
{
lookAheadBuffer.remove(bufferSize - 1);
bufferSize--;
}
}
}
/**
* Returns a lookahead token from the underlying lexer without consuming it
*
* @param distance distance to look ahead
* @return an {@link ASToken}
* @throws Exception
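* <p>
* A sketch: peek two tokens ahead without consuming either:
* <pre>{@code
* final ASToken first = LT(1);
* final ASToken second = LT(2);
* }</pre>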
*/
private final ASToken LT(final int distance) throws Exception
{
if (bufferSize < distance)
{
fill(distance - bufferSize);
}
return lookAheadBuffer.get(distance - 1);
}
private static final boolean matches(final ASToken token, final int type)
{
return token != null && token.getType() == type;
}
/**
* Retrieve the end offset of the file.
* <p>
* The result is the end offset of the file, not the offset of the last
* token, this allows any trailing space to be included so that the parser
* can span the result {@code FileNode} to the entire file.
*
* @return the end offset of the input file
*/
public final int getEndOffset()
{
return tokenizer.getOffset() + offsetAdjustment;
}
/**
* Computes whether the following token is a user-defined namespace. This
* method calls processUserDefinedNamespace which will change token types
*
* @param token token to start our analysis
* @param lookaheadOffset offset of the tokens to look at
* @return true if we're a user-defined namespace
* @throws Exception
*/
private final boolean isUserDefinedNamespace(final ASToken token, final int lookaheadOffset) throws Exception
{
processUserDefinedNamespace(token, lookaheadOffset);
return token.getType() == TOKEN_NAMESPACE_ANNOTATION || token.getType() == TOKEN_NAMESPACE_NAME;
}
/**
* Because AS3 supports qualified/unqualified namespaces as decorators on
* definitions, we need to detect them before we even make it to the parser.
* These look exactly like names/qnames, and so if they're on the same line
* as a definition they might be a namespace name instead of a standard
* identifier. This method will detect these cases, and change token types
* accordingly
*
* @param token token to start our analysis
* @param lookaheadOffset offset of the tokens to look at
* @throws Exception
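* <p>
* For example (ActionScript input):
* <pre>
* my_ns var x:int;   // "my_ns" becomes TOKEN_NAMESPACE_ANNOTATION
* my_ns::x = 1;      // the "::" makes "my_ns" a TOKEN_NAMESPACE_NAME
* </pre>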
*/
private final void processUserDefinedNamespace(final ASToken token, final int lookaheadOffset) throws Exception
{
token.lock();
//determine if we have a user-defined namespace
//our first token will be an identifier, and the cases we're looking for are:
//1.) user_namespace (function|var|dynamic|static|final|native|override)
//2.) my.pack.user_namespace (function|var|dynamic|static|final|native|override)
//option number 1 is probably the 99% case so optimize for it
ASToken nextToken = LT(1 + lookaheadOffset);
if (token.matchesLine(nextToken))
{
// If the next token is an identifier check to see if it should
// be modified to a TOKEN_NAMESPACE_ANNOTATION
// This is so that code like:
// ns1 ns2 var x;
// gets parsed correctly (2 namespace annotations, which is an error)
if (nextToken.getType() == TOKEN_IDENTIFIER)
processUserDefinedNamespace(nextToken, 1 + lookaheadOffset);
switch (nextToken.getType())
{
case TOKEN_KEYWORD_FUNCTION:
case TOKEN_KEYWORD_VAR:
case TOKEN_KEYWORD_CONST:
case TOKEN_RESERVED_WORD_NAMESPACE:
case TOKEN_MODIFIER_DYNAMIC:
case TOKEN_MODIFIER_FINAL:
case TOKEN_MODIFIER_NATIVE:
case TOKEN_MODIFIER_OVERRIDE:
case TOKEN_MODIFIER_STATIC:
case TOKEN_MODIFIER_VIRTUAL:
case TOKEN_MODIFIER_ABSTRACT:
case TOKEN_KEYWORD_CLASS:
case TOKEN_KEYWORD_INTERFACE:
case TOKEN_NAMESPACE_ANNOTATION:
case HIDDEN_TOKEN_BUILTIN_NS:
token.setType(TOKEN_NAMESPACE_ANNOTATION);
return;
case TOKEN_OPERATOR_NS_QUALIFIER: //simple name with a :: binding after it. has to be a NS
token.setType(TOKEN_NAMESPACE_NAME);
return;
}
if (nextToken.getType() == TOKEN_OPERATOR_MEMBER_ACCESS)
{
int nextValidPart = TOKEN_IDENTIFIER;
final ArrayList<ASToken> toTransform = new ArrayList<ASToken>(3);
toTransform.add(token);
toTransform.add(nextToken);
int laDistance = lookaheadOffset + 1;
while (true)
{
nextToken = LT(++laDistance);
if (token.matchesLine(nextToken))
{
if (nextToken.getType() == nextValidPart)
{
nextValidPart = (nextToken.getType() == TOKEN_IDENTIFIER) ? TOKEN_OPERATOR_MEMBER_ACCESS : TOKEN_IDENTIFIER;
toTransform.add(nextToken);
}
else if (nextValidPart != TOKEN_IDENTIFIER && nextToken.canFollowUserNamespace())
{
// Next token is in the follow set of a namespace,
// so all the buffered tokens need to be converted
// into namespace tokens.
for (final ASToken ttToken : toTransform)
{
if (ttToken.getType() == TOKEN_IDENTIFIER)
ttToken.setType(TOKEN_NAMESPACE_ANNOTATION);
else
ttToken.setType(TOKEN_OPERATOR_MEMBER_ACCESS);
}
break;
}
else
{
break;
}
}
else
{
break;
}
}
}
}
}
/**
* Combines the text of two tokens, adding whitespace between them and
* adjusting offsets appropriately
*
* @param target the base token that we will add the next to
* @param source the source of the text to add
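* <p>
* For example, combining a "void" token ending at offset 4 with a "0" token
* starting at offset 5 yields a single token whose text is "void 0".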
*/
private final void combineText(TokenBase target, TokenBase source)
{
StringBuilder text = new StringBuilder();
text.append(target.getText());
//add whitespace for gaps between tokens
for (int i = 0; i < (source.getStart() - target.getEnd()); i++)
{
text.append(" ");
}
text.append(source.getText());
target.setText(poolString(text.toString()));
target.setEnd(target.getStart() + text.length());
}
/**
* Unicode pattern for {@code \u0000}.
*/
private static final Pattern UNICODE_PATTERN = Pattern.compile(BaseRawASTokenizer.PATTERN_U4);
/**
* Leading characters of a unicode pattern.
*/
private static final String UNICODE_LEADING_CHARS = "\\u";
/**
* Convert escaped unicode sequence in a string. For example:
* {@code foo\u0051bar} is converted into {@code fooQbar}.
*
* @param text input string
* @return converted text
*/
static String convertUnicode(final String text)
{
// Calling Pattern.matcher() is much slower than String.contains(), so
// we need this predicate to skip unnecessary RegEx computation.
if (text.contains(UNICODE_LEADING_CHARS))
{
final StringBuilder result = new StringBuilder();
final Matcher matcher = UNICODE_PATTERN.matcher(text);
int start = 0;
while (matcher.find())
{
result.append(text, start, matcher.start());
result.append(Character.toChars(BaseRawASTokenizer.decodeEscapedUnicode(matcher.group())));
start = matcher.end();
}
result.append(text, start, text.length());
return result.toString();
}
else
{
return text;
}
}
/**
* Gets the source path to the file being tokenized.
*/
public String getSourcePath()
{
return sourcePath;
}
}