blob: 70bf55ae5b41321b624e330a984aa454dab6a47e [file] [log] [blame]
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
package de.bokelberg.flex.parser;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
* Converts ActionScript source code into a stream of tokens.
public class AS3Scanner
public static final class Token
private static Token create( final String textContent,
final int tokenLine,
final int tokenColumn )
return new Token( textContent, tokenLine, tokenColumn );
private final int column;
private final boolean isNumeric;
private final int line;
private final String text;
* @param textContent
* @param tokenLine
* @param tokenColumn
protected Token( final String textContent,
final int tokenLine,
final int tokenColumn )
this( textContent, tokenLine, tokenColumn, false );
* @param textContent
* @param tokenLine
* @param tokenColumn
* @param isNumToSet
protected Token( final String textContent,
final int tokenLine,
final int tokenColumn,
final boolean isNumToSet )
text = textContent;
line = tokenLine + 1;
column = tokenColumn + 1;
isNumeric = isNumToSet;
* @return
public int getColumn()
return column;
* @return
public int getLine()
return line;
* @return
public String getText()
return text;
* @return
public boolean isNum()
return isNumeric;
private static class XMLVerifier
private static DefaultHandler handler;
private static SAXParser saxParser;
final SAXParserFactory factory = SAXParserFactory.newInstance();
handler = new DefaultHandler();
factory.setNamespaceAware( false );
saxParser = factory.newSAXParser();
catch ( final ParserConfigurationException e )
LOGGER.warning( StackTraceUtils.print( e ) );
catch ( final SAXException e )
public static boolean verify( final String text )
saxParser.parse( new InputSource( new StringReader( text ) ),
handler );
return true;
catch ( final SAXException e )
LOGGER.warning( StackTraceUtils.print( e ) );
return false;
catch ( final IOException e )
LOGGER.warning( StackTraceUtils.print( e ) );
return false;
// Text of the token nextToken() returns when there is nothing left to scan.
private static final String END = "__END__";
// Logger named after this class; used to report XML parser failures.
private static final Logger LOGGER = Logger.getLogger( AS3Scanner.class.getName() );
protected static boolean isDecimalChar( final char currentCharacter )
return currentCharacter >= '0'
&& currentCharacter <= '9';
// Index of the current character within lines[ line ]; setLines() resets it to -1.
private int column;
// Set when scanDots() emits a ".<" token and cleared when nextToken() meets a '>'.
private boolean inVector;
// Index into lines of the line currently being scanned.
private int line;
// Text to scan, as handed to setLines(); null until setLines() is called.
private String[] lines = null;
* @return
public Token moveToNextToken()
return nextToken();
* @param linesToBeSet
public void setLines( final String[] linesToBeSet )
lines = linesToBeSet;
line = 0;
column = -1;
boolean isHexChar( final char currentCharacter )
final boolean isNum = currentCharacter >= '0'
&& currentCharacter <= '9';
final boolean isLower = currentCharacter >= 'A'
&& currentCharacter <= 'Z';
final boolean isUpper = currentCharacter >= 'a'
&& currentCharacter <= 'z';
return isNum
|| isLower || isUpper;
* @return
protected Token nextToken()
char currentCharacter;
if ( lines != null
&& line < lines.length )
currentCharacter = nextNonWhitespaceCharacter();
return new Token( END, line, column );
if ( currentCharacter == '\n' )
return new Token( "\n", line, column );
if ( currentCharacter == '/' )
return scanCommentRegExpOrOperator();
if ( currentCharacter == '"' )
return scanString( currentCharacter );
if ( currentCharacter == '\'' )
return scanString( currentCharacter );
if ( currentCharacter == '<' )
return scanXMLOrOperator( currentCharacter );
if ( currentCharacter >= '0'
&& currentCharacter <= '9' || currentCharacter == '.' )
return scanNumberOrDots( currentCharacter );
if ( currentCharacter == '{'
|| currentCharacter == '}' || currentCharacter == '(' || currentCharacter == ')'
|| currentCharacter == '[' || currentCharacter == ']' || currentCharacter == ';'
|| currentCharacter == ',' || currentCharacter == '?' || currentCharacter == '~' )
return scanSingleCharacterToken( currentCharacter );
if ( currentCharacter == ':' )
return scanCharacterSequence( currentCharacter,
new String[]
{ "::" } );
if ( currentCharacter == '*' )
return scanCharacterSequence( currentCharacter,
new String[]
{} );
if ( currentCharacter == '+' )
return scanCharacterSequence( currentCharacter,
new String[]
{ "++",
"+=" } );
if ( currentCharacter == '-' )
return scanCharacterSequence( currentCharacter,
new String[]
{ "--",
"-=" } );
if ( currentCharacter == '%' )
return scanCharacterSequence( currentCharacter,
new String[]
{ "%=" } );
if ( currentCharacter == '&' )
return scanCharacterSequence( currentCharacter,
new String[]
{ "&&",
"&=" } );
if ( currentCharacter == '|' )
return scanCharacterSequence( currentCharacter,
new String[]
{ "||",
"|=" } );
if ( currentCharacter == '^' )
return scanCharacterSequence( currentCharacter,
new String[]
{ "^=" } );
if ( currentCharacter == '>' )
if ( inVector )
inVector = false;
return scanCharacterSequence( currentCharacter,
new String[]
{ ">>>=",
">=" } );
if ( currentCharacter == '=' )
return scanCharacterSequence( currentCharacter,
new String[]
{ "===",
"==" } );
if ( currentCharacter == '!' )
return scanCharacterSequence( currentCharacter,
new String[]
{ "!==",
"!=" } );
return scanWord( currentCharacter );
private int computePossibleMatchesMaxLength( final String[] possibleMatches )
int max = 0;
for ( final String possibleMatch : possibleMatches )
max = Math.max( max,
possibleMatch.length() );
return max;
private char getPreviousCharacter()
int currentIndex = -1;
char currentChar;
currentChar = peekChar( currentIndex-- );
while ( currentChar == ' ' );
return currentChar;
private boolean isIdentifierCharacter( final char currentCharacter )
return currentCharacter >= 'A'
&& currentCharacter <= 'Z' || currentCharacter >= 'a' && currentCharacter <= 'z'
|| currentCharacter >= '0' && currentCharacter <= '9' || currentCharacter == '_'
|| currentCharacter == '$';
private boolean isProcessingInstruction( final String text )
return text.startsWith( "<?" );
private boolean isValidRegExp( final String pattern )
Pattern.compile( pattern );
catch ( final PatternSyntaxException t )
return false;
return true;
private boolean isValidXML( final String text )
return XMLVerifier.verify( text );
private char nextChar()
final String currentLine = lines[ line ];
if ( currentLine.length() <= column )
column = -1;
return '\n';
char currentChar = currentLine.charAt( column );
while ( currentChar == '\uFEFF' )
currentChar = currentLine.charAt( column );
return currentChar;
private char nextNonWhitespaceCharacter()
char result;
result = nextChar();
while ( result == ' '
|| result == '\t' );
return result;
private char peekChar( final int offset )
final String currentLine = lines[ line ];
final int index = column
+ offset;
if ( index == -1 )
return '\0';
if ( index >= currentLine.length() )
return '\n';
return currentLine.charAt( index );
* find the longest matching sequence
* @param currentCharacter
* @param possibleMatches
* @param maxLength
* @return
private Token scanCharacterSequence( final char currentCharacter,
final String[] possibleMatches )
int peekPos = 1;
final StringBuffer buffer = new StringBuffer();
final int maxLength = computePossibleMatchesMaxLength( possibleMatches );
buffer.append( currentCharacter );
String found = buffer.toString();
while ( peekPos < maxLength )
buffer.append( peekChar( peekPos ) );
for ( final String possibleMatche : possibleMatches )
if ( buffer.toString().equals( possibleMatche ) )
found = buffer.toString();
final Token result = new Token( found, line, column );
skipChars( found.length() - 1 );
return result;
* Something started with a slash This might be a comment, a regexp or a
* operator
* @param currentCharacter
* @return
private Token scanCommentRegExpOrOperator()
final char firstCharacter = peekChar( 1 );
if ( firstCharacter == '/' )
return scanSingleLineComment();
if ( firstCharacter == '*' )
return scanMultiLineComment();
Token result;
if ( getPreviousCharacter() == '='
|| getPreviousCharacter() == '(' || getPreviousCharacter() == ',' )
result = scanRegExp();
if ( result != null )
return result;
if ( firstCharacter == '=' )
result = new Token( "/=", line, column );
skipChars( 1 );
return result;
result = new Token( "/", line, column );
return result;
* c is either a dot or a number
* @return
private Token scanDecimal( final char currentCharacter )
char currentChar = currentCharacter;
final StringBuffer buffer = new StringBuffer();
int peekPos = 1;
while ( isDecimalChar( currentChar ) )
buffer.append( currentChar );
currentChar = peekChar( peekPos++ );
if ( currentChar == '.' )
buffer.append( currentChar );
currentChar = peekChar( peekPos++ );
while ( isDecimalChar( currentChar ) )
buffer.append( currentChar );
currentChar = peekChar( peekPos++ );
if ( currentChar == 'E' )
buffer.append( currentChar );
currentChar = peekChar( peekPos++ );
while ( isDecimalChar( currentChar ) )
buffer.append( currentChar );
currentChar = peekChar( peekPos++ );
final Token result = new Token( buffer.toString(), line, column, true );
skipChars( result.text.length() - 1 );
return result;
* The first dot has been scanned Are the next chars dots as well?
* @return
private Token scanDots()
final char secondCharacter = peekChar( 1 );
if ( secondCharacter == '.' )
final char thirdCharacter = peekChar( 2 );
final String text = thirdCharacter == '.' ? "..."
: "..";
final Token result = new Token( text, line, column );
skipChars( text.length() - 1 );
return result;
else if ( secondCharacter == '<' )
final Token result = new Token( ".<", line, column );
skipChars( 1 );
inVector = true;
return result;
return null;
* we have seen the 0x prefix
* @return
private Token scanHex()
final StringBuffer buffer = new StringBuffer();
buffer.append( "0x" );
int peekPos = 2;
for ( ;; )
final char character = peekChar( peekPos++ );
if ( !isHexChar( character ) )
buffer.append( character );
final Token result = new Token( buffer.toString(), line, column, true );
skipChars( result.text.length() - 1 );
return result;
* the current char is the first slash plus we know, that a * is following
* @return
// Scans a block comment and returns it as one token.
// NOTE(review): this listing carries no braces or loop keywords; the trailing
// "while ( ... );" suggests the three statements before it form a do/while
// body. Other short statement lines may be missing as well - confirm against
// the canonical file before editing.
private Token scanMultiLineComment()
final StringBuffer buffer = new StringBuffer();
char currentCharacter = ' ';
char previousCharacter = ' ';
// The token text always starts with the opening delimiter.
buffer.append( "/*" );
// Keep the previously read character so a closing "*/" pair can be recognised.
previousCharacter = currentCharacter;
currentCharacter = nextChar();
buffer.append( currentCharacter );
// Stop when nextChar() yields 0 or when a '/' directly follows a '*'.
while ( currentCharacter != 0
&& !( currentCharacter == '/' && previousCharacter == '*' ) );
// The token is created from line/column as they stand after the scan.
return new Token( buffer.toString(), line, column );
* Something started with a number or a dot.
* @param characterToBeScanned
* @return
private Token scanNumberOrDots( final char characterToBeScanned )
if ( characterToBeScanned == '.' )
final Token result = scanDots();
if ( result != null )
return result;
final char firstCharacter = peekChar( 1 );
if ( !isDecimalChar( firstCharacter ) )
return new Token( ".", line, column );
if ( characterToBeScanned == '0' )
final char firstCharacter = peekChar( 1 );
if ( firstCharacter == 'x' )
return scanHex();
return scanDecimal( characterToBeScanned );
private Token scanRegExp()
final Token token = scanUntilDelimiter( '/' );
if ( token != null
&& isValidRegExp( token.text ) )
return token;
return null;
private Token scanSingleCharacterToken( final char character )
return new Token( String.valueOf( character ), line, column );
* the current char is the first slash plus we know, that another slash is
* following
* @return
private Token scanSingleLineComment()
final Token result = new Token( lines[ line ].substring( column ), line, column );
skipChars( result.text.length() - 1 );
return result;
* Something started with a quote or double quote consume characters until
* the quote/double quote shows up again and is not escaped
* @param startingCharacter
* @return
private Token scanString( final char startingCharacter )
return scanUntilDelimiter( startingCharacter );
private Token scanUntilDelimiter( final char delimiter )
return scanUntilDelimiter( delimiter,
delimiter );
private Token scanUntilDelimiter( final char start,
final char delimiter )
final StringBuffer buffer = new StringBuffer();
int peekPos = 1;
int numberOfBackslashes = 0;
buffer.append( start );
for ( ;; )
final char currentCharacter = peekChar( peekPos++ );
if ( currentCharacter == '\n' )
return null;
buffer.append( currentCharacter );
if ( currentCharacter == delimiter
&& numberOfBackslashes == 0 )
final Token result = Token.create( buffer.toString(),
column );
skipChars( buffer.toString().length() - 1 );
return result;
numberOfBackslashes = currentCharacter == '\\' ? ( numberOfBackslashes + 1 ) % 2
: 0;
private Token scanWord( final char startingCharacter )
char currentChar = startingCharacter;
final StringBuffer buffer = new StringBuffer();
buffer.append( currentChar );
int peekPos = 1;
for ( ;; )
currentChar = peekChar( peekPos++ );
if ( !isIdentifierCharacter( currentChar ) )
buffer.append( currentChar );
final Token result = new Token( buffer.toString(), line, column );
skipChars( buffer.toString().length() - 1 );
return result;
* Try to parse a XML document
* @return
// Tries to scan an XML literal that starts at the current '<', collecting the
// text of every tag and of everything between tags into one buffer.
// NOTE(review): this listing carries no braces and has evidently lost short
// statement lines: the bodies of the two branches on the tag kind and of the
// "if ( currentCharacter == '<' )" test are not visible, nor is the keyword
// that pairs with "while ( currentToken == null );". Read it against the
// canonical file before changing anything.
private Token scanXML()
// Starting position, restored below when no tag can be scanned.
final int currentLine = line;
final int currentColumn = column;
// Presumably the tag nesting depth; the statements that change it are not
// visible here - TODO confirm.
int level = 0;
final StringBuffer buffer = new StringBuffer();
char currentCharacter = '<';
for ( ;; )
Token currentToken = null;
// Scan one tag: everything from '<' up to the next unescaped '>' on this line.
currentToken = scanUntilDelimiter( '<',
'>' );
// No closing '>' on the line: rewind and report failure.
if ( currentToken == null )
line = currentLine;
column = currentColumn;
return null;
buffer.append( currentToken.text );
// A tag starting with "<?" is appended to the buffer and currentToken is
// reset to null afterwards.
if ( isProcessingInstruction( currentToken.text ) )
currentCharacter = nextChar();
if ( currentCharacter == '\n' )
buffer.append( '\n' );
currentToken = null;
while ( currentToken == null );
// Distinguish closing tags ("</...") from tags that are neither
// self-closing ("/>") nor the "<>" operator.
if ( currentToken.text.startsWith( "</" ) )
else if ( !currentToken.text.endsWith( "/>" )
&& !currentToken.text.equals( "<>" ) ) // NOT operator in AS2
// Once level is not positive, everything collected so far is the token.
if ( level <= 0 )
return new Token( buffer.toString(), line, column );
// Read on with nextChar(), testing each character against '<'.
for ( ;; )
currentCharacter = nextChar();
if ( currentCharacter == '<' )
buffer.append( currentCharacter );
* Something started with a lower sign <
* @param startingCharacterc
* @return
private Token scanXMLOrOperator( final char startingCharacterc )
final Token xmlToken = scanXML();
if ( xmlToken != null
&& isValidXML( xmlToken.text ) )
return xmlToken;
return scanCharacterSequence( startingCharacterc,
new String[]
{ "<<<=",
"<=" } );
private void skipChar()
private void skipChars( final int count )
int decrementCount = count;
while ( decrementCount-- > 0 )