blob: 70bf55ae5b41321b624e330a984aa454dab6a47e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.bokelberg.flex.parser;
import java.io.IOException;
import java.io.StringReader;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import com.adobe.ac.utils.StackTraceUtils;
/**
 * Converts ActionScript source, supplied as an array of lines, into a stream
 * of tokens.
 * <p>
 * Call {@link #setLines(String[])} once, then call {@link #moveToNextToken()}
 * repeatedly until it returns a token whose text is <code>__END__</code>.
 * <p>
 * The scanner keeps its position in mutable fields and validates XML literals
 * with one shared static parser; no synchronization is performed.
 */
public class AS3Scanner
{
   /**
    * An immutable token: its text, its 1-based line and column, and a flag
    * telling whether it was scanned as a numeric literal.
    */
   public static final class Token
   {
      private static Token create( final String textContent,
                                   final int tokenLine,
                                   final int tokenColumn )
      {
         return new Token( textContent, tokenLine, tokenColumn );
      }

      private final int     column;
      private final boolean isNumeric;
      private final int     line;
      private final String  text;

      /**
       * Creates a non-numeric token.
       *
       * @param textContent the token text
       * @param tokenLine 0-based line index
       * @param tokenColumn 0-based column index
       */
      protected Token( final String textContent,
                       final int tokenLine,
                       final int tokenColumn )
      {
         this( textContent, tokenLine, tokenColumn, false );
      }

      /**
       * @param textContent the token text
       * @param tokenLine 0-based line index; stored 1-based
       * @param tokenColumn 0-based column index; stored 1-based
       * @param isNumToSet whether the token is a numeric literal
       */
      protected Token( final String textContent,
                       final int tokenLine,
                       final int tokenColumn,
                       final boolean isNumToSet )
      {
         text = textContent;
         line = tokenLine + 1;
         column = tokenColumn + 1;
         isNumeric = isNumToSet;
      }

      /**
       * @return the column given at construction, plus one
       */
      public int getColumn()
      {
         return column;
      }

      /**
       * @return the line given at construction, plus one
       */
      public int getLine()
      {
         return line;
      }

      /**
       * @return the token text
       */
      public String getText()
      {
         return text;
      }

      /**
       * @return true if the token was scanned as a decimal or hexadecimal
       *         number
       */
      public boolean isNum()
      {
         return isNumeric;
      }
   }

   /**
    * Checks whether a piece of text is a well-formed XML document, using one
    * lazily created, shared SAX parser.
    */
   private static class XMLVerifier
   {
      private static final DefaultHandler HANDLER    = new DefaultHandler();
      private static final SAXParser      SAX_PARSER = createParser();

      /**
       * Builds the shared parser.
       *
       * @return the parser, or null if it could not be created (the failure
       *         is logged)
       */
      private static SAXParser createParser()
      {
         final SAXParserFactory factory = SAXParserFactory.newInstance();

         factory.setNamespaceAware( false );
         // The verified text comes from the scanned source files, so never
         // let it pull in external entities or DTDs.
         disableFeature( factory,
                         "http://xml.org/sax/features/external-general-entities" );
         disableFeature( factory,
                         "http://xml.org/sax/features/external-parameter-entities" );
         disableFeature( factory,
                         "http://apache.org/xml/features/nonvalidating/load-external-dtd" );
         try
         {
            return factory.newSAXParser();
         }
         catch ( final ParserConfigurationException e )
         {
            LOGGER.log( Level.WARNING,
                        "Unable to create the SAX parser; XML literals will not be recognized",
                        e );
         }
         catch ( final SAXException e )
         {
            LOGGER.log( Level.WARNING,
                        "Unable to create the SAX parser; XML literals will not be recognized",
                        e );
         }
         return null;
      }

      /**
       * Turns a parser feature off; a parser that does not know the feature
       * is still usable, so the failure is only logged.
       */
      private static void disableFeature( final SAXParserFactory factory,
                                          final String feature )
      {
         try
         {
            factory.setFeature( feature,
                                false );
         }
         catch ( final ParserConfigurationException e )
         {
            LOGGER.log( Level.FINE,
                        "XML parser feature not supported: "
                              + feature,
                        e );
         }
         catch ( final SAXException e )
         {
            LOGGER.log( Level.FINE,
                        "XML parser feature not supported: "
                              + feature,
                        e );
         }
      }

      /**
       * @param text candidate XML document
       * @return true if the text parses without error; false if it does not
       *         or if no parser is available
       */
      public static boolean verify( final String text )
      {
         if ( SAX_PARSER == null )
         {
            return false;
         }
         try
         {
            SAX_PARSER.parse( new InputSource( new StringReader( text ) ),
                              HANDLER );
            return true;
         }
         catch ( final SAXException e )
         {
            LOGGER.log( Level.WARNING,
                        "Text is not well-formed XML",
                        e );
            return false;
         }
         catch ( final IOException e )
         {
            LOGGER.log( Level.WARNING,
                        "Unable to read the XML text",
                        e );
            return false;
         }
      }
   }

   private static final String END    = "__END__";
   private static final Logger LOGGER = Logger.getLogger( AS3Scanner.class.getName() );

   /**
    * @param currentCharacter character to test
    * @return true for the ASCII digits 0-9
    */
   protected static boolean isDecimalChar( final char currentCharacter )
   {
      return currentCharacter >= '0'
            && currentCharacter <= '9';
   }

   /** 0-based index of the current character in the current line; -1 before the first one. */
   private int      column;
   /** Set after a ".<" token so that the next '>' is emitted on its own. */
   private boolean  inVector;
   /** 0-based index of the current line. */
   private int      line;
   private String[] lines = null;

   /**
    * @return the next token; a token with the text <code>__END__</code> once
    *         the input is exhausted or if no lines have been set
    */
   public Token moveToNextToken()
   {
      return nextToken();
   }

   /**
    * Sets the source to scan and rewinds to its beginning.
    *
    * @param linesToBeSet the source, one element per line, without line
    *           terminators
    */
   public void setLines( final String[] linesToBeSet )
   {
      lines = linesToBeSet;
      line = 0;
      column = -1;
   }

   /**
    * @param currentCharacter character to test
    * @return true for 0-9, a-f and A-F
    */
   boolean isHexChar( final char currentCharacter )
   {
      final boolean isDigit = isDecimalChar( currentCharacter );
      final boolean isUpperHex = currentCharacter >= 'A'
            && currentCharacter <= 'F';
      final boolean isLowerHex = currentCharacter >= 'a'
            && currentCharacter <= 'f';

      return isDigit
            || isUpperHex || isLowerHex;
   }

   /**
    * Scans the next token, dispatching on its first non-blank character.
    *
    * @return the next token, never null
    */
   protected Token nextToken()
   {
      if ( isAtEnd() )
      {
         return new Token( END, line, column );
      }
      final char currentCharacter = nextNonWhitespaceCharacter();

      switch ( currentCharacter )
      {
      case '\n':
         // nextChar() has already moved to the next line at this point
         return new Token( "\n", line, column );
      case '/':
         return scanCommentRegExpOrOperator();
      case '"':
      case '\'':
         return scanString( currentCharacter );
      case '<':
         return scanXMLOrOperator( currentCharacter );
      case '.':
         return scanNumberOrDots( currentCharacter );
      case '{':
      case '}':
      case '(':
      case ')':
      case '[':
      case ']':
      case ';':
      case ',':
      case '?':
      case '~':
         return scanSingleCharacterToken( currentCharacter );
      case ':':
         return scanCharacterSequence( currentCharacter,
                                       new String[]
                                       { "::" } );
      case '*':
         // No compound forms for '*'.
         // NOTE(review): presumably so that "a:*=null" keeps '*' as a type
         // wildcard followed by '=' - confirm before adding "*=".
         return scanCharacterSequence( currentCharacter,
                                       new String[]
                                       {} );
      case '+':
         return scanCharacterSequence( currentCharacter,
                                       new String[]
                                       { "++",
                                                   "+=" } );
      case '-':
         return scanCharacterSequence( currentCharacter,
                                       new String[]
                                       { "--",
                                                   "-=" } );
      case '%':
         return scanCharacterSequence( currentCharacter,
                                       new String[]
                                       { "%=" } );
      case '&':
         return scanCharacterSequence( currentCharacter,
                                       new String[]
                                       { "&&",
                                                   "&=" } );
      case '|':
         return scanCharacterSequence( currentCharacter,
                                       new String[]
                                       { "||",
                                                   "|=" } );
      case '^':
         return scanCharacterSequence( currentCharacter,
                                       new String[]
                                       { "^=" } );
      case '>':
         if ( inVector )
         {
            // closes a ".<" type parameter: always a lone '>'
            inVector = false;
            return scanSingleCharacterToken( currentCharacter );
         }
         return scanCharacterSequence( currentCharacter,
                                       new String[]
                                       { ">>>=",
                                                   ">>>",
                                                   ">>=",
                                                   ">>",
                                                   ">=" } );
      case '=':
         return scanCharacterSequence( currentCharacter,
                                       new String[]
                                       { "===",
                                                   "==" } );
      case '!':
         return scanCharacterSequence( currentCharacter,
                                       new String[]
                                       { "!==",
                                                   "!=" } );
      default:
         if ( isDecimalChar( currentCharacter ) )
         {
            return scanNumberOrDots( currentCharacter );
         }
         return scanWord( currentCharacter );
      }
   }

   private int computePossibleMatchesMaxLength( final String[] possibleMatches )
   {
      int max = 0;

      for ( final String possibleMatch : possibleMatches )
      {
         max = Math.max( max,
                         possibleMatch.length() );
      }
      return max;
   }

   /**
    * @return the closest character before the current one on the same line
    *         that is neither a space nor a tab, or '\0' if there is none
    */
   private char getPreviousCharacter()
   {
      int offset = -1;
      char candidate;

      do
      {
         candidate = peekChar( offset-- );
      }
      while ( candidate == ' '
            || candidate == '\t' );
      return candidate;
   }

   /**
    * @return true when there is no source or every line has been consumed
    */
   private boolean isAtEnd()
   {
      return lines == null
            || line >= lines.length;
   }

   private boolean isIdentifierCharacter( final char currentCharacter )
   {
      return currentCharacter >= 'A'
            && currentCharacter <= 'Z' || currentCharacter >= 'a' && currentCharacter <= 'z'
            || isDecimalChar( currentCharacter ) || currentCharacter == '_' || currentCharacter == '$';
   }

   private boolean isProcessingInstruction( final String text )
   {
      return text.startsWith( "<?" );
   }

   /**
    * @return true if the text compiles as a java.util.regex pattern
    */
   private boolean isValidRegExp( final String pattern )
   {
      try
      {
         Pattern.compile( pattern );
      }
      catch ( final PatternSyntaxException t )
      {
         return false;
      }
      return true;
   }

   private boolean isValidXML( final String text )
   {
      return XMLVerifier.verify( text );
   }

   /**
    * Advances by one character, skipping byte order marks.
    *
    * @return the new current character; '\n' when the end of the line is
    *         reached (the position then moves to just before the next line);
    *         '\0' when the input is exhausted
    */
   private char nextChar()
   {
      if ( isAtEnd() )
      {
         return '\0';
      }
      final String currentLine = lines[ line ];

      column++;
      while ( column < currentLine.length()
            && currentLine.charAt( column ) == '\uFEFF' )
      {
         column++;
      }
      if ( currentLine.length() <= column )
      {
         column = -1;
         line++;
         return '\n';
      }
      return currentLine.charAt( column );
   }

   private char nextNonWhitespaceCharacter()
   {
      char result;

      do
      {
         result = nextChar();
      }
      while ( result == ' '
            || result == '\t' );
      return result;
   }

   /**
    * Looks at a character of the current line without moving.
    *
    * @param offset distance from the current column, may be negative
    * @return the character; '\0' before the start of the line; '\n' past the
    *         end of the line or of the input
    */
   private char peekChar( final int offset )
   {
      if ( isAtEnd() )
      {
         return '\n';
      }
      final String currentLine = lines[ line ];
      final int index = column
            + offset;

      if ( index < 0 )
      {
         return '\0';
      }
      if ( index >= currentLine.length() )
      {
         return '\n';
      }
      return currentLine.charAt( index );
   }

   /**
    * Finds the longest of the given sequences that starts at the current
    * character; falls back to the current character alone.
    *
    * @param currentCharacter the current character
    * @param possibleMatches candidate sequences, each starting with
    *           currentCharacter
    * @return the token for the longest match
    */
   private Token scanCharacterSequence( final char currentCharacter,
                                        final String[] possibleMatches )
   {
      final int maxLength = computePossibleMatchesMaxLength( possibleMatches );
      final StringBuilder buffer = new StringBuilder();

      buffer.append( currentCharacter );
      String found = buffer.toString();

      for ( int peekPos = 1; peekPos < maxLength; peekPos++ )
      {
         buffer.append( peekChar( peekPos ) );
         final String candidate = buffer.toString();

         for ( final String possibleMatch : possibleMatches )
         {
            if ( candidate.equals( possibleMatch ) )
            {
               found = candidate;
            }
         }
      }
      final Token result = new Token( found, line, column );

      skipChars( found.length() - 1 );
      return result;
   }

   /**
    * The current character is a slash: this may start a comment, a regular
    * expression or a division operator.
    *
    * @return the scanned token
    */
   private Token scanCommentRegExpOrOperator()
   {
      final char firstCharacter = peekChar( 1 );

      if ( firstCharacter == '/' )
      {
         return scanSingleLineComment();
      }
      if ( firstCharacter == '*' )
      {
         return scanMultiLineComment();
      }
      // A regular expression is only attempted right after '=', '(' or ','
      final char previousCharacter = getPreviousCharacter();

      if ( previousCharacter == '='
            || previousCharacter == '(' || previousCharacter == ',' )
      {
         final Token regExp = scanRegExp();

         if ( regExp != null )
         {
            return regExp;
         }
      }
      if ( firstCharacter == '=' )
      {
         final Token result = new Token( "/=", line, column );

         skipChars( 1 );
         return result;
      }
      return new Token( "/", line, column );
   }

   /**
    * Scans a decimal literal: digits, an optional fraction and an optional
    * exponent ('e' or 'E', optional sign, at least one digit).
    *
    * @param currentCharacter the current character, a digit or a dot
    * @return the numeric token
    */
   private Token scanDecimal( final char currentCharacter )
   {
      final StringBuilder buffer = new StringBuilder();
      // invariant: currentChar is the character at offset peekPos - 1
      char currentChar = currentCharacter;
      int peekPos = 1;

      while ( isDecimalChar( currentChar ) )
      {
         buffer.append( currentChar );
         currentChar = peekChar( peekPos++ );
      }
      if ( currentChar == '.' )
      {
         buffer.append( currentChar );
         currentChar = peekChar( peekPos++ );
         while ( isDecimalChar( currentChar ) )
         {
            buffer.append( currentChar );
            currentChar = peekChar( peekPos++ );
         }
      }
      if ( currentChar == 'E'
            || currentChar == 'e' )
      {
         // Look ahead first: the marker only belongs to the number when
         // digits follow it.
         int exponentPos = peekPos;
         char exponentChar = peekChar( exponentPos );
         final boolean hasSign = exponentChar == '+'
               || exponentChar == '-';
         final char sign = exponentChar;

         if ( hasSign )
         {
            exponentChar = peekChar( ++exponentPos );
         }
         if ( isDecimalChar( exponentChar ) )
         {
            buffer.append( currentChar );
            if ( hasSign )
            {
               buffer.append( sign );
            }
            while ( isDecimalChar( exponentChar ) )
            {
               buffer.append( exponentChar );
               exponentChar = peekChar( ++exponentPos );
            }
         }
      }
      final Token result = new Token( buffer.toString(), line, column, true );

      skipChars( result.getText().length() - 1 );
      return result;
   }

   /**
    * The current character is a dot: checks for "..", "..." and ".<".
    *
    * @return the token, or null if the dot stands alone
    */
   private Token scanDots()
   {
      final char secondCharacter = peekChar( 1 );

      if ( secondCharacter == '.' )
      {
         final String text = peekChar( 2 ) == '.' ? "..."
                                                 : "..";
         final Token result = new Token( text, line, column );

         skipChars( text.length() - 1 );
         return result;
      }
      if ( secondCharacter == '<' )
      {
         final Token result = new Token( ".<", line, column );

         skipChars( 1 );
         inVector = true;
         return result;
      }
      return null;
   }

   /**
    * The current character is '0' and the next one is 'x' or 'X'.
    *
    * @return the numeric token, with the prefix as written in the source
    */
   private Token scanHex()
   {
      final StringBuilder buffer = new StringBuilder();

      buffer.append( '0' ).append( peekChar( 1 ) );
      for ( int peekPos = 2;; peekPos++ )
      {
         final char character = peekChar( peekPos );

         if ( !isHexChar( character ) )
         {
            break;
         }
         buffer.append( character );
      }
      final Token result = new Token( buffer.toString(), line, column, true );

      skipChars( result.getText().length() - 1 );
      return result;
   }

   /**
    * The current character is a slash followed by a star. Consumes up to and
    * including the closing star-slash, or up to the end of the input if the
    * comment is not terminated.
    *
    * @return the comment token; its position is the one where the scan
    *         stopped, not where the comment started
    */
   private Token scanMultiLineComment()
   {
      final StringBuilder buffer = new StringBuilder( "/*" );
      char previousCharacter;
      // starts as a blank so that the '*' of the opener cannot close "/*/"
      char currentCharacter = ' ';

      skipChar(); // the '*' of the opener
      for ( ;; )
      {
         previousCharacter = currentCharacter;
         currentCharacter = nextChar();
         if ( currentCharacter == '\0' )
         {
            break;
         }
         buffer.append( currentCharacter );
         if ( currentCharacter == '/'
               && previousCharacter == '*' )
         {
            break;
         }
      }
      return new Token( buffer.toString(), line, column );
   }

   /**
    * Something started with a digit or a dot.
    *
    * @param characterToBeScanned the current character
    * @return a dots token, a lone ".", or a numeric token
    */
   private Token scanNumberOrDots( final char characterToBeScanned )
   {
      if ( characterToBeScanned == '.' )
      {
         final Token result = scanDots();

         if ( result != null )
         {
            return result;
         }
         if ( !isDecimalChar( peekChar( 1 ) ) )
         {
            return new Token( ".", line, column );
         }
      }
      if ( characterToBeScanned == '0' )
      {
         final char secondCharacter = peekChar( 1 );

         if ( secondCharacter == 'x'
               || secondCharacter == 'X' )
         {
            return scanHex();
         }
      }
      return scanDecimal( characterToBeScanned );
   }

   /**
    * Tries to scan a regular expression literal starting at the current
    * slash.
    *
    * @return the token, or null (position unchanged) if there is no closing
    *         slash on the line or the text is not a valid pattern
    */
   private Token scanRegExp()
   {
      final int startLine = line;
      final int startColumn = column;
      final Token token = scanUntilDelimiter( '/' );

      if ( token == null )
      {
         return null;
      }
      if ( isValidRegExp( token.getText() ) )
      {
         return token;
      }
      // scanUntilDelimiter() consumed the candidate: rewind so the caller
      // can scan an operator from the slash
      line = startLine;
      column = startColumn;
      return null;
   }

   private Token scanSingleCharacterToken( final char character )
   {
      return new Token( String.valueOf( character ), line, column );
   }

   /**
    * The current character is a slash followed by another slash.
    *
    * @return a token holding the rest of the line
    */
   private Token scanSingleLineComment()
   {
      final Token result = new Token( lines[ line ].substring( column ), line, column );

      skipChars( result.getText().length() - 1 );
      return result;
   }

   /**
    * Something started with a quote or a double quote: consumes characters
    * until the same quote shows up again unescaped.
    *
    * @param startingCharacter the opening quote
    * @return the string token, or null if the line ends first
    */
   private Token scanString( final char startingCharacter )
   {
      return scanUntilDelimiter( startingCharacter );
   }

   private Token scanUntilDelimiter( final char delimiter )
   {
      return scanUntilDelimiter( delimiter,
                                 delimiter );
   }

   /**
    * Collects characters of the current line up to and including the first
    * delimiter that is not preceded by an odd number of backslashes.
    *
    * @param start character put at the head of the token text
    * @param delimiter closing character
    * @return the token (the position is then on the delimiter), or null with
    *         the position unchanged if the line ends first
    */
   private Token scanUntilDelimiter( final char start,
                                     final char delimiter )
   {
      final StringBuilder buffer = new StringBuilder();
      boolean escaped = false;

      buffer.append( start );
      for ( int peekPos = 1;; peekPos++ )
      {
         final char currentCharacter = peekChar( peekPos );

         if ( currentCharacter == '\n' )
         {
            return null;
         }
         buffer.append( currentCharacter );
         if ( currentCharacter == delimiter
               && !escaped )
         {
            final Token result = Token.create( buffer.toString(),
                                               line,
                                               column );

            skipChars( buffer.length() - 1 );
            return result;
         }
         // a backslash escapes the next character unless it is itself escaped
         escaped = currentCharacter == '\\'
               && !escaped;
      }
   }

   /**
    * Scans the starting character plus every identifier character directly
    * following it.
    */
   private Token scanWord( final char startingCharacter )
   {
      final StringBuilder buffer = new StringBuilder();

      buffer.append( startingCharacter );
      for ( int peekPos = 1;; peekPos++ )
      {
         final char currentChar = peekChar( peekPos );

         if ( !isIdentifierCharacter( currentChar ) )
         {
            break;
         }
         buffer.append( currentChar );
      }
      final Token result = new Token( buffer.toString(), line, column );

      skipChars( buffer.length() - 1 );
      return result;
   }

   /**
    * Tries to collect an XML literal starting at the current '&lt;' by
    * counting opening and closing tags.
    *
    * @return the candidate token (positioned where the scan stopped), or
    *         null with the position unchanged if a tag is not closed on its
    *         line or the input ends first
    */
   private Token scanXML()
   {
      final int startLine = line;
      final int startColumn = column;
      final StringBuilder buffer = new StringBuilder();
      int level = 0;

      for ( ;; )
      {
         Token currentToken;

         do
         {
            currentToken = scanUntilDelimiter( '<',
                                               '>' );
            if ( currentToken == null )
            {
               line = startLine;
               column = startColumn;
               return null;
            }
            buffer.append( currentToken.getText() );
            if ( isProcessingInstruction( currentToken.getText() ) )
            {
               if ( nextChar() == '\n' )
               {
                  buffer.append( '\n' );
                  skipChar();
               }
               currentToken = null;
            }
         }
         while ( currentToken == null );

         if ( currentToken.getText().startsWith( "</" ) )
         {
            level--;
         }
         else if ( !currentToken.getText().endsWith( "/>" )
               && !currentToken.getText().equals( "<>" ) ) // NOT operator in AS2
         {
            level++;
         }
         if ( level <= 0 )
         {
            return new Token( buffer.toString(), line, column );
         }
         // copy the text between two tags
         for ( ;; )
         {
            if ( isAtEnd() )
            {
               line = startLine;
               column = startColumn;
               return null;
            }
            final char currentCharacter = nextChar();

            if ( currentCharacter == '<' )
            {
               break;
            }
            buffer.append( currentCharacter );
         }
      }
   }

   /**
    * Something started with a lower sign &lt;
    *
    * @param startingCharacterc the current character
    * @return an XML literal token if one starts here and is well-formed,
    *         otherwise an operator token
    */
   private Token scanXMLOrOperator( final char startingCharacterc )
   {
      final int startLine = line;
      final int startColumn = column;
      final Token xmlToken = scanXML();

      if ( xmlToken != null )
      {
         if ( isValidXML( xmlToken.getText() ) )
         {
            return xmlToken;
         }
         // scanXML() consumed the candidate: rewind before scanning an
         // operator
         line = startLine;
         column = startColumn;
      }
      // "<>" is the AS2 operator scanXML() singles out.
      // NOTE(review): confirm the parser accepts it as one token.
      return scanCharacterSequence( startingCharacterc,
                                    new String[]
                                    { "<<<=",
                                                "<<<",
                                                "<<=",
                                                "<<",
                                                "<=",
                                                "<>" } );
   }

   private void skipChar()
   {
      nextChar();
   }

   private void skipChars( final int count )
   {
      for ( int remaining = count; remaining > 0; remaining-- )
      {
         nextChar();
      }
   }
}