blob: e4c175bee9d10fcc0e98b934c888ddd65222e9f8 [file] [log] [blame]
package org.codehaus.groovy.syntax.lexer;
//{{{ imports
import org.codehaus.groovy.syntax.ReadException;
import org.codehaus.groovy.syntax.Token;
import org.codehaus.groovy.GroovyBugError;
//}}}
/**
* A Lexer for processing standard strings.
*
* @author Chris Poirier
*/
public class StringLexer extends TextLexerBase
{
protected String delimiter = null;
protected char watchFor;
protected boolean allowGStrings = false;
protected boolean emptyString = true; // If set, we need to send an empty string
/**
* If set true, the filter will allow \\ and \$ to pass through unchanged.
* You should set this appropriately BEFORE setting source!
*/
public void allowGStrings( boolean allow )
{
allowGStrings = allow;
}
/**
* Returns a single STRING, then null. The STRING is all of the processed
* input. Backslashes are stripped, with the \r, \n, and \t converted
* appropriately.
*/
public Token undelegatedNextToken( ) throws ReadException, LexerException
{
if( emptyString )
{
emptyString = false;
return Token.newString( "", getStartLine(), getStartColumn() );
}
else if( finished )
{
return null;
}
else
{
StringBuffer string = new StringBuffer();
while( la(1) != CharStream.EOS )
{
string.append( consume() );
}
if( la(1) == CharStream.EOS && string.length() == 0 )
{
finished = true;
}
return Token.newString( string.toString(), getStartLine(), getStartColumn() );
}
}
/**
* Controls delimiter search. When turned on, the first thing we do
* is check for and eat our delimiter.
*/
public void delimit( boolean delimit )
{
super.delimit( delimit );
if( delimit )
{
try
{
if( !finished && la(1) == CharStream.EOS )
{
finishUp();
//
// The GStringLexer will correctly handle the empty string.
// We don't. In order to ensure that an empty string is
// supplied, we set a flag that is checked during
// undelegatedNextToken().
if( !allowGStrings )
{
emptyString = true;
}
}
}
catch( Exception e )
{
finished = true;
}
}
}
/**
* Sets the source lexer and identifies and consumes the opening delimiter.
*/
public void setSource( Lexer source )
{
super.setSource( source );
emptyString = false;
try
{
char c = source.la();
switch( c )
{
case '\'':
case '"':
mark();
source.consume();
if( source.la() == c && source.la(2) == c )
{
source.consume(); source.consume();
delimiter = new StringBuffer().append(c).append(c).append(c).toString();
}
else
{
delimiter = new StringBuffer().append(c).toString();
}
watchFor = delimiter.charAt(0);
break;
default:
{
throw new GroovyBugError( "at the time of StringLexer.setSource(), the source must be on a single or double quote" );
}
}
restart();
delimit( true );
}
catch( Exception e )
{
//
// If we couldn't read our delimiter, we'll just
// cancel our source. nextToken() will return null.
e.printStackTrace();
unsetSource( );
}
}
/**
* Unsets our source.
*/
public void unsetSource()
{
super.unsetSource();
delimiter = null;
finished = true;
emptyString = false;
}
//---------------------------------------------------------------------------
// STREAM ROUTINES
private int lookahead = 0; // the number of characters identified
private char[] characters = new char[3]; // the next characters identified by la()
private int[] widths = new int[3]; // the source widths of the next characters
public char la() throws LexerException, ReadException
{
return la(1);
}
/**
* Returns the next <code>k</code>th character, without consuming any.
*/
public char la(int k) throws LexerException, ReadException
{
if( !finished && source != null )
{
if( delimited )
{
if( k > characters.length )
{
throw new GroovyBugError( "StringLexer lookahead tolerance exceeded" );
}
if( lookahead >= k && k >= 1)
{
lookahead = 1;
return characters[k-1];
}
lookahead = 0;
char c = ' ', c1 = ' ', c2 = ' ';
int offset = 1, width = 0;
for( int i = 1; i <= k; i++ )
{
c1 = source.la(offset);
C1_SWITCH: switch( c1 )
{
case CharStream.EOS:
{
return c1;
}
case '\\':
{
c2 = source.la( offset + 1 );
ESCAPE_SWITCH: switch( c2 )
{
case CharStream.EOS:
return c2;
case '\\':
c = '\\';
characters[0] = c;
widths[0] = 2;
lookahead = 1;
return c;
case 'n':
c = '\n';
width = 2;
break ESCAPE_SWITCH;
case 'r':
c = '\r';
width = 2;
break ESCAPE_SWITCH;
case 't':
c = '\t';
width = 2;
break ESCAPE_SWITCH;
case 'b':
c = '\b';
width = 2;
break ESCAPE_SWITCH;
case 'f':
c = '\f';
width = 2;
break ESCAPE_SWITCH;
case '$':
if ( allowGStrings )
{
c = c1;
width = 1;
}
else
{
c = c2;
width = 2;
}
break ESCAPE_SWITCH;
case '"':
case '\'':
c = c2;
characters[0] = c;
widths[0] = 2;
lookahead = 1;
return c;
default:
c = '\\';
characters[0] = c;
widths[0] = 1;
lookahead = 1;
return c;
}
break C1_SWITCH;
}
default:
{
if( c1 == watchFor )
{
boolean atEnd = true;
if (delimiter.length() == 1)
{
if (source.la(offset) != watchFor)
{
atEnd = false;
c = c1;
break C1_SWITCH;
}
}
else {
for( int j = 1; j < delimiter.length(); j++ )
{
if( source.la(offset+j) != delimiter.charAt(j) )
{
atEnd = false;
break;
}
}
}
if( atEnd )
{
return CharStream.EOS;
}
}
c = c1;
width = 1;
if (c == '$' && allowGStrings)
{
lookahead = 0;
}
break C1_SWITCH;
}
}
characters[lookahead] = c;
widths[lookahead] = width;
offset += width;
lookahead += 1;
}
return c; // <<< FLOW CONTROL <<<<<<<<<
}
lookahead = 0;
return source.la(k);
}
return CharStream.EOS;
}
/**
* Eats a character from the input stream. Searches for the delimiter if
* delimited. Note that turning delimiting on also checks if we are at the
* delimiter, so if we aren't finished, there is something to consume.
*/
public char consume() throws LexerException, ReadException
{
if( !finished && source != null )
{
char c = CharStream.EOS;
if( delimited )
{
if( lookahead < 1 )
{
la( 1 );
}
if( lookahead >= 1 )
{
c = characters[0];
for( int i = 0; i < widths[0]; i++ )
{
source.consume();
}
lookahead = 0;
}
if( la(1) == CharStream.EOS )
{
finishUp();
}
}
else
{
c = source.consume();
}
lookahead = 0;
return c;
}
return CharStream.EOS;
}
/**
* Eats our delimiter from the stream and marks us finished.
*/
protected void finishUp() throws LexerException, ReadException
{
for( int i = 0; i < delimiter.length(); i++ )
{
char c = source.la(1);
if( c == CharStream.EOS )
{
throw new UnterminatedStringLiteralException(getStartLine(), getStartColumn());
}
else if( c == delimiter.charAt(i) )
{
source.consume();
}
else
{
throw new GroovyBugError( "la() said delimiter [" + delimiter + "], finishUp() found [" + c + "]" );
}
}
finish();
}
}