groovy/modules/classic/src/java/org/codehaus/groovy/syntax/lexer/StringLexer.java - groovy - Git at Google

 package org.codehaus.groovy.syntax.lexer;

 //{{{ imports
 import org.codehaus.groovy.syntax.ReadException;
 import org.codehaus.groovy.syntax.Token;
 import org.codehaus.groovy.GroovyBugError;
 //}}}

 /**
  *  A Lexer for processing standard strings.
  *
  *  @author Chris Poirier
  */

 public class StringLexer extends TextLexerBase
 {

     // Closing delimiter of the current string: a single quote character, or
     // the same character three times for a triple-quoted string.  Assigned by
     // setSource() and cleared by unsetSource().
     protected String  delimiter = null;
     // First character of the delimiter; la() compares source characters to it.
     protected char    watchFor;
     // When true, la() leaves an escaped dollar sign as a backslash followed by
     // '$', and delimit() does not request the synthetic empty-string token.
     protected boolean allowGStrings = false;
     protected boolean emptyString   = true;   // If set, we need to send an empty string


    /**
     *  Enables or disables GString support.  When enabled, la() leaves an
     *  escaped dollar sign (\$) in place as a backslash followed by '$', and
     *  delimit() does not request the synthetic empty-string token.
     *  Set this appropriately BEFORE setting source, because setSource()
     *  switches delimiting on.
     *
     *  @param allow true to enable GString support
     */

     public void allowGStrings( boolean allow )
     {
         this.allowGStrings = allow;
     }


    /**
     *  Produces the STRING token for this lexer.  If the empty-string flag is
     *  set, it is cleared and an empty STRING is returned.  Otherwise, once
     *  the lexer is finished, null is returned.  In the remaining case every
     *  character up to the next EOS reported by la() is consumed and returned
     *  as one STRING; escape sequences are translated by la() on the way.
     *
     *  @return the next STRING token, or null when the lexer is finished
     *  @throws ReadException  if la() or consume() raises one
     *  @throws LexerException if la() or consume() raises one
     */

     public Token undelegatedNextToken( ) throws ReadException, LexerException
     {
         if( emptyString )
         {
             emptyString = false;
             return Token.newString( "", getStartLine(), getStartColumn() );
         }

         if( finished )
         {
             return null;
         }

         // Gather everything la() is willing to hand out before it reports EOS.
         StringBuffer text = new StringBuffer();
         for( char next = la(1); next != CharStream.EOS; next = la(1) )
         {
             text.append( consume() );
         }

         // Nothing was collected and nothing is left: mark the lexer finished.
         if( la(1) == CharStream.EOS )
         {
             if( text.length() == 0 )
             {
                 finished = true;
             }
         }

         return Token.newString( text.toString(), getStartLine(), getStartColumn() );
     }


    /**
     *  Controls delimiter search.  The flag is first handed to the superclass.
     *  When delimiting is switched on and la(1) immediately reports EOS, the
     *  closing delimiter is eaten via finishUp(); unless GStrings are allowed,
     *  the empty-string flag is then set so that undelegatedNextToken() still
     *  supplies one (empty) STRING.  Any Exception raised while doing so marks
     *  the lexer finished instead of propagating.
     *
     *  @param delimit true to turn delimiter search on
     */

     public void delimit( boolean delimit )
     {
         super.delimit( delimit );

         if( !delimit )
         {
             return;
         }

         try
         {
             if( finished || la(1) != CharStream.EOS )
             {
                 return;
             }

             finishUp();

             //
             // Per the original author's note, the GStringLexer handles the
             // empty string itself.  This lexer does not, so a flag is raised
             // here for undelegatedNextToken() to act on.

             if( !allowGStrings )
             {
                 emptyString = true;
             }
         }
         catch( Exception e )
         {
             finished = true;
         }
     }


    /**
     *  Sets the source lexer and identifies and consumes the opening
     *  delimiter.  The source must be positioned on a single or double quote;
     *  three identical quotes in a row select a triple-quote delimiter.  After
     *  the delimiter is recorded the lexer is restarted and delimiting is
     *  switched on.  If any Exception is raised along the way, its stack trace
     *  is printed and the source is unset again.
     *
     *  @param source the lexer to read characters from
     */

     public void setSource( Lexer source )
     {
         super.setSource( source );

         emptyString = false;

         try
         {
             final char quote = source.la();
             if( quote != '\'' && quote != '"' )
             {
                 throw new GroovyBugError( "at the time of StringLexer.setSource(), the source must be on a single or double quote" );
             }

             mark();
             source.consume();

             final String single = String.valueOf( quote );
             if( source.la() == quote && source.la(2) == quote )
             {
                 // Two more quotes follow: this is a triple-quoted string.
                 source.consume();
                 source.consume();
                 delimiter = single + single + single;
             }
             else
             {
                 delimiter = single;
             }

             watchFor = delimiter.charAt(0);

             restart();
             delimit( true );
         }
         catch( Exception e )
         {
             //
             // The delimiter could not be read, so the source is cancelled;
             // nextToken() will return null.

             e.printStackTrace();
             unsetSource( );
         }
     }


    /**
     *  Detaches the source.  After the superclass has released it, the
     *  delimiter is cleared, the lexer is marked finished, and any pending
     *  empty-string token is cancelled.
     */

     public void unsetSource()
     {
         super.unsetSource();

         emptyString = false;
         finished    = true;
         delimiter   = null;
     }


   //---------------------------------------------------------------------------
   // STREAM ROUTINES

     // Lookahead cache: la(int) fills it while delimited, and consume() reads
     // slot 0 to learn which character to return and how far to advance the source.
     private int    lookahead  = 0;             // the number of characters identified
     private char[] characters = new char[3];   // the next characters identified by la()
     private int[]  widths     = new int[3];    // the source widths of the next characters


     /**
      *  Returns the next character without consuming it; shorthand for la(1).
      */
     public char la() throws LexerException, ReadException
     {
         return this.la( 1 );
     }

    /**
     *  Returns the next <code>k</code>th character, without consuming any.
     *  <p>
     *  While delimited, source characters are translated as they are scanned:
     *  the escapes \n, \r, \t, \b and \f become the matching control
     *  character, a doubled backslash becomes one backslash, an escaped quote
     *  becomes the bare quote, and the closing delimiter is reported as
     *  CharStream.EOS.  Each translated character is cached in
     *  <code>characters</code>, together with the number of source characters
     *  it occupies in <code>widths</code>, so that consume() can advance the
     *  source by the right amount.  When not delimited the call is passed
     *  straight to the source.
     *
     *  @param k  1-based lookahead distance; at most characters.length while delimited
     *  @return the character found, or CharStream.EOS when finished, when no
     *          source is set, at end of input, or at the closing delimiter
     *  @throws GroovyBugError if k exceeds the cache size while delimited
     */

     public char la(int k) throws LexerException, ReadException
     {

         if( !finished && source != null )
         {

             if( delimited )
             {

                 if( k > characters.length )
                 {
                     throw new GroovyBugError( "StringLexer lookahead tolerance exceeded" );
                 }

                 // Cache hit: answer from the cache.  The cached count is then
                 // cut back to one, so only slot 0 is treated as valid afterwards.
                 if( lookahead >= k && k >= 1)
                 {
                     lookahead = 1;
                     return characters[k-1];
                 }

                 // Cache miss: discard the cache and rescan from the source.
                 lookahead = 0;

                 char c = ' ', c1 = ' ', c2 = ' ';
                 // offset: source position of the character being examined;
                 // width: how many source characters the translated character spans.
                 int offset = 1, width = 0;
                 for( int i = 1; i <= k; i++ )
                 {
                     c1 = source.la(offset);
                     C1_SWITCH: switch( c1 )
                     {
                         // Source exhausted.
                         case CharStream.EOS:
                         {
                             return c1;
                         }

                         // Start of an escape sequence: decide by the character after it.
                         case '\\':
                         {
                             c2 = source.la( offset + 1 );

                             ESCAPE_SWITCH: switch( c2 )
                             {

                                 case CharStream.EOS:
                                     return c2;

                                 // NOTE(review): this branch, and the quote and default
                                 // branches below, store into slot 0 and return at once,
                                 // whatever i and k are.  That looks correct only for
                                 // la(1) -- confirm no caller depends on k > 1 here.
                                 case '\\':
                                     c = '\\';
                                     characters[0] = c;
                                     widths[0] = 2;
                                     lookahead = 1;
                                     return c;

                                 case 'n':
                                     c = '\n';
                                     width = 2;
                                     break ESCAPE_SWITCH;

                                 case 'r':
                                     c = '\r';
                                     width = 2;
                                     break ESCAPE_SWITCH;

                                 case 't':
                                     c = '\t';
                                     width = 2;
                                     break ESCAPE_SWITCH;

                                 case 'b':
                                     c = '\b';
                                     width = 2;
                                     break ESCAPE_SWITCH;

                                 case 'f':
                                     c = '\f';
                                     width = 2;
                                     break ESCAPE_SWITCH;

                                 // With GStrings allowed, only the backslash is emitted
                                 // (width 1), leaving the '$' to be seen next; otherwise
                                 // the pair collapses to a plain '$'.
                                 case '$':
                                     if ( allowGStrings )
                                     {
                                         c = c1;
                                         width = 1;
                                     }
                                     else
                                     {
                                         c = c2;
                                         width = 2;
                                     }
                                     break ESCAPE_SWITCH;


                                 // Escaped quote: yields the bare quote character.
                                 case '"':
                                 case '\'':
                                     c = c2;
                                     characters[0] = c;
                                     widths[0] = 2;
                                     lookahead = 1;
                                     return c;

                                 // Unrecognized escape: the backslash is passed through
                                 // on its own (width 1).
                                 default:
                                     c = '\\';
                                     characters[0] = c;
                                     widths[0] = 1;
                                     lookahead = 1;
                                     return c;
                             }
                             break C1_SWITCH;
                         }

                         default:
                         {
                             // Possible start of the closing delimiter.
                             if( c1 == watchFor )
                             {
                                 boolean atEnd = true;
                                 if (delimiter.length() == 1)
                                 {
                                     // NOTE(review): c1 was read from source.la(offset) and
                                     // equals watchFor here, so this test looks like it can
                                     // never succeed -- confirm.
                                     if (source.la(offset) != watchFor)
                                     {
                                         atEnd = false;
                                         c = c1;
                                         break C1_SWITCH;
                                     }
                                 }
                                 else {
                                     // Multi-character delimiter: the remaining delimiter
                                     // characters must follow directly in the source.
                                     for( int j = 1; j < delimiter.length(); j++ )
                                     {
                                         if( source.la(offset+j) != delimiter.charAt(j) )
                                         {
                                             atEnd = false;
                                             break;
                                         }
                                     }
                                 }

                                 // The closing delimiter is reported as end of stream.
                                 if( atEnd )
                                 {
                                     return CharStream.EOS;
                                 }
                             }

                             c = c1;
                             width = 1;
                             // An unescaped '$' with GStrings allowed resets the cache
                             // index, so it is stored in slot 0 below.
                             // NOTE(review): the intent is not evident from this file.
                             if (c == '$' && allowGStrings)
                             {
                                 lookahead = 0;
                             }
                             break C1_SWITCH;
                         }
                     }


                     // Record the translated character and step past it in the source.
                     characters[lookahead] = c;
                     widths[lookahead]     = width;

                     offset += width;
                     lookahead += 1;
                 }

                 return c;                                         // <<< FLOW CONTROL <<<<<<<<<
             }

             // Not delimited: no translation and no caching.
             lookahead = 0;
             return source.la(k);
         }

         return CharStream.EOS;

     }


    /**
     *  Eats one character from the input and returns it.  When delimited, the
     *  lookahead cache is filled through la(1) if it is empty; the character
     *  in slot 0 is then taken and the source advanced by its recorded width.
     *  If la(1) reports EOS afterwards, finishUp() is called to eat the
     *  closing delimiter.  When not delimited the source is consumed directly.
     *
     *  @return the character eaten, or CharStream.EOS when finished, when no
     *          source is set, or when the cache could not be filled
     *  @throws ReadException  if the source, la() or finishUp() raises one
     *  @throws LexerException if the source, la() or finishUp() raises one
     */

     public char consume() throws LexerException, ReadException
     {
         if( finished || source == null )
         {
             return CharStream.EOS;
         }

         char eaten = CharStream.EOS;

         if( !delimited )
         {
             eaten = source.consume();
         }
         else
         {
             // Make sure slot 0 of the cache describes the next character.
             if( lookahead < 1 )
             {
                 la( 1 );
             }

             if( lookahead >= 1 )
             {
                 eaten = characters[0];

                 // An escape sequence spans more than one source character.
                 for( int remaining = widths[0]; remaining > 0; remaining-- )
                 {
                     source.consume();
                 }

                 lookahead = 0;
             }

             // Nothing further to hand out: eat the closing delimiter now.
             if( la(1) == CharStream.EOS )
             {
                 finishUp();
             }
         }

         lookahead = 0;
         return eaten;
     }


    /**
     *  Eats the closing delimiter from the source, one character at a time,
     *  and then calls finish().
     *
     *  @throws UnterminatedStringLiteralException if the source ends before
     *          the whole delimiter has been read
     *  @throws GroovyBugError if the source holds something other than the
     *          delimiter at this point
     */

     protected void finishUp() throws LexerException, ReadException
     {
         final int length = delimiter.length();

         for( int index = 0; index < length; index++ )
         {
             final char found = source.la(1);

             if( found == CharStream.EOS )
             {
                 throw new UnterminatedStringLiteralException(getStartLine(), getStartColumn());
             }

             if( found != delimiter.charAt(index) )
             {
                 throw new GroovyBugError( "la() said delimiter [" + delimiter + "], finishUp() found [" + found + "]" );
             }

             source.consume();
         }

         finish();
     }

 }
	package org.codehaus.groovy.syntax.lexer;

	//{{{ imports
	import org.codehaus.groovy.syntax.ReadException;
	import org.codehaus.groovy.syntax.Token;
	import org.codehaus.groovy.GroovyBugError;
	//}}}

	/**
	* A Lexer for processing standard strings.
	*
	* @author Chris Poirier
	*/

	public class StringLexer extends TextLexerBase
	{

	// Closing delimiter of the current string: a single quote character, or
	// the same character three times for a triple-quoted string.  Assigned by
	// setSource() and cleared by unsetSource().
	protected String delimiter = null;
	// First character of the delimiter; la() compares source characters to it.
	protected char watchFor;
	// When true, la() leaves an escaped dollar sign as a backslash followed by
	// '$', and delimit() does not request the synthetic empty-string token.
	protected boolean allowGStrings = false;
	protected boolean emptyString = true; // If set, we need to send an empty string


	/**
	 *  Enables or disables GString support.  When enabled, la() leaves an
	 *  escaped dollar sign (\$) in place as a backslash followed by '$', and
	 *  delimit() does not request the synthetic empty-string token.
	 *  Set this appropriately BEFORE setting source, because setSource()
	 *  switches delimiting on.
	 *
	 *  @param allow true to enable GString support
	 */

	public void allowGStrings( boolean allow )
	{
	    this.allowGStrings = allow;
	}



	/**
	 *  Produces the STRING token for this lexer.  If the empty-string flag is
	 *  set, it is cleared and an empty STRING is returned.  Otherwise, once
	 *  the lexer is finished, null is returned.  In the remaining case every
	 *  character up to the next EOS reported by la() is consumed and returned
	 *  as one STRING; escape sequences are translated by la() on the way.
	 *
	 *  @return the next STRING token, or null when the lexer is finished
	 *  @throws ReadException  if la() or consume() raises one
	 *  @throws LexerException if la() or consume() raises one
	 */

	public Token undelegatedNextToken( ) throws ReadException, LexerException
	{
	    if( emptyString )
	    {
	        emptyString = false;
	        return Token.newString( "", getStartLine(), getStartColumn() );
	    }

	    if( finished )
	    {
	        return null;
	    }

	    // Gather everything la() is willing to hand out before it reports EOS.
	    StringBuffer text = new StringBuffer();
	    for( char next = la(1); next != CharStream.EOS; next = la(1) )
	    {
	        text.append( consume() );
	    }

	    // Nothing was collected and nothing is left: mark the lexer finished.
	    if( la(1) == CharStream.EOS )
	    {
	        if( text.length() == 0 )
	        {
	            finished = true;
	        }
	    }

	    return Token.newString( text.toString(), getStartLine(), getStartColumn() );
	}



	/**
	 *  Controls delimiter search.  The flag is first handed to the superclass.
	 *  When delimiting is switched on and la(1) immediately reports EOS, the
	 *  closing delimiter is eaten via finishUp(); unless GStrings are allowed,
	 *  the empty-string flag is then set so that undelegatedNextToken() still
	 *  supplies one (empty) STRING.  Any Exception raised while doing so marks
	 *  the lexer finished instead of propagating.
	 *
	 *  @param delimit true to turn delimiter search on
	 */

	public void delimit( boolean delimit )
	{
	    super.delimit( delimit );

	    if( !delimit )
	    {
	        return;
	    }

	    try
	    {
	        if( finished || la(1) != CharStream.EOS )
	        {
	            return;
	        }

	        finishUp();

	        //
	        // Per the original author's note, the GStringLexer handles the
	        // empty string itself.  This lexer does not, so a flag is raised
	        // here for undelegatedNextToken() to act on.

	        if( !allowGStrings )
	        {
	            emptyString = true;
	        }
	    }
	    catch( Exception e )
	    {
	        finished = true;
	    }
	}




	/**
	 *  Sets the source lexer and identifies and consumes the opening
	 *  delimiter.  The source must be positioned on a single or double quote;
	 *  three identical quotes in a row select a triple-quote delimiter.  After
	 *  the delimiter is recorded the lexer is restarted and delimiting is
	 *  switched on.  If any Exception is raised along the way, its stack trace
	 *  is printed and the source is unset again.
	 *
	 *  @param source the lexer to read characters from
	 */

	public void setSource( Lexer source )
	{
	    super.setSource( source );

	    emptyString = false;

	    try
	    {
	        final char quote = source.la();
	        if( quote != '\'' && quote != '"' )
	        {
	            throw new GroovyBugError( "at the time of StringLexer.setSource(), the source must be on a single or double quote" );
	        }

	        mark();
	        source.consume();

	        final String single = String.valueOf( quote );
	        if( source.la() == quote && source.la(2) == quote )
	        {
	            // Two more quotes follow: this is a triple-quoted string.
	            source.consume();
	            source.consume();
	            delimiter = single + single + single;
	        }
	        else
	        {
	            delimiter = single;
	        }

	        watchFor = delimiter.charAt(0);

	        restart();
	        delimit( true );
	    }
	    catch( Exception e )
	    {
	        //
	        // The delimiter could not be read, so the source is cancelled;
	        // nextToken() will return null.

	        e.printStackTrace();
	        unsetSource( );
	    }
	}



	/**
	 *  Detaches the source.  After the superclass has released it, the
	 *  delimiter is cleared, the lexer is marked finished, and any pending
	 *  empty-string token is cancelled.
	 */

	public void unsetSource()
	{
	    super.unsetSource();

	    emptyString = false;
	    finished    = true;
	    delimiter   = null;
	}




	//---------------------------------------------------------------------------
	// STREAM ROUTINES

	// Lookahead cache: la(int) fills it while delimited, and consume() reads
	// slot 0 to learn which character to return and how far to advance the source.
	private int lookahead = 0; // the number of characters identified
	private char[] characters = new char[3]; // the next characters identified by la()
	private int[] widths = new int[3]; // the source widths of the next characters


	/**
	 *  Returns the next character without consuming it; shorthand for la(1).
	 */
	public char la() throws LexerException, ReadException
	{
	    return this.la( 1 );
	}

	/**
	* Returns the next <code>k</code>th character, without consuming any.
	* <p>
	* While delimited, source characters are translated as they are scanned:
	* the escapes \n, \r, \t, \b and \f become the matching control
	* character, a doubled backslash becomes one backslash, an escaped quote
	* becomes the bare quote, and the closing delimiter is reported as
	* CharStream.EOS. Each translated character is cached in
	* <code>characters</code>, together with the number of source characters
	* it occupies in <code>widths</code>, so that consume() can advance the
	* source by the right amount. When not delimited the call is passed
	* straight to the source.
	*
	* @param k 1-based lookahead distance; at most characters.length while delimited
	* @return the character found, or CharStream.EOS when finished, when no
	*         source is set, at end of input, or at the closing delimiter
	* @throws GroovyBugError if k exceeds the cache size while delimited
	*/

	public char la(int k) throws LexerException, ReadException
	{

	if( !finished && source != null )
	{

	if( delimited )
	{

	if( k > characters.length )
	{
	throw new GroovyBugError( "StringLexer lookahead tolerance exceeded" );
	}

	// Cache hit: answer from the cache. The cached count is then
	// cut back to one, so only slot 0 is treated as valid afterwards.
	if( lookahead >= k && k >= 1)
	{
	lookahead = 1;
	return characters[k-1];
	}

	// Cache miss: discard the cache and rescan from the source.
	lookahead = 0;

	char c = ' ', c1 = ' ', c2 = ' ';
	// offset: source position of the character being examined;
	// width: how many source characters the translated character spans.
	int offset = 1, width = 0;
	for( int i = 1; i <= k; i++ )
	{
	c1 = source.la(offset);
	C1_SWITCH: switch( c1 )
	{
	// Source exhausted.
	case CharStream.EOS:
	{
	return c1;
	}

	// Start of an escape sequence: decide by the character after it.
	case '\\':
	{
	c2 = source.la( offset + 1 );

	ESCAPE_SWITCH: switch( c2 )
	{

	case CharStream.EOS:
	return c2;

	// NOTE(review): this branch, and the quote and default
	// branches below, store into slot 0 and return at once,
	// whatever i and k are. That looks correct only for
	// la(1) -- confirm no caller depends on k > 1 here.
	case '\\':
	c = '\\';
	characters[0] = c;
	widths[0] = 2;
	lookahead = 1;
	return c;

	case 'n':
	c = '\n';
	width = 2;
	break ESCAPE_SWITCH;

	case 'r':
	c = '\r';
	width = 2;
	break ESCAPE_SWITCH;

	case 't':
	c = '\t';
	width = 2;
	break ESCAPE_SWITCH;

	case 'b':
	c = '\b';
	width = 2;
	break ESCAPE_SWITCH;

	case 'f':
	c = '\f';
	width = 2;
	break ESCAPE_SWITCH;

	// With GStrings allowed, only the backslash is emitted
	// (width 1), leaving the '$' to be seen next; otherwise
	// the pair collapses to a plain '$'.
	case '$':
	if ( allowGStrings )
	{
	c = c1;
	width = 1;
	}
	else
	{
	c = c2;
	width = 2;
	}
	break ESCAPE_SWITCH;


	// Escaped quote: yields the bare quote character.
	case '"':
	case '\'':
	c = c2;
	characters[0] = c;
	widths[0] = 2;
	lookahead = 1;
	return c;

	// Unrecognized escape: the backslash is passed through
	// on its own (width 1).
	default:
	c = '\\';
	characters[0] = c;
	widths[0] = 1;
	lookahead = 1;
	return c;
	}
	break C1_SWITCH;
	}

	default:
	{
	// Possible start of the closing delimiter.
	if( c1 == watchFor )
	{
	boolean atEnd = true;
	if (delimiter.length() == 1)
	{
	// NOTE(review): c1 was read from source.la(offset) and
	// equals watchFor here, so this test looks like it can
	// never succeed -- confirm.
	if (source.la(offset) != watchFor)
	{
	atEnd = false;
	c = c1;
	break C1_SWITCH;
	}
	}
	else {
	// Multi-character delimiter: the remaining delimiter
	// characters must follow directly in the source.
	for( int j = 1; j < delimiter.length(); j++ )
	{
	if( source.la(offset+j) != delimiter.charAt(j) )
	{
	atEnd = false;
	break;
	}
	}
	}

	// The closing delimiter is reported as end of stream.
	if( atEnd )
	{
	return CharStream.EOS;
	}
	}

	c = c1;
	width = 1;
	// An unescaped '$' with GStrings allowed resets the cache
	// index, so it is stored in slot 0 below.
	// NOTE(review): the intent is not evident from this file.
	if (c == '$' && allowGStrings)
	{
	lookahead = 0;
	}
	break C1_SWITCH;
	}
	}


	// Record the translated character and step past it in the source.
	characters[lookahead] = c;
	widths[lookahead] = width;

	offset += width;
	lookahead += 1;
	}

	return c; // <<< FLOW CONTROL <<<<<<<<<
	}

	// Not delimited: no translation and no caching.
	lookahead = 0;
	return source.la(k);
	}

	return CharStream.EOS;

	}



	/**
	 *  Eats one character from the input and returns it.  When delimited, the
	 *  lookahead cache is filled through la(1) if it is empty; the character
	 *  in slot 0 is then taken and the source advanced by its recorded width.
	 *  If la(1) reports EOS afterwards, finishUp() is called to eat the
	 *  closing delimiter.  When not delimited the source is consumed directly.
	 *
	 *  @return the character eaten, or CharStream.EOS when finished, when no
	 *          source is set, or when the cache could not be filled
	 *  @throws ReadException  if the source, la() or finishUp() raises one
	 *  @throws LexerException if the source, la() or finishUp() raises one
	 */

	public char consume() throws LexerException, ReadException
	{
	    if( finished || source == null )
	    {
	        return CharStream.EOS;
	    }

	    char eaten = CharStream.EOS;

	    if( !delimited )
	    {
	        eaten = source.consume();
	    }
	    else
	    {
	        // Make sure slot 0 of the cache describes the next character.
	        if( lookahead < 1 )
	        {
	            la( 1 );
	        }

	        if( lookahead >= 1 )
	        {
	            eaten = characters[0];

	            // An escape sequence spans more than one source character.
	            for( int remaining = widths[0]; remaining > 0; remaining-- )
	            {
	                source.consume();
	            }

	            lookahead = 0;
	        }

	        // Nothing further to hand out: eat the closing delimiter now.
	        if( la(1) == CharStream.EOS )
	        {
	            finishUp();
	        }
	    }

	    lookahead = 0;
	    return eaten;
	}



	/**
	 *  Eats the closing delimiter from the source, one character at a time,
	 *  and then calls finish().
	 *
	 *  @throws UnterminatedStringLiteralException if the source ends before
	 *          the whole delimiter has been read
	 *  @throws GroovyBugError if the source holds something other than the
	 *          delimiter at this point
	 */

	protected void finishUp() throws LexerException, ReadException
	{
	    final int length = delimiter.length();

	    for( int index = 0; index < length; index++ )
	    {
	        final char found = source.la(1);

	        if( found == CharStream.EOS )
	        {
	            throw new UnterminatedStringLiteralException(getStartLine(), getStartColumn());
	        }

	        if( found != delimiter.charAt(index) )
	        {
	            throw new GroovyBugError( "la() said delimiter [" + delimiter + "], finishUp() found [" + found + "]" );
	        }

	        source.consume();
	    }

	    finish();
	}

	}