geronimo-javamail_1.5_spec/src/main/java/javax/mail/internet/AddressParser.java - geronimo-specs - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *  http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package javax.mail.internet;

 import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
 import java.util.List;

 class AddressParser {

     // Validation strictness levels, ordered from most lenient to most conformant.
     public static final int NONSTRICT = 0;
     public static final int PARSE_HEADER = 1;
     public static final int STRICT = 2;

     // The mailbox forms an address can be classified as while parsing.
     protected static final int UNKNOWN = 0;
     protected static final int ROUTE_ADDR = 1;
     protected static final int GROUP_ADDR = 2;
     protected static final int SIMPLE_ADDR = 3;

     // Token type codes.  Each code is the value of a representative character.
     protected static final int END_OF_TOKENS = '\0';
     protected static final int PERIOD = '.';
     protected static final int LEFT_ANGLE = '<';
     protected static final int RIGHT_ANGLE = '>';
     protected static final int COMMA = ',';
     protected static final int AT_SIGN = '@';
     protected static final int SEMICOLON = ';';
     protected static final int COLON = ':';
     protected static final int QUOTED_LITERAL = '"';
     protected static final int DOMAIN_LITERAL = '[';
     protected static final int COMMENT = '(';
     protected static final int ATOM = 'A';
     protected static final int WHITESPACE = ' ';


     // The address text being parsed.
     private final String addresses;
     // Index of the character currently being examined by the tokenizer.
     private int position;
     // Index at which tokenizing stops (set to the length of the address text).
     private int end;
     // The validation strictness level requested for this parser.
     private final int validationLevel;

     /**
      * Create a parser for an address string.
      *
      * @param addresses  The address text to parse.
      * @param validation The validation strictness level to apply; expected to
      *                   be one of NONSTRICT, PARSE_HEADER or STRICT.
      */
     public AddressParser(final String addresses, final int validation) {
         this.validationLevel = validation;
         this.addresses = addresses;
     }


     /**
      * Parse the address string as a list of internet addresses.
      *
      * @return An array holding every address found in the list.  Empty list
      *         elements contribute nothing to the result.
      * @exception AddressException
      *                   Thrown for any validation errors.
      */
     public InternetAddress[] parseAddressList() throws AddressException
     {
         // break the address string up into tokens first.
         final TokenStream tokens = tokenizeAddress();

         // accumulator for everything we parse out.
         final ArrayList parsed = new ArrayList();

         // Each pass handles one list element (which can yield zero, one, or several
         // addresses) and then consumes the token that terminated it.  That token is
         // either a "," delimiter or the stream terminator; the terminator ends the scan.
         do {
             parsed.addAll(parseSingleAddress(tokens, false));
         } while (tokens.nextToken().type != END_OF_TOKENS);

         final InternetAddress[] result = new InternetAddress[parsed.size()];
         return (InternetAddress[]) parsed.toArray(result);
     }


     /**
      * Parse the address string as exactly one internet address.  An empty
      * string, an address list, or anything trailing the address is rejected.
      *
      * @return The single parsed address.
      * @exception AddressException
      *                   Thrown if the string does not hold exactly one address.
      */
     public InternetAddress parseAddress() throws AddressException
     {
         // break the address string up into tokens first.
         final TokenStream tokens = tokenizeAddress();

         // parse off one list element.  This can come back empty (a null address) or,
         // for a run of blank delimited tokens, with more than one entry.
         final List parsed = parseSingleAddress(tokens, false);
         final int count = parsed.size();

         // exactly one address is required.
         if (count == 0) {
             throw new AddressException("Null address", addresses, 0);
         }
         if (count != 1) {
             throw new AddressException("Illegal Address", addresses, 0);
         }

         // nothing is allowed to follow the address.
         final AddressToken trailing = tokens.nextToken();
         if (trailing.type != END_OF_TOKENS) {
             illegalAddress("Illegal Address", trailing);
         }

         return (InternetAddress) parsed.get(0);
     }


     /**
      * Validate an internet address.  This must be a single address,
      * not a list of addresses.  The address also must not contain
      * any personal information to be valid.
      *
      * @exception AddressException
      *                   Thrown if the string is not exactly one address, carries
      *                   personal information, or has anything trailing the address.
      */
     public void validateAddress() throws AddressException
     {
         // break the address string up into tokens first.
         final TokenStream tokens = tokenizeAddress();

         // parse off one list element.  This can come back empty (a null address) or,
         // for a run of blank delimited tokens, with more than one entry.
         final List parsed = parseSingleAddress(tokens, false);
         final int count = parsed.size();

         if (count == 0) {
             throw new AddressException("Null address", addresses, 0);
         }
         if (count > 1) {
             throw new AddressException("Illegal Address", addresses, 0);
         }

         // the parse has already split the text into personal and address parts;
         // a valid bare address has no personal part.
         final InternetAddress candidate = (InternetAddress) parsed.get(0);
         if (candidate.personal != null) {
             throw new AddressException("Illegal Address", addresses, 0);
         }

         // nothing is allowed to follow the address.
         final AddressToken trailing = tokens.nextToken();
         if (trailing.type != END_OF_TOKENS) {
             illegalAddress("Illegal Address", trailing);
         }
     }


     /**
      * Extract the set of addresses from a group Internet specification,
      * i.e. the members listed between the group's ':' and its closing ';'.
      *
      * @return An array containing all of the non-null addresses in the group.
      * @exception AddressException
      *                   Thrown if the ':' or ';' is missing or a member is invalid.
      */
     public InternetAddress[] extractGroupList() throws AddressException
     {
         // break the address string up into tokens first.
         final TokenStream tokens = tokenizeAddress();

         // accumulator for the group members.
         final ArrayList members = new ArrayList();

         // skip forward to the ':' that opens the member list.  Running out of
         // tokens before finding one is an error.
         AddressToken token;
         for (token = tokens.nextToken(); token.type != COLON; token = tokens.nextToken()) {
             if (token.type == END_OF_TOKENS) {
                 illegalAddress("Missing ':'", token);
             }
         }

         // Each pass handles one member (possibly an empty one) and then consumes the
         // token that terminated it: a "," continues the list, the ';' closes the group,
         // and hitting the end of the tokens first is an error.
         do {
             members.addAll(parseSingleAddress(tokens, true));
             token = tokens.nextToken();
             if (token.type == END_OF_TOKENS) {
                 illegalAddress("Missing ';'", token);
             }
         } while (token.type != SEMICOLON);

         return (InternetAddress[]) members.toArray(new InternetAddress[members.size()]);
     }


     /**
      * Parse out a single address-list element from a stream of address
      * tokens, returning the InternetAddress object(s) it produces.
      *
      * @param tokens  The token source for this address.
      * @param inGroup true when the element is being parsed as a member of a
      *                group list.  In that mode a ';' terminates the element,
      *                and a ':' or a premature end of the tokens is reported
      *                as an error.
      *
      * @return A list of the InternetAddress objects parsed from this element.
      *         The list is empty if this is an "empty" element of an address
      *         list, and can hold several entries when the lenient
      *         blank-delimited form is used.  The terminating token is pushed
      *         back onto the stream for the caller to examine.
      * @exception AddressException
      */
     private List parseSingleAddress(final TokenStream tokens, final boolean inGroup) throws AddressException
     {
         final List parsedAddresses = new ArrayList();

         // index markers for personal information
         AddressToken personalStart = null;
         AddressToken personalEnd = null;

         // and similar bits for the address information.
         AddressToken addressStart = null;
         AddressToken addressEnd = null;

         // there is a fall-back set of rules allowed that will parse the address as a set of blank delimited
         // tokens.  However, we do NOT allow this if we encounter any tokens that fall outside of these
         // rules.  For example, comment fields and quoted strings will disallow the very lenient rule set.
         boolean nonStrictRules = true;

         // we don't know the type of address yet
         int addressType = UNKNOWN;

         // the parsing goes in two stages.  Stage one runs through the tokens locating the bounds
         // of the address we're working on, resolving the personal information, and also validating
         // some of the larger scale syntax features of an address (matched delimiters for routes and
         // groups, invalid nesting checks, etc.).

         // get the next token from the queue and save this.  We're going to scan ahead a bit to
         // figure out what type of address we're looking at, then reset to do the actual parsing
         // once we've figured out a form.
         final AddressToken first = tokens.nextToken();
         // push it back on before starting processing.
         tokens.pushToken(first);

         // scan ahead for a trigger token that tells us what we've got.
         while (addressType == UNKNOWN) {

             final AddressToken token = tokens.nextToken();
             switch (token.type) {
                 // skip these for now...after we've processed everything and found that this is a simple
                 // address form, then we'll check for a leading comment token in the first position and use
                 // it as personal information.
                 case COMMENT:
                     // comments do, however, denote that this must be parsed according to RFC822 rules.
                     nonStrictRules = false;
                     break;

                 // a semi-colon when processing a group is an address terminator.  we need to
                 // process this like a comma then
                 case SEMICOLON:
                     if (inGroup) {
                         // we need to push the terminator back on for the caller to see.
                         tokens.pushToken(token);
                         // if we've not tagged any tokens as being the address beginning, then this must be a
                         // null address.
                         if (addressStart == null) {
                             // just return the empty list from this.
                             return parsedAddresses;
                         }
                         // the end token is the back part.
                         addressEnd = tokens.previousToken(token);
                         // without a '<' for a route addr, we can't distinguish address tokens from personal data.
                         // We'll use a leading comment, if there is one.
                         personalStart = null;
                         // this is just a simple form.
                         addressType = SIMPLE_ADDR;
                         break;
                     }

                 // NOTE:  The above falls through if this is not a group.

                 // any of these tokens are a real token that can be the start of an address.  Many of
                 // them are not valid as first tokens in this context, but we flag them later if validation
                 // has been requested.  For now, we just mark these as the potential address start.
                 case DOMAIN_LITERAL:
                 case QUOTED_LITERAL:
                     // this set of tokens require fuller RFC822 parsing, so turn off the flag.
                     nonStrictRules = false;

                 // NOTE:  intentional fall through into the common start-of-address handling.
                 case ATOM:
                 case AT_SIGN:
                 case PERIOD:
                     // if we've not determined the start of the address yet, then check to see if we
                     // need to consider this the personal start.
                     if (addressStart == null) {
                         if (personalStart == null) {
                             personalStart = token;
                         }
                         // This is the first real token of the address, which at this point can
                         // be either the personal info or the first token of the address.  If we hit
                         // an address terminator without encountering either a route trigger or group
                         // trigger, then this is the real address.
                         addressStart = token;
                     }
                     break;

                 // a LEFT_ANGLE indicates we have a full RFC822 mailbox form.  The leading phrase
                 // is the personal info.  The address is inside the brackets.
                 case LEFT_ANGLE:
                     // a route address automatically switches off the blank-delimited token mode.
                     nonStrictRules = false;
                     // this is a route address
                     addressType = ROUTE_ADDR;
                     // the address is placed in the InternetAddress object without the route
                     // brackets, so our start is one past this.
                     addressStart = tokens.nextRealToken();
                     // push this back on the queue so the scanner picks it up properly.
                     tokens.pushToken(addressStart);
                     // make sure we flag the end of the personal section too.
                     if (personalStart != null) {
                         personalEnd = tokens.previousToken(token);
                     }
                     // scan the rest of a route address.
                     // NOTE(review): inGroup is passed as false here even when this method was
                     // called for a group member -- confirm that is intended.
                     addressEnd = scanRouteAddress(tokens, false);
                     break;

                 // a COLON indicates this is a group specifier...parse the group.
                 case COLON:
                     // Colons would not be valid in simple lists, so turn it off.
                     nonStrictRules = false;
                     // if we're scanning a group, we shouldn't encounter a ":".  This is a
                     // recursion error if found.
                     if (inGroup) {
                         illegalAddress("Nested group element", token);
                     }
                     addressType = GROUP_ADDR;
                     // groups don't have any personal sections.
                     personalStart = null;
                     // our real start was back at the beginning
                     addressStart = first;
                     addressEnd = scanGroupAddress(tokens);
                     break;

                 // (a semi-colon acting as a group-member terminator is handled by the SEMICOLON case above.)


                 // reached the end of string...this might be a null address, or one of the very simple name
                 // forms used for non-strict RFC822 versions.  Reset, and try that form
                 case END_OF_TOKENS:
                     // if we're scanning a group, we shouldn't encounter an end token.  This is an
                     // error if found.
                     if (inGroup) {
                         illegalAddress("Missing ';'", token);
                     }

                     // NOTE:  this falls through into the COMMA handling below.

                 // this is either a terminator for an address list or a group terminator.
                 case COMMA:
                     // we need to push the terminator back on for the caller to see.
                     tokens.pushToken(token);
                     // if we've not tagged any tokens as being the address beginning, then this must be a
                     // null address.
                     if (addressStart == null) {
                         // just return the empty list from this.
                         return parsedAddresses;
                     }
                     // the end token is the back part.
                     addressEnd = tokens.previousToken(token);
                     // without a '<' for a route addr, we can't distinguish address tokens from personal data.
                     // We'll use a leading comment, if there is one.
                     personalStart = null;
                     // this is just a simple form.
                     addressType = SIMPLE_ADDR;
                     break;

                 // right angle tokens are pushed, because parsing of the bracketing is not necessarily simple.
                 // we need to flag these here.
                 case RIGHT_ANGLE:
                     illegalAddress("Unexpected '>'", token);

             }
         }

         String personal = null;

         // if we have personal data, then convert it to a string value.
         if (personalStart != null) {
             final TokenStream personalTokens = tokens.section(personalStart, personalEnd);
             personal = personalToString(personalTokens);
         }
         // if we have a simple address, then check the first token to see if it's a comment.  For simple addresses,
         // we'll accept the first comment token as the personal information.
         else {
             if (addressType == SIMPLE_ADDR && first.type == COMMENT) {
                 personal = first.value;
             }
         }

         final TokenStream addressTokens = tokens.section(addressStart, addressEnd);

         // validation is skipped entirely when the PARSE_HEADER level was requested.  For any other
         // level, the address tokens are validated by the routine matching the detected address type.
         if (validationLevel != PARSE_HEADER) {
             switch (addressType) {
                 case GROUP_ADDR:
                     validateGroup(addressTokens);
                     break;

                 case ROUTE_ADDR:
                     validateRouteAddr(addressTokens, false);
                     break;

                 case SIMPLE_ADDR:
                     // NOTE(review): originally described as a "conditional validation"; any such
                     // condition would have to live inside validateSimpleAddress -- confirm.
                     validateSimpleAddress(addressTokens);
                     break;
             }
         }

         // more complex addresses and addresses containing tokens other than just simple addresses
         // need proper handling.  Only a simple address made of lenient-rule tokens, parsed at the
         // NONSTRICT level, takes the blank-delimited path in the else branch.
         if (validationLevel != NONSTRICT || addressType != SIMPLE_ADDR || !nonStrictRules) {
             // we might have traversed this already when we validated, so reset the
             // position before using this again.
             addressTokens.reset();
             final String address = addressToString(addressTokens);

             // get the parsed out sections as string values.
             final InternetAddress result = new InternetAddress();
             result.setAddress(address);
             try {
                 result.setPersonal(personal);
             } catch (final UnsupportedEncodingException e) {
                 // NOTE(review): the encoding failure is silently ignored and parsing continues --
                 // confirm this best-effort handling of the personal name is intended.
             }
             // even though we have a single address, we return this as a list.  Simple addresses
             // can produce several items, so callers always receive a list.
             parsedAddresses.add(result);
             return parsedAddresses;
         }
         else {
             addressTokens.reset();

             // lenient form:  every blank delimited run of tokens becomes its own address, with
             // no personal information attached.
             TokenStream nextAddress = addressTokens.getBlankDelimitedToken();
             while (nextAddress != null) {
                 final String address = addressToString(nextAddress);
                 // get the parsed out sections as string values.
                 final InternetAddress result = new InternetAddress();
                 result.setAddress(address);
                 parsedAddresses.add(result);
                 nextAddress = addressTokens.getBlankDelimitedToken();
             }
             return parsedAddresses;
         }
     }


     /**
      * Scan the token stream, parsing off a route addr spec.  Only the
      * bracket structure and the placement of ':', ',', ';' and '<' are
      * checked here; the address information itself is not validated.
      * Tokens are read with nextRealToken().
      *
      * @param tokens  The stream of tokens, positioned just after the '<'.
      * @param inGroup true if the route address is a member of a group, in
      *                which case it may be followed by ',' or ';' rather
      *                than ',' or the end of the tokens.
      *
      * @return The last token of the route address (the one preceding the
      *         terminating '>').  The token following the '>' is pushed
      *         back onto the stream.
      */
     private AddressToken scanRouteAddress(final TokenStream tokens, final boolean inGroup) throws AddressException {
         // the first token between the "<" and ">".
         AddressToken current = tokens.nextRealToken();

         // the most recently accepted token.  Once the '>' shows up, this is the real end of
         // the address; it stays null only for an empty "<>".
         AddressToken lastSeen = null;

         // route information, when present, must begin with '@' right after the '<'.  While the
         // route section is open, ':' and ',' are permitted.
         boolean routeOpen = (current.type == AT_SIGN);

         // scan until the terminator is reached.  Only illegal tokens are checked for.
         for (;;) {
             switch (current.type) {
                 // all of these are acceptable between the brackets.
                 case ATOM:
                 case QUOTED_LITERAL:
                 case DOMAIN_LITERAL:
                 case PERIOD:
                 case AT_SIGN:
                     break;

                 case COLON:
                     // only legal as the close of the route information.
                     if (!routeOpen) {
                         illegalAddress("Unexpected ':'", current);
                     }
                     // the route section is finished, so the rules change from here on.
                     routeOpen = false;
                     break;

                 case COMMA:
                     // only legal inside the route information.
                     if (!routeOpen) {
                         illegalAddress("Unexpected ','", current);
                     }
                     break;

                 case RIGHT_ANGLE: {
                     // nothing accepted yet means we were given "<>", which is illegal.
                     if (lastSeen == null) {
                         illegalAddress("Illegal address", current);
                     }
                     // whatever follows the '>' must be a ',' or the terminator appropriate for the
                     // context:  ';' inside a group, the end of the tokens otherwise.
                     final AddressToken terminator = tokens.nextRealToken();
                     final int closer = inGroup ? SEMICOLON : END_OF_TOKENS;
                     if (terminator.type != COMMA && terminator.type != closer) {
                         illegalAddress("Illegal address", terminator);
                     }
                     // the caller needs to see the terminating token.
                     tokens.pushToken(terminator);
                     return lastSeen;
                 }

                 // The remaining cases are all errors.  They are written as a fall-through chain
                 // (illegalAddress is expected to throw; it is defined outside this section).
                 case END_OF_TOKENS:
                     illegalAddress("Missing '>'", current);

                 case SEMICOLON:
                     illegalAddress("Unexpected ';'", current);

                 case LEFT_ANGLE:
                     illegalAddress("Unexpected '<'", current);
             }
             // remember what we just accepted and step forward.
             lastSeen = current;
             current = tokens.nextRealToken();
         }
     }


     /**
      * Scan the token stream, parsing off a group address.  This
      * will do some basic syntax validation, but will not actually
      * validate any of the address information.  Tokens are read with
      * nextRealToken().
      *
      * @param tokens The stream of tokens, positioned just after the group's ':'.
      *
      * @return The last token of the group address (the terminating ';').
      *         The token following the ';' is pushed back onto the stream.
      * @exception AddressException
      *                   Thrown for a nested group, a missing ';', an unmatched
      *                   '>', or anything other than ',' or the end of the
      *                   tokens following the ';'.
      */
     private AddressToken scanGroupAddress(final TokenStream tokens) throws AddressException {
         // A group does not require that there be anything between the ':' and ';'.  This is
         // just a group with an empty list.
         AddressToken token = tokens.nextRealToken();

         // now scan until we reach the terminator.  The only validation is done on illegal tokens.
         while (true) {
             switch (token.type) {
                 // The following tokens are all valid in group addresses, so just skip over them.
                 case ATOM:
                 case QUOTED_LITERAL:
                 case DOMAIN_LITERAL:
                 case PERIOD:
                 case AT_SIGN:
                 case COMMA:
                     break;

                 // a second ':' would start a group inside a group.  illegalAddress is expected
                 // to throw (it is defined outside this section), so the fall through is not taken.
                 case COLON:
                      illegalAddress("Nested group", token);

                 // route address within a group specifier....we need to at least verify the bracket nesting
                 // and higher level syntax of the route.
                 case LEFT_ANGLE:
                     scanRouteAddress(tokens, true);
                     break;

                 // the only allowed terminator is the ';', so running out of tokens is an error.
                 case END_OF_TOKENS:
                     illegalAddress("Missing ';'", token);

                 // the group terminator.
                 case SEMICOLON:
                     // verify there's nothing illegal after this.
                     final AddressToken next = tokens.nextRealToken();
                     if (next.type != COMMA && next.type != END_OF_TOKENS) {
                         // report the offending token itself (not the ';'), matching how
                         // scanRouteAddress reports an illegal token after the '>'.
                         illegalAddress("Illegal address", next);
                     }
                     // don't forget to put this back on...our caller will need it.
                     tokens.pushToken(next);
                     return token;

                 // a '>' with no matching '<' (matched ones are consumed by scanRouteAddress).
                 case RIGHT_ANGLE:
                     illegalAddress("Unexpected '>'", token);
             }
             token = tokens.nextRealToken();
         }
     }


     /**
      * Parse the provided internet address into a set of tokens.  This
      * phase only does a syntax check on the tokens.  The interpretation
      * of the tokens is the next phase.
      *
      * The scan always starts from the beginning of the address string, so
      * this can safely be invoked more than once on the same parser.
      *
      * @return The stream of tokens, always ending with an END_OF_TOKENS token.
      * @exception AddressException
      *                   Thrown for characters that cannot appear in an address
      *                   or for unmatched ')' or ']' delimiters.
      */
     private TokenStream tokenizeAddress() throws AddressException {

         // get a list for the set of tokens
         final TokenStream tokens = new TokenStream();

         // (re)establish the scan bounds.  Resetting the start as well as the end keeps a
         // second parse/validate call on this instance from seeing an already exhausted scan.
         position = 0;
         end = addresses.length();    // our parsing end marker

         // now scan along the string looking for the special characters in an internet address.
         while (moreCharacters()) {
             final char ch = currentChar();

             switch (ch) {
                 // start of a comment bit...the comment scanner consumes through the closing paren.
                 case '(':
                     scanComment(tokens);
                     break;
                 // a closing paren found outside of normal processing.  syntaxError is expected
                 // to throw (it is defined outside this section), so the fall through is not taken.
                 case ')':
                     syntaxError("Unexpected ')'", position);


                 // start of a quoted string
                 case '"':
                     scanQuotedLiteral(tokens);
                     break;
                 // domain literal
                 case '[':
                     scanDomainLiteral(tokens);
                     break;

                 // a naked closing bracket...not valid except as part of a domain literal.
                 // As with ')', this relies on syntaxError throwing.
                 case ']':
                     syntaxError("Unexpected ']'", position);

                 // special character delimiters
                 case '<':
                     tokens.addToken(new AddressToken(LEFT_ANGLE, position));
                     nextChar();
                     break;

                 // a naked closing bracket...not valid without a starting one, but
                 // we need to handle this in context.
                 case '>':
                     tokens.addToken(new AddressToken(RIGHT_ANGLE, position));
                     nextChar();
                     break;
                 case ':':
                     tokens.addToken(new AddressToken(COLON, position));
                     nextChar();
                     break;
                 case ',':
                     tokens.addToken(new AddressToken(COMMA, position));
                     nextChar();
                     break;
                 case '.':
                     tokens.addToken(new AddressToken(PERIOD, position));
                     nextChar();
                     break;
                 case ';':
                     tokens.addToken(new AddressToken(SEMICOLON, position));
                     nextChar();
                     break;
                 case '@':
                     tokens.addToken(new AddressToken(AT_SIGN, position));
                     nextChar();
                     break;

                 // white space characters.  These are mostly token delimiters, but there are some relaxed
                 // situations where they get processed, so we need to add a white space token for the first
                 // one we encounter in a span.
                 case ' ':
                 case '\t':
                 case '\r':
                 case '\n':
                     // add a single white space token
                     tokens.addToken(new AddressToken(WHITESPACE, position));

                     nextChar();
                     // step over the rest of the white space span, leaving us positioned either at
                     // the end or at the first non-white space character.
                     while (moreCharacters()) {
                         final char following = currentChar();
                         if (following != ' ' && following != '\t' && following != '\r' && following != '\n') {
                             break;
                         }
                         nextChar();
                     }
                     break;

                 // potentially an atom...if it starts with an allowed atom character, we
                 // parse out the token, otherwise this is invalid.
                 default:
                     // control characters (below octal 040, the space) and everything from
                     // DEL (octal 0177) upward are rejected.
                     if (ch < 040 || ch >= 0177) {
                         syntaxError("Illegal character in address", position);
                     }

                     scanAtom(tokens);
                     break;
             }
         }

         // for this end marker, give an end position.
         tokens.addToken(new AddressToken(END_OF_TOKENS, addresses.length()));
         return tokens;
     }


     /**
      * Advance the parse cursor by exactly one character position.
      */
     private void nextChar() {
         position += 1;
     }


     /**
      * Fetch the character sitting under the parse cursor.  The cursor itself
      * is not moved.
      *
      * @return The character of the address string at the current position.
      */
     private char currentChar() {
         final char current = addresses.charAt(position);
         return current;
     }

     /**
      * Test whether the parse cursor is still short of the end of the
      * section being scanned.
      *
      * @return true while there is at least one more character to read,
      *         false once the cursor has reached the end position.
      */
     private boolean moreCharacters() {
         return end > position;
     }


     /**
      * Scan an RFC822 quoted string.  On entry the cursor sits on the opening
      * quote; on a successful return it sits just past the closing quote and a
      * QUOTED_LITERAL token has been added to the stream.  The token value
      * excludes the surrounding quotes, and every backslash escape is reduced
      * to the escaped character itself.
      *
      * NOTE(review): the token is stamped with the position of the closing
      * quote, whereas scanDomainLiteral and scanComment record the position
      * where their token starts -- confirm this difference is intentional.
      *
      * @param tokens The TokenStream where the parsed out token is added.
      *
      * @exception AddressException
      *                   If the closing quote is missing or the literal
      *                   contains a carriage return.
      */
     private void scanQuotedLiteral(final TokenStream tokens) throws AddressException {
         final StringBuilder text = new StringBuilder();

         // skip the opening quote, then walk the string one character per iteration
         nextChar();
         for (; moreCharacters(); nextChar()) {
             final char c = currentChar();
             switch (c) {
                 case '\\':
                     // an escape: drop the backslash and keep whatever follows it verbatim.
                     nextChar();
                     if (!moreCharacters()) {
                         syntaxError("Missing '\"'", position);
                     }
                     text.append(currentChar());
                     break;

                 case '"':
                     // closing delimiter: emit the token and leave the cursor just past the quote.
                     tokens.addToken(new AddressToken(text.toString(), QUOTED_LITERAL, position));
                     nextChar();
                     return;

                 case '\r':
                     // RFC822 does not allow a bare CR inside a quoted string.
                     syntaxError("Illegal line end in literal", position);
                     break;

                 default:
                     text.append(c);
                     break;
             }
         }

         // the input ended before the closing quote showed up
         syntaxError("Missing '\"'", position);
     }


     /**
      * Parse a domain literal ("[...]") as specified by the RFC822 specification.
      * On entry the cursor sits on the opening '['; on a successful return it
      * sits just past the closing ']' and a DOMAIN_LITERAL token has been added
      * to the stream.  The token value excludes the brackets but keeps any
      * backslash escapes intact, and the token position is that of the
      * opening '['.
      *
      * @param tokens The TokenStream where the parsed out token is added.
      *
      * @exception AddressException
      *                   If the closing ']' is missing, a nested '[' is found,
      *                   or the literal contains a carriage return.
      */
     private void scanDomainLiteral(final TokenStream tokens) throws AddressException {
         final StringBuffer value = new StringBuffer();

         final int startPosition = position;
         // step over the opening bracket.
         nextChar();

         while (moreCharacters()) {
             final char ch = currentChar();

             // is this an escape char?
             if (ch == '\\') {
                 // because domain literals don't get extra escaping, we render them
                 // with the escaped characters intact.  Therefore, append the '\' escape
                 // first, then append the escaped character without examination.
                 value.append(currentChar());
                 // step past this, and grab the following character
                 nextChar();
                 if (!moreCharacters()) {
                     // a dangling escape means the literal was never closed; report the
                     // delimiter that is actually missing for this token type.
                     syntaxError("Missing ']'", position);
                 }
                 value.append(currentChar());
             }
             // end of the literal?
             else if (ch == ']') {
                 // return the constructed string.
                 tokens.addToken(new AddressToken(value.toString(), DOMAIN_LITERAL, startPosition));
                 // step over the close delimiter for the benefit of the next token.
                 nextChar();
                 return;
             }
             // the RFC822 spec says no nesting
             else if (ch == '[') {
                 syntaxError("Unexpected '['", position);
             }
             // carriage returns are similarly illegal.
             else if (ch == '\r') {
                 syntaxError("Illegal line end in domain literal", position);
             }
             else
             {
                 value.append(ch);
             }
             nextChar();
         }
         // missing delimiter
         syntaxError("Missing ']'", position);
     }

     /**
      * Scan an atom in an internet address, using the RFC822 rules
      * for atom delimiters.  The character under the cursor on entry is
      * always taken as the first character of the atom without an isAtom()
      * test; the atom then extends over every following character that
      * isAtom() accepts.
      *
      * @param tokens The TokenStream where the parsed out token is added.
      */
     private void scanAtom(final TokenStream tokens) throws AddressException {
         final int first = position;

         // consume the leading character, then everything that still qualifies as atom text
         nextChar();
         while (moreCharacters() && isAtom(currentChar())) {
             nextChar();
         }

         // the atom is the slice of the input between the start and the cursor
         final String text = addresses.substring(first, position);
         tokens.addToken(new AddressToken(text, ATOM, first));
     }


     /**
      * Scan an RFC822 comment, including nested comments and backslash
      * escapes.  On entry the cursor sits on the opening '('; on a successful
      * return it sits just past the matching ')' and a COMMENT token has been
      * added to the stream.  The token value omits the outermost parentheses
      * but keeps those of nested comments; escaped characters are stored
      * without their backslash.  The token position is that of the opening '('.
      *
      * @param tokens The TokenStream where the parsed out token is added.
      *
      * @exception AddressException
      *                   If the comment is not closed or contains a carriage
      *                   return.
      */
     private void scanComment(final TokenStream tokens) throws AddressException {
         final StringBuilder text = new StringBuilder();
         final int openedAt = position;

         // step past the opening parenthesis; we are now one level deep.
         nextChar();
         int depth = 1;

         for (; moreCharacters(); nextChar()) {
             final char c = currentChar();
             switch (c) {
                 case '\\':
                     // an escape must be followed by at least one more character,
                     // which is taken literally.
                     nextChar();
                     if (!moreCharacters()) {
                         syntaxError("Missing ')'", position);
                     }
                     text.append(currentChar());
                     break;

                 case '(':
                     // a nested comment is kept, delimiters and all, inside the outer value.
                     depth++;
                     text.append(c);
                     break;

                 case ')':
                     depth--;
                     if (depth <= 0) {
                         // the outermost close: it is not part of the value, since the value
                         // is frequently used as personal data on InternetAddress objects.
                         nextChar();
                         tokens.addToken(new AddressToken(text.toString(), COMMENT, openedAt));
                         return;
                     }
                     // closing a nested level only
                     text.append(c);
                     break;

                 case '\r':
                     syntaxError("Illegal line end in comment", position);
                     break;

                 default:
                     text.append(c);
                     break;
             }
         }

         // the input ran out before the comment was closed
         syntaxError("Missing ')'", position);
     }


     /**
      * Validate the syntax of an RFC822 group address of the form
      * "phrase: mailbox, mailbox, ... ;".  The phrase must hold at least one
      * word, each list element is checked by validateGroupMailbox(), and
      * nothing may follow the closing semicolon.
      *
      * @param tokens The stream of tokens for the address.
      *
      * @exception AddressException
      *                   If any part of the group specification is malformed.
      */
     private void validateGroup(final TokenStream tokens) throws AddressException {
         // Phase 1: the leading phrase.  Everything up to the colon must be a word
         // (an atom or a quoted literal).
         int words = 0;
         AddressToken current;
         for (current = tokens.nextRealToken(); current.type != COLON; current = tokens.nextRealToken()) {
             final boolean isWord = current.type == ATOM || current.type == QUOTED_LITERAL;
             if (!isWord) {
                 invalidToken(current);
             }
             words++;
         }

         // RFC822 requires the group to be named.
         if (words == 0) {
             illegalAddress("Missing group identifier phrase", current);
         }

         // Phase 2: the mailbox list.  After each mailbox we must find either a comma
         // (another element follows) or the semicolon that closes the group.
         for (;;) {
             validateGroupMailbox(tokens);

             current = tokens.nextRealToken();
             if (current.type == COMMA) {
                 continue;
             }
             if (current.type != SEMICOLON) {
                 illegalAddress("Illegal group address", current);
             }

             // this is the closing semicolon; make sure it truly is the end of the address.
             current = tokens.nextRealToken();
             if (current.type != END_OF_TOKENS) {
                 illegalAddress("Illegal group address", current);
             }
             return;
         }
     }


     /**
      * Validate the syntax of a single mailbox within a group address.  The
      * method looks ahead through the tokens to decide which form the mailbox
      * takes, rewinds to the first token of the mailbox, and then runs the
      * matching validation.  A null element (an immediate comma or semicolon)
      * is accepted as is.
      *
      * @param tokens The stream of tokens representing the address.
      *
      * @exception AddressException
      *                   If the mailbox is malformed or the group is never
      *                   terminated.
      */
     private void validateGroupMailbox(final TokenStream tokens) throws AddressException {
         final AddressToken first = tokens.nextRealToken();

         // a null address in the list: hand the terminator back to the caller and stop.
         if (first.type == COMMA || first.type == SEMICOLON) {
             tokens.pushToken(first);
             return;
         }

         // scan forward from the first token until one of them tells us what
         // sort of address this is.
         for (AddressToken probe = first; ; probe = tokens.nextRealToken()) {
             switch (probe.type) {
                 // a LEFT_ANGLE indicates the full RFC822 mailbox form: a leading phrase
                 // holding the personal info, then the address inside the brackets.
                 case LEFT_ANGLE:
                     tokens.pushToken(first);
                     validatePhrase(tokens, false);
                     validateRouteAddr(tokens, true);
                     return;

                 // a period or an "@" as the first non-word token means a plain addr-spec;
                 // reaching the element terminator (comma, or the semicolon ending the
                 // group) first means one of the very simple name forms.  Either way,
                 // rewind and validate as an address spec.
                 case PERIOD:
                 case AT_SIGN:
                 case COMMA:
                 case SEMICOLON:
                     tokens.pushToken(first);
                     validateAddressSpec(tokens);
                     return;

                 // ran off the end without ever seeing the group terminator.
                 case END_OF_TOKENS:
                     illegalAddress("Missing ';'", probe);
                     break;

                 // words (and any other token) tell us nothing until we know the context.
                 default:
                     break;
             }
         }
     }


     /**
      * Utility method for throwing an AddressException caused by an
      * unexpected primitive token.  The token type constants are the character
      * values of the delimiters they represent, so the type is rendered as a
      * character in the message (e.g. "Unexpected '<'") rather than as its
      * numeric code.  Running off the end of the token list is reported in
      * words, since the end marker has no printable character.
      *
      * @param token  The token causing the problem (must not be a value type token).
      *
      * @exception AddressException
      *                   Always thrown; positioned at the offending token.
      */
     private void invalidToken(final AddressToken token) throws AddressException {
         if (token.type == END_OF_TOKENS) {
             illegalAddress("Unexpected end of address", token);
         }
         illegalAddress("Unexpected '" + (char) token.type + "'", token);
     }


     /**
      * Report a syntax problem found while scanning the raw address string.
      * This method never returns normally.
      *
      * @param message  The message used in the thrown exception.
      * @param position The parsing position within the string.
      *
      * @exception AddressException
      *                   Always thrown, carrying the address string and the
      *                   given position.
      */
     private void syntaxError(final String message, final int position) throws AddressException {
         final AddressException failure = new AddressException(message, addresses, position);
         throw failure;
     }


     /**
      * Report a problem with a parsed token.  This method never returns
      * normally.
      *
      * @param message The exception message.
      * @param token   The token causing the error.  This token's position is used
      *                in the exception information.
      *
      * @exception AddressException
      *                   Always thrown, carrying the address string and the
      *                   token's position.
      */
     private void illegalAddress(final String message, final AddressToken token) throws AddressException {
         final int where = token.position;
         throw new AddressException(message, addresses, where);
     }


     /**
      * Consume a phrase (a run of atom / quoted literal tokens) from the
      * stream, together with the single token that terminates it.  On return
      * the stream is positioned just past that terminating token.
      *
      * An empty phrase is an error when the phrase is required.  When it is
      * optional, only the terminating token is consumed; scanning must not
      * continue past it, otherwise the leading words of whatever follows
      * (for example the local part after a '<') would be swallowed as if they
      * were part of the phrase.
      *
      * @param tokens   The set of tokens to validate, positioned at the phrase start.
      * @param required A flag indicating whether the phrase is optional or required.
      *
      * @exception AddressException
      *                   If the phrase is required but no word token is present.
      */
     private void validatePhrase(final TokenStream tokens, final boolean required) throws AddressException {
         AddressToken token = tokens.nextRealToken();

         // no leading word: either a hard error, or an empty optional phrase.
         if (token.type != ATOM && token.type != QUOTED_LITERAL) {
             if (required) {
                 illegalAddress("Missing group phrase", token);
             }
             // the token just read is the terminator; it has been consumed, exactly
             // as it is at the end of a non-empty phrase, so we are done.
             return;
         }

         // scan forward over the rest of the words.  The loop ends once the first
         // non-word token (the terminator) has been read.
         do {
             token = tokens.nextRealToken();
         } while (token.type == ATOM || token.type == QUOTED_LITERAL);
     }


     /**
      * Validate a route-addr specification: an optional route (a list of
      * "@domain" elements ending in a colon) followed by an addr-spec, and
      * then the expected terminator.
      *
      * @param tokens  The tokens representing the address portion (personal information
      *                already removed).
      * @param ingroup true indicates we're validating a route address inside a
      *                group list, where the closing '>' is still present and
      *                required.  false indicates we're validating a standalone
      *                address, which must end at the end of the tokens.
      *
      * @exception AddressException
      *                   If the route, the addr-spec or the terminator is invalid.
      */
     private void validateRouteAddr(final TokenStream tokens, final boolean ingroup) throws AddressException {
         // peek at the next real token.  It goes back on the stream in either case:
         // as the marker for the route parser, or as the start of the local part.
         final AddressToken lookahead = tokens.nextRealToken();
         tokens.pushToken(lookahead);
         if (lookahead.type == AT_SIGN) {
             // a leading "@" announces a list of route domains.
             validateRoute(tokens);
         }

         // whatever came first, an address spec is expected now.
         validateAddressSpec(tokens);

         final AddressToken closer = tokens.nextRealToken();
         if (ingroup) {
             // inside a group the angle brackets are still in the token list.
             if (closer.type != RIGHT_ANGLE) {
                 illegalAddress("Missing '>'", closer);
             }
         }
         else if (closer.type != END_OF_TOKENS) {
             // standalone: the brackets were stripped earlier, so nothing may follow.
             illegalAddress("Illegal Address", closer);
         }
     }


     /**
      * Validate a simple address in the form "user@domain".  The whole token
      * stream must be consumed by the addr-spec; any trailing token is an error.
      *
      * @param tokens The stream of tokens representing the address.
      *
      * @exception AddressException
      *                   If the addr-spec is invalid or is followed by extra tokens.
      */
     private void validateSimpleAddress(final TokenStream tokens) throws AddressException {
         // by the time validation runs, the personal information has been split off,
         // so the addr-spec starts at the very first token.
         validateAddressSpec(tokens);

         // only the end marker may remain.
         final AddressToken trailing = tokens.nextRealToken();
         if (trailing.type == END_OF_TOKENS) {
             return;
         }
         illegalAddress("Illegal Address", trailing);
     }

     /**
      * Validate the addr-spec portion of an address.  RFC822 requires
      * the form "local-part@domain", but javamail also accepts a bare
      * "local-part".  A domain is therefore only demanded when an '@'
      * follows the local part.  On return the stream is positioned at the
      * first token after the addr-spec.
      *
      * @param tokens The stream of tokens, positioned at the start of the local part.
      *
      * @exception AddressException
      *                   If the local part or the domain is invalid.
      */
     private void validateAddressSpec(final TokenStream tokens) throws AddressException {
         // every address, even the simplest, starts with a local part.
         validateLocalPart(tokens);

         final AddressToken separator = tokens.nextRealToken();
         if (separator.type != AT_SIGN) {
             // no domain: leave the token in place so the caller sees the terminator.
             tokens.pushToken(separator);
             return;
         }

         validateDomain(tokens);
     }


     /**
      * Validate the route portion of a route-addr.  This is a list
      * of domain values in the form 1#("@" domain) ":".  The terminating
      * colon is consumed.
      *
      * @param tokens The token stream holding the address information.
      *
      * @exception AddressException
      *                   If a domain is invalid or the list is not ended by a colon.
      */
     private void validateRoute(final TokenStream tokens) throws AddressException {
         for (;;) {
             final AddressToken element = tokens.nextRealToken();
             switch (element.type) {
                 // "@" introduces the next domain of the list.
                 case AT_SIGN:
                     validateDomain(tokens);
                     break;

                 // a comma just separates list elements; go around again.
                 case COMMA:
                     break;

                 // the colon closes the route.
                 case COLON:
                     return;

                 // anything else means the terminating colon never arrived.
                 default:
                     illegalAddress("Missing ':'", element);
                     break;
             }
         }
     }


     /**
      * Validate the local part of an address spec.  The local part
      * is a series of "words" (atoms or quoted literals) separated by ".".
      * On return the stream is positioned at the first token after the
      * local part.
      *
      * @param tokens The stream of tokens, positioned at the start of the local part.
      *
      * @exception AddressException
      *                   If a word is missing where one is required.
      */
     private void validateLocalPart(final TokenStream tokens) throws AddressException {
         AddressToken token;
         do {
             // each element of the local part has to be a word.
             token = tokens.nextRealToken();
             final boolean isWord = token.type == ATOM || token.type == QUOTED_LITERAL;
             if (!isWord) {
                 illegalAddress("Invalid local part", token);
             }

             // a period (white space and comments ignored) means another word follows.
             token = tokens.nextRealToken();
         } while (token.type == PERIOD);

         // whatever stopped us belongs to the caller.
         tokens.pushToken(token);
     }


     /**
      * Validate a domain name of the form sub-domain *("." sub-domain),
      * where a sub-domain is either an atom or a domain-literal.  On return
      * the stream is positioned at the first token after the domain.
      *
      * @param tokens The stream of tokens, positioned at the start of the domain.
      *
      * @exception AddressException
      *                   If a sub-domain is missing where one is required.
      */
     private void validateDomain(final TokenStream tokens) throws AddressException {
         AddressToken token;
         do {
             // each sub-domain has to be an atom or a domain literal.
             token = tokens.nextRealToken();
             final boolean isSubDomain = token.type == ATOM || token.type == DOMAIN_LITERAL;
             if (!isSubDomain) {
                 illegalAddress("Invalid domain", token);
             }

             // a period (white space is ignored) means another sub-domain follows.
             token = tokens.nextRealToken();
         } while (token.type == PERIOD);

         // whatever stopped us belongs to the caller.
         tokens.pushToken(token);
     }

     /**
      * Convert a stream of phrase tokens into the string used as the
      * personal information of an address.
      *
      * Three cases are distinguished:
      *
      * - An empty stream yields a null value.
      *
      * - A stream holding exactly one token yields that token's raw value.
      *   No quoting is applied here; if the value contains special
      *   characters, quoting is performed when it is later converted to a
      *   string value.
      *
      * - A longer stream yields the formatted values of all its tokens,
      *   separated by single blanks.  Each value is rendered by
      *   addTokenValue(), so quoted literals are emitted in quoted form
      *   while atoms are copied as is.  Thus a phrase such as
      *
      *      "Geronimo" Apache
      *
      *   is rebuilt from its two tokens with one blank between them.
      *
      * @param tokens The stream of phrase tokens (which may be empty).
      *
      * @return The phrase as a string, or null if the stream holds no tokens.
      */
     private String personalToString(final TokenStream tokens) {
         final AddressToken first = tokens.nextToken();

         // nothing there at all?  Then there is no personal value.
         if (first.type == END_OF_TOKENS) {
             return null;
         }

         // a lone token is used directly, without any formatting.
         if (tokens.nextToken().type == END_OF_TOKENS) {
             return first.value;
         }

         // two or more tokens: rewind to the first one and join everything with blanks.
         tokens.pushToken(first);

         final StringBuffer phrase = new StringBuffer();
         addTokenValue(tokens.nextToken(), phrase);
         for (AddressToken part = tokens.nextToken(); part.type != END_OF_TOKENS; part = tokens.nextToken()) {
             phrase.append(' ');
             addTokenValue(part, phrase);
         }
         return phrase.toString();
     }


     /**
      * Take a canonicalized set of address tokens and reformat it back into a
      * string value.  A single blank is inserted between two consecutive word
      * tokens (atoms or quoted literals); delimiters, domain literals and
      * comments are appended without any surrounding blanks.  Token types
      * not listed below, such as white space tokens, contribute nothing.
      *
      * @param tokens The set of tokens representing the address.
      *
      * @return The string value of the tokens.
      */
     private String addressToString(final TokenStream tokens) {
         final StringBuffer out = new StringBuffer();

         // true when the token appended last was a word, meaning a following word
         // needs a blank in front of it.
         boolean lastWasWord = false;

         // nextToken() rather than nextRealToken(): the comments must be rendered too.
         for (AddressToken part = tokens.nextToken(); part.type != END_OF_TOKENS; part = tokens.nextToken()) {
             switch (part.type) {
                 // words are the only tokens that may need a blank delimiter.
                 case ATOM:
                 case QUOTED_LITERAL:
                     if (lastWasWord) {
                         out.append(' ');
                     }
                     addTokenValue(part, out);
                     lastWasWord = true;
                     break;

                 // domain literals and comments bring their own delimiters.
                 case DOMAIN_LITERAL:
                 case COMMENT:
                     addTokenValue(part, out);
                     lastWasWord = false;
                     break;

                 // for the specials, the type constant is the character itself,
                 // so it can be appended directly.
                 case LEFT_ANGLE:
                 case RIGHT_ANGLE:
                 case COMMA:
                 case COLON:
                 case AT_SIGN:
                 case SEMICOLON:
                 case PERIOD:
                     out.append((char) part.type);
                     lastWasWord = false;
                     break;

                 // anything else is skipped and leaves the delimiter state untouched.
                 default:
                     break;
             }
         }
         return out.toString();
     }


     /**
      * Append the textual form of a value token to a string buffer used to
      * create the canonicalized string value.  Atoms are copied as is, quoted
      * literals are passed through formatQuotedString(), domain literals are
      * wrapped in "[" and "]", and comments are wrapped in "(" and ")".
      * Tokens of any other type append nothing.
      *
      * @param token  The token we're adding.
      * @param buffer The target string buffer.
      */
     private void addTokenValue(final AddressToken token, final StringBuffer buffer) {
         switch (token.type) {
             case ATOM:
                 buffer.append(token.value);
                 break;

             case QUOTED_LITERAL:
                 buffer.append(formatQuotedString(token.value));
                 break;

             case DOMAIN_LITERAL:
                 buffer.append('[').append(token.value).append(']');
                 break;

             case COMMENT:
                 buffer.append('(').append(token.value).append(')');
                 break;

             default:
                 // not a value token; there is nothing to add.
                 break;
         }
     }


     // Classification table for the 128 seven-bit character codes, indexed by
     // character value (16 entries per row).  Each entry is a set of bit flags:
     //   0x01 (FLG_SPECIAL) - the character is an address "special":
     //                        " ( ) , . : ; < > @ [ \ ]
     //   0x02 (FLG_CONTROL) - the character is a control character (0x00-0x1F, 0x7F)
     //   0x04               - no named constant for this bit is visible in this part
     //                        of the file; presumably it marks white space -- TODO confirm.
     // NOTE(review): the 0x04 bit is set for 0x08, 0x0A, 0x0D and 0x20 but not for
     // 0x09 (tab) -- verify whether the entry at 0x08 was meant to be the one at 0x09.
     private static final byte[] CHARMAP = {
         // 0x00 - 0x1F: control characters
         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,  0x06, 0x02, 0x06, 0x02, 0x02, 0x06, 0x02, 0x02,
         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,  0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
         // 0x20 - 0x3F: blank, punctuation and digits
         0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,  0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x01, 0x01, 0x01, 0x00, 0x01, 0x00,

         // 0x40 - 0x5F: '@', upper case letters and brackets
         0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x00, 0x00,
         // 0x60 - 0x7F: lower case letters, trailing punctuation and DEL
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
     };

     // flag bit for the special (delimiter) characters in CHARMAP.
     private static final byte FLG_SPECIAL = 1;
     // flag bit for the control characters in CHARMAP.
     private static final byte FLG_CONTROL = 2;

     /**
      * Quick test to see if a character is an allowed atom character
      * or not.
      *
      * @param ch     The test character.
      *
      * @return true if this character is allowed in atoms, false for any
      *         character above 0x7F, any control character, any special
      *         character, or a blank.
      */
     public static boolean isAtom(final char ch) {
         // characters outside the table, and the blank, can never be part of an atom.
         if (ch > '\u007f' || ch == ' ') {
             return false;
         }
         // everything else is decided by the classification table.
         final int disqualifying = CHARMAP[ch] & (FLG_SPECIAL | FLG_CONTROL);
         return disqualifying == 0;
     }

     /**
      * Tests whether a string holds at least one of the characters
      * listed in a second string.
      *
      * @param s      The string we're testing.
      * @param chars  The set of characters we're testing against.
      *
      * @return true if any character of s also occurs in chars, false
      *         otherwise (including when s is empty).
      */
     public static boolean containsCharacters(final String s, final String chars) {
         for (final char candidate : s.toCharArray()) {
             final boolean listed = chars.indexOf(candidate) >= 0;
             if (listed) {
                 return true;
             }
         }
         return false;
     }


     /**
      * Tests if a string contains any character that would require
      * encoding the value as a quoted string rather than as a simple
      * series of blank-delimited atoms.
      *
      * @param s      The test string.
      *
      * @return true if the string contains at least one character that is
      *         neither a blank nor an allowed atom character; false if it
      *         consists solely of blanks and atom characters (or is empty).
      */
     public static boolean containsSpecials(final String s) {
         final int length = s.length();
         for (int index = 0; index < length; index++) {
             final char ch = s.charAt(index);
             // blanks are tolerated here even though isAtom() rejects them.
             if (ch != ' ' && !isAtom(ch)) {
                 return true;
             }
         }
         return false;
     }


     /**
      * Tests if a string consists entirely of allowed atom characters,
      * so that it can be used as a simple atom value.
      *
      * @param s      The test string.
      *
      * @return true if every character of the string is an allowed atom
      *         character (which is also the case for an empty string),
      *         false as soon as one character, such as a blank, is not.
      */
     public static boolean isAtom(final String s) {
         int index = 0;
         while (index < s.length()) {
             // a single non-atom character disqualifies the whole string.
             if (!isAtom(s.charAt(index))) {
                 return false;
             }
             index++;
         }
         return true;
     }

     /**
      * Apply RFC822 quoting rules to a literal string value, adding
      * quotes only when they are needed.  A string made up solely of
      * blanks and atom characters is returned unchanged.  Any other string
      * is wrapped in double quotes, with a backslash inserted in front of
      * every backslash and double quote it contains.
      *
      * @param s      The source string.
      *
      * @return The string itself if it needs no quoting, otherwise a
      *         version of the string as a valid RFC822 quoted literal.
      */
     public static String quoteString(final String s) {
         // backslash and double quote are the only characters that need escaping.
         final boolean needsEscaping = s.indexOf('\\') != -1 || s.indexOf('"') != -1;

         if (!needsEscaping) {
             // an atom (or a series of blank-delimited atoms) can be returned directly;
             // anything else just gets a pair of quotes around it.
             if (containsSpecials(s)) {
                 return '"' + s + '"';
             }
             return s;
         }

         // room for the string, the two quotes, and a "reasonable" number of escapes.
         final StringBuilder quoted = new StringBuilder(s.length() + 10);
         quoted.append('"');
         for (final char ch : s.toCharArray()) {
             if (ch == '\\' || ch == '"') {
                 // protect the character with an extra backslash
                 quoted.append('\\');
             }
             quoted.append(ch);
         }
         return quoted.append('"').toString();
     }

     /**
      * Format a literal string value as an RFC822 quoted string.  The
      * result is always enclosed in double quotes, and each backslash
      * or double quote character inside the value is preceded by an
      * escaping backslash.
      *
      * @param s      The source string.
      *
      * @return The value enclosed in double quotes, with escapes
      *         applied where required.
      */
     public static String formatQuotedString(final String s) {
         // with no backslash or double quote present there is nothing to
         // escape, so the value only needs the surrounding quotes.
         if (s.indexOf('\\') < 0 && s.indexOf('"') < 0) {
             return '"' + s + '"';
         }

         // size the buffer for the string, the two quote characters, and a
         // "reasonable" number of escape characters.
         final int length = s.length();
         final StringBuilder quoted = new StringBuilder(length + 10);
         quoted.append('"');
         for (int index = 0; index < length; index++) {
             final char current = s.charAt(index);
             switch (current) {
                 case '\\':
                 case '"':
                     // these two need a backslash in front of them.
                     quoted.append('\\');
                     break;
                 default:
                     break;
             }
             quoted.append(current);
         }
         quoted.append('"');
         return quoted.toString();
     }

     /**
      * A cursor over a list of AddressToken objects.  The stream keeps
      * the token list plus an index identifying the current token, and
      * provides operations for stepping through the tokens, looking at
      * neighboring tokens, and carving out sub-sections as new streams.
      * The stepping operations depend on the list ending with an
      * END_OF_TOKENS token; they do not check the list bounds themselves.
      */
     public class TokenStream {
         // the set of tokens in the parsed address list, as determined by RFC822 syntax rules.
         private final List tokens;

         // the current token position (an index into the token list)
         int currentToken = 0;


         /**
          * Default constructor for a TokenStream.  This creates an
          * empty TokenStream for purposes of tokenizing an address.
          * It is the creator's responsibility to terminate the stream
          * with a terminator token.
          */
         public TokenStream() {
             tokens = new ArrayList();
         }


         /**
          * Construct a TokenStream from a list of tokens.  A terminator
          * token is added to the end.  The list is used directly rather
          * than copied, so the terminator is appended to the list the
          * caller passed in.
          *
          * @param tokens An existing token list.
          */
         public TokenStream(final List tokens) {
             this.tokens = tokens;
             tokens.add(new AddressToken(END_OF_TOKENS, -1));
         }

         /**
          * Add an address token to the end of the token list.
          *
          * @param token  The new token to add to the list.
          */
         public void addToken(final AddressToken token) {
             tokens.add(token);
         }

         /**
          * Get the next token at the cursor position, advancing the
          * position accordingly.  Whitespace tokens are stepped over,
          * so the token returned is never a whitespace token.
          *
          * @return The first non-whitespace token at or after the current
          *         token position.
          */
         public AddressToken nextToken() {
             AddressToken token = (AddressToken)tokens.get(currentToken++);
             // we skip over white space tokens when operating in this mode, so
             // check the token and iterate until we get a non-white space.
             while (token.type == WHITESPACE) {
                 token = (AddressToken)tokens.get(currentToken++);
             }
             return token;
         }


         /**
          * Get the token at the cursor position, without advancing the
          * position.  No whitespace skipping is performed.
          *
          * @return The token at the current token position.
          */
         public AddressToken currentToken() {
             // return the current token, leaving the cursor where it is
             return (AddressToken)tokens.get(currentToken);
         }


         /**
          * Get the next non-comment token from the string.  Comments are ignored, except as personal information
          * for very simple address specifications.  Any number of consecutive comment tokens is skipped.
          *
          * @return A token guaranteed to be neither a whitespace token nor a comment token.
          */
         public AddressToken nextRealToken()
         {
             AddressToken token = nextToken();
             // comments can appear back to back, so keep stepping until something
             // other than a comment turns up.  nextToken() has already discarded
             // any whitespace sitting between them.
             while (token.type == COMMENT) {
                 token = nextToken();
             }
             return token;
         }

         /**
          * Push a token back on to the queue, making the index of this
          * token the current cursor position.
          *
          * @param token  The token to push.
          */
         public void pushToken(final AddressToken token) {
             // just reset the cursor to the token's index position.
             currentToken = tokenIndex(token);
         }

         /**
          * Get the next token after a given token, without advancing the
          * token position.  The neighbor is returned as is, even if it is
          * a whitespace token.
          *
          * @param token  The token we're retrieving a token relative to.
          *
          * @return The next token in the list.
          */
         public AddressToken nextToken(final AddressToken token) {
             return (AddressToken)tokens.get(tokenIndex(token) + 1);
         }


         /**
          * Return the token prior to a given token.  The token position
          * is not changed.
          *
          * @param token  The token used for the index.
          *
          * @return The token prior to the index token in the list.
          */
         public AddressToken previousToken(final AddressToken token) {
             return (AddressToken)tokens.get(tokenIndex(token) - 1);
         }


         /**
          * Retrieve a token at a given index position.
          *
          * @param index  The target index.
          *
          * @return The token stored at that index.
          */
         public AddressToken getToken(final int index)
         {
             return (AddressToken)tokens.get(index);
         }


         /**
          * Retrieve the index of a particular token in the stream.
          *
          * @param token  The target token.
          *
          * @return The index of the token within the stream.  Returns -1 if this
          *         token is somehow not in the stream.
          */
         public int tokenIndex(final AddressToken token) {
             return tokens.indexOf(token);
         }


         /**
          * Extract a new TokenStream running from the start token through
          * the end token.  Both tokens are included in the new stream,
          * which is then closed off with its own terminator token.
          *
          * @param start  The starting token of the section.
          * @param end    The last token to include in the section.
          *
          * @return A new TokenStream object for processing this section of tokens.
          */
         public TokenStream section(final AddressToken start, final AddressToken end) {
             final int startIndex = tokenIndex(start);
             final int endIndex = tokenIndex(end);

             // List.subList() returns a list backed by the original list.  Since we need to add a
             // terminator token to this list when we take the sublist, we need to manually copy the
             // references so we don't end up munging the original list.
             final ArrayList list = new ArrayList(endIndex - startIndex + 2);

             for (int i = startIndex; i <= endIndex; i++) {
                 list.add(tokens.get(i));
             }
             return new TokenStream(list);
         }


         /**
          * Reset the token position back to the beginning of the
          * stream.
          */
         public void reset() {
             currentToken = 0;
         }

         /**
          * Scan forward looking for a non-blank token.  The cursor is
          * left positioned on the token that is returned.
          *
          * @return The first non-blank token at or after the current position.
          */
         public AddressToken getNonBlank()
         {
             AddressToken token = currentToken();
             while (token.type == WHITESPACE) {
                 currentToken++;
                 token = currentToken();
             }
             return token;
         }


         /**
          * Extract a blank delimited token from a TokenStream.  A blank
          * delimited token is the set of tokens up to the next real whitespace
          * token; comment tokens do not end the set.  On return, the cursor
          * is positioned on the whitespace or terminator token that ended
          * the scan.
          *
          * @return A TokenStream object with the new set of tokens, or null
          *         if only the terminator token remains.
          */
         public TokenStream getBlankDelimitedToken()
         {
             // get the next non-whitespace token.
             final AddressToken first = getNonBlank();
             // if this is the end, we return null.
             if (first.type == END_OF_TOKENS) {
                 return null;
             }

             AddressToken last = first;

             // the methods for retrieving tokens skip over whitespace, so we're going to process this
             // by index.
             currentToken++;

             AddressToken token = currentToken();
             while (true) {
                 // if this is our marker, then pluck out the section and return it.
                 if (token.type == END_OF_TOKENS || token.type == WHITESPACE) {
                     return section(first, last);
                 }
                 last = token;
                 currentToken++;
                 // we accept any and all tokens here.
                 token = currentToken();
             }
         }

         /**
          * Return the index of the current cursor position.
          *
          * @return The integer index of the current token.
          */
         public int currentIndex() {
             return currentToken;
         }

         /**
          * Debugging aid that writes every token in the stream, followed
          * by the cursor position, to System.out.
          */
         public void dumpTokens()
         {
             System.out.println(">>>>>>>>> Start dumping TokenStream tokens");
             for (int i = 0; i < tokens.size(); i++) {
                 System.out.println("-------- Token: " + tokens.get(i));
             }

             System.out.println("++++++++ cursor position=" + currentToken);
             System.out.println(">>>>>>>>> End dumping TokenStream tokens");
         }
     }


     /**
      * Small holder class describing a single address token: its
      * type code, an optional string value, and the position recorded
      * for it within the address string.
      */
     public class AddressToken {

         // the token type
         int type;

         // string value of the token (can be null)
         String value;

         // position of the token within the address string.
         int position;

         /**
          * Create a token that carries no string value.
          *
          * @param type     The token type.
          * @param position The position of the token within the address string.
          */
         AddressToken(final int type, final int position)
         {
             this(null, type, position);
         }

         /**
          * Create a token with an associated string value.
          *
          * @param value    The string value of the token (can be null).
          * @param type     The token type.
          * @param position The position of the token within the address string.
          */
         AddressToken(final String value, final int type, final int position)
         {
             this.value = value;
             this.type = type;
             this.position = position;
         }

         /**
          * Build a printable description of the token.  The terminator
          * token gets a fixed description; any other token shows its
          * type as a character, followed by the value when there is one.
          *
          * @return The description string.
          */
         @Override
         public String toString()
         {
             // the terminator type is not a printable character, so it is spelled out.
             if (type == END_OF_TOKENS) {
                 return "AddressToken:  type=END_OF_TOKENS";
             }
             final String description = "AddressToken:  type=" + (char)type;
             return value == null ? description : description + " value=" + value;
         }
     }
 }