| /* |
| * Copyright 1999-2004 The Apache Software Foundation. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.cocoon.util; |
| |
| import java.util.Enumeration; |
| import java.util.NoSuchElementException; |
| |
| /** |
| * Replacement for StringTokenizer in java.util, beacuse of bug in the |
| * Sun's implementation. |
| * |
| * @author <A HREF="mailto:moravek@pobox.sk">Peter Moravek</A> |
| * @version CVS $Id: Tokenizer.java,v 1.2 2004/03/05 13:03:00 bdelacretaz Exp $ |
| */ |
| public class Tokenizer implements Enumeration { |
| |
| /** |
| * Constructs a string tokenizer for the specified string. All characters |
| * in the delim argument are the delimiters for separating tokens. |
| * If the returnTokens flag is true, then the delimiter characters are |
| * also returned as tokens. Each delimiter is returned as a string of |
| * length one. If the flag is false, the delimiter characters are skipped |
| * and only serve as separators between tokens. |
| * |
| * @param str a string to be parsed |
| * @param delim the delimiters |
| * @param returnTokens flag indicating whether to return the delimiters |
| * as tokens |
| */ |
| public Tokenizer(String str, String delim, boolean returnTokens) { |
| this.str = str; |
| this.delim = delim; |
| this.returnTokens = returnTokens; |
| |
| max = str.length(); |
| } |
| |
| /** |
| * Constructs a string tokenizer for the specified string. The characters |
| * in the delim argument are the delimiters for separating tokens. |
| * Delimiter characters themselves will not be treated as tokens. |
| * |
| * @param str a string to be parsed |
| * @param delim the delimiters |
| */ |
| public Tokenizer(String str, String delim) { |
| this(str, delim, false); |
| } |
| |
| /** |
| * Constructs a string tokenizer for the specified string. The character |
| * in the delim argument is the delimiter for separating tokens. |
| * Delimiter character themselves will not be treated as token. |
| * |
| * @param str a string to be parsed |
| * @param delim the delimiter |
| */ |
| public Tokenizer(String str, char delim) { |
| this(str, String.valueOf(delim), false); |
| } |
| |
| /** |
| * Constructs a string tokenizer for the specified string. The tokenizer |
| * uses the default delimiter set, which is " \t\n\r\f": the space |
| * character, the tab character, the newline character, the carriage-return |
| * character, and the form-feed character. Delimiter characters themselves |
| * will not be treated as tokens. |
| * |
| * @param str a string to be parsed |
| */ |
| public Tokenizer(String str) { |
| this(str, DEFAULT_DELIMITERS, false); |
| } |
| |
| /** |
| * Tests if there are more tokens available from this tokenizer's string. |
| * If this method returns true, then a subsequent call to nextToken with |
| * no argument will successfully return a token. |
| * |
| * @return true if and only if there is at least one token in the string |
| * after the current position; false otherwise. |
| */ |
| public boolean hasMoreTokens() { |
| return ((current < max) ? (true) : |
| (((current == max) && (max == 0 |
| || (returnTokens && delim.indexOf(str.charAt(previous)) >= 0))))); |
| } |
| |
| /** |
| * Returns the next token from this string tokenizer. |
| * |
| * @return the next token from this string tokenizer |
| * |
| * @exception NoSuchElementException if there are no more tokens in this |
| * tokenizer's string |
| */ |
| public String nextToken() throws NoSuchElementException { |
| if (current == max |
| && (max == 0 |
| || (returnTokens && delim.indexOf(str.charAt(previous)) >= 0))) { |
| |
| current++; |
| return new String(); |
| } |
| |
| if (current >= max) |
| throw new NoSuchElementException(); |
| |
| int start = current; |
| String result = null; |
| |
| if (delim.indexOf(str.charAt(start)) >= 0) { |
| if (previous == -1 || (returnTokens && previous != current |
| && delim.indexOf(str.charAt(previous)) >= 0)) { |
| |
| result = new String(); |
| } |
| else if (returnTokens) |
| result = str.substring(start, ++current); |
| |
| if (!returnTokens) |
| current++; |
| } |
| |
| previous = start; |
| start = current; |
| |
| if (result == null) |
| while (current < max && delim.indexOf(str.charAt(current)) < 0) |
| current++; |
| |
| return result == null ? str.substring(start, current) : result; |
| } |
| |
| /** |
| * Returns the next token in this string tokenizer's string. First, the |
| * set of characters considered to be delimiters by this Tokenizer |
| * object is changed to be the characters in the string delim. |
| * Then the next token in the string after the current position is |
| * returned. The current position is advanced beyond the recognized token. |
| * The new delimiter set remains the default after this call. |
| * |
| * @param delim the new delimiters |
| * |
| * @return the next token, after switching to the new delimiter set |
| * |
| * @exception NoSuchElementException if there are no more tokens in this |
| * tokenizer's string. |
| */ |
| public String nextToken(String delim) throws NoSuchElementException { |
| this.delim = delim; |
| return nextToken(); |
| } |
| |
| /** |
| * Returns the same value as the hasMoreTokens method. It exists so that |
| * this class can implement the Enumeration interface. |
| * |
| * @return true if there are more tokens; false otherwise. |
| */ |
| public boolean hasMoreElements() { |
| return hasMoreTokens(); |
| } |
| |
| /** |
| * Returns the same value as the nextToken method, except that its |
| * declared return value is Object rather than String. It exists so that |
| * this class can implement the Enumeration interface. |
| * |
| * @return the next token in the string |
| * |
| * @exception NoSuchElementException if there are no more tokens in this |
| * tokenizer's string |
| */ |
| public Object nextElement() { |
| return nextToken(); |
| } |
| |
| /** |
| * Calculates the number of times that this tokenizer's nextToken method |
| * can be called before it generates an exception. The current position |
| * is not advanced. |
| * |
| * @return the number of tokens remaining in the string using the |
| * current delimiter set |
| */ |
| public int countTokens() { |
| int curr = current; |
| int count = 0; |
| |
| for (int i = curr; i < max; i++) { |
| if (delim.indexOf(str.charAt(i)) >= 0) |
| count++; |
| |
| curr++; |
| } |
| |
| return count + (returnTokens ? count : 0) + 1; |
| } |
| |
| /** |
| * Resets this tokenizer's state so the tokenizing starts from the begin. |
| */ |
| public void reset() { |
| previous = -1; |
| current = 0; |
| } |
| |
| /** |
| * Constructs a string tokenizer for the specified string. All characters |
| * in the delim argument are the delimiters for separating tokens. |
| * If the returnTokens flag is true, then the delimiter characters are |
| * also returned as tokens. Each delimiter is returned as a string of |
| * length one. If the flag is false, the delimiter characters are skipped |
| * and only serve as separators between tokens. Then tokenizes the str |
| * and return an String[] array with tokens. |
| * |
| * @param str a string to be parsed |
| * @param delim the delimiters |
| * @param returnTokens flag indicating whether to return the delimiters |
| * as tokens |
| * |
| * @return array with tokens |
| */ |
| public static String[] tokenize(String str, String delim, |
| boolean returnTokens) { |
| |
| Tokenizer tokenizer = new Tokenizer(str, delim, returnTokens); |
| String[] tokens = new String[tokenizer.countTokens()]; |
| |
| int i = 0; |
| while (tokenizer.hasMoreTokens()) { |
| tokens[i] = tokenizer.nextToken(); |
| i++; |
| } |
| |
| return tokens; |
| } |
| |
| /** |
| * Default delimiters "\t\n\r\f": |
| * the space character, the tab character, the newline character, |
| * the carriage-return character, and the form-feed character. |
| */ |
| public static final String DEFAULT_DELIMITERS = " \t\n\r\f"; |
| |
| /** |
| * String to tokenize. |
| */ |
| private String str = null; |
| |
| /** |
| * Delimiters. |
| */ |
| private String delim = null; |
| |
| /** |
| * Flag indicating whether to return the delimiters as tokens. |
| */ |
| private boolean returnTokens = false; |
| |
| /** |
| * Previous token start. |
| */ |
| private int previous = -1; |
| |
| /** |
| * Current position in str string. |
| */ |
| private int current = 0; |
| |
| /** |
| * Maximal position in str string. |
| */ |
| private int max = 0; |
| } |
| |
| |