| /* |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| */ |
| |
| using System; |
| using System.Collections; |
| using System.Collections.Generic; |
| using System.Linq; |
| using System.Text; |
| |
| namespace Lucene.Net.Search.Highlight.Test |
| { |
| /// <summary> |
| /// The class performs token processing in strings |
| /// </summary> |
| public class Tokenizer : IEnumerator<string> |
| { |
| /// Position over the string |
| private long currentPos = 0; |
| |
| /// Include demiliters in the results. |
| private bool includeDelims = false; |
| |
| /// Char representation of the String to tokenize. |
| private char[] chars = null; |
| |
| //The tokenizer uses the default delimiter set: the space character, the tab character, the newline character, and the carriage-return character and the form-feed character |
| private string delimiters = " \t\n\r\f"; |
| |
| /// <summary> |
| /// Initializes a new class instance with a specified string to process |
| /// </summary> |
| /// <param name="source">String to tokenize</param> |
| public Tokenizer(System.String source) |
| { |
| this.chars = source.ToCharArray(); |
| } |
| |
| /// <summary> |
| /// Initializes a new class instance with a specified string to process |
| /// and the specified token delimiters to use |
| /// </summary> |
| /// <param name="source">String to tokenize</param> |
| /// <param name="delimiters">String containing the delimiters</param> |
| public Tokenizer(System.String source, System.String delimiters) |
| : this(source) |
| { |
| this.delimiters = delimiters; |
| } |
| |
| |
| /// <summary> |
| /// Initializes a new class instance with a specified string to process, the specified token |
| /// delimiters to use, and whether the delimiters must be included in the results. |
| /// </summary> |
| /// <param name="source">String to tokenize</param> |
| /// <param name="delimiters">String containing the delimiters</param> |
| /// <param name="includeDelims">Determines if delimiters are included in the results.</param> |
| public Tokenizer(System.String source, System.String delimiters, bool includeDelims) |
| : this(source, delimiters) |
| { |
| this.includeDelims = includeDelims; |
| } |
| |
| |
| /// <summary> |
| /// Returns the next token from the token list |
| /// </summary> |
| /// <returns>The string value of the token</returns> |
| public System.String NextToken() |
| { |
| return NextToken(this.delimiters); |
| } |
| |
| /// <summary> |
| /// Returns the next token from the source string, using the provided |
| /// token delimiters |
| /// </summary> |
| /// <param name="delimiters">String containing the delimiters to use</param> |
| /// <returns>The string value of the token</returns> |
| public System.String NextToken(System.String delimiters) |
| { |
| //According to documentation, the usage of the received delimiters should be temporary (only for this call). |
| //However, it seems it is not true, so the following line is necessary. |
| this.delimiters = delimiters; |
| |
| //at the end |
| if (this.currentPos == this.chars.Length) |
| throw new System.ArgumentOutOfRangeException(); |
| //if over a delimiter and delimiters must be returned |
| else if ((System.Array.IndexOf(delimiters.ToCharArray(), chars[this.currentPos]) != -1) |
| && this.includeDelims) |
| return "" + this.chars[this.currentPos++]; |
| //need to get the token wo delimiters. |
| else |
| return NextToken(delimiters.ToCharArray()); |
| } |
| |
| //Returns the nextToken wo delimiters |
| private System.String NextToken(char[] delimiters) |
| { |
| string token = ""; |
| long pos = this.currentPos; |
| |
| //skip possible delimiters |
| while (System.Array.IndexOf(delimiters, this.chars[currentPos]) != -1) |
| //The last one is a delimiter (i.e there is no more tokens) |
| if (++this.currentPos == this.chars.Length) |
| { |
| this.currentPos = pos; |
| throw new System.ArgumentOutOfRangeException(); |
| } |
| |
| //getting the token |
| while (System.Array.IndexOf(delimiters, this.chars[this.currentPos]) == -1) |
| { |
| token += this.chars[this.currentPos]; |
| //the last one is not a delimiter |
| if (++this.currentPos == this.chars.Length) |
| break; |
| } |
| return token; |
| } |
| |
| |
| /// <summary> |
| /// Determines if there are more tokens to return from the source string |
| /// </summary> |
| /// <returns>True or false, depending if there are more tokens</returns> |
| public bool HasMoreTokens() |
| { |
| //keeping the current pos |
| long pos = this.currentPos; |
| |
| try |
| { |
| this.NextToken(); |
| } |
| catch (System.ArgumentOutOfRangeException) |
| { |
| return false; |
| } |
| finally |
| { |
| this.currentPos = pos; |
| } |
| return true; |
| } |
| |
| /// <summary> |
| /// Remaining tokens count |
| /// </summary> |
| public int Count |
| { |
| get |
| { |
| //keeping the current pos |
| long pos = this.currentPos; |
| int i = 0; |
| |
| try |
| { |
| while (true) |
| { |
| this.NextToken(); |
| i++; |
| } |
| } |
| catch (System.ArgumentOutOfRangeException) |
| { |
| this.currentPos = pos; |
| return i; |
| } |
| } |
| } |
| |
| /// <summary> |
| /// Performs the same action as NextToken. |
| /// </summary> |
| public string Current |
| { |
| get { return this.NextToken(); } |
| } |
| |
| /// <summary> |
| /// Performs the same action as NextToken. |
| /// </summary> |
| object IEnumerator.Current |
| { |
| get { return Current; } |
| } |
| |
| /// <summary> |
| // Performs the same action as HasMoreTokens. |
| /// </summary> |
| /// <returns>True or false, depending if there are more tokens</returns> |
| public bool MoveNext() |
| { |
| return this.HasMoreTokens(); |
| } |
| |
| /// <summary> |
| /// Does nothing. |
| /// </summary> |
| public void Reset() |
| { |
| } |
| |
| /// <summary> |
| /// Does nothing. |
| /// </summary> |
| public void Dispose() |
| { |
| |
| } |
| } |
| } |