// blob: ded1c9c2c5673e3585866a8eb4186b95411aebc5 [file]
#if FEATURE_BREAKITERATOR
using System;
namespace Lucene.Net.Support
{
/// <summary>
/// The <c>BreakIterator</c> class defines the methods used for finding
/// the location of boundaries in text. Instances of <c>BreakIterator</c>
/// maintain a current position and scan over text, returning the index
/// of the characters where boundaries occur.
/// </summary>
public abstract class BreakIterator
#if FEATURE_CLONEABLE
    : ICloneable
#endif
{
    /// <summary>
    /// Returned by <see cref="Previous()"/>, <see cref="Next()"/>,
    /// <see cref="Next(int)"/>, <see cref="Preceding(int)"/> and
    /// <see cref="Following(int)"/> when either the first or last text
    /// boundary has been reached.
    /// </summary>
    public static readonly int DONE = -1;

    /// <summary>
    /// Constructor. BreakIterator is stateless and has no default behavior.
    /// </summary>
    protected BreakIterator()
    {
    }

    /// <summary>
    /// Creates a copy of this iterator.
    /// </summary>
    /// <remarks>
    /// This base implementation is a shallow, member-wise copy
    /// (<see cref="object.MemberwiseClone()"/>), so reference-type fields declared
    /// by a subclass are shared between the original and the copy. The method is
    /// virtual so that a subclass can override it to copy such state as well.
    /// </remarks>
    /// <returns>A member-wise copy of this instance.</returns>
    public virtual object Clone()
    {
        return MemberwiseClone();
    }

    /// <summary>
    /// Returns the first boundary. The iterator's current position is set
    /// to the first text boundary.
    /// </summary>
    /// <returns>The character index of the first text boundary.</returns>
    public abstract int First();

    /// <summary>
    /// Returns the last boundary. The iterator's current position is set
    /// to the last text boundary.
    /// </summary>
    /// <returns>The character index of the last text boundary.</returns>
    public abstract int Last();

    /// <summary>
    /// Returns the nth boundary from the current boundary. If either
    /// the first or last text boundary has been reached, it returns
    /// <see cref="BreakIterator.DONE"/> and the current position is set to either
    /// the first or last text boundary depending on which one is reached. Otherwise,
    /// the iterator's current position is set to the new boundary.
    /// <para/>
    /// For example, if the iterator's current position is the mth text boundary
    /// and three more boundaries exist from the current boundary to the last text
    /// boundary, a <c>Next(2)</c> call moves to the (m + 2)th text boundary and
    /// returns its character index. A <c>Next(4)</c> call would return
    /// <see cref="BreakIterator.DONE"/> and the last text boundary would become the
    /// new text position.
    /// </summary>
    /// <param name="n">
    /// Which boundary to return. A value of 0 does nothing. Negative values
    /// move to previous boundaries and positive values move to later boundaries.
    /// </param>
    /// <returns>
    /// The character index of the nth boundary from the current position
    /// or <see cref="BreakIterator.DONE"/> if either first or last text boundary
    /// has been reached.
    /// </returns>
    public abstract int Next(int n);

    /// <summary>
    /// Returns the boundary following the current boundary. If the current boundary
    /// is the last text boundary, it returns <see cref="BreakIterator.DONE"/> and
    /// the iterator's current position is unchanged. Otherwise, the iterator's
    /// current position is set to the boundary following the current boundary.
    /// </summary>
    /// <remarks>Equivalent to <c>Next(1)</c>.</remarks>
    /// <returns>
    /// The character index of the next text boundary or
    /// <see cref="BreakIterator.DONE"/> if the current boundary is the last text
    /// boundary.
    /// </returns>
    /// <seealso cref="Next(int)"/>
    public abstract int Next();

    /// <summary>
    /// Returns the boundary preceding the current boundary. If the current boundary
    /// is the first text boundary, it returns <see cref="BreakIterator.DONE"/> and
    /// the iterator's current position is unchanged. Otherwise, the iterator's
    /// current position is set to the boundary preceding the current boundary.
    /// </summary>
    /// <returns>
    /// The character index of the previous text boundary or
    /// <see cref="BreakIterator.DONE"/> if the current boundary is the first text
    /// boundary.
    /// </returns>
    public abstract int Previous();

    /// <summary>
    /// Returns the first boundary following the specified character offset. If the
    /// specified offset equals the last text boundary, it returns
    /// <see cref="BreakIterator.DONE"/> and the iterator's current position is unchanged.
    /// Otherwise, the iterator's current position is set to the returned boundary.
    /// The value returned is always greater than the offset or the value
    /// <see cref="BreakIterator.DONE"/>.
    /// </summary>
    /// <param name="offset">The character offset to begin scanning.</param>
    /// <returns>
    /// The first boundary after the specified offset or
    /// <see cref="BreakIterator.DONE"/> if the last text boundary is passed in
    /// as the offset.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// If the specified offset is less than the first text boundary or
    /// greater than the last text boundary.
    /// </exception>
    public abstract int Following(int offset);

    /// <summary>
    /// Returns the last boundary preceding the specified character offset. If the
    /// specified offset equals the first text boundary, it returns
    /// <see cref="BreakIterator.DONE"/> and the iterator's current position is unchanged.
    /// Otherwise, the iterator's current position is set to the returned boundary.
    /// The value returned is always less than the offset or the value
    /// <see cref="BreakIterator.DONE"/>.
    /// </summary>
    /// <param name="offset">The character offset to begin scanning.</param>
    /// <returns>
    /// The last boundary before the specified offset or
    /// <see cref="BreakIterator.DONE"/> if the first text boundary is passed in
    /// as the offset.
    /// </returns>
    public abstract int Preceding(int offset);

    /// <summary>
    /// Returns true if the specified character offset is a text boundary.
    /// </summary>
    /// <param name="offset">The character offset to check.</param>
    /// <returns>
    /// <c>true</c> if <paramref name="offset"/> is a boundary position,
    /// <c>false</c> otherwise.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// If the specified offset is less than the first text boundary or
    /// greater than the last text boundary.
    /// </exception>
    public abstract bool IsBoundary(int offset);

    /// <summary>
    /// Gets the character index of the text boundary that was most
    /// recently returned by <see cref="Next()"/>, <see cref="Next(int)"/>,
    /// <see cref="Previous()"/>, <see cref="First()"/>, <see cref="Last()"/>,
    /// <see cref="Following(int)"/> or <see cref="Preceding(int)"/>. If any of
    /// these methods returns <see cref="BreakIterator.DONE"/> because either first
    /// or last text boundary has been reached, this is the first or last text
    /// boundary depending on which one is reached.
    /// </summary>
    /// <value>
    /// The text boundary returned from the above methods, or the first or last
    /// text boundary.
    /// </value>
    /// <seealso cref="Next()"/>
    /// <seealso cref="Next(int)"/>
    /// <seealso cref="Previous()"/>
    /// <seealso cref="First()"/>
    /// <seealso cref="Last()"/>
    /// <seealso cref="Following(int)"/>
    /// <seealso cref="Preceding(int)"/>
    public abstract int Current { get; }

    /// <summary>
    /// Gets the text being scanned.
    /// </summary>
    /// <remarks>
    /// The text is exposed as a <see cref="string"/> rather than as a
    /// <c>CharacterIterator</c>.
    /// </remarks>
    /// <value>The text being scanned.</value>
    public abstract string Text { get; }

    /// <summary>
    /// Sets a new text string to be scanned. The current scan
    /// position is reset to <see cref="First()"/>.
    /// </summary>
    /// <remarks>
    /// This overload wraps <paramref name="newText"/> in a
    /// <c>StringCharacterIterator</c> and forwards to
    /// <see cref="SetText(CharacterIterator)"/>.
    /// NOTE(review): a <c>null</c> argument is passed straight to the
    /// <c>StringCharacterIterator</c> constructor; how it is handled there
    /// is not visible from this class - confirm before relying on it.
    /// </remarks>
    /// <param name="newText">New text to scan.</param>
    public virtual void SetText(string newText)
    {
        SetText(new StringCharacterIterator(newText));
    }

    /// <summary>
    /// Sets new text to be scanned, supplied as a character iterator. The
    /// current scan position is reset to <see cref="First()"/>.
    /// </summary>
    /// <param name="newText">Iterator over the new text to scan.</param>
    public abstract void SetText(CharacterIterator newText);
}
}
#endif