modules/text/src/main/java/java/text/BreakIterator.java - harmony-classlib - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package java.text;

 import java.util.Locale;

 import org.apache.harmony.text.internal.nls.Messages;

 /**
  * Locates boundaries in text. This class defines a protocol for objects that
  * break up a piece of natural-language text according to a set of criteria.
  * Instances or subclasses of {@code BreakIterator} can be provided, for
  * example, to break a piece of text into words, sentences, or logical
  * characters according to the conventions of some language or group of
  * languages. We provide four built-in types of {@code BreakIterator}:
  * <ul>
  * <li>{@link #getSentenceInstance()} returns a {@code BreakIterator} that
  * locates boundaries between sentences. This is useful for triple-click
  * selection, for example.</li>
  * <li>{@link #getWordInstance()} returns a {@code BreakIterator} that locates
  * boundaries between words. This is useful for double-click selection or "find
  * whole words" searches. This type of {@code BreakIterator} makes sure there is
  * a boundary position at the beginning and end of each legal word (numbers
  * count as words, too). Whitespace and punctuation are kept separate from real
  * words.</li>
  * <li>{@code getLineInstance()} returns a {@code BreakIterator} that locates
  * positions where it is legal for a text editor to wrap lines. This is similar
  * to word breaking, but not the same: punctuation and whitespace are generally
  * kept with words (you don't want a line to start with whitespace, for
  * example), and some special characters can force a position to be considered a
  * line break position or prevent a position from being a line break position.</li>
  * <li>{@code getCharacterInstance()} returns a {@code BreakIterator} that
  * locates boundaries between logical characters. Because of the structure of
  * the Unicode encoding, a logical character may be stored internally as more
  * than one Unicode code point. (A with an umlaut may be stored as an a followed
  * by a separate combining umlaut character, for example, but the user still
  * thinks of it as one character.) This iterator allows various processes
  * (especially text editors) to treat as characters the units of text that a
  * user would think of as characters, rather than the units of text that the
  * computer sees as "characters".</li>
  * </ul> {@code BreakIterator}'s interface follows an "iterator" model (hence
  * the name), meaning it has a concept of a "current position" and methods like
  * {@code first()}, {@code last()}, {@code next()}, and {@code previous()} that
  * update the current position. All {@code BreakIterator}s uphold the following
  * invariants:
  * <ul>
  * <li>The beginning and end of the text are always treated as boundary
  * positions.</li>
  * <li>The current position of the iterator is always a boundary position
  * (random-access methods move the iterator to the nearest boundary position
  * before or after the specified position, not <i>to</i> the specified
  * position).</li>
  * <li>{@code DONE} is used as a flag to indicate when iteration has stopped.
  * {@code DONE} is only returned when the current position is the end of the
  * text and the user calls {@code next()}, or when the current position is the
  * beginning of the text and the user calls {@code previous()}.</li>
  * <li>Break positions are numbered by the positions of the characters that
  * follow them. Thus, under normal circumstances, the position before the first
  * character is 0, the position after the first character is 1, and the position
  * after the last character is the length of the string.</li>
  * <li>The client can change the position of an iterator, or the text it
  * analyzes, at will, but cannot change the behavior. If the user wants
  * different behavior, he must instantiate a new iterator.</li>
  * </ul>
  * <p>
  * {@code BreakIterator} accesses the text it analyzes through a
  * {@link CharacterIterator}, which makes it possible to use {@code
  * BreakIterator} to analyze text in any text-storage vehicle that provides a
  * {@code CharacterIterator} interface.
  * <p>
  * <em>Note:</em> Some types of {@code BreakIterator} can take a long time to
  * create, and instances of {@code BreakIterator} are not currently cached by
  * the system. For optimal performance, keep instances of {@code BreakIterator}
  * around as long as it makes sense. For example, when word-wrapping a document,
  * don't create and destroy a new {@code BreakIterator} for each line. Create
  * one break iterator for the whole document (or whatever stretch of text you're
  * wrapping) and use it to do the whole job of wrapping the text.
  * <p>
  * <em>Examples</em>:
  * <p>
  * Creating and using text boundaries:
  * <blockquote>
  *
  * <pre>
  * public static void main(String args[]) {
  *     if (args.length == 1) {
  *         String stringToExamine = args[0];
  *         //print each word in order
  *         BreakIterator boundary = BreakIterator.getWordInstance();
  *         boundary.setText(stringToExamine);
  *         printEachForward(boundary, stringToExamine);
  *         //print each sentence in reverse order
  *         boundary = BreakIterator.getSentenceInstance(Locale.US);
  *         boundary.setText(stringToExamine);
  *         printEachBackward(boundary, stringToExamine);
  *         printFirst(boundary, stringToExamine);
  *         printLast(boundary, stringToExamine);
  *     }
  * }
  * </pre>
  *
  * </blockquote>
  * <p>
  * Print each element in order:
  * <blockquote>
  *
  * <pre>
  * public static void printEachForward(BreakIterator boundary, String source) {
  *     int start = boundary.first();
  *     for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
  *         System.out.println(source.substring(start, end));
  *     }
  * }
  * </pre>
  *
  * </blockquote>
  * <p>
  * Print each element in reverse order:
  * <blockquote>
  *
  * <pre>
  * public static void printEachBackward(BreakIterator boundary, String source) {
  *     int end = boundary.last();
  *     for (int start = boundary.previous(); start != BreakIterator.DONE; end = start, start = boundary
  *             .previous()) {
  *         System.out.println(source.substring(start, end));
  *     }
  * }
  * </pre>
  *
  * </blockquote>
  * <p>
  * Print the first element:
  * <blockquote>
  *
  * <pre>
  * public static void printFirst(BreakIterator boundary, String source) {
  *     int start = boundary.first();
  *     int end = boundary.next();
  *     System.out.println(source.substring(start, end));
  * }
  * </pre>
  *
  * </blockquote>
  * <p>
  * Print the last element:
  * <blockquote>
  *
  * <pre>
  * public static void printLast(BreakIterator boundary, String source) {
  *     int end = boundary.last();
  *     int start = boundary.previous();
  *     System.out.println(source.substring(start, end));
  * }
  * </pre>
  *
  * </blockquote>
  * <p>
  * Print the element at a specified position:
  * <blockquote>
  *
  * <pre>
  * public static void printAt(BreakIterator boundary, int pos, String source) {
  *     int end = boundary.following(pos);
  *     int start = boundary.previous();
  *     System.out.println(source.substring(start, end));
  * }
  * </pre>
  *
  * </blockquote>
  * <p>
  * Find the next word:
  * <blockquote>
  *
  * <pre>
  * public static int nextWordStartAfter(int pos, String text) {
  *     BreakIterator wb = BreakIterator.getWordInstance();
  *     wb.setText(text);
  *     int last = wb.following(pos);
  *     int current = wb.next();
  *     while (current != BreakIterator.DONE) {
  *         for (int p = last; p &lt; current; p++) {
  *             if (Character.isLetter(text.charAt(p)))
  *                 return last;
  *         }
  *         last = current;
  *         current = wb.next();
  *     }
  *     return BreakIterator.DONE;
  * }
  * </pre>
  *
  * </blockquote>
  * <p>
  * The iterator returned by {@code BreakIterator.getWordInstance()} is unique in
  * that the break positions it returns don't represent both the start and end of
  * the thing being iterated over. That is, a sentence-break iterator returns
  * breaks that each represent the end of one sentence and the beginning of the
  * next. With the word-break iterator, the characters between two boundaries
  * might be a word, or they might be the punctuation or whitespace between two
  * words. The above code uses a simple heuristic to determine which boundary is
  * the beginning of a word: If the characters between this boundary and the next
  * boundary include at least one letter (this can be an alphabetical letter, a
  * CJK ideograph, a Hangul syllable, a Kana character, etc.), then the text
  * between this boundary and the next is a word; otherwise, it's the material
  * between words.
  *
  * @see CharacterIterator
  */
 public abstract class BreakIterator implements Cloneable {

     /**
      * This constant is returned by iterate methods like {@code previous()} or
      * {@code next()} if they have returned all valid boundaries.
      */
     public static final int DONE = -1;

     // Number of bytes getLong(byte[], int) reads to assemble one long.
     private static final int LONG_LENGTH = 8;

     // Number of bytes getInt(byte[], int) reads to assemble one int.
     private static final int INT_LENGTH = 4;

     // Number of bytes getShort(byte[], int) reads to assemble one short.
     private static final int SHORT_LENGTH = 2;

     // The wrapped ICU implementation that the non-abstract instance methods
     // delegate to. The protected no-argument constructor leaves it null.
     com.ibm.icu.text.BreakIterator wrapped;

     /**
      * Default constructor, just for invocation by subclass.
      */
     protected BreakIterator() {
         super();
     }

     /*
      * Package-private wrapping constructor: stores the given ICU iterator as
      * the delegate of this instance.
      */
     BreakIterator(com.ibm.icu.text.BreakIterator iterator) {
         this.wrapped = iterator;
     }

     /**
      * Returns the locales for which break iterators are available, as
      * reported by the ICU implementation.
      *
      * @return an array containing all supported locales.
      */
     public static Locale[] getAvailableLocales() {
         final Locale[] supported = com.ibm.icu.text.BreakIterator
                 .getAvailableLocales();
         return supported;
     }

     /**
      * Creates a {@code BreakIterator} that iterates over logical characters
      * using the default locale.
      *
      * @return a new character-break iterator for the default locale.
      */
     public static BreakIterator getCharacterInstance() {
         final com.ibm.icu.text.BreakIterator icuIterator =
                 com.ibm.icu.text.BreakIterator.getCharacterInstance();
         return new RuleBasedBreakIterator(icuIterator);
     }

     /**
      * Creates a {@code BreakIterator} that iterates over logical characters
      * using the given locale.
      *
      * @param where
      *            the locale whose conventions are to be used.
      * @return a new character-break iterator for the given locale.
      * @throws NullPointerException if {@code where} is {@code null}.
      */
     public static BreakIterator getCharacterInstance(Locale where) {
         if (where != null) {
             final com.ibm.icu.text.BreakIterator icuIterator =
                     com.ibm.icu.text.BreakIterator.getCharacterInstance(where);
             return new RuleBasedBreakIterator(icuIterator);
         }
         // Reject a null locale before any ICU call is made.
         throw new NullPointerException();
     }

     /**
      * Creates a {@code BreakIterator} that iterates over line breaks using
      * the default locale.
      *
      * @return a new line-break iterator for the default locale.
      */
     public static BreakIterator getLineInstance() {
         final com.ibm.icu.text.BreakIterator icuIterator =
                 com.ibm.icu.text.BreakIterator.getLineInstance();
         return new RuleBasedBreakIterator(icuIterator);
     }

     /**
      * Creates a {@code BreakIterator} that iterates over line breaks using
      * the given locale.
      *
      * @param where
      *            the locale whose conventions are to be used.
      * @return a new line-break iterator for the given locale.
      * @throws NullPointerException if {@code where} is {@code null}.
      */
     public static BreakIterator getLineInstance(Locale where) {
         if (where != null) {
             final com.ibm.icu.text.BreakIterator icuIterator =
                     com.ibm.icu.text.BreakIterator.getLineInstance(where);
             return new RuleBasedBreakIterator(icuIterator);
         }
         // Reject a null locale before any ICU call is made.
         throw new NullPointerException();
     }

     /**
      * Creates a {@code BreakIterator} that iterates over sentence breaks
      * using the default locale.
      *
      * @return a new sentence-break iterator for the default locale.
      */
     public static BreakIterator getSentenceInstance() {
         final com.ibm.icu.text.BreakIterator icuIterator =
                 com.ibm.icu.text.BreakIterator.getSentenceInstance();
         return new RuleBasedBreakIterator(icuIterator);
     }

     /**
      * Creates a {@code BreakIterator} that iterates over sentence breaks
      * using the given locale.
      *
      * @param where
      *            the locale whose conventions are to be used.
      * @return a new sentence-break iterator for the given locale.
      * @throws NullPointerException if {@code where} is {@code null}.
      */
     public static BreakIterator getSentenceInstance(Locale where) {
         if (where != null) {
             final com.ibm.icu.text.BreakIterator icuIterator =
                     com.ibm.icu.text.BreakIterator.getSentenceInstance(where);
             return new RuleBasedBreakIterator(icuIterator);
         }
         // Reject a null locale before any ICU call is made.
         throw new NullPointerException();
     }

     /**
      * Creates a {@code BreakIterator} that iterates over word breaks using
      * the default locale.
      *
      * @return a new word-break iterator for the default locale.
      */
     public static BreakIterator getWordInstance() {
         final com.ibm.icu.text.BreakIterator icuIterator =
                 com.ibm.icu.text.BreakIterator.getWordInstance();
         return new RuleBasedBreakIterator(icuIterator);
     }

     /**
      * Creates a {@code BreakIterator} that iterates over word breaks using
      * the given locale.
      *
      * @param where
      *            the locale whose conventions are to be used.
      * @return a new word-break iterator for the given locale.
      * @throws NullPointerException if {@code where} is {@code null}.
      */
     public static BreakIterator getWordInstance(Locale where) {
         if (where != null) {
             final com.ibm.icu.text.BreakIterator icuIterator =
                     com.ibm.icu.text.BreakIterator.getWordInstance(where);
             return new RuleBasedBreakIterator(icuIterator);
         }
         // Reject a null locale before any ICU call is made.
         throw new NullPointerException();
     }

     /**
      * Tests whether the given offset is a boundary position. When the result
      * is {@code true} the current iteration position becomes the given
      * offset; when it is {@code false} the current iteration position is set
      * as though {@link #following(int)} had been called.
      * <p>
      * This implementation forwards to the wrapped ICU iterator, so subclasses
      * created through the protected constructor (which leaves no wrapped
      * iterator) need to override it.
      *
      * @param offset
      *            the offset to check.
      * @return {@code true} if the given offset is a boundary position;
      *         {@code false} otherwise.
      */
     public boolean isBoundary(int offset) {
         final boolean atBoundary = wrapped.isBoundary(offset);
         return atBoundary;
     }

     /**
      * Moves the current position to the last boundary before the given
      * offset and returns that boundary, or returns {@code DONE} if the given
      * offset specifies the starting position.
      * <p>
      * This implementation forwards to the wrapped ICU iterator, so subclasses
      * created through the protected constructor (which leaves no wrapped
      * iterator) need to override it.
      *
      * @param offset
      *            the position from which to search backwards.
      * @return the position of the last boundary preceding the given offset.
      * @throws IllegalArgumentException
      *            if the offset is invalid.
      */
     public int preceding(int offset) {
         final int boundary = wrapped.preceding(offset);
         return boundary;
     }

     /**
      * Sets the new text string to be analyzed. The current position is reset
      * to the beginning of this new string, and the old string is lost.
      * <p>
      * When this instance wraps an ICU iterator the call is forwarded to it.
      * Otherwise (a subclass created through the protected constructor) the
      * string is wrapped in a {@link StringCharacterIterator} and passed to
      * {@link #setText(CharacterIterator)}.
      *
      * @param newText
      *            the new text string to be analyzed.
      */
     public void setText(String newText) {
         if (wrapped != null) {
             wrapped.setText(newText);
             return;
         }
         // No ICU delegate to forward to: route the string through the
         // abstract CharacterIterator overload instead of dereferencing null.
         setText(new StringCharacterIterator(newText));
     }

     /**
      * Returns this iterator's current position, i.e. the offset of the
      * boundary the iterator is currently positioned at. The position itself
      * is not changed by this call.
      *
      * @return this iterator's current position.
      */
     public abstract int current();

     /**
      * Sets this iterator's current position to the first boundary and returns
      * that position. The beginning of the text is always treated as a
      * boundary.
      *
      * @return the position of the first boundary.
      */
     public abstract int first();

     /**
      * Sets this iterator's current position to the first boundary following
      * the given offset and returns that position. Returns {@code DONE} if
      * there is no boundary after the given offset.
      *
      * @param offset
      *            the position after which to search for a boundary.
      * @return the position of the first boundary following the given offset,
      *         or {@code DONE} if there is none.
      * @throws IllegalArgumentException
      *            if the offset is invalid.
      */
     public abstract int following(int offset);

     /**
      * Returns a {@code CharacterIterator} which represents the text being
      * analyzed. Please note that the returned value is probably the internal
      * iterator used by this object, so changing its state may affect this
      * break iterator. If the invoker wants to modify the status of the
      * returned iterator, it is recommended to first create a clone of the
      * iterator returned and to work on that clone.
      *
      * @return a {@code CharacterIterator} which represents the text being
      *         analyzed.
      */
     public abstract CharacterIterator getText();

     /**
      * Sets this iterator's current position to the last boundary and returns
      * that position. The end of the text is always treated as a boundary.
      *
      * @return the position of the last boundary.
      */
     public abstract int last();

     /**
      * Sets this iterator's current position to the next boundary after the
      * current position, and returns this position. Returns {@code DONE} if no
      * boundary was found after the current position.
      *
      * @return the position of the next boundary, or {@code DONE} if the
      *         current position is already the last boundary.
      */
     public abstract int next();

     /**
      * Moves this iterator {@code n} boundaries away from the current position
      * and returns the boundary reached. As specified for
      * {@code java.text.BreakIterator.next(int)}, {@code n} is a count of
      * boundaries, not a text offset: a positive value moves forward, a
      * negative value moves backward, and zero leaves the position unchanged.
      * Returns {@code DONE} if the first or last boundary is reached before
      * {@code n} boundaries have been passed.
      *
      * @param n
      *            the number of boundaries to move by; negative to move
      *            backward.
      * @return the position of the boundary reached, or {@code DONE}.
      */
     public abstract int next(int n);

     /**
      * Sets this iterator's current position to the previous boundary before the
      * current position and returns that position. Returns {@code DONE} if
      * no boundary was found before the current position.
      *
      * @return the position of the previous boundary, or {@code DONE} if the
      *         current position is already the first boundary.
      */
     public abstract int previous();

     /**
      * Sets the new text to be analyzed by the given {@code CharacterIterator}.
      * The position will be reset to the beginning of the new text, and other
      * status information of this iterator will be kept.
      *
      * @param newText
      *            the {@code CharacterIterator} referring to the text to be
      *            analyzed.
      */
     public abstract void setText(CharacterIterator newText);

     /**
      * Creates a copy of this iterator; all status information, including the
      * current position, is kept the same. The wrapped ICU iterator, when
      * there is one, is cloned as well so the copy can be moved independently
      * of the original. Instances without a wrapped iterator (subclasses
      * created through the protected constructor) are copied field by field.
      *
      * @return a copy of this iterator.
      */
     @Override
     public Object clone() {
         try {
             BreakIterator copy = (BreakIterator) super.clone();
             // wrapped is null for subclasses that used the protected
             // constructor; cloning it unconditionally would throw a
             // NullPointerException.
             if (wrapped != null) {
                 copy.wrapped = (com.ibm.icu.text.BreakIterator) wrapped.clone();
             }
             return copy;
         } catch (CloneNotSupportedException e) {
             // Not expected, since this class implements Cloneable. Keep the
             // original exception as the cause so the failure can be traced.
             InternalError error = new InternalError(e.getMessage());
             error.initCause(e);
             throw error;
         }
     }

     /**
      * Assembles a {@code long} from {@code LONG_LENGTH} consecutive bytes of
      * the given array, starting at the given offset. The byte at
      * {@code offset} becomes the most significant byte of the result.
      *
      * @param buf
      *            the bytes to be converted.
      * @param offset
      *            the index of the first byte to read.
      * @return the converted long value.
      * @throws NullPointerException
      *             if {@code buf} is {@code null}.
      * @throws ArrayIndexOutOfBoundsException
      *             if {@code offset < 0} or {@code offset + LONG_LENGTH} is
      *             greater than the length of {@code buf}.
      */
     protected static long getLong(byte[] buf, int offset) {
         // Touch buf.length before inspecting offset so that a null array is
         // always reported as a NullPointerException.
         final int available = buf.length - offset;
         if (offset < 0 || available < LONG_LENGTH) {
             // text.1E=Offset out of bounds \: {0}
             throw new ArrayIndexOutOfBoundsException(Messages.getString("text.1E", offset)); //$NON-NLS-1$
         }
         final int end = offset + LONG_LENGTH;
         long value = 0;
         int index = offset;
         while (index < end) {
             // Shift earlier bytes up and append the next one as unsigned.
             value = (value << 8) | (buf[index] & 0xff);
             index++;
         }
         return value;
     }

     /**
      * Assembles an {@code int} from {@code INT_LENGTH} consecutive bytes of
      * the given array, starting at the given offset. The byte at
      * {@code offset} becomes the most significant byte of the result.
      *
      * @param buf
      *            the bytes to be converted.
      * @param offset
      *            the index of the first byte to read.
      * @return the converted int value.
      * @throws NullPointerException
      *             if {@code buf} is {@code null}.
      * @throws ArrayIndexOutOfBoundsException
      *             if {@code offset < 0} or {@code offset + INT_LENGTH} is
      *             greater than the length of {@code buf}.
      */
     protected static int getInt(byte[] buf, int offset) {
         // Touch buf.length before inspecting offset so that a null array is
         // always reported as a NullPointerException.
         final int lastValidStart = buf.length - INT_LENGTH;
         if (offset < 0 || offset > lastValidStart) {
             // text.1E=Offset out of bounds \: {0}
             throw new ArrayIndexOutOfBoundsException(Messages.getString("text.1E", offset)); //$NON-NLS-1$
         }
         int value = 0;
         for (int remaining = INT_LENGTH, index = offset; remaining > 0; remaining--, index++) {
             // Shift earlier bytes up and append the next one as unsigned.
             value = (value << 8) | (buf[index] & 0xff);
         }
         return value;
     }

     /**
      * Assembles a {@code short} from {@code SHORT_LENGTH} consecutive bytes
      * of the given array, starting at the given offset. The byte at
      * {@code offset} becomes the most significant byte of the result.
      *
      * @param buf
      *            the bytes to be converted.
      * @param offset
      *            the index of the first byte to read.
      * @return the converted short value.
      * @throws NullPointerException
      *             if {@code buf} is {@code null}.
      * @throws ArrayIndexOutOfBoundsException
      *             if {@code offset < 0} or {@code offset + SHORT_LENGTH} is
      *             greater than the length of {@code buf}.
      */
     protected static short getShort(byte[] buf, int offset) {
         // Touch buf.length before inspecting offset so that a null array is
         // always reported as a NullPointerException.
         final int lastValidStart = buf.length - SHORT_LENGTH;
         if (offset < 0 || offset > lastValidStart) {
             // text.1E=Offset out of bounds \: {0}
             throw new ArrayIndexOutOfBoundsException(Messages.getString("text.1E", offset)); //$NON-NLS-1$
         }
         // Accumulate in an int and narrow once at the end; only the low 16
         // bits survive the cast, exactly as when narrowing on every step.
         int bits = 0;
         final int end = offset + SHORT_LENGTH;
         for (int index = offset; index < end; index++) {
             bits = (bits << 8) | (buf[index] & 0xff);
         }
         return (short) bits;
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package java.text;

	import java.util.Locale;

	import org.apache.harmony.text.internal.nls.Messages;

	/**
	* Locates boundaries in text. This class defines a protocol for objects that
	* break up a piece of natural-language text according to a set of criteria.
	* Instances or subclasses of {@code BreakIterator} can be provided, for
	* example, to break a piece of text into words, sentences, or logical
	* characters according to the conventions of some language or group of
	* languages. We provide four built-in types of {@code BreakIterator}:
	* <ul>
	* <li>{@link #getSentenceInstance()} returns a {@code BreakIterator} that
	* locates boundaries between sentences. This is useful for triple-click
	* selection, for example.</li>
	* <li>{@link #getWordInstance()} returns a {@code BreakIterator} that locates
	* boundaries between words. This is useful for double-click selection or "find
	* whole words" searches. This type of {@code BreakIterator} makes sure there is
	* a boundary position at the beginning and end of each legal word (numbers
	* count as words, too). Whitespace and punctuation are kept separate from real
	* words.</li>
	* <li>{@code getLineInstance()} returns a {@code BreakIterator} that locates
	* positions where it is legal for a text editor to wrap lines. This is similar
	* to word breaking, but not the same: punctuation and whitespace are generally
	* kept with words (you don't want a line to start with whitespace, for
	* example), and some special characters can force a position to be considered a
	* line break position or prevent a position from being a line break position.</li>
	* <li>{@code getCharacterInstance()} returns a {@code BreakIterator} that
	* locates boundaries between logical characters. Because of the structure of
	* the Unicode encoding, a logical character may be stored internally as more
	* than one Unicode code point. (A with an umlaut may be stored as an a followed
	* by a separate combining umlaut character, for example, but the user still
	* thinks of it as one character.) This iterator allows various processes
	* (especially text editors) to treat as characters the units of text that a
	* user would think of as characters, rather than the units of text that the
	* computer sees as "characters".</li>
	* </ul> {@code BreakIterator}'s interface follows an "iterator" model (hence
	* the name), meaning it has a concept of a "current position" and methods like
	* {@code first()}, {@code last()}, {@code next()}, and {@code previous()} that
	* update the current position. All {@code BreakIterator}s uphold the following
	* invariants:
	* <ul>
	* <li>The beginning and end of the text are always treated as boundary
	* positions.</li>
	* <li>The current position of the iterator is always a boundary position
	* (random-access methods move the iterator to the nearest boundary position
	* before or after the specified position, not <i>to</i> the specified
	* position).</li>
	* <li>{@code DONE} is used as a flag to indicate when iteration has stopped.
	* {@code DONE} is only returned when the current position is the end of the
	* text and the user calls {@code next()}, or when the current position is the
	* beginning of the text and the user calls {@code previous()}.</li>
	* <li>Break positions are numbered by the positions of the characters that
	* follow them. Thus, under normal circumstances, the position before the first
	* character is 0, the position after the first character is 1, and the position
	* after the last character is the length of the string.</li>
	* <li>The client can change the position of an iterator, or the text it
	* analyzes, at will, but cannot change the behavior. If the user wants
	* different behavior, he must instantiate a new iterator.</li>
	* </ul>
	* <p>
	* {@code BreakIterator} accesses the text it analyzes through a
	* {@link CharacterIterator}, which makes it possible to use {@code
	* BreakIterator} to analyze text in any text-storage vehicle that provides a
	* {@code CharacterIterator} interface.
	* <p>
	* <em>Note:</em> Some types of {@code BreakIterator} can take a long time to
	* create, and instances of {@code BreakIterator} are not currently cached by
	* the system. For optimal performance, keep instances of {@code BreakIterator}
	* around as long as it makes sense. For example, when word-wrapping a document,
	* don't create and destroy a new {@code BreakIterator} for each line. Create
	* one break iterator for the whole document (or whatever stretch of text you're
	* wrapping) and use it to do the whole job of wrapping the text.
	* <p>
	* <em>Examples</em>:
	* <p>
	* Creating and using text boundaries:
	* <blockquote>
	*
	* <pre>
	* public static void main(String args[]) {
	* if (args.length == 1) {
	* String stringToExamine = args[0];
	* //print each word in order
	* BreakIterator boundary = BreakIterator.getWordInstance();
	* boundary.setText(stringToExamine);
	* printEachForward(boundary, stringToExamine);
	* //print each sentence in reverse order
	* boundary = BreakIterator.getSentenceInstance(Locale.US);
	* boundary.setText(stringToExamine);
	* printEachBackward(boundary, stringToExamine);
	* printFirst(boundary, stringToExamine);
	* printLast(boundary, stringToExamine);
	* }
	* }
	* </pre>
	*
	* </blockquote>
	* <p>
	* Print each element in order:
	* <blockquote>
	*
	* <pre>
	* public static void printEachForward(BreakIterator boundary, String source) {
	* int start = boundary.first();
	* for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
	* System.out.println(source.substring(start, end));
	* }
	* }
	* </pre>
	*
	* </blockquote>
	* <p>
	* Print each element in reverse order:
	* <blockquote>
	*
	* <pre>
	* public static void printEachBackward(BreakIterator boundary, String source) {
	* int end = boundary.last();
	* for (int start = boundary.previous(); start != BreakIterator.DONE; end = start, start = boundary
	* .previous()) {
	* System.out.println(source.substring(start, end));
	* }
	* }
	* </pre>
	*
	* </blockquote>
	* <p>
	* Print the first element:
	* <blockquote>
	*
	* <pre>
	* public static void printFirst(BreakIterator boundary, String source) {
	* int start = boundary.first();
	* int end = boundary.next();
	* System.out.println(source.substring(start, end));
	* }
	* </pre>
	*
	* </blockquote>
	* <p>
	* Print the last element:
	* <blockquote>
	*
	* <pre>
	* public static void printLast(BreakIterator boundary, String source) {
	* int end = boundary.last();
	* int start = boundary.previous();
	* System.out.println(source.substring(start, end));
	* }
	* </pre>
	*
	* </blockquote>
	* <p>
	* Print the element at a specified position:
	* <blockquote>
	*
	* <pre>
	* public static void printAt(BreakIterator boundary, int pos, String source) {
	* int end = boundary.following(pos);
	* int start = boundary.previous();
	* System.out.println(source.substring(start, end));
	* }
	* </pre>
	*
	* </blockquote>
	* <p>
	* Find the next word:
	* <blockquote>
	*
	* <pre>
	* public static int nextWordStartAfter(int pos, String text) {
	* BreakIterator wb = BreakIterator.getWordInstance();
	* wb.setText(text);
	* int last = wb.following(pos);
	* int current = wb.next();
	* while (current != BreakIterator.DONE) {
	* for (int p = last; p &lt; current; p++) {
	* if (Character.isLetter(text.charAt(p)))
	* return last;
	* }
	* last = current;
	* current = wb.next();
	* }
	* return BreakIterator.DONE;
	* }
	* </pre>
	*
	* </blockquote>
	* <p>
	* The iterator returned by {@code BreakIterator.getWordInstance()} is unique in
	* that the break positions it returns don't represent both the start and end of
	* the thing being iterated over. That is, a sentence-break iterator returns
	* breaks that each represent the end of one sentence and the beginning of the
	* next. With the word-break iterator, the characters between two boundaries
	* might be a word, or they might be the punctuation or whitespace between two
	* words. The above code uses a simple heuristic to determine which boundary is
	* the beginning of a word: If the characters between this boundary and the next
	* boundary include at least one letter (this can be an alphabetical letter, a
	* CJK ideograph, a Hangul syllable, a Kana character, etc.), then the text
	* between this boundary and the next is a word; otherwise, it's the material
	* between words.
	*
	* @see CharacterIterator
	*/
	public abstract class BreakIterator implements Cloneable {

	/**
	 * Sentinel value returned by iteration methods such as {@code previous()}
	 * or {@code next()} once they have returned all valid boundaries.
	 */
	public static final int DONE = -1;

	// Number of bytes read by getLong(byte[], int).
	private static final int LONG_LENGTH = 8;

	// Number of bytes read by getInt(byte[], int).
	private static final int INT_LENGTH = 4;

	// Number of bytes read by getShort(byte[], int).
	private static final int SHORT_LENGTH = 2;

	// The wrapped ICU implementation that the concrete instance methods of this
	// class delegate to. It is assigned by the wrapping constructor and by
	// clone(); the protected no-argument constructor leaves it null.
	com.ibm.icu.text.BreakIterator wrapped;

	/**
	* Default constructor, just for invocation by subclass.
	*/
	protected BreakIterator() {
	super();
	}

	/*
	 * Wrapping constructor: stores the given ICU iterator as the delegate of
	 * this instance.
	 */
	BreakIterator(com.ibm.icu.text.BreakIterator iterator) {
	    super();
	    this.wrapped = iterator;
	}

	/**
	 * Returns an array of all locales for which break iterators are
	 * available, as reported by the underlying ICU implementation.
	 *
	 * @return all supported locales.
	 */
	public static Locale[] getAvailableLocales() {
	    final Locale[] supported = com.ibm.icu.text.BreakIterator.getAvailableLocales();
	    return supported;
	}

	/**
	 * Creates a {@code BreakIterator} that iterates over character
	 * boundaries using the default locale.
	 *
	 * @return a new character {@code BreakIterator} for the default locale.
	 */
	public static BreakIterator getCharacterInstance() {
	    final com.ibm.icu.text.BreakIterator delegate =
	            com.ibm.icu.text.BreakIterator.getCharacterInstance();
	    return new RuleBasedBreakIterator(delegate);
	}

	/**
	 * Creates a {@code BreakIterator} that iterates over character
	 * boundaries using the given locale.
	 *
	 * @param where
	 *            the locale whose conventions are to be used.
	 * @return a new character {@code BreakIterator} for the given locale.
	 * @throws NullPointerException if {@code where} is {@code null}.
	 */
	public static BreakIterator getCharacterInstance(Locale where) {
	    if (null == where) {
	        throw new NullPointerException();
	    }
	    final com.ibm.icu.text.BreakIterator delegate =
	            com.ibm.icu.text.BreakIterator.getCharacterInstance(where);
	    return new RuleBasedBreakIterator(delegate);
	}

	/**
	 * Creates a {@code BreakIterator} that iterates over line breaks using
	 * the default locale.
	 *
	 * @return a new line {@code BreakIterator} for the default locale.
	 */
	public static BreakIterator getLineInstance() {
	    final com.ibm.icu.text.BreakIterator delegate =
	            com.ibm.icu.text.BreakIterator.getLineInstance();
	    return new RuleBasedBreakIterator(delegate);
	}

	/**
	 * Creates a {@code BreakIterator} that iterates over line breaks using
	 * the given locale.
	 *
	 * @param where
	 *            the locale whose conventions are to be used.
	 * @return a new line {@code BreakIterator} for the given locale.
	 * @throws NullPointerException if {@code where} is {@code null}.
	 */
	public static BreakIterator getLineInstance(Locale where) {
	    if (null == where) {
	        throw new NullPointerException();
	    }
	    final com.ibm.icu.text.BreakIterator delegate =
	            com.ibm.icu.text.BreakIterator.getLineInstance(where);
	    return new RuleBasedBreakIterator(delegate);
	}

	/**
	 * Creates a {@code BreakIterator} that iterates over sentence breaks
	 * using the default locale.
	 *
	 * @return a new sentence {@code BreakIterator} for the default locale.
	 */
	public static BreakIterator getSentenceInstance() {
	    final com.ibm.icu.text.BreakIterator delegate =
	            com.ibm.icu.text.BreakIterator.getSentenceInstance();
	    return new RuleBasedBreakIterator(delegate);
	}

	/**
	 * Creates a {@code BreakIterator} that iterates over sentence breaks
	 * using the given locale.
	 *
	 * @param where
	 *            the locale whose conventions are to be used.
	 * @return a new sentence {@code BreakIterator} for the given locale.
	 * @throws NullPointerException if {@code where} is {@code null}.
	 */
	public static BreakIterator getSentenceInstance(Locale where) {
	    if (null == where) {
	        throw new NullPointerException();
	    }
	    final com.ibm.icu.text.BreakIterator delegate =
	            com.ibm.icu.text.BreakIterator.getSentenceInstance(where);
	    return new RuleBasedBreakIterator(delegate);
	}

	/**
	 * Creates a {@code BreakIterator} that iterates over word breaks using
	 * the default locale.
	 *
	 * @return a new word {@code BreakIterator} for the default locale.
	 */
	public static BreakIterator getWordInstance() {
	    final com.ibm.icu.text.BreakIterator delegate =
	            com.ibm.icu.text.BreakIterator.getWordInstance();
	    return new RuleBasedBreakIterator(delegate);
	}

	/**
	 * Creates a {@code BreakIterator} that iterates over word breaks using
	 * the given locale.
	 *
	 * @param where
	 *            the locale whose conventions are to be used.
	 * @return a new word {@code BreakIterator} for the given locale.
	 * @throws NullPointerException if {@code where} is {@code null}.
	 */
	public static BreakIterator getWordInstance(Locale where) {
	    if (null == where) {
	        throw new NullPointerException();
	    }
	    final com.ibm.icu.text.BreakIterator delegate =
	            com.ibm.icu.text.BreakIterator.getWordInstance(where);
	    return new RuleBasedBreakIterator(delegate);
	}

	/**
	 * Indicates whether the given offset is a boundary position. If this method
	 * returns {@code true}, the current iteration position is set to the given
	 * position; if it returns {@code false}, the current iteration position is
	 * set as though {@link #following(int)} had been called.
	 * <p>
	 * When this instance wraps an ICU iterator the call is delegated to it.
	 * Subclasses created through the protected no-argument constructor have no
	 * delegate; for them the answer is derived from {@link #first()} and
	 * {@link #following(int)} instead of failing with a
	 * {@code NullPointerException}.
	 *
	 * @param offset
	 *            the given offset to check.
	 * @return {@code true} if the given offset is a boundary position;
	 *         {@code false} otherwise.
	 * @throws IllegalArgumentException
	 *             if the offset is invalid.
	 */
	public boolean isBoundary(int offset) {
	    if (wrapped != null) {
	        return wrapped.isBoundary(offset);
	    }

	    // No delegate: fall back to the abstract primitives.
	    int start = first();
	    if (offset == start) {
	        // first() has already positioned the iterator on this boundary.
	        return true;
	    }
	    if (offset < start) {
	        throw new IllegalArgumentException();
	    }
	    // The first boundary strictly after (offset - 1) is either offset
	    // itself or the boundary that following(offset) would have found.
	    int boundary = following(offset - 1);
	    if (boundary == DONE) {
	        throw new IllegalArgumentException();
	    }
	    return boundary == offset;
	}

	/**
	 * Returns the position of the last boundary preceding the given offset and
	 * sets the current position to the returned value, or returns {@code DONE}
	 * if the given offset specifies the starting position.
	 * <p>
	 * When this instance wraps an ICU iterator the call is delegated to it.
	 * Subclasses created through the protected no-argument constructor have no
	 * delegate; for them the result is derived from {@link #following(int)},
	 * {@link #last()} and {@link #previous()} instead of failing with a
	 * {@code NullPointerException}.
	 *
	 * @param offset
	 *            the given start position to be searched for.
	 * @return the position of the last boundary preceding the given offset.
	 * @throws IllegalArgumentException
	 *             if the offset is invalid.
	 */
	public int preceding(int offset) {
	    if (wrapped != null) {
	        return wrapped.preceding(offset);
	    }

	    // No delegate: fall back to the abstract primitives.
	    int position = following(offset);
	    if (position == DONE) {
	        // Nothing lies after offset, so start the backwards walk from the
	        // final boundary.
	        position = last();
	    }
	    // Step back until the current boundary is strictly before offset.
	    while (position != DONE && position >= offset) {
	        position = previous();
	    }
	    return position;
	}

	/**
	 * Sets the new text string to be analyzed. The current position is reset
	 * to the beginning of this new string, and the old string is lost.
	 * <p>
	 * When this instance wraps an ICU iterator the call is delegated to it.
	 * Subclasses created through the protected no-argument constructor have no
	 * delegate; for them the string is wrapped in a
	 * {@link StringCharacterIterator} and passed to
	 * {@link #setText(CharacterIterator)} instead of failing with a
	 * {@code NullPointerException}.
	 *
	 * @param newText
	 *            the new text string to be analyzed.
	 */
	public void setText(String newText) {
	    if (wrapped != null) {
	        wrapped.setText(newText);
	        return;
	    }
	    // No delegate: route through the subclass's own text handling.
	    setText(new StringCharacterIterator(newText));
	}

	/**
	 * Returns this iterator's current position without moving it.
	 *
	 * @return this iterator's current position.
	 */
	public abstract int current();

	/**
	 * Sets this iterator's current position to the first boundary and returns
	 * that position.
	 *
	 * @return the position of the first boundary.
	 */
	public abstract int first();

	/**
	 * Sets this iterator's current position to the first boundary following
	 * the given offset and returns that position. Returns {@code DONE} if
	 * there is no boundary after the given offset.
	 *
	 * @param offset
	 *            the given position to be searched for.
	 * @return the position of the first boundary following the given offset,
	 *         or {@code DONE} if there is none.
	 * @throws IllegalArgumentException
	 *             if the offset is invalid.
	 */
	public abstract int following(int offset);

	/**
	 * Returns a {@code CharacterIterator} which represents the text being
	 * analyzed. Note that the returned value is probably the internal
	 * iterator used by this object. A caller that wants to modify the state
	 * of the returned iterator should first create a clone of it and work on
	 * the clone.
	 *
	 * @return a {@code CharacterIterator} which represents the text being
	 *         analyzed.
	 */
	public abstract CharacterIterator getText();

	/**
	 * Sets this iterator's current position to the last boundary and returns
	 * that position.
	 *
	 * @return the position of the last boundary.
	 */
	public abstract int last();

	/**
	 * Sets this iterator's current position to the next boundary after the
	 * current position, and returns this position. Returns {@code DONE} if no
	 * boundary was found after the current position.
	 *
	 * @return the position of the next boundary, or {@code DONE} if there is
	 *         none.
	 */
	public abstract int next();

	/**
	 * Moves this iterator's current position by {@code n} boundaries and
	 * returns the resulting position. Under the {@code java.text.BreakIterator}
	 * API contract, a positive {@code n} moves towards later boundaries, a
	 * negative {@code n} moves towards earlier boundaries, and {@code 0}
	 * leaves the position unchanged. Returns {@code DONE} if the first or
	 * last boundary is reached before {@code n} boundaries have been passed.
	 *
	 * @param n
	 *            the number of boundaries to move by; the sign selects the
	 *            direction.
	 * @return the position of the boundary reached, or {@code DONE} if the
	 *         requested boundary does not exist.
	 */
	public abstract int next(int n);

	/**
	 * Sets this iterator's current position to the previous boundary before the
	 * current position and returns that position. Returns {@code DONE} if
	 * no boundary was found before the current position.
	 *
	 * @return the position of the previous boundary, or {@code DONE} if there
	 *         is none.
	 */
	public abstract int previous();

	/**
	 * Sets the new text to be analyzed, supplied as a
	 * {@code CharacterIterator}. The position is reset to the beginning of
	 * the new text; other status information of this iterator is kept.
	 *
	 * @param newText
	 *            the {@code CharacterIterator} referring to the text to be
	 *            analyzed.
	 */
	public abstract void setText(CharacterIterator newText);

	/**
	 * Creates a copy of this iterator. All status information, including the
	 * current position, is kept the same. If this instance wraps an ICU
	 * iterator, the wrapped iterator is cloned as well so that the copy does
	 * not share it with the original; an instance without a wrapped iterator
	 * (a subclass created through the protected no-argument constructor) is
	 * copied by {@code Object.clone()} alone.
	 *
	 * @return a copy of this iterator.
	 */
	@Override
	public Object clone() {
	    try {
	        BreakIterator copy = (BreakIterator) super.clone();
	        if (wrapped != null) {
	            copy.wrapped = (com.ibm.icu.text.BreakIterator) wrapped.clone();
	        }
	        return copy;
	    } catch (CloneNotSupportedException e) {
	        // Not expected, since this class implements Cloneable; keep the
	        // original exception as the cause for diagnosis.
	        InternalError error = new InternalError(e.getMessage());
	        error.initCause(e);
	        throw error;
	    }
	}

	/**
	* Gets a long value from the given byte array, starting from the given
	* offset.
	*
	* @param buf
	* the bytes to be converted.
	* @param offset
	* the start position of the conversion.
	* @return the converted long value.
	* @throws NullPointerException
	* if {@code buf} is {@code null}.
	* @throws ArrayIndexOutOfBoundsException
	* if {@code offset < 0} or {@code offset + LONG_LENGTH} is
	* greater than the length of {@code buf}.
	*/
	protected static long getLong(byte[] buf, int offset) {
	// Force a buf null check first!
	if (buf.length - offset < LONG_LENGTH \|\| offset < 0) {
	// text.1E=Offset out of bounds \: {0}
	throw new ArrayIndexOutOfBoundsException(Messages.getString("text.1E", offset)); //$NON-NLS-1$
	}
	long result = 0;
	for (int i = offset; i < offset + LONG_LENGTH; i++) {
	result = (result << 8) \| (buf[i] & 0xff);
	}
	return result;
	}

	/**
	* Gets an int value from the given byte array, starting from the given
	* offset.
	*
	* @param buf
	* the bytes to be converted.
	* @param offset
	* the start position of the conversion.
	* @return the converted int value.
	* @throws NullPointerException
	* if {@code buf} is {@code null}.
	* @throws ArrayIndexOutOfBoundsException
	* if {@code offset < 0} or {@code offset + INT_LENGTH} is
	* greater than the length of {@code buf}.
	*/
	protected static int getInt(byte[] buf, int offset) {
	// Force buf null check first!
	if (buf.length - INT_LENGTH < offset \|\| offset < 0) {
	// text.1E=Offset out of bounds \: {0}
	throw new ArrayIndexOutOfBoundsException(Messages.getString("text.1E", offset)); //$NON-NLS-1$
	}
	int result = 0;
	for (int i = offset; i < offset + INT_LENGTH; i++) {
	result = (result << 8) \| (buf[i] & 0xff);
	}
	return result;
	}

	/**
	* Gets a short value from the given byte array, starting from the given
	* offset.
	*
	* @param buf
	* the bytes to be converted.
	* @param offset
	* the start position of the conversion.
	* @return the converted short value.
	* @throws NullPointerException
	* if {@code buf} is {@code null}.
	* @throws ArrayIndexOutOfBoundsException
	* if {@code offset < 0} or {@code offset + SHORT_LENGTH} is
	* greater than the length of {@code buf}.
	*/
	protected static short getShort(byte[] buf, int offset) {
	// Force buf null check first!
	if (buf.length - SHORT_LENGTH < offset \|\| offset < 0) {
	// text.1E=Offset out of bounds \: {0}
	throw new ArrayIndexOutOfBoundsException(Messages.getString("text.1E", offset)); //$NON-NLS-1$
	}
	short result = 0;
	for (int i = offset; i < offset + SHORT_LENGTH; i++) {
	result = (short) ((result << 8) \| (buf[i] & 0xff));
	}
	return result;
	}
	}