#ifndef UIMA_UNICODESTRINGREF_HPP
#define UIMA_UNICODESTRINGREF_HPP
/** \file unistrref.hpp .
-----------------------------------------------------------------------------



           string interface of uima::UnicodeStringRef

 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.

-----------------------------------------------------------------------------


    \brief  Shallow string object consisting of a pair of string pointer and a length

-----------------------------------------------------------------------------
*/

#include "uima/pragmas.hpp" //must be included first to disable warnings

#include <vector>
#include <string>
#include <iostream>

#include "uima/types.h"
#include "uima/assertmsg.h"
#include "uima/ccsid.hpp"
#include "unicode/unistr.h"
#include "unicode/ustring.h"
#include "unicode/uchar.h"
#include "uima/strtools.hpp"

/* ----------------------------------------------------------------------- */
/*       Interface dependencies                                            */
/* ----------------------------------------------------------------------- */

/* ----------------------------------------------------------------------- */
/*       Types / Classes                                                   */
/* ----------------------------------------------------------------------- */

namespace uima {

  /**
   * The class <TT>UnicodeStringRef</TT> provides support for non zero-terminated
   * strings that are presented as pointers to Unicode character arrays
   * with an associated length.
   * As this type of string is supposed to be used only as string reference into
   * read-only buffers, the string pointer is constant.
   * The member functions are named after the icu::UnicodeString interface,
   * but only its const (read-only) operations are provided.
   * This class is a quick, light-weight, shallow string
   * (internally it consists only of a pointer and a length)
   * which can be copied by value without performance penalty.
   * It allows references into other string buffers to be treated like real
   * string objects.
   * Since it does not own its string memory, care must be taken to make sure
   * the lifetime of a UnicodeStringRef object does not exceed the lifetime
   * of the Unicode character buffer it references.
   */
  class UIMA_LINK_IMPORTSPEC UnicodeStringRef {
  public:
    /**
     * Default Constructor
     */
    UnicodeStringRef( void );

    /**
     * Constructor from icu::UnicodeString
     */
    UnicodeStringRef( const icu::UnicodeString & crUniString );

    /**
     * Constructor from zero terminated string
     */
    explicit UnicodeStringRef( UChar const * cpacString );

    /**
     * Constructor from string and length
     */
    UnicodeStringRef( UChar const * cpacString, int32_t uiLength );

    /**
     * Constructor from two pointers (begin/end).
     * Note: end points to the first char <em>behind</em> the string.
     * @param paucStringBegin pointer to the first character of the string
     * @param paucStringEnd   pointer to the first character behind the string
     * @deprecated Replace with UnicodeStringRef(paucStringBegin,paucStringEnd-paucStringBegin).
     */
    UnicodeStringRef( UChar const * paucStringBegin, UChar const * paucStringEnd );

    ///Accessor for the number of bytes occupied by this string
    int32_t getSizeInBytes( void ) const;

    ///CONST Accessor for the string content (NOT ZERO DELIMITED!).
    UChar const * getBuffer( void ) const;

    ///Assignment operator
    UnicodeStringRef & operator=( UnicodeStringRef const & crclRHS );

    ///Equality operator.
    ///Declared to return int rather than bool.
    ///NOTE(review): presumably non-zero when both strings are equal; confirm against the definition.
    int operator==( const UnicodeStringRef & crclRHS ) const;
    ///Inequality operator.
    ///Declared to return int rather than bool.
    ///NOTE(review): presumably non-zero when the strings differ; confirm against the definition.
    int operator!=( const UnicodeStringRef & crclRHS ) const;
    ///less operator
    bool operator< ( UnicodeStringRef const & text ) const;
    ///less equal operator
    bool operator<=( UnicodeStringRef const & text ) const;
    ///greater operator
    bool operator> ( UnicodeStringRef const & text ) const;
    ///greater equal operator
    bool operator>=( UnicodeStringRef const & text ) const;

    /**
     * Compare the characters bitwise in this UnicodeStringRef to
     * the characters in <TT>text</TT>.
     * @param text The UnicodeStringRef to compare to this one.
     * @return The result of bitwise character comparison: 0 if <TT>text</TT>
     * contains the same characters as this, -1 if the characters in
     * <TT>text</TT> are bitwise less than the characters in this, +1 if the
     * characters in <TT>text</TT> are bitwise greater than the characters
     * in this.
     * NOTE(review): current ICU documentation for icu::UnicodeString::compare
     * states the opposite orientation (-1 when this is less than the
     * argument); verify against the implementation.
     * @stable
     */
    inline int8_t compare(const UnicodeStringRef& text) const;

    /**
     * Compare the characters bitwise in this UnicodeStringRef to
     * the characters in <TT>text</TT>.
     * @param text The UnicodeString to compare to this one.
     * @return The result of bitwise character comparison: 0 if <TT>text</TT>
     * contains the same characters as this, -1 if the characters in
     * <TT>text</TT> are bitwise less than the characters in this, +1 if the
     * characters in <TT>text</TT> are bitwise greater than the characters
     * in this.
     * NOTE(review): current ICU documentation for icu::UnicodeString::compare
     * states the opposite orientation (-1 when this is less than the
     * argument); verify against the implementation.
     * @stable
     */
    inline int8_t compare(const icu::UnicodeString& text) const;

    /**
     * Compare the characters bitwise in the range
     * [<TT>start</TT>, <TT>start + length</TT>) with the characters
     * in <TT>srcText</TT>
     * @param start the offset at which the compare operation begins
     * @param length the number of characters of text to compare.
     * @param srcText the text to be compared
     * @return The result of bitwise character comparison: 0 if <TT>srcText</TT>
     * contains the same characters as this, -1 if the characters in
     * <TT>srcText</TT> are bitwise less than the characters in this, +1 if the
     * characters in <TT>srcText</TT> are bitwise greater than the characters
     * in this.
     * NOTE(review): current ICU documentation for icu::UnicodeString::compare
     * states the opposite orientation (-1 when this is less than the
     * argument); verify against the implementation.
     * @stable
     */
    inline int8_t compare(int32_t start,
                          int32_t length,
                          const UnicodeStringRef& srcText) const;

    /**
     * Compare the characters bitwise in the range
     * [<TT>start</TT>, <TT>start + length</TT>) with the characters
     * in <TT>srcText</TT> in the range
     * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
     * @param start the offset at which the compare operation begins
     * @param length the number of characters in this to compare.
     * @param srcText the text to be compared
     * @param srcStart the offset into <TT>srcText</TT> to start comparison
     * @param srcLength the number of characters in <TT>srcText</TT> to compare
     * @return The result of bitwise character comparison: 0 if <TT>srcText</TT>
     * contains the same characters as this, -1 if the characters in
     * <TT>srcText</TT> are bitwise less than the characters in this, +1 if the
     * characters in <TT>srcText</TT> are bitwise greater than the characters
     * in this.
     * NOTE(review): current ICU documentation for icu::UnicodeString::compare
     * states the opposite orientation (-1 when this is less than the
     * argument); verify against the implementation.
     * @stable
     */
    inline int8_t compare(int32_t start,
                          int32_t length,
                          const UnicodeStringRef& srcText,
                          int32_t srcStart,
                          int32_t srcLength) const;

    /**
     * Compare the characters bitwise in this UnicodeStringRef with the first
     * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
     * @param srcChars The characters to compare to this UnicodeStringRef.
     * @param srcLength the number of characters in <TT>srcChars</TT> to compare
     * @return The result of bitwise character comparison: 0 if <TT>srcChars</TT>
     * contains the same characters as this, -1 if the characters in
     * <TT>srcChars</TT> are bitwise less than the characters in this, +1 if the
     * characters in <TT>srcChars</TT> are bitwise greater than the characters
     * in this.
     * NOTE(review): current ICU documentation for icu::UnicodeString::compare
     * states the opposite orientation (-1 when this is less than the
     * argument); verify against the implementation.
     * @stable
     */
    inline int8_t compare(UChar const *srcChars,
                          int32_t srcLength) const;

    /**
     * Compare the characters bitwise in the range
     * [<TT>start</TT>, <TT>start + length</TT>) with the first
     * <TT>length</TT> characters in <TT>srcChars</TT>
     * @param start the offset at which the compare operation begins
     * @param length the number of characters to compare.
     * @param srcChars the characters to be compared
     * @return The result of bitwise character comparison: 0 if <TT>srcChars</TT>
     * contains the same characters as this, -1 if the characters in
     * <TT>srcChars</TT> are bitwise less than the characters in this, +1 if the
     * characters in <TT>srcChars</TT> are bitwise greater than the characters
     * in this.
     * NOTE(review): current ICU documentation for icu::UnicodeString::compare
     * states the opposite orientation (-1 when this is less than the
     * argument); verify against the implementation.
     * @stable
     */
    inline int8_t compare(int32_t start,
                          int32_t length,
                          UChar const *srcChars) const;

    /**
     * Compare the characters bitwise in the range
     * [<TT>start</TT>, <TT>start + length</TT>) with the characters
     * in <TT>srcChars</TT> in the range
     * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
     * @param start the offset at which the compare operation begins
     * @param length the number of characters in this to compare
     * @param srcChars the characters to be compared
     * @param srcStart the offset into <TT>srcChars</TT> to start comparison
     * @param srcLength the number of characters in <TT>srcChars</TT> to compare
     * @return The result of bitwise character comparison: 0 if <TT>srcChars</TT>
     * contains the same characters as this, -1 if the characters in
     * <TT>srcChars</TT> are bitwise less than the characters in this, +1 if the
     * characters in <TT>srcChars</TT> are bitwise greater than the characters
     * in this.
     * NOTE(review): current ICU documentation for icu::UnicodeString::compare
     * states the opposite orientation (-1 when this is less than the
     * argument); verify against the implementation.
     * @stable
     */
    inline int8_t compare(int32_t start,
                          int32_t length,
                          UChar const *srcChars,
                          int32_t srcStart,
                          int32_t srcLength) const;

    /**
     * Compare the characters bitwise in the range
     * [<TT>start</TT>, <TT>limit</TT>) with the characters
     * in <TT>srcText</TT> in the range
     * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
     * @param start the offset at which the compare operation begins
     * @param limit the offset immediately following the compare operation
     * @param srcText the text to be compared
     * @param srcStart the offset into <TT>srcText</TT> to start comparison
     * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
     * @return The result of bitwise character comparison: 0 if <TT>srcText</TT>
     * contains the same characters as this, -1 if the characters in
     * <TT>srcText</TT> are bitwise less than the characters in this, +1 if the
     * characters in <TT>srcText</TT> are bitwise greater than the characters
     * in this.
     * NOTE(review): current ICU documentation for
     * icu::UnicodeString::compareBetween states the opposite orientation
     * (-1 when this is less than the argument); verify against the
     * implementation.
     * @stable
     */
    inline int8_t compareBetween(int32_t start,
                                 int32_t limit,
                                 const UnicodeStringRef& srcText,
                                 int32_t srcStart,
                                 int32_t srcLimit) const;

    /**
     * Compare two Unicode strings in code point order.
     * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
     * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
     * which means that they compare as less than some other BMP characters like U+feff.
     * This function compares Unicode strings in code point order.
     * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
     *
     * @param text Another string to compare this one to.
     * @return a negative/zero/positive integer corresponding to whether
     * this string is less than/equal to/greater than the second one
     * in code point order
     */
    inline int8_t compareCodePointOrder(const UnicodeStringRef& text) const;

    /**
     * Compare two Unicode strings in code point order.
     * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
     * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
     * which means that they compare as less than some other BMP characters like U+feff.
     * This function compares Unicode strings in code point order.
     * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
     *
     * @param start The start offset in this string at which the compare operation begins.
     * @param length The number of code units from this string to compare.
     * @param srcText Another string to compare this one to.
     * @return a negative/zero/positive integer corresponding to whether
     * this string is less than/equal to/greater than the second one
     * in code point order
     */
    inline int8_t compareCodePointOrder(int32_t start,
                                        int32_t length,
                                        const UnicodeStringRef& srcText) const;

    /**
     * Compare two Unicode strings in code point order.
     * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
     * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
     * which means that they compare as less than some other BMP characters like U+feff.
     * This function compares Unicode strings in code point order.
     * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
     *
     * @param start The start offset in this string at which the compare operation begins.
     * @param length The number of code units from this string to compare.
     * @param srcText Another string to compare this one to.
     * @param srcStart The start offset in that string at which the compare operation begins.
     * @param srcLength The number of code units from that string to compare.
     * @return a negative/zero/positive integer corresponding to whether
     * this string is less than/equal to/greater than the second one
     * in code point order
     */
    inline int8_t compareCodePointOrder(int32_t start,
                                        int32_t length,
                                        const UnicodeStringRef& srcText,
                                        int32_t srcStart,
                                        int32_t srcLength) const;

    /**
     * Compare two Unicode strings in code point order.
     * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
     * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
     * which means that they compare as less than some other BMP characters like U+feff.
     * This function compares Unicode strings in code point order.
     * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
     *
     * @param srcChars A pointer to another string to compare this one to.
     * @param srcLength The number of code units from that string to compare.
     * @return a negative/zero/positive integer corresponding to whether
     * this string is less than/equal to/greater than the second one
     * in code point order
     */
    inline int8_t compareCodePointOrder(UChar const *srcChars,
                                        int32_t srcLength) const;

    /**
     * Compare two Unicode strings in code point order.
     * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
     * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
     * which means that they compare as less than some other BMP characters like U+feff.
     * This function compares Unicode strings in code point order.
     * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
     *
     * @param start The start offset in this string at which the compare operation begins.
     * @param length The number of code units from this string to compare.
     * @param srcChars A pointer to another string to compare this one to.
     * @return a negative/zero/positive integer corresponding to whether
     * this string is less than/equal to/greater than the second one
     * in code point order
     */
    inline int8_t compareCodePointOrder(int32_t start,
                                        int32_t length,
                                        UChar const *srcChars) const;

    /**
     * Compare two Unicode strings in code point order.
     * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
     * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
     * which means that they compare as less than some other BMP characters like U+feff.
     * This function compares Unicode strings in code point order.
     * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
     *
     * @param start The start offset in this string at which the compare operation begins.
     * @param length The number of code units from this string to compare.
     * @param srcChars A pointer to another string to compare this one to.
     * @param srcStart The start offset in that string at which the compare operation begins.
     * @param srcLength The number of code units from that string to compare.
     * @return a negative/zero/positive integer corresponding to whether
     * this string is less than/equal to/greater than the second one
     * in code point order
     */
    inline int8_t compareCodePointOrder(int32_t start,
                                        int32_t length,
                                        UChar const *srcChars,
                                        int32_t srcStart,
                                        int32_t srcLength) const;

    /**
     * Compare two Unicode strings in code point order.
     * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
     * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
     * which means that they compare as less than some other BMP characters like U+feff.
     * This function compares Unicode strings in code point order.
     * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
     *
     * @param start The start offset in this string at which the compare operation begins.
     * @param limit The offset after the last code unit from this string to compare.
     * @param srcText Another string to compare this one to.
     * @param srcStart The start offset in that string at which the compare operation begins.
     * @param srcLimit The offset after the last code unit from that string to compare.
     * @return a negative/zero/positive integer corresponding to whether
     * this string is less than/equal to/greater than the second one
     * in code point order
     */
    inline int8_t compareCodePointOrderBetween(int32_t start,
        int32_t limit,
        const UnicodeStringRef& srcText,
        int32_t srcStart,
        int32_t srcLimit) const;

    /**
     * Compare two strings case-insensitively using full case folding.
     * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
     *
     * @param text Another string to compare this one to.
     * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
     * @return A negative, zero, or positive integer indicating the comparison result.
     */
    inline int8_t caseCompare(const UnicodeStringRef& text, uint32_t options) const;

    /**
     * Compare two strings case-insensitively using full case folding.
     * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
     *
     * @param start The start offset in this string at which the compare operation begins.
     * @param length The number of code units from this string to compare.
     * @param srcText Another string to compare this one to.
     * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
     * @return A negative, zero, or positive integer indicating the comparison result.
     */
    inline int8_t caseCompare(int32_t start,
                              int32_t length,
                              const UnicodeStringRef& srcText,
                              uint32_t options) const;

    /**
     * Compare two strings case-insensitively using full case folding.
     * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
     *
     * @param start The start offset in this string at which the compare operation begins.
     * @param length The number of code units from this string to compare.
     * @param srcText Another string to compare this one to.
     * @param srcStart The start offset in that string at which the compare operation begins.
     * @param srcLength The number of code units from that string to compare.
     * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
     * @return A negative, zero, or positive integer indicating the comparison result.
     */
    inline int8_t caseCompare(int32_t start,
                              int32_t length,
                              const UnicodeStringRef& srcText,
                              int32_t srcStart,
                              int32_t srcLength,
                              uint32_t options) const;

    /**
     * Compare two strings case-insensitively using full case folding.
     * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
     *
     * @param srcChars A pointer to another string to compare this one to.
     * @param srcLength The number of code units from that string to compare.
     * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
     * @return A negative, zero, or positive integer indicating the comparison result.
     */
    inline int8_t caseCompare(UChar const *srcChars,
                              int32_t srcLength,
                              uint32_t options) const;

    /**
     * Compare two strings case-insensitively using full case folding.
     * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
     *
     * @param start The start offset in this string at which the compare operation begins.
     * @param length The number of code units from this string to compare.
     * @param srcChars A pointer to another string to compare this one to.
     * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
     * @return A negative, zero, or positive integer indicating the comparison result.
     */
    inline int8_t caseCompare(int32_t start,
                              int32_t length,
                              UChar const *srcChars,
                              uint32_t options) const;

    /**
     * Compare two strings case-insensitively using full case folding.
     * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
     *
     * @param start The start offset in this string at which the compare operation begins.
     * @param length The number of code units from this string to compare.
     * @param srcChars A pointer to another string to compare this one to.
     * @param srcStart The start offset in that string at which the compare operation begins.
     * @param srcLength The number of code units from that string to compare.
     * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
     * @return A negative, zero, or positive integer indicating the comparison result.
     */
    inline int8_t caseCompare(int32_t start,
                              int32_t length,
                              UChar const *srcChars,
                              int32_t srcStart,
                              int32_t srcLength,
                              uint32_t options) const;

    /**
     * Compare two strings case-insensitively using full case folding.
     * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
     *
     * @param start The start offset in this string at which the compare operation begins.
     * @param limit The offset after the last code unit from this string to compare.
     * @param srcText Another string to compare this one to.
     * @param srcStart The start offset in that string at which the compare operation begins.
     * @param srcLimit The offset after the last code unit from that string to compare.
     * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
     * @return A negative, zero, or positive integer indicating the comparison result.
     */
    inline int8_t caseCompareBetween(int32_t start,
                                     int32_t limit,
                                     const UnicodeStringRef& srcText,
                                     int32_t srcStart,
                                     int32_t srcLimit,
                                     uint32_t options) const;

    /**
     * Determine if this starts with the characters in <TT>text</TT>
     * @param text The text to match.
     * @return TRUE if this starts with the characters in <TT>text</TT>,
     * FALSE otherwise
     * @stable
     */
    inline bool startsWith(const UnicodeStringRef& text) const;

    /**
     * Determine if this starts with the characters in <TT>srcText</TT>
     * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
     * @param srcText The text to match.
     * @param srcStart the offset into <TT>srcText</TT> to start matching
     * @param srcLength the number of characters in <TT>srcText</TT> to match
     * @return TRUE if this starts with the selected characters of
     * <TT>srcText</TT>, FALSE otherwise
     * @stable
     */
    inline bool startsWith(const UnicodeStringRef& srcText,
                           int32_t srcStart,
                           int32_t srcLength) const;

    /**
     * Determine if this starts with the characters in <TT>srcChars</TT>
     * @param srcChars The characters to match.
     * @param srcLength the number of characters in <TT>srcChars</TT>
     * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
     * FALSE otherwise
     * @stable
     */
    inline bool startsWith(UChar const *srcChars,
                           int32_t srcLength) const;

    /**
     * Determine if this starts with the characters in <TT>srcChars</TT>
     * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
     * @param srcChars The characters to match.
     * @param srcStart the offset into <TT>srcChars</TT> to start matching
     * @param srcLength the number of characters in <TT>srcChars</TT> to match
     * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
     * FALSE otherwise
     * @stable
     */
    inline bool startsWith(UChar const *srcChars,
                           int32_t srcStart,
                           int32_t srcLength) const;

    /**
     * Determine if this ends with the characters in <TT>text</TT>
     * @param text The text to match.
     * @return TRUE if this ends with the characters in <TT>text</TT>,
     * FALSE otherwise
     * @stable
     */
    inline bool endsWith(const UnicodeStringRef& text) const;

    /**
     * Determine if this ends with the characters in <TT>srcText</TT>
     * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
     * @param srcText The text to match.
     * @param srcStart the offset into <TT>srcText</TT> to start matching
     * @param srcLength the number of characters in <TT>srcText</TT> to match
     * @return TRUE if this ends with the selected characters of
     * <TT>srcText</TT>, FALSE otherwise
     * @stable
     */
    inline bool endsWith(const UnicodeStringRef& srcText,
                         int32_t srcStart,
                         int32_t srcLength) const;

    /**
     * Determine if this ends with the characters in <TT>srcChars</TT>
     * @param srcChars The characters to match.
     * @param srcLength the number of characters in <TT>srcChars</TT>
     * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
     * FALSE otherwise
     * @stable
     */
    inline bool endsWith(UChar const *srcChars,
                         int32_t srcLength) const;

    /**
     * Determine if this ends with the characters in <TT>srcChars</TT>
     * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
     * @param srcChars The characters to match.
     * @param srcStart the offset into <TT>srcChars</TT> to start matching
     * @param srcLength the number of characters in <TT>srcChars</TT> to match
     * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
     * FALSE otherwise
     * @stable
     */
    inline bool endsWith(UChar const *srcChars,
                         int32_t srcStart,
                         int32_t srcLength) const;


    /* Searching - bitwise only */

    /**
     * Locate in this the first occurrence of the characters in <TT>text</TT>,
     * using bitwise comparison.
     * @param text The text to search for.
     * @return The offset into this of the start of <TT>text</TT>,
     * or -1 if not found.
     * @stable
     */
    inline int32_t indexOf(const UnicodeStringRef& text) const;

    /**
     * Locate in this the first occurrence of the characters in <TT>text</TT>
     * starting at offset <TT>start</TT>, using bitwise comparison.
     * @param text The text to search for.
     * @param start The offset at which searching will start.
     * @return The offset into this of the start of <TT>text</TT>,
     * or -1 if not found.
     * @stable
     */
    inline int32_t indexOf(const UnicodeStringRef& text,
                           int32_t start) const;

    /**
     * Locate in this the first occurrence in the range
     * [<TT>start</TT>, <TT>start + length</TT>) of the characters
     * in <TT>text</TT>, using bitwise comparison.
     * @param text The text to search for.
     * @param start The offset at which searching will start.
     * @param length The number of characters to search
     * @return The offset into this of the start of <TT>text</TT>,
     * or -1 if not found.
     * @stable
     */
    inline int32_t indexOf(const UnicodeStringRef& text,
                           int32_t start,
                           int32_t length) const;

    /**
     * Locate in this the first occurrence in the range
     * [<TT>start</TT>, <TT>start + length</TT>) of the characters
     *  in <TT>srcText</TT> in the range
     * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
     * using bitwise comparison.
     * @param srcText The text to search for.
     * @param srcStart the offset into <TT>srcText</TT> at which
     * to start matching
     * @param srcLength the number of characters in <TT>srcText</TT> to match
     * @param start the offset into this at which to start matching
     * @param length the number of characters in this to search
     * @return The offset into this of the start of <TT>srcText</TT>,
     * or -1 if not found.
     * @stable
     */
    inline int32_t indexOf(const UnicodeStringRef& srcText,
                           int32_t srcStart,
                           int32_t srcLength,
                           int32_t start,
                           int32_t length) const;

    /**
     * Locate in this the first occurrence of the characters in
     * <TT>srcChars</TT>
     * starting at offset <TT>start</TT>, using bitwise comparison.
     * @param srcChars The text to search for.
     * @param srcLength the number of characters in <TT>srcChars</TT> to match
     * @param start the offset into this at which to start matching
     * @return The offset into this of the start of <TT>srcChars</TT>,
     * or -1 if not found.
     * @stable
     */
    inline int32_t indexOf(UChar const *srcChars,
                           int32_t srcLength,
                           int32_t start) const;

    /**
     * Locate in this the first occurrence in the range
     * [<TT>start</TT>, <TT>start + length</TT>) of the characters
     * in <TT>srcChars</TT>, using bitwise comparison.
     * @param srcChars The text to search for.
     * @param srcLength the number of characters in <TT>srcChars</TT>
     * @param start The offset at which searching will start.
     * @param length The number of characters to search
     * @return The offset into this of the start of <TT>srcChars</TT>,
     * or -1 if not found.
     * @stable
     */
    inline int32_t indexOf(UChar const *srcChars,
                           int32_t srcLength,
                           int32_t start,
                           int32_t length) const;

    /**
     * Locate in this the first occurrence in the range
     * [<TT>start</TT>, <TT>start + length</TT>) of the characters
     * in <TT>srcChars</TT> in the range
     * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
     * using bitwise comparison.
     * @param srcChars The text to search for.
     * @param srcStart the offset into <TT>srcChars</TT> at which
     * to start matching
     * @param srcLength the number of characters in <TT>srcChars</TT> to match
     * @param start the offset into this at which to start matching
     * @param length the number of characters in this to search
     * @return The offset into this of the start of <TT>srcChars</TT>,
     * or -1 if not found.
     * @stable
     */
    int32_t indexOf(UChar const *srcChars,
                    int32_t srcStart,
                    int32_t srcLength,
                    int32_t start,
                    int32_t length) const;

    /**
     * Locate in this the first occurrence of the code unit <TT>c</TT>,
     * using bitwise comparison.
     * @param c The code unit to search for.
     * @return The offset into this of <TT>c</TT>, or -1 if not found.
     * @stable
     */
    inline int32_t indexOf(UChar c) const;

    /**
     * Locate in this the first occurrence of the code point <TT>c</TT>,
     * using bitwise comparison.
     * @param c The code point to search for.
     * @return The offset into this of <TT>c</TT>, or -1 if not found.
     * @stable
     */
    inline int32_t indexOf(UChar32 c) const;

    /**
     * Locate in this the first occurrence of the code unit <TT>c</TT>
     * starting at offset <TT>start</TT>, using bitwise comparison.
     * @param c The code unit to search for.
     * @param start The offset at which searching will start.
     * @return The offset into this of <TT>c</TT>, or -1 if not found.
     * @stable
     */
    inline int32_t indexOf(UChar c,
                           int32_t start) const;

    /**
     * Locate in this the first occurrence of the code point <TT>c</TT>
     * starting at offset <TT>start</TT>, using bitwise comparison.
     * @param c The code point to search for.
     * @param start The offset at which searching will start.
     * @return The offset into this of <TT>c</TT>, or -1 if not found.
     * @stable
     */
    inline int32_t indexOf(UChar32 c,
                           int32_t start) const;

    /**
     * Locate in this the first occurrence of the code unit <TT>c</TT>
     * in the range [<TT>start</TT>, <TT>start + length</TT>),
     * using bitwise comparison.
     * @param c The code unit to search for.
     * @param start the offset into this at which to start matching
     * @param length the number of characters in this to search
     * @return The offset into this of <TT>c</TT>, or -1 if not found.
     * @stable
     */
    inline int32_t indexOf(UChar c,
                           int32_t start,
                           int32_t length) const;

    /**
     * Locate in this the first occurrence of the code point <TT>c</TT>
     * in the range [<TT>start</TT>, <TT>start + length</TT>),
     * using bitwise comparison.
     * @param c The code point to search for.
     * @param start the offset into this at which to start matching
     * @param length the number of characters in this to search
     * @return The offset into this of <TT>c</TT>, or -1 if not found.
     * @stable
     */
    inline int32_t indexOf(UChar32 c,
                           int32_t start,
                           int32_t length) const;

    /**
     * Locate in this the last occurrence of the characters in <TT>text</TT>,
     * using bitwise comparison.
     * @param text The text to search for.
     * @return The offset into this of the start of <TT>text</TT>,
     * or -1 if not found.
     * @stable
     */
    inline int32_t lastIndexOf(const UnicodeStringRef& text) const;

    /**
     * Locate in this the last occurrence of the characters in <TT>text</TT>
     * starting at offset <TT>start</TT>, using bitwise comparison.
     * @param text The text to search for.
     * @param start The offset at which searching will start.
     * @return The offset into this of the start of <TT>text</TT>,
     * or -1 if not found.
     * @stable
     */
    inline int32_t lastIndexOf(const UnicodeStringRef& text,
                               int32_t start) const;

    /**
     * Locate in this the last occurrence in the range
     * [<TT>start</TT>, <TT>start + length</TT>) of the characters
     * in <TT>text</TT>, using bitwise comparison.
     * @param text The text to search for.
     * @param start The offset at which searching will start.
     * @param length The number of characters to search
     * @return The offset into this of the start of <TT>text</TT>,
     * or -1 if not found.
     * @stable
     */
    inline int32_t lastIndexOf(const UnicodeStringRef& text,
                               int32_t start,
                               int32_t length) const;

    /**
     * Locate in this the last occurrence in the range
     * [<TT>start</TT>, <TT>start + length</TT>) of the characters
     * in <TT>srcText</TT> in the range
     * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
     * using bitwise comparison.
     * @param srcText The text to search for.
     * @param srcStart the offset into <TT>srcText</TT> at which
     * to start matching
     * @param srcLength the number of characters in <TT>srcText</TT> to match
     * @param start the offset into this at which to start matching
     * @param length the number of characters in this to search
     * @return The offset into this of the start of <TT>srcText</TT>,
     * or -1 if not found.
     * @stable
     */
    inline int32_t lastIndexOf(const UnicodeStringRef& srcText,
                               int32_t srcStart,
                               int32_t srcLength,
                               int32_t start,
                               int32_t length) const;

    /**
     * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
     * starting at offset <TT>start</TT>, using bitwise comparison.
     * @param srcChars The text to search for.
     * @param srcLength the number of characters in <TT>srcChars</TT> to match
     * @param start the offset into this at which to start matching
     * @return The offset into this of the start of <TT>srcChars</TT>,
     * or -1 if not found.
     * @stable
     */
    inline int32_t lastIndexOf(UChar const *srcChars,
                               int32_t srcLength,
                               int32_t start) const;

    /**
     * Locate in this the last occurrence in the range
     * [<TT>start</TT>, <TT>start + length</TT>) of the characters
     * in <TT>srcChars</TT>, using bitwise comparison.
     * @param srcChars The text to search for.
     * @param srcLength the number of characters in <TT>srcChars</TT>
     * @param start The offset at which searching will start.
     * @param length The number of characters to search
     * @return The offset into this of the start of <TT>srcChars</TT>,
     * or -1 if not found.
     * @stable
     */
    inline int32_t lastIndexOf(UChar const *srcChars,
                               int32_t srcLength,
                               int32_t start,
                               int32_t length) const;

    /**
     * Locate in this the last occurrence in the range
     * [<TT>start</TT>, <TT>start + length</TT>) of the characters
     * in <TT>srcChars</TT> in the range
     * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
     * using bitwise comparison.
     * @param srcChars The text to search for.
     * @param srcStart the offset into <TT>srcChars</TT> at which
     * to start matching
     * @param srcLength the number of characters in <TT>srcChars</TT> to match
     * @param start the offset into this at which to start matching
     * @param length the number of characters in this to search
     * @return The offset into this of the start of <TT>srcChars</TT>,
     * or -1 if not found.
     * @stable
     */
    int32_t lastIndexOf(UChar const *srcChars,
                        int32_t srcStart,
                        int32_t srcLength,
                        int32_t start,
                        int32_t length) const;

    /**
     * Locate in this the last occurrence of the code unit <TT>c</TT>,
     * using bitwise comparison.
     * @param c The code unit to search for.
     * @return The offset into this of <TT>c</TT>, or -1 if not found.
     * @stable
     */
    inline int32_t lastIndexOf(UChar c) const;

    /**
     * Locate in this the last occurrence of the code point <TT>c</TT>,
     * using bitwise comparison.
     * @param c The code point to search for.
     * @return The offset into this of <TT>c</TT>, or -1 if not found.
     * @stable
     */
    inline int32_t lastIndexOf(UChar32 c) const;

    /**
     * Locate in this the last occurrence of the code unit <TT>c</TT>
     * starting at offset <TT>start</TT>, using bitwise comparison.
     * @param c The code unit to search for.
     * @param start The offset at which searching will start.
     * @return The offset into this of <TT>c</TT>, or -1 if not found.
     * @stable
     */
    inline int32_t lastIndexOf(UChar c,
                               int32_t start) const;

    /**
     * Locate in this the last occurrence of the code point <TT>c</TT>
     * starting at offset <TT>start</TT>, using bitwise comparison.
     * @param c The code point to search for.
     * @param start The offset at which searching will start.
     * @return The offset into this of <TT>c</TT>, or -1 if not found.
     * @stable
     */
    inline int32_t lastIndexOf(UChar32 c,
                               int32_t start) const;

    /**
     * Locate in this the last occurrence of the code unit <TT>c</TT>
     * in the range [<TT>start</TT>, <TT>start + length</TT>),
     * using bitwise comparison.
     * @param c The code unit to search for.
     * @param start the offset into this at which to start matching
     * @param length the number of characters in this to search
     * @return The offset into this of <TT>c</TT>, or -1 if not found.
     * @stable
     */
    inline int32_t lastIndexOf(UChar c,
                               int32_t start,
                               int32_t length) const;

    /**
     * Locate in this the last occurrence of the code point <TT>c</TT>
     * in the range [<TT>start</TT>, <TT>start + length</TT>),
     * using bitwise comparison.
     * @param c The code point to search for.
     * @param start the offset into this at which to start matching
     * @param length the number of characters in this to search
     * @return The offset into this of <TT>c</TT>, or -1 if not found.
     * @stable
     */
    inline int32_t lastIndexOf(UChar32 c,
                               int32_t start,
                               int32_t length) const;


    /* Character access */

    /**
     * Return the code unit at offset <tt>offset</tt>.
     * @param offset a valid offset into the text
     * @returns the code unit at offset <tt>offset</tt>
     * @stable
     */
    inline UChar charAt(int32_t offset) const;

    /**
     * Return the code unit at offset <tt>offset</tt>.
     * @param offset a valid offset into the text
     * @returns the code unit at offset <tt>offset</tt>
     * @stable
     */
    inline UChar operator [] (int32_t offset) const;

    /**
     * Return the code point that contains the code unit
     * at offset <tt>offset</tt>.
     * @param offset a valid offset into the text
     * that indicates the text offset of any of the code units
     * that will be assembled into a code point (21-bit value) and returned
     * @returns the code point of text at <tt>offset</tt>
     * @stable
     */
    inline UChar32 char32At(int32_t offset) const;

    /**
     * Adjust a random-access offset so that
     * it points to the beginning of a Unicode character.
     * The offset that is passed in points to
     * any code unit of a code point,
     * while the returned offset will point to the first code unit
     * of the same code point.
     * In UTF-16, if the input offset points to the second surrogate
     * of a surrogate pair, then the returned offset will point
     * to the first surrogate.
     * @param offset a valid offset into one code point of the text
     * @return offset of the first code unit of the same code point
     */
    inline int32_t getChar32Start(int32_t offset) const;

    /**
     * Adjust a random-access offset so that
     * it points behind a Unicode character.
     * The offset that is passed in points behind
     * any code unit of a code point,
     * while the returned offset will point behind the last code unit
     * of the same code point.
     * In UTF-16, if the input offset points behind the first surrogate
     * (i.e., to the second surrogate)
     * of a surrogate pair, then the returned offset will point
     * behind the second surrogate (i.e., to the first code unit after the pair).
     * @param offset a valid offset after any code unit of a code point of the text
     * @return offset of the first code unit after the same code point
     */
    inline int32_t getChar32Limit(int32_t offset) const;

    /**
     * Move the code unit index along the string by delta code points.
     * Interpret the input index as a code unit-based offset into the string,
     * move the index forward or backward by delta code points, and
     * return the resulting index.
     * The input index should point to the first code unit of a code point,
     * if there is more than one.
     *
     * Both input and output indexes are code unit-based as for all
     * string indexes/offsets in ICU (and other libraries, like MBCS char*).
     * If delta<0 then the index is moved backward (toward the start of the string).
     * If delta>0 then the index is moved forward (toward the end of the string).
     *
     * This behaves like CharacterIterator::move32(delta, kCurrent).
     *
     * Examples:
     * <code>
     * // s has code points 'a' U+10000 'b' U+10ffff U+2029
     * UnicodeStringRef s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
     *
     * // initial index: position of U+10000
     * int32_t index=1;
     *
     * // the following examples will all result in index==4, position of U+10ffff
     *
     * // skip 2 code points from some position in the string
     * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
     *
     * // go to the 3rd code point from the start of s (0-based)
     * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
     *
     * // go to the next-to-last code point of s
     *
     * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
     * </code>
     *
     * @param index input code unit index
     * @param delta (signed) code point count to move the index forward or backward
     *        in the string
     * @return the resulting code unit index
     */
    int32_t moveIndex32(int32_t index, int32_t delta) const;

    /* Substring extraction without conversion */

    /**
     * Copy the characters in the range
     * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
     * beginning at <tt>dstStart</tt>.
     * If the string aliases to <code>dst</code> itself as an external buffer,
     * then extract() will not copy the contents.
     *
     * @param start offset of first character which will be copied into the array
     * @param length the number of characters to extract
     * @param dst array in which to copy characters.  The length of <tt>dst</tt>
     * must be at least (<tt>dstStart + length</tt>).
     * @param dstStart the offset in <TT>dst</TT> where the first character
     * will be extracted
     * @stable
     */
    inline void extract(int32_t start,
                        int32_t length,
                        UChar *dst,
                        int32_t dstStart = 0) const;

    /**
     * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
     * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
     * @param start offset of first character which will be copied into the array
     * @param limit offset immediately following the last character to be copied
     * @param dst array in which to copy characters.  The length of <tt>dst</tt>
     * must be at least (<tt>dstStart + (limit - start)</tt>).
     * @param dstStart the offset in <TT>dst</TT> where the first character
     * will be extracted
     * @stable
     */
    inline void extractBetween(int32_t start,
                               int32_t limit,
                               UChar *dst,
                               int32_t dstStart = 0) const;

    /**
     * Copy the contents of the string into dst.
     * This is a convenience function that
     * checks if there is enough space in dst,
     * extracts the entire string if possible,
     * and NUL-terminates dst if possible.
     *
     * If the string fits into dst but cannot be NUL-terminated
     * (length()==dstCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
     * If the string itself does not fit into dst
     * (length()>dstCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
     *
     * If the string aliases to <code>dst</code> itself as an external buffer,
     * then extract() will not copy the contents.
     *
     * @param dst Destination string buffer.
     * @param dstCapacity Number of UChars available at dst.
     * @param errorCode ICU error code.
     * @return length()
     */
    int32_t
    extract(UChar *dst, int32_t dstCapacity,
            UErrorCode &errorCode) const;

    /**
     * Copy the characters in the range
     * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString
     * <tt>dst</tt>.
     * @param start offset of first character which will be copied
     * @param length the number of characters to extract
     * @param dst UnicodeString into which to copy characters.
     * The result is delivered in <TT>dst</TT>; this function returns nothing.
     * @stable
     */
    inline void extract(int32_t start,
                        int32_t length,
                        icu::UnicodeString& dst) const;

    /**
     * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
     * into the UnicodeString <tt>dst</tt>.
     * @param start offset of first character which will be copied
     * @param limit offset immediately following the last character to be copied
     * @param dst UnicodeString into which to copy characters.
     * The result is delivered in <TT>dst</TT>; this function returns nothing.
     * @stable
     */
    inline void extractBetween(int32_t start,
                               int32_t limit,
                               icu::UnicodeString& dst) const;

    /* Substring extraction with conversion */

    /**
     * Copy the characters in the range
     * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
     * in a specified codepage.
     * The output string is NUL-terminated.
     *
     * @param start offset of first character which will be copied
     * @param startLength the number of characters to extract
     * @param target the target buffer for extraction
     * @param codepage the desired codepage for the characters.  0 has
     * the special meaning of the default codepage
     * If <code>codepage</code> is an empty string (<code>""</code>),
     * then a simple conversion is performed on the codepage-invariant
     * subset ("invariant characters") of the platform encoding. See utypes.h.
     * If <TT>target</TT> is NULL, then the number of bytes required for
     * <TT>target</TT> is returned.
     * NOTE: It is assumed that the target is big enough to fit all of the characters.
     * @return the output string length, not including the terminating NUL
     * @stable
     */
    inline int32_t extract(int32_t start,
                           int32_t startLength,
                           char *target,
                           const char *codepage = 0) const;

    /**
     * Copy the characters in the range
     * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
     * in a specified codepage.
     * This function does not write any more than <code>targetLength</code>
     * characters but returns the length of the entire output string
     * so that one can allocate a larger buffer and call the function again
     * if necessary.
     * The output string is NUL-terminated if possible.
     *
     * @param start offset of first character which will be copied
     * @param startLength the number of characters to extract
     * @param target the target buffer for extraction
     * @param targetLength the length of the target buffer
     * @param codepage the desired codepage for the characters.  0 has
     * the special meaning of the default codepage
     * If <code>codepage</code> is an empty string (<code>""</code>),
     * then a simple conversion is performed on the codepage-invariant
     * subset ("invariant characters") of the platform encoding. See utypes.h.
     * If <TT>target</TT> is NULL, then the number of bytes required for
     * <TT>target</TT> is returned.
     * @return the output string length, not including the terminating NUL
     * @stable
     */
    int32_t extract(int32_t start,
                    int32_t startLength,
                    char *target,
                    uint32_t targetLength,
                    const char *codepage = 0) const;

    /**
     * Convert the UnicodeStringRef into a codepage string using an existing UConverter.
     * The output string is NUL-terminated if possible.
     *
     * This function avoids the overhead of opening and closing a converter if
     * multiple strings are extracted.
     *
     * @param target destination string buffer, can be NULL if targetCapacity==0
     * @param targetCapacity the number of chars available at target
     * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
     *        or NULL for the default converter
     * @param errorCode normal ICU error code
     * @return the length of the output string, not counting the terminating NUL;
     *         if the length is greater than targetCapacity, then the string will not fit
     *         and a buffer of the indicated length would need to be passed in
     * @stable
     */
    int32_t extract(char *target, int32_t targetCapacity,
                    UConverter *cnv,
                    UErrorCode &errorCode) const;

    /**
     * Copy the characters in the range
     * [<tt>start</TT>, <tt>start + length</TT>) into a std::string object
     * in a specified codepage.
     * The output string is NUL-terminated.
     *
     * @param start offset of first character which will be copied
     * @param startLength the number of characters to extract
     * @param target the target string for extraction
     * @param codepage the desired codepage for the characters.  0 has
     * the special meaning of the default codepage.
     * If <code>codepage</code> is an empty string (<code>""</code>),
     * then a simple conversion is performed on the codepage-invariant
     * subset ("invariant characters") of the platform encoding. See utypes.h.
     * @return the output string length, not including the terminating NUL
     * @stable
     */
    int32_t extract(int32_t start,
                    int32_t startLength,
                    std::string & target,
                    const char *codepage = 0) const;

    /**
     * Copy all the characters in the string into an std::string object
     * in a specified codepage.  Equivalent to 
     * extract(0, length(), target, codepage)
     *
     * @param target the target string for extraction
     * @param codepage the desired codepage for the characters.
     * @return the output string length, not including the terminating NUL
     * @stable
     */
    inline int32_t extract(std::string & target,
                           const char *codepage = 0) const;

    /**
     * Copy all the characters in the string into an std::string object
     * in UTF-8.  Slightly more efficient than asUTF8() as it avoids
     * one copy.
     *
     * @param target the target string for extraction
     * @return the output string length, not including the terminating NUL
     */
    int32_t extractUTF8(std::string & target) const;

    /**
     * Convert to a UTF8 string
     * @return a std::string
     */
    inline std::string asUTF8(void) const;

    /**
     * Release contents of string container allocated by extract methods
     * Useful when caller and callee use different heaps, 
     * e.g. when debug code uses a release library.
     * Is static so can be called on the <TT>UnicodeStringRef</TT> class directly.
     */
    static void release(std::string & target);

    /* Length operations */

    /**
     * Return the length of the UnicodeStringRef object.
     * The length is the number of characters in the text.
     * @returns the length of the UnicodeStringRef object
     * @stable
     */
    inline int32_t  length(void) const;

    /**
     * Count Unicode code points in the length UChar code units of the string.
     * A code point may occupy either one or two UChar code units.
     * Counting code points involves reading all code units.
     *
     * This function is basically the inverse of moveIndex32().
     *
     * @param start the index of the first code unit to check
     * @param length the number of UChar code units to check
     * @return the number of code points in the specified code units
     */
    int32_t
    countChar32(int32_t start=0, int32_t length=0x7fffffff) const;

    /**
     * Determine if this string is empty.
     * @return TRUE if this string contains 0 characters, FALSE otherwise.
     */
    inline bool isEmpty(void) const;

    /**
     * Set the text in the UnicodeStringRef object to the characters in
     * <TT>srcText</TT>.
     * <TT>srcText</TT> is not modified.
     * @param srcText the source for the new characters
     * @return a reference to this
     * @stable
     */
    inline UnicodeStringRef& setTo(const UnicodeStringRef& srcText);

    /**
     * Set the text in the UnicodeStringRef object to the characters in
     * <TT>srcText</TT>.
     * <TT>srcText</TT> is not modified.
     * @param srcText the source for the new characters
     * @return a reference to this
     * @stable
     */
  inline UnicodeStringRef& setTo(const icu::UnicodeString& srcText);

    /**
     * Set the characters in the UnicodeStringRef object to the characters
     * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
     * @param srcChars the source for the new characters
     * @param srcLength the number of Unicode characters in srcChars.
     * @return a reference to this
     * @stable
     */
    inline UnicodeStringRef& setTo(const UChar *srcChars,
                                   int32_t srcLength);
    /**
     * Print a single byte version to outStream.
     * The encoding is UTF-8 if outStream is directed to disk;
     * if outStream is cout or cerr, the encoding is a Console-CCSID
     * that will allow most characters to be readable in a shell/command window.
     */
    void toSingleByteStream(std::ostream & outStream) const;



  private:
    /* --- functions -------------------------------------------------------- */

    inline int8_t
    doCompare( int32_t start,
               int32_t length,
               const UnicodeStringRef& srcText,
               int32_t srcStart,
               int32_t srcLength) const;

    int8_t
    doCompare( int32_t start,
               int32_t length,
               const UChar *srcChars,
               int32_t srcStart,
               int32_t srcLength) const;
    inline int8_t
    doCompareCodePointOrder(int32_t start,
                            int32_t length,
                            const UnicodeStringRef& srcText,
                            int32_t srcStart,
                            int32_t srcLength) const;
    int8_t
    doCompareCodePointOrder(int32_t start,
                            int32_t length,
                            const UChar *srcChars,
                            int32_t srcStart,
                            int32_t srcLength) const;
    inline int8_t
    doCaseCompare(int32_t start,
                  int32_t length,
                  const UnicodeStringRef& srcText,
                  int32_t srcStart,
                  int32_t srcLength,
                  uint32_t options) const;

    int8_t
    doCaseCompare(int32_t start,
                  int32_t length,
                  const UChar *srcChars,
                  int32_t srcStart,
                  int32_t srcLength,
                  uint32_t options) const;
    int32_t doIndexOf(UChar c,
                      int32_t start,
                      int32_t length) const;
    int32_t doLastIndexOf(UChar c,
                          int32_t start,
                          int32_t length) const;

    inline void doExtract(int32_t start,
                          int32_t length,
                          UChar *dst,
                          int32_t dstStart) const;
    inline void doExtract(int32_t start,
                          int32_t length,
                          icu::UnicodeString& dst) const;

    inline void
    pinIndices(int32_t& start,
               int32_t& length) const;
    // constants
    enum {
      kInvalidUChar=0xffff // invalid UChar index
    };
    /* --- variables -------------------------------------------------------- */
    // Pointer to the first referenced UChar code unit. The constructors only
    // store the pointer they are given; their assertions allow NULL solely in
    // combination with a length of 0.
    UChar const * iv_pUChars;
    // Number of UChar code units referenced; this is the value returned by length().
    int32_t        iv_uiLength;
  }
  ;  // class UnicodeStringRef

  ///Output stream support for UnicodeStringRef (Note: inside namespace)
  UIMA_LINK_IMPORTSPEC std::ostream &
  operator << (
    std::ostream                & outStream,
    const uima::UnicodeStringRef & crUStrRef
  );
} // namespace uima


/* ----------------------------------------------------------------------- */
/*  Implementation UnicodeStringRef                                        */
/* ----------------------------------------------------------------------- */

namespace uima {

  /** Default constructor: an empty reference with a NULL buffer and length 0. */
  inline UnicodeStringRef::UnicodeStringRef()
      : iv_pUChars(NULL), iv_uiLength(0) {
  }

  /**
   * Constructs a reference to the characters of an ICU UnicodeString.
   * Only the buffer pointer and the length are stored; nothing is copied.
   * @param crUniString  string whose buffer is referenced
   */
  inline UnicodeStringRef::UnicodeStringRef(const icu::UnicodeString & crUniString)
      : iv_pUChars(crUniString.getBuffer()),
        iv_uiLength(crUniString.length()) {
  }

  /**
   * Constructs a reference to a zero-terminated UChar string.
   * The length is measured with u_strlen(); a NULL pointer gives length 0.
   * @param cpacString  zero-terminated string, or NULL
   */
  inline UnicodeStringRef::UnicodeStringRef(UChar const * cpacString)
      : iv_pUChars(cpacString),
        iv_uiLength((cpacString != NULL) ? u_strlen(cpacString) : 0) {
    // Invariant: either a buffer exists, or the reference is NULL and empty.
    assert(   (EXISTS(iv_pUChars) )
              || ((iv_pUChars == NULL       ) && (iv_uiLength == 0)) );
  }

  /**
   * Constructs a reference from a buffer pointer and an explicit length.
   * @param cpacString  first character of the referenced range (may be NULL
   *                    only together with a length of 0, see assertion)
   * @param uiLength    number of UChars in the range
   */
  inline UnicodeStringRef::UnicodeStringRef(UChar const * cpacString,
                                            int32_t        uiLength)
      : iv_pUChars(cpacString),
        iv_uiLength(uiLength) {
    // Invariant: either a buffer exists, or the reference is NULL and empty.
    assert(   (EXISTS(iv_pUChars) )
              || ((iv_pUChars == NULL       ) && (iv_uiLength == 0)) );
  }

  /**
   * Constructs a reference from a begin/end pointer pair.
   * The length is the pointer difference paucStringEnd - paucStringBegin.
   * @param paucStringBegin  first character of the range (asserted to exist)
   * @param paucStringEnd    one past the last character (asserted to exist
   *                         and not to precede paucStringBegin)
   */
  inline UnicodeStringRef::UnicodeStringRef(UChar const * paucStringBegin,
                                            UChar const * paucStringEnd)
      : iv_pUChars(paucStringBegin),
        iv_uiLength(static_cast<int32_t>(paucStringEnd - paucStringBegin)) {
    assert(EXISTS(paucStringBegin));
    assert(EXISTS(paucStringEnd));
    assert(paucStringEnd >= paucStringBegin);
    // Same class invariant as checked by the other constructors.
    assert(   (EXISTS(iv_pUChars) )
              || ((iv_pUChars == NULL) && (iv_uiLength == 0)) );
  }

  /** Returns the stored length, i.e. the number of UChars referenced. */
  inline int32_t UnicodeStringRef::length() const {
    return this->iv_uiLength;
  }

  /** Returns the size of the referenced characters in bytes (length * sizeof(UChar)). */
  inline int32_t UnicodeStringRef::getSizeInBytes() const {
    return static_cast<int32_t>(iv_uiLength * sizeof(UChar));
  }

  /**
   * Unchecked element access (checked by assertions in debug builds only).
   * @param uiIndex  index of the UChar to return; must satisfy
   *                 0 <= uiIndex < length()
   * @return the UChar at position uiIndex
   */
  inline UChar
  UnicodeStringRef::operator[]( int32_t uiIndex ) const {
    // The index is signed, so the lower bound has to be checked as well.
    assert(uiIndex >= 0);
    assert(uiIndex < iv_uiLength);
    assert(EXISTS(iv_pUChars));
    return iv_pUChars[uiIndex];  //lint !e613: Possible use of null pointer 'UnicodeStringRef<wchar_t>::iv_pUChars' in left argument to operator '['
  }

  /**
   * Equality: both references have the same length and the same sequence of
   * UChars.
   *
   * The comparison uses u_memcmp() rather than u_strncmp(): u_strncmp() stops
   * at the first NUL code unit, which would make two strings that differ only
   * after an embedded U+0000 compare equal. A reference is defined by pointer
   * and length, so every code unit in the range has to be compared.
   *
   * @param crclRHS  string to compare with
   * @return non-zero if equal, 0 otherwise
   */
  inline int
  UnicodeStringRef::operator==( const UnicodeStringRef & crclRHS ) const {
    if (iv_uiLength != crclRHS.iv_uiLength) {
      return false;
    }
    // u_memcmp() does not touch the buffers when the count is 0, so two empty
    // references (including NULL buffers) compare equal.
    return u_memcmp(iv_pUChars, crclRHS.iv_pUChars, iv_uiLength) == 0;
  }

  /** Inequality: the negation of operator==. Returns 1 if different, 0 if equal. */
  inline int
  UnicodeStringRef::operator!=( const UnicodeStringRef & crclRHS ) const {
    return ((*this) == crclRHS) ? 0 : 1;
  }

  /**
   * Assignment: makes this object refer to the same buffer and length as
   * crclRHS. No characters are copied.
   */
  inline UnicodeStringRef &
  UnicodeStringRef::operator=( UnicodeStringRef const & crclRHS ) {
    this->iv_uiLength = crclRHS.iv_uiLength;
    this->iv_pUChars  = crclRHS.iv_pUChars;
    return *this;
  }

//========================================
// Read-only alias methods
//========================================
  /**
   * Clamps a (start, length) pair so that it denotes a range lying completely
   * inside this string: afterwards 0 <= start <= length() and
   * 0 <= length <= length() - start.
   *
   * Negative values are clamped to 0 as well (as ICU's UnicodeString does);
   * without that, a negative length would survive pinning and be passed on,
   * e.g. as a size to memcpy() in extract().
   *
   * @param start   (in/out) start index to pin
   * @param length  (in/out) length to pin
   */
  inline void
  UnicodeStringRef::pinIndices(int32_t& start,
                               int32_t& length) const {
    // pin start into [0, iv_uiLength]
    if (start < 0) {
      start = 0;
    } else if (start > iv_uiLength) {
      start = iv_uiLength;
    }
    // pin length into [0, iv_uiLength - start]
    if (length < 0) {
      length = 0;
    } else if (length > (iv_uiLength - start)) {
      length = (iv_uiLength - start);
    }
  }

  /** True if doCompare() over both complete strings yields exactly 1. */
  inline bool
  UnicodeStringRef::operator> (const UnicodeStringRef& text) const {
    const int8_t order = doCompare(0, iv_uiLength, text, 0, text.iv_uiLength);
    return order == 1;
  }

  /** True if doCompare() over both complete strings yields exactly -1. */
  inline bool
  UnicodeStringRef::operator< (const UnicodeStringRef& text) const {
    const int8_t order = doCompare(0, iv_uiLength, text, 0, text.iv_uiLength);
    return order == -1;
  }

  /** True unless doCompare() over both complete strings yields -1. */
  inline bool
  UnicodeStringRef::operator>= (const UnicodeStringRef& text) const {
    const int8_t order = doCompare(0, iv_uiLength, text, 0, text.iv_uiLength);
    return order != -1;
  }

  /** True unless doCompare() over both complete strings yields 1. */
  inline bool
  UnicodeStringRef::operator<= (const UnicodeStringRef& text) const {
    const int8_t order = doCompare(0, iv_uiLength, text, 0, text.iv_uiLength);
    return order != 1;
  }

  /** Compares all of this string with all of text; returns the doCompare() result. */
  inline int8_t
  UnicodeStringRef::compare(const UnicodeStringRef& text) const {
    const int32_t ownLength  = iv_uiLength;
    const int32_t textLength = text.iv_uiLength;
    return doCompare(0, ownLength, text, 0, textLength);
  }

  /**
   * Compares the range [start, start + length) of this string with all of
   * srcText; returns the doCompare() result.
   */
  inline int8_t
  UnicodeStringRef::compare(int32_t start,
                            int32_t length,
                            const UnicodeStringRef& srcText) const {
    const int32_t srcLength = srcText.iv_uiLength;
    return doCompare(start, length, srcText, 0, srcLength);
  }

  /**
   * Compares all of this string with the first srcLength UChars of srcChars;
   * returns the doCompare() result.
   */
  inline int8_t
  UnicodeStringRef::compare(const UChar *srcChars,
                            int32_t srcLength) const {
    const int32_t ownLength = iv_uiLength;
    return doCompare(0, ownLength, srcChars, 0, srcLength);
  }


  /**
   * Compares all of this string with the contents of an ICU UnicodeString
   * (its buffer and length); returns the doCompare() result.
   */
  inline int8_t
  UnicodeStringRef::compare(icu::UnicodeString const  &src ) const {
    const UChar * const srcChars  = src.getBuffer();
    const int32_t       srcLength = src.length();
    return doCompare(0, iv_uiLength, srcChars, 0, srcLength);
  }


  /**
   * Compares the range [start, start + length) of this string with the first
   * length UChars of srcChars (the same length is used on both sides);
   * returns the doCompare() result.
   */
  inline int8_t
  UnicodeStringRef::compare(int32_t start,
                            int32_t length,
                            const UChar *srcChars) const {
    const int32_t srcLength = length;  // source uses the same length
    return doCompare(start, length, srcChars, 0, srcLength);
  }

  /**
   * Compares the range [start, start + length) of this string with the range
   * [srcStart, srcStart + srcLength) of srcChars. Plain forwarder to
   * doCompare().
   */
  inline int8_t
  UnicodeStringRef::compare(int32_t start,
                            int32_t length,
                            const UChar *srcChars,
                            int32_t srcStart,
                            int32_t srcLength) const {
    const int8_t order = doCompare(start, length, srcChars, srcStart, srcLength);
    return order;
  }

  /**
   * Compares the range [start, start + length) of this string with the range
   * [srcStart, srcStart + srcLength) of srcText. Plain forwarder to
   * doCompare().
   */
  inline int8_t
  UnicodeStringRef::compare(int32_t start,
                            int32_t length,
                            const UnicodeStringRef& srcText,
                            int32_t srcStart,
                            int32_t srcLength) const {
    const int8_t order = doCompare(start, length, srcText, srcStart, srcLength);
    return order;
  }

  /**
   * Like compare(), but both ranges are given as [start, limit) index pairs
   * instead of (start, length); the lengths are computed as limit - start.
   */
  inline int8_t
  UnicodeStringRef::compareBetween(int32_t start,
                                   int32_t limit,
                                   const UnicodeStringRef& srcText,
                                   int32_t srcStart,
                                   int32_t srcLimit) const {
    const int32_t length    = limit - start;
    const int32_t srcLength = srcLimit - srcStart;
    return doCompare(start, length, srcText, srcStart, srcLength);
  }

  /**
   * Compares a range of this string with a range of srcText by forwarding to
   * the UChar-pointer overload of doCompare().
   *
   * The source range is first pinned to srcText's own bounds. The pointer
   * overload only receives a raw buffer and cannot know where srcText ends,
   * so without pinning an over-long srcLength would make it read past the
   * characters srcText refers to.
   *
   * @param start      start of the range in this string
   * @param length     length of the range in this string
   * @param srcText    string to compare with
   * @param srcStart   start of the range in srcText
   * @param srcLength  length of the range in srcText
   * @return the result of the pointer-based doCompare()
   */
  inline int8_t
  UnicodeStringRef::doCompare(int32_t start,
                              int32_t length,
                              const UnicodeStringRef& srcText,
                              int32_t srcStart,
                              int32_t srcLength) const {
    srcText.pinIndices(srcStart, srcLength);
    const UChar *srcChars = srcText.getBuffer();
    return doCompare(start, length, srcChars, srcStart, srcLength);
  }

  /**
   * Compares all of this string with all of text via
   * doCompareCodePointOrder() and returns its result.
   */
  inline int8_t
  UnicodeStringRef::compareCodePointOrder(const UnicodeStringRef& text) const {
    const int32_t ownLength  = iv_uiLength;
    const int32_t textLength = text.iv_uiLength;
    return doCompareCodePointOrder(0, ownLength, text, 0, textLength);
  }

  /**
   * Compares the range [start, start + length) of this string with all of
   * srcText via doCompareCodePointOrder().
   */
  inline int8_t
  UnicodeStringRef::compareCodePointOrder(int32_t start,
                                          int32_t length,
                                          const UnicodeStringRef& srcText) const {
    const int32_t srcLength = srcText.iv_uiLength;
    return doCompareCodePointOrder(start, length, srcText, 0, srcLength);
  }

  /**
   * Compares all of this string with the first srcLength UChars of srcChars
   * via doCompareCodePointOrder().
   */
  inline int8_t
  UnicodeStringRef::compareCodePointOrder(const UChar *srcChars,
                                          int32_t srcLength) const {
    const int32_t ownLength = iv_uiLength;
    return doCompareCodePointOrder(0, ownLength, srcChars, 0, srcLength);
  }

  /**
   * Compares a range of this string with a range of srcText. Plain forwarder
   * to doCompareCodePointOrder().
   */
  inline int8_t
  UnicodeStringRef::compareCodePointOrder(int32_t start,
                                          int32_t length,
                                          const UnicodeStringRef& srcText,
                                          int32_t srcStart,
                                          int32_t srcLength) const {
    const int8_t order =
      doCompareCodePointOrder(start, length, srcText, srcStart, srcLength);
    return order;
  }

  /**
   * Compares the range [start, start + length) of this string with the first
   * length UChars of srcChars (same length on both sides) via
   * doCompareCodePointOrder().
   */
  inline int8_t
  UnicodeStringRef::compareCodePointOrder(int32_t start,
                                          int32_t length,
                                          const UChar *srcChars) const {
    const int32_t srcLength = length;  // source uses the same length
    return doCompareCodePointOrder(start, length, srcChars, 0, srcLength);
  }

  /**
   * Compares a range of this string with a range of srcChars. Plain forwarder
   * to doCompareCodePointOrder().
   */
  inline int8_t
  UnicodeStringRef::compareCodePointOrder(int32_t start,
                                          int32_t length,
                                          const UChar *srcChars,
                                          int32_t srcStart,
                                          int32_t srcLength) const {
    const int8_t order =
      doCompareCodePointOrder(start, length, srcChars, srcStart, srcLength);
    return order;
  }

  /**
   * Like compareCodePointOrder(), but both ranges are given as [start, limit)
   * index pairs; the lengths are computed as limit - start.
   */
  inline int8_t
  UnicodeStringRef::compareCodePointOrderBetween(int32_t start,
      int32_t limit,
      const UnicodeStringRef& srcText,
      int32_t srcStart,
      int32_t srcLimit) const {
    const int32_t length    = limit - start;
    const int32_t srcLength = srcLimit - srcStart;
    return doCompareCodePointOrder(start, length, srcText, srcStart, srcLength);
  }

  /**
   * Compares a range of this string with a range of srcText by forwarding to
   * the UChar-pointer overload of doCompareCodePointOrder().
   *
   * The source range is first pinned to srcText's own bounds. The pointer
   * overload only sees a raw buffer, so without pinning an over-long
   * srcLength would make it read past the characters srcText refers to.
   *
   * @param start      start of the range in this string
   * @param length     length of the range in this string
   * @param srcText    string to compare with
   * @param srcStart   start of the range in srcText
   * @param srcLength  length of the range in srcText
   * @return the result of the pointer-based doCompareCodePointOrder()
   */
  inline int8_t
  UnicodeStringRef::doCompareCodePointOrder(int32_t start,
      int32_t length,
      const UnicodeStringRef& srcText,
      int32_t srcStart,
      int32_t srcLength) const {
    srcText.pinIndices(srcStart, srcLength);
    const UChar *srcChars = srcText.getBuffer();
    return doCompareCodePointOrder(start, length, srcChars, srcStart, srcLength);
  }

  /**
   * Compares all of this string with all of text via doCaseCompare(),
   * passing options through unchanged.
   */
  inline int8_t
  UnicodeStringRef::caseCompare(const UnicodeStringRef &text, uint32_t options) const {
    const int32_t ownLength  = iv_uiLength;
    const int32_t textLength = text.iv_uiLength;
    return doCaseCompare(0, ownLength, text, 0, textLength, options);
  }

  /**
   * Compares the range [start, start + length) of this string with all of
   * srcText via doCaseCompare(), passing options through unchanged.
   */
  inline int8_t
  UnicodeStringRef::caseCompare(int32_t start,
                                int32_t length,
                                const UnicodeStringRef &srcText,
                                uint32_t options) const {
    const int32_t srcLength = srcText.iv_uiLength;
    return doCaseCompare(start, length, srcText, 0, srcLength, options);
  }

  /**
   * Compares all of this string with the first srcLength UChars of srcChars
   * via doCaseCompare(), passing options through unchanged.
   */
  inline int8_t
  UnicodeStringRef::caseCompare(const UChar *srcChars,
                                int32_t srcLength,
                                uint32_t options) const {
    const int32_t ownLength = iv_uiLength;
    return doCaseCompare(0, ownLength, srcChars, 0, srcLength, options);
  }

  /**
   * Compares a range of this string with a range of srcText. Plain forwarder
   * to doCaseCompare().
   */
  inline int8_t
  UnicodeStringRef::caseCompare(int32_t start,
                                int32_t length,
                                const UnicodeStringRef &srcText,
                                int32_t srcStart,
                                int32_t srcLength,
                                uint32_t options) const {
    const int8_t order =
      doCaseCompare(start, length, srcText, srcStart, srcLength, options);
    return order;
  }

  /**
   * Compares the range [start, start + length) of this string with the first
   * length UChars of srcChars (same length on both sides) via
   * doCaseCompare().
   */
  inline int8_t
  UnicodeStringRef::caseCompare(int32_t start,
                                int32_t length,
                                const UChar *srcChars,
                                uint32_t options) const {
    const int32_t srcLength = length;  // source uses the same length
    return doCaseCompare(start, length, srcChars, 0, srcLength, options);
  }

  /**
   * Compares a range of this string with a range of srcChars. Plain forwarder
   * to doCaseCompare().
   */
  inline int8_t
  UnicodeStringRef::caseCompare(int32_t start,
                                int32_t length,
                                const UChar *srcChars,
                                int32_t srcStart,
                                int32_t srcLength,
                                uint32_t options) const {
    const int8_t order =
      doCaseCompare(start, length, srcChars, srcStart, srcLength, options);
    return order;
  }

  /**
   * Like caseCompare(), but both ranges are given as [start, limit) index
   * pairs; the lengths are computed as limit - start.
   */
  inline int8_t
  UnicodeStringRef::caseCompareBetween(int32_t start,
                                       int32_t limit,
                                       const UnicodeStringRef &srcText,
                                       int32_t srcStart,
                                       int32_t srcLimit,
                                       uint32_t options) const {
    const int32_t length    = limit - start;
    const int32_t srcLength = srcLimit - srcStart;
    return doCaseCompare(start, length, srcText, srcStart, srcLength, options);
  }

  /**
   * Compares a range of this string with a range of srcText by forwarding to
   * the UChar-pointer overload of doCaseCompare().
   *
   * The source range is first pinned to srcText's own bounds. The pointer
   * overload only sees a raw buffer, so without pinning an over-long
   * srcLength would make it read past the characters srcText refers to.
   *
   * @param start      start of the range in this string
   * @param length     length of the range in this string
   * @param srcText    string to compare with
   * @param srcStart   start of the range in srcText
   * @param srcLength  length of the range in srcText
   * @param options    passed through unchanged to the pointer overload
   * @return the result of the pointer-based doCaseCompare()
   */
  inline int8_t
  UnicodeStringRef::doCaseCompare(int32_t start,
                                  int32_t length,
                                  const UnicodeStringRef &srcText,
                                  int32_t srcStart,
                                  int32_t srcLength,
                                  uint32_t options) const {
    srcText.pinIndices(srcStart, srcLength);
    const UChar *srcChars = srcText.getBuffer();
    return doCaseCompare(start, length, srcChars, srcStart, srcLength, options);
  }

  /**
   * Searches all of this string for all of text; forwards to the
   * five-argument indexOf() and returns its result.
   */
  inline int32_t
  UnicodeStringRef::indexOf(const UnicodeStringRef& text) const {
    const int32_t textLength = text.iv_uiLength;
    const int32_t ownLength  = iv_uiLength;
    return indexOf(text, 0, textLength, 0, ownLength);
  }

  /**
   * Searches this string from index start to its end for all of text;
   * forwards to the five-argument indexOf().
   */
  inline int32_t
  UnicodeStringRef::indexOf(const UnicodeStringRef& text,
                            int32_t start) const {
    const int32_t textLength = text.iv_uiLength;
    const int32_t remaining  = iv_uiLength - start;
    return indexOf(text, 0, textLength, start, remaining);
  }

  /**
   * Searches the range [start, start + length) of this string for all of
   * text; forwards to the five-argument indexOf().
   */
  inline int32_t
  UnicodeStringRef::indexOf(const UnicodeStringRef& text,
                            int32_t start,
                            int32_t length) const {
    const int32_t textLength = text.iv_uiLength;
    return indexOf(text, 0, textLength, start, length);
  }

  /**
   * Searches the range [start, start + length) of this string for the range
   * [srcStart, srcStart + srcLength) of srcText by forwarding to the
   * UChar-pointer overload of indexOf().
   *
   * The source range is first pinned to srcText's own bounds. The pointer
   * overload only sees a raw buffer, so without pinning an over-long
   * srcLength would make it read past the characters srcText refers to.
   *
   * @return the result of the pointer-based indexOf()
   */
  inline int32_t
  UnicodeStringRef::indexOf(const UnicodeStringRef& srcText,
                            int32_t srcStart,
                            int32_t srcLength,
                            int32_t start,
                            int32_t length) const {
    srcText.pinIndices(srcStart, srcLength);
    return indexOf(srcText.getBuffer(), srcStart, srcLength, start, length);
  }

  /**
   * Searches this string from index start to its end for the first srcLength
   * UChars of srcChars; forwards to the five-argument pointer overload.
   */
  inline int32_t
  UnicodeStringRef::indexOf(const UChar *srcChars,
                            int32_t srcLength,
                            int32_t start) const {
    const int32_t remaining = iv_uiLength - start;
    return indexOf(srcChars, 0, srcLength, start, remaining);
  }

  /**
   * Searches the range [start, start + length) of this string for the first
   * srcLength UChars of srcChars; forwards to the five-argument pointer
   * overload with a source start of 0.
   */
  inline int32_t
  UnicodeStringRef::indexOf(const UChar *srcChars,
                            int32_t srcLength,
                            int32_t start,
                            int32_t length) const {
    const int32_t position = indexOf(srcChars, 0, srcLength, start, length);
    return position;
  }

  /** Searches all of this string for the code unit c via doIndexOf(). */
  inline int32_t
  UnicodeStringRef::indexOf(UChar c) const {
    const int32_t ownLength = iv_uiLength;
    return doIndexOf(c, 0, ownLength);
  }

  /**
   * Searches all of this string for the code point c.
   *
   * A BMP code point is a single code unit and is searched with doIndexOf().
   * A supplementary code point is first encoded as a surrogate pair and then
   * searched as a two-unit string.
   *
   * Uses the current ICU macros U_IS_BMP / U16_MAX_LENGTH /
   * U16_APPEND_UNSAFE instead of UTF_NEED_MULTIPLE_UCHAR /
   * UTF_MAX_CHAR_LENGTH / UTF_APPEND_CHAR_UNSAFE, which come from the
   * obsolete utf_old.h (deprecated since ICU 2.4 and not available by default
   * in recent ICU releases). For UTF-16 the replacements are equivalent.
   *
   * @param c  code point to search for
   * @return the result of the underlying search function
   */
  inline int32_t
  UnicodeStringRef::indexOf(UChar32 c) const {
    if (U_IS_BMP(c)) {
      return doIndexOf((UChar)c, 0, iv_uiLength);
    } else {
      UChar buffer[U16_MAX_LENGTH];
      int32_t length = 0;
      U16_APPEND_UNSAFE(buffer, length, c);
      return indexOf(buffer, length, 0);
    }
  }

  /**
   * Searches this string from index start to its end for the code unit c
   * via doIndexOf().
   */
  inline int32_t
  UnicodeStringRef::indexOf(UChar c,
                            int32_t start) const {
    const int32_t remaining = iv_uiLength - start;
    return doIndexOf(c, start, remaining);
  }

  /**
   * Searches this string from index start to its end for the code point c.
   *
   * A BMP code point is searched as a single code unit with doIndexOf(); a
   * supplementary code point is encoded as a surrogate pair and searched as
   * a two-unit string.
   *
   * Uses U_IS_BMP / U16_MAX_LENGTH / U16_APPEND_UNSAFE instead of the
   * equivalent macros from the obsolete utf_old.h (deprecated since ICU 2.4
   * and not available by default in recent ICU releases).
   *
   * @param c      code point to search for
   * @param start  index at which the search starts
   * @return the result of the underlying search function
   */
  inline int32_t
  UnicodeStringRef::indexOf(UChar32 c,
                            int32_t start) const {
    if (U_IS_BMP(c)) {
      return doIndexOf((UChar)c, start, iv_uiLength - start);
    } else {
      UChar buffer[U16_MAX_LENGTH];
      int32_t length = 0;
      U16_APPEND_UNSAFE(buffer, length, c);
      return indexOf(buffer, length, start);
    }
  }

  /**
   * Searches the range [start, start + length) of this string for the code
   * unit c. Plain forwarder to doIndexOf().
   */
  inline int32_t
  UnicodeStringRef::indexOf(UChar c,
                            int32_t start,
                            int32_t length) const {
    const int32_t position = doIndexOf(c, start, length);
    return position;
  }

  /**
   * Searches the range [start, start + length) of this string for the code
   * point c.
   *
   * A BMP code point is searched as a single code unit with doIndexOf(); a
   * supplementary code point is encoded as a surrogate pair and searched as
   * a two-unit string.
   *
   * Uses U_IS_BMP / U16_MAX_LENGTH / U16_APPEND_UNSAFE instead of the
   * equivalent macros from the obsolete utf_old.h (deprecated since ICU 2.4
   * and not available by default in recent ICU releases).
   *
   * @param c       code point to search for
   * @param start   index at which the search starts
   * @param length  number of UChars to search
   * @return the result of the underlying search function
   */
  inline int32_t
  UnicodeStringRef::indexOf(UChar32 c,
                            int32_t start,
                            int32_t length) const {
    if (U_IS_BMP(c)) {
      return doIndexOf((UChar)c, start, length);
    } else {
      UChar buffer[U16_MAX_LENGTH];
      int32_t cLength = 0;
      U16_APPEND_UNSAFE(buffer, cLength, c);
      return indexOf(buffer, cLength, start, length);
    }
  }

  /**
   * Backward search of all of this string for all of text; forwards to the
   * five-argument lastIndexOf() and returns its result.
   */
  inline int32_t
  UnicodeStringRef::lastIndexOf(const UnicodeStringRef& text) const {
    const int32_t textLength = text.iv_uiLength;
    const int32_t ownLength  = iv_uiLength;
    return lastIndexOf(text, 0, textLength, 0, ownLength);
  }

  /**
   * Backward search of this string from index start to its end for all of
   * text; forwards to the five-argument lastIndexOf().
   */
  inline int32_t
  UnicodeStringRef::lastIndexOf(const UnicodeStringRef& text,
                                int32_t start) const {
    const int32_t textLength = text.iv_uiLength;
    const int32_t remaining  = iv_uiLength - start;
    return lastIndexOf(text, 0, textLength, start, remaining);
  }

  /**
   * Backward search of the range [start, start + length) of this string for
   * all of text; forwards to the five-argument lastIndexOf().
   */
  inline int32_t
  UnicodeStringRef::lastIndexOf(const UnicodeStringRef& text,
                                int32_t start,
                                int32_t length) const {
    const int32_t textLength = text.iv_uiLength;
    return lastIndexOf(text, 0, textLength, start, length);
  }

  /**
   * Backward search of the range [start, start + length) of this string for
   * the range [srcStart, srcStart + srcLength) of srcText, by forwarding to
   * the UChar-pointer overload of lastIndexOf().
   *
   * The source range is first pinned to srcText's own bounds. The pointer
   * overload only sees a raw buffer, so without pinning an over-long
   * srcLength would make it read past the characters srcText refers to.
   *
   * @return the result of the pointer-based lastIndexOf()
   */
  inline int32_t
  UnicodeStringRef::lastIndexOf(const UnicodeStringRef& srcText,
                                int32_t srcStart,
                                int32_t srcLength,
                                int32_t start,
                                int32_t length) const {
    srcText.pinIndices(srcStart, srcLength);
    return lastIndexOf(srcText.getBuffer(), srcStart, srcLength, start, length);
  }

  /**
   * Backward search of this string from index start to its end for the first
   * srcLength UChars of srcChars; forwards to the five-argument pointer
   * overload.
   */
  inline int32_t
  UnicodeStringRef::lastIndexOf(const UChar *srcChars,
                                int32_t srcLength,
                                int32_t start) const {
    const int32_t remaining = iv_uiLength - start;
    return lastIndexOf(srcChars, 0, srcLength, start, remaining);
  }

  /**
   * Backward search of the range [start, start + length) of this string for
   * the first srcLength UChars of srcChars; forwards to the five-argument
   * pointer overload with a source start of 0.
   */
  inline int32_t
  UnicodeStringRef::lastIndexOf(const UChar *srcChars,
                                int32_t srcLength,
                                int32_t start,
                                int32_t length) const {
    const int32_t position = lastIndexOf(srcChars, 0, srcLength, start, length);
    return position;
  }

  /** Backward search of all of this string for the code unit c via doLastIndexOf(). */
  inline int32_t
  UnicodeStringRef::lastIndexOf(UChar c) const {
    const int32_t ownLength = iv_uiLength;
    return doLastIndexOf(c, 0, ownLength);
  }

  /**
   * Backward search of all of this string for the code point c.
   *
   * A BMP code point is searched as a single code unit with doLastIndexOf();
   * a supplementary code point is encoded as a surrogate pair and searched
   * as a two-unit string.
   *
   * Uses U_IS_BMP / U16_MAX_LENGTH / U16_APPEND_UNSAFE instead of the
   * equivalent macros from the obsolete utf_old.h (deprecated since ICU 2.4
   * and not available by default in recent ICU releases).
   *
   * @param c  code point to search for
   * @return the result of the underlying search function
   */
  inline int32_t
  UnicodeStringRef::lastIndexOf(UChar32 c) const {
    if (U_IS_BMP(c)) {
      return doLastIndexOf((UChar)c, 0, iv_uiLength);
    } else {
      UChar buffer[U16_MAX_LENGTH];
      int32_t count = 0;
      U16_APPEND_UNSAFE(buffer, count, c);
      return lastIndexOf(buffer, count, 0);
    }
  }

  /**
   * Backward search of this string from index start to its end for the code
   * unit c via doLastIndexOf().
   */
  inline int32_t
  UnicodeStringRef::lastIndexOf(UChar c,
                                int32_t start) const {
    const int32_t remaining = iv_uiLength - start;
    return doLastIndexOf(c, start, remaining);
  }

  /**
   * Backward search of this string from index start to its end for the code
   * point c.
   *
   * A BMP code point is searched as a single code unit with doLastIndexOf();
   * a supplementary code point is encoded as a surrogate pair and searched
   * as a two-unit string.
   *
   * Uses U_IS_BMP / U16_MAX_LENGTH / U16_APPEND_UNSAFE instead of the
   * equivalent macros from the obsolete utf_old.h (deprecated since ICU 2.4
   * and not available by default in recent ICU releases).
   *
   * @param c      code point to search for
   * @param start  index at which the searched range starts
   * @return the result of the underlying search function
   */
  inline int32_t
  UnicodeStringRef::lastIndexOf(UChar32 c,
                                int32_t start) const {
    if (U_IS_BMP(c)) {
      return doLastIndexOf((UChar)c, start, iv_uiLength - start);
    } else {
      UChar buffer[U16_MAX_LENGTH];
      int32_t count = 0;
      U16_APPEND_UNSAFE(buffer, count, c);
      return lastIndexOf(buffer, count, start);
    }
  }

  /**
   * Backward search of the range [start, start + length) of this string for
   * the code unit c. Plain forwarder to doLastIndexOf().
   */
  inline int32_t
  UnicodeStringRef::lastIndexOf(UChar c,
                                int32_t start,
                                int32_t length) const {
    const int32_t position = doLastIndexOf(c, start, length);
    return position;
  }

  /**
   * Backward search of the range [start, start + length) of this string for
   * the code point c.
   *
   * A BMP code point is searched as a single code unit with doLastIndexOf();
   * a supplementary code point is encoded as a surrogate pair and searched
   * as a two-unit string.
   *
   * Uses U_IS_BMP / U16_MAX_LENGTH / U16_APPEND_UNSAFE instead of the
   * equivalent macros from the obsolete utf_old.h (deprecated since ICU 2.4
   * and not available by default in recent ICU releases).
   *
   * @param c       code point to search for
   * @param start   index at which the searched range starts
   * @param length  number of UChars in the searched range
   * @return the result of the underlying search function
   */
  inline int32_t
  UnicodeStringRef::lastIndexOf(UChar32 c,
                                int32_t start,
                                int32_t length) const {
    if (U_IS_BMP(c)) {
      return doLastIndexOf((UChar)c, start, length);
    } else {
      UChar buffer[U16_MAX_LENGTH];
      int32_t count = 0;
      U16_APPEND_UNSAFE(buffer, count, c);
      return lastIndexOf(buffer, count, start, length);
    }
  }

  inline bool
  UnicodeStringRef::startsWith(const UnicodeStringRef& text) const {
    return compare(0, text.iv_uiLength, text, 0, text.iv_uiLength) == 0;
  }

  inline bool
  UnicodeStringRef::startsWith(const UnicodeStringRef& srcText,
                               int32_t srcStart,
                               int32_t srcLength) const {
    return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0;
  }

  inline bool
  UnicodeStringRef::startsWith(const UChar *srcChars,
                               int32_t srcLength) const {
    return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
  }

  inline bool
  UnicodeStringRef::startsWith(const UChar *srcChars,
                               int32_t srcStart,
                               int32_t srcLength) const {
    return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
  }

  inline bool
  UnicodeStringRef::endsWith(const UnicodeStringRef& text) const {
    return doCompare(iv_uiLength - text.iv_uiLength, text.iv_uiLength,
                     text, 0, text.iv_uiLength) == 0;
  }

  inline bool
  UnicodeStringRef::endsWith(const UnicodeStringRef& srcText,
                             int32_t srcStart,
                             int32_t srcLength) const {
    return doCompare(iv_uiLength - srcLength, srcLength,
                     srcText, srcStart, srcLength) == 0;
  }

  inline bool
  UnicodeStringRef::endsWith(const UChar *srcChars,
                             int32_t srcLength) const {
    return doCompare(iv_uiLength - srcLength, srcLength,
                     srcChars, 0, srcLength) == 0;
  }

  inline bool
  UnicodeStringRef::endsWith(const UChar *srcChars,
                             int32_t srcStart,
                             int32_t srcLength) const {
    return doCompare(iv_uiLength - srcLength, srcLength,
                     srcChars, srcStart, srcLength) == 0;
  }

// ============================
// extract implementations (some in .cpp)
// ============================
  /**
   * Copies a range of this string into a caller-supplied UChar buffer.
   *
   * The range is pinned to this string with pinIndices() before copying.
   * The destination is not bounds-checked: the caller must provide room for
   * the pinned number of UChars at dst + dstStart.
   *
   * Nothing is copied when the pinned length is not positive. This keeps a
   * negative length from being converted to a huge size for memcpy(), and
   * avoids calling memcpy() with a NULL buffer for an empty reference.
   *
   * @param start     start of the range to copy
   * @param length    number of UChars to copy
   * @param dst       destination buffer
   * @param dstStart  offset into dst at which copying starts
   */
  inline void
  UnicodeStringRef::extract(int32_t start,
                            int32_t length,
                            UChar *dst,
                            int32_t dstStart) const {
    pinIndices(start, length);
    if (length > 0) {
      memcpy(dst+dstStart, getBuffer()+start, length*sizeof(UChar));
    }
  }


  /**
   * Replaces all of target by a substring of this string.
   *
   * The range is pinned to this string with pinIndices() first, as the
   * UChar-buffer overload of extract() does. icu::UnicodeString::replace()
   * only receives a raw buffer and cannot know where the referenced
   * characters end, so an unpinned range could read past them.
   *
   * Could use setTo(getBuffer()+start, length), but that is implemented as a
   * replace.
   *
   * @param start   start of the range to extract
   * @param length  number of UChars to extract
   * @param target  string whose whole content is replaced
   */
  inline void
  UnicodeStringRef::extract(int32_t start,
                            int32_t length,
                            icu::UnicodeString& target) const {
    pinIndices(start, length);
    target.replace(0, target.length(), getBuffer(), start, length);
  }

  /**
   * Like extract() into a UChar buffer, but the range is given as
   * [start, limit); the length passed on is limit - start.
   */
  inline void
  UnicodeStringRef::extractBetween(int32_t start,
                                   int32_t limit,
                                   UChar *dst,
                                   int32_t dstStart) const {
    const int32_t length = limit - start;
    extract(start, length, dst, dstStart);
  }

  /**
   * Like extract() into an icu::UnicodeString, but the range is given as
   * [start, limit); the length passed on is limit - start.
   */
  inline void
  UnicodeStringRef::extractBetween(int32_t start,
                                   int32_t limit,
                                   icu::UnicodeString& dst) const {
    const int32_t length = limit - start;
    extract(start, length, dst);
  }



  /**
   * Extracts a range into a char buffer without an explicit capacity, by
   * forwarding to the overload that takes one.
   *
   * User beware: the target buffer is assumed to be large enough. The
   * capacity passed on is 0xffffffff when a buffer is given, and 0 when
   * target is null (no buffer provided, i.e. pre-flighting).
   */
  inline int32_t
  UnicodeStringRef::extract(int32_t start,
                            int32_t length,
                            char *target,
                            const char *codepage) const {
    return extract(start, length, target, target==0 ? 0 : 0xffffffff, codepage);
  }

  inline int32_t
  UnicodeStringRef::extract(std::string & target,
                            const char *codepage) const {
    return extract(0, iv_uiLength, target, codepage);
  }

  /** Returns the std::string that extractUTF8() fills for this string. */
  inline std::string
  UnicodeStringRef::asUTF8(void) const {
    std::string utf8;
    extractUTF8(utf8);
    return utf8;
  }

  /**
   * Returns the code unit at offset, or kInvalidUChar (0xffff) if offset is
   * outside the string. The buffer is asserted to exist.
   */
  inline UChar
  UnicodeStringRef::charAt(int32_t offset) const {
    assert(EXISTS(iv_pUChars));
    // The unsigned comparison rejects negative offsets as well.
    const bool inRange = (uint32_t)offset < (uint32_t)iv_uiLength;
    if (!inRange) {
      return kInvalidUChar;
    }
    return iv_pUChars[offset];
  }

  /**
   * Returns the code point that contains the code unit at offset, or
   * kInvalidUChar (0xffff) if offset is outside the string.
   *
   * Uses U16_GET instead of UTF_GET_CHAR, which comes from the obsolete
   * utf_old.h (deprecated since ICU 2.4 and not available by default in
   * recent ICU releases). For UTF-16 the two are equivalent.
   *
   * @param offset  index of a code unit of the wanted code point
   */
  inline UChar32
  UnicodeStringRef::char32At(int32_t offset) const {
    // The unsigned comparison rejects negative offsets as well.
    if ((uint32_t)offset < (uint32_t)iv_uiLength) {
      UChar32 c;
      U16_GET(iv_pUChars, 0, offset, iv_uiLength, c);
      return c;
    } else {
      return kInvalidUChar;
    }
  }

  /**
   * Adjusts offset to the start of the code point it points into: if offset
   * is on the trail surrogate of a surrogate pair it is moved back to the
   * lead surrogate. Returns 0 if offset is outside the string.
   *
   * Uses U16_SET_CP_START instead of UTF_SET_CHAR_START, which comes from
   * the obsolete utf_old.h (deprecated since ICU 2.4 and not available by
   * default in recent ICU releases). For UTF-16 the two are equivalent.
   *
   * @param offset  index of a code unit
   * @return the adjusted index
   */
  inline int32_t
  UnicodeStringRef::getChar32Start(int32_t offset) const {
    // The unsigned comparison rejects negative offsets as well.
    if ((uint32_t)offset < (uint32_t)iv_uiLength) {
      U16_SET_CP_START(iv_pUChars, 0, offset);
      return offset;
    } else {
      return 0;
    }
  }

  /**
   * Adjusts offset to a code point boundary in the forward direction: if
   * offset is on the trail surrogate of a surrogate pair it is moved behind
   * that pair. Returns length() if offset is outside the string.
   *
   * Uses U16_SET_CP_LIMIT instead of UTF_SET_CHAR_LIMIT, which comes from
   * the obsolete utf_old.h (deprecated since ICU 2.4 and not available by
   * default in recent ICU releases). For UTF-16 the two are equivalent.
   *
   * @param offset  index of a code unit
   * @return the adjusted index
   */
  inline int32_t
  UnicodeStringRef::getChar32Limit(int32_t offset) const {
    // The unsigned comparison rejects negative offsets as well.
    if ((uint32_t)offset < (uint32_t)iv_uiLength) {
      U16_SET_CP_LIMIT(iv_pUChars, 0, offset, iv_uiLength);
      return offset;
    } else {
      return iv_uiLength;
    }
  }

  inline bool
  UnicodeStringRef::isEmpty() const {
    return iv_uiLength == 0;
  }

  /**
   * Returns the stored pointer to the first referenced UChar (NULL for a
   * default-constructed reference).
   */
  inline UChar const *
  UnicodeStringRef::getBuffer() const {
    return this->iv_pUChars;
  }

  /**
   * Compares a range of this string with a range of srcChars by building a
   * temporary icu::UnicodeString from this string's range and calling its
   * caseCompare() with the given options.
   *
   * @param start      start of the range in this string
   * @param length     length of the range in this string
   * @param srcChars   buffer to compare with
   * @param srcStart   start of the range in srcChars
   * @param srcLength  length of the range in srcChars
   * @param options    passed through to icu::UnicodeString::caseCompare()
   * @return the result of icu::UnicodeString::caseCompare()
   */
  inline int8_t
  UnicodeStringRef::doCaseCompare(int32_t start,
                                  int32_t length,
                                  const UChar *srcChars,
                                  int32_t srcStart,
                                  int32_t srcLength,
                                  uint32_t options) const {
    const UChar * const ownRange = iv_pUChars + start;
    const UChar * const srcRange = srcChars + srcStart;
    icu::UnicodeString ownCopy(ownRange, length);
    return ownCopy.caseCompare(srcRange, srcLength, options);
  }

  /**
   * Makes this object refer to the same buffer and length as srcText.
   * No characters are copied.
   */
  inline UnicodeStringRef& UnicodeStringRef::setTo(const UnicodeStringRef& srcText) {
    this->iv_uiLength = srcText.iv_uiLength;
    this->iv_pUChars  = srcText.iv_pUChars;
    return *this;
  }

  /**
   * Makes this object refer to the buffer and length of an
   * icu::UnicodeString. No characters are copied.
   */
  inline UnicodeStringRef& UnicodeStringRef::setTo(const icu::UnicodeString& srcText) {
    const UChar * const srcChars  = srcText.getBuffer();
    const int32_t       srcLength = srcText.length();
    iv_pUChars  = srcChars;
    iv_uiLength = srcLength;
    return *this;
  }

  /**
   * Makes this object refer to srcLength UChars starting at srcChars.
   * No characters are copied and the arguments are stored unchecked.
   */
  inline UnicodeStringRef& UnicodeStringRef::setTo(const UChar *srcChars, int32_t srcLength) {
    this->iv_uiLength = srcLength;
    this->iv_pUChars  = srcChars;
    return *this;
  }

  /// Stream insertion operator for UnicodeStringRef (declaration only).
  /// NOTE(review): this repeats the operator<< declaration that follows the
  /// class definition earlier in this header; the redeclaration is harmless.
  UIMA_LINK_IMPORTSPEC std::ostream &
  operator << (
    std::ostream           & rclOStream,
    const UnicodeStringRef & crclLString
  );



  /* ----------------------------------------------------------------------- */
  /** @name vector to/from delimited string conversion routines              */
  /* ----------------------------------------------------------------------- */
  /*@{*/

  /**
     Removes whitespace from both ends of a string.
     Whitespace is whatever ICU's <TT>u_isspace()</TT> reports as such.
     No characters are copied: the result refers to a sub-range of the
     buffer referenced by <TT>s</TT>. A string consisting only of whitespace
     yields an empty reference positioned at the start of that buffer.

     @param s  The string to trim
     @return   Reference to the trimmed sub-range of <TT>s</TT>
  */
  inline UnicodeStringRef
  strtrim(
    const UnicodeStringRef & s
  ) {
    if (s.length() == 0) {
      return s;
    }
    UChar const * const buffer = s.getBuffer();
    // Work with indices [first, limit) rather than with a pointer to the last
    // character: for an all-whitespace string such a pointer would have to be
    // decremented to one before the buffer start, which is undefined.
    int32_t first = 0;
    int32_t limit = s.length();
    while (limit > first && u_isspace(buffer[limit - 1]) ) {
      --limit;
    }
    while (first < limit && u_isspace(buffer[first]) ) {
      ++first;
    }
    return UnicodeStringRef(buffer + first, limit - first);
  }

  /**
     Splits a delimited string into pieces and stores the results in a vector
     of strings. Delimiters are passed as a zero terminated string.

     @param rveclstrOutput      (Output) The vector where the results are stored
     @param pcInput             The delimited string to split.
     @param uiInputLength       The number of chars in pcInput
     @param cpszDelimiters      The delimiters. UChar* are interpreted as a set of delimiters.
     @param bTrimString         Flag: If true, all pieces will be trimmed before storing in <TT>rveclstrOutput</TT>
     @param bInsertEmptyStrings Flag: If false, pieces that have length 0 will not be stored in <TT>rveclstrOutput</TT>

     @return The number of strings added to <TT>rveclstrOutput</TT>
  */
  UIMA_LINK_IMPORTSPEC int32_t
  delimitedUnicodeStringRef2Vector(
    std::vector< uima::UnicodeStringRef > & rveclstrOutput,
    const UChar                          * pcInput,
    int32_t                                 uiInputLength,
    const UChar                          * cpszDelimiters,
    bool                                   bTrimString,
    bool                                   bInsertEmptyStrings
  );

  /**
     Convenience overload for a zero terminated input string: the input
     length is measured with <TT>u_strlen()</TT> and the call is forwarded to
     the overload that takes an explicit length. <TT>pcInput</TT> must not be
     NULL, since it is passed to <TT>u_strlen()</TT> unchecked.

     @return The value returned by the forwarded call
  */
  inline int32_t
  delimitedUnicodeStringRef2Vector(
    std::vector< UnicodeStringRef > & veclstrOutput,
    const UChar                     * pcInput,
    const UChar                     * cpszDelimiters,
    bool                              bTrimString,
    bool                              bInsertEmptyStrings
  ) {
    const int32_t inputLength = u_strlen(pcInput);
    return delimitedUnicodeStringRef2Vector(veclstrOutput,
                                            pcInput,
                                            inputLength,
                                            cpszDelimiters,
                                            bTrimString,
                                            bInsertEmptyStrings);
  }

//@}

} // namespace uima

#endif /* UIMA_UNICODESTRINGREF_HPP */

/* <EOF> */

