| #ifndef UIMA_UNICODESTRINGREF_HPP |
| #define UIMA_UNICODESTRINGREF_HPP |
| /** \file unistrref.hpp . |
| ----------------------------------------------------------------------------- |
| |
| |
| |
| string interface of uima::UnicodeStringRef |
| |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| |
| ----------------------------------------------------------------------------- |
| |
| |
| \brief Shallow string object consisting of a pair of string pointer and a length |
| |
| ----------------------------------------------------------------------------- |
| */ |
| |
| #include "uima/pragmas.hpp" //must be included first to disable warnings |
| |
| #include <vector> |
| #include <string> |
| #include <iostream> |
| |
| #include "uima/types.h" |
| #include "uima/assertmsg.h" |
| #include "uima/ccsid.hpp" |
| #include "unicode/unistr.h" |
| #include "unicode/ustring.h" |
| #include "unicode/uchar.h" |
| #include "uima/strtools.hpp" |
| |
| /* ----------------------------------------------------------------------- */ |
| /* Interface dependencies */ |
| /* ----------------------------------------------------------------------- */ |
| |
| /* ----------------------------------------------------------------------- */ |
| /* Types / Classes */ |
| /* ----------------------------------------------------------------------- */ |
| |
| namespace uima { |
| |
| /** |
| * The class <TT>UnicodeStringRef</TT> provides support for non zero-terminated |
| * strings that are presented as pointers to Unicode character arrays |
| * with an associated length. |
| * As this type of string is supposed to be used only as string reference into |
| * read-only buffers, the string pointer is constant. |
| * The member functions are named to implement the icu::UnicodeString interface |
| * but only providing const member functions |
| * This class is a quick, light-weight, shallow string |
| * (internally it consists only of a pointer and a length) |
| * which can be copied by value without performance penalty. |
| * It allows references into other string buffers to be treated like real |
| * string objects. |
| * Since it does not own its string memory, care must be taken to make sure |
| * the lifetime of a UnicodeStringRef object does not exceed the lifetime |
| * of the Unicode character buffer it references. |
| */ |
| class UIMA_LINK_IMPORTSPEC UnicodeStringRef { |
| public: |
| /** |
| * Default constructor; takes no arguments. |
| * NOTE(review): presumably creates an empty reference (no buffer, length 0) |
| * -- confirm against the implementation. |
| */ |
| UnicodeStringRef( void ); |
| |
| /** |
| * Constructor from icu::UnicodeString. |
| * Not declared explicit, so an icu::UnicodeString converts implicitly |
| * to a UnicodeStringRef. |
| * @param crUniString the string to reference. As this class does not own |
| * its string memory, <TT>crUniString</TT> has to outlive the new object. |
| */ |
| UnicodeStringRef( const icu::UnicodeString & crUniString ); |
| |
| /** |
| * Constructor from zero terminated string. |
| * Declared explicit, so a plain UChar pointer is never converted |
| * implicitly to a UnicodeStringRef. |
| * @param cpacString pointer to the zero terminated string to reference |
| */ |
| explicit UnicodeStringRef( UChar const * cpacString ); |
| |
| /** |
| * Constructor from string and length. |
| * @param cpacString pointer to the first character of the string to reference |
| * @param uiLength length of the string (signed 32 bit value). |
| * NOTE(review): presumably counted in UChar units, not bytes -- confirm. |
| */ |
| UnicodeStringRef( UChar const * cpacString, int32_t uiLength ); |
| |
| /** |
| * Constructor from two pointers (begin/end). |
| * Note: end points to the first char <em>behind</em> the string. |
| * @param paucStringBegin pointer to the first character of the string |
| * @param paucStringEnd pointer to the first character <em>behind</em> the string |
| * @deprecated Replace with UnicodeStringRef(paucStringBegin,paucStringEnd-paucStringBegin). |
| */ |
| UnicodeStringRef( UChar const * paucStringBegin, UChar const * paucStringEnd ); |
| |
| ///Accessor for the number of bytes occupied by this string (returned as int32_t) |
| int32_t getSizeInBytes( void ) const; |
| |
| ///Const accessor for the string content; the returned buffer is NOT zero terminated! |
| UChar const * getBuffer( void ) const; |
| |
| ///Assignment operator; takes the right-hand side by const reference and returns a UnicodeStringRef reference |
| UnicodeStringRef & operator=( UnicodeStringRef const & crclRHS ); |
| |
| ///Equality operator; note that the result is returned as int, not bool |
| int operator==( const UnicodeStringRef & crclRHS ) const; |
| ///Inequality operator; note that the result is returned as int, not bool |
| int operator!=( const UnicodeStringRef & crclRHS ) const; |
| ///Less-than operator; NOTE(review): ordering presumably follows compare() -- confirm in the implementation |
| bool operator< ( UnicodeStringRef const & text ) const; |
| ///Less-than-or-equal operator; NOTE(review): ordering presumably follows compare() -- confirm in the implementation |
| bool operator<=( UnicodeStringRef const & text ) const; |
| ///Greater-than operator; NOTE(review): ordering presumably follows compare() -- confirm in the implementation |
| bool operator> ( UnicodeStringRef const & text ) const; |
| ///Greater-than-or-equal operator; NOTE(review): ordering presumably follows compare() -- confirm in the implementation |
| bool operator>=( UnicodeStringRef const & text ) const; |
| |
| /** |
| * Compare the characters bitwise in this UnicodeStringRef to |
| * the characters in <TT>text</TT>. |
| * @param text The UnicodeStringRef to compare to this one. |
| * @return The result of bitwise character comparison (as int8_t): 0 if |
| * <TT>text</TT> contains the same characters as this, -1 if the characters in |
| * <TT>text</TT> are bitwise less than the characters in this, +1 if the |
| * characters in <TT>text</TT> are bitwise greater than the characters |
| * in this. |
| * NOTE(review): the sign convention above is worded relative to <TT>text</TT>; |
| * icu::UnicodeString::compare words it relative to this -- confirm against |
| * the implementation. |
| * @stable |
| */ |
| inline int8_t compare(const UnicodeStringRef& text) const; |
| |
| /** |
| * Compare the characters bitwise in this UnicodeStringRef to |
| * the characters in the icu::UnicodeString <TT>text</TT>. |
| * @param text The icu::UnicodeString to compare to this one. |
| * @return The result of bitwise character comparison (as int8_t): 0 if |
| * <TT>text</TT> contains the same characters as this, -1 if the characters in |
| * <TT>text</TT> are bitwise less than the characters in this, +1 if the |
| * characters in <TT>text</TT> are bitwise greater than the characters |
| * in this. |
| * NOTE(review): the sign convention above is worded relative to <TT>text</TT>; |
| * icu::UnicodeString::compare words it relative to this -- confirm against |
| * the implementation. |
| * @stable |
| */ |
| inline int8_t compare(const icu::UnicodeString& text) const; |
| |
| /** |
| * Compare the characters bitwise in the range |
| * [<TT>start</TT>, <TT>start + length</TT>) with the characters |
| * in <TT>srcText</TT> |
| * @param start the offset at which the compare operation begins |
| * @param length the number of characters of text to compare. |
| * @param srcText the text to be compared |
| * @return The result of bitwise character comparison (as int8_t): 0 if |
| * <TT>srcText</TT> contains the same characters as this, -1 if the |
| * characters in <TT>srcText</TT> are bitwise less than the characters in |
| * this, +1 if the characters in <TT>srcText</TT> are bitwise greater than |
| * the characters in this. |
| * NOTE(review): the sign convention above is worded relative to <TT>srcText</TT>; |
| * icu::UnicodeString::compare words it relative to this -- confirm against |
| * the implementation. |
| * @stable |
| */ |
| inline int8_t compare(int32_t start, |
| int32_t length, |
| const UnicodeStringRef& srcText) const; |
| |
| /** |
| * Compare the characters bitwise in the range |
| * [<TT>start</TT>, <TT>start + length</TT>) with the characters |
| * in <TT>srcText</TT> in the range |
| * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). |
| * @param start the offset at which the compare operation begins |
| * @param length the number of characters in this to compare. |
| * @param srcText the text to be compared |
| * @param srcStart the offset into <TT>srcText</TT> to start comparison |
| * @param srcLength the number of characters in <TT>srcText</TT> to compare |
| * @return The result of bitwise character comparison (as int8_t): 0 if |
| * <TT>srcText</TT> contains the same characters as this, -1 if the |
| * characters in <TT>srcText</TT> are bitwise less than the characters in |
| * this, +1 if the characters in <TT>srcText</TT> are bitwise greater than |
| * the characters in this. |
| * NOTE(review): the sign convention above is worded relative to <TT>srcText</TT>; |
| * icu::UnicodeString::compare words it relative to this -- confirm against |
| * the implementation. |
| * @stable |
| */ |
| inline int8_t compare(int32_t start, |
| int32_t length, |
| const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength) const; |
| |
| /** |
| * Compare the characters bitwise in this UnicodeStringRef with the first |
| * <TT>srcLength</TT> characters in <TT>srcChars</TT>. |
| * @param srcChars The characters to compare to this UnicodeStringRef. |
| * @param srcLength the number of characters in <TT>srcChars</TT> to compare |
| * @return The result of bitwise character comparison (as int8_t): 0 if |
| * <TT>srcChars</TT> contains the same characters as this, -1 if the |
| * characters in <TT>srcChars</TT> are bitwise less than the characters in |
| * this, +1 if the characters in <TT>srcChars</TT> are bitwise greater than |
| * the characters in this. |
| * NOTE(review): the sign convention above is worded relative to <TT>srcChars</TT>; |
| * icu::UnicodeString::compare words it relative to this -- confirm against |
| * the implementation. |
| * @stable |
| */ |
| inline int8_t compare(UChar const *srcChars, |
| int32_t srcLength) const; |
| |
| /** |
| * Compare the characters bitwise in the range |
| * [<TT>start</TT>, <TT>start + length</TT>) with the first |
| * <TT>length</TT> characters in <TT>srcChars</TT> |
| * @param start the offset at which the compare operation begins |
| * @param length the number of characters to compare. |
| * @param srcChars the characters to be compared |
| * @return The result of bitwise character comparison (as int8_t): 0 if |
| * <TT>srcChars</TT> contains the same characters as this, -1 if the |
| * characters in <TT>srcChars</TT> are bitwise less than the characters in |
| * this, +1 if the characters in <TT>srcChars</TT> are bitwise greater than |
| * the characters in this. |
| * NOTE(review): the sign convention above is worded relative to <TT>srcChars</TT>; |
| * icu::UnicodeString::compare words it relative to this -- confirm against |
| * the implementation. |
| * @stable |
| */ |
| inline int8_t compare(int32_t start, |
| int32_t length, |
| UChar const *srcChars) const; |
| |
| /** |
| * Compare the characters bitwise in the range |
| * [<TT>start</TT>, <TT>start + length</TT>) with the characters |
| * in <TT>srcChars</TT> in the range |
| * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). |
| * @param start the offset at which the compare operation begins |
| * @param length the number of characters in this to compare |
| * @param srcChars the characters to be compared |
| * @param srcStart the offset into <TT>srcChars</TT> to start comparison |
| * @param srcLength the number of characters in <TT>srcChars</TT> to compare |
| * @return The result of bitwise character comparison (as int8_t): 0 if |
| * <TT>srcChars</TT> contains the same characters as this, -1 if the |
| * characters in <TT>srcChars</TT> are bitwise less than the characters in |
| * this, +1 if the characters in <TT>srcChars</TT> are bitwise greater than |
| * the characters in this. |
| * NOTE(review): the sign convention above is worded relative to <TT>srcChars</TT>; |
| * icu::UnicodeString::compare words it relative to this -- confirm against |
| * the implementation. |
| * @stable |
| */ |
| inline int8_t compare(int32_t start, |
| int32_t length, |
| UChar const *srcChars, |
| int32_t srcStart, |
| int32_t srcLength) const; |
| |
| /** |
| * Compare the characters bitwise in the range |
| * [<TT>start</TT>, <TT>limit</TT>) with the characters |
| * in <TT>srcText</TT> in the range |
| * [<TT>srcStart</TT>, <TT>srcLimit</TT>). |
| * Unlike the compare() overloads, both ranges are given as start/limit |
| * offsets instead of start/length pairs. |
| * @param start the offset at which the compare operation begins |
| * @param limit the offset immediately following the compare operation |
| * @param srcText the text to be compared |
| * @param srcStart the offset into <TT>srcText</TT> to start comparison |
| * @param srcLimit the offset into <TT>srcText</TT> to limit comparison |
| * @return The result of bitwise character comparison (as int8_t): 0 if |
| * <TT>srcText</TT> contains the same characters as this, -1 if the |
| * characters in <TT>srcText</TT> are bitwise less than the characters in |
| * this, +1 if the characters in <TT>srcText</TT> are bitwise greater than |
| * the characters in this. |
| * NOTE(review): the sign convention above is worded relative to <TT>srcText</TT>; |
| * icu::UnicodeString::compareBetween words it relative to this -- confirm |
| * against the implementation. |
| * @stable |
| */ |
| inline int8_t compareBetween(int32_t start, |
| int32_t limit, |
| const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLimit) const; |
| |
| /** |
| * Compare two Unicode strings in code point order. |
| * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work |
| * if supplementary characters are present: |
| * |
| * In UTF-16, supplementary characters (with code points U+10000 and above) are |
| * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, |
| * which means that they compare as less than some other BMP characters like U+feff. |
| * This function compares Unicode strings in code point order. |
| * If either of the UTF-16 strings is malformed (i.e., it contains unpaired |
| * surrogates), then the result is not defined. |
| * |
| * @param text Another string to compare this one to. |
| * @return a negative/zero/positive integer (as int8_t) corresponding to whether |
| * this string is less than/equal to/greater than <TT>text</TT> |
| * in code point order |
| */ |
| inline int8_t compareCodePointOrder(const UnicodeStringRef& text) const; |
| |
| /** |
| * Compare a range of this string with another Unicode string in code point order. |
| * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work |
| * if supplementary characters are present: |
| * |
| * In UTF-16, supplementary characters (with code points U+10000 and above) are |
| * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, |
| * which means that they compare as less than some other BMP characters like U+feff. |
| * This function compares Unicode strings in code point order. |
| * If either of the UTF-16 strings is malformed (i.e., it contains unpaired |
| * surrogates), then the result is not defined. |
| * |
| * @param start The start offset in this string at which the compare operation begins. |
| * @param length The number of code units from this string to compare. |
| * @param srcText Another string to compare this one to. |
| * @return a negative/zero/positive integer (as int8_t) corresponding to whether |
| * the compared range of this string is less than/equal to/greater than |
| * <TT>srcText</TT> in code point order |
| */ |
| inline int8_t compareCodePointOrder(int32_t start, |
| int32_t length, |
| const UnicodeStringRef& srcText) const; |
| |
| /** |
| * Compare a range of this string with a range of another Unicode string |
| * in code point order. |
| * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work |
| * if supplementary characters are present: |
| * |
| * In UTF-16, supplementary characters (with code points U+10000 and above) are |
| * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, |
| * which means that they compare as less than some other BMP characters like U+feff. |
| * This function compares Unicode strings in code point order. |
| * If either of the UTF-16 strings is malformed (i.e., it contains unpaired |
| * surrogates), then the result is not defined. |
| * |
| * @param start The start offset in this string at which the compare operation begins. |
| * @param length The number of code units from this string to compare. |
| * @param srcText Another string to compare this one to. |
| * @param srcStart The start offset in <TT>srcText</TT> at which the compare operation begins. |
| * @param srcLength The number of code units from <TT>srcText</TT> to compare. |
| * @return a negative/zero/positive integer (as int8_t) corresponding to whether |
| * the compared range of this string is less than/equal to/greater than |
| * the compared range of <TT>srcText</TT> in code point order |
| */ |
| inline int8_t compareCodePointOrder(int32_t start, |
| int32_t length, |
| const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength) const; |
| |
| /** |
| * Compare this string with a UChar array in code point order. |
| * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work |
| * if supplementary characters are present: |
| * |
| * In UTF-16, supplementary characters (with code points U+10000 and above) are |
| * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, |
| * which means that they compare as less than some other BMP characters like U+feff. |
| * This function compares Unicode strings in code point order. |
| * If either of the UTF-16 strings is malformed (i.e., it contains unpaired |
| * surrogates), then the result is not defined. |
| * |
| * @param srcChars A pointer to another string to compare this one to. |
| * @param srcLength The number of code units from <TT>srcChars</TT> to compare. |
| * @return a negative/zero/positive integer (as int8_t) corresponding to whether |
| * this string is less than/equal to/greater than <TT>srcChars</TT> |
| * in code point order |
| */ |
| inline int8_t compareCodePointOrder(UChar const *srcChars, |
| int32_t srcLength) const; |
| |
| /** |
| * Compare a range of this string with a UChar array in code point order. |
| * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work |
| * if supplementary characters are present: |
| * |
| * In UTF-16, supplementary characters (with code points U+10000 and above) are |
| * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, |
| * which means that they compare as less than some other BMP characters like U+feff. |
| * This function compares Unicode strings in code point order. |
| * If either of the UTF-16 strings is malformed (i.e., it contains unpaired |
| * surrogates), then the result is not defined. |
| * |
| * @param start The start offset in this string at which the compare operation begins. |
| * @param length The number of code units from this string to compare. |
| * @param srcChars A pointer to another string to compare this one to. |
| * NOTE(review): there is no srcLength parameter; presumably the first |
| * <TT>length</TT> code units of <TT>srcChars</TT> are used, as documented for |
| * compare(start, length, srcChars) -- confirm against the implementation. |
| * @return a negative/zero/positive integer (as int8_t) corresponding to whether |
| * the compared range of this string is less than/equal to/greater than |
| * <TT>srcChars</TT> in code point order |
| */ |
| inline int8_t compareCodePointOrder(int32_t start, |
| int32_t length, |
| UChar const *srcChars) const; |
| |
| /** |
| * Compare a range of this string with a range of a UChar array |
| * in code point order. |
| * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work |
| * if supplementary characters are present: |
| * |
| * In UTF-16, supplementary characters (with code points U+10000 and above) are |
| * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, |
| * which means that they compare as less than some other BMP characters like U+feff. |
| * This function compares Unicode strings in code point order. |
| * If either of the UTF-16 strings is malformed (i.e., it contains unpaired |
| * surrogates), then the result is not defined. |
| * |
| * @param start The start offset in this string at which the compare operation begins. |
| * @param length The number of code units from this string to compare. |
| * @param srcChars A pointer to another string to compare this one to. |
| * @param srcStart The start offset in <TT>srcChars</TT> at which the compare operation begins. |
| * @param srcLength The number of code units from <TT>srcChars</TT> to compare. |
| * @return a negative/zero/positive integer (as int8_t) corresponding to whether |
| * the compared range of this string is less than/equal to/greater than |
| * the compared range of <TT>srcChars</TT> in code point order |
| */ |
| inline int8_t compareCodePointOrder(int32_t start, |
| int32_t length, |
| UChar const *srcChars, |
| int32_t srcStart, |
| int32_t srcLength) const; |
| |
| /** |
| * Compare a range of this string with a range of another Unicode string |
| * in code point order; both ranges are given as start/limit offsets. |
| * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work |
| * if supplementary characters are present: |
| * |
| * In UTF-16, supplementary characters (with code points U+10000 and above) are |
| * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, |
| * which means that they compare as less than some other BMP characters like U+feff. |
| * This function compares Unicode strings in code point order. |
| * If either of the UTF-16 strings is malformed (i.e., it contains unpaired |
| * surrogates), then the result is not defined. |
| * |
| * @param start The start offset in this string at which the compare operation begins. |
| * @param limit The offset after the last code unit from this string to compare. |
| * @param srcText Another string to compare this one to. |
| * @param srcStart The start offset in <TT>srcText</TT> at which the compare operation begins. |
| * @param srcLimit The offset after the last code unit from <TT>srcText</TT> to compare. |
| * @return a negative/zero/positive integer (as int8_t) corresponding to whether |
| * the compared range of this string is less than/equal to/greater than |
| * the compared range of <TT>srcText</TT> in code point order |
| */ |
| inline int8_t compareCodePointOrderBetween(int32_t start, |
| int32_t limit, |
| const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLimit) const; |
| |
| /** |
| * Compare two strings case-insensitively using full case folding. |
| * This is equivalent to this->foldCase(options).compare(text.foldCase(options)). |
| * |
| * @param text Another string to compare this one to. |
| * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I |
| * (passed as uint32_t) |
| * @return A negative, zero, or positive integer (as int8_t) indicating the |
| * comparison result. |
| */ |
| inline int8_t caseCompare(const UnicodeStringRef& text, uint32_t options) const; |
| |
| /** |
| * Compare a range of this string with another string case-insensitively |
| * using full case folding. |
| * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). |
| * |
| * @param start The start offset in this string at which the compare operation begins. |
| * @param length The number of code units from this string to compare. |
| * @param srcText Another string to compare this one to. |
| * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I |
| * (passed as uint32_t) |
| * @return A negative, zero, or positive integer (as int8_t) indicating the |
| * comparison result. |
| */ |
| inline int8_t caseCompare(int32_t start, |
| int32_t length, |
| const UnicodeStringRef& srcText, |
| uint32_t options) const; |
| |
| /** |
| * Compare a range of this string with a range of another string |
| * case-insensitively using full case folding. |
| * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). |
| * |
| * @param start The start offset in this string at which the compare operation begins. |
| * @param length The number of code units from this string to compare. |
| * @param srcText Another string to compare this one to. |
| * @param srcStart The start offset in <TT>srcText</TT> at which the compare operation begins. |
| * @param srcLength The number of code units from <TT>srcText</TT> to compare. |
| * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I |
| * (passed as uint32_t) |
| * @return A negative, zero, or positive integer (as int8_t) indicating the |
| * comparison result. |
| */ |
| inline int8_t caseCompare(int32_t start, |
| int32_t length, |
| const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength, |
| uint32_t options) const; |
| |
| /** |
| * Compare this string with a UChar array case-insensitively using full |
| * case folding. |
| * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)) |
| * (pseudo code: <TT>srcChars</TT> is a plain UChar pointer, not a string object). |
| * |
| * @param srcChars A pointer to another string to compare this one to. |
| * @param srcLength The number of code units from <TT>srcChars</TT> to compare. |
| * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I |
| * (passed as uint32_t) |
| * @return A negative, zero, or positive integer (as int8_t) indicating the |
| * comparison result. |
| */ |
| inline int8_t caseCompare(UChar const *srcChars, |
| int32_t srcLength, |
| uint32_t options) const; |
| |
| /** |
| * Compare a range of this string with a UChar array case-insensitively |
| * using full case folding. |
| * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)) |
| * (pseudo code: <TT>srcChars</TT> is a plain UChar pointer, not a string object). |
| * |
| * @param start The start offset in this string at which the compare operation begins. |
| * @param length The number of code units from this string to compare. |
| * @param srcChars A pointer to another string to compare this one to. |
| * NOTE(review): there is no srcLength parameter; presumably the first |
| * <TT>length</TT> code units of <TT>srcChars</TT> are used -- confirm |
| * against the implementation. |
| * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I |
| * (passed as uint32_t) |
| * @return A negative, zero, or positive integer (as int8_t) indicating the |
| * comparison result. |
| */ |
| inline int8_t caseCompare(int32_t start, |
| int32_t length, |
| UChar const *srcChars, |
| uint32_t options) const; |
| |
| /** |
| * Compare a range of this string with a range of a UChar array |
| * case-insensitively using full case folding. |
| * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)) |
| * (pseudo code: <TT>srcChars</TT> is a plain UChar pointer, not a string object). |
| * |
| * @param start The start offset in this string at which the compare operation begins. |
| * @param length The number of code units from this string to compare. |
| * @param srcChars A pointer to another string to compare this one to. |
| * @param srcStart The start offset in <TT>srcChars</TT> at which the compare operation begins. |
| * @param srcLength The number of code units from <TT>srcChars</TT> to compare. |
| * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I |
| * (passed as uint32_t) |
| * @return A negative, zero, or positive integer (as int8_t) indicating the |
| * comparison result. |
| */ |
| inline int8_t caseCompare(int32_t start, |
| int32_t length, |
| UChar const *srcChars, |
| int32_t srcStart, |
| int32_t srcLength, |
| uint32_t options) const; |
| |
| /** |
| * Compare a range of this string with a range of another string |
| * case-insensitively using full case folding; both ranges are given as |
| * start/limit offsets. |
| * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)). |
| * |
| * @param start The start offset in this string at which the compare operation begins. |
| * @param limit The offset after the last code unit from this string to compare. |
| * @param srcText Another string to compare this one to. |
| * @param srcStart The start offset in <TT>srcText</TT> at which the compare operation begins. |
| * @param srcLimit The offset after the last code unit from <TT>srcText</TT> to compare. |
| * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I |
| * (passed as uint32_t) |
| * @return A negative, zero, or positive integer (as int8_t) indicating the |
| * comparison result. |
| */ |
| inline int8_t caseCompareBetween(int32_t start, |
| int32_t limit, |
| const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLimit, |
| uint32_t options) const; |
| |
| /** |
| * Determine if this starts with the characters in <TT>text</TT> |
| * @param text The text to match. |
| * @return true if this starts with the characters in <TT>text</TT>, |
| * false otherwise |
| * @stable |
| */ |
| inline bool startsWith(const UnicodeStringRef& text) const; |
| |
| /** |
| * Determine if this starts with the characters in <TT>srcText</TT> |
| * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). |
| * @param srcText The text to match. |
| * @param srcStart the offset into <TT>srcText</TT> to start matching |
| * @param srcLength the number of characters in <TT>srcText</TT> to match |
| * @return true if this starts with the given range of characters in |
| * <TT>srcText</TT>, false otherwise |
| * @stable |
| */ |
| inline bool startsWith(const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength) const; |
| |
| /** |
| * Determine if this starts with the characters in <TT>srcChars</TT> |
| * @param srcChars The characters to match. |
| * @param srcLength the number of characters in <TT>srcChars</TT> |
| * @return true if this starts with the characters in <TT>srcChars</TT>, |
| * false otherwise |
| * @stable |
| */ |
| inline bool startsWith(UChar const *srcChars, |
| int32_t srcLength) const; |
| |
| /** |
| * Determine if this starts with the characters in <TT>srcChars</TT> |
| * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). |
| * @param srcChars The characters to match. |
| * @param srcStart the offset into <TT>srcChars</TT> to start matching |
| * @param srcLength the number of characters in <TT>srcChars</TT> to match |
| * @return true if this starts with the given range of characters in |
| * <TT>srcChars</TT>, false otherwise |
| * @stable |
| */ |
| inline bool startsWith(UChar const *srcChars, |
| int32_t srcStart, |
| int32_t srcLength) const; |
| |
| /** |
| * Determine if this ends with the characters in <TT>text</TT> |
| * @param text The text to match. |
| * @return true if this ends with the characters in <TT>text</TT>, |
| * false otherwise |
| * @stable |
| */ |
| inline bool endsWith(const UnicodeStringRef& text) const; |
| |
| /** |
| * Determine if this ends with the characters in <TT>srcText</TT> |
| * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). |
| * @param srcText The text to match. |
| * @param srcStart the offset into <TT>srcText</TT> to start matching |
| * @param srcLength the number of characters in <TT>srcText</TT> to match |
| * @return true if this ends with the given range of characters in |
| * <TT>srcText</TT>, false otherwise |
| * @stable |
| */ |
| inline bool endsWith(const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength) const; |
| |
| /** |
| * Determine if this ends with the characters in <TT>srcChars</TT> |
| * @param srcChars The characters to match. |
| * @param srcLength the number of characters in <TT>srcChars</TT> |
| * @return true if this ends with the characters in <TT>srcChars</TT>, |
| * false otherwise |
| * @stable |
| */ |
| inline bool endsWith(UChar const *srcChars, |
| int32_t srcLength) const; |
| |
| /** |
| * Determine if this ends with the characters in <TT>srcChars</TT> |
| * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). |
| * @param srcChars The characters to match. |
| * @param srcStart the offset into <TT>srcChars</TT> to start matching |
| * @param srcLength the number of characters in <TT>srcChars</TT> to match |
| * @return true if this ends with the given range of characters in |
| * <TT>srcChars</TT>, false otherwise |
| * @stable |
| */ |
| inline bool endsWith(UChar const *srcChars, |
| int32_t srcStart, |
| int32_t srcLength) const; |
| |
| |
| /* Searching - bitwise only */ |
| |
| /** |
| * Locate in this the first occurrence of the characters in <TT>text</TT>, |
| * using bitwise comparison. |
| * @param text The text to search for. |
| * @return The offset into this (as int32_t) of the start of <TT>text</TT>, |
| * or -1 if not found. |
| * @stable |
| */ |
| inline int32_t indexOf(const UnicodeStringRef& text) const; |
| |
| /** |
| * Locate in this the first occurrence of the characters in <TT>text</TT> |
| * starting at offset <TT>start</TT>, using bitwise comparison. |
| * @param text The text to search for. |
| * @param start The offset into this at which searching will start. |
| * @return The offset into this (as int32_t) of the start of <TT>text</TT>, |
| * or -1 if not found. |
| * @stable |
| */ |
| inline int32_t indexOf(const UnicodeStringRef& text, |
| int32_t start) const; |
| |
| /** |
| * Locate in this the first occurrence in the range |
| * [<TT>start</TT>, <TT>start + length</TT>) of the characters |
| * in <TT>text</TT>, using bitwise comparison. |
| * @param text The text to search for. |
| * @param start The offset into this at which searching will start. |
| * @param length The number of characters in this to search |
| * @return The offset into this (as int32_t) of the start of <TT>text</TT>, |
| * or -1 if not found. |
| * @stable |
| */ |
| inline int32_t indexOf(const UnicodeStringRef& text, |
| int32_t start, |
| int32_t length) const; |
| |
| /** |
| * Locate in this the first occurrence in the range |
| * [<TT>start</TT>, <TT>start + length</TT>) of the characters |
| * in <TT>srcText</TT> in the range |
| * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), |
| * using bitwise comparison. |
| * Note the parameter order: the range of the search pattern comes before |
| * the range of this string. |
| * @param srcText The text to search for. |
| * @param srcStart the offset into <TT>srcText</TT> at which |
| * to start matching |
| * @param srcLength the number of characters in <TT>srcText</TT> to match |
| * @param start the offset into this at which to start matching |
| * @param length the number of characters in this to search |
| * @return The offset into this (as int32_t) of the start of the given range |
| * of <TT>srcText</TT>, or -1 if not found. |
| * @stable |
| */ |
| inline int32_t indexOf(const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength, |
| int32_t start, |
| int32_t length) const; |
| |
| /** |
| * Locate in this the first occurrence of the characters in |
| * <TT>srcChars</TT> |
| * starting at offset <TT>start</TT>, using bitwise comparison. |
| * @param srcChars The text to search for. |
| * @param srcLength the number of characters in <TT>srcChars</TT> to match |
| * @param start the offset into this at which to start matching |
| * @return The offset into this (as int32_t) of the start of |
| * <TT>srcChars</TT>, or -1 if not found. |
| * @stable |
| */ |
| inline int32_t indexOf(UChar const *srcChars, |
| int32_t srcLength, |
| int32_t start) const; |
| |
| /** |
| * Locate in this the first occurrence in the range |
| * [<TT>start</TT>, <TT>start + length</TT>) of the characters |
| * in <TT>srcChars</TT>, using bitwise comparison. |
| * @param srcChars The text to search for. |
| * @param srcLength the number of characters in <TT>srcChars</TT> |
| * @param start The offset into this at which searching will start. |
| * @param length The number of characters in this to search |
| * @return The offset into this (as int32_t) of the start of |
| * <TT>srcChars</TT>, or -1 if not found. |
| * @stable |
| */ |
| inline int32_t indexOf(UChar const *srcChars, |
| int32_t srcLength, |
| int32_t start, |
| int32_t length) const; |
| |
| /** |
| * Locate in this the first occurrence in the range |
| * [<TT>start</TT>, <TT>start + length</TT>) of the characters |
| * in <TT>srcChars</TT> in the range |
| * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), |
| * using bitwise comparison. |
| * @param srcChars The text to search for. |
| * @param srcStart the offset into <TT>srcChars</TT> at which |
| * to start matching |
| * @param srcLength the number of characters in <TT>srcChars</TT> to match |
| * @param start the offset into this at which to start matching |
| * @param length the number of characters in this to search |
| * @return The offset into this of the start of <TT>srcChars</TT>, |
| * or -1 if not found. |
| * @stable |
| */ |
| int32_t indexOf(UChar const *srcChars, |
| int32_t srcStart, |
| int32_t srcLength, |
| int32_t start, |
| int32_t length) const; |
| |
| /** |
| * Locate in this the first occurrence of the code unit <TT>c</TT>, |
| * using bitwise comparison. |
| * @param c The code unit to search for. |
| * @return The offset into this of <TT>c</TT>, or -1 if not found. |
| * @stable |
| */ |
| inline int32_t indexOf(UChar c) const; |
| |
| /** |
| * Locate in this the first occurrence of the code point <TT>c</TT>, |
| * using bitwise comparison. |
| * @param c The code point to search for. |
| * @return The offset into this of <TT>c</TT>, or -1 if not found. |
| * @stable |
| */ |
| inline int32_t indexOf(UChar32 c) const; |
| |
| /** |
| * Locate in this the first occurrence of the code unit <TT>c</TT> |
| * starting at offset <TT>start</TT>, using bitwise comparison. |
| * @param c The code unit to search for. |
| * @param start The offset at which searching will start. |
| * @return The offset into this of <TT>c</TT>, or -1 if not found. |
| * @stable |
| */ |
| inline int32_t indexOf(UChar c, |
| int32_t start) const; |
| |
| /** |
| * Locate in this the first occurrence of the code point <TT>c</TT> |
| * starting at offset <TT>start</TT>, using bitwise comparison. |
| * @param c The code point to search for. |
| * @param start The offset at which searching will start. |
| * @return The offset into this of <TT>c</TT>, or -1 if not found. |
| * @stable |
| */ |
| inline int32_t indexOf(UChar32 c, |
| int32_t start) const; |
| |
| /** |
| * Locate in this the first occurrence of the code unit <TT>c</TT> |
| * in the range [<TT>start</TT>, <TT>start + length</TT>), |
| * using bitwise comparison. |
| * @param c The code unit to search for. |
| * @param start the offset into this at which to start matching |
| * @param length the number of characters in this to search |
| * @return The offset into this of <TT>c</TT>, or -1 if not found. |
| * @stable |
| */ |
| inline int32_t indexOf(UChar c, |
| int32_t start, |
| int32_t length) const; |
| |
| /** |
| * Locate in this the first occurrence of the code point <TT>c</TT> |
| * in the range [<TT>start</TT>, <TT>start + length</TT>), |
| * using bitwise comparison. |
| * @param c The code point to search for. |
| * @param start the offset into this at which to start matching |
| * @param length the number of characters in this to search |
| * @return The offset into this of <TT>c</TT>, or -1 if not found. |
| * @stable |
| */ |
| inline int32_t indexOf(UChar32 c, |
| int32_t start, |
| int32_t length) const; |
| |
| /** |
| * Locate in this the last occurrence of the characters in <TT>text</TT>, |
| * using bitwise comparison. |
| * @param text The text to search for. |
| * @return The offset into this of the start of <TT>text</TT>, |
| * or -1 if not found. |
| * @stable |
| */ |
| inline int32_t lastIndexOf(const UnicodeStringRef& text) const; |
| |
| /** |
| * Locate in this the last occurrence of the characters in <TT>text</TT> |
| * starting at offset <TT>start</TT>, using bitwise comparison. |
| * @param text The text to search for. |
| * @param start The offset at which searching will start. |
| * @return The offset into this of the start of <TT>text</TT>, |
| * or -1 if not found. |
| * @stable |
| */ |
| inline int32_t lastIndexOf(const UnicodeStringRef& text, |
| int32_t start) const; |
| |
| /** |
| * Locate in this the last occurrence in the range |
| * [<TT>start</TT>, <TT>start + length</TT>) of the characters |
| * in <TT>text</TT>, using bitwise comparison. |
| * @param text The text to search for. |
| * @param start The offset at which searching will start. |
| * @param length The number of characters to search |
| * @return The offset into this of the start of <TT>text</TT>, |
| * or -1 if not found. |
| * @stable |
| */ |
| inline int32_t lastIndexOf(const UnicodeStringRef& text, |
| int32_t start, |
| int32_t length) const; |
| |
| /** |
| * Locate in this the last occurrence in the range |
| * [<TT>start</TT>, <TT>start + length</TT>) of the characters |
| * in <TT>srcText</TT> in the range |
| * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), |
| * using bitwise comparison. |
| * @param srcText The text to search for. |
| * @param srcStart the offset into <TT>srcText</TT> at which |
| * to start matching |
| * @param srcLength the number of characters in <TT>srcText</TT> to match |
| * @param start the offset into this at which to start matching |
| * @param length the number of characters in this to search |
| * @return The offset into this of the start of <TT>srcText</TT>, |
| * or -1 if not found. |
| * @stable |
| */ |
| inline int32_t lastIndexOf(const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength, |
| int32_t start, |
| int32_t length) const; |
| |
| /** |
| * Locate in this the last occurrence of the characters in <TT>srcChars</TT> |
| * starting at offset <TT>start</TT>, using bitwise comparison. |
| * @param srcChars The text to search for. |
| * @param srcLength the number of characters in <TT>srcChars</TT> to match |
| * @param start the offset into this at which to start matching |
| * @return The offset into this of the start of <TT>srcChars</TT>, |
| * or -1 if not found. |
| * @stable |
| */ |
| inline int32_t lastIndexOf(UChar const *srcChars, |
| int32_t srcLength, |
| int32_t start) const; |
| |
| /** |
| * Locate in this the last occurrence in the range |
| * [<TT>start</TT>, <TT>start + length</TT>) of the characters |
| * in <TT>srcChars</TT>, using bitwise comparison. |
| * @param srcChars The text to search for. |
| * @param srcLength the number of characters in <TT>srcChars</TT> |
| * @param start The offset at which searching will start. |
| * @param length The number of characters to search |
| * @return The offset into this of the start of <TT>srcChars</TT>, |
| * or -1 if not found. |
| * @stable |
| */ |
| inline int32_t lastIndexOf(UChar const *srcChars, |
| int32_t srcLength, |
| int32_t start, |
| int32_t length) const; |
| |
| /** |
| * Locate in this the last occurrence in the range |
| * [<TT>start</TT>, <TT>start + length</TT>) of the characters |
| * in <TT>srcChars</TT> in the range |
| * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), |
| * using bitwise comparison. |
| * @param srcChars The text to search for. |
| * @param srcStart the offset into <TT>srcChars</TT> at which |
| * to start matching |
| * @param srcLength the number of characters in <TT>srcChars</TT> to match |
| * @param start the offset into this at which to start matching |
| * @param length the number of characters in this to search |
| * @return The offset into this of the start of <TT>srcChars</TT>, |
| * or -1 if not found. |
| * @stable |
| */ |
| int32_t lastIndexOf(UChar const *srcChars, |
| int32_t srcStart, |
| int32_t srcLength, |
| int32_t start, |
| int32_t length) const; |
| |
| /** |
| * Locate in this the last occurrence of the code unit <TT>c</TT>, |
| * using bitwise comparison. |
| * @param c The code unit to search for. |
| * @return The offset into this of <TT>c</TT>, or -1 if not found. |
| * @stable |
| */ |
| inline int32_t lastIndexOf(UChar c) const; |
| |
| /** |
| * Locate in this the last occurrence of the code point <TT>c</TT>, |
| * using bitwise comparison. |
| * @param c The code point to search for. |
| * @return The offset into this of <TT>c</TT>, or -1 if not found. |
| * @stable |
| */ |
| inline int32_t lastIndexOf(UChar32 c) const; |
| |
| /** |
| * Locate in this the last occurrence of the code unit <TT>c</TT> |
| * starting at offset <TT>start</TT>, using bitwise comparison. |
| * @param c The code unit to search for. |
| * @param start The offset at which searching will start. |
| * @return The offset into this of <TT>c</TT>, or -1 if not found. |
| * @stable |
| */ |
| inline int32_t lastIndexOf(UChar c, |
| int32_t start) const; |
| |
| /** |
| * Locate in this the last occurrence of the code point <TT>c</TT> |
| * starting at offset <TT>start</TT>, using bitwise comparison. |
| * @param c The code point to search for. |
| * @param start The offset at which searching will start. |
| * @return The offset into this of <TT>c</TT>, or -1 if not found. |
| * @stable |
| */ |
| inline int32_t lastIndexOf(UChar32 c, |
| int32_t start) const; |
| |
| /** |
| * Locate in this the last occurrence of the code unit <TT>c</TT> |
| * in the range [<TT>start</TT>, <TT>start + length</TT>), |
| * using bitwise comparison. |
| * @param c The code unit to search for. |
| * @param start the offset into this at which to start matching |
| * @param length the number of characters in this to search |
| * @return The offset into this of <TT>c</TT>, or -1 if not found. |
| * @stable |
| */ |
| inline int32_t lastIndexOf(UChar c, |
| int32_t start, |
| int32_t length) const; |
| |
| /** |
| * Locate in this the last occurrence of the code point <TT>c</TT> |
| * in the range [<TT>start</TT>, <TT>start + length</TT>), |
| * using bitwise comparison. |
| * @param c The code point to search for. |
| * @param start the offset into this at which to start matching |
| * @param length the number of characters in this to search |
| * @return The offset into this of <TT>c</TT>, or -1 if not found. |
| * @stable |
| */ |
| inline int32_t lastIndexOf(UChar32 c, |
| int32_t start, |
| int32_t length) const; |
| |
| |
| /* Character access */ |
| |
| /** |
| * Return the code unit at offset <tt>offset</tt>. |
| * @param offset a valid offset into the text |
| * @returns the code unit at offset <tt>offset</tt> |
| * @stable |
| */ |
| inline UChar charAt(int32_t offset) const; |
| |
| /** |
| * Return the code unit at offset <tt>offset</tt>. |
| * @param offset a valid offset into the text |
| * @returns the code unit at offset <tt>offset</tt> |
| * @stable |
| */ |
| inline UChar operator [] (int32_t offset) const; |
| |
| /** |
| * Return the code point that contains the code unit |
| * at offset <tt>offset</tt>. |
| * @param offset a valid offset into the text |
| * that indicates the text offset of any of the code units |
| * that will be assembled into a code point (21-bit value) and returned |
| * @returns the code point of text at <tt>offset</tt> |
| * @stable |
| */ |
| inline UChar32 char32At(int32_t offset) const; |
| |
| /** |
| * Adjust a random-access offset so that |
| * it points to the beginning of a Unicode character. |
| * The offset that is passed in points to |
| * any code unit of a code point, |
| * while the returned offset will point to the first code unit |
| * of the same code point. |
| * In UTF-16, if the input offset points to a second surrogate |
| * of a surrogate pair, then the returned offset will point |
| * to the first surrogate. |
| * @param offset a valid offset into one code point of the text |
| * @return offset of the first code unit of the same code point |
| */ |
| inline int32_t getChar32Start(int32_t offset) const; |
| |
| /** |
| * Adjust a random-access offset so that |
| * it points behind a Unicode character. |
| * The offset that is passed in points behind |
| * any code unit of a code point, |
| * while the returned offset will point behind the last code unit |
| * of the same code point. |
| * In UTF-16, if the input offset points behind the first surrogate |
| * (i.e., to the second surrogate) |
| * of a surrogate pair, then the returned offset will point |
| * behind the second surrogate (i.e., to the first code unit after the pair). |
| * @param offset a valid offset after any code unit of a code point of the text |
| * @return offset of the first code unit after the same code point |
| */ |
| inline int32_t getChar32Limit(int32_t offset) const; |
| |
| /** |
| * Move the code unit index along the string by delta code points. |
| * Interpret the input index as a code unit-based offset into the string, |
| * move the index forward or backward by delta code points, and |
| * return the resulting index. |
| * The input index should point to the first code unit of a code point, |
| * if there is more than one. |
| * |
| * Both input and output indexes are code unit-based as for all |
| * string indexes/offsets in ICU (and other libraries, like MBCS char*). |
| * If delta<0 then the index is moved backward (toward the start of the string). |
| * If delta>0 then the index is moved forward (toward the end of the string). |
| * |
| * This behaves like CharacterIterator::move32(delta, kCurrent). |
| * |
| * Examples: |
| * <code> |
| * // s has code points 'a' U+10000 'b' U+10ffff U+2029 |
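| * // the UnicodeString must outlive s: a UnicodeStringRef only refers to its buffer |
| * UnicodeString us=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape(); |
| * UnicodeStringRef s(us); |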
| * |
| * // initial index: position of U+10000 |
| * int32_t index=1; |
| * |
| * // the following examples will all result in index==4, position of U+10ffff |
| * |
| * // skip 2 code points from some position in the string |
| * index=s.moveIndex32(index, 2); // skips U+10000 and 'b' |
| * |
| * // go to the 3rd code point from the start of s (0-based) |
| * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b' |
| * |
| * // go to the next-to-last code point of s |
| * |
| * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff |
| * </code> |
| * |
| * @param index input code unit index |
| * @param delta (signed) code point count to move the index forward or backward |
| * in the string |
| * @return the resulting code unit index |
| */ |
| int32_t moveIndex32(int32_t index, int32_t delta) const; |
| |
| /* Substring extraction without conversion */ |
| |
| /** |
| * Copy the characters in the range |
| * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>, |
| * beginning at <tt>dstStart</tt>. |
| * If the string aliases to <code>dst</code> itself as an external buffer, |
| * then extract() will not copy the contents. |
| * |
| * @param start offset of first character which will be copied into the array |
| * @param length the number of characters to extract |
| * @param dst array in which to copy characters. The length of <tt>dst</tt> |
| * must be at least (<tt>dstStart + length</tt>). |
| * @param dstStart the offset in <TT>dst</TT> where the first character |
| * will be extracted |
| * @stable |
| */ |
| inline void extract(int32_t start, |
| int32_t length, |
| UChar *dst, |
| int32_t dstStart = 0) const; |
| |
| /** |
| * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) |
| * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>. |
| * @param start offset of first character which will be copied into the array |
| * @param limit offset immediately following the last character to be copied |
| * @param dst array in which to copy characters. The length of <tt>dst</tt> |
| * must be at least (<tt>dstStart + (limit - start)</tt>). |
| * @param dstStart the offset in <TT>dst</TT> where the first character |
| * will be extracted |
| * @stable |
| */ |
| inline void extractBetween(int32_t start, |
| int32_t limit, |
| UChar *dst, |
| int32_t dstStart = 0) const; |
| |
| /** |
| * Copy the contents of the string into dst. |
| * This is a convenience function that |
| * checks if there is enough space in dst, |
| * extracts the entire string if possible, |
| * and NUL-terminates dst if possible. |
| * |
| * If the string fits into dst but cannot be NUL-terminated |
| * (length()==dstCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. |
| * If the string itself does not fit into dst |
| * (length()>dstCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR. |
| * |
| * If the string aliases to <code>dst</code> itself as an external buffer, |
| * then extract() will not copy the contents. |
| * |
| * @param dst Destination string buffer. |
| * @param dstCapacity Number of UChars available at dst. |
| * @param errorCode ICU error code. |
| * @return length() |
| */ |
| int32_t |
| extract(UChar *dst, int32_t dstCapacity, |
| UErrorCode &errorCode) const; |
| |
| /** |
| * Copy the characters in the range |
| * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString |
| * <tt>dst</tt>. |
| * @param start offset of first character which will be copied |
| * @param length the number of characters to extract |
| * @param dst UnicodeString into which to copy characters. |
| * (No return value; the result is delivered through <TT>dst</TT>.) |
| * @stable |
| */ |
| inline void extract(int32_t start, |
| int32_t length, |
| UnicodeString& dst) const; |
| |
| /** |
| * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) |
| * into the UnicodeString <tt>dst</tt>. |
| * @param start offset of first character which will be copied |
| * @param limit offset immediately following the last character to be copied |
| * @param dst UnicodeString into which to copy characters. |
| * (No return value; the result is delivered through <TT>dst</TT>.) |
| * @stable |
| */ |
| inline void extractBetween(int32_t start, |
| int32_t limit, |
| UnicodeString& dst) const; |
| |
| /* Substring extraction with conversion */ |
| |
| /** |
| * Copy the characters in the range |
| * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters |
| * in a specified codepage. |
| * The output string is NUL-terminated. |
| * |
| * @param start offset of first character which will be copied |
| * @param startLength the number of characters to extract |
| * @param target the target buffer for extraction |
| * @param codepage the desired codepage for the characters. 0 has |
| * the special meaning of the default codepage |
| * If <code>codepage</code> is an empty string (<code>""</code>), |
| * then a simple conversion is performed on the codepage-invariant |
| * subset ("invariant characters") of the platform encoding. See utypes.h. |
| * If <TT>target</TT> is NULL, then the number of bytes required for |
| * <TT>target</TT> is returned. |
| * NOTE: It is assumed that the target is big enough to fit all of the characters. |
| * @return the output string length, not including the terminating NUL |
| * @stable |
| */ |
| inline int32_t extract(int32_t start, |
| int32_t startLength, |
| char *target, |
| const char *codepage = 0) const; |
| |
| /** |
| * Copy the characters in the range |
| * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters |
| * in a specified codepage. |
| * This function does not write any more than <code>targetLength</code> |
| * characters but returns the length of the entire output string |
| * so that one can allocate a larger buffer and call the function again |
| * if necessary. |
| * The output string is NUL-terminated if possible. |
| * |
| * @param start offset of first character which will be copied |
| * @param startLength the number of characters to extract |
| * @param target the target buffer for extraction |
| * @param targetLength the length of the target buffer |
| * @param codepage the desired codepage for the characters. 0 has |
| * the special meaning of the default codepage |
| * If <code>codepage</code> is an empty string (<code>""</code>), |
| * then a simple conversion is performed on the codepage-invariant |
| * subset ("invariant characters") of the platform encoding. See utypes.h. |
| * If <TT>target</TT> is NULL, then the number of bytes required for |
| * <TT>target</TT> is returned. |
| * @return the output string length, not including the terminating NUL |
| * @stable |
| */ |
| int32_t extract(int32_t start, |
| int32_t startLength, |
| char *target, |
| uint32_t targetLength, |
| const char *codepage = 0) const; |
| |
| /** |
| * Convert the UnicodeStringRef into a codepage string using an existing UConverter. |
| * The output string is NUL-terminated if possible. |
| * |
| * This function avoids the overhead of opening and closing a converter if |
| * multiple strings are extracted. |
| * |
| * @param target destination string buffer, can be NULL if targetCapacity==0 |
| * @param targetCapacity the number of chars available at target |
| * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called), |
| * or NULL for the default converter |
| * @param errorCode normal ICU error code |
| * @return the length of the output string, not counting the terminating NUL; |
| * if the length is greater than targetCapacity, then the string will not fit |
| * and a buffer of the indicated length would need to be passed in |
| * @stable |
| */ |
| int32_t extract(char *target, int32_t targetCapacity, |
| UConverter *cnv, |
| UErrorCode &errorCode) const; |
| |
| /** |
| * Copy the characters in the range |
| * [<tt>start</tt>, <tt>start + startLength</tt>) into a std::string object |
| * in a specified codepage. |
| * The output string is NUL-terminated. |
| * |
| * @param start offset of first character which will be copied |
| * @param startLength the number of characters to extract |
| * @param target the target string for extraction |
| * @param codepage the desired codepage for the characters. 0 has |
| * the special meaning of the default codepage. |
| * If <code>codepage</code> is an empty string (<code>""</code>), |
| * then a simple conversion is performed on the codepage-invariant |
| * subset ("invariant characters") of the platform encoding. See utypes.h. |
| * @return the output string length, not including the terminating NUL |
| * @stable |
| */ |
| int32_t extract(int32_t start, |
| int32_t startLength, |
| std::string & target, |
| const char *codepage = 0) const; |
| |
| /** |
| * Copy all the characters in the string into an std::string object |
| * in a specified codepage. Equivalent to |
| * extract(0, length(), target, codepage) |
| * |
| * @param target the target string for extraction |
| * @param codepage the desired codepage for the characters. |
| * @return the output string length, not including the terminating NUL |
| * @stable |
| */ |
| inline int32_t extract(std::string & target, |
| const char *codepage = 0) const; |
| |
| /** |
| * Copy all the characters in the string into an std::string object |
| * in UTF-8. Slightly more efficient than asUTF8() as avoids |
| * one copy. |
| * |
| * @param target the target string for extraction |
| * @return the output string length, not including the terminating NUL |
| */ |
| int32_t extractUTF8(std::string & target) const; |
| |
| /** |
| * Convert to a UTF8 string |
| * @return a std::string |
| */ |
| inline std::string asUTF8(void) const; |
| |
| /** |
| * Release contents of string container allocated by extract methods |
| * Useful when caller and callee use different heaps, |
| * e.g. when debug code uses a release library. |
| * Is static so can be called on the <TT>UnicodeStringRef</TT> class directly. |
| */ |
| static void release(std::string & target); |
| |
| /* Length operations */ |
| |
| /** |
| * Return the length of the UnicodeStringRef object. |
| * The length is the number of characters in the text. |
| * @returns the length of the UnicodeStringRef object |
| * @stable |
| */ |
| inline int32_t length(void) const; |
| |
| /** |
| * Count Unicode code points in the length UChar code units of the string. |
| * A code point may occupy either one or two UChar code units. |
| * Counting code points involves reading all code units. |
| * |
| * This function is basically the inverse of moveIndex32(). |
| * |
| * @param start the index of the first code unit to check |
| * @param length the number of UChar code units to check |
| * @return the number of code points in the specified code units |
| */ |
| int32_t |
| countChar32(int32_t start=0, int32_t length=0x7fffffff) const; |
| |
| /** |
| * Determine if this string is empty. |
| * @return TRUE if this string contains 0 characters, FALSE otherwise. |
| */ |
| inline bool isEmpty(void) const; |
| |
| /** |
| * Set the text in the UnicodeStringRef object to the characters in |
| * <TT>srcText</TT>. |
| * <TT>srcText</TT> is not modified. |
| * @param srcText the source for the new characters |
| * @return a reference to this |
| * @stable |
| */ |
| inline UnicodeStringRef& setTo(const UnicodeStringRef& srcText); |
| |
| /** |
| * Set the text in the UnicodeStringRef object to the characters in |
| * <TT>srcText</TT>. |
| * <TT>srcText</TT> is not modified. |
| * @param srcText the source for the new characters |
| * @return a reference to this |
| * @stable |
| */ |
| inline UnicodeStringRef& setTo(const UnicodeString& srcText); |
| |
| /** |
| * Set the characters in the UnicodeStringRef object to the characters |
| * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. |
| * @param srcChars the source for the new characters |
| * @param srcLength the number of Unicode characters in srcChars. |
| * @return a reference to this |
| * @stable |
| */ |
| inline UnicodeStringRef& setTo(const UChar *srcChars, |
| int32_t srcLength); |
| /** |
| * Print a single byte version to outStream. |
| * The encoding is UTF-8 if outStream is directed to disk; |
| * if outStream is cout or cerr, the encoding is a Console-CCSID |
| * that will allow most characters to be readable in a shell/command window. |
| */ |
| void toSingleByteStream(std::ostream & outStream) const; |
| |
| |
| |
| private: |
| /* --- functions -------------------------------------------------------- |
| * Private helpers behind the public API. |
| * - doCompare(): the public compare() and compareBetween() overloads and the |
| *   relational operators implemented in this header forward to it. The |
| *   UnicodeStringRef overload passes srcText.getBuffer() on to the UChar |
| *   pointer overload. The relational operators test its result against |
| *   -1 and 1. |
| * - pinIndices(): limits start to at most the string length and length to |
| *   at most the number of characters remaining after start. |
| */ |
| |
| inline int8_t |
| doCompare( int32_t start, |
| int32_t length, |
| const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength) const; |
| |
| int8_t |
| doCompare( int32_t start, |
| int32_t length, |
| const UChar *srcChars, |
| int32_t srcStart, |
| int32_t srcLength) const; |
| inline int8_t |
| doCompareCodePointOrder(int32_t start, |
| int32_t length, |
| const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength) const; |
| int8_t |
| doCompareCodePointOrder(int32_t start, |
| int32_t length, |
| const UChar *srcChars, |
| int32_t srcStart, |
| int32_t srcLength) const; |
| inline int8_t |
| doCaseCompare(int32_t start, |
| int32_t length, |
| const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength, |
| uint32_t options) const; |
| |
| int8_t |
| doCaseCompare(int32_t start, |
| int32_t length, |
| const UChar *srcChars, |
| int32_t srcStart, |
| int32_t srcLength, |
| uint32_t options) const; |
| int32_t doIndexOf(UChar c, |
| int32_t start, |
| int32_t length) const; |
| int32_t doLastIndexOf(UChar c, |
| int32_t start, |
| int32_t length) const; |
| |
| inline void doExtract(int32_t start, |
| int32_t length, |
| UChar *dst, |
| int32_t dstStart) const; |
| inline void doExtract(int32_t start, |
| int32_t length, |
| UnicodeString& dst) const; |
| |
| inline void |
| pinIndices(int32_t& start, |
| int32_t& length) const; |
| // constants |
| enum { |
| kInvalidUChar=0xffff // invalid UChar index |
| }; |
| /* --- variables -------------------------------------------------------- |
| * iv_pUChars  : start of the referenced text; the constructors only store |
| *               the pointer they are given (NULL for the default constructor) |
| * iv_uiLength : number of UChar code units referenced, as returned by length() |
| */ |
| UChar const * iv_pUChars; |
| int32_t iv_uiLength; |
| } |
| ; // class UnicodeStringRef |
| |
| ///Output stream support for UnicodeStringRef: declares operator<< for std::ostream (Note: inside namespace uima) |
| UIMA_LINK_IMPORTSPEC std::ostream & |
| operator << ( |
| std::ostream & outStream, |
| const uima::UnicodeStringRef & crUStrRef |
| ); |
| } // namespace uima |
| |
| |
| /* ----------------------------------------------------------------------- */ |
| /* Implementation UnicodeStringRef */ |
| /* ----------------------------------------------------------------------- */ |
| |
| namespace uima { |
| |
| /** |
| * Default constructor. |
| * Produces an empty reference: NULL buffer pointer and a length of zero. |
| */ |
| inline UnicodeStringRef::UnicodeStringRef( void ) |
|   : iv_pUChars(NULL), iv_uiLength(0) { |
| } |
| |
| /** |
| * Construct a reference onto the buffer of an ICU UnicodeString. |
| * Only the buffer pointer and the length are stored; no characters are copied. |
| * @param crUniString the string whose buffer is referenced |
| */ |
| inline UnicodeStringRef::UnicodeStringRef( const icu::UnicodeString & crUniString ) |
|   : iv_pUChars(crUniString.getBuffer()), |
|     iv_uiLength(crUniString.length()) { |
| } |
| |
| /** |
| * Construct a reference onto a NUL-terminated UChar string. |
| * The length is measured with u_strlen(); a NULL pointer yields an empty |
| * reference of length zero. |
| * @param cpacString NUL-terminated string to refer to, or NULL |
| */ |
| inline UnicodeStringRef::UnicodeStringRef( UChar const * cpacString ) |
|   : iv_pUChars(cpacString), |
|     iv_uiLength(cpacString != NULL ? u_strlen(cpacString) : 0) { |
|   // either a buffer is present, or this is the empty NULL reference |
|   assert( (EXISTS(iv_pUChars)) |
|           || ((iv_pUChars == NULL) && (iv_uiLength == 0)) ); |
| } |
| |
| /** |
| * Construct a reference from a buffer pointer and an explicit length. |
| * The pointer and the length are stored as given. |
| * @param cpacString start of the text to refer to (NULL only with length 0) |
| * @param uiLength   number of UChar code units referenced |
| */ |
| inline UnicodeStringRef::UnicodeStringRef( UChar const * cpacString, |
|                                            int32_t uiLength ) |
|   : iv_pUChars(cpacString), iv_uiLength(uiLength) { |
|   // either a buffer is present, or this is the empty NULL reference |
|   assert( (EXISTS(iv_pUChars)) |
|           || ((iv_pUChars == NULL) && (iv_uiLength == 0)) ); |
| } |
| |
| /** |
| * Construct a reference from a [begin, end) pointer pair. |
| * The length is the pointer distance between the two arguments. |
| * @param paucStringBegin first code unit of the referenced text |
| * @param paucStringEnd   position just behind the last referenced code unit |
| */ |
| inline UnicodeStringRef::UnicodeStringRef( UChar const * paucStringBegin, |
|                                            UChar const * paucStringEnd ) |
|   : iv_pUChars(paucStringBegin), |
|     iv_uiLength(static_cast<int32_t>(paucStringEnd - paucStringBegin)) { |
|   // both ends must be given and must be ordered |
|   assert(EXISTS(paucStringBegin)); |
|   assert(EXISTS(paucStringEnd)); |
|   assert(paucStringEnd >= paucStringBegin); |
|   // same invariant as asserted by the other pointer-based constructors |
|   assert( (EXISTS(iv_pUChars)) |
|           || ((iv_pUChars == NULL) && (iv_uiLength == 0)) ); |
| } |
| |
| /** |
| * @return the stored length of the referenced text (iv_uiLength) |
| */ |
| inline int32_t UnicodeStringRef::length( void ) const { |
|   return iv_uiLength; |
| } |
| |
| /** |
| * @return the size of the referenced text in bytes, i.e. the length |
| *         multiplied by sizeof(UChar) |
| */ |
| inline int32_t UnicodeStringRef::getSizeInBytes( void ) const { |
|   return static_cast<int32_t>(iv_uiLength * sizeof(UChar)); |
| } |
| |
| /** |
| * Return the code unit at index uiIndex. |
| * The index and the buffer are checked with assert() only. |
| * @param uiIndex offset of the code unit, 0 <= uiIndex < length() |
| * @return the code unit stored at uiIndex |
| */ |
| inline UChar |
| UnicodeStringRef::operator[]( int32_t uiIndex ) const { |
|   // the index is signed: a negative value would read before the buffer |
|   assert(uiIndex >= 0); |
|   assert(uiIndex < iv_uiLength); |
|   assert(EXISTS(iv_pUChars)); |
|   return iv_pUChars[uiIndex]; //lint !e613: Possible use of null pointer 'UnicodeStringRef<wchar_t>::iv_pUChars' in left argument to operator '[' |
| } |
| |
| /** |
| * Bitwise equality of the referenced text. |
| * Two references are equal when they have the same length and every code |
| * unit matches. The whole length is compared, including any embedded |
| * U+0000 code units: u_memcmp() is used because u_strncmp() stops at the |
| * first NUL and would report strings that differ behind it as equal. |
| * @param crclRHS the reference to compare with |
| * @return non-zero if both references hold the same text, 0 otherwise |
| */ |
| inline int |
| UnicodeStringRef::operator==( const UnicodeStringRef & crclRHS ) const { |
|   if (iv_uiLength != crclRHS.iv_uiLength) { |
|     return false; |
|   } |
|   return u_memcmp(iv_pUChars, crclRHS.iv_pUChars, iv_uiLength) == 0; |
| } |
| |
| /** |
| * Inequality: the negation of operator==. |
| * @param crclRHS the reference to compare with |
| * @return non-zero if operator== reports the two references as different |
| */ |
| inline int |
| UnicodeStringRef::operator!=( const UnicodeStringRef & crclRHS ) const { |
|   return !(this->operator==(crclRHS)); |
| } |
| |
| /** |
| * Assignment: takes over the buffer pointer and the length of crclRHS. |
| * No characters are copied. |
| * @param crclRHS the reference to assign from |
| * @return a reference to this |
| */ |
| inline UnicodeStringRef & |
| UnicodeStringRef::operator=( UnicodeStringRef const & crclRHS ) { |
|   iv_uiLength = crclRHS.iv_uiLength; |
|   iv_pUChars = crclRHS.iv_pUChars; |
|   return *this; |
| } |
| |
| //======================================== |
| // Read-only alias methods |
| //======================================== |
| /** |
| * Pin a (start, length) pair to the bounds of this string. |
| * After the call 0 <= start <= length() and |
| * 0 <= length <= length() - start hold. |
| * Negative values are pinned to 0, so that a negative start can neither |
| * address memory in front of the buffer nor overflow the subtraction below. |
| * @param start  in/out: offset into this string |
| * @param length in/out: number of code units beginning at start |
| */ |
| inline void |
| UnicodeStringRef::pinIndices(int32_t& start, |
|                              int32_t& length) const { |
|   // pin start into [0, iv_uiLength] |
|   if (start < 0) { |
|     start = 0; |
|   } else if (start > iv_uiLength) { |
|     start = iv_uiLength; |
|   } |
|   // pin length into [0, iv_uiLength - start]; start is already in range here |
|   const int32_t remaining = iv_uiLength - start; |
|   if (length < 0) { |
|     length = 0; |
|   } else if (length > remaining) { |
|     length = remaining; |
|   } |
| } |
| |
| /** |
| * Greater-than, based on the ordering reported by doCompare(). |
| * @param text the reference to compare with |
| * @return true if doCompare() over both complete strings yields 1 |
| */ |
| inline bool |
| UnicodeStringRef::operator> (const UnicodeStringRef& text) const { |
|   const int8_t order = doCompare(0, iv_uiLength, text, 0, text.iv_uiLength); |
|   return order == 1; |
| } |
| |
| /** |
| * Less-than, based on the ordering reported by doCompare(). |
| * @param text the reference to compare with |
| * @return true if doCompare() over both complete strings yields -1 |
| */ |
| inline bool |
| UnicodeStringRef::operator< (const UnicodeStringRef& text) const { |
|   const int8_t order = doCompare(0, iv_uiLength, text, 0, text.iv_uiLength); |
|   return order == -1; |
| } |
| |
| /** |
| * Greater-or-equal, based on the ordering reported by doCompare(). |
| * @param text the reference to compare with |
| * @return true unless doCompare() over both complete strings yields -1 |
| */ |
| inline bool |
| UnicodeStringRef::operator>= (const UnicodeStringRef& text) const { |
|   const int8_t order = doCompare(0, iv_uiLength, text, 0, text.iv_uiLength); |
|   return order != -1; |
| } |
| |
| /** |
| * Less-or-equal, based on the ordering reported by doCompare(). |
| * @param text the reference to compare with |
| * @return true unless doCompare() over both complete strings yields 1 |
| */ |
| inline bool |
| UnicodeStringRef::operator<= (const UnicodeStringRef& text) const { |
|   const int8_t order = doCompare(0, iv_uiLength, text, 0, text.iv_uiLength); |
|   return order != 1; |
| } |
| |
| /** |
| * Compare this complete string with the complete string text. |
| * @param text the reference to compare with |
| * @return the result of doCompare() over both full ranges |
| */ |
| inline int8_t |
| UnicodeStringRef::compare(const UnicodeStringRef& text) const { |
|   const int32_t otherLength = text.iv_uiLength; |
|   return doCompare(0, iv_uiLength, text, 0, otherLength); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::compare(int32_t start, |
| int32_t length, |
| const UnicodeStringRef& srcText) const { |
| return doCompare(start, length, srcText, 0, srcText.iv_uiLength); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::compare(const UChar *srcChars, |
| int32_t srcLength) const { |
| return doCompare(0, iv_uiLength, srcChars, 0, srcLength); |
| } |
| |
| |
| inline int8_t |
| UnicodeStringRef::compare(icu::UnicodeString const &src ) const { |
| return doCompare(0, iv_uiLength, src.getBuffer(), 0, src.length()); |
| } |
| |
| |
| inline int8_t |
| UnicodeStringRef::compare(int32_t start, |
| int32_t length, |
| const UChar *srcChars) const { |
| return doCompare(start, length, srcChars, 0, length); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::compare(int32_t start, |
| int32_t length, |
| const UChar *srcChars, |
| int32_t srcStart, |
| int32_t srcLength) const { |
| return doCompare(start, length, srcChars, srcStart, srcLength); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::compare(int32_t start, |
| int32_t length, |
| const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength) const { |
| return doCompare(start, length, srcText, srcStart, srcLength); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::compareBetween(int32_t start, |
| int32_t limit, |
| const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLimit) const { |
| return doCompare(start, limit - start, |
| srcText, srcStart, srcLimit - srcStart); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::doCompare(int32_t start, |
| int32_t length, |
| const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength) const { |
| const UChar *srcChars = srcText.getBuffer(); |
| return doCompare(start, length, srcChars, srcStart, srcLength); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::compareCodePointOrder(const UnicodeStringRef& text) const { |
| return doCompareCodePointOrder(0, iv_uiLength, text, 0, text.iv_uiLength); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::compareCodePointOrder(int32_t start, |
| int32_t length, |
| const UnicodeStringRef& srcText) const { |
| return doCompareCodePointOrder(start, length, srcText, 0, srcText.iv_uiLength); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::compareCodePointOrder(const UChar *srcChars, |
| int32_t srcLength) const { |
| return doCompareCodePointOrder(0, iv_uiLength, srcChars, 0, srcLength); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::compareCodePointOrder(int32_t start, |
| int32_t length, |
| const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength) const { |
| return doCompareCodePointOrder(start, length, srcText, srcStart, srcLength); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::compareCodePointOrder(int32_t start, |
| int32_t length, |
| const UChar *srcChars) const { |
| return doCompareCodePointOrder(start, length, srcChars, 0, length); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::compareCodePointOrder(int32_t start, |
| int32_t length, |
| const UChar *srcChars, |
| int32_t srcStart, |
| int32_t srcLength) const { |
| return doCompareCodePointOrder(start, length, srcChars, srcStart, srcLength); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::compareCodePointOrderBetween(int32_t start, |
| int32_t limit, |
| const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLimit) const { |
| return doCompareCodePointOrder(start, limit - start, |
| srcText, srcStart, srcLimit - srcStart); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::doCompareCodePointOrder(int32_t start, |
| int32_t length, |
| const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength) const { |
| const UChar *srcChars = srcText.getBuffer(); |
| return doCompareCodePointOrder(start, length, srcChars, srcStart, srcLength); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::caseCompare(const UnicodeStringRef &text, uint32_t options) const { |
| return doCaseCompare(0, iv_uiLength, text, 0, text.iv_uiLength, options); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::caseCompare(int32_t start, |
| int32_t length, |
| const UnicodeStringRef &srcText, |
| uint32_t options) const { |
| return doCaseCompare(start, length, srcText, 0, srcText.iv_uiLength, options); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::caseCompare(const UChar *srcChars, |
| int32_t srcLength, |
| uint32_t options) const { |
| return doCaseCompare(0, iv_uiLength, srcChars, 0, srcLength, options); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::caseCompare(int32_t start, |
| int32_t length, |
| const UnicodeStringRef &srcText, |
| int32_t srcStart, |
| int32_t srcLength, |
| uint32_t options) const { |
| return doCaseCompare(start, length, srcText, srcStart, srcLength, options); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::caseCompare(int32_t start, |
| int32_t length, |
| const UChar *srcChars, |
| uint32_t options) const { |
| return doCaseCompare(start, length, srcChars, 0, length, options); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::caseCompare(int32_t start, |
| int32_t length, |
| const UChar *srcChars, |
| int32_t srcStart, |
| int32_t srcLength, |
| uint32_t options) const { |
| return doCaseCompare(start, length, srcChars, srcStart, srcLength, options); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::caseCompareBetween(int32_t start, |
| int32_t limit, |
| const UnicodeStringRef &srcText, |
| int32_t srcStart, |
| int32_t srcLimit, |
| uint32_t options) const { |
| return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); |
| } |
| |
| inline int8_t |
| UnicodeStringRef::doCaseCompare(int32_t start, |
| int32_t length, |
| const UnicodeStringRef &srcText, |
| int32_t srcStart, |
| int32_t srcLength, |
| uint32_t options) const { |
| const UChar *srcChars = srcText.getBuffer(); |
| return doCaseCompare(start, length, srcChars, srcStart, srcLength, options); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::indexOf(const UnicodeStringRef& text) const { |
| return indexOf(text, 0, text.iv_uiLength, 0, iv_uiLength); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::indexOf(const UnicodeStringRef& text, |
| int32_t start) const { |
| return indexOf(text, 0, text.iv_uiLength, start, iv_uiLength - start); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::indexOf(const UnicodeStringRef& text, |
| int32_t start, |
| int32_t length) const { |
| return indexOf(text, 0, text.iv_uiLength, start, length); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::indexOf(const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength, |
| int32_t start, |
| int32_t length) const { |
| return indexOf(srcText.getBuffer(), srcStart, srcLength, start, length); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::indexOf(const UChar *srcChars, |
| int32_t srcLength, |
| int32_t start) const { |
| return indexOf(srcChars, 0, srcLength, start, iv_uiLength - start); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::indexOf(const UChar *srcChars, |
| int32_t srcLength, |
| int32_t start, |
| int32_t length) const { |
| return indexOf(srcChars, 0, srcLength, start, length); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::indexOf(UChar c) const { |
| return doIndexOf(c, 0, iv_uiLength); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::indexOf(UChar32 c) const { |
| if (!UTF_NEED_MULTIPLE_UCHAR(c)) { |
| return doIndexOf((UChar)c, 0, iv_uiLength); |
| } else { |
| UChar buffer[UTF_MAX_CHAR_LENGTH]; |
| int32_t length = 0; |
| UTF_APPEND_CHAR_UNSAFE(buffer, length, c); |
| return indexOf(buffer, length, 0); |
| } |
| } |
| |
| inline int32_t |
| UnicodeStringRef::indexOf(UChar c, |
| int32_t start) const { |
| return doIndexOf(c, start, iv_uiLength - start); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::indexOf(UChar32 c, |
| int32_t start) const { |
| if (!UTF_NEED_MULTIPLE_UCHAR(c)) { |
| return doIndexOf((UChar)c, start, iv_uiLength - start); |
| } else { |
| UChar buffer[UTF_MAX_CHAR_LENGTH]; |
| int32_t length = 0; |
| UTF_APPEND_CHAR_UNSAFE(buffer, length, c); |
| return indexOf(buffer, length, start); |
| } |
| } |
| |
| inline int32_t |
| UnicodeStringRef::indexOf(UChar c, |
| int32_t start, |
| int32_t length) const { |
| return doIndexOf(c, start, length); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::indexOf(UChar32 c, |
| int32_t start, |
| int32_t length) const { |
| if (!UTF_NEED_MULTIPLE_UCHAR(c)) { |
| return doIndexOf((UChar)c, start, length); |
| } else { |
| UChar buffer[UTF_MAX_CHAR_LENGTH]; |
| int32_t cLength = 0; |
| UTF_APPEND_CHAR_UNSAFE(buffer, cLength, c); |
| return indexOf(buffer, cLength, start, length); |
| } |
| } |
| |
| inline int32_t |
| UnicodeStringRef::lastIndexOf(const UnicodeStringRef& text) const { |
| return lastIndexOf(text, 0, text.iv_uiLength, 0, iv_uiLength); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::lastIndexOf(const UnicodeStringRef& text, |
| int32_t start) const { |
| return lastIndexOf(text, 0, text.iv_uiLength, start, iv_uiLength - start); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::lastIndexOf(const UnicodeStringRef& text, |
| int32_t start, |
| int32_t length) const { |
| return lastIndexOf(text, 0, text.iv_uiLength, start, length); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::lastIndexOf(const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength, |
| int32_t start, |
| int32_t length) const { |
| return lastIndexOf(srcText.getBuffer(), srcStart, srcLength, start, length); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::lastIndexOf(const UChar *srcChars, |
| int32_t srcLength, |
| int32_t start) const { |
| return lastIndexOf(srcChars, 0, srcLength, start, iv_uiLength - start); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::lastIndexOf(const UChar *srcChars, |
| int32_t srcLength, |
| int32_t start, |
| int32_t length) const { |
| return lastIndexOf(srcChars, 0, srcLength, start, length); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::lastIndexOf(UChar c) const { |
| return doLastIndexOf(c, 0, iv_uiLength); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::lastIndexOf(UChar32 c) const { |
| if (!UTF_NEED_MULTIPLE_UCHAR(c)) { |
| return doLastIndexOf((UChar)c, 0, iv_uiLength); |
| } else { |
| UChar buffer[UTF_MAX_CHAR_LENGTH]; |
| int32_t count = 0; |
| UTF_APPEND_CHAR_UNSAFE(buffer, count, c); |
| return lastIndexOf(buffer, count, 0); |
| } |
| } |
| |
| inline int32_t |
| UnicodeStringRef::lastIndexOf(UChar c, |
| int32_t start) const { |
| return doLastIndexOf(c, start, iv_uiLength - start); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::lastIndexOf(UChar32 c, |
| int32_t start) const { |
| if (!UTF_NEED_MULTIPLE_UCHAR(c)) { |
| return doLastIndexOf((UChar)c, start, iv_uiLength - start); |
| } else { |
| UChar buffer[UTF_MAX_CHAR_LENGTH]; |
| int32_t count = 0; |
| UTF_APPEND_CHAR_UNSAFE(buffer, count, c); |
| return lastIndexOf(buffer, count, start); |
| } |
| } |
| |
| inline int32_t |
| UnicodeStringRef::lastIndexOf(UChar c, |
| int32_t start, |
| int32_t length) const { |
| return doLastIndexOf(c, start, length); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::lastIndexOf(UChar32 c, |
| int32_t start, |
| int32_t length) const { |
| if (!UTF_NEED_MULTIPLE_UCHAR(c)) { |
| return doLastIndexOf((UChar)c, start, length); |
| } else { |
| UChar buffer[UTF_MAX_CHAR_LENGTH]; |
| int32_t count = 0; |
| UTF_APPEND_CHAR_UNSAFE(buffer, count, c); |
| return lastIndexOf(buffer, count, start, length); |
| } |
| } |
| |
| inline bool |
| UnicodeStringRef::startsWith(const UnicodeStringRef& text) const { |
| return compare(0, text.iv_uiLength, text, 0, text.iv_uiLength) == 0; |
| } |
| |
| inline bool |
| UnicodeStringRef::startsWith(const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength) const { |
| return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; |
| } |
| |
| inline bool |
| UnicodeStringRef::startsWith(const UChar *srcChars, |
| int32_t srcLength) const { |
| return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; |
| } |
| |
| inline bool |
| UnicodeStringRef::startsWith(const UChar *srcChars, |
| int32_t srcStart, |
| int32_t srcLength) const { |
| return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; |
| } |
| |
| inline bool |
| UnicodeStringRef::endsWith(const UnicodeStringRef& text) const { |
| return doCompare(iv_uiLength - text.iv_uiLength, text.iv_uiLength, |
| text, 0, text.iv_uiLength) == 0; |
| } |
| |
| inline bool |
| UnicodeStringRef::endsWith(const UnicodeStringRef& srcText, |
| int32_t srcStart, |
| int32_t srcLength) const { |
| return doCompare(iv_uiLength - srcLength, srcLength, |
| srcText, srcStart, srcLength) == 0; |
| } |
| |
| inline bool |
| UnicodeStringRef::endsWith(const UChar *srcChars, |
| int32_t srcLength) const { |
| return doCompare(iv_uiLength - srcLength, srcLength, |
| srcChars, 0, srcLength) == 0; |
| } |
| |
| inline bool |
| UnicodeStringRef::endsWith(const UChar *srcChars, |
| int32_t srcStart, |
| int32_t srcLength) const { |
| return doCompare(iv_uiLength - srcLength, srcLength, |
| srcChars, srcStart, srcLength) == 0; |
| } |
| |
| // ============================ |
| // extract implementations (some in .cpp) |
| // ============================ |
| inline void |
| UnicodeStringRef::extract(int32_t start, |
| int32_t length, |
| UChar *dst, |
| int32_t dstStart) const { |
| pinIndices(start, length); |
| memcpy(dst+dstStart, getBuffer()+start, length*sizeof(UChar)); |
| } |
| |
| |
| inline void |
| UnicodeStringRef::extract(int32_t start, |
| int32_t length, |
| UnicodeString& target) const { |
| target.replace(0, target.length(), getBuffer(), start, length); |
| } |
| // Replaces all of target by substring of src |
| // Could use setTo(getBuffer()+start,length) but that is implemented as a replace |
| |
| inline void |
| UnicodeStringRef::extractBetween(int32_t start, |
| int32_t limit, |
| UChar *dst, |
| int32_t dstStart) const { |
| extract(start, limit - start, dst, dstStart); |
| } |
| |
| inline void |
| UnicodeStringRef::extractBetween(int32_t start, |
| int32_t limit, |
| UnicodeString& dst) const { |
| extract(start, limit - start, dst); |
| } |
| |
| |
| |
| inline int32_t |
| UnicodeStringRef::extract(int32_t start, |
| int32_t length, |
| char *target, |
| const char *codepage) const { |
| // User-beware ... assumes target buffer is large enough |
| // Capacity assumed to be either large, or 0 if no buffer provided (pre-flighting) |
| return extract(start, length, target, target!=0 ? 0xffffffff : 0, codepage); |
| } |
| |
| inline int32_t |
| UnicodeStringRef::extract(std::string & target, |
| const char *codepage) const { |
| return extract(0, iv_uiLength, target, codepage); |
| } |
| |
| inline std::string |
| UnicodeStringRef::asUTF8(void) const { |
| std::string target; |
| extractUTF8(target); |
| return target; |
| } |
| |
| inline UChar |
| UnicodeStringRef::charAt(int32_t offset) const { |
| assert(EXISTS(iv_pUChars)); |
| if ((uint32_t)offset < (uint32_t)iv_uiLength) { |
| return iv_pUChars[offset]; |
| } else { |
| return kInvalidUChar; |
| } |
| } |
| |
| inline UChar32 |
| UnicodeStringRef::char32At(int32_t offset) const { |
| if ((uint32_t)offset < (uint32_t)iv_uiLength) { |
| UChar32 c; |
| UTF_GET_CHAR(iv_pUChars, 0, offset, iv_uiLength, c); |
| return c; |
| } else { |
| return kInvalidUChar; |
| } |
| } |
| |
| inline int32_t |
| UnicodeStringRef::getChar32Start(int32_t offset) const { |
| if ((uint32_t)offset < (uint32_t)iv_uiLength) { |
| UTF_SET_CHAR_START(iv_pUChars, 0, offset); |
| return offset; |
| } else { |
| return 0; |
| } |
| } |
| |
| inline int32_t |
| UnicodeStringRef::getChar32Limit(int32_t offset) const { |
| if ((uint32_t)offset < (uint32_t)iv_uiLength) { |
| UTF_SET_CHAR_LIMIT(iv_pUChars, 0, offset, iv_uiLength); |
| return offset; |
| } else { |
| return iv_uiLength; |
| } |
| } |
| |
| inline bool |
| UnicodeStringRef::isEmpty() const { |
| return iv_uiLength == 0; |
| } |
| |
| inline UChar const * |
| UnicodeStringRef::getBuffer() const { |
| return iv_pUChars; |
| } |
| |
| inline int8_t |
| UnicodeStringRef::doCaseCompare(int32_t start, |
| int32_t length, |
| const UChar *srcChars, |
| int32_t srcStart, |
| int32_t srcLength, |
| uint32_t options) const { |
| icu::UnicodeString s(iv_pUChars+start, (int32_t)length); |
| return s.caseCompare(srcChars + srcStart, (int32_t)srcLength, options); |
| } |
| |
| inline UnicodeStringRef& UnicodeStringRef::setTo(const UnicodeStringRef& srcText) { |
| iv_pUChars = srcText.iv_pUChars; |
| iv_uiLength = srcText.iv_uiLength; |
| return (*this); |
| } |
| |
| inline UnicodeStringRef& UnicodeStringRef::setTo(const UnicodeString& srcText) { |
| iv_pUChars = srcText.getBuffer(); |
| iv_uiLength = srcText.length(); |
| return (*this); |
| } |
| |
| inline UnicodeStringRef& UnicodeStringRef::setTo(const UChar *srcChars, int32_t srcLength) { |
| iv_pUChars = srcChars; |
| iv_uiLength = srcLength; |
| return (*this); |
| } |
| |
| UIMA_LINK_IMPORTSPEC std::ostream & |
| operator << ( |
| std::ostream & rclOStream, |
| const UnicodeStringRef & crclLString |
| ); |
| |
| |
| |
| /* ----------------------------------------------------------------------- */ |
| /** @name vector to/from delimited string conversion routines */ |
| /* ----------------------------------------------------------------------- */ |
| /*@{*/ |
| |
| /** |
| Removes whitespace from both ends of a string. |
| Template function using <TT>isspace_templ()</TT>. |
| */ |
| inline UnicodeStringRef |
| strtrim( |
| const UnicodeStringRef & s |
| ) { |
| if (s.length() == 0) { |
| return s; |
| } |
| UChar const * beg = s.getBuffer(); |
| UChar const * end = s.getBuffer()+s.length()-1; |
| while (end >= beg && u_isspace(*end) ) { |
| --end; |
| } |
| while (beg < end && u_isspace(*beg) ) { |
| ++beg; |
| } |
| return UnicodeStringRef(beg, end-beg+1); |
| } |
| |
| /** |
| Splits a delimited string into pieces and stores the results in a vector |
| of strings. Delimiters are passed as a zero terminated string. |
| |
| @param rveclstrOutput (Output) The vector where the results are stored |
| @param pcInput The delimited string to split. |
| @param uiInputLength The number of chars in pcInput |
| @param cpszDelimiters The delimiters. UChar* are interpreted as a set of delimiters. |
| @param bTrimString Flag: If true, all pieces will be trimmed before storing in <TT>storeVar</TT> |
| @param bInsertEmptyStrings Flag: If false, pieces that have length 0 will not be stored in <TT>storeVar</TT> |
| |
| @return The number of strings added to <TT>rvecstrOutput</TT> |
| */ |
| UIMA_LINK_IMPORTSPEC int32_t |
| delimitedUnicodeStringRef2Vector( |
| std::vector< uima::UnicodeStringRef > & rveclstrOutput, |
| const UChar * pcInput, |
| int32_t uiInputLength, |
| const UChar * cpszDelimiters, |
| bool bTrimString, |
| bool bInsertEmptyStrings |
| ); |
| |
| inline int32_t |
| delimitedUnicodeStringRef2Vector( |
| std::vector< UnicodeStringRef > & veclstrOutput, |
| const UChar * pcInput, |
| const UChar * cpszDelimiters, |
| bool bTrimString, |
| bool bInsertEmptyStrings |
| ) { |
| return delimitedUnicodeStringRef2Vector(veclstrOutput, pcInput, u_strlen(pcInput), cpszDelimiters, bTrimString, bInsertEmptyStrings); |
| } |
| |
| //@} |
| |
| } // namespace uima |
| |
| #endif /* UIMA_UNICODESTRINGREF_HPP */ |
| |
| /* <EOF> */ |
| |