src/framework/unistrref.cpp - uima-uimacpp - Git at Google

 /**
 -----------------------------------------------------------------------------

            string interface of icu::UnicodeString

  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.

 -----------------------------------------------------------------------------


        6/26/1998     Initial creation

 -----------------------------------------------------------------------------
 */

 #include "uima/unistrref.hpp"
 #include <algorithm> // for min
 #ifdef _MSC_VER
 #include <minmax.h> // for min
 #endif
 /* ----------------------------------------------------------------------- */
 /*       Types / Classes                                                   */
 /* ----------------------------------------------------------------------- */
 using namespace std;
 namespace uima {


 //========================================
 // Read-only implementation
 //========================================

   int8_t
   UnicodeStringRef::doCompare( int32_t start,
                                int32_t length,
                                const UChar *srcChars,
                                int32_t srcStart,
                                int32_t srcLength) const {
     // compare illegal string values
     if (srcChars==0) {
       return 1;
     }

     // pin indices to legal values
     pinIndices(start, length);

     // get the correct pointer
     const UChar *chars = getBuffer();

     chars += start;
     srcChars += srcStart;

     int32_t minLength;
     int8_t lengthResult;

     // are we comparing different lengths?
     if (length != srcLength) {
       if (length < srcLength) {
         minLength = length;
         lengthResult = -1;
       } else {
         minLength = srcLength;
         lengthResult = 1;
       }
     } else {
       minLength = length;
       lengthResult = 0;
     }

     /*
      * note that uprv_memcmp() returns an int but we return an int8_t;
      * we need to take care not to truncate the result -
      * one way to do this is to right-shift the value to
      * move the sign bit into the lower 8 bits and making sure that this
      * does not become 0 itself
      */

     if (minLength > 0 && chars != srcChars) {
       int32_t result;

 #   ifdef WORDS_BIGENDIAN
       // big-endian: byte comparison works
       result = memcmp(chars, srcChars, minLength * sizeof(UChar));
       if (result != 0) {
         return (int8_t)(result >> 15 | 1);
       }
 #   else
       // little-endian: compare UChar units
       do {
         result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
         if (result != 0) {
           return (int8_t)(result >> 15 | 1);
         }
       } while (--minLength > 0);
 #   endif
     }
     return lengthResult;
   }


   /**
    * String compare in code point order - doCompare() compares in code unit order.
    *
    * The shared prefix of both ranges is compared with
    * u_strncmpCodePointOrder(); if it is identical the shorter range sorts
    * first, mirroring the tie-break used by doCompare().
    *
    * @param start     start of the range in this string (pinned by pinIndices())
    * @param length    length of the range in this string (pinned by pinIndices())
    * @param srcChars  buffer to compare against; a null pointer yields 1
    * @param srcStart  offset into srcChars, used exactly as passed
    * @param srcLength number of code units of srcChars, used exactly as passed
    * @return -1, 0 or 1
    */
   int8_t
   UnicodeStringRef::doCompareCodePointOrder(int32_t start,
       int32_t length,
       const UChar *srcChars,
       int32_t srcStart,
       int32_t srcLength) const {
     if (srcChars == NULL) {
       return 1;
     }

     // pin indices to legal values
     pinIndices(start, length);

     // Verdict when the shared prefix is equal: previously this case always
     // returned 0, so a string compared equal to any longer string that
     // merely started with it.
     int32_t minLength = length;
     int8_t lengthResult = 0;
     if (length < srcLength) {
       lengthResult = -1;
     } else if (length > srcLength) {
       minLength = srcLength;
       lengthResult = 1;
     }

     if (minLength > 0) {
       const int32_t diff = u_strncmpCodePointOrder(getBuffer() + start,
                                                    srcChars + srcStart,
                                                    minLength);
       /* translate the 32-bit result into an 8-bit one */
       if (diff != 0) {
         return (int8_t)(diff >> 15 | 1);
       }
     }
     return lengthResult;
   }

   /**
    * Find the first occurrence of a UChar sequence inside a range of this
    * string.
    *
    * @param srcChars  buffer holding the text to search for
    * @param srcStart  offset of the search text inside srcChars
    * @param srcLength length of the search text
    * @param start     start of the range to search (pinned by pinIndices())
    * @param length    length of the range to search (pinned by pinIndices())
    * @return index of the first match, or -1 if there is none or the
    *         arguments are unusable (null buffer, negative offset,
    *         non-positive search length)
    */
   int32_t UnicodeStringRef::indexOf(UChar const *srcChars,
                                     int32_t srcStart,
                                     int32_t srcLength,
                                     int32_t start,
                                     int32_t length) const {
     // A negative srcLength used to slip through and *enlarge* the search
     // window below, reading past the buffer; reject it together with the
     // other unusable inputs.
     if (srcChars == 0 || srcStart < 0 || srcLength <= 0) {
       return -1;
     }

     // get the indices within bounds
     pinIndices(start, length);

     if (length < srcLength) {
       return -1;
     }

     // The first code unit is matched directly; the remainder (possibly
     // empty) is handed to compare().
     const UChar first = srcChars[srcStart];
     const int32_t restStart = srcStart + 1;
     const int32_t restLength = srcLength - 1;

     // one past the last position at which a complete match still fits;
     // always > start because length >= srcLength >= 1
     const int32_t limit = start + length - restLength;

     const UChar *array = getBuffer();
     for (int32_t pos = start; pos < limit; ++pos) {
       if (array[pos] == first
           && (restLength == 0
               || compare(pos + 1, restLength, srcChars, restStart, restLength) == 0)) {
         return pos;
       }
     }

     return -1;
   }

   /**
    * Find the last occurrence of a UChar sequence inside a range of this
    * string.
    *
    * @param srcChars  buffer holding the text to search for
    * @param srcStart  offset of the search text inside srcChars
    * @param srcLength length of the search text
    * @param start     start of the range to search (pinned by pinIndices())
    * @param length    length of the range to search (pinned by pinIndices())
    * @return index of the last match, or -1 if there is none or the
    *         arguments are unusable (null buffer, negative offset,
    *         non-positive search length)
    */
   int32_t UnicodeStringRef::lastIndexOf(UChar const *srcChars,
                                         int32_t srcStart,
                                         int32_t srcLength,
                                         int32_t start,
                                         int32_t length) const {
     // A negative srcLength used to slip through and *enlarge* the search
     // window below, reading past the buffer; reject it together with the
     // other unusable inputs.
     if (srcChars == 0 || srcStart < 0 || srcLength <= 0) {
       return -1;
     }

     // get the indices within bounds
     pinIndices(start, length);

     if (length < srcLength) {
       return -1;
     }

     // The first code unit is matched directly; the remainder (possibly
     // empty) is handed to compare().
     const UChar first = srcChars[srcStart];
     const int32_t restStart = srcStart + 1;
     const int32_t restLength = srcLength - 1;

     // last position at which a complete match still fits; never below
     // start because length >= srcLength >= 1
     const int32_t lastCandidate = start + length - restLength - 1;

     const UChar *array = getBuffer();
     for (int32_t pos = lastCandidate; pos >= start; --pos) {
       if (array[pos] == first
           && (restLength == 0
               || compare(pos + 1, restLength, srcChars, restStart, restLength) == 0)) {
         return pos;
       }
     }

     return -1;
   }


   /**
    * Find the first occurrence of a single code unit inside a range of
    * this string.
    *
    * @param c      code unit to look for
    * @param start  start of the range to search (pinned by pinIndices())
    * @param length length of the range to search (pinned by pinIndices())
    * @return index of the first match relative to the buffer start, or -1
    */
   int32_t
   UnicodeStringRef::doIndexOf(UChar c,
                               int32_t start,
                               int32_t length) const {
     // pin indices
     pinIndices(start, length);
     if (length == 0) {
       return -1;
     }

     // walk forward through [start, start + length)
     const UChar *buffer = getBuffer();
     const int32_t end = start + length;
     int32_t pos = start;

     do {
       if (buffer[pos] == c) {
         return pos;
       }
       ++pos;
     } while (pos < end);

     return -1;
   }

   /**
    * Find the last occurrence of a single code unit inside a range of
    * this string.
    *
    * @param c      code unit to look for
    * @param start  start of the range to search (pinned by pinIndices())
    * @param length length of the range to search (pinned by pinIndices())
    * @return index of the last match relative to the buffer start, or -1
    */
   int32_t
   UnicodeStringRef::doLastIndexOf(UChar c,
                                   int32_t start,
                                   int32_t length) const {
     // pin indices
     pinIndices(start, length);
     if (length == 0) {
       return -1;
     }

     // walk backward from the last code unit of the range
     const UChar *buffer = getBuffer();
     int32_t pos = start + length;

     do {
       --pos;
       if (buffer[pos] == c) {
         return pos;
       }
     } while (pos > start);

     return -1;
   }

   int32_t UnicodeStringRef::moveIndex32(int32_t index, int32_t delta) const {
     icu::UnicodeString s((UBool)false, getBuffer(), length());
     return s.moveIndex32(index, delta);
   }

   /**
    * Copy this string into a UChar buffer by forwarding to
    * icu::UnicodeString::extract(UChar*, int32_t, UErrorCode&).
    *
    * @param dest         destination buffer
    * @param destCapacity capacity of dest
    * @param errorCode    ICU error code, passed through
    * @return whatever the ICU call returns
    */
   int32_t
   UnicodeStringRef::extract(UChar *dest, int32_t destCapacity,
                             UErrorCode &errorCode) const {
     // The readonly aliasing constructor should be cheap as no copy is done
     const icu::UnicodeString alias((UBool)false, getBuffer(), length());
     return alias.extract(dest, destCapacity, errorCode);
   }

   int32_t UnicodeStringRef::extract(int32_t start,
                                     int32_t startLength,
                                     char *target,
                                     uint32_t targetLength,
                                     const char *codepage) const {
     icu::UnicodeString s((UBool)false, getBuffer(), length());
     return s.extract(start, startLength, target, targetLength, codepage);
   }

   int32_t UnicodeStringRef::extract(char *target, int32_t targetCapacity,
                                     UConverter *cnv,
                                     UErrorCode &errorCode) const {
     icu::UnicodeString s((UBool)false, getBuffer(), length());
     return s.extract(target, targetCapacity, cnv, errorCode);
   }

 // Copy with conversion into a std::string
   int32_t UnicodeStringRef::extract(int32_t start,
                                     int32_t startLength,
                                     std::string & target,
                                     const char *codepage) const {
     if (length() == 0) {
       target.clear();
       return 0;
     }

     // First use a buffer on the stack ... if too small allocate and try again
     const int32_t STACK_BUF_SIZE = 256;
     char  stackBuf [STACK_BUF_SIZE];
     char* heapBuf = NULL;
     char* buf = stackBuf;

     // Use a converter so can be left open if have to convert twice
     // If fail to open converter simply return empty string ... must be unknown!
     UErrorCode err = U_ZERO_ERROR;
     UConverter* cnv = ucnv_open(codepage, &err);
     if ( U_FAILURE(err) ) {
       target.clear();
       return 0;
     }

     const UChar* src = getBuffer() + start;
     int len = ucnv_fromUChars(cnv, buf, STACK_BUF_SIZE, src, startLength, &err);
     if ( err == U_BUFFER_OVERFLOW_ERROR || err == U_STRING_NOT_TERMINATED_WARNING ) {
       buf = heapBuf = new char [len+1];
       err = U_ZERO_ERROR;
       len = ucnv_fromUChars(cnv, buf, len+1, src, startLength, &err);
     }

     target.assign(buf, len);                   // Copy the result to the string

     if (heapBuf != NULL)
       delete [] heapBuf;
     ucnv_close(cnv);

     return len;
   }

 // Extract into a UTF-8 std::string
   int32_t UnicodeStringRef::extractUTF8(std::string & target) const {
     if (length() == 0) {
       target.clear();
       return 0;
     }

     // First use a buffer on the stack ... if too small allocate and try again
     const int32_t STACK_BUF_SIZE = 256;
     char  stackBuf [STACK_BUF_SIZE];
     char* heapBuf = NULL;
     char* buf = stackBuf;
     int32_t len;

     UErrorCode err = U_ZERO_ERROR;
     u_strToUTF8(buf, STACK_BUF_SIZE, &len, getBuffer(), length(), &err);
     if ( err == U_BUFFER_OVERFLOW_ERROR || err == U_STRING_NOT_TERMINATED_WARNING ) {
       buf = heapBuf = new char [len+1];
       err = U_ZERO_ERROR;
       u_strToUTF8(buf, len+1, &len, getBuffer(), length(), &err);
     }

     target.assign(buf, len);                   // Copy the result to the string

     if (heapBuf != NULL)
       delete [] heapBuf;

     return len;
   }

 // Static method releases contents of string container allocated by extract methods
 //
 // target - string to empty; its storage is handed to a temporary that is
 //          destroyed immediately. reserve(1) was only a non-binding shrink
 //          request and has no shrinking effect at all since C++20.
   void UnicodeStringRef::release(std::string & target) {
     std::string().swap(target);
   }


   /**
    * Write this string to a byte-oriented output stream.
    *
    * Streams sharing a buffer with cout or cerr get a null codepage
    * (default encoding); every other stream gets "utf-8".
    *
    * @param outStream stream to write the converted bytes to
    */
   void
   UnicodeStringRef::toSingleByteStream(std::ostream & outStream) const {
     // If output goes to console use default encoding
     const bool toConsole = (outStream.rdbuf() == cout.rdbuf())
                            || (outStream.rdbuf() == cerr.rdbuf());
     const char* codepage = toConsole ? (const char*)0 : "utf-8";

     std::string converted;
     extract(converted, codepage);               // get a single byte string
     outStream << converted;
   }

   /**
    * Stream insertion for UnicodeStringRef (namespace uima): delegates to
    * UnicodeStringRef::toSingleByteStream().
    *
    * @param outStream stream to write to
    * @param crUStrRef string to write
    * @return outStream, to allow chaining
    */
   std::ostream &
   operator << (std::ostream & outStream, const uima::UnicodeStringRef & crUStrRef) {
     crUStrRef.toSingleByteStream(outStream);
     return outStream;
   }

   /**
    * Split a UChar buffer at any of the given delimiter characters and
    * append the pieces, as UnicodeStringRef views into the input, to a
    * vector.
    *
    * A delimiter at the very end of the input does not produce a trailing
    * empty piece.
    *
    * @param rveclstrOutput      vector the pieces are appended to
    * @param pcInput             text to split
    * @param uiInputLength       number of UChars in pcInput
    * @param cpszDelimiters      NUL-terminated set of delimiter characters
    * @param bTrimString         if true each piece is passed through strtrim()
    * @param bInsertEmptyStrings if true empty pieces are appended as well
    * @return number of pieces appended
    */
   int32_t
   delimitedUnicodeStringRef2Vector(
     std::vector< uima::UnicodeStringRef > & rveclstrOutput,
     const UChar                          * pcInput,
     int32_t                                 uiInputLength,
     const UChar                          * cpszDelimiters,
     bool                                   bTrimString,
     bool                                   bInsertEmptyStrings
   ) {
     if (pcInput == NULL || uiInputLength <= 0) {
       return 0;
     }

     const int32_t uiDelimitersLen = u_strlen(cpszDelimiters);
     UChar const * const pcInputEnd = pcInput + uiInputLength;
     UChar const * pcBegin = pcInput;
     int32_t uiNumFound = 0;

     while (pcBegin < pcInputEnd) {
       const int32_t uiRemaining = (int32_t)(pcInputEnd - pcBegin);
       const int32_t uiEnd = str_find_first_of(cpszDelimiters, uiDelimitersLen, pcBegin, uiRemaining);
       const bool bLastPiece = (uiEnd == STRING_NPOS);

       // Without a further delimiter the piece runs to the end of the
       // input. Lengths are used instead of pointers past the buffer end.
       const int32_t uiPieceLen = bLastPiece ? uiRemaining : uiEnd;
       assert(uiPieceLen >= 0);

       UnicodeStringRef piece(pcBegin, uiPieceLen);
       if (bTrimString) {
         piece = strtrim(piece);
       }
       if (bInsertEmptyStrings || piece.length() > 0) {
         rveclstrOutput.push_back(piece);
         uiNumFound++;
       }

       if (bLastPiece) {
         break;
       }
       // continue behind the delimiter that ended this piece
       pcBegin += uiPieceLen + 1;
     }
     return uiNumFound;
   }

 } // namespace uima

 /**
  * Stream insertion for UnicodeStringRef at global scope: delegates to
  * UnicodeStringRef::toSingleByteStream().
  *
  * @param outStream stream to write to
  * @param crUStrRef string to write
  * @return outStream, to allow chaining
  */
 std::ostream &
 operator << (std::ostream & outStream, const uima::UnicodeStringRef & crUStrRef) {
   crUStrRef.toSingleByteStream(outStream);
   return outStream;
 }


 /* <EOF> */
	/**
	-----------------------------------------------------------------------------

	string interface of icu::UnicodeString

	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.

	-----------------------------------------------------------------------------


	6/26/1998 Initial creation

	-----------------------------------------------------------------------------
	*/

	#include "uima/unistrref.hpp"
	#include <algorithm> // for min
	#ifdef _MSC_VER
	#include <minmax.h> // for min
	#endif
	/* ----------------------------------------------------------------------- */
	/* Types / Classes */
	/* ----------------------------------------------------------------------- */
	using namespace std;
	namespace uima {


	//========================================
	// Read-only implementation
	//========================================

	int8_t
	UnicodeStringRef::doCompare( int32_t start,
	int32_t length,
	const UChar *srcChars,
	int32_t srcStart,
	int32_t srcLength) const {
	// compare illegal string values
	if (srcChars==0) {
	return 1;
	}

	// pin indices to legal values
	pinIndices(start, length);

	// get the correct pointer
	const UChar *chars = getBuffer();

	chars += start;
	srcChars += srcStart;

	int32_t minLength;
	int8_t lengthResult;

	// are we comparing different lengths?
	if (length != srcLength) {
	if (length < srcLength) {
	minLength = length;
	lengthResult = -1;
	} else {
	minLength = srcLength;
	lengthResult = 1;
	}
	} else {
	minLength = length;
	lengthResult = 0;
	}

	/*
	* note that uprv_memcmp() returns an int but we return an int8_t;
	* we need to take care not to truncate the result -
	* one way to do this is to right-shift the value to
	* move the sign bit into the lower 8 bits and making sure that this
	* does not become 0 itself
	*/

	if (minLength > 0 && chars != srcChars) {
	int32_t result;

	# ifdef WORDS_BIGENDIAN
	// big-endian: byte comparison works
	result = memcmp(chars, srcChars, minLength * sizeof(UChar));
	if (result != 0) {
	return (int8_t)(result >> 15 \| 1);
	}
	# else
	// little-endian: compare UChar units
	do {
	result = ((int32_t)(chars++) - (int32_t)(srcChars++));
	if (result != 0) {
	return (int8_t)(result >> 15 \| 1);
	}
	} while (--minLength > 0);
	# endif
	}
	return lengthResult;
	}


	/**
	 * String compare in code point order - doCompare() compares in code unit order.
	 *
	 * The shared prefix of both ranges is compared with
	 * u_strncmpCodePointOrder(); if it is identical the shorter range sorts
	 * first, mirroring the tie-break used by doCompare().
	 *
	 * @param start     start of the range in this string (pinned by pinIndices())
	 * @param length    length of the range in this string (pinned by pinIndices())
	 * @param srcChars  buffer to compare against; a null pointer yields 1
	 * @param srcStart  offset into srcChars, used exactly as passed
	 * @param srcLength number of code units of srcChars, used exactly as passed
	 * @return -1, 0 or 1
	 */
	int8_t
	UnicodeStringRef::doCompareCodePointOrder(int32_t start,
	    int32_t length,
	    const UChar *srcChars,
	    int32_t srcStart,
	    int32_t srcLength) const {
	  if (srcChars == NULL) {
	    return 1;
	  }

	  // pin indices to legal values
	  pinIndices(start, length);

	  // Verdict when the shared prefix is equal: previously this case always
	  // returned 0, so a string compared equal to any longer string that
	  // merely started with it.
	  int32_t minLength = length;
	  int8_t lengthResult = 0;
	  if (length < srcLength) {
	    lengthResult = -1;
	  } else if (length > srcLength) {
	    minLength = srcLength;
	    lengthResult = 1;
	  }

	  if (minLength > 0) {
	    const int32_t diff = u_strncmpCodePointOrder(getBuffer() + start,
	                                                 srcChars + srcStart,
	                                                 minLength);
	    /* translate the 32-bit result into an 8-bit one */
	    if (diff != 0) {
	      return (int8_t)(diff >> 15 | 1);
	    }
	  }
	  return lengthResult;
	}

	/**
	 * Find the first occurrence of a UChar sequence inside a range of this
	 * string.
	 *
	 * @param srcChars  buffer holding the text to search for
	 * @param srcStart  offset of the search text inside srcChars
	 * @param srcLength length of the search text
	 * @param start     start of the range to search (pinned by pinIndices())
	 * @param length    length of the range to search (pinned by pinIndices())
	 * @return index of the first match, or -1 if there is none or the
	 *         arguments are unusable (null buffer, negative offset,
	 *         non-positive search length)
	 */
	int32_t UnicodeStringRef::indexOf(UChar const *srcChars,
	                                  int32_t srcStart,
	                                  int32_t srcLength,
	                                  int32_t start,
	                                  int32_t length) const {
	  // A negative srcLength used to slip through and *enlarge* the search
	  // window below, reading past the buffer; reject it together with the
	  // other unusable inputs.
	  if (srcChars == 0 || srcStart < 0 || srcLength <= 0) {
	    return -1;
	  }

	  // get the indices within bounds
	  pinIndices(start, length);

	  if (length < srcLength) {
	    return -1;
	  }

	  // The first code unit is matched directly; the remainder (possibly
	  // empty) is handed to compare().
	  const UChar first = srcChars[srcStart];
	  const int32_t restStart = srcStart + 1;
	  const int32_t restLength = srcLength - 1;

	  // one past the last position at which a complete match still fits;
	  // always > start because length >= srcLength >= 1
	  const int32_t limit = start + length - restLength;

	  const UChar *array = getBuffer();
	  for (int32_t pos = start; pos < limit; ++pos) {
	    if (array[pos] == first
	        && (restLength == 0
	            || compare(pos + 1, restLength, srcChars, restStart, restLength) == 0)) {
	      return pos;
	    }
	  }

	  return -1;
	}

	/**
	 * Find the last occurrence of a UChar sequence inside a range of this
	 * string.
	 *
	 * @param srcChars  buffer holding the text to search for
	 * @param srcStart  offset of the search text inside srcChars
	 * @param srcLength length of the search text
	 * @param start     start of the range to search (pinned by pinIndices())
	 * @param length    length of the range to search (pinned by pinIndices())
	 * @return index of the last match, or -1 if there is none or the
	 *         arguments are unusable (null buffer, negative offset,
	 *         non-positive search length)
	 */
	int32_t UnicodeStringRef::lastIndexOf(UChar const *srcChars,
	                                      int32_t srcStart,
	                                      int32_t srcLength,
	                                      int32_t start,
	                                      int32_t length) const {
	  // A negative srcLength used to slip through and *enlarge* the search
	  // window below, reading past the buffer; reject it together with the
	  // other unusable inputs.
	  if (srcChars == 0 || srcStart < 0 || srcLength <= 0) {
	    return -1;
	  }

	  // get the indices within bounds
	  pinIndices(start, length);

	  if (length < srcLength) {
	    return -1;
	  }

	  // The first code unit is matched directly; the remainder (possibly
	  // empty) is handed to compare().
	  const UChar first = srcChars[srcStart];
	  const int32_t restStart = srcStart + 1;
	  const int32_t restLength = srcLength - 1;

	  // last position at which a complete match still fits; never below
	  // start because length >= srcLength >= 1
	  const int32_t lastCandidate = start + length - restLength - 1;

	  const UChar *array = getBuffer();
	  for (int32_t pos = lastCandidate; pos >= start; --pos) {
	    if (array[pos] == first
	        && (restLength == 0
	            || compare(pos + 1, restLength, srcChars, restStart, restLength) == 0)) {
	      return pos;
	    }
	  }

	  return -1;
	}


	/**
	 * Find the first occurrence of a single code unit inside a range of
	 * this string.
	 *
	 * @param c      code unit to look for
	 * @param start  start of the range to search (pinned by pinIndices())
	 * @param length length of the range to search (pinned by pinIndices())
	 * @return index of the first match relative to the buffer start, or -1
	 */
	int32_t
	UnicodeStringRef::doIndexOf(UChar c,
	                            int32_t start,
	                            int32_t length) const {
	  // pin indices
	  pinIndices(start, length);
	  if (length == 0) {
	    return -1;
	  }

	  // walk forward through [start, start + length)
	  const UChar *buffer = getBuffer();
	  const int32_t end = start + length;
	  int32_t pos = start;

	  do {
	    if (buffer[pos] == c) {
	      return pos;
	    }
	    ++pos;
	  } while (pos < end);

	  return -1;
	}

	/**
	 * Find the last occurrence of a single code unit inside a range of
	 * this string.
	 *
	 * @param c      code unit to look for
	 * @param start  start of the range to search (pinned by pinIndices())
	 * @param length length of the range to search (pinned by pinIndices())
	 * @return index of the last match relative to the buffer start, or -1
	 */
	int32_t
	UnicodeStringRef::doLastIndexOf(UChar c,
	                                int32_t start,
	                                int32_t length) const {
	  // pin indices
	  pinIndices(start, length);
	  if (length == 0) {
	    return -1;
	  }

	  // walk backward from the last code unit of the range
	  const UChar *buffer = getBuffer();
	  int32_t pos = start + length;

	  do {
	    --pos;
	    if (buffer[pos] == c) {
	      return pos;
	    }
	  } while (pos > start);

	  return -1;
	}

	int32_t UnicodeStringRef::moveIndex32(int32_t index, int32_t delta) const {
	icu::UnicodeString s((UBool)false, getBuffer(), length());
	return s.moveIndex32(index, delta);
	}

	/**
	 * Copy this string into a UChar buffer by forwarding to
	 * icu::UnicodeString::extract(UChar*, int32_t, UErrorCode&).
	 *
	 * @param dest         destination buffer
	 * @param destCapacity capacity of dest
	 * @param errorCode    ICU error code, passed through
	 * @return whatever the ICU call returns
	 */
	int32_t
	UnicodeStringRef::extract(UChar *dest, int32_t destCapacity,
	                          UErrorCode &errorCode) const {
	  // The readonly aliasing constructor should be cheap as no copy is done
	  const icu::UnicodeString alias((UBool)false, getBuffer(), length());
	  return alias.extract(dest, destCapacity, errorCode);
	}

	int32_t UnicodeStringRef::extract(int32_t start,
	int32_t startLength,
	char *target,
	uint32_t targetLength,
	const char *codepage) const {
	icu::UnicodeString s((UBool)false, getBuffer(), length());
	return s.extract(start, startLength, target, targetLength, codepage);
	}

	int32_t UnicodeStringRef::extract(char *target, int32_t targetCapacity,
	UConverter *cnv,
	UErrorCode &errorCode) const {
	icu::UnicodeString s((UBool)false, getBuffer(), length());
	return s.extract(target, targetCapacity, cnv, errorCode);
	}

	// Copy with conversion into a std::string
	int32_t UnicodeStringRef::extract(int32_t start,
	int32_t startLength,
	std::string & target,
	const char *codepage) const {
	if (length() == 0) {
	target.clear();
	return 0;
	}

	// First use a buffer on the stack ... if too small allocate and try again
	const int32_t STACK_BUF_SIZE = 256;
	char stackBuf [STACK_BUF_SIZE];
	char* heapBuf = NULL;
	char* buf = stackBuf;

	// Use a converter so can be left open if have to convert twice
	// If fail to open converter simply return empty string ... must be unknown!
	UErrorCode err = U_ZERO_ERROR;
	UConverter* cnv = ucnv_open(codepage, &err);
	if ( U_FAILURE(err) ) {
	target.clear();
	return 0;
	}

	const UChar* src = getBuffer() + start;
	int len = ucnv_fromUChars(cnv, buf, STACK_BUF_SIZE, src, startLength, &err);
	if ( err == U_BUFFER_OVERFLOW_ERROR \|\| err == U_STRING_NOT_TERMINATED_WARNING ) {
	buf = heapBuf = new char [len+1];
	err = U_ZERO_ERROR;
	len = ucnv_fromUChars(cnv, buf, len+1, src, startLength, &err);
	}

	target.assign(buf, len); // Copy the result to the string

	if (heapBuf != NULL)
	delete [] heapBuf;
	ucnv_close(cnv);

	return len;
	}

	// Extract into a UTF-8 std::string
	int32_t UnicodeStringRef::extractUTF8(std::string & target) const {
	if (length() == 0) {
	target.clear();
	return 0;
	}

	// First use a buffer on the stack ... if too small allocate and try again
	const int32_t STACK_BUF_SIZE = 256;
	char stackBuf [STACK_BUF_SIZE];
	char* heapBuf = NULL;
	char* buf = stackBuf;
	int32_t len;

	UErrorCode err = U_ZERO_ERROR;
	u_strToUTF8(buf, STACK_BUF_SIZE, &len, getBuffer(), length(), &err);
	if ( err == U_BUFFER_OVERFLOW_ERROR \|\| err == U_STRING_NOT_TERMINATED_WARNING ) {
	buf = heapBuf = new char [len+1];
	err = U_ZERO_ERROR;
	u_strToUTF8(buf, len+1, &len, getBuffer(), length(), &err);
	}

	target.assign(buf, len); // Copy the result to the string

	if (heapBuf != NULL)
	delete [] heapBuf;

	return len;
	}

	// Static method releases contents of string container allocated by extract methods
	//
	// target - string to empty; its storage is handed to a temporary that is
	//          destroyed immediately. reserve(1) was only a non-binding shrink
	//          request and has no shrinking effect at all since C++20.
	void UnicodeStringRef::release(std::string & target) {
	  std::string().swap(target);
	}


	/**
	 * Write this string to a byte-oriented output stream.
	 *
	 * Streams sharing a buffer with cout or cerr get a null codepage
	 * (default encoding); every other stream gets "utf-8".
	 *
	 * @param outStream stream to write the converted bytes to
	 */
	void
	UnicodeStringRef::toSingleByteStream(std::ostream & outStream) const {
	  // If output goes to console use default encoding
	  const bool toConsole = (outStream.rdbuf() == cout.rdbuf())
	                         || (outStream.rdbuf() == cerr.rdbuf());
	  const char* codepage = toConsole ? (const char*)0 : "utf-8";

	  std::string converted;
	  extract(converted, codepage);               // get a single byte string
	  outStream << converted;
	}

	/**
	 * Stream insertion for UnicodeStringRef (namespace uima): delegates to
	 * UnicodeStringRef::toSingleByteStream().
	 *
	 * @param outStream stream to write to
	 * @param crUStrRef string to write
	 * @return outStream, to allow chaining
	 */
	std::ostream &
	operator << (std::ostream & outStream, const uima::UnicodeStringRef & crUStrRef) {
	  crUStrRef.toSingleByteStream(outStream);
	  return outStream;
	}

	/**
	 * Split a UChar buffer at any of the given delimiter characters and
	 * append the pieces, as UnicodeStringRef views into the input, to a
	 * vector.
	 *
	 * A delimiter at the very end of the input does not produce a trailing
	 * empty piece.
	 *
	 * @param rveclstrOutput      vector the pieces are appended to
	 * @param pcInput             text to split
	 * @param uiInputLength       number of UChars in pcInput
	 * @param cpszDelimiters      NUL-terminated set of delimiter characters
	 * @param bTrimString         if true each piece is passed through strtrim()
	 * @param bInsertEmptyStrings if true empty pieces are appended as well
	 * @return number of pieces appended
	 */
	int32_t
	delimitedUnicodeStringRef2Vector(
	  std::vector< uima::UnicodeStringRef > & rveclstrOutput,
	  const UChar * pcInput,
	  int32_t uiInputLength,
	  const UChar * cpszDelimiters,
	  bool bTrimString,
	  bool bInsertEmptyStrings
	) {
	  if (pcInput == NULL || uiInputLength <= 0) {
	    return 0;
	  }

	  const int32_t uiDelimitersLen = u_strlen(cpszDelimiters);
	  UChar const * const pcInputEnd = pcInput + uiInputLength;
	  UChar const * pcBegin = pcInput;
	  int32_t uiNumFound = 0;

	  while (pcBegin < pcInputEnd) {
	    const int32_t uiRemaining = (int32_t)(pcInputEnd - pcBegin);
	    const int32_t uiEnd = str_find_first_of(cpszDelimiters, uiDelimitersLen, pcBegin, uiRemaining);
	    const bool bLastPiece = (uiEnd == STRING_NPOS);

	    // Without a further delimiter the piece runs to the end of the
	    // input. Lengths are used instead of pointers past the buffer end.
	    const int32_t uiPieceLen = bLastPiece ? uiRemaining : uiEnd;
	    assert(uiPieceLen >= 0);

	    UnicodeStringRef piece(pcBegin, uiPieceLen);
	    if (bTrimString) {
	      piece = strtrim(piece);
	    }
	    if (bInsertEmptyStrings || piece.length() > 0) {
	      rveclstrOutput.push_back(piece);
	      uiNumFound++;
	    }

	    if (bLastPiece) {
	      break;
	    }
	    // continue behind the delimiter that ended this piece
	    pcBegin += uiPieceLen + 1;
	  }
	  return uiNumFound;
	}

	} // namespace uima

	/**
	 * Stream insertion for UnicodeStringRef at global scope: delegates to
	 * UnicodeStringRef::toSingleByteStream().
	 *
	 * @param outStream stream to write to
	 * @param crUStrRef string to write
	 * @return outStream, to allow chaining
	 */
	std::ostream &
	operator << (std::ostream & outStream, const uima::UnicodeStringRef & crUStrRef) {
	  crUStrRef.toSingleByteStream(outStream);
	  return outStream;
	}



	/* <EOF> */