// src/xercesc/util/Transcoders/ICU/ICUTransService.cpp - xerces-c - Git at Google

 /*
  * The Apache Software License, Version 1.1
  *
  * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights
  * reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the
  *    distribution.
  *
  * 3. The end-user documentation included with the redistribution,
  *    if any, must include the following acknowledgment:
  *       "This product includes software developed by the
  *        Apache Software Foundation (http://www.apache.org/)."
  *    Alternately, this acknowledgment may appear in the software itself,
  *    if and wherever such third-party acknowledgments normally appear.
  *
  * 4. The names "Xerces" and "Apache Software Foundation" must
  *    not be used to endorse or promote products derived from this
  *    software without prior written permission. For written
  *    permission, please contact apache\@apache.org.
  *
  * 5. Products derived from this software may not be called "Apache",
  *    nor may "Apache" appear in their name, without prior written
  *    permission of the Apache Software Foundation.
  *
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * ====================================================================
  *
  * This software consists of voluntary contributions made by many
  * individuals on behalf of the Apache Software Foundation, and was
  * originally based on software copyright (c) 1999, International
  * Business Machines, Inc., http://www.ibm.com .  For more information
  * on the Apache Software Foundation, please see
  * <http://www.apache.org/>.
  */

 /*
  * $Id$
  */


 // ---------------------------------------------------------------------------
 //  Includes
 // ---------------------------------------------------------------------------
 #include <xercesc/util/Janitor.hpp>
 #include <xercesc/util/TranscodingException.hpp>
 #include <xercesc/util/XMLString.hpp>
 #include <xercesc/util/XMLUniDefs.hpp>
 #include "ICUTransService.hpp"
 #include <string.h>
 #include <unicode/uloc.h>
 #include <unicode/unicode.h>
 #include <unicode/ucnv.h>
 #include <unicode/ucnv_err.h>
 #include <unicode/ustring.h>
 #include <unicode/udata.h>
 #if (U_ICU_VERSION_MAJOR_NUM >= 2)
     #include <unicode/uclean.h>
 #endif


 #if !defined(XML_OS390) && !defined(XML_AS400) && !defined(XML_HPUX) && !defined(XML_PTX)
 // Forward reference the symbol which points to the ICU converter data.
 #if (U_ICU_VERSION_MAJOR_NUM < 2)
 extern "C" const uint8_t U_IMPORT icudata_dat[];
 #endif
 #endif


 // ---------------------------------------------------------------------------
 //  Local, const data
 // ---------------------------------------------------------------------------
 // Null-terminated XMLCh identifier of this transcoding service, built from
 // the Latin letters I, C, U. It is what ICUTransService::getId() returns.
 static const XMLCh gMyServiceId[] =
 {
     chLatin_I, chLatin_C, chLatin_U, chNull
 };


 // ---------------------------------------------------------------------------
 //  Local functions
 // ---------------------------------------------------------------------------

 //
 //  When XMLCh and ICU's UChar are not the same size, we have to do a temp
 //  conversion of all strings. These local helper methods make that easier.
 //
 //
 //  Creates a new UChar copy of an XMLCh string, converting each unit with
 //  a cast.
 //
 //  toConvert  The source text. It must be null terminated when srcLen is
 //             zero.
 //  srcLen     The number of XMLCh units to copy, or zero to copy up to the
 //             null terminator.
 //
 //  Returns a null terminated buffer allocated with new[]. The caller owns
 //  it and must release it with delete[].
 //
 static UChar* convertToUChar(   const   XMLCh* const    toConvert
                                 , const unsigned int    srcLen = 0)
 {
     const unsigned int actualLen = srcLen
                                    ? srcLen : XMLString::stringLen(toConvert);

     UChar* tmpBuf = new UChar[actualLen + 1];

     //
     //  Copy exactly actualLen units. The buffer is sized from actualLen, so
     //  scanning for a null terminator instead would run past the end of it
     //  whenever the caller passes a count for text that is not terminated
     //  at that position.
     //
     for (unsigned int index = 0; index < actualLen; index++)
         tmpBuf[index] = UChar(toConvert[index]);
     tmpBuf[actualLen] = 0;

     return tmpBuf;
 }


 //
 //  Creates a new XMLCh copy of a null terminated UChar string, converting
 //  each unit with a cast. The result is allocated with new[] and is owned
 //  by the caller.
 //
 static XMLCh* convertToXMLCh(const UChar* const toConvert)
 {
     const unsigned int charCount = u_strlen(toConvert);
     XMLCh* const result = new XMLCh[charCount + 1];

     // Walk the source up to its terminator, filling the result as we go
     XMLCh* dst = result;
     for (const UChar* src = toConvert; *src; ++src, ++dst)
         *dst = XMLCh(*src);

     // Terminate the copy
     *dst = 0;

     return result;
 }


 // ---------------------------------------------------------------------------
 //  ICUTransService: Constructors and Destructor
 // ---------------------------------------------------------------------------
 //
 //  Constructs the ICU transcoding service. When building against an ICU
 //  older than 2.0, and not on the OS390, AS400, HPUX or PTX platforms, it
 //  hands ICU the statically referenced converter data. In every other
 //  configuration the body compiles to nothing.
 //
 ICUTransService::ICUTransService()
 {
 #if !defined(XML_OS390) && !defined(XML_AS400) && !defined(XML_HPUX) && !defined(XML_PTX)
 #if (U_ICU_VERSION_MAJOR_NUM < 2)
     // Starting with ICU 2.0, ICU itself includes a static reference to the data
     // entrypoint symbol.
     //
     // ICU 1.8 (and previous) did not include a static reference, but would
     // dynamically load the data dll when it was first needed, however this dynamic
     // loading proved unreliable in some of the odd environments that Xerces needed
     // to run in.  Hence, the static reference.

     // Pass the location of the converter data to ICU. By doing so, we are
     // forcing the load of ICU converter data DLL, after the Xerces-C DLL is
     // loaded. This implies that Xerces-C, now has to explicitly link with the
     // ICU converter dll. However, the advantage is that we no longer depend
     // on the code which does demand dynamic loading of DLL's. The demand
     // loading is highly system dependent and was a constant source of support
     // calls.
     UErrorCode uerr = U_ZERO_ERROR;
     // Note: the status written to uerr is not inspected afterwards.
     udata_setCommonData((void *) icudata_dat, &uerr);
 #endif
 #endif
 }

 //
 //  Destructor. It deliberately performs no ICU cleanup; the reason is given
 //  in the comment inside the body.
 //
 ICUTransService::~ICUTransService()
 {
     /*
      * The following clean up code is commented out
      * in case users use ICU outside of the parser.
      * If we cleaned up here, users' code might crash.
      *
     #if (U_ICU_VERSION_MAJOR_NUM >= 2)
         // release all lazily allocated data
         u_cleanup();
     #endif
     */
 }


 // ---------------------------------------------------------------------------
 //  ICUTransService: The virtual transcoding service API
 // ---------------------------------------------------------------------------
 int ICUTransService::compareIString(const   XMLCh* const    comp1
                                     , const XMLCh* const    comp2)
 {
     const XMLCh* psz1 = comp1;
     const XMLCh* psz2 = comp2;

     unsigned int curCount = 0;
     while (true)
     {
         //
         //  If an inequality, then return the difference. Note that the XMLCh
         //  might be bigger physically than UChar, but it won't hold anything
         //  larger than 0xFFFF, so our cast here will work for both possible
         //  sizes of XMLCh.
         //
         if (Unicode::toUpperCase(UChar(*psz1)) != Unicode::toUpperCase(UChar(*psz2)))
             return int(*psz1) - int(*psz2);

         // If either has ended, then they both ended, so equal
         if (!*psz1 || !*psz2)
             break;

         // Move upwards for the next round
         psz1++;
         psz2++;
     }
     return 0;
 }


 int ICUTransService::compareNIString(const  XMLCh* const    comp1
                                     , const XMLCh* const    comp2
                                     , const unsigned int    maxChars)
 {
     const XMLCh* psz1 = comp1;
     const XMLCh* psz2 = comp2;

     unsigned int curCount = 0;
     while (true)
     {
         //
         //  If an inequality, then return the difference. Note that the XMLCh
         //  might be bigger physically than UChar, but it won't hold anything
         //  larger than 0xFFFF, so our cast here will work for both possible
         //  sizes of XMLCh.
         //
         if (Unicode::toUpperCase(UChar(*psz1)) != Unicode::toUpperCase(UChar(*psz2)))
             return int(*psz1) - int(*psz2);

         // If either ended, then both ended, so equal
         if (!*psz1 || !*psz2)
             break;

         // Move upwards to next chars
         psz1++;
         psz2++;

         //
         //  Bump the count of chars done. If it equals the count then we
         //  are equal for the requested count, so break out and return
         //  equal.
         //
         curCount++;
         if (maxChars == curCount)
             break;
     }
     return 0;
 }


 // Returns the identifier of this service, the file-local gMyServiceId
 // string. The returned pointer refers to static data, not a copy.
 const XMLCh* ICUTransService::getId() const
 {
     return gMyServiceId;
 }


 //
 //  Reports whether the passed character is whitespace. Tab, line feed and
 //  carriage return are always accepted; anything else is referred to ICU.
 //
 bool ICUTransService::isSpace(const XMLCh toCheck) const
 {
     //
     //  <TBD>
     //  These control characters are special cased because ICU does not
     //  report them as space. If ICU changes that, this switch can go away.
     //
     switch (toCheck)
     {
         case 0x09 :
         case 0x0A :
         case 0x0D :
             return true;

         default :
             break;
     }

     // Otherwise, let ICU decide
     return Unicode::isSpaceChar(UChar(toCheck)) ? true : false;
 }


 //
 //  Creates a local code page transcoder wrapped around ICU's default
 //  converter. Returns a null pointer if the converter cannot be opened, in
 //  which case the system basically has to give up since it cannot do
 //  anything without one.
 //
 XMLLCPTranscoder* ICUTransService::makeNewLCPTranscoder()
 {
     // Passing a null name asks ICU for the default converter
     UErrorCode status = U_ZERO_ERROR;
     UConverter* const defConverter = ucnv_open(NULL, &status);

     // If we got one, the new wrapper object adopts it
     if (defConverter)
         return new ICULCPTranscoder(defConverter);

     return 0;
 }


 // Reports whether this service supports source offset information.
 // It always returns true.
 bool ICUTransService::supportsSrcOfs() const
 {
     // This implementation supports source offset information
     return true;
 }


 //
 //  Upper cases the passed null terminated string in place, one character
 //  at a time, using ICU's per-character mapping.
 //
 void ICUTransService::upperCase(XMLCh* const toUpperCase) const
 {
     for (XMLCh* cur = toUpperCase; *cur; ++cur)
         *cur = XMLCh(Unicode::toUpperCase(UChar(*cur)));
 }

 //
 //  Lower cases the passed null terminated string in place, one character
 //  at a time, using ICU's per-character mapping.
 //
 void ICUTransService::lowerCase(XMLCh* const toLowerCase) const
 {
     for (XMLCh* cur = toLowerCase; *cur; ++cur)
         *cur = XMLCh(Unicode::toLowerCase(UChar(*cur)));
 }


 // ---------------------------------------------------------------------------
 //  ICUTransService: The protected virtual transcoding service API
 // ---------------------------------------------------------------------------
 //
 //  Creates a transcoder for the named encoding.
 //
 //  encodingName   The null terminated name of the encoding to open.
 //  resValue       Set to UnsupportedEncoding if ICU cannot open a converter
 //                 for the name. It is left untouched on success.
 //  blockSize      Passed on to the new transcoder.
 //
 //  Returns the new transcoder, or a null pointer if the encoding is not
 //  supported.
 //
 XMLTranscoder* ICUTransService::
 makeNewXMLTranscoder(const  XMLCh* const            encodingName
                     ,       XMLTransService::Codes& resValue
                     , const unsigned int            blockSize)
 {
     //
     //  ICU wants the name as UChar text. If UChar and XMLCh differ in size
     //  we must make a converted copy, which the janitor cleans up for us.
     //  Otherwise the janitor just holds a null pointer.
     //
     UChar* tmpName = 0;
     if (sizeof(UChar) != sizeof(XMLCh))
         tmpName = convertToUChar(encodingName);
     ArrayJanitor<UChar> janTmp(tmpName);

     const UChar* const icuName = tmpName ? tmpName
                                          : (const UChar*)encodingName;

     // Try to open a converter for this encoding
     UErrorCode status = U_ZERO_ERROR;
     UConverter* const converter = ucnv_openU(icuName, &status);

     // It worked, so wrap it in a transcoder, which adopts the converter
     if (converter)
         return new ICUTranscoder(encodingName, converter, blockSize);

     resValue = XMLTransService::UnsupportedEncoding;
     return 0;
 }


 // ---------------------------------------------------------------------------
 //  ICUTranscoder: Constructors and Destructor
 // ---------------------------------------------------------------------------
 //
 //  Constructs a transcoder around an already opened ICU converter.
 //
 //  encodingName   The encoding name, handed to the base class.
 //  toAdopt        The ICU converter to use. The destructor closes it.
 //  blockSize      The block size, handed to the base class. When non-zero
 //                 it is also the size of the source offset array.
 //
 ICUTranscoder::ICUTranscoder(const  XMLCh* const        encodingName
                             ,       UConverter* const   toAdopt
                             , const unsigned int        blockSize) :

     XMLTranscoder(encodingName, blockSize)
     , fConverter(toAdopt)
     , fFixed(false)
     , fSrcOffsets(0)
 {
     // An encoding is fixed size when its min and max char sizes agree
     const bool sameSize =
         (ucnv_getMaxCharSize(fConverter) == ucnv_getMinCharSize(fConverter));
     fFixed = sameSize;

     // The source offset array is only needed if there is a block size
     if (blockSize != 0)
         fSrcOffsets = new XMLUInt32[blockSize];
 }

 //
 //  Destructor. Releases the source offset array and closes the adopted
 //  ICU converter, if there is one.
 //
 ICUTranscoder::~ICUTranscoder()
 {
     // Deleting a null pointer is harmless, so no check is needed here
     delete [] fSrcOffsets;

     // If there is a converter, ask ICU to clean it up
     if (fConverter)
     {
         // <TBD> Does this actually delete the structure???
         ucnv_close(fConverter);
         fConverter = 0;
     }
 }


 // ---------------------------------------------------------------------------
 //  ICUTranscoder: The virtual transcoder API
 // ---------------------------------------------------------------------------
 //
 //  Transcodes a block of source bytes into XMLCh characters.
 //
 //  srcData     The source bytes.
 //  srcCount    The number of bytes available at srcData.
 //  toFill      The buffer to receive the characters. It must have room for
 //              maxChars characters.
 //  maxChars    The maximum number of characters to produce.
 //  bytesEaten  Set to the number of source bytes consumed.
 //  charSizes   Receives, per character produced, the number of source
 //              bytes it came from. For a fixed size encoding all maxChars
 //              entries are filled with that size.
 //
 //  Returns the number of characters stored in toFill.
 //
 //  Throws a TranscodingException if ICU reports any status other than
 //  success or buffer overflow.
 //
 unsigned int
 ICUTranscoder::transcodeFrom(const  XMLByte* const          srcData
                             , const unsigned int            srcCount
                             ,       XMLCh* const            toFill
                             , const unsigned int            maxChars
                             ,       unsigned int&           bytesEaten
                             ,       unsigned char* const    charSizes)
 {
     // If debugging, ensure the block size is legal
     #if defined(XERCES_DEBUG)
     checkBlockSize(maxChars);
     #endif

     // Set up pointers to the start and end of the source buffer
     const XMLByte*  startSrc = srcData;
     const XMLByte*  endSrc = srcData + srcCount;

     //
     //  And now do the target buffer. This works differently according to
     //  whether XMLCh and UChar are the same size or not.
     //
     UChar* startTarget;
     if (sizeof(XMLCh) == sizeof(UChar))
         startTarget = (UChar*)toFill;
      else
         startTarget = new UChar[maxChars];
     UChar* orgTarget = startTarget;

     //
     //  Transcode the buffer.  Buffer overflow errors are normal, occurring
     //  when the raw input buffer holds more characters than will fit in
     //  the Unicode output buffer.
     //
     UErrorCode  err = U_ZERO_ERROR;
     ucnv_toUnicode
     (
         fConverter
         , &startTarget
         , startTarget + maxChars
         , (const char**)&startSrc
         , (const char*)endSrc
         , (fFixed ? 0 : (int32_t*)fSrcOffsets)
         , false
         , &err
     );

     if ((err != U_ZERO_ERROR) && (err != U_BUFFER_OVERFLOW_ERROR))
     {
         //
         //  Fetch the value for the error message BEFORE releasing the temp
         //  buffer, since startTarget may point into that buffer.
         //
         //  NOTE(review): this reports the unit at the current target
         //  position, as the code always has. Confirm whether the offending
         //  source byte was intended instead.
         //
         unsigned int badValue = 0;
         if (fFixed)
             badValue = (unsigned int)(*startTarget);

         if (orgTarget != (UChar*)toFill)
             delete [] orgTarget;

         if (fFixed)
         {
             XMLCh tmpBuf[16];
             XMLString::binToText(badValue, tmpBuf, 16, 16);
             ThrowXML2
             (
                 TranscodingException
                 , XMLExcepts::Trans_BadSrcCP
                 , tmpBuf
                 , getEncodingName()
             );
         }
          else
         {
             ThrowXML(TranscodingException, XMLExcepts::Trans_BadSrcSeq);
         }
     }

     // Calculate the bytes eaten and store in caller's param
     bytesEaten = startSrc - srcData;

     // And the characters decoded
     const unsigned int charsDecoded = startTarget - orgTarget;

     //
     //  Translate the array of char offsets into an array of character
     //  sizes, which is what the transcoder interface semantics requires.
     //  If it's fixed, then we can optimize it.
     //
     if (fFixed)
     {
         const unsigned char fillSize = (unsigned char)ucnv_getMaxCharSize(fConverter);
         memset(charSizes, fillSize, maxChars);
     }
      else
     {
         //
         //  We have to convert the series of offsets into a series of
         //  sizes. If just one char was decoded, then it's the total bytes
         //  eaten. If more than one, do a loop and subtract out each element
         //  from its previous element. If none were decoded there is nothing
         //  to fill in; this case must be excluded explicitly because
         //  charsDecoded is unsigned and (charsDecoded - 1) would wrap.
         //
         if (charsDecoded == 1)
         {
             charSizes[0] = (unsigned char)bytesEaten;
         }
          else if (charsDecoded > 1)
         {
             //  ICU does not return an extra element to allow us to figure
             //  out the last char size, so we have to compute it from the
             //  total bytes used.
             const unsigned int lastIndex = charsDecoded - 1;
             for (unsigned int index = 0; index < lastIndex; index++)
             {
                 charSizes[index] = (unsigned char)(fSrcOffsets[index + 1]
                                                     - fSrcOffsets[index]);
             }
             charSizes[lastIndex] = (unsigned char)(bytesEaten
                                                    - fSrcOffsets[lastIndex]);
         }
     }

     //
     //  If XMLCh and UChar are not the same size, then we need to copy over
     //  the temp buffer to the new one.
     //
     if (sizeof(UChar) != sizeof(XMLCh))
     {
         XMLCh* outPtr = toFill;
         startTarget = orgTarget;
         for (unsigned int index = 0; index < charsDecoded; index++)
             *outPtr++ = XMLCh(*startTarget++);

         // And delete the temp buffer
         delete [] orgTarget;
     }

     // Return the chars we put into the target buffer
     return charsDecoded;
 }


 //
 //  Transcodes a block of XMLCh characters into bytes of the target
 //  encoding.
 //
 //  srcData     The source characters.
 //  srcCount    The number of characters available at srcData.
 //  toFill      The buffer to receive the bytes.
 //  maxBytes    The capacity of toFill, in bytes.
 //  charsEaten  Set to the number of source characters consumed.
 //  options     UnRep_Throw makes an unrepresentable character an error;
 //              any other value lets ICU substitute a replacement.
 //
 //  Returns the number of bytes stored in toFill.
 //
 //  Throws a TranscodingException (Trans_Unrepresentable) if ICU reports
 //  any status other than U_ZERO_ERROR.
 //
 unsigned int
 ICUTranscoder::transcodeTo( const   XMLCh* const    srcData
                             , const unsigned int    srcCount
                             ,       XMLByte* const  toFill
                             , const unsigned int    maxBytes
                             ,       unsigned int&   charsEaten
                             , const UnRepOpts       options)
 {
     //
     //  Get a pointer to the buffer to transcode. If UChar and XMLCh are
     //  the same size here, then use the original. Else, create a temp
     //  one and put a janitor on it.
     //
     const UChar* srcPtr;
     UChar* tmpBufPtr = 0;
     if (sizeof(XMLCh) == sizeof(UChar))
     {
         srcPtr = (const UChar*)srcData;
     }
      else
     {
         tmpBufPtr = convertToUChar(srcData, srcCount);
         srcPtr = tmpBufPtr;
     }
     ArrayJanitor<UChar> janTmpBuf(tmpBufPtr);

     //
     //  Set the appropriate callback so that it will either fail or use
     //  the rep char. Remember the old callback AND its context so we can
     //  put both back. A second variable receives the context that the
     //  restore call hands back, so the saved one is not overwritten.
     //
     UErrorCode  err = U_ZERO_ERROR;
     UConverterFromUCallback oldCB = NULL;
     #if (U_ICU_VERSION_MAJOR_NUM < 2)
     void* orgContent = 0;
     void* replacedContent = 0;
     #else
     const void* orgContent = 0;
     const void* replacedContent = 0;
     #endif
     ucnv_setFromUCallBack
     (
         fConverter
         , (options == UnRep_Throw) ? UCNV_FROM_U_CALLBACK_STOP
                                    : UCNV_FROM_U_CALLBACK_SUBSTITUTE
         , NULL
         , &oldCB
         , &orgContent
         , &err
     );

     //
     //  Ok, let's transcode as many chars as we can in one shot. The
     //  ICU API gives enough info not to have to do this one char by char.
     //
     XMLByte*        startTarget = toFill;
     const UChar*    startSrc = srcPtr;
     err = U_ZERO_ERROR;
     ucnv_fromUnicode
     (
         fConverter
         , (char**)&startTarget
         , (char*)(startTarget + maxBytes)
         , &startSrc
         , srcPtr + srcCount
         , 0
         , false
         , &err
     );

     //
     //  Remember the status before we possibly overwrite the error code.
     //
     //  NOTE(review): any status other than U_ZERO_ERROR, which includes
     //  U_BUFFER_OVERFLOW_ERROR, is reported below as an unrepresentable
     //  character. Confirm that callers always supply enough room.
     //
     const bool res = (err == U_ZERO_ERROR);

     // Put the old handler back, along with the context it was using
     err = U_ZERO_ERROR;
     UConverterFromUCallback orgAction = NULL;

     ucnv_setFromUCallBack
     (
         fConverter
         , oldCB
         , orgContent
         , &orgAction
         , &replacedContent
         , &err
     );

     if (!res)
     {
         XMLCh tmpBuf[16];
         XMLString::binToText((unsigned int)*startSrc, tmpBuf, 16, 16);
         ThrowXML2
         (
             TranscodingException
             , XMLExcepts::Trans_Unrepresentable
             , tmpBuf
             , getEncodingName()
         );
     }

     // Fill in the chars we ate from the input
     charsEaten = startSrc - srcPtr;

     // Return the number of bytes we stored
     return startTarget - toFill;
 }


 //
 //  Reports whether a single character can be represented in the target
 //  encoding, by trying to transcode it with substitution disabled.
 //
 //  toCheck    The character to test. A value above 0xFFFF is treated as a
 //             supplementary code point and is split into a surrogate pair.
 //
 //  Returns true if ICU converted the character with a status of
 //  U_ZERO_ERROR, else false.
 //
 bool ICUTranscoder::canTranscodeTo(const unsigned int toCheck) const
 {
     //
     //  If the passed value is beyond the 16 bit range, then we need to
     //  break it out into its two surrogate chars. Else just one. While
     //  we are at it, convert them to UChar format if required.
     //
     //  UTF-16 encodes the code point minus 0x10000: its upper ten bits go
     //  in the high surrogate and its lower ten bits in the low surrogate.
     //  The subtraction only affects the upper bits, since 0x10000 is a
     //  multiple of 0x400.
     //
     UChar           srcBuf[2];
     unsigned int    srcCount = 1;
     if (toCheck & 0xFFFF0000)
     {
         srcBuf[0] = UChar(((toCheck - 0x10000) >> 10) + 0xD800);
         srcBuf[1] = UChar((toCheck & 0x3FF) + 0xDC00);
         srcCount++;
     }
      else
     {
         srcBuf[0] = UChar(toCheck);
     }

     //
     //  Set the callback so that it will fail instead of using the rep char.
     //  Remember the old callback AND its context so we can put both back.
     //  A second variable receives the context that the restore call hands
     //  back, so the saved one is not overwritten.
     //
     UErrorCode  err = U_ZERO_ERROR;
     UConverterFromUCallback oldCB = NULL;
     #if (U_ICU_VERSION_MAJOR_NUM < 2)
     void* orgContent = 0;
     void* replacedContent = 0;
     #else
     const void* orgContent = 0;
     const void* replacedContent = 0;
     #endif

     ucnv_setFromUCallBack
     (
         fConverter
         , UCNV_FROM_U_CALLBACK_STOP
         , NULL
         , &oldCB
         , &orgContent
         , &err
     );

     // Set up a temp buffer to format into. Make it more than big enough
     char            tmpBuf[64];
     char*           startTarget = tmpBuf;
     const UChar*    startSrc = srcBuf;

     err = U_ZERO_ERROR;
     ucnv_fromUnicode
     (
         fConverter
         , &startTarget
         , startTarget + 64
         , &startSrc
         , srcBuf + srcCount
         , 0
         , false
         , &err
     );

     // Save the result before we overwrite the error code
     const bool res = (err == U_ZERO_ERROR);

     // Put the old handler back, along with the context it was using
     err = U_ZERO_ERROR;
     UConverterFromUCallback orgAction = NULL;

     ucnv_setFromUCallBack
     (
         fConverter
         , oldCB
         , orgContent
         , &orgAction
         , &replacedContent
         , &err
     );

     return res;
 }


 // ---------------------------------------------------------------------------
 //  ICULCPTranscoder: Constructors and Destructor
 // ---------------------------------------------------------------------------
 //
 //  Constructs a local code page transcoder around an already opened ICU
 //  converter. The converter is stored as is and closed by the destructor.
 //
 ICULCPTranscoder::ICULCPTranscoder(UConverter* const toAdopt) :

     fConverter(toAdopt)
 {
 }

 //
 //  Destructor. Closes the adopted ICU converter, if there is one.
 //
 ICULCPTranscoder::~ICULCPTranscoder()
 {
     // If there is a converter, ask ICU to clean it up
     if (fConverter)
     {
         // <TBD> Does this actually delete the structure???
         ucnv_close(fConverter);
         fConverter = 0;
     }
 }


 // ---------------------------------------------------------------------------
 //  ICULCPTranscoder: The virtual transcoder API
 // ---------------------------------------------------------------------------
 unsigned int ICULCPTranscoder::calcRequiredSize(const XMLCh* const srcText)
 {
     if (!srcText)
         return 0;

     //
     //  We do two different versions of this, according to whether XMLCh
     //  is the same size as UChar or not.
     //
     UErrorCode err = U_ZERO_ERROR;
     int32_t targetCap;
     if (sizeof(XMLCh) == sizeof(UChar))
     {
         // Use a faux scope to synchronize while we do this
         {
             XMLMutexLock lockConverter(&fMutex);

             targetCap = ucnv_fromUChars
             (
                 fConverter
                 , 0
                 , 0
                 , (const UChar*)srcText
                 , -1
                 , &err
             );
         }
     }
      else
     {
         // Copy the source to a local temp
         UChar* tmpBuf = convertToUChar(srcText);
         ArrayJanitor<UChar> janTmp(tmpBuf);

         // Use a faux scope to synchronize while we do this
         {
             XMLMutexLock lockConverter(&fMutex);

             targetCap = ucnv_fromUChars
             (
                 fConverter
                 , 0
                 , 0
                 , tmpBuf
                 , -1
                 , &err
             );
         }
     }

     if (err != U_BUFFER_OVERFLOW_ERROR)
         return 0;

     return (unsigned int)targetCap;
 }

 unsigned int ICULCPTranscoder::calcRequiredSize(const char* const srcText)
 {
     if (!srcText)
         return 0;

     int32_t targetCap;
     UErrorCode err = U_ZERO_ERROR;

     // Use a faux scope to synchronize while we do this
     {
         XMLMutexLock lockConverter(&fMutex);
         targetCap = ucnv_toUChars
         (
             fConverter
             , 0
             , 0
             , srcText
             , strlen(srcText)
             , &err
         );
     }

     if (err != U_BUFFER_OVERFLOW_ERROR)
         return 0;

 #if (U_ICU_VERSION_MAJOR_NUM < 2)
     // Subtract one since it includes the terminator space
     return (unsigned int)(targetCap - 1);
 #else
     // Starting ICU 2.0, this is fixed and all ICU String functions have consistent NUL-termination behavior.
     // The returned length is always the number of output UChar's, not counting an additional, terminating NUL.
     return (unsigned int)(targetCap);
 #endif
 }


 //
 //  Transcodes a null terminated Unicode string to the local code page.
 //
 //  toTranscode    The text to transcode. May be null.
 //
 //  Returns a null terminated buffer allocated with new[], which the caller
 //  owns. Returns a null pointer if the source is null or if ICU fails.
 //
 char* ICULCPTranscoder::transcode(const XMLCh* const toTranscode)
 {
     char* retBuf = 0;

     // Check for a couple of special cases
     if (!toTranscode)
         return retBuf;

     if (!*toTranscode)
     {
         retBuf = new char[1];
         retBuf[0] = 0;
         return retBuf;
     }

     //
     //  Get the length of the source string since we'll have to use it in
     //  a couple places below.
     //
     const unsigned int srcLen = XMLString::stringLen(toTranscode);

     //
     //  If XMLCh and UChar are not the same size, then we have to make a
     //  temp copy of the text to pass to ICU.
     //
     const UChar* actualSrc;
     UChar* ncActual = 0;
     if (sizeof(XMLCh) == sizeof(UChar))
     {
         actualSrc = (const UChar*)toTranscode;
     }
      else
     {
         // Allocate a non-const temp buf, but store it also in the actual
         ncActual = convertToUChar(toTranscode);
         actualSrc = ncActual;
     }

     // Ensure that the temp buffer, if any, gets cleaned up via the nc pointer
     ArrayJanitor<UChar> janTmp(ncActual);

     // Calculate a return buffer size not too big, but less likely to overflow
     int32_t targetLen = (int32_t)(srcLen * 1.25);

     // Allocate the return buffer, with one extra byte for the terminator
     retBuf = new char[targetLen + 1];

     //
     //  Lock now while we call the converter. Use a faux block to do the
     //  lock so that it unlocks immediately afterwards.
     //
     //  We tell ICU the capacity is targetLen, keeping the extra byte in
     //  reserve. If we offered all targetLen + 1 bytes and the output filled
     //  them exactly, ICU would report only a warning and the terminator
     //  written at the end of this method would land past the buffer.
     //
     UErrorCode err = U_ZERO_ERROR;
     int32_t targetCap;
     {
         XMLMutexLock lockConverter(&fMutex);

         targetCap = ucnv_fromUChars
         (
             fConverter
             , retBuf
             , targetLen
             , actualSrc
             , -1
             , &err
         );
     }

     // If targetLen is not enough then buffer overflow might occur
     if (err == U_BUFFER_OVERFLOW_ERROR)
     {
         //
         //  Reset the error, delete the old buffer, allocate a new one of
         //  the size ICU asked for (plus the terminator), and try again.
         //
         err = U_ZERO_ERROR;
         delete [] retBuf;
         retBuf = new char[targetCap + 1];

         // Lock again before we retry
         XMLMutexLock lockConverter(&fMutex);
         targetCap = ucnv_fromUChars
         (
             fConverter
             , retBuf
             , targetCap
             , actualSrc
             , -1
             , &err
         );
     }

     if (U_FAILURE(err))
     {
         delete [] retBuf;
         return 0;
     }

     // Cap it off and return. The reserved byte guarantees this is in bounds
     retBuf[targetCap] = 0;
     return retBuf;
 }

 //
 //  Transcodes a null terminated local code page string to Unicode.
 //
 //  toTranscode    The text to transcode. May be null.
 //
 //  Returns a null terminated buffer allocated with new[], which the caller
 //  owns. Returns a null pointer if the source is null or if ICU fails.
 //
 XMLCh* ICULCPTranscoder::transcode(const char* const toTranscode)
 {
     // A null source gives a null result
     if (!toTranscode)
         return 0;

     // And an empty source gives a freshly allocated empty string
     if (!*toTranscode)
     {
         XMLCh* emptyStr = new XMLCh[1];
         emptyStr[0] = 0;
         return emptyStr;
     }

     // The number of source bytes we are going to hand to ICU
     const int32_t srcLen = (int32_t)strlen(toTranscode);

     // The UChar buffer that ICU will fill in for us
     UChar* uniBuf = 0;

     // Hold the lock over both the size query and the actual conversion
     UErrorCode status = U_ZERO_ERROR;
     int32_t uniLen;
     {
         XMLMutexLock lockConverter(&fMutex);

         //
         //  We don't know how long the result will be, so convert into a
         //  zero sized buffer first. That is expected to fail with
         //  U_BUFFER_OVERFLOW_ERROR and tell us the capacity we need.
         //
         uniLen = ucnv_toUChars(fConverter, 0, 0, toTranscode, srcLen, &status);

         if (status != U_BUFFER_OVERFLOW_ERROR)
             return 0;

         // Now do it for real, leaving room to terminate it ourselves
         status = U_ZERO_ERROR;
         uniBuf = new UChar[uniLen + 1];
         ucnv_toUChars(fConverter, uniBuf, uniLen, toTranscode, srcLen, &status);
     }

     if (U_FAILURE(status))
     {
         // Clean up what we allocated
         delete [] uniBuf;
         return 0;
     }

     // Cap it off to make sure
     uniBuf[uniLen] = 0;

     // If XMLCh and UChar are the same size, the buffer can go back as is
     if (sizeof(XMLCh) == sizeof(UChar))
         return (XMLCh*)uniBuf;

     // Else we have to return a converted copy and drop the UChar buffer
     XMLCh* const converted = convertToXMLCh(uniBuf);
     delete [] uniBuf;
     return converted;
 }


 bool ICULCPTranscoder::transcode(const  char* const     toTranscode
                                 ,       XMLCh* const    toFill
                                 , const unsigned int    maxChars)
 {
     // Check for a couple of psycho corner cases
     if (!toTranscode || !maxChars)
     {
         toFill[0] = 0;
         return true;
     }

     if (!*toTranscode)
     {
         toFill[0] = 0;
         return true;
     }

     // We'll need this in a couple of places below
     const unsigned int srcLen = strlen(toTranscode);

     //
     //  Set up the target buffer. If XMLCh and UChar are not the same size
     //  then we have to use a temp buffer and convert over.
     //
     UChar* targetBuf;
     if (sizeof(XMLCh) == sizeof(UChar))
         targetBuf = (UChar*)toFill;
     else
         targetBuf = new UChar[maxChars + 1];

     //
     //  Use a faux block to enforce a lock on the converter, which will
     //  unlock immediately after its completed.
     //
     UErrorCode err = U_ZERO_ERROR;
     {
         XMLMutexLock lockConverter(&fMutex);
         ucnv_toUChars
         (
             fConverter
             , targetBuf
             , maxChars + 1
             , toTranscode
             , srcLen
             , &err
         );
     }

     if (U_FAILURE(err))
     {
         if (targetBuf != (UChar*)toFill)
             delete [] targetBuf;
         return false;
     }

     // If the sizes are not the same, then copy the data over
     if (sizeof(XMLCh) != sizeof(UChar))
     {
         UChar* srcPtr = targetBuf;
         XMLCh* outPtr = toFill;
         while (*srcPtr)
             *outPtr++ = XMLCh(*srcPtr++);
         *outPtr = 0;

         // And delete the temp buffer
         delete [] targetBuf;
     }

     return true;
 }


 bool ICULCPTranscoder::transcode(   const   XMLCh* const    toTranscode
                                     ,       char* const     toFill
                                     , const unsigned int    maxChars)
 {
     // Watch for a few psycho corner cases
     if (!toTranscode || !maxChars)
     {
         toFill[0] = 0;
         return true;
     }

     if (!*toTranscode)
     {
         toFill[0] = 0;
         return true;
     }

     //
     //  If XMLCh and UChar are not the same size, then we have to make a
     //  temp copy of the text to pass to ICU.
     //
     const UChar* actualSrc;
     UChar* ncActual = 0;
     if (sizeof(XMLCh) == sizeof(UChar))
     {
         actualSrc = (const UChar*)toTranscode;
     }
      else
     {
         // Allocate a non-const temp buf, but store it also in the actual
         ncActual = convertToUChar(toTranscode);
         actualSrc = ncActual;
     }

     // Insure that the temp buffer, if any, gets cleaned up via the nc pointer
     ArrayJanitor<UChar> janTmp(ncActual);

     //
     //  Use a faux block to enforce a lock on the converter while we do this.
     //  It will be released immediately after its done.
     //
     UErrorCode err = U_ZERO_ERROR;
     int32_t targetCap;
     {
         XMLMutexLock lockConverter(&fMutex);
         targetCap = ucnv_fromUChars
         (
             fConverter
             , toFill
             , maxChars
             , actualSrc
             , -1
             , &err
         );
     }

     if (U_FAILURE(err))
         return false;

     toFill[targetCap] = 0;
     return true;
 }