| /* |
| * The Apache Software License, Version 1.1 |
| * |
| * Copyright (c) 1999 The Apache Software Foundation. All rights |
| * reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * |
| * 3. The end-user documentation included with the redistribution, |
| * if any, must include the following acknowledgment: |
| * "This product includes software developed by the |
| * Apache Software Foundation (http://www.apache.org/)." |
| * Alternately, this acknowledgment may appear in the software itself, |
| * if and wherever such third-party acknowledgments normally appear. |
| * |
| * 4. The names "Xerces" and "Apache Software Foundation" must |
| * not be used to endorse or promote products derived from this |
| * software without prior written permission. For written |
| * permission, please contact apache\@apache.org. |
| * |
| * 5. Products derived from this software may not be called "Apache", |
| * nor may "Apache" appear in their name, without prior written |
| * permission of the Apache Software Foundation. |
| * |
| * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
| * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
| * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
| * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| * ==================================================================== |
| * |
| * This software consists of voluntary contributions made by many |
| * individuals on behalf of the Apache Software Foundation, and was |
| * originally based on software copyright (c) 1999, International |
| * Business Machines, Inc., http://www.ibm.com . For more information |
| * on the Apache Software Foundation, please see |
| * <http://www.apache.org/>. |
| */ |
| |
| /** |
| * $Log$ |
| * Revision 1.3 1999/11/18 20:16:52 abagchi |
| * Now works with ICU 1.3.1 |
| * |
| * Revision 1.2 1999/11/17 22:36:41 rahulj |
| * Code works with ICU transcoding service |
| * |
| * Revision 1.1.1.1 1999/11/09 01:06:07 twl |
| * Initial checkin |
| * |
| * Revision 1.3 1999/11/08 20:45:33 rahul |
| * Swat for adding in Product name and CVS comment log variable. |
| * |
| */ |
| |
| |
| // --------------------------------------------------------------------------- |
| // Includes |
| // --------------------------------------------------------------------------- |
| #include <util/TranscodingException.hpp> |
| #include "ICUTransService.hpp" |
| #include <string.h> |
| #include <uloc.h> |
| #include <unicode.h> |
| #include <ucnv.h> |
| #include <ustring.h> |
| |
| |
| |
| // --------------------------------------------------------------------------- |
| // ICUTransService: Public, static methods |
| // --------------------------------------------------------------------------- |
| void ICUTransService::setICUPath(const char* const pathToSet) |
| { |
| uloc_setDataDirectory(pathToSet); |
| } |
| |
| |
| // --------------------------------------------------------------------------- |
| // ICUTransService: Constructors and Destructor |
| // --------------------------------------------------------------------------- |
| ICUTransService::ICUTransService() |
| { |
| } |
| |
| ICUTransService::~ICUTransService() |
| { |
| } |
| |
| |
| // --------------------------------------------------------------------------- |
| // ICUTransService: The virtual transcoding service API |
| // --------------------------------------------------------------------------- |
| int ICUTransService::compareIString(const XMLCh* const comp1 |
| , const XMLCh* const comp2) |
| { |
| const XMLCh* psz1 = comp1; |
| const XMLCh* psz2 = comp2; |
| |
| unsigned int curCount = 0; |
| while (true) |
| { |
| // If an inequality, then return the difference |
| if (Unicode::toUpperCase(*psz1) != Unicode::toUpperCase(*psz2)) |
| return int(*psz1) - int(*psz2); |
| |
| // If either has ended, then they both ended, so equal |
| if (!*psz1 || !*psz2) |
| break; |
| |
| // Move upwards for the next round |
| psz1++; |
| psz2++; |
| } |
| return 0; |
| } |
| |
| |
| int ICUTransService::compareNIString(const XMLCh* const comp1 |
| , const XMLCh* const comp2 |
| , const unsigned int maxChars) |
| { |
| const XMLCh* psz1 = comp1; |
| const XMLCh* psz2 = comp2; |
| |
| unsigned int curCount = 0; |
| while (true) |
| { |
| // If an inequality, then return difference |
| if (Unicode::toUpperCase(*psz1) != Unicode::toUpperCase(*psz2)) |
| return int(*psz1) - int(*psz2); |
| |
| // If either ended, then both ended, so equal |
| if (!*psz1 || !*psz2) |
| break; |
| |
| // Move upwards to next chars |
| psz1++; |
| psz2++; |
| |
| // |
| // Bump the count of chars done. If it equals the count then we |
| // are equal for the requested count, so break out and return |
| // equal. |
| // |
| curCount++; |
| if (maxChars == curCount) |
| break; |
| } |
| return 0; |
| } |
| |
| |
| bool ICUTransService::isSpace(const XMLCh toCheck) const |
| { |
| return (Unicode::isSpaceChar(toCheck) != 0); |
| } |
| |
| |
| XMLTranscoder* ICUTransService::makeNewDefTranscoder() |
| { |
| // |
| // Try to create a default converter. If it fails, return a null pointer |
| // which will basically cause the system to give up because we really can't |
| // do anything without one. |
| // |
| UErrorCode uerr = U_ZERO_ERROR; |
| UConverter* converter = ucnv_open(NULL, &uerr); |
| if (!converter) |
| return 0; |
| |
| // That went ok, so create an ICU transcoder wrapper and return it |
| return new ICUTranscoder(converter, 0); |
| } |
| |
| |
| XMLTranscoder* |
| ICUTransService::makeNewTranscoderFor( const XMLCh* const encodingName |
| , XMLTransService::Codes& resValue |
| , const unsigned int blockSize) |
| { |
| UErrorCode uerr = U_ZERO_ERROR; |
| UConverter* converter = ucnv_openU(encodingName, &uerr); |
| if (!converter) |
| { |
| resValue = XMLTransService::UnsupportedEncoding; |
| return 0; |
| } |
| return new ICUTranscoder(converter, blockSize); |
| } |
| |
| |
| |
| |
| // --------------------------------------------------------------------------- |
| // ICUTranscoder: Constructors and Destructor |
| // --------------------------------------------------------------------------- |
| ICUTranscoder::ICUTranscoder( UConverter* const toAdopt |
| , const unsigned int blockSize) : |
| fCharOfsBuf(0) |
| , fConverter(toAdopt) |
| { |
| // There won't be a block size if this is for a default transcoder |
| if (blockSize) |
| fCharOfsBuf = new long[blockSize]; |
| } |
| |
| ICUTranscoder::~ICUTranscoder() |
| { |
| delete [] fCharOfsBuf; |
| |
| // If there is a converter, ask ICU to clean it up |
| if (fConverter) |
| { |
| // <TBD> Does this actually delete the structure??? |
| ucnv_close(fConverter); |
| fConverter = 0; |
| } |
| } |
| |
| |
| // --------------------------------------------------------------------------- |
| // ICUTranscoder: The virtual transcoder API |
| // --------------------------------------------------------------------------- |
| unsigned int ICUTranscoder::calcRequiredSize(const XMLCh* const srcText) |
| { |
| if (!srcText) |
| return 0; |
| |
| XMLMutexLock lockConverter(&fMutex); |
| |
| UErrorCode err = U_ZERO_ERROR; |
| const int32_t targetCap = ucnv_fromUChars |
| ( |
| fConverter |
| , 0 |
| , 0 |
| , srcText |
| , &err |
| ); |
| |
| if (err != U_BUFFER_OVERFLOW_ERROR) |
| return 0; |
| |
| return (unsigned int)targetCap; |
| } |
| |
| unsigned int ICUTranscoder::calcRequiredSize(const char* const srcText) |
| { |
| if (!srcText) |
| return 0; |
| |
| XMLMutexLock lockConverter(&fMutex); |
| |
| UErrorCode err = U_ZERO_ERROR; |
| const int32_t targetCap = ucnv_toUChars |
| ( |
| fConverter |
| , 0 |
| , 0 |
| , srcText |
| , strlen(srcText) |
| , &err |
| ); |
| |
| if (err != U_BUFFER_OVERFLOW_ERROR) |
| return 0; |
| |
| // Subtract one since it includes the terminator space |
| return (unsigned int)(targetCap - 1); |
| } |
| |
| |
| XMLCh ICUTranscoder::transcodeOne( const char* const srcData |
| , const unsigned int srcBytes |
| , unsigned int& bytesEaten) |
| { |
| // Check for stupid stuff |
| if (!srcBytes) |
| return 0; |
| |
| XMLMutexLock lockConverter(&fMutex); |
| |
| UErrorCode err = U_ZERO_ERROR; |
| const char* startSrc = srcData; |
| const XMLCh chRet = ucnv_getNextUChar |
| ( |
| fConverter |
| , &startSrc |
| , (srcData + srcBytes) - 1 |
| , &err |
| ); |
| |
| // Bail out if an error |
| if (U_FAILURE(err)) |
| return 0; |
| |
| // Calculate the bytes eaten and return the char |
| bytesEaten = startSrc - srcData; |
| return chRet; |
| } |
| |
| |
| char* ICUTranscoder::transcode(const XMLCh* const toTranscode) |
| { |
| char* retBuf = 0; |
| |
| // Check for a couple of special cases |
| if (!toTranscode) |
| return 0; |
| |
| if (!*toTranscode) |
| { |
| retBuf = new char[1]; |
| retBuf[0] = 0; |
| return retBuf; |
| } |
| |
| XMLMutexLock lockConverter(&fMutex); |
| |
| // Caculate a return buffer size not too big, but less likely to overflow |
| int32_t targetLen = (int32_t)(u_strlen(toTranscode) * 1.25); |
| |
| // Allocate the return buffer |
| retBuf = new char[targetLen + 1]; |
| |
| //Convert the Unicode string to char* using Intl stuff |
| UErrorCode err = U_ZERO_ERROR; |
| int32_t targetCap = ucnv_fromUChars |
| ( |
| fConverter |
| , retBuf |
| , targetLen + 1 |
| , toTranscode |
| , &err |
| ); |
| |
| // If targetLen is not enough then buffer overflow might occur |
| if (err == U_BUFFER_OVERFLOW_ERROR) |
| { |
| // Reset the error, delete the old buffer, allocate a new one, and try again |
| err = U_ZERO_ERROR; |
| delete [] retBuf; |
| retBuf = new char[targetCap]; |
| targetCap = ucnv_fromUChars(fConverter, retBuf, targetCap, toTranscode, &err); |
| } |
| |
| if (U_FAILURE(err)) |
| { |
| delete [] retBuf; |
| return 0; |
| } |
| |
| // Cap it off and return |
| retBuf[targetCap] = 0; |
| return retBuf; |
| } |
| |
| |
| bool ICUTranscoder::transcode( const XMLCh* const toTranscode |
| , char* const toFill |
| , const unsigned int maxChars) |
| { |
| // Watch for a few psycho corner cases |
| if (!toTranscode || !maxChars) |
| { |
| toFill[0] = 0; |
| return true; |
| } |
| |
| if (!*toTranscode) |
| { |
| toFill[0] = 0; |
| return true; |
| } |
| |
| XMLMutexLock lockConverter(&fMutex); |
| |
| UErrorCode err = U_ZERO_ERROR; |
| int32_t targetCap; |
| targetCap = ucnv_fromUChars(fConverter, toFill, maxChars + 1, toTranscode, &err); |
| |
| if (U_FAILURE(err)) |
| return false; |
| |
| return true; |
| } |
| |
| |
| XMLCh* ICUTranscoder::transcode(const char* const toTranscode) |
| { |
| // Watch for a few pyscho corner cases |
| if (!toTranscode) |
| return 0; |
| |
| XMLCh* retVal = 0; |
| if (!*toTranscode) |
| { |
| retVal = new XMLCh[1]; |
| retVal[0] = 0; |
| return retVal; |
| } |
| |
| XMLMutexLock lockConverter(&fMutex); |
| |
| // |
| // Get the length of the string to transcode. The Unicode string will |
| // almost always be no more chars than were in the source, so this is |
| // the best guess as to the storage needed. |
| // |
| const int32_t srcLen = (int32_t)strlen(toTranscode); |
| |
| // |
| // Here we don't know what the target length will be so use 0 and expect |
| // an U_BUFFER_OVERFLOW_ERROR in which case it'd get resolved by the |
| // correct capacity value. |
| // |
| UErrorCode err = U_ZERO_ERROR; |
| int32_t targetCap; |
| targetCap = ucnv_toUChars |
| ( |
| fConverter |
| , 0 |
| , 0 |
| , toTranscode |
| , srcLen |
| , &err |
| ); |
| |
| if (err != U_BUFFER_OVERFLOW_ERROR) |
| return 0; |
| |
| err = U_ZERO_ERROR; |
| retVal = new XMLCh[targetCap]; |
| ucnv_toUChars |
| ( |
| fConverter |
| , retVal |
| , targetCap |
| , toTranscode |
| , srcLen |
| , &err |
| ); |
| |
| if (U_FAILURE(err)) |
| return 0; |
| |
| return retVal; |
| } |
| |
| |
| bool ICUTranscoder::transcode( const char* const toTranscode |
| , XMLCh* const toFill |
| , const unsigned int maxChars) |
| { |
| // Check for a couple of psycho corner cases |
| if (!toTranscode || !maxChars) |
| { |
| toFill[0] = 0; |
| return true; |
| } |
| |
| if (!*toTranscode) |
| { |
| toFill[0] = 0; |
| return true; |
| } |
| |
| XMLMutexLock lockConverter(&fMutex); |
| |
| UErrorCode err = U_ZERO_ERROR; |
| const int32_t srcLen = (int32_t)strlen(toTranscode); |
| |
| ucnv_toUChars |
| ( |
| fConverter |
| , toFill |
| , maxChars + 1 |
| , toTranscode |
| , srcLen |
| , &err |
| ); |
| |
| if (U_FAILURE(err)) |
| return false; |
| return true; |
| } |
| |
| |
| unsigned int |
| ICUTranscoder::transcodeXML(const char* const srcData |
| , const unsigned int srcCount |
| , XMLCh* const toFill |
| , const unsigned int maxChars |
| , unsigned int& bytesEaten) |
| { |
| // |
| // If the input encoding uses fixed size characters, we can use a |
| // simpler, faster approach to computing the character sizes to be |
| // returned in the charSizes array. |
| // |
| const int maxCharSize = ucnv_getMaxCharSize(fConverter); |
| const int minCharSize = ucnv_getMinCharSize(fConverter); |
| |
| // |
| // Set up pointers to the source and destination buffers. |
| // |
| UChar* startTarget = toFill; |
| const char* startSrc = srcData; |
| const char* endSrc = srcData + srcCount; |
| |
| // |
| // Transoode the buffer. Buffer overflow errors are normal, occuring |
| // when the raw input buffer holds more characters than will fit |
| // in the Unicode output buffer. |
| // |
| UErrorCode err = U_ZERO_ERROR; |
| ucnv_toUnicode |
| ( |
| fConverter |
| , &startTarget |
| , toFill + maxChars |
| , &startSrc |
| , endSrc |
| , 0 |
| , false |
| , &err |
| ); |
| |
| if ((err != U_ZERO_ERROR) && (err != U_INDEX_OUTOFBOUNDS_ERROR)) |
| ThrowXML(TranscodingException, XML4CExcepts::Trans_CouldNotXCodeXMLData); |
| |
| // Calculate the bytes eaten and store in caller's param |
| bytesEaten = startSrc - srcData; |
| |
| // Return the chars we put into the target buffer |
| return startTarget - toFill; |
| } |