| //////////////////////////////////////////////////////////////////////////////// | |
| // | |
| // Licensed to the Apache Software Foundation (ASF) under one or more | |
| // contributor license agreements. See the NOTICE file distributed with | |
| // this work for additional information regarding copyright ownership. | |
| // The ASF licenses this file to You under the Apache License, Version 2.0 | |
| // (the "License"); you may not use this file except in compliance with | |
| // the License. You may obtain a copy of the License at | |
| // | |
| // http://www.apache.org/licenses/LICENSE-2.0 | |
| // | |
| // Unless required by applicable law or agreed to in writing, software | |
| // distributed under the License is distributed on an "AS IS" BASIS, | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| // See the License for the specific language governing permissions and | |
| // limitations under the License. | |
| // | |
| //////////////////////////////////////////////////////////////////////////////// | |
| /* | |
| CharConv.cpp | |
| Author: Sudhakar Pandey <sudhakar@adobe.com> | |
| */ | |
| #include <iostream> | |
| #include <cstring> | |
| #include <stdexcept> | |
| #include "EncConv.h" | |
| #include "unicode/ucnv.h" //Adding this as an alternative to native iconv library | |
| #include "unicode/ustring.h" //Adding this as an alternative to native iconv library | |
| #ifndef MAX | |
| #define MAX(a,b) (((a)>(b))?(a):(b)) | |
| #endif /* MAX */ | |
| // ============================================================================= | |
| namespace EncConv | |
| { | |
| // ----------------------------------------------------------------------------- | |
| U16Char_t* convSpecialCharsInU16Str(const U16Char_t* src) | |
| { | |
| const LM_UInt32 srcLen = GetNumOfUnits(src); | |
| U16Char_t* pdst = new U16Char_t [srcLen + 1]; | |
| LM_UInt32 i; | |
| for(i = 0; i < srcLen; ++i) | |
| { | |
| U16Char_t c = src[i]; | |
| switch (c) | |
| { | |
| case 0x2018: // U+2018: Left Single Quotation Mark | |
| case 0x2019: // U+2019: Right Single Quotation Mark | |
| c = '\''; break; | |
| case 0x201C: // U+201C: Left Double Quotation Mark | |
| case 0x201D: // U+201D: Right Double Quotation Mark | |
| c = '"'; break; | |
| } | |
| pdst[i] = c; | |
| } | |
| pdst[i] = 0; | |
| return pdst; | |
| //delete[] pdst; | |
| //should be deleted by calling releaseU16CharString() function. | |
| } | |
// Maps an encoding name as supplied by callers (e.g. "ISO8859-1") to the
// converter name that this file passes to ucnv_open().
//
// enc     : NUL-terminated encoding name; may be NULL or empty.
// returns : "ISO-8859-1" when enc is NULL or empty; the mapped name when enc
//           matches one of the known aliases exactly (the comparison is
//           case-sensitive); otherwise enc itself, unchanged.
//           The result is never NULL. It points either to a string literal or
//           to the caller's own enc buffer, so it must not be freed.
const char * getPlatformEncoding(const char* enc)
{
    struct EncodingAlias
    {
        const char* name;          // name accepted from callers
        const char* platformName;  // name handed to the converter
    };

    static const EncodingAlias kAliases[] =
    {
        { "ISO8859-1",        "ISO-8859-1"  },
        { "ISO8859-2",        "ISO-8859-2"  },
        { "ISO8859-3",        "ISO-8859-3"  },
        { "ISO8859-4",        "ISO-8859-4"  },
        { "ISO8859-5",        "ISO-8859-5"  },
        { "ISO8859-6",        "ISO-8859-6"  },
        { "ISO8859-7",        "ISO-8859-7"  },
        { "ISO8859-8",        "ISO-8859-8"  },
        { "ISO8859-9",        "ISO-8859-9"  },
        { "ISO8859-10",       "ISO-8859-10" },
        { "KOI8-R",           "KOI8-R"      },
        { "KOI8-U",           "KOI8-U"      },
        { "microsoft-cp1251", "cp1251"      },
        { "ISO8859-13",       "ISO-8859-13" },
        { "ISO8859-14",       "ISO-8859-14" },
        { "ISO8859-15",       "ISO-8859-15" },
        { "ISCII-DEVANAGARI", "ibm-1137"    },
        { "TIS620-2533",      "TIS-620"     },
        { "UTF-8",            "UTF-8"       }
    };

    // A missing or empty encoding falls back to the default "ISO-8859-1".
    // The NULL test must come first: inspecting a NULL string would crash.
    if (enc == NULL || enc[0] == '\0')
    {
        return "ISO-8859-1";
    }

    const std::size_t aliasCount = sizeof(kAliases) / sizeof(kAliases[0]);
    for (std::size_t i = 0; i < aliasCount; ++i)
    {
        if (std::strcmp(enc, kAliases[i].name) == 0)
        {
            return kAliases[i].platformName;
        }
    }

    // Unknown names are passed through untouched.
    return enc;
}
| const std::string convU16StrToCharStr(const U16Char_t* src, const char* Encoding) | |
| { | |
| //static char const* const tocode = CHARCONV_ICONV_UTF16; | |
| char const* const tocode = getPlatformEncoding(Encoding); | |
| UErrorCode status = U_ZERO_ERROR; | |
| #ifdef ENCCONV_DEBUG | |
| std::cout << "\t" "convString" << std::endl; | |
| std::cout << "\t\t" "tocode = " << tocode << std::endl; | |
| //std::cout << "\t\t" "fromcode = " << fromcode << std::endl; | |
| #endif | |
| //iconv_t cd = iconv_open(tocode, fromcode); | |
| // Initializing ICU converter | |
| UConverter *conv = ucnv_open(tocode, &status); | |
| #ifdef CHARCONV_DEBUG | |
| std::cout << "\t\t" "aft ucnv_open: status = " << status << std::endl; | |
| #endif | |
| if (conv == NULL) | |
| { // try default encoding "ISO-8859-1" | |
| //throw std::runtime_error("Unable to create Unicode converter object"); | |
| status = U_ZERO_ERROR; | |
| conv = ucnv_open("ISO-8859-1", &status); | |
| } | |
| //still if conv is null simply return blank string | |
| if (conv == NULL) | |
| { | |
| return std::string(""); | |
| } | |
| U16Char_t const* srcWrk = src; | |
| const size_t srcSizeInUnits = GetNumOfUnits(src); | |
| const size_t srcSizeInBytes = srcSizeInUnits * sizeof(U16Char_t); | |
| const size_t dstSizeInBytes = MAX(256, (srcSizeInUnits + 1)) * 4; // How much byte buffer is needed? (UTF16 --> MBCS) | |
| char* dst = new char [dstSizeInBytes]; | |
| if(dst==NULL) return std::string(""); | |
| char* dstWrk =(char*)(dst); | |
| size_t srcLeftInBytes = srcSizeInBytes; | |
| size_t dstLeftInBytes = dstSizeInBytes - sizeof(char); | |
| status = U_ZERO_ERROR; | |
| ucnv_fromUChars(conv, dstWrk, dstLeftInBytes, (UChar*)srcWrk, -1, &status); | |
| U16Char_t* reverseConvertedVal = convCharStrToU16Str(dstWrk,Encoding); | |
| if(strcmp((char*)reverseConvertedVal,(char*)src)!=0) | |
| { | |
| EncConv::releaseU16Str(reverseConvertedVal); | |
| delete[] dst; | |
| return std::string(""); | |
| } | |
| EncConv::releaseU16Str(reverseConvertedVal); | |
| #ifdef CHARCONV_DEBUG | |
| std::cout << "\t\t" "aft iconv: status = " << status << std::endl; | |
| #endif | |
| if (status != U_ZERO_ERROR ) | |
| { | |
| // throw std::runtime_error("Unable to convert to string"); | |
| *dstWrk = 0; | |
| } | |
| std::string dst2(dst); | |
| delete[] dst; | |
| //const int err = iconv_close(cd); | |
| ucnv_close(conv); | |
| //if (err == -1) | |
| // throw std::runtime_error("Unable to deallocate iconv_t object"); | |
| return dst2; | |
| } | |
| U16Char_t* convCharStrToU16Str(const char* src, const char* Encoding) | |
| { | |
| //static char const* const tocode = CHARCONV_ICONV_UTF16; | |
| char const* const fromcode = getPlatformEncoding(Encoding); | |
| UErrorCode status = U_ZERO_ERROR; | |
| #ifdef ENCCONV_DEBUG | |
| std::cout << "\t" "convString" << std::endl; | |
| //std::cout << "\t\t" "tocode = " << tocode << std::endl; | |
| std::cout << "\t\t" "fromcode = " << fromcode << std::endl; | |
| #endif | |
| //iconv_t cd = iconv_open(tocode, fromcode); | |
| // Initializing ICU converter | |
| UConverter *conv = ucnv_open(fromcode, &status); | |
| #ifdef CHARCONV_DEBUG | |
| std::cout << "\t\t" "aft ucnv_open: status = " << status << std::endl; | |
| #endif | |
| if (conv == NULL) | |
| { // try default encoding "ISO-8859-1" | |
| //throw std::runtime_error("Unable to create Unicode converter object"); | |
| conv = ucnv_open("ISO-8859-1", &status); | |
| } | |
| char const* srcWrk = src; | |
| const size_t srcSizeInBytes = std::strlen(src); | |
| const size_t dstSizeInBytes = MAX(256, (srcSizeInBytes + 1)) * sizeof(U16Char_t); | |
| U16Char_t* dst = new U16Char_t [dstSizeInBytes / sizeof(U16Char_t)]; | |
| U16Char_t* dstWrk = dst; | |
| size_t srcLeftInBytes = srcSizeInBytes; | |
| size_t dstLeftInBytes = dstSizeInBytes - sizeof(U16Char_t); | |
| status = U_ZERO_ERROR; | |
| //still if conv is null simply return blank string | |
| if (conv == NULL) | |
| { | |
| dst[0] = NULL; | |
| return dst; | |
| } | |
| ucnv_toUChars(conv, (UChar *) dstWrk, dstLeftInBytes, (char*)srcWrk, srcLeftInBytes, &status); | |
| #ifdef CHARCONV_DEBUG | |
| std::cout << "\t\t" "aft iconv: status = " << status << std::endl; | |
| #endif | |
| if (status != U_ZERO_ERROR ) | |
| { | |
| // throw std::runtime_error("Unable to convert to string"); | |
| *dstWrk = 0; | |
| } | |
| //const int err = iconv_close(cd); | |
| ucnv_close(conv); | |
| //if (err == -1) | |
| // throw std::runtime_error("Unable to deallocate iconv_t object"); | |
| return dst; | |
| } | |
| void releaseU16Str(const U16Char_t* buf) | |
| { | |
| if(buf != NULL) | |
| { | |
| delete[] buf; | |
| buf = NULL; | |
| } | |
| return; | |
| } | |
| }// namespace | |
| // ----------------------------------------------------------------------------- | |