| /********************************************************************** |
| // @@@ START COPYRIGHT @@@ |
| // |
| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // |
| // @@@ END COPYRIGHT @@@ |
| **********************************************************************/ |
| |
| // |
| // This source file contains interface routines to the Open Source |
| // code character set conversion routines that are coded in C. |
| // |
| // NOTE: These routines are coded very generically so that the source |
| // for them can be used in not only the SQL/MX compiler build, |
| // but also used by the ODBC build and maybe others. |
| |
| #include <limits.h> |
| #include <iconv.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| |
| #include "multi-byte.h" |
| #include "fcconv.h" |
| #include "csconvert.h" |
| |
| #include "from_GB18030.c" |
| #include "from_GB2312.c" |
| #include "from_GBK.c" |
| |
| #define USE_OUR_MB_WC_DATA_TABLES |
| #include "UCS_jp_data.c" |
| #include "UCS_zs_data.c" |
| #include "UCS_zb_data.c" |
| #include "UCS_ko_data.c" |
| #include "mb_iconv.c" |
| #include "iconv_gen.c" |
| |
| |
| #include "mb_lconv.c" |
| #undef USE_OUR_MB_WC_DATA_TABLES |
| |
| #if 0 // Don't need these since we chose to support GBK (a superset of GB2312) instead |
| #define CODESET gb2312 |
| #define OUR_CS_GB2312_specific |
| #define OUR_CS_GBK_specific |
| #include "mb_lconv.c" |
| #undef OUR_CS_GBK_specific |
| #undef OUR_CS_GB2312_specific |
| #undef CODESET |
| #endif // Don't need these since we chose to support GBK (a superset of GB2312) instead |
| |
| #define CODESET gbk |
| #define OUR_CS_GBK_specific |
| #define OUR_CS_GB2312_specific |
| #include "mb_lconv.c" |
| #undef OUR_CS_GBK_specific |
| #undef OUR_CS_GB2312_specific |
| #undef CODESET |
| |
| #define CODESET gb18030 |
| #define OUR_CS_GB18030_specific |
| #include "mb_lconv.c" |
| #undef OUR_CS_GB18030_specific |
| #undef CODESET |
| |
//
// Argument-validation and bookkeeping macros shared by the conversion
// entry points in this file.
//
// They deliberately refer to the enclosing function's parameters and locals
// BY NAME (charset, in_bufr, in_len, out_bufr, out_len, target,
// translated_char_cnt, translated_char_cnt_p, output_data_len_p,
// first_untranslated_char), so they can only be used inside functions that
// follow that naming scheme.
//
// The statement-like macros are wrapped in do { ... } while (0) so that
// "MACRO();" is exactly one statement and stays correct when used as the
// body of an unbraced if/else.
//

// Return CNV_ERR_INVALID_CS from the enclosing function if 'charset' is the
// "unknown" value or lies above the last valid enumerator.
#define ENSURE_VALID_CHARSET()                                               \
  do {                                                                       \
    if ( (charset == cnv_UnknownCharSet) || (charset > cnv_Last_Valid_CS) )  \
      return( CNV_ERR_INVALID_CS );                                          \
  } while (0)

// Return CNV_ERR_NOINPUT from the enclosing function if there is no input
// buffer or its length is not positive.
#define ENSURE_VALID_INPUT()                                                 \
  do {                                                                       \
    if ( (in_bufr == NULL) || (in_len <= 0) )                                \
      return CNV_ERR_NOINPUT;                                                \
  } while (0)

// Return CNV_ERR_BUFFER_OVERRUN from the enclosing function if there is no
// output buffer or its length is not positive.
#define ENSURE_VALID_OUTPUT()                                                \
  do {                                                                       \
    if ( (out_bufr == NULL) || (out_len <= 0) )                              \
      return CNV_ERR_BUFFER_OVERRUN;                                         \
  } while (0)

// Run all three argument checks, in the order charset, input, output.
#define CHECK_FOR_SERIOUS_ERRORS()                                           \
  do {                                                                       \
    ENSURE_VALID_CHARSET();                                                  \
    ENSURE_VALID_INPUT();                                                    \
    ENSURE_VALID_OUTPUT();                                                   \
  } while (0)

// Publish the running character count through the optional out-parameter.
#define SET_TRANSLATED_CHAR_CNT()                                            \
  do {                                                                       \
    if ( translated_char_cnt_p != NULL )                                     \
      *translated_char_cnt_p = (translated_char_cnt) ;                       \
  } while (0)

// Publish the number of bytes written so far (distance from the start of
// out_bufr to 'target') through the optional out-parameter.
#define SET_OUTPUT_DATA_LEN()                                                \
  do {                                                                       \
    if ( output_data_len_p != NULL )                                         \
      *output_data_len_p = ( (char *)target - (char *)out_bufr );            \
  } while (0)

// Common prologue: points first_untranslated_char at the start of the input,
// DECLARES the locals translated_char_cnt, source and endSource in the
// enclosing scope, and publishes a zero character count.
// This one cannot be wrapped in a block because the declarations must remain
// visible to the rest of the function.
#define INITIALIZE_VARIABLES()                                               \
  first_untranslated_char = (char *) in_bufr;                                \
  unsigned int translated_char_cnt = 0;                                      \
  unsigned char * source = (unsigned char *) in_bufr;                        \
  unsigned char * endSource = source + in_len ;                              \
  SET_TRANSLATED_CHAR_CNT();
| |
| typedef size_t (*csc_mbtowc_funcPtr) ( WChar_t *pwc, const char *ts, |
| size_t maxlen, _LC_charmap_t *hdl ) ; |
| typedef int (*csc_input_utfPtr) ( _LC_fcconv_iconv_t *, uchar_t **, int ) ; |
| typedef int (*csc_wctomb_funcPtr) ( char *s, WChar_t wc, |
| _LC_charmap_t *hdl ) ; |
| typedef int (*csc_output_utfPtr) ( _LC_fcconv_iconv_t *, uchar_t *, |
| int, ucs4_t) ; |
| |
| static const csc_mbtowc_funcPtr csc_mbtowc_ptrs[ cnv_Last_Valid_CS + 1] = { |
| NULL, // cnv_UnknownCharset |
| NULL, // cnv_UTF8 |
| NULL, // cnv_UTF16, |
| NULL, // cnv_UTF32, |
| NULL, // cnv_ISO88591 |
| Our_mbtowc_sjis_ucs4, // See Our_mbtowc_sjis_ucs4() in mb_lconv.c |
| Our_mbtowc_eucjp_ucs4, // See Our_mbtowc_eucjp_ucs4() in mb_lconv.c |
| Our_mbtowc_cp949_ucs4, // See Our_mbtowc_cp949_ucs4() in mb_lconv.c |
| Our_mbtowc_big5_ucs4, // See Our_mbtowc_big5_ucs4() in mb_lconv.c |
| __mbtowc_gbk_ucs4, // See MBTOWC() in mb_lconv.c |
| __mbtowc_gb18030_ucs4, // See MBTOWC() in mb_lconv.c |
| __mbtowc_gbk_ucs4 // See MBTOWC() in mb_lconv.c |
| }; |
| |
| static const csc_input_utfPtr csc_input_utf_ptrs[ cnv_Last_Valid_CS + 1] = { |
| NULL, // cnv_UnknownCharset |
| __input_utf8, // cnv_UTF8 |
| __input_ucs2, // cnv_UTF16, |
| __input_ucs4, // cnv_UTF32, |
| NULL, // cnv_ISO88591 |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| }; |
| |
| static const csc_wctomb_funcPtr csc_wctomb_ptrs[ cnv_Last_Valid_CS + 1] = { |
| NULL, // cnv_UnknownCharset |
| NULL, // cnv_UTF8 |
| NULL, // cnv_UTF16, |
| NULL, // cnv_UTF32, |
| NULL, // cnv_ISO88591 |
| Our_wctomb_sjis_ucs4, // See Our_mbtowc_sjis_ucs4() in mb_lconv.c |
| Our_wctomb_eucjp_ucs4, // See Our_mbtowc_eucjp_ucs4() in mb_lconv.c |
| Our_wctomb_cp949_ucs4, // See Our_mbtowc_cp949_ucs4() in mb_lconv.c |
| Our_wctomb_big5_ucs4, // See Our_mbtowc_big5_ucs4() in mb_lconv.c |
| __wctomb_gbk_ucs4, // See MBTOWC() in mb_lconv.c |
| __wctomb_gb18030_ucs4, // See MBTOWC() in mb_lconv.c |
| __wctomb_gbk_ucs4 // See MBTOWC() in mb_lconv.c |
| }; |
| |
| static const csc_output_utfPtr csc_output_utf_ptrs[ cnv_Last_Valid_CS + 1] = { |
| NULL, // cnv_UnknownCharset |
| __output_utf8, // cnv_UTF8 |
| __output_ucs2, // cnv_UTF16, |
| __output_ucs4, // cnv_UTF32, |
| NULL, // cnv_ISO88591 |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| }; |
| |
| // |
| // LocaleToUTF16() - Convert a string of characters in the specified |
| // character set to UTF-16. |
| // |
| int LocaleToUTF16( const enum cnv_version version , |
| const char *in_bufr , const int in_len , |
| const char *out_bufr , const int out_len , |
| enum cnv_charset charset , |
| char * & first_untranslated_char , |
| unsigned int *output_data_len_p , |
| const int cnv_flags , |
| const int addNullAtEnd_flag , |
| unsigned int * translated_char_cnt_p , |
| unsigned int max_chars_to_convert ) |
| { |
| if ( version != cnv_version1 ) |
| return CNV_ERR_INVALID_VERS; |
| |
| // Initialize some return values early ... in case we give error |
| |
| INITIALIZE_VARIABLES(); |
| |
| ucs2_t * target = (ucs2_t *) out_bufr; |
| ucs2_t * endTarget = target + ( out_len / sizeof(ucs2_t) ); |
| |
| SET_OUTPUT_DATA_LEN(); |
| |
| CHECK_FOR_SERIOUS_ERRORS(); |
| |
| // We initialize a _LC_fcconv_iconv_rec struct here. |
| // NOTE: For our purposes, the ONLY thing that |
| // must be initialized is the flags word. |
| // |
| _LC_fcconv_iconv_rec cd; |
| |
| int revBytes = cnv_flags & CNV_REVERSE_OUTBYTES; |
| cd.flags = CONV_BOM_WRITTEN | CONV_INPUT_PROCESSED | |
| (revBytes ? CONV_REVERSE_OUTBYTE : 0); |
| |
| if ( max_chars_to_convert == 0xFFFFFFFF ) |
| max_chars_to_convert = (unsigned int)in_len ; |
| // |
| // Fast path where charset is ISO88591 or a multi-byte charset. |
| // An assumption made here is that non-ASCII chars will rarely be seen. |
| // If one is found, we break out of this fast path and go down the |
| // slow path. |
| // |
| int charsetIsWide = 0; |
| if ( (charset == cnv_UTF16) || (charset == cnv_UTF32) ) |
| charsetIsWide = 1 ; |
| |
| if ( ! charsetIsWide ) |
| { |
| unsigned int UCS4 = 0; |
| |
| int maxLoopCnt = (int)( endTarget - target ); |
| if ( maxLoopCnt > in_len ) |
| maxLoopCnt = in_len ; |
| if ( maxLoopCnt > (int) max_chars_to_convert ) |
| maxLoopCnt = (int) max_chars_to_convert ; |
| |
| unsigned int maxCharToHandle = (charset == cnv_ISO88591) ? 0x0FF : 0x7F; |
| |
| if ( revBytes ) |
| { |
| while ( ( --maxLoopCnt >= 0 ) && |
| ( (UCS4 = *source) <= maxCharToHandle ) ) |
| { |
| source++; |
| *target++ = UCS4 << 8; |
| } |
| } |
| else |
| { |
| while ( ( --maxLoopCnt >= 0 ) && |
| ( (UCS4 = *source) <= maxCharToHandle ) ) |
| { |
| source++; |
| *target++ = UCS4; |
| } |
| } |
| translated_char_cnt = source - (unsigned char *)in_bufr ; |
| } |
| |
| // |
| // Slower path that handles all locales. |
| // |
| csc_mbtowc_funcPtr inputFuncPtr ; |
| csc_input_utfPtr inputFuncPtr2 = NULL; |
| |
| inputFuncPtr = csc_mbtowc_ptrs[ charset ]; |
| if ( ( inputFuncPtr == NULL ) && ( charset != cnv_ISO88591 ) ) |
| { |
| inputFuncPtr2 = csc_input_utf_ptrs[ charset ]; |
| if ( inputFuncPtr2 == NULL ) |
| return( CNV_ERR_INVALID_CS ); // Shouldn't ever happen ... |
| } |
| |
| while ( (source < endSource) && |
| (translated_char_cnt < max_chars_to_convert) ) { |
| |
| int UCS4 = 0; // Init. in loop in case new char longer than prev one. |
| |
| if ( ( (UCS4 = *source) < 0x080 ) && // If ASCII and |
| ( target < endTarget ) && // output buffer has space yet |
| ( ! charsetIsWide ) ) |
| { |
| source++ ; |
| if ( revBytes ) |
| UCS4 <<= 8; |
| *target++ = UCS4; |
| translated_char_cnt += 1; |
| } |
| else |
| { |
| size_t mblen ; |
| int ct = -1; // Init. - assume an error. |
| unsigned char * tmpsrc = source; |
| first_untranslated_char = (char *)source; //...in case char is bad |
| |
| if ( charset == cnv_ISO88591 ) { |
| UCS4 = *source; |
| mblen = 1; |
| } |
| else { |
| if ( inputFuncPtr != NULL ) |
| mblen = (*inputFuncPtr)( (WChar_t *) &UCS4, |
| (const char *)source, endSource - source, |
| NULL ); |
| else { |
| UCS4 = (*inputFuncPtr2)( &cd, &tmpsrc, endSource - source ); |
| if ( UCS4 < 0 ) mblen = -1; |
| else { |
| mblen = tmpsrc - source; |
| } |
| } |
| } |
| if ( mblen == 0 ) /* mblen==0 when data is starts with '\0' */ |
| mblen = 1; |
| if ( (mblen > 0) && (mblen < 0x7FFFFFFF) ) { |
| if ( UCS4 < 0x0000D800 ) { // If simple UCS2, just store it! |
| if ( target < endTarget ) { |
| if ( revBytes ) |
| UCS4 = ( ( UCS4 & 0x00FF ) << 8 ) | ( UCS4 >> 8 ) ; |
| |
| *target = UCS4; |
| ct = 2; |
| } |
| else ct = ERR_BUFFER_OVERRUN; |
| } |
| else { // Not simple UCS2, so call routine that can handle it |
| ct = __output_ucs2( &cd, (uchar_t *)target, |
| (endTarget - target)*sizeof(ucs2_t) , UCS4); |
| } |
| } |
| else |
| ct = mblen; /* Put error code in ct */ |
| |
| if ( ct < 0 ) { |
| // About to issue an error, so update return values first. |
| |
| SET_TRANSLATED_CHAR_CNT(); |
| SET_OUTPUT_DATA_LEN(); |
| |
| if ( ct == ERR_BUFFER_OVERRUN ) |
| return CNV_ERR_BUFFER_OVERRUN; |
| else |
| return CNV_ERR_INVALID_CHAR; |
| } |
| target += ct/sizeof( ucs2_t ) ; |
| translated_char_cnt += 1; |
| source += mblen; |
| } |
| } |
| first_untranslated_char = (char *) source; |
| SET_TRANSLATED_CHAR_CNT(); |
| |
| int rtnVal = 0; |
| if ( addNullAtEnd_flag == TRUE ) { |
| if ( target < endTarget ) { |
| *target++ = 0; // Store a 16-bit NULL |
| } |
| else { |
| rtnVal = CNV_ERR_BUFFER_OVERRUN; |
| } |
| } |
| SET_OUTPUT_DATA_LEN(); |
| return rtnVal; |
| } |
| |
| // |
| // LocaleToUTF8() - Convert a string of characters in the specified |
| // character set to UTF8. |
| // |
| int LocaleToUTF8( const enum cnv_version version , |
| const char *in_bufr , const int in_len , |
| const char *out_bufr , const int out_len , |
| enum cnv_charset charset , |
| char * & first_untranslated_char , |
| unsigned int *output_data_len_p , |
| const int addNullAtEnd_flag , |
| unsigned int * translated_char_cnt_p ) |
| { |
| if ( version != cnv_version1 ) |
| return CNV_ERR_INVALID_VERS; |
| |
| INITIALIZE_VARIABLES(); |
| |
| unsigned char * target = (unsigned char *) out_bufr; |
| unsigned char * endTarget = target + out_len ; |
| |
| SET_OUTPUT_DATA_LEN(); |
| |
| CHECK_FOR_SERIOUS_ERRORS(); |
| |
| // We initialize a _LC_fcconv_iconv_rec struct here. |
| // NOTE: For our purposes, the ONLY thing that |
| // must be initialized is the flags word. |
| // |
| _LC_fcconv_iconv_rec cd; |
| |
| cd.flags = CONV_BOM_WRITTEN | CONV_INPUT_PROCESSED ; |
| |
| // |
| // Fast path where charset is ISO88591 or a multi-byte charset. |
| // An assumption made here is that non-ASCII chars will rarely be seen. |
| // If one is found, we break out of this fast path and go down the |
| // slow path. |
| // |
| int charsetIsWide = 0; |
| if ( (charset == cnv_UTF16) || (charset == cnv_UTF32) ) |
| charsetIsWide = 1 ; |
| |
| if ( ! charsetIsWide ) |
| { |
| unsigned int UCS4 = 0; |
| |
| int maxLoopCnt = endTarget - target ; |
| if ( maxLoopCnt > in_len ) |
| maxLoopCnt = in_len ; |
| |
| while ( ( --maxLoopCnt >= 0) && ( (UCS4 = *source) < 0x080 ) ) |
| { |
| source++ ; |
| *target++ = UCS4; |
| } |
| translated_char_cnt = source - (unsigned char *)in_bufr ; |
| } |
| |
| // |
| // Slower path that handles all locales. |
| // |
| csc_mbtowc_funcPtr inputFuncPtr ; |
| csc_input_utfPtr inputFuncPtr2 = NULL; |
| |
| inputFuncPtr = csc_mbtowc_ptrs[ charset ]; |
| if ( ( inputFuncPtr == NULL ) && ( charset != cnv_ISO88591 ) ) |
| { |
| inputFuncPtr2 = csc_input_utf_ptrs[ charset ]; |
| if ( inputFuncPtr2 == NULL ) |
| return( CNV_ERR_INVALID_CS ); // Shouldn't ever happen ... |
| } |
| |
| while ( source < endSource ) { |
| |
| int UCS4 = 0; // Init. in loop in case new char longer than prev one. |
| |
| if ( ( (UCS4 = *source) < 0x080 ) && // If ASCII and |
| ( target < endTarget ) && // output buffer has space yet |
| ( ! charsetIsWide ) ) |
| { |
| source++ ; |
| *target++ = UCS4; |
| translated_char_cnt += 1; |
| } |
| else |
| { |
| size_t mblen ; |
| int ct = -1; // Init. - assume an error. |
| unsigned char * tmpsrc = source; |
| first_untranslated_char = (char *)source; //...in case char is bad |
| |
| if ( charset == cnv_ISO88591 ) { |
| UCS4 = *source; |
| mblen = 1; |
| } |
| else if ( inputFuncPtr != NULL ) |
| mblen = (*inputFuncPtr)( (WChar_t *) &UCS4, |
| (const char *)source, endSource - source, |
| NULL ); |
| else { |
| UCS4 = (*inputFuncPtr2)( &cd, &tmpsrc, endSource - source ); |
| if ( UCS4 < 0 ) mblen = -1; |
| else mblen = tmpsrc - source; |
| } |
| |
| if ( mblen == 0 ) /* mblen==0 when data is starts with '\0' */ |
| mblen = 1; |
| if ( (mblen > 0) && (mblen < 0x7FFFFFFF) ) |
| ct = __output_utf8( &cd, target, endTarget - target , UCS4); |
| else |
| ct = mblen; /* Put error code in ct */ |
| |
| if ( ct < 0 ) { |
| // About to give an error, so update return values first. |
| |
| SET_TRANSLATED_CHAR_CNT(); |
| SET_OUTPUT_DATA_LEN(); |
| |
| if ( ct == ERR_BUFFER_OVERRUN ) |
| return CNV_ERR_BUFFER_OVERRUN; |
| else |
| return CNV_ERR_INVALID_CHAR; |
| } |
| source += mblen; |
| target += ct; |
| translated_char_cnt += 1; |
| } |
| } |
| first_untranslated_char = (char *) source; |
| SET_TRANSLATED_CHAR_CNT(); |
| |
| int rtnVal = 0; |
| if ( addNullAtEnd_flag == TRUE ) { |
| if ( target < endTarget ) { |
| *target++ = 0; // Store an 8-bit NULL |
| } |
| else { |
| rtnVal = CNV_ERR_BUFFER_OVERRUN; |
| } |
| } |
| SET_OUTPUT_DATA_LEN(); |
| return rtnVal; |
| } |
| |
| // |
| // LocaleCharToUCS4() converts the FIRST char in the input string to its |
| // UCS4 value. Returns the UCS4 value at location specified AND the |
| // length of the input character in bytes as the return value. |
| // |
| int LocaleCharToUCS4( const char *in_bufr, //Ptr to Input string |
| const int in_len, //Len of Input string (bytes) |
| unsigned int *UCS4ptr , //Ptr to output location |
| enum cnv_charset charset ) //Locale Character Set |
| { |
| ENSURE_VALID_CHARSET(); |
| ENSURE_VALID_INPUT(); |
| |
| unsigned char * tmpsrc = (unsigned char *) in_bufr; |
| size_t mblen ; |
| int UCS4 = 0; |
| |
| // We initialize a _LC_fcconv_iconv_rec struct here. |
| // NOTE: For our purposes, the ONLY thing that |
| // must be initialized is the flags word. |
| // |
| _LC_fcconv_iconv_rec cd; |
| |
| cd.flags = CONV_BOM_WRITTEN | CONV_INPUT_PROCESSED ; |
| |
| if ( charset == cnv_ISO88591 ) { |
| UCS4 = *(unsigned char *)in_bufr; |
| mblen = 1; |
| } |
| else { |
| csc_mbtowc_funcPtr inputFuncPtr ; |
| csc_input_utfPtr inputFuncPtr2 ; |
| |
| inputFuncPtr = csc_mbtowc_ptrs[ charset ]; |
| if ( inputFuncPtr != NULL ) |
| mblen = (*inputFuncPtr)( (WChar_t *) &UCS4, |
| (const char *)in_bufr, in_len, NULL ); |
| else { |
| inputFuncPtr2 = csc_input_utf_ptrs[ charset ]; |
| if ( inputFuncPtr2 == NULL ) |
| return( CNV_ERR_INVALID_CS ); // Shouldn't ever happen ... |
| |
| UCS4 = (*inputFuncPtr2)( &cd, &tmpsrc, in_len ); |
| if ( UCS4 < 0 ) mblen = -1; |
| else mblen = tmpsrc - (unsigned char *)in_bufr; |
| } |
| } |
| |
| if ( mblen == 0 ) /* mblen==0 when data is starts with '\0' */ |
| mblen = 1; |
| if ( (mblen > 0) && (mblen < 0x7FFFFFFF) ) { |
| if ( UCS4ptr != NULL ) *UCS4ptr = UCS4; // Return the UCS4 value |
| return (mblen); |
| } |
| return (CNV_ERR_INVALID_CHAR); |
| } |
| |
| // |
| // UCS4ToLocaleChar() converts the UCS4 value to the specified character set |
| // and stores the character in the output buffer specified. |
| // Returns length of the output character in bytes as the return value. |
| // |
| int UCS4ToLocaleChar( const unsigned int *UCS4ptr , //Ptr to input char |
| const char *out_bufr, //Ptr to output bufr |
| const int out_len, //Len of output bufr |
| enum cnv_charset charset ) //Locale Character Set |
| { |
| char tmpspace[8]; /* big enough to ensure no buffer overflow */ |
| |
| ENSURE_VALID_CHARSET(); |
| |
| char * target = (char *) out_bufr; |
| int UCS4 = *UCS4ptr; |
| int ct = -1; |
| |
| // We initialize a _LC_fcconv_iconv_rec struct here. |
| // NOTE: For our purposes, the ONLY thing that |
| // must be initialized is the flags word. |
| // |
| _LC_fcconv_iconv_rec cd; |
| |
| cd.flags = CONV_BOM_WRITTEN | CONV_INPUT_PROCESSED ; |
| |
| if ( UCS4ptr == NULL ) |
| return CNV_ERR_NOINPUT; |
| |
| if ( charset == cnv_ISO88591 ) { |
| if ( UCS4 <= 0x0FF ) { /* If valid ISO88591 char */ |
| tmpspace[0] = UCS4; |
| ct = 1; |
| } |
| } |
| else { |
| csc_wctomb_funcPtr outputFuncPtr ; |
| csc_output_utfPtr outputFuncPtr2 ; |
| |
| outputFuncPtr = csc_wctomb_ptrs[ charset ]; |
| if ( outputFuncPtr != NULL ) |
| ct = (int) (*outputFuncPtr)( tmpspace, (WChar_t) UCS4, |
| (_LC_charmap_t *)NULL ); |
| else { |
| outputFuncPtr2 = csc_output_utf_ptrs[ charset ]; |
| if ( outputFuncPtr2 == NULL ) |
| return( CNV_ERR_INVALID_CS ); // Shouldn't ever happen ... |
| |
| ct = (*outputFuncPtr2)( &cd, (unsigned char *)(&tmpspace), |
| sizeof(tmpspace), UCS4 ); |
| } |
| } |
| |
| if ( ct < 0 ) // If Bad character or conversion error |
| return (CNV_ERR_INVALID_CHAR); |
| |
| if ( ct <= out_len ) { |
| if ( target != NULL ) { |
| char * tmpPtr = &tmpspace[0]; |
| int iii = ct; |
| while (iii-- > 0 ) |
| *target++ = *tmpPtr++; |
| } |
| return ( ct ); |
| } |
| return CNV_ERR_BUFFER_OVERRUN; |
| } |
| |
| // |
| // csc_get_subst_char() -- Get substitution char and its length |
| // |
| // Arguments: substitution_char - pointer to user's specified char |
| // tmpspace - pointer to caller's place to put the char |
| // charset - an "enum cnv_charset" value indicating the |
| // target character set |
| // |
| // Return value: Length of substitution char in bytes |
| // |
| static int csc_get_subst_char( const char * substitution_char, |
| char * tmpspace, |
| enum cnv_charset charset ) |
| { |
| int sc_ln = 1; //Default: 1-byte substitution char |
| tmpspace[0] = '?'; |
| if ( substitution_char != NULL ) { |
| if ( charset == cnv_UTF16 ) { |
| sc_ln = 2; |
| tmpspace[0] = substitution_char[0] ; |
| tmpspace[1] = substitution_char[1] ; |
| } |
| else if ( charset == cnv_UTF32 ) { |
| sc_ln = 4; |
| tmpspace[0] = substitution_char[0] ; |
| tmpspace[1] = substitution_char[1] ; |
| tmpspace[2] = substitution_char[2] ; |
| tmpspace[3] = substitution_char[3] ; |
| } |
| else { |
| // |
| // If 1st byte of substitution char string is 0, use '?'. |
| // Else, if string is 1 byte long, use it as is. |
| // Else, if string is 2 bytes long, use it as is. |
| // Else use '?'. |
| // |
| if ( ( substitution_char[0] != 0 ) && |
| ( ( substitution_char[1] == 0 ) || |
| ( substitution_char[2] == 0 ) ) ) { |
| tmpspace[0] = substitution_char[0] ; |
| tmpspace[1] = substitution_char[1] ; |
| if ( tmpspace[1] != 0 ) |
| sc_ln = 2; |
| } |
| } |
| } |
| return ( sc_ln ); |
| } |
| |
| int addVariableLengthNull( unsigned char * & target, |
| unsigned char * endTarget, |
| int len_of_NULL ) |
| { |
| if ( len_of_NULL <= (endTarget - target) ) { |
| if ( len_of_NULL >= 2 ) { |
| *target++ = 0; |
| if ( len_of_NULL == 4 ) { |
| *target++ = 0; *target++ = 0; |
| } |
| } |
| *target++ = 0; |
| return 0; |
| } |
| return CNV_ERR_BUFFER_OVERRUN; |
| } |
| |
| // |
| // UTF16ToLocale() - Convert a string of UTF-16 characters |
| // to the specified character set. |
| // |
| int UTF16ToLocale( const enum cnv_version version , |
| const char *in_bufr , const int in_len , |
| const char *out_bufr , const int out_len , |
| enum cnv_charset charset , |
| char * & first_untranslated_char , |
| unsigned int *output_data_len_p , |
| const int cnv_flags , |
| const int addNullAtEnd_flag , |
| const int allow_invalids , |
| unsigned int * translated_char_cnt_p , |
| const char *substitution_char ) |
| { |
| if ( version != cnv_version1 ) |
| return CNV_ERR_INVALID_VERS; |
| |
| INITIALIZE_VARIABLES(); |
| |
| unsigned char * target = (unsigned char *)out_bufr; |
| unsigned char * endTarget = target + out_len ; |
| |
| SET_OUTPUT_DATA_LEN(); |
| |
| CHECK_FOR_SERIOUS_ERRORS(); |
| |
| int len_of_NULL = 1; |
| int ct = 0; |
| |
| // We initialize a _LC_fcconv_iconv_rec struct here. |
| // NOTE: For our purposes, the ONLY thing that |
| // must be initialized is the flags word. |
| // |
| _LC_fcconv_iconv_rec cd; |
| |
| cd.flags = CONV_INPUT_PROCESSED | CONV_BOM_WRITTEN | |
| ((cnv_flags && CNV_REVERSE_INBYTES) ? CONV_REVERSE_INBYTE : 0); |
| |
| // |
| // Fast path where charset is ISO88591 or a multi-byte charset. |
| // An assumption made here is that non-valid chars will rarely be seen. |
| // If one is found, we break out of this fast path and go down the |
| // slow path. |
| // |
| int charsetIsWide = 0; |
| if ( (charset == cnv_UTF16) || (charset == cnv_UTF32) ) |
| charsetIsWide = 1 ; |
| |
| if ( ( ! charsetIsWide ) && |
| ( (cnv_flags & CNV_REVERSE_INBYTES) == 0 ) ) |
| { |
| unsigned int UCS4 = 0; |
| |
| int maxLoopCnt = endTarget - target ; |
| if ( maxLoopCnt > (int) ( in_len / sizeof(ucs2_t) ) ) |
| maxLoopCnt = (int) ( in_len / sizeof(ucs2_t) ) ; |
| |
| unsigned int maxCharToHandle = (charset == cnv_ISO88591) ? 0x0FF : 0x7F; |
| |
| UCS4 = *( (ucs2_t *)source ); |
| if ( cnv_flags & CNV_REVERSE_INBYTES ) |
| { |
| while ( --maxLoopCnt >= 0 ) // While more to do |
| { |
| UCS4 = *( (ucs2_t *)source ) ; |
| UCS4 = ( ( UCS4 & 0x00FF ) << 8 ) | ( UCS4 >> 8 ) ; |
| if ( UCS4 <= maxCharToHandle ) |
| { |
| source += sizeof(ucs2_t); |
| *target++ = UCS4; |
| } |
| else break; |
| } |
| } |
| else while ( ( --maxLoopCnt >= 0 ) && // While more to do and |
| ( ( UCS4 = *( (ucs2_t *)source ) ) <= maxCharToHandle ) ) |
| { |
| source += sizeof(ucs2_t); |
| *target++ = UCS4; |
| } |
| |
| translated_char_cnt = target - (unsigned char *)out_bufr ; |
| } |
| |
| // |
| // Slower path that handles all locales. |
| // |
| csc_wctomb_funcPtr outputFuncPtr ; |
| csc_output_utfPtr outputFuncPtr2 = NULL; |
| |
| outputFuncPtr = csc_wctomb_ptrs[ charset ]; |
| if ( ( outputFuncPtr == NULL ) && ( charset != cnv_ISO88591 ) ) |
| { |
| outputFuncPtr2 = csc_output_utf_ptrs[ charset ]; |
| if ( outputFuncPtr2 == NULL ) |
| return( CNV_ERR_INVALID_CS ); // Shouldn't ever happen ... |
| } |
| |
| while ( source < endSource ) { |
| |
| unsigned int UCS4 = *((ucs2_t *)source); |
| |
| if ( cnv_flags & CNV_REVERSE_INBYTES ) |
| UCS4 = ( ( UCS4 & 0x00FF ) << 8 ) | ( UCS4 >> 8 ) ; |
| |
| if ( ( UCS4 < 0x080 ) && // If ASCII and |
| ( target < endTarget ) && // there is space yet and |
| ( ! charsetIsWide ) ) // output is not wide characters |
| { |
| source += sizeof(ucs2_t); |
| *target++ = UCS4; |
| translated_char_cnt += 1 ; |
| } |
| else |
| { |
| char tmpspace[8]; /* big enough to ensure no buffer overflow */ |
| |
| first_untranslated_char = (char *) source; //...in case char is bad |
| |
| if ( UCS4 < 0xD800 ) // If simple UCS2, use it as already retrieved |
| source += sizeof(ucs2_t); |
| else |
| UCS4 = __input_ucs2( &cd, &source, endSource - source ); |
| |
| ct = -1; |
| if ( (UCS4 != ERR_INPUT_INCOMPLETE) && (UCS4 != ERR_INVALID_CHAR) ) { |
| if ( charset == cnv_ISO88591 ) { |
| if ( UCS4 <= 0x0FF ) { // If valid ISO88591 char |
| tmpspace[0] = UCS4; |
| ct = 1; |
| } |
| } |
| else { |
| if ( outputFuncPtr != NULL ) |
| ct = (*outputFuncPtr)( tmpspace, (WChar_t) UCS4, |
| (_LC_charmap_t *)NULL); |
| else { |
| ct = (*outputFuncPtr2)( &cd, (unsigned char *)(tmpspace), |
| sizeof(tmpspace), UCS4 ); |
| |
| if ( charset == cnv_UTF16 ) |
| len_of_NULL = 2; |
| else if ( charset == cnv_UTF32 ) |
| len_of_NULL = 4; |
| } |
| } |
| } |
| |
| if ( ct < 0 ) { // If Bad character or conversion error |
| if ( allow_invalids == FALSE ) { |
| SET_TRANSLATED_CHAR_CNT(); |
| SET_OUTPUT_DATA_LEN(); |
| return (CNV_ERR_INVALID_CHAR); |
| } |
| |
| ct = csc_get_subst_char( substitution_char, tmpspace , charset ); |
| |
| if ( (UCS4 == ERR_INPUT_INCOMPLETE) || (UCS4 == ERR_INVALID_CHAR) ) |
| source += 2 ; // Skip bad character |
| //else source was already incremented by __input_ucs2() |
| } |
| if ( ct <= (endTarget - target) ) { |
| char * tmpPtr = &tmpspace[0]; |
| while (ct-- > 0 ) |
| *target++ = *tmpPtr++; |
| translated_char_cnt += 1; |
| } |
| else { |
| SET_TRANSLATED_CHAR_CNT(); |
| SET_OUTPUT_DATA_LEN(); |
| return CNV_ERR_BUFFER_OVERRUN; |
| } |
| } |
| } |
| first_untranslated_char = (char *) source; |
| SET_TRANSLATED_CHAR_CNT(); |
| |
| int rtnVal = 0; |
| if ( addNullAtEnd_flag == TRUE ) { |
| rtnVal = addVariableLengthNull( target, endTarget, len_of_NULL ); |
| } |
| SET_OUTPUT_DATA_LEN(); |
| return rtnVal; |
| } |
| #define TWO_BYTE_UTF8(firstByte, src, nxtB) ( (((firstByte) & 0xE0) == 0xC0) && \ |
| (( (nxtB=(*((src)+1))) & 0xC0) == 0x80) ) |
| // |
| // UTF8ToLocale() - Convert a string of UTF8 characters |
| // to the specified character set. |
| // |
| int UTF8ToLocale( const enum cnv_version version , |
| const char *in_bufr , const int in_len , |
| const char *out_bufr , const int out_len , |
| enum cnv_charset charset , |
| char * & first_untranslated_char , |
| unsigned int *output_data_len_p , |
| const int addNullAtEnd_flag , |
| const int allow_invalids , |
| unsigned int * translated_char_cnt_p , |
| const char *substitution_char ) |
| { |
| if ( version != cnv_version1 ) |
| return CNV_ERR_INVALID_VERS; |
| |
| INITIALIZE_VARIABLES(); |
| |
| unsigned char * target = (unsigned char *)out_bufr; |
| unsigned char * endTarget = target + out_len ; |
| |
| SET_OUTPUT_DATA_LEN(); |
| |
| CHECK_FOR_SERIOUS_ERRORS(); |
| |
| int len_of_NULL = 1; |
| int ct = 0; |
| |
| // We initialize a _LC_fcconv_iconv_rec struct here. |
| // NOTE: For our purposes, the ONLY thing that |
| // must be initialized is the flags word. |
| // |
| _LC_fcconv_iconv_rec cd; |
| |
| cd.flags = CONV_BOM_WRITTEN | CONV_INPUT_PROCESSED ; |
| |
| // |
| // Fast path where charset is ISO88591 or a multi-byte charset. |
| // An assumption made here is that invalid chars will rarely be seen. |
| // If one is found, we break out of this fast path and go down the |
| // slow path. |
| // |
| int charsetIsWide = 0; |
| if ( (charset == cnv_UTF16) || (charset == cnv_UTF32) ) |
| charsetIsWide = 1 ; |
| |
| if ( ! charsetIsWide ) |
| { |
| unsigned int UCS4 = 0; |
| |
| int maxLoopCnt = endTarget - target ; |
| if ( maxLoopCnt > in_len ) |
| maxLoopCnt = in_len ; |
| |
| while ( --maxLoopCnt >= 0 ) |
| { |
| // If character is valid ASCII |
| if ( (UCS4 = *source) < 0x080 ) { |
| source++; |
| *target++ = UCS4; |
| } |
| else |
| { |
| if (charset != cnv_ISO88591) |
| // Let slower path handle the rest of the buffer. |
| break; |
| |
| int nxtByte = 0; |
| if ( ( maxLoopCnt > 0 ) && TWO_BYTE_UTF8( UCS4, source, nxtByte ) ) |
| { |
| // Convert from UTF8 to UCS4. |
| UCS4 = (UCS4 & 0x1F) << 6 | ( nxtByte & 0x3F ); |
| if ( UCS4 > 0x0FF ) |
| break; // Non-ISO88591. Let slower path handle the rest. |
| source += 2 ; |
| *target++ = UCS4; |
| if ( maxLoopCnt > (int) ( endSource - source ) ) |
| maxLoopCnt-- ; // Ensure we don't overrun input buffer |
| } |
| else break; // Let slower path handle the rest. |
| } |
| } |
| translated_char_cnt = target - (unsigned char *)out_bufr ; |
| } |
| else if ( charset == cnv_UTF16 ) |
| { |
| unsigned int UCS4 = 0; |
| while ( ( source < endSource ) && // more input and |
| ( ( UCS4 = *source ) < 0x080 ) && // it is ASCII and |
| ( (endTarget - target) >= 2 ) ) // there is space left |
| { |
| *((ucs2_t *)target) = UCS4; |
| source++; |
| target += sizeof(ucs2_t); |
| } |
| translated_char_cnt = source - (unsigned char *)in_bufr; |
| len_of_NULL = 2; |
| } |
| |
| // |
| // Slower path that handles all locales. |
| // |
| csc_wctomb_funcPtr outputFuncPtr ; |
| csc_output_utfPtr outputFuncPtr2 = NULL; |
| |
| outputFuncPtr = csc_wctomb_ptrs[ charset ]; |
| if ( ( outputFuncPtr == NULL ) && ( charset != cnv_ISO88591 ) ) |
| { |
| outputFuncPtr2 = csc_output_utf_ptrs[ charset ]; |
| if ( outputFuncPtr2 == NULL ) |
| return( CNV_ERR_INVALID_CS ); // Shouldn't ever happen ... |
| } |
| |
| while ( source < endSource ) { |
| |
| unsigned int UCS4 = *source; |
| if ( ( UCS4 < 0x080 ) && // If ASCII and |
| ( target < endTarget ) && // there is space yet and |
| ( ! charsetIsWide ) ) // output is not wide characters |
| { |
| *target++ = UCS4; |
| source++; |
| translated_char_cnt += 1; |
| } |
| else |
| { |
| char tmpspace[8]; /* big enough to ensure no buffer overflow */ |
| |
| first_untranslated_char = (char *) source; //...in case char is bad |
| |
| int UCS4 = __input_utf8( &cd, &source, endSource - source); |
| |
| ct = -1; |
| if ( (UCS4 != ERR_INPUT_INCOMPLETE) && (UCS4 != ERR_INVALID_CHAR) ) { |
| if ( charset == cnv_ISO88591 ) { |
| if ( UCS4 <= 0x0FF ) { // If valid ISO88591 char |
| tmpspace[0] = UCS4; |
| ct = 1; |
| } |
| } |
| else { |
| if ( outputFuncPtr != NULL ) |
| ct = (*outputFuncPtr)( tmpspace, (WChar_t) UCS4, NULL ); |
| else { |
| ct = (*outputFuncPtr2)( &cd, (unsigned char *)(tmpspace), |
| sizeof(tmpspace), UCS4); |
| |
| if ( charset == cnv_UTF16 ) |
| len_of_NULL = 2; |
| else if ( charset == cnv_UTF32 ) |
| len_of_NULL = 4; |
| } |
| } |
| } |
| |
| if ( ct < 0 ) { // If Bad character or conversion error |
| if ( allow_invalids == FALSE ) { |
| SET_TRANSLATED_CHAR_CNT(); |
| SET_OUTPUT_DATA_LEN(); |
| return (CNV_ERR_INVALID_CHAR); |
| } |
| |
| ct = csc_get_subst_char( substitution_char, tmpspace , charset ); |
| |
| if ( (UCS4 == ERR_INPUT_INCOMPLETE) || (UCS4 == ERR_INVALID_CHAR) ) |
| source += 1 ; // Skip bad character |
| //else source was already incremented by __input_utf8() |
| } |
| if ( ct <= (endTarget - target) ) { |
| char * tmpPtr = &tmpspace[0]; |
| translated_char_cnt += 1; |
| while (ct-- > 0 ) |
| *target++ = *tmpPtr++; |
| } |
| else { |
| SET_TRANSLATED_CHAR_CNT(); |
| SET_OUTPUT_DATA_LEN(); |
| return CNV_ERR_BUFFER_OVERRUN; |
| } |
| } |
| } |
| first_untranslated_char = (char *) source; |
| SET_TRANSLATED_CHAR_CNT(); |
| |
| int rtnVal = 0; |
| if ( addNullAtEnd_flag == TRUE ) { |
| rtnVal = addVariableLengthNull( target, endTarget, len_of_NULL ); |
| } |
| SET_OUTPUT_DATA_LEN(); |
| return rtnVal; |
| } |
| |
| |
// Lightweight structural check of a UTF-8 byte string, with an optional
// limit on the number of characters.
//
// Only the lead-byte / continuation-byte pattern is checked; the decoded
// code point values are not examined.
//
//   bufr                   - bytes to check (need not be NUL-terminated)
//   in_len                 - number of bytes in bufr
//   max_chars              - maximum number of characters allowed;
//                            0 means "no character limit"
//   ignore_trailing_blanks - if non-zero, characters beyond max_chars are
//                            tolerated when they are all ASCII blanks
//
// Returns:
//   -1      if in_len or max_chars is negative, or an invalid byte
//           sequence is found
//   in_len  if the string is well-formed and within the character limit
//           (or the excess consists solely of ignorable blanks)
//   other   the byte length the string must be truncated to so that it
//           holds only complete characters and at most max_chars of them
int lightValidateUTF8Str(const char *bufr,
                         int in_len,
                         int max_chars,
                         int ignore_trailing_blanks)
{
  unsigned char c;
  int pos = 0;
  int numc = 0;
  int maxc = ( max_chars ? max_chars : in_len );
  int byte = 1;   // bytes still expected for the current character, counting this one

  if ( (in_len < 0) || (max_chars < 0) ) // Defensive programming: Ensure no memory access exceptions.
    return -1;                           // Shouldn't ever happen, of course.

  // Deliberately scan one character PAST the limit (numc <= maxc rather than
  // numc < maxc): only then can numc exceed maxc, which is what the
  // trailing-blank check and the back-up loop below rely on to detect
  // "too many characters".
  while (pos < in_len && numc <= maxc)
    {
      c = bufr[pos];

      if (c < 0x80 && byte == 1) // ascii
        numc++;
      else if (c >= 0x80 && c < 0xc0 && byte > 1) // second, third, or fourth byte of a multi-byte sequence
        {
          if (--byte == 1)
            numc++;
        }
      else if (c >= 0xc0 && c < 0xe0 && byte == 1) // start of 2-byte sequence
        byte = 2;
      else if (c >= 0xe0 && c < 0xf0 && byte == 1) // start of 3-byte sequence
        byte = 3;
      else if (c >= 0xf0 && c < 0xfc && byte == 1) // start of 4-byte sequence
        byte = 4;
      else
        return -1; // invalid byte sequence

      pos++;
    }

  if (byte == 1 && numc <= maxc)
    return pos; // string is valid and has valid char count, pos == in_len

  // We encountered too many characters or a partial character. The string
  // bufr[0..pos-1] contains numc entire characters and maybe one partial character.

  // check whether the extra characters are all blanks and it's safe to ignore them
  if (ignore_trailing_blanks && byte == 1)
    {
      int blankPos = pos-1; // the previous character is already past the char. limit

      while (blankPos < in_len && bufr[blankPos] == ' ')
        blankPos++;

      if (blankPos >= in_len)
        return in_len; // extra chars were all blanks
    }

  // back up until the end of the valid characters

  while (byte > 1 || numc > maxc)
    {
      pos--;
      c = bufr[pos];

      if (c < 0x80 || c >= 0xc0)
        {
          // this is the first byte of a character
          if (byte > 1)
            byte = 1;   // dropped the incomplete trailing character
          else
            numc--;     // dropped one complete character beyond the limit
        }
    }

  return pos; // string needs to be truncated at position "pos" (to length "pos")
}
| |
| #if 0 /* Not currently called anywhere.*/ |
// Validate bufr with lightValidateUTF8Str() and, when that call reports a
// truncation point short of in_len, overwrite every byte from that point
// to the end of the buffer with an ASCII blank.
//
// Returns whatever lightValidateUTF8Str() returned; a negative result
// leaves the buffer untouched.
int lightValidateUTF8StrAndPad(char *bufr,
                               int in_len,
                               int max_chars,
                               int ignore_trailing_blanks)
{
  const int validLen =
    lightValidateUTF8Str(bufr, in_len, max_chars, ignore_trailing_blanks);

  // blank out the tail that falls beyond the valid prefix
  int idx = validLen;
  while (idx >= 0 && idx < in_len)
    {
      bufr[idx] = ' ';
      idx++;
    }

  return validLen;
}
| #endif /* Not currently called anywhere.*/ |
| |
// Fill a buffer of in_len bytes with the smallest possible UTF-8 string:
// NUL characters up to the character limit, followed by blanks.
//
//   bufr      - buffer to fill, at least in_len bytes long
//   in_len    - size of the buffer in bytes
//   max_chars - character limit; <= 0 means "no limit" (use in_len)
//
// Returns the number of leading bytes set to NUL. This never exceeds
// in_len, and is 0 when in_len <= 0 (nothing is written in that case).
int fillWithMinUTF8Chars(char *bufr,
                         int in_len,
                         int max_chars)
{
  if (in_len <= 0)
    return 0;   // nothing to fill; also keeps a negative length away from memset

  // A NUL occupies one byte, so the character limit can never cover more
  // than in_len bytes; clamping avoids writing past the end of the buffer.
  if (max_chars <= 0 || max_chars > in_len)
    max_chars = in_len;

  // fill with minimum characters (NUL), up to the
  // limit of characters
  memset(bufr, 0, (size_t) max_chars);

  // fill up the remainder with blanks, which is the
  // convention for UTF-8 strings with a character limit
  if (in_len > max_chars)
    memset(&bufr[max_chars], ' ', (size_t) (in_len - max_chars));

  return max_chars;
}
| |
// Fill a buffer of in_len bytes with the largest possible UTF-8 string
// that respects a character limit, padding any unused tail with blanks.
//
//   bufr      - buffer to fill, at least in_len bytes long
//   in_len    - size of the buffer in bytes
//   max_chars - character limit; <= 0 means "no limit" (use in_len)
//
// Returns the number of bytes occupied by the maximum-value characters;
// bytes from that offset up to in_len are set to ' '.
int fillWithMaxUTF8Chars(char *bufr,
                         int in_len,
                         int max_chars)
{
  // Largest code points encodable in 4, 3, 2 and 1 byte(s).
  // RFC 3629 caps Unicode at U+10FFFF.
  // See http://en.wikipedia.org/wiki/UTF-8
  static const char largest4[] = "\xF4\x8F\xBF\xBF";   // U+10FFFF
  static const char largest3[] = "\xEF\xBF\xBF";       // U+FFFF
  static const char largest2[] = "\xDF\xBF";           // U+07FF
  static const char largest1[] = "\x7F";               // U+7F

  int charLimit = (max_chars <= 0) ? in_len : max_chars;
  int wholeChars = 0;   // number of 4-byte characters written
  int filled;           // bytes holding max-value characters

  // pack as many 4-byte characters as the buffer and the limit allow
  while (wholeChars < in_len/4 && wholeChars < charLimit)
    {
      char *dst = bufr + 4*wholeChars;

      dst[0] = largest4[0];
      dst[1] = largest4[1];
      dst[2] = largest4[2];
      dst[3] = largest4[3];
      wholeChars++;
    }

  filled = 4 * wholeChars;

  // if bytes remain and the limit permits one more character, use the
  // largest character that exactly fits the leftover 3, 2 or 1 byte(s)
  if (filled < in_len && wholeChars < charLimit)
    {
      int leftover = in_len - filled;

      if (leftover == 3)
        {
          bufr[filled]   = largest3[0];
          bufr[filled+1] = largest3[1];
          bufr[filled+2] = largest3[2];
        }
      else if (leftover == 2)
        {
          bufr[filled]   = largest2[0];
          bufr[filled+1] = largest2[1];
        }
      else if (leftover == 1)
        {
          bufr[filled] = largest1[0];
        }

      filled = in_len;
    }

  // blank-pad whatever lies beyond the character limit
  for (int k = filled; k < in_len; k++)
    bufr[k] = ' ';

  return filled;
}
| |
/* Find the first byte of the ASCII or UTF-8 character that contains the
   byte at someByteInChar (typically a byte at the end of a buffer).

   Steps backwards over UTF-8 continuation bytes (bit pattern 10xxxxxx)
   and returns the address of the first byte that is not one. The search
   never moves before startOfBuffer; if that address is reached it is
   returned, whatever byte it holds.
*/
char * findStartOfChar( char *someByteInChar, char *startOfBuffer )
{
  char *cursor;

  for ( cursor = someByteInChar ; cursor > startOfBuffer ; cursor-- )
  {
    /* anything other than 10xxxxxx is an ASCII byte or a lead byte */
    if ( ( *cursor & 0xC0 ) != 0x80 )
      break ;
  }
  return cursor ;
}
| |
/* Character set conversion using iconv(3).

   Converts inputlen bytes at inputbuf from srcCharset to targetCharset,
   writing at most outlen bytes to outbuf.

   Returns -1 if the conversion descriptor cannot be opened or iconv()
   reports an error (for example invalid input or an output buffer that
   is too small). On success returns the number of bytes of outbuf that
   were left UNUSED. iconv() counts the output size down as it writes, so
   the converted length is the original outlen minus the return value.
*/
static int charsetConvert(const char *srcCharset,const char *targetCharset,char *inputbuf, size_t inputlen, char *outbuf,size_t outlen)
{
  iconv_t cd = iconv_open(targetCharset, srcCharset);

  // iconv_open() signals failure with (iconv_t)-1, not with a zero descriptor
  if (cd == (iconv_t) -1)
    return -1;

  // iconv() advances both buffer pointers and decrements both byte counts
  size_t rc = iconv(cd, &inputbuf, &inputlen, &outbuf, &outlen);

  // the descriptor is released on the success and the error path alike
  iconv_close(cd);

  if (rc == (size_t) -1)
    return -1;

  return (int) outlen;
}
| |
/* Convert a GBK string into UTF-8.

   gbkString / gbklen - input bytes and their count
   result / outlen    - output buffer and its size in bytes
   addNullAtEnd       - if true, store a terminating 0 byte after the
                        converted text (one spare output byte is needed)

   Returns the number of UTF-8 bytes written to result, not counting the
   optional terminator, or -1 if the conversion fails or there is no room
   left for the requested terminator.
*/
int gbkToUtf8(char* gbkString, size_t gbklen,
              char* result ,size_t outlen, bool addNullAtEnd)
{
  int originalOutlen = outlen;

  // charsetConvert() hands back the count of output bytes left UNUSED
  // (iconv counts the output size down), not the converted length.
  int bytesLeft = charsetConvert( "gbk","utf-8", gbkString, gbklen, result, outlen);

  if (bytesLeft == -1 )
    return -1;

  int finalLength = originalOutlen - bytesLeft;

  if ( addNullAtEnd )
  {
    // the terminator needs one byte beyond the converted text
    if (bytesLeft < 1)
      return -1;

    result[finalLength] = 0;
  }

  return finalLength;
}