| /********************************************************************** |
| // @@@ START COPYRIGHT @@@ |
| // |
| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // |
| // @@@ END COPYRIGHT @@@ |
| **********************************************************************/ |
| /* |
| * HISTORY |
| * $Log: mb_lconv.c,v $ |
| * Revision 1.1.10.3 2002/03/11 18:48:04 |
| * Fix QAR 02292 - mbstowcs() problem in UCS-4 locales. |
| * |
| * Revision 1.1.10.2 2002/01/28 15:20:17 |
| * Fix wctomb() & wcstombs() problem with UDC codepoints. |
| * |
| * Revision 1.1.10.1 2001/12/07 15:37:54 |
| * Remove WCBAD and MBBAD and use only BAD to avoid mixing up. |
| * Checking for UCODE first before checking for UDC index. |
| * Introduce a dummy_cell4 table to fix locale compilation failure in |
| * some @ucs4 locales. |
| * Change conversion table access methods & add support for HKSCS & cp949. |
| * |
| * Revision 1.1.6.1 2000/10/16 18:44:56 |
| * COSIX.Zulu to Yankee merge for GB18030 support. |
| * |
| * Revision 1.1.4.3 2000/10/09 21:27:09 |
| * Fix 82769 by setting *err to the MB_CUR_MAX for incomplete MB sequence. |
| * |
| * Revision 1.1.4.2 2000/10/04 20:43:40 |
| * Fix 82696 by checking for -1 before calling the IS_UCODE() macro. |
| * |
| * Revision 1.1.4.1 2000/08/07 14:33:50 |
| * Support the new GB18030 Chinese character set. |
| * Rename some macros to have a more consistent naming convention. |
| * |
| * Revision 1.1.2.1 2000/01/13 20:25:49 |
| * Multibyte methods for @ucs4 locales. |
| * |
| * Revision 1.1.3.2 1996/11/22 17:02:38 |
| * $EndLog$ |
| * |
| * Multibyte UCS-4 locale conversion module containing templates for |
| * the following locale conversion routines: |
| * |
| * - mblen() |
| * - mbtowc() |
| * - wctomb() |
| * - mbtopc() |
| * - pctomb() |
| * - mbstowcs() |
| * - wcstombs() |
| * - mbstopcs() |
| * - pcstombs() |
| * |
| * This file may be included multiple times to generate different locale |
| * conversion routines. The macro "CODESET" is assumed to be predefined. |
| * It contains the name of the codeset to be supported. |
| */ |
| |
| #ifdef USE_OUR_MB_WC_DATA_TABLES |
| |
/*
// This source file contains low-level routines for converting between
// a multibyte character set and Unicode (UCS4).  These routines are our
// replacements for the corresponding OSF routines and were created
// because the DEC/OSF conversion routines/conversion tables were
// highly dependent on use of the Private Use Area (0xE000 - 0xF7FF)
// that unicode.org has reserved for vendor usage.  We don't want to
// make heavy use of the P.U.A. (at least not yet) because we want to
// recognize UCS4 values produced by Java or by HP-UX.  Consequently,
// it is better for us to stick with de facto standard Unicode values
// for every character we can.
*/

/*
 * Stand-in for the locale charmap handle type.  The Our_*() routines in
 * this section take a "hdl" argument only for signature compatibility
 * and never dereference it.
 */
#define _LC_charmap_t int // JAC
| |
| /* |
| * Our_mbtowc_big5_ucs4() - routine to convert from BIG5 multi-byte |
| * character string to UCS4 (a.k.a. UCS32, a.k.a. 4-byte Unicode). |
| * |
| * Arguments: pwc - pointer to target array of 4-byte UCS4 output chars. |
| * ts - pointer to input array of multi-byte BIG5 chars. |
| * maxlen - actual length (in bytes) of input array. |
| * hdl - dummy ptr to _LC_charmap_t - needed only to |
| * make our routine take the same arguments |
| * as the __mbtowc_<cs>_ucs4() routines that we |
| * generated by the MBTOWC macro. |
| * |
| * NOTE: It is the caller's responsibility to ensure output array is |
| * big enough. |
| */ |
| #define Min_BIG5_chr 0xA140 /* Min for using lookup table */ |
| #define Max_BIG5_chr 0xF9FF /* Max for using lookup table */ |
| #define BAD_UCS_VAL 0x0000FFFF |
| |
| size_t Our_mbtowc_big5_ucs4(WChar_t *pwc, const char *ts, size_t maxlen, |
| _LC_charmap_t *hdl) |
| { |
| uchar_t *s = (uchar_t *)ts ; |
| WChar_t wc ; |
| int idx ; |
| uchar_t chr1; |
| |
| if (ts == NULL) return (0); /* If no data to convert */ |
| |
| if (maxlen == (size_t)0) |
| return((size_t)-1); |
| |
| if ( (chr1 = *s) == '\0') { |
| if (pwc) *pwc = 0; /* Avoid all function calls */ |
| return (0); |
| } |
| |
| if (isascii(chr1)) |
| { |
| if (pwc) *pwc = (chr1) ; |
| return(1) ; |
| } |
| else { |
| if ( maxlen < 2 ) goto err_exit; |
| idx = ( (chr1) << 8 ) | ( *(s+1) ) ; |
| if ( (idx >= Min_BIG5_chr) && (idx <= Max_BIG5_chr) ) { |
| wc = Our_MS_BIG5_tableF[idx - Min_BIG5_chr]; |
| if ( wc == BAD_UCS_VAL ) |
| goto err_exit; |
| s += 2; |
| } |
| else goto err_exit; |
| } |
| |
| if (pwc) *pwc = wc ; |
| return((size_t)(s - (uchar_t *)ts)) ; |
| |
| err_exit: |
| return((size_t)-1); |
| } |
| |
| /* |
| * Our_mbtowc_sjis_ucs4() - routine to convert from SJIS multi-byte |
| * character string to UCS4 (a.k.a. UCS32, a.k.a. 4-byte Unicode). |
| * |
| * Arguments: pwc - pointer to target array of 4-byte UCS4 output chars. |
| * ts - pointer to input array of multi-byte SJIS chars. |
| * maxlen - actual length (in bytes) of input array. |
| * hdl - dummy ptr to _LC_charmap_t - needed only to |
| * make our routine take the same arguments |
| * as the __mbtowc_<cs>_ucs4() routines that we |
| * generated by the MBTOWC macro. |
| * |
| * NOTE: It is the caller's responsibility to ensure output array is |
| * big enough. |
| */ |
| #define Min_SJIS_chr 0x8140 /* Min for using lookup table */ |
| #define Max_SJIS_chr 0xFC4B /* Max for using lookup table */ |
| |
| size_t Our_mbtowc_sjis_ucs4(WChar_t *pwc, const char *ts, size_t maxlen, |
| _LC_charmap_t *hdl) |
| { |
| uchar_t *s = (uchar_t *)ts ; |
| WChar_t wc ; |
| int idx ; |
| uchar_t chr1; |
| |
| if (ts == NULL) return (0); /* If no data to convert */ |
| |
| if (maxlen == (size_t)0) |
| return((size_t)-1); |
| |
| if ( (chr1 = *s) == '\0') { |
| if (pwc) *pwc = 0; /* Avoid all function calls */ |
| return (0); |
| } |
| |
| if (isascii(chr1)) |
| { |
| if (pwc) *pwc = (chr1) ; |
| return(1) ; |
| } |
| |
| if ( (chr1 >= 0xA1 ) && (chr1 <= 0xDF) ) { /* Handle these algorithmically */ |
| wc = 0xFF61 + chr1 - 0xA1; |
| s += 1; |
| } |
| else if ( (chr1 >= 0xF0 ) && (chr1 <= 0xF9) ) { /* Handle UDC algorithmically */ |
| if ( maxlen < 2 ) goto err_exit; |
| idx = ( (chr1) << 8 ) | ( *(s+1) ) ; |
| if ( (idx >= 0xF040) && (idx <= 0xF9FC) ) { /* User-defined character range */ |
| if ( (idx & 0xFF) < 0x40 ) goto err_exit; /* No such char */ |
| if ( (idx & 0xFF) > 0xFC ) goto err_exit; /* No such char */ |
| if ( (idx & 0xFF) == 0x7F ) goto err_exit; /* No such char */ |
| |
| wc = ((idx & 0x0F00) >> 8) * 188 + ((idx & 0xFF) - 0x40) + 0xE000; |
| if ( (idx & 0xFF) > 0x7F ) wc -= 1; |
| s += 2; |
| } |
| else goto err_exit; |
| } |
| else { |
| if ( maxlen < 2 ) goto err_exit; |
| idx = ( (chr1) << 8 ) | ( *(s+1) ) ; |
| if ( (idx >= Min_SJIS_chr) && (idx <= Max_SJIS_chr) ) { |
| wc = Our_MS_sjis_tableF[idx - Min_SJIS_chr]; |
| if ( wc == BAD_UCS_VAL ) |
| goto err_exit; |
| s += 2; |
| } |
| else goto err_exit; |
| } |
| |
| if (pwc) *pwc = wc ; |
| return((size_t)(s - (uchar_t *)ts)) ; |
| |
| err_exit: |
| return((size_t)-1); |
| } |
| |
| /* |
| * Our_mbtowc_cp949_ucs4() - routine to convert from KSC multi-byte |
| * character string to UCS4 (a.k.a. UCS32, a.k.a. 4-byte Unicode). |
| * |
| * Arguments: pwc - pointer to target array of 4-byte UCS4 output chars. |
| * ts - pointer to input array of multi-byte KSC chars. |
| * maxlen - actual length (in bytes) of input array. |
| * hdl - dummy ptr to _LC_charmap_t - needed only to |
| * make our routine take the same arguments |
| * as the __mbtowc_<cs>_ucs4() routines that we |
| * generated by the MBTOWC macro. |
| * |
| * NOTES: It is the caller's responsibility to ensure output array is |
| * big enough. KSC is short for KS-code and is a Korean |
| * character set. cp949 is short for CodePage 949 and that |
| * is MicroSoft's codepage for the Korean character set. |
| */ |
| #define Min_KSC_chr 0x8141 /* Min for using lookup table */ |
| #define Max_KSC_chr 0xFEFE /* Max for using lookup table */ |
| |
| size_t Our_mbtowc_cp949_ucs4(WChar_t *pwc, const char *ts, size_t maxlen, |
| _LC_charmap_t *hdl) |
| { |
| uchar_t *s = (uchar_t *)ts ; |
| WChar_t wc ; |
| int idx ; |
| uchar_t chr1; |
| |
| if (ts == NULL) return (0); /* If no data to convert */ |
| |
| if (maxlen == (size_t)0) |
| return((size_t)-1); |
| |
| if ( (chr1 = *s) == '\0') { |
| if (pwc) *pwc = 0; /* Avoid all function calls */ |
| return (0); |
| } |
| |
| if (isascii(chr1)) |
| { |
| if (pwc) *pwc = (chr1) ; |
| return(1) ; |
| } |
| |
| if ( maxlen < 2 ) goto err_exit; |
| idx = ( (chr1) << 8 ) | ( *(s+1) ) ; |
| if ( (idx >= 0xC9A1) && (idx <= 0xC9FE) ) { /* Handle UDC algorithmically */ |
| wc = idx - 0xC9A1 + 0xE000; |
| s += 2; |
| } |
| else if ( (idx >= 0xFEA1) && (idx <= 0xFEFE) ) { /* Handle UDC algorithmically */ |
| wc = idx - 0xFEA1 + 0xE05E; |
| s += 2; |
| } |
| else if ( (idx >= Min_KSC_chr) && (idx <= Max_KSC_chr) ) { |
| wc = Our_MS_KSC_tableF[idx - Min_KSC_chr]; |
| if ( wc == BAD_UCS_VAL ) |
| goto err_exit; |
| s += 2; |
| } |
| else goto err_exit; |
| |
| if (pwc) *pwc = wc ; |
| return((size_t)(s - (uchar_t *)ts)) ; |
| |
| err_exit: |
| return((size_t)-1); |
| } |
| |
| /* |
| * Our_mbtowc_eucjp_ucs4() - routine to convert from EUC-JP multi-byte |
| * character string to UCS4 (a.k.a. UCS32, a.k.a. 4-byte Unicode). |
| * |
| * Arguments: pwc - pointer to target array of 4-byte UCS4 output chars. |
| * ts - pointer to input array of multi-byte EUC-JP chars. |
| * maxlen - actual length (in bytes) of input array. |
| * hdl - dummy ptr to _LC_charmap_t - needed only to |
| * make our routine take the same arguments |
| * as the __mbtowc_<cs>_ucs4() routines that we |
| * generated by the MBTOWC macro. |
| * |
| * NOTE: It is the caller's responsibility to ensure output array is |
| * big enough. |
| */ |
| #define EUCJP_ROW_LEN (0xFE - 0xA1 + 1) |
| #define EUCJP_NUM_ROWS (0xFE - 0xA1 +1) |
| |
| size_t Our_mbtowc_eucjp_ucs4(WChar_t *pwc, const char *ts, size_t maxlen, |
| _LC_charmap_t *hdl) |
| { |
| uchar_t *s = (uchar_t *)ts ; |
| WChar_t wc ; |
| int idx ; |
| uchar_t chr1; |
| |
| if (ts == NULL) return (0); /* If no data to convert */ |
| |
| if (maxlen == (size_t)0) |
| return((size_t)-1); |
| |
| if ( (chr1 = *s) == '\0') { |
| if (pwc) *pwc = 0; /* Avoid all function calls */ |
| return (0); |
| } |
| |
| if (isascii(chr1)) |
| { |
| if (pwc) *pwc = (chr1) ; |
| return(1) ; |
| } |
| |
| if ( chr1 == 0x8E ) { /* If this is first byte of chars 0x8EA1 - 0x8EDF */ |
| if ( maxlen < 2 ) goto err_exit; |
| unsigned char ch2 = *(s+1); |
| if ( (ch2 >= 0xA1) && (ch2 <= 0xDF) ) { |
| wc = *(s+1) + 0xFF61 - 0xA1; /* Algorithmically convert! */ |
| s += 2; |
| } |
| else goto err_exit; |
| } |
| else if ( chr1 == 0x8F ) { /* If this is first byte of a 3-byte char */ |
| if ( maxlen < 3 ) goto err_exit; |
| idx = ( *(s+1) << 8 ) | *(s+2) ; |
| if ( (idx >= 0xA1A1) && (idx <= 0xFEFE) && |
| ((idx & 0xFF) >= 0xA1 ) && ((idx & 0xFF) <= 0xFE )) { |
| |
| idx = ((idx >> 8)-0xA1)*EUCJP_ROW_LEN + ((idx&0xFF)-0xA1); |
| wc = Our_eucJP_tableF8F[idx]; |
| if ( wc == BAD_UCS_VAL ) |
| goto err_exit; |
| s += 3; |
| } |
| else goto err_exit; |
| } |
| else { /* Must be a regular 2-byte char */ |
| if ( maxlen < 2 ) goto err_exit; |
| idx = ( chr1 << 8 ) | *(s+1) ; |
| if ( (idx >= 0xA1A1) && (idx <= 0xFEFE) && |
| ((idx & 0xFF) >= 0xA1 ) && ((idx & 0xFF) <= 0xFE )) { |
| |
| idx = ((idx >> 8)-0xA1)*EUCJP_ROW_LEN + ((idx&0xFF)-0xA1); |
| wc = Our_eucJP_tableF00[idx]; |
| if ( wc == BAD_UCS_VAL ) |
| goto err_exit; |
| s += 2; |
| } |
| else goto err_exit; |
| } |
| |
| if (pwc) *pwc = wc ; |
| return((size_t)(s - (uchar_t *)ts)) ; |
| |
| err_exit: |
| return((size_t)-1); |
| } |
| |
| /* |
| * Our_wctomb_big5_ucs4() - routine to convert from a UCS4 character |
| * to a multi-byte BIG5 character. |
| * |
| * Arguments: s - pointer to target output string. |
| * wc - The UCS4 character to convert |
| * hdl - dummy ptr to _LC_charmap_t - needed only to |
| * make our routine take the same arguments |
| * as the __mbtowc_<cs>_ucs4() routines that we |
| * generated by the MBTOWC macro. |
| * |
| * NOTE: It is the caller's responsibility to ensure output array is |
| * big enough. |
| */ |
| #define Max_BIG5_UCS_val (0xFFFE) |
| |
| int Our_wctomb_big5_ucs4(char *s, WChar_t wc, _LC_charmap_t *hdl) |
| { |
| WChar_t mb = 0 ; |
| |
| if (s == NULL) |
| return(0) ; |
| |
| if (isascii(wc)) |
| mb = (wc) ; |
| else { /* if not an ASCII char */ |
| if ( wc <= Max_BIG5_UCS_val ) { |
| mb = Our_MS_BIG5_tableB[wc]; |
| if ( mb == 0xFFFF ) |
| mb = (WChar_t)BAD; |
| } |
| else mb = (WChar_t)BAD; |
| } |
| |
| if (mb == (WChar_t)BAD) |
| return(-1); |
| |
| if (mb < 0x100) |
| { |
| *s = (char)( mb & 0xff ); |
| return(1) ; |
| } |
| else /* Must be a 2-byte character ... BIG5 has none 3-byte or longer */ |
| { |
| *s++ = (char)( (mb >> 8) & 0xff ); |
| *s = (char)( mb & 0xff ); |
| return(2) ; |
| } |
| } |
| |
| /* |
| * Our_wctomb_cp949_ucs4() - routine to convert from a UCS4 character |
| * to a multi-byte KSC character. |
| * |
| * Arguments: s - pointer to target output string. |
| * wc - The UCS4 character to convert |
| * hdl - dummy ptr to _LC_charmap_t - needed only to |
| * make our routine take the same arguments |
| * as the __mbtowc_<cs>_ucs4() routines that we |
| * generated by the MBTOWC macro. |
| * |
| * NOTE: It is the caller's responsibility to ensure output array is |
| * big enough. |
| */ |
| #define Max_KSC_UCS_val (0xFFFE) |
| |
| int Our_wctomb_cp949_ucs4(char *s, WChar_t wc, _LC_charmap_t *hdl) |
| { |
| WChar_t mb = 0 ; |
| |
| if (s == NULL) |
| return(0) ; |
| |
| if (isascii(wc)) |
| mb = (wc) ; |
| else { /* if not an ASCII char */ |
| if ( (wc >= 0xE000) && (wc <= 0xE05D) ) { /* Handle UDC algorithmically */ |
| mb = wc - 0xE000 + 0xC9A1; |
| } |
| else if ( (wc >= 0xE05E) && (wc <= 0xE0BB) ) { /* Handle UDC algorithmically */ |
| mb = wc - 0xE05E + 0xFEA1; |
| } |
| else if ( wc <= Max_KSC_UCS_val ) { |
| mb = Our_MS_KSC_tableB[wc]; |
| if ( mb == 0xFFFF ) |
| mb = (WChar_t)BAD; |
| } |
| else mb = (WChar_t)BAD; |
| } |
| |
| if (mb == (WChar_t)BAD) |
| return(-1); |
| |
| if (mb < 0x100) |
| { |
| *s = (char)( mb & 0xff ); |
| return(1) ; |
| } |
| else /* Must be a 2-byte character ... KSC has none 3-byte or longer */ |
| { |
| *s++ = (char)( (mb >> 8) & 0xff ); |
| *s = (char)( mb & 0xff ); |
| return(2) ; |
| } |
| } |
| |
| /* |
| * Our_wctomb_sjis_ucs4() - routine to convert from a UCS4 character |
| * to a multi-byte SJIS character. |
| * |
| * Arguments: s - pointer to target output string. |
| * wc - The UCS4 character to convert |
| * hdl - dummy ptr to _LC_charmap_t - needed only to |
| * make our routine take the same arguments |
| * as the __mbtowc_<cs>_ucs4() routines that we |
| * generated by the MBTOWC macro. |
| * |
| * NOTE: It is the caller's responsibility to ensure output array is |
| * big enough. |
| */ |
| #define Max_SJIS_UCS_val (0xFFFE) |
| |
| int Our_wctomb_sjis_ucs4(char *s, WChar_t wc, _LC_charmap_t *hdl) |
| { |
| WChar_t mb = 0 ; |
| |
| if (s == NULL) |
| return(0) ; |
| |
| if (isascii(wc)) |
| mb = (wc) ; |
| else if ( (wc >= 0xE000) && ( wc <= 0xE757 ) ) { /* if user-defined char */ |
| mb = 0xF040; |
| mb += ( (wc - 0xE000) / 188 ) * 0x100; /* Get 2nd hex digit right */ |
| mb += ( (wc - 0xE000) % 188 ) ; /* Get last 2 hex digits right */ |
| if ( (mb & 0xFF) >= 0x7F ) mb += 1; /* SJIS ending with 0x7F not used */ |
| } |
| else { /* If not an ASCII char & not UDC char */ |
| if ( wc <= Max_SJIS_UCS_val ) { |
| mb = Our_MS_sjis_tableB[wc]; |
| if ( mb == 0xFFFF ) |
| mb = (WChar_t)BAD; |
| } |
| else mb = (WChar_t)BAD; |
| } |
| |
| if (mb == (WChar_t)BAD) |
| return(-1); |
| |
| if (mb < 0x100) |
| { |
| *s = (char)( mb & 0xff ); |
| return(1) ; |
| } |
| else /* Must be a 2-byte character ... SJIS has none 3-byte or longer */ |
| { |
| *s++ = (char)( (mb >> 8) & 0xff ); |
| *s = (char)( mb & 0xff ); |
| return(2) ; |
| } |
| } |
| |
| /* |
| * Our_wctomb_eucjp_ucs4() - routine to convert from a UCS4 character |
| * to a multi-byte EUC-JP character. |
| * |
| * Arguments: s - pointer to target output string. |
| * wc - The UCS4 character to convert |
| * hdl - dummy ptr to _LC_charmap_t - needed only to |
| * make our routine take the same arguments |
| * as the __mbtowc_<cs>_ucs4() routines that we |
| * generated by the MBTOWC macro. |
| * |
| * NOTE: It is the caller's responsibility to ensure output array is |
| * big enough. |
| */ |
| int Our_wctomb_eucjp_ucs4(char *s, WChar_t wc, _LC_charmap_t *hdl) |
| { |
| WChar_t mb = 0 ; |
| |
| if (s == NULL) |
| return(0) ; |
| |
| if (isascii(wc)) |
| mb = (wc) ; |
| else if ( (wc >= 0xFF61) && (wc <=0xFF9F) ) |
| mb = wc - 0xFF61 + 0x8EA1; /* Algorithmically convert! */ |
| else { |
| mb = Our_eucJP_tableB[wc]; |
| if ( mb == 0x0000FFFF ) |
| mb = (WChar_t)BAD; |
| /* |
| * If Flag bit for this Unicode Value says to prepend 0x8F |
| * then do so. |
| */ |
| if ( Our_eucJP_tableB_8F_FB[wc/32] & 1 << (31-(wc%32)) ) |
| mb |= 0x8F0000; |
| } |
| |
| if (mb == (WChar_t)BAD) |
| return(-1); |
| |
| if (mb < 0x100) |
| { |
| *s = (char)( mb & 0xff ); |
| return(1) ; |
| } |
| else if (mb < 0x10000) |
| { |
| *s++ = (char)( (mb >> 8) & 0xff ); |
| *s = (char)( mb & 0xff ); |
| return(2) ; |
| } |
| else /* Must be a 3-byte character ... EUCJP has none 4-byte or longer */ |
| { |
| *s++ = (char)( (mb >> 16) & 0xff ); |
| *s++ = (char)( (mb >> 8) & 0xff ); |
| *s = (char)( mb & 0xff ); |
| return(3) ; |
| } |
| } |
| |
| #else /* USE_OUR_MB_WC_DATA_TABLES */ |
| |
| #ifndef MB_LCONV_C |
| #define MB_LCONV_C 1 |
| |
/*
 * Generic macros to access the MB to WC row and cell tables.
 * Each accessor comes as a pair: the "__" form pastes the codeset name
 * into the table identifier, and the "_" form forwards to it so that a
 * macro argument (such as CODESET) is fully expanded before pasting.
 */

/* 2-byte cell table */
#define __MBCELL2(cs) _ ## cs ## _to_ucs_cell2
#define _MBCELL2(cs) __MBCELL2(cs)

/* 4-byte cell table */
#define __MBCELL4(cs) _ ## cs ## _to_ucs_cell4
#define _MBCELL4(cs) __MBCELL4(cs)

/* row table and its element count */
#define __MBROW(cs) _ ## cs ## _to_ucs_row
#define _MBROW(cs) __MBROW (cs)
#define _MBROWSIZE(cs) (sizeof(__MBROW(cs))/sizeof(__MBROW(cs)[0]))

/* multibyte index routine */
#define __MBINDEX(cs) __ ## cs ## _index
#define _MBINDEX(cs) __MBINDEX (cs)

/* direct-map setting: the DMAP name and the dmap variable */
#define __MBDMAP(cs) cs ## _to_ucs_DMAP
#define _MBDMAP(cs) __MBDMAP (cs)
#define __MBDMAPVAL(cs) cs ## _to_ucs_dmap
#define _MBDMAPVAL(cs) __MBDMAPVAL(cs)
| |
/*
 * Generic macros to access the WC to MB row and cell tables.
 * As with the MB accessors, the "__" form pastes the codeset name and the
 * "_" form forwards to it so the argument is expanded first.
 */

/* 2-byte cell table */
#define __WCCELL2(cs) _ucs_to_ ## cs ## _cell2
#define _WCCELL2(cs) __WCCELL2(cs)

/* 4-byte cell table */
#define __WCCELL4(cs) _ucs_to_ ## cs ## _cell4
#define _WCCELL4(cs) __WCCELL4(cs)

/* row table and its element count */
#define __WCROW(cs) _ucs_to_ ## cs ## _row
#define _WCROW(cs) __WCROW (cs)
#define _WCROWSIZE(cs) (sizeof(__WCROW(cs))/sizeof(__WCROW(cs)[0]))

/* direct-map setting: the DMAP name and the dmap variable */
#define __WCDMAP(cs) ucs_to_ ## cs ## _DMAP
#define _WCDMAP(cs) __WCDMAP (cs)
#define __WCDMAPVAL(cs) ucs_to_ ## cs ## _dmap
#define _WCDMAPVAL(cs) __WCDMAPVAL(cs)
| |
/*
 * Generic MB/WC conversion routine name macros.
 * The "__" form builds the per-codeset identifier by token pasting; the
 * "_" form forwards to it so the argument is macro-expanded first.
 */

/* Conversion entry points: __<op>_<cs>_ucs4 */
#define __MBLEN(cs) __mblen_ ## cs ## _ucs4
#define _MBLEN(cs) __MBLEN (cs)
#define __MBTOWC(cs) __mbtowc_ ## cs ## _ucs4
#define _MBTOWC(cs) __MBTOWC (cs)
#define __WCTOMB(cs) __wctomb_ ## cs ## _ucs4
#define _WCTOMB(cs) __WCTOMB (cs)
#define __MBTOPC(cs) __mbtopc_ ## cs ## _ucs4
#define _MBTOPC(cs) __MBTOPC (cs)
#define __MBSTOWCS(cs) __mbstowcs_ ## cs ## _ucs4
#define _MBSTOWCS(cs) __MBSTOWCS(cs)
#define __WCSTOMBS(cs) __wcstombs_ ## cs ## _ucs4
#define _WCSTOMBS(cs) __WCSTOMBS(cs)
#define __MBSTOPCS(cs) __mbstopcs_ ## cs ## _ucs4
#define _MBSTOPCS(cs) __MBSTOPCS(cs)

/* UDC / UCS fallback converters and the variables holding them */
#define __UDCTOMB(cs) __UDC_to_ ## cs
#define _UDCTOMB(cs) __UDCTOMB (cs)
#define __UCSTOMB(cs) __UCS_to_ ## cs
#define _UCSTOMB(cs) __UCSTOMB (cs)
#define __UDCFUNC(cs) __UDC_to_ ## cs ## _func
#define _UDCFUNC(cs) __UDCFUNC (cs)
#define __UCSFUNC(cs) __UCS_to_ ## cs ## _func
#define _UCSFUNC(cs) __UCSFUNC (cs)

/* Table lookup helpers */
#define __WCGETVAL(cs) __wcgetval_ ## cs
#define _WCGETVAL(cs) __WCGETVAL(cs)
#define __MBGETVAL(cs) __mbgetval_ ## cs
#define _MBGETVAL(cs) __MBGETVAL(cs)

/* UTF-16 converters */
#define __UTF16ToMB(cs) UTF16To_ ## cs
#define _UTF16ToMB(cs) __UTF16ToMB(cs)
#define __MBToUTF16(cs) cs ## _ToUTF16
#define _MBToUTF16(cs) __MBToUTF16(cs)
| |
/*
 * Per-codeset maximum multibyte character length.
 * _MBCURMAX(cs) resolves to the MBCURMAX_<cs> constant below; the extra
 * "__" level lets a macro argument be expanded before token pasting.
 */
#define _MBCURMAX(cs) __MBCURMAX(cs)
#define __MBCURMAX(cs) MBCURMAX_ ## cs

/* Codesets whose characters are at most 2 bytes long */
#define MBCURMAX_big5 2
#define MBCURMAX_cp949 2
#define MBCURMAX_dechanzi 2
#define MBCURMAX_deckanji 2
#define MBCURMAX_deckorean 2
#define MBCURMAX_euckr 2
#define MBCURMAX_gbk 2
#define MBCURMAX_hkscs 2
#define MBCURMAX_sjis 2

/* Codesets whose characters are at most 3 bytes long */
#define MBCURMAX_eucjp 3
#define MBCURMAX_sdeckanji 3

/* Codesets whose characters are at most 4 bytes long */
#define MBCURMAX_dechanyu 4
#define MBCURMAX_euctw 4
#define MBCURMAX_gb18030 4
| |
/*
 * Placeholder pctomb() and pcstombs() routines: both unconditionally
 * report failure by returning -1.
 */
int __pctomb_mb_ucs4()
{
    return -1;
}

int __pcstombs_mb_ucs4()
{
    return -1;
}
| |
| /* |
| * Dummy cell4_t table |
| */ |
| static cell4_t dummy_cell4[1] = { 0x0000 }; // initialize it |
| |
| #endif |
| |
/*
 * Codeset specific macros to access the MB to WC row and cell tables.
 * This file may be included several times with a different CODESET, so
 * every name is dropped before it is bound to the current codeset.
 */
#undef MBCURMAX
#define MBCURMAX _MBCURMAX (CODESET)

#undef MBCELL2
#define MBCELL2 _MBCELL2 (CODESET)

#undef MBCELL4
#define MBCELL4 _MBCELL4 (CODESET)

#undef MBROW
#define MBROW _MBROW (CODESET)

#undef MBROWSIZE
#define MBROWSIZE _MBROWSIZE(CODESET)

#undef MBDMAP
#define MBDMAP _MBDMAP (CODESET)

#undef MBDMAPVAL
#define MBDMAPVAL _MBDMAPVAL(CODESET)

#undef MBGETVAL
#define MBGETVAL _MBGETVAL (CODESET)

/* The index routine is always called with NULL as its first argument. */
#undef MBINDEX
#define MBINDEX(x,y) _MBINDEX (CODESET)(NULL,x,y)

/*
 * ASCII lookup: when MBDMAPVAL is non-zero, cell row 0 is indexed
 * directly; otherwise the lookup goes through the row table.
 */
#undef MBGETASCII
#define MBGETASCII(c) (MBDMAPVAL ? MBCELL2[0][c] \
                                 : MBCELL2[MBROW[ROW(c)]][COL(c)])
| /* |
| * Codeset specific macros to access the WC to MB row and cell tables |
| */ |
| #undef WCCELL2 |
| #undef WCCELL4 |
| #undef WCROW |
| #undef WCROWSIZE |
| #undef WCGETASCII |
| #undef WCGETVAL |
| #undef WCISBAD |
| #undef WCDMAP |
| #undef WCDMAPVAL |
| |
| #define WCCELL2 _WCCELL2 (CODESET) |
| #define WCCELL4 _WCCELL4 (CODESET) |
| #define WCROW _WCROW (CODESET) |
| #define WCROWSIZE _WCROWSIZE(CODESET) |
| #define WCDMAP _WCDMAP (CODESET) |
| #define WCDMAPVAL _WCDMAPVAL(CODESET) |
| #define WCGETVAL _WCGETVAL (CODESET) |
| #define WCGETASCII(c) (WCDMAPVAL ? WCCELL2[0][c] \ |
| : WCCELL2[WCROW[ROW(c)]][COL(c)]) |
| /* |
| * Codeset specific MB/WC conversion routine name macros |
| */ |
| #undef MBLEN |
| #undef MBTOWC |
| #undef WCTOMB |
| #undef MBTOPC |
| #undef MBSTOWCS |
| #undef WCSTOMBS |
| #undef MBSTOPCS |
| #undef UDCTOMB |
| #undef UDCFUNC |
| #undef UTF16ToMB |
| #undef MBToUTF16 |
| |
| #define MBLEN _MBLEN (CODESET) |
| #define MBTOWC _MBTOWC (CODESET) |
| #define WCTOMB _WCTOMB (CODESET) |
| #define MBTOPC _MBTOPC (CODESET) |
| #define PCTOMB _PCTOMB (CODESET) |
| #define MBSTOWCS _MBSTOWCS(CODESET) |
| #define WCSTOMBS _WCSTOMBS(CODESET) |
| #define MBSTOPCS _MBSTOPCS(CODESET) |
| #define PCSTOMBS _PCSTOMBS(CODESET) |
| #define UDCTOMB _UDCTOMB (CODESET) |
| #define UCSTOMB _UCSTOMB (CODESET) |
| #define UDCFUNC _UDCFUNC (CODESET) |
| #define UCSFUNC _UCSFUNC (CODESET) |
| #define UTF16ToMB _UTF16ToMB(CODESET) |
| #define MBToUTF16 _MBToUTF16(CODESET) |
| |
| const static udcfunc_t UDCFUNC = UDCTOMB ; |
| const static udcfunc_t UCSFUNC = UCSTOMB ; |
| const static int MBDMAPVAL = MBDMAP ; |
| const static int WCDMAPVAL = WCDMAP ; |
| |
| /*-----------------------[ Internal inline functions ]-----------------------*/ |
| |
| // #pragma inline (WCGETVAL, MBGETVAL) // BRL & JAC |
| |
| /* |
| * Map a wide character code (UCS) to its multibyte format |
| */ |
| inline static WChar_t WCGETVAL(WChar_t wc) //JAC |
| { |
| int row = ROW(wc) ; |
| if ((row >= WCROWSIZE) || ((row = WCROW[row]) == UCS2_BAD)) |
| return(BAD) ; |
| if (WCCELL4 && (row > ROW_MASK)) |
| return((WCCELL4 ? WCCELL4 : dummy_cell4)[MASKROW(row)][COL(wc)]) ; |
| else |
| { |
| WChar_t mb = WCCELL2[row][COL(wc)] ; |
| return((mb == UCS2_BAD) ? BAD : mb) ; |
| } |
| } |
| |
| /* |
| * Map a multibyte index to wide character encoding |
| */ |
| inline static WChar_t MBGETVAL(int idx) //JAC |
| { |
| int row = ROW(idx) ; |
| if ((row >= MBROWSIZE) || ((row = MBROW[row]) == UCS2_BAD)) |
| return(BAD) ; |
| if (MBCELL4 && (row > ROW_MASK)) |
| return((MBCELL4 ? MBCELL4 : dummy_cell4)[MASKROW(row)][COL(idx)]) ; |
| else |
| { |
| WChar_t wc = MBCELL2[row][COL(idx)] ; //JAC |
| return((wc == UCS2_BAD) ? BAD : wc) ; |
| } |
| } |
| |
| /*--------------------[ Conversion routines start here ]---------------------*/ |
| |
#ifdef USING_OPEN_SOURCE_MBLEN
/*
 * MBLEN - length in bytes of the multibyte character at ts.
 *
 * Returns 0 if ts is NULL or points at a NUL byte, 1 for an ASCII byte,
 * (size_t)-2 converted to int when the input is incomplete, (size_t)-1
 * converted to int when maxlen is 0 or the character is invalid, and
 * otherwise the distance the cursor moved after MBINDEX was handed its
 * address.  hdl is not referenced.
 */
int MBLEN(const char *ts, size_t maxlen, _LC_charmap_t *hdl)
{
    uchar_t *cursor = (uchar_t *)ts ;
    int mbidx ;

    if (cursor == NULL)
        return (0);
    if (*cursor == '\0')
        return (0);

    /*
     * If maxlen is zero then treat it as an illegal character - same
     * as for the non-UCS locale.
     */
    if (maxlen == (size_t)0)
    {
#ifdef DONT_NEED_THIS // JAC
        _Seterrno(EILSEQ);
#endif // DONT_NEED_THIS - JAC
        return((size_t)-1);
    }

    if (isascii(*cursor))
        return(1) ;

    mbidx = MBINDEX(&cursor, maxlen) ;
    if (mbidx == ERR_INPUT_INCOMPLETE)
        return((size_t)-2) ;            /* Input incomplete */

    /*
     * Valid when the index is not the error marker and it is either an
     * IDXU index, a UCODE index, or has an entry in the table.
     */
    if ((mbidx != ERR_INVALID_CHAR) &&
        (ISIDXU(mbidx) || IS_UCODE(mbidx) || (MBGETVAL(mbidx) != BAD)))
        return((size_t)(cursor - (uchar_t *)ts)) ;

#ifdef DONT_NEED_THIS // JAC
    _Seterrno(EILSEQ) ;
#endif // DONT_NEED_THIS - JAC
    return((size_t)-1);
}
#endif // USING_OPEN_SOURCE_MBLEN
| |
| size_t MBTOWC(WChar_t *pwc, const char *ts, size_t maxlen, _LC_charmap_t *hdl) // JAC |
| { |
| uchar_t *s = (uchar_t *)ts ; |
| WChar_t wc ; //JAC |
| #ifdef DONT_NEED_THIS // JAC |
| int idx, row ; |
| #else |
| int idx ; |
| #endif // DONT_NEED_THIS - JAC |
| |
| /* |
| * If ts == NULL, return non-zero or zero if character encodings |
| * do or do not have state-dependent encodings |
| */ |
| if (ts == NULL) return (0); /* No state dependent encodings */ |
| |
| /* |
| * If maxlen is zero then treat it as an illegal character - same |
| * as for the non-UCS locale. |
| */ |
| if (maxlen == (size_t)0) |
| { |
| #ifdef DONT_NEED_THIS // JAC |
| _Seterrno(EILSEQ); |
| #endif // DONT_NEED_THIS - JAC |
| return((size_t)-1); |
| } |
| |
| if (*s == '\0') |
| { |
| /* No need to take the hit of a function call */ |
| if (pwc) *pwc = 0; |
| return (0); |
| } |
| |
| if (isascii(*s)) |
| { |
| if (pwc) *pwc = MBGETASCII(*s) ; |
| return(1) ; |
| } |
| |
| #ifdef OUR_CS_GB18030_specific /* Deal with 8431A438 and 9 separately */ |
| if ( (*s == 0x84) && ( *(s+1) == 0x31 ) && (*(s+2) == 0xA4) ) { |
| if ( *(s+3) == 0x38 ) { |
| wc = 0x0FFFE; |
| if (pwc) *pwc = wc ; |
| return((size_t)(4)) ; |
| } |
| if ( *(s+3) == 0x39 ) { |
| wc = 0x0FFFF; |
| if (pwc) *pwc = wc ; |
| return((size_t)(4)) ; |
| } |
| } |
| #endif /* OUR_CS_GB18030_specific */ |
| #if defined(OUR_CS_GBK_specific) |
| if ( *s == 0x80 ) { /* Handle Euro Sign that GBK defines as 0x80 */ |
| wc = 0x020AC; |
| if (pwc) *pwc = wc ; |
| return((size_t)(1)) ; // Return length of input char in bytes. |
| } |
| #endif /* OUR_CS_GBK_specific */ |
| #if defined(OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific) |
| if ( *s == 0xA9 ) { /* Disallow range of UDCs - since not UDCs in GB18030 */ |
| if ( (*(s+1) >= 0x89) && (*(s+1) <= 0x95) ) |
| goto err_exit ; |
| } |
| if ( *s == 0xFE ) { /* Disallow range of UDCs - since not UDCs in GB18030 */ |
| if ( (*(s+1) >= 0x50) && (*(s+1) <= 0x9F) ) |
| goto err_exit ; |
| } |
| if ( *s == 0xA2 ) { /* Disallow 0xA2E3 UDC - since not UDC in GB18030 */ |
| if ( *(s+1) == 0xE3 ) |
| goto err_exit ; |
| } |
| #endif /* (OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific) */ |
| |
| idx = MBINDEX(&s, maxlen) ; |
| if (idx == ERR_INPUT_INCOMPLETE) |
| return((size_t)-2) ; /* Input incomplete */ |
| else if (idx == ERR_INVALID_CHAR) |
| goto err_exit ; |
| else if (IS_UCODE(idx)) |
| wc = GET_UCODE(idx) ; |
| else if (ISIDXU(idx)) |
| wc = IDXU_UCS(idx) ; |
| else if ((wc = MBGETVAL(idx)) == BAD) |
| goto err_exit ; |
| |
| #if defined(OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific) |
| /* |
| * NOTE: Because gb2312 and gb18030 share data tables, it is |
| * possible that MBGETVAL() returned a gb18030 char. Here |
| * we explicitly rule those out for gb2312. These rules |
| * may need changes in the future if more characters are |
| * added to gb2312. |
| */ |
| /*************************************** |
| NOTE: Even though the official GB2312 doesn't support the following |
| 5 characters, we decided to allow them because HP-UX does. |
| ***************************************/ |
| #if 0 |
| if (wc == 0x0251) goto err_exit ; |
| if (wc == 0x0261) goto err_exit ; |
| if (wc == 0x0144) goto err_exit ; |
| if (wc == 0x0148) goto err_exit ; |
| if (wc == 0x01F9) goto err_exit ; |
| #endif |
| #if defined(OUR_CS_GBK_specific) |
| /*************************************** |
| NOTE: Even though the official GBK doesn't support User-Defined chars |
| in the range U+E000 - U+0xE8FF, we decided to allow 0xE000 - 0xE765 |
| because BOTH Java and GB18030 allow them. Java allows a few more, |
| but don't see how to support those while using GB18030 tables. |
| ***************************************/ |
| if ( (wc >= 0xE766) && (wc <= 0xE8FF) && (wc != 0xE7C7) ) goto err_exit ; |
| |
| #else /* (OUR_CS_GB2312_specific) */ |
| /*************************************** |
| NOTE: Even though the official GB2312 doesn't support 0xA8BC mapping to |
| U+0xE7C7, we decided to allow it because HP-UX does. The rest of |
| the characters ruled out by the following 2 lines are not part of |
| GB2312 and not supported by HP-UX. |
| ***************************************/ |
| if ( (wc >= 0xE000) && (wc <= 0xFF00) && !(wc == 0xE7C7)) goto err_exit ; |
| if ( (wc >= 0x2170) && (wc <= 0x2179) ) goto err_exit ; |
| |
| /*************************************** |
| NOTE: DEC/OSF code maps 0xA1AA to U+0x2014. So does the SUN mappings for |
| the GB18030 character set. However, HP-UX, Java, and GNU map it |
| to U+0x2015. Yuk! HP-China tells us to go with HP-UX's way. |
| ***************************************/ |
| if ( wc == 0x2014 ) wc = 0x2015; |
| #endif |
| |
| #endif /* defined(OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific) */ |
| |
| if (pwc) *pwc = wc ; |
| return((size_t)(s - (uchar_t *)ts)) ; |
| |
| err_exit: |
| #ifdef DONT_NEED_THIS // JAC |
| _Seterrno(EILSEQ) ; |
| #endif // DONT_NEED_THIS - JAC |
| return((size_t)-1); |
| } |
| |
| int WCTOMB(char *s, WChar_t wc, _LC_charmap_t *hdl) // JAC |
| { |
| WChar_t mb = 0 ; |
| #ifdef DONT_NEED_THIS // JAC |
| int row ; |
| #endif // DONT_NEED_THIS - JAC |
| |
| /* |
| * If s is NULL, return 0 |
| */ |
| if (s == NULL) |
| return(0) ; |
| |
| #ifdef OUR_CS_GB18030_specific /* Deal with 8431A438 and 9 separately */ |
| if ( wc == 0xFFFE ) { |
| mb = 0x8431A438; |
| goto success_exit; |
| } |
| if ( wc == 0xFFFF ) { |
| mb = 0x8431A439; |
| goto success_exit; |
| } |
| #endif /* OUR_CS_GB18030_specific */ |
| if (isascii(wc)) |
| mb = WCGETASCII(wc) ; |
| else if (UCS_UDC(wc) && UDCFUNC) |
| mb = (*UDCFUNC)(wc) ; |
| /* |
| * UDCFUNC may return 0. In this case, look up the mapping table for |
| * the correct mb value. |
| */ |
| if (wc && (mb == 0)) |
| { |
| mb = WCGETVAL(wc) ; |
| if ((mb == (WChar_t)BAD) && UCSFUNC) //JAC |
| mb = (*UCSFUNC)(wc) ; |
| } |
| #if defined(OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific) |
| /* |
| * NOTE: Because gb2312 and gb18030 share data tables, it is |
| * possible that WCGETVAL() returned a gb18030 char. Here |
| * we explicitly rule those out for gb2312. These rules |
| * may need changes in the future if more characters are |
| * added to gb2312. |
| */ |
| #if defined(OUR_CS_GBK_specific) |
| if ( wc == 0x20AC ) /* Handle Euro Sign that GBK defines as 0x80 */ |
| mb = (WChar_t)(0x0080); |
| /*************************************** |
| NOTE: Even though the official GBK doesn't support User-Defined chars |
| in the range U+E000 - U+0xE8FF, we decided to allow 0xE000 - 0xE765 |
| because BOTH Java and GB18030 allow them. Java allows a few more, |
| but don't see how to support those while using GB18030 tables. |
| ***************************************/ |
| if ( (wc >= 0xE766) && (wc <= 0xE8FF) && (wc != 0xE7C7) ) |
| mb = (WChar_t)BAD; |
| |
| if ( (mb >= 0xA989) && (mb <= 0xA995) ) /* Disallow range of UDCs - since not UDCs in GB18030 */ |
| mb = (WChar_t)BAD; |
| |
| if ( (mb >= 0xFE50) && (mb <= 0xFE9F) ) /* Disallow range of UDCs - since not UDCs in GB18030 */ |
| mb = (WChar_t)BAD; |
| |
| if ( mb == 0xA2E3 ) /* Disallow 0xA2E3 UDC - since not UDC in GB18030 */ |
| mb = (WChar_t)BAD; |
| |
| |
| #else /* Specific to GB2312 */ |
| if ( ( (wc >= 0xE000) && (wc <= 0xFF00) && !(wc==0xE7C7) ) || |
| ( (wc >= 0x2170) && (wc <= 0x2179) ) ) |
| mb = (WChar_t)BAD; |
| |
| if ( (wc == 0x2014) || (wc == 0x2015) ) |
| mb = (WChar_t)(0xA1AA); |
| #endif |
| |
| #endif /* defined(OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific) */ |
| |
| if (mb == (WChar_t)BAD) //JAC |
| { |
| #ifdef DONT_NEED_THIS // JAC |
| _Seterrno(EILSEQ) ; |
| #endif // DONT_NEED_THIS - JAC |
| return(-1); |
| } |
| if (mb < 0x100) |
| { |
| *s = (char)( mb & 0xff ); // JAC |
| return(1) ; |
| } |
| else if (mb < 0x10000) |
| { |
| *s++ = (char)( (mb >> 8) & 0xff ); // JAC |
| *s = (char)( mb & 0xff ); // JAC |
| return(2) ; |
| } |
| else if (mb < 0x1000000) |
| { |
| *s++ = (char)( (mb >> 16) & 0xff ); // JAC |
| *s++ = (char)( (mb >> 8) & 0xff ); // JAC |
| *s = (char)( mb & 0xff ); // JAC |
| return(3) ; |
| } |
| else |
| { |
| #ifdef OUR_CS_GB18030_specific /* Deal with 8431A438 and 9 separately */ |
| success_exit: |
| #endif /* OUR_CS_GB18030_specific */ |
| *s++ = (char)( (mb >> 24) & 0xff ); // JAC |
| *s++ = (char)( (mb >> 16) & 0xff ); // JAC |
| *s++ = (char)( (mb >> 8) & 0xff ); // JAC |
| *s = (char)( mb & 0xff ); // JAC |
| return(4) ; |
| } |
| } |
| |
| #ifdef USING_OPEN_SOURCE_MBSTOWCS |
| size_t MBSTOWCS(WChar_t *pwcs, const char *ts, size_t n, _LC_charmap_t *hdl) // JAC |
| { |
| uchar_t *s = (uchar_t *)ts ; |
| WChar_t wc ; // JAC |
| #ifdef DONT_NEED_THIS // JAC |
| int cnt, idx, row ; |
| #else |
| int cnt, idx ; |
| #endif // DONT_NEED_THIS - JAC |
| |
| if (s == NULL) |
| return(0) ; |
| /* |
| * Fix QAR 92292 - UCS-4 locale mbstowcs problem |
| */ |
| if (*s == '\0') |
| { |
| if (pwcs && (n >= 1)) *pwcs = 0 ; |
| return(0) ; |
| } |
| |
| if (pwcs == NULL) |
| { |
| /* |
| * Count the number of multibyte characters in s |
| */ |
| for (cnt = 0 ; *s != '\0' ; cnt++) |
| { |
| if (isascii(*s)) |
| { |
| s++ ; |
| continue ; |
| } |
| idx = MBINDEX(&s, MBCURMAX) ; |
| if (idx < 0) |
| { |
| #ifdef DONT_NEED_THIS // JAC |
| _Seterrno(EILSEQ) ; |
| #endif // DONT_NEED_THIS - JAC |
| return((size_t)-1); |
| } |
| } |
| return(cnt) ; |
| } |
| |
| for (cnt = 0 ; (*s != '\0') && ((size_t)cnt < n) ; cnt++) // (size_t) added - JAC |
| { |
| if (isascii(*s)) |
| { |
| *pwcs++ = MBGETASCII(*s) ; |
| s++ ; |
| continue ; |
| } |
| idx = MBINDEX(&s, MBCURMAX) ; |
| if (idx < 0) |
| { |
| #ifdef DONT_NEED_THIS // JAC |
| _Seterrno(EILSEQ) ; |
| #endif // DONT_NEED_THIS - JAC |
| return((size_t)-1); |
| } |
| if (IS_UCODE(idx)) |
| wc = GET_UCODE(idx) ; |
| else if (ISIDXU(idx)) |
| wc = IDXU_UCS(idx) ; |
| else if ((wc = MBGETVAL(idx)) == BAD) |
| { |
| #ifdef DONT_NEED_THIS // JAC |
| _Seterrno(EILSEQ) ; |
| #endif // DONT_NEED_THIS - JAC |
| return((size_t)-1); |
| } |
| *pwcs++ = wc ; |
| } |
| if ((size_t)cnt < n) // (size_t) added - JAC |
| *pwcs = 0 ; /* Terminate wctype string */ |
| return(cnt) ; |
| } |
| #endif // USING_OPEN_SOURCE_MBSTOWCS |
| |
| #ifdef USING_OPEN_SOURCE_WCSTOMBS |
| size_t WCSTOMBS(char *s, const WChar_t *pwcs, size_t n, _LC_charmap_t *hdl) // JAC |
| { |
| #ifdef DONT_NEED_THIS // JAC |
| int cnt, len, row ; |
| #else |
| int cnt, len ; |
| #endif // DONT_NEED_THIS - JAC |
| WChar_t mb, wc ; // JAC |
| |
| if (pwcs == NULL) |
| { |
| #ifdef DONT_NEED_THIS // JAC |
| _Seterrno(EILSEQ) ; |
| #endif // DONT_NEED_THIS - JAC |
| return((size_t)-1); |
| } |
| |
| for (cnt = 0 ; (wc = *pwcs) != 0 ; cnt += len, pwcs++) |
| { |
| mb = 0 ; |
| if (isascii(wc)) |
| mb = WCGETASCII(wc) ; |
| else if (UCS_UDC(wc) && UDCFUNC) |
| mb = (*UDCFUNC)(wc) ; |
| /* |
| * UDCFUNC may return 0. In this case, look up the mapping table for |
| * the correct mb value. |
| */ |
| if (wc && (mb == 0)) |
| { |
| mb = WCGETVAL(wc) ; |
| if ((mb == (WChar_t)BAD) && UCSFUNC) // JAC |
| mb = (*UCSFUNC)(wc) ; |
| } |
| |
| if (mb == (WChar_t)BAD) // JAC |
| { |
| #ifdef DONT_NEED_THIS // JAC |
| _Seterrno(EILSEQ) ; |
| #endif // DONT_NEED_THIS - JAC |
| return((size_t)-1); |
| } |
| if (mb < 0x0000100) len = 1 ; |
| else if (mb < 0x0010000) len = 2 ; |
| else if (mb < 0x1000000) len = 3 ; |
| else len = 4 ; |
| |
| /* |
| * Write out the multibyte character if s is defined |
| */ |
| if (s) |
| { |
| if ((size_t)(cnt + len) > n) // (size_t) added - JAC |
| break ; /* Cannot stored more bytes */ |
| switch (len) |
| { |
| case 4: *s++ = (mb >> 24) & 0xff ; |
| case 3: *s++ = (mb >> 16) & 0xff ; |
| case 2: *s++ = (mb >> 8) & 0xff ; |
| case 1: *s++ = mb & 0xff ; |
| } |
| } |
| } |
| if (s && ((size_t)cnt < n)) // (size_t) added - JAC |
| *s = '\0' ; /* Terminate the string */ |
| return(cnt) ; |
| } |
| #endif // USING_OPEN_SOURCE_WCSTOMBS |
| |
| #ifdef USING_OPEN_SOURCE_MBTOPC |
| size_t |
| MBTOPC(WChar_t *pwc, char *ts, size_t maxlen, int *err, _LC_charmap_t *hdl) // JAC |
| { |
| uchar_t *s=(uchar_t *)ts ; /* Better to work with unsigned char. */ |
| WChar_t wc ; // JAC |
| #ifdef DONT_NEED_THIS // JAC |
| int idx, row, len ; |
| #else |
| int idx, len ; |
| #endif // DONT_NEED_THIS - JAC |
| |
| /* |
| * This is very similar to MBTOWC. It has an additional parameter *err. |
| * If the character is successfully converted return the number of |
| * bytes in the multibyte character and set *err to 0. If not converted |
| * due to maxlen too small return 0 and set *err to the no of bytes |
| * required to convert. If an illegal character return 0, set *err to -1. |
| */ |
| |
| *err = 0 ; |
| /* |
| * If s is NULL, return 0 |
| */ |
| if (s == NULL) |
| return(0); |
| |
| if (isascii(*s)) |
| { |
| wc = MBGETASCII(*s) ; |
| len = 1 ; |
| } |
| else |
| { |
| idx = MBINDEX (&s, maxlen) ; |
| if (idx == ERR_INPUT_INCOMPLETE) |
| { |
| *err = MBCURMAX ; /* Ask for the maximum MB length */ |
| return(0) ; |
| } |
| if (idx < 0) |
| { |
| *err = -1 ; |
| return(0) ; /* Invalid character */ |
| } |
| if (IS_UCODE(idx)) |
| wc = GET_UCODE(idx) ; |
| else if (ISIDXU(idx)) |
| wc = IDXU_UCS(idx) ; |
| else if ((wc = MBGETVAL(idx)) == BAD) |
| { |
| *err = -1 ; |
| return(0) ; /* Invalid character */ |
| } |
| len = s - (uchar_t *)ts ; |
| } |
| |
| if ((size_t)len > maxlen) // (size_t) added - JAC |
| { |
| *err = len ; |
| return(0) ; /* Not enough buffer */ |
| } |
| if (pwc) *pwc = wc ; |
| return((size_t)len); |
| } |
| #endif // USING_OPEN_SOURCE_MBTOPC |
| |
| #ifdef USING_OPEN_SOURCE_MBSTOPCS |
| size_t MBSTOPCS(WChar_t *pwcs, size_t pwcs_len, const char *s, size_t s_len, /* JAC */ |
| int stopchr, char **endptr, int *err, _LC_charmap_t *hdl) |
| { |
| int pwcs_cnt = 0 ; |
| #ifdef DONT_NEED_THIS // JAC |
| int len ; |
| #endif // DONT_NEED_THIS - JAC |
| uchar_t *us = (uchar_t *)s ; |
| |
| /* |
| * err is 0 if everything works |
| */ |
| *err = 0; |
| |
| /* |
| * Stop the processing if there is no more room for process code |
| * or all the characters in s have been processed. |
| */ |
| while (((size_t)pwcs_cnt < pwcs_len) && (s_len > 0)) // (size_t) added - JAC |
| { |
| /* |
| * If we hit stopchr in s, Set endpointer to the character after |
| * the stopchr and break out of the while |
| */ |
| if (*us == (char) stopchr) |
| { |
| us++ ; |
| break; |
| } |
| |
| /* |
| * Convert s to process code and increment s by the number |
| * of bytes. If the conversion failed, set the endpointer |
| * the the start of the character that failed, and |
| * break out of the while. |
| */ |
| if (isascii(*us)) |
| { |
| pwcs[pwcs_cnt] = MBGETASCII(*us) ; |
| us++, s_len-- ; |
| } |
| else |
| { |
| uchar_t *us_old = us ; |
| WChar_t wc ; // JAC |
| #ifdef DONT_NEED_THIS // JAC |
| int idx, row ; |
| #else |
| int idx ; |
| #endif // DONT_NEED_THIS - JAC |
| |
| idx = MBINDEX(&us, s_len) ; |
| if (idx < 0) |
| { |
| *err = -1 ; |
| break ; /* Invalid character */ |
| } |
| if (IS_UCODE(idx)) |
| wc = GET_UCODE(idx) ; |
| else if (ISIDXU(idx)) |
| wc = IDXU_UCS(idx) ; |
| else if ((wc = MBGETVAL(idx)) == BAD) |
| { |
| *err = -1 ; |
| break ; /* Invalid character */ |
| } |
| if ((s_len -= us - us_old) < 0) |
| { |
| *err = -(int)s_len ; /* Need more buffer */ // (int) added - JAC |
| break ; |
| } |
| pwcs[pwcs_cnt] = wc ; |
| } |
| |
| /* |
| * Increment the process code counter |
| */ |
| pwcs_cnt++; |
| } |
| *endptr = (char *)us ; /* Set the end pointer */ |
| return(pwcs_cnt) ; |
| } |
| #endif // USING_OPEN_SOURCE_MBSTOPCS |
| |
| #endif /* USE_OUR_MB_WC_DATA_TABLES */ |