| /**********************************************************************
|
| // @@@ START COPYRIGHT @@@ |
| // |
| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // |
| // @@@ END COPYRIGHT @@@ |
| **********************************************************************/
|
| /*
|
| * HISTORY
|
| * $Log: mb_lconv.c,v $
|
| * Revision 1.1.10.3 2002/03/11 18:48:04
|
| * Fix QAR 02292 - mbstowcs() problem in UCS-4 locales.
|
| *
|
| * Revision 1.1.10.2 2002/01/28 15:20:17
|
| * Fix wctomb() & wcstombs() problem with UDC codepoints.
|
| *
|
| * Revision 1.1.10.1 2001/12/07 15:37:54
|
| * Remove WCBAD and MBBAD and use only BAD to avoid mixing up.
|
| * Checking for UCODE first before checking for UDC index.
|
| * Introduce a dummy_cell4 table to fix locale compilation failure in
|
| * some @ucs4 locales.
|
| * Change conversion table access methods & add support for HKSCS & cp949.
|
| *
|
| * Revision 1.1.6.1 2000/10/16 18:44:56
|
| * COSIX.Zulu to Yankee merge for GB18030 support.
|
| *
|
| * Revision 1.1.4.3 2000/10/09 21:27:09
|
| * Fix 82769 by setting *err to the MB_CUR_MAX for incomplete MB sequence.
|
| *
|
| * Revision 1.1.4.2 2000/10/04 20:43:40
|
| * Fix 82696 by checking for -1 before calling the IS_UCODE() macro.
|
| *
|
| * Revision 1.1.4.1 2000/08/07 14:33:50
|
| * Support the new GB18030 Chinese character set.
|
| * Rename some macros to have a more consistent naming convention.
|
| *
|
| * Revision 1.1.2.1 2000/01/13 20:25:49
|
| * Multibyte methods for @ucs4 locales.
|
| *
|
| * Revision 1.1.3.2 1996/11/22 17:02:38
|
| * $EndLog$
|
| *
|
| * Multibyte UCS-4 locale conversion module containing templates for
|
| * the following locale conversion routines:
|
| *
|
| * - mblen()
|
| * - mbtowc()
|
| * - wctomb()
|
| * - mbtopc()
|
| * - pctomb()
|
| * - mbstowcs()
|
| * - wcstombs()
|
| * - mbstopcs()
|
| * - pcstombs()
|
| *
|
| * This file may be included multiple times to generate different locale
|
| * conversion routines. The macro "CODESET" is assumed to be predefined.
|
| * It contains the name of the codeset to be supported.
|
| */
|
|
|
| #ifdef USE_OUR_MB_WC_DATA_TABLES
|
|
|
| /*
|
| // This source file contains low-level routines for converting from
|
| // a character set to/from Unicode (UCS4). These routines are our
|
| // replacements for the corresponding OSF routines and were created
|
| // because the DEC/OSF conversion routines/conversion tables were
|
| // highly dependent on use of the Private Use Area (0xE000 - 0xF7FF)
|
| // that unicode.org has reserved for vendor usage. We don't want to
|
| // make heavy use of the P.U.A. (at least not yet) because we want to
|
| // recognize UCS4 values produced by Java or by HP-UX. Consequently,
|
| // it is better for us to stick with de facto standard Unicode values
|
| // for every character we can.
|
| */
|
|
|
| #define _LC_charmap_t int // JAC
|
|
|
| /*
|
| * Our_mbtowc_big5_ucs4() - routine to convert from BIG5 multi-byte
|
| * character string to UCS4 (a.k.a. UCS32, a.k.a. 4-byte Unicode).
|
| *
|
| * Arguments: pwc - pointer to target array of 4-byte UCS4 output chars.
|
| * ts - pointer to input array of multi-byte BIG5 chars.
|
| * maxlen - actual length (in bytes) of input array.
|
| * hdl - dummy ptr to _LC_charmap_t - needed only to
|
| * make our routine take the same arguments
|
| * as the __mbtowc_<cs>_ucs4() routines that we
|
| * generated by the MBTOWC macro.
|
| *
|
| * NOTE: It is the caller's responsibility to ensure output array is
|
| * big enough.
|
| */
|
| #define Min_BIG5_chr 0xA140 /* Min for using lookup table */
|
| #define Max_BIG5_chr 0xF9FF /* Max for using lookup table */
|
| #define BAD_UCS_VAL 0x0000FFFF
|
|
|
| NA_EIDPROC size_t Our_mbtowc_big5_ucs4(WChar_t *pwc, const char *ts, size_t maxlen,
|
| _LC_charmap_t *hdl)
|
| {
|
| uchar_t *s = (uchar_t *)ts ;
|
| WChar_t wc ;
|
| int idx ;
|
| uchar_t chr1;
|
|
|
| if (ts == NULL) return (0); /* If no data to convert */
|
|
|
| if (maxlen == (size_t)0)
|
| return((size_t)-1);
|
|
|
| if ( (chr1 = *s) == '\0') {
|
| if (pwc) *pwc = 0; /* Avoid all function calls */
|
| return (0);
|
| }
|
|
|
| if (isascii(chr1))
|
| {
|
| if (pwc) *pwc = (chr1) ;
|
| return(1) ;
|
| }
|
| else {
|
| if ( maxlen < 2 ) goto err_exit;
|
| idx = ( (chr1) << 8 ) | ( *(s+1) ) ;
|
| if ( (idx >= Min_BIG5_chr) && (idx <= Max_BIG5_chr) ) {
|
| wc = Our_MS_BIG5_tableF[idx - Min_BIG5_chr];
|
| if ( wc == BAD_UCS_VAL )
|
| goto err_exit;
|
| s += 2;
|
| }
|
| else goto err_exit;
|
| }
|
|
|
| if (pwc) *pwc = wc ;
|
| return((size_t)(s - (uchar_t *)ts)) ;
|
|
|
| err_exit:
|
| return((size_t)-1);
|
| }
|
|
|
| /*
|
| * Our_mbtowc_sjis_ucs4() - routine to convert from SJIS multi-byte
|
| * character string to UCS4 (a.k.a. UCS32, a.k.a. 4-byte Unicode).
|
| *
|
| * Arguments: pwc - pointer to target array of 4-byte UCS4 output chars.
|
| * ts - pointer to input array of multi-byte SJIS chars.
|
| * maxlen - actual length (in bytes) of input array.
|
| * hdl - dummy ptr to _LC_charmap_t - needed only to
|
| * make our routine take the same arguments
|
| * as the __mbtowc_<cs>_ucs4() routines that we
|
| * generated by the MBTOWC macro.
|
| *
|
| * NOTE: It is the caller's responsibility to ensure output array is
|
| * big enough.
|
| */
|
| #define Min_SJIS_chr 0x8140 /* Min for using lookup table */
|
| #define Max_SJIS_chr 0xFC4B /* Max for using lookup table */
|
|
|
| NA_EIDPROC size_t Our_mbtowc_sjis_ucs4(WChar_t *pwc, const char *ts, size_t maxlen,
|
| _LC_charmap_t *hdl)
|
| {
|
| uchar_t *s = (uchar_t *)ts ;
|
| WChar_t wc ;
|
| int idx ;
|
| uchar_t chr1;
|
|
|
| if (ts == NULL) return (0); /* If no data to convert */
|
|
|
| if (maxlen == (size_t)0)
|
| return((size_t)-1);
|
|
|
| if ( (chr1 = *s) == '\0') {
|
| if (pwc) *pwc = 0; /* Avoid all function calls */
|
| return (0);
|
| }
|
|
|
| if (isascii(chr1))
|
| {
|
| if (pwc) *pwc = (chr1) ;
|
| return(1) ;
|
| }
|
|
|
| if ( (chr1 >= 0xA1 ) && (chr1 <= 0xDF) ) { /* Handle these algorithmically */
|
| wc = 0xFF61 + chr1 - 0xA1;
|
| s += 1;
|
| }
|
| else if ( (chr1 >= 0xF0 ) && (chr1 <= 0xF9) ) { /* Handle UDC algorithmically */
|
| if ( maxlen < 2 ) goto err_exit;
|
| idx = ( (chr1) << 8 ) | ( *(s+1) ) ;
|
| if ( (idx >= 0xF040) && (idx <= 0xF9FC) ) { /* User-defined character range */
|
| if ( (idx & 0xFF) < 0x40 ) goto err_exit; /* No such char */
|
| if ( (idx & 0xFF) > 0xFC ) goto err_exit; /* No such char */
|
| if ( (idx & 0xFF) == 0x7F ) goto err_exit; /* No such char */
|
|
|
| wc = ((idx & 0x0F00) >> 8) * 188 + ((idx & 0xFF) - 0x40) + 0xE000;
|
| if ( (idx & 0xFF) > 0x7F ) wc -= 1;
|
| s += 2;
|
| }
|
| else goto err_exit;
|
| }
|
| else {
|
| if ( maxlen < 2 ) goto err_exit;
|
| idx = ( (chr1) << 8 ) | ( *(s+1) ) ;
|
| if ( (idx >= Min_SJIS_chr) && (idx <= Max_SJIS_chr) ) {
|
| wc = Our_MS_sjis_tableF[idx - Min_SJIS_chr];
|
| if ( wc == BAD_UCS_VAL )
|
| goto err_exit;
|
| s += 2;
|
| }
|
| else goto err_exit;
|
| }
|
|
|
| if (pwc) *pwc = wc ;
|
| return((size_t)(s - (uchar_t *)ts)) ;
|
|
|
| err_exit:
|
| return((size_t)-1);
|
| }
|
|
|
| /*
|
| * Our_mbtowc_cp949_ucs4() - routine to convert from KSC multi-byte
|
| * character string to UCS4 (a.k.a. UCS32, a.k.a. 4-byte Unicode).
|
| *
|
| * Arguments: pwc - pointer to target array of 4-byte UCS4 output chars.
|
| * ts - pointer to input array of multi-byte KSC chars.
|
| * maxlen - actual length (in bytes) of input array.
|
| * hdl - dummy ptr to _LC_charmap_t - needed only to
|
| * make our routine take the same arguments
|
| * as the __mbtowc_<cs>_ucs4() routines that we
|
| * generated by the MBTOWC macro.
|
| *
|
| * NOTES: It is the caller's responsibility to ensure output array is
|
| * big enough. KSC is short for KS-code and is a Korean
|
| * character set. cp949 is short for CodePage 949 and that
|
| * is MicroSoft's codepage for the Korean character set.
|
| */
|
| #define Min_KSC_chr 0x8141 /* Min for using lookup table */
|
| #define Max_KSC_chr 0xFEFE /* Max for using lookup table */
|
|
|
| NA_EIDPROC size_t Our_mbtowc_cp949_ucs4(WChar_t *pwc, const char *ts, size_t maxlen,
|
| _LC_charmap_t *hdl)
|
| {
|
| uchar_t *s = (uchar_t *)ts ;
|
| WChar_t wc ;
|
| int idx ;
|
| uchar_t chr1;
|
|
|
| if (ts == NULL) return (0); /* If no data to convert */
|
|
|
| if (maxlen == (size_t)0)
|
| return((size_t)-1);
|
|
|
| if ( (chr1 = *s) == '\0') {
|
| if (pwc) *pwc = 0; /* Avoid all function calls */
|
| return (0);
|
| }
|
|
|
| if (isascii(chr1))
|
| {
|
| if (pwc) *pwc = (chr1) ;
|
| return(1) ;
|
| }
|
|
|
| if ( maxlen < 2 ) goto err_exit;
|
| idx = ( (chr1) << 8 ) | ( *(s+1) ) ;
|
| if ( (idx >= 0xC9A1) && (idx <= 0xC9FE) ) { /* Handle UDC algorithmically */
|
| wc = idx - 0xC9A1 + 0xE000;
|
| s += 2;
|
| }
|
| else if ( (idx >= 0xFEA1) && (idx <= 0xFEFE) ) { /* Handle UDC algorithmically */
|
| wc = idx - 0xFEA1 + 0xE05E;
|
| s += 2;
|
| }
|
| else if ( (idx >= Min_KSC_chr) && (idx <= Max_KSC_chr) ) {
|
| wc = Our_MS_KSC_tableF[idx - Min_KSC_chr];
|
| if ( wc == BAD_UCS_VAL )
|
| goto err_exit;
|
| s += 2;
|
| }
|
| else goto err_exit;
|
|
|
| if (pwc) *pwc = wc ;
|
| return((size_t)(s - (uchar_t *)ts)) ;
|
|
|
| err_exit:
|
| return((size_t)-1);
|
| }
|
|
|
| /*
|
| * Our_mbtowc_eucjp_ucs4() - routine to convert from EUC-JP multi-byte
|
| * character string to UCS4 (a.k.a. UCS32, a.k.a. 4-byte Unicode).
|
| *
|
| * Arguments: pwc - pointer to target array of 4-byte UCS4 output chars.
|
| * ts - pointer to input array of multi-byte EUC-JP chars.
|
| * maxlen - actual length (in bytes) of input array.
|
| * hdl - dummy ptr to _LC_charmap_t - needed only to
|
| * make our routine take the same arguments
|
| * as the __mbtowc_<cs>_ucs4() routines that we
|
| * generated by the MBTOWC macro.
|
| *
|
| * NOTE: It is the caller's responsibility to ensure output array is
|
| * big enough.
|
| */
|
| #define EUCJP_ROW_LEN (0xFE - 0xA1 + 1)
|
| #define EUCJP_NUM_ROWS (0xFE - 0xA1 +1)
|
|
|
| NA_EIDPROC size_t Our_mbtowc_eucjp_ucs4(WChar_t *pwc, const char *ts, size_t maxlen,
|
| _LC_charmap_t *hdl)
|
| {
|
| uchar_t *s = (uchar_t *)ts ;
|
| WChar_t wc ;
|
| int idx ;
|
| uchar_t chr1;
|
|
|
| if (ts == NULL) return (0); /* If no data to convert */
|
|
|
| if (maxlen == (size_t)0)
|
| return((size_t)-1);
|
|
|
| if ( (chr1 = *s) == '\0') {
|
| if (pwc) *pwc = 0; /* Avoid all function calls */
|
| return (0);
|
| }
|
|
|
| if (isascii(chr1))
|
| {
|
| if (pwc) *pwc = (chr1) ;
|
| return(1) ;
|
| }
|
|
|
| if ( chr1 == 0x8E ) { /* If this is first byte of chars 0x8EA1 - 0x8EDF */
|
| if ( maxlen < 2 ) goto err_exit;
|
| unsigned char ch2 = *(s+1);
|
| if ( (ch2 >= 0xA1) && (ch2 <= 0xDF) ) {
|
| wc = *(s+1) + 0xFF61 - 0xA1; /* Algorithmically convert! */
|
| s += 2;
|
| }
|
| else goto err_exit;
|
| }
|
| else if ( chr1 == 0x8F ) { /* If this is first byte of a 3-byte char */
|
| if ( maxlen < 3 ) goto err_exit;
|
| idx = ( *(s+1) << 8 ) | *(s+2) ;
|
| if ( (idx >= 0xA1A1) && (idx <= 0xFEFE) &&
|
| ((idx & 0xFF) >= 0xA1 ) && ((idx & 0xFF) <= 0xFE )) {
|
|
|
| idx = ((idx >> 8)-0xA1)*EUCJP_ROW_LEN + ((idx&0xFF)-0xA1);
|
| wc = Our_eucJP_tableF8F[idx];
|
| if ( wc == BAD_UCS_VAL )
|
| goto err_exit;
|
| s += 3;
|
| }
|
| else goto err_exit;
|
| }
|
| else { /* Must be a regular 2-byte char */
|
| if ( maxlen < 2 ) goto err_exit;
|
| idx = ( chr1 << 8 ) | *(s+1) ;
|
| if ( (idx >= 0xA1A1) && (idx <= 0xFEFE) &&
|
| ((idx & 0xFF) >= 0xA1 ) && ((idx & 0xFF) <= 0xFE )) {
|
|
|
| idx = ((idx >> 8)-0xA1)*EUCJP_ROW_LEN + ((idx&0xFF)-0xA1);
|
| wc = Our_eucJP_tableF00[idx];
|
| if ( wc == BAD_UCS_VAL )
|
| goto err_exit;
|
| s += 2;
|
| }
|
| else goto err_exit;
|
| }
|
|
|
| if (pwc) *pwc = wc ;
|
| return((size_t)(s - (uchar_t *)ts)) ;
|
|
|
| err_exit:
|
| return((size_t)-1);
|
| }
|
|
|
| /*
|
| * Our_wctomb_big5_ucs4() - routine to convert from a UCS4 character
|
| * to a multi-byte BIG5 character.
|
| *
|
| * Arguments: s - pointer to target output string.
|
| * wc - The UCS4 character to convert
|
| * hdl - dummy ptr to _LC_charmap_t - needed only to
|
| * make our routine take the same arguments
|
| * as the __mbtowc_<cs>_ucs4() routines that we
|
| * generated by the MBTOWC macro.
|
| *
|
| * NOTE: It is the caller's responsibility to ensure output array is
|
| * big enough.
|
| */
|
| #define Max_BIG5_UCS_val (0xFFFE)
|
|
|
| NA_EIDPROC int Our_wctomb_big5_ucs4(char *s, WChar_t wc, _LC_charmap_t *hdl)
|
| {
|
| WChar_t mb = 0 ;
|
|
|
| if (s == NULL)
|
| return(0) ;
|
|
|
| if (isascii(wc))
|
| mb = (wc) ;
|
| else { /* if not an ASCII char */
|
| if ( wc <= Max_BIG5_UCS_val ) {
|
| mb = Our_MS_BIG5_tableB[wc];
|
| if ( mb == 0xFFFF )
|
| mb = (WChar_t)BAD;
|
| }
|
| else mb = (WChar_t)BAD;
|
| }
|
|
|
| if (mb == (WChar_t)BAD)
|
| return(-1);
|
|
|
| if (mb < 0x100)
|
| {
|
| *s = (char)( mb & 0xff );
|
| return(1) ;
|
| }
|
| else /* Must be a 2-byte character ... BIG5 has none 3-byte or longer */
|
| {
|
| *s++ = (char)( (mb >> 8) & 0xff );
|
| *s = (char)( mb & 0xff );
|
| return(2) ;
|
| }
|
| }
|
|
|
| /*
|
| * Our_wctomb_cp949_ucs4() - routine to convert from a UCS4 character
|
| * to a multi-byte KSC character.
|
| *
|
| * Arguments: s - pointer to target output string.
|
| * wc - The UCS4 character to convert
|
| * hdl - dummy ptr to _LC_charmap_t - needed only to
|
| * make our routine take the same arguments
|
| * as the __mbtowc_<cs>_ucs4() routines that we
|
| * generated by the MBTOWC macro.
|
| *
|
| * NOTE: It is the caller's responsibility to ensure output array is
|
| * big enough.
|
| */
|
| #define Max_KSC_UCS_val (0xFFFE)
|
|
|
| NA_EIDPROC int Our_wctomb_cp949_ucs4(char *s, WChar_t wc, _LC_charmap_t *hdl)
|
| {
|
| WChar_t mb = 0 ;
|
|
|
| if (s == NULL)
|
| return(0) ;
|
|
|
| if (isascii(wc))
|
| mb = (wc) ;
|
| else { /* if not an ASCII char */
|
| if ( (wc >= 0xE000) && (wc <= 0xE05D) ) { /* Handle UDC algorithmically */
|
| mb = wc - 0xE000 + 0xC9A1;
|
| }
|
| else if ( (wc >= 0xE05E) && (wc <= 0xE0BB) ) { /* Handle UDC algorithmically */
|
| mb = wc - 0xE05E + 0xFEA1;
|
| }
|
| else if ( wc <= Max_KSC_UCS_val ) {
|
| mb = Our_MS_KSC_tableB[wc];
|
| if ( mb == 0xFFFF )
|
| mb = (WChar_t)BAD;
|
| }
|
| else mb = (WChar_t)BAD;
|
| }
|
|
|
| if (mb == (WChar_t)BAD)
|
| return(-1);
|
|
|
| if (mb < 0x100)
|
| {
|
| *s = (char)( mb & 0xff );
|
| return(1) ;
|
| }
|
| else /* Must be a 2-byte character ... KSC has none 3-byte or longer */
|
| {
|
| *s++ = (char)( (mb >> 8) & 0xff );
|
| *s = (char)( mb & 0xff );
|
| return(2) ;
|
| }
|
| }
|
|
|
| /*
|
| * Our_wctomb_sjis_ucs4() - routine to convert from a UCS4 character
|
| * to a multi-byte SJIS character.
|
| *
|
| * Arguments: s - pointer to target output string.
|
| * wc - The UCS4 character to convert
|
| * hdl - dummy ptr to _LC_charmap_t - needed only to
|
| * make our routine take the same arguments
|
| * as the __mbtowc_<cs>_ucs4() routines that we
|
| * generated by the MBTOWC macro.
|
| *
|
| * NOTE: It is the caller's responsibility to ensure output array is
|
| * big enough.
|
| */
|
| #define Max_SJIS_UCS_val (0xFFFE)
|
|
|
| NA_EIDPROC int Our_wctomb_sjis_ucs4(char *s, WChar_t wc, _LC_charmap_t *hdl)
|
| {
|
| WChar_t mb = 0 ;
|
|
|
| if (s == NULL)
|
| return(0) ;
|
|
|
| if (isascii(wc))
|
| mb = (wc) ;
|
| else if ( (wc >= 0xE000) && ( wc <= 0xE757 ) ) { /* if user-defined char */
|
| mb = 0xF040;
|
| mb += ( (wc - 0xE000) / 188 ) * 0x100; /* Get 2nd hex digit right */
|
| mb += ( (wc - 0xE000) % 188 ) ; /* Get last 2 hex digits right */
|
| if ( (mb & 0xFF) >= 0x7F ) mb += 1; /* SJIS ending with 0x7F not used */
|
| }
|
| else { /* If not an ASCII char & not UDC char */
|
| if ( wc <= Max_SJIS_UCS_val ) {
|
| mb = Our_MS_sjis_tableB[wc];
|
| if ( mb == 0xFFFF )
|
| mb = (WChar_t)BAD;
|
| }
|
| else mb = (WChar_t)BAD;
|
| }
|
|
|
| if (mb == (WChar_t)BAD)
|
| return(-1);
|
|
|
| if (mb < 0x100)
|
| {
|
| *s = (char)( mb & 0xff );
|
| return(1) ;
|
| }
|
| else /* Must be a 2-byte character ... SJIS has none 3-byte or longer */
|
| {
|
| *s++ = (char)( (mb >> 8) & 0xff );
|
| *s = (char)( mb & 0xff );
|
| return(2) ;
|
| }
|
| }
|
|
|
| /*
|
| * Our_wctomb_eucjp_ucs4() - routine to convert from a UCS4 character
|
| * to a multi-byte EUC-JP character.
|
| *
|
| * Arguments: s - pointer to target output string.
|
| * wc - The UCS4 character to convert
|
| * hdl - dummy ptr to _LC_charmap_t - needed only to
|
| * make our routine take the same arguments
|
| * as the __mbtowc_<cs>_ucs4() routines that we
|
| * generated by the MBTOWC macro.
|
| *
|
| * NOTE: It is the caller's responsibility to ensure output array is
|
| * big enough.
|
| */
|
| NA_EIDPROC int Our_wctomb_eucjp_ucs4(char *s, WChar_t wc, _LC_charmap_t *hdl)
|
| {
|
| WChar_t mb = 0 ;
|
|
|
| if (s == NULL)
|
| return(0) ;
|
|
|
| if (isascii(wc))
|
| mb = (wc) ;
|
| else if ( (wc >= 0xFF61) && (wc <=0xFF9F) )
|
| mb = wc - 0xFF61 + 0x8EA1; /* Algorithmically convert! */
|
| else {
|
| mb = Our_eucJP_tableB[wc];
|
| if ( mb == 0x0000FFFF )
|
| mb = (WChar_t)BAD;
|
| /*
|
| * If Flag bit for this Unicode Value says to prepend 0x8F
|
| * then do so.
|
| */
|
| if ( Our_eucJP_tableB_8F_FB[wc/32] & 1 << (31-(wc%32)) )
|
| mb |= 0x8F0000;
|
| }
|
|
|
| if (mb == (WChar_t)BAD)
|
| return(-1);
|
|
|
| if (mb < 0x100)
|
| {
|
| *s = (char)( mb & 0xff );
|
| return(1) ;
|
| }
|
| else if (mb < 0x10000)
|
| {
|
| *s++ = (char)( (mb >> 8) & 0xff );
|
| *s = (char)( mb & 0xff );
|
| return(2) ;
|
| }
|
| else /* Must be a 3-byte character ... EUCJP has none 4-byte or longer */
|
| {
|
| *s++ = (char)( (mb >> 16) & 0xff );
|
| *s++ = (char)( (mb >> 8) & 0xff );
|
| *s = (char)( mb & 0xff );
|
| return(3) ;
|
| }
|
| }
|
|
|
| #else /* USE_OUR_MB_WC_DATA_TABLES */
|
|
|
| #ifndef MB_LCONV_C
|
| #define MB_LCONV_C 1
|
|
|
| /*
|
| * Generic macros to access the MB to WC row and cell tables
|
| * Double redirection is needed here to fully resolve the macro parameter
|
| * correctly.
|
| */
|
| #define __MBCELL2(cs) _ ## cs ## _to_ucs_cell2
|
| #define __MBCELL4(cs) _ ## cs ## _to_ucs_cell4
|
| #define __MBROW(cs) _ ## cs ## _to_ucs_row
|
| #define __MBINDEX(cs) __ ## cs ## _index
|
| #define __MBDMAP(cs) cs ## _to_ucs_DMAP
|
| #define __MBDMAPVAL(cs) cs ## _to_ucs_dmap
|
|
|
| #define _MBCELL2(cs) __MBCELL2(cs)
|
| #define _MBCELL4(cs) __MBCELL4(cs)
|
| #define _MBROW(cs) __MBROW (cs)
|
| #define _MBROWSIZE(cs) (sizeof(__MBROW(cs))/sizeof(__MBROW(cs)[0]))
|
| #define _MBINDEX(cs) __MBINDEX (cs)
|
| #define _MBDMAP(cs) __MBDMAP (cs)
|
| #define _MBDMAPVAL(cs) __MBDMAPVAL(cs)
|
|
|
| /*
|
| * Generic macros to access the WC to MB row and cell tables
|
| */
|
| #define __WCCELL2(cs) _ucs_to_ ## cs ## _cell2
|
| #define __WCCELL4(cs) _ucs_to_ ## cs ## _cell4
|
| #define __WCROW(cs) _ucs_to_ ## cs ## _row
|
| #define __WCDMAP(cs) ucs_to_ ## cs ## _DMAP
|
| #define __WCDMAPVAL(cs) ucs_to_ ## cs ## _dmap
|
|
|
| #define _WCCELL2(cs) __WCCELL2(cs)
|
| #define _WCCELL4(cs) __WCCELL4(cs)
|
| #define _WCROW(cs) __WCROW (cs)
|
| #define _WCROWSIZE(cs) (sizeof(__WCROW(cs))/sizeof(__WCROW(cs)[0]))
|
| #define _WCDMAP(cs) __WCDMAP (cs)
|
| #define _WCDMAPVAL(cs) __WCDMAPVAL(cs)
|
|
|
| /*
|
| * Generic MB/WC conversion routine name macros
|
| */
|
| #define __MBLEN(cs) __mblen_ ## cs ## _ucs4
|
| #define __MBTOWC(cs) __mbtowc_ ## cs ## _ucs4
|
| #define __WCTOMB(cs) __wctomb_ ## cs ## _ucs4
|
| #define __MBTOPC(cs) __mbtopc_ ## cs ## _ucs4
|
| #define __MBSTOWCS(cs) __mbstowcs_ ## cs ## _ucs4
|
| #define __WCSTOMBS(cs) __wcstombs_ ## cs ## _ucs4
|
| #define __MBSTOPCS(cs) __mbstopcs_ ## cs ## _ucs4
|
| #define __UDCTOMB(cs) __UDC_to_ ## cs
|
| #define __UCSTOMB(cs) __UCS_to_ ## cs
|
| #define __UDCFUNC(cs) __UDC_to_ ## cs ## _func
|
| #define __UCSFUNC(cs) __UCS_to_ ## cs ## _func
|
| #define __WCGETVAL(cs) __wcgetval_ ## cs
|
| #define __MBGETVAL(cs) __mbgetval_ ## cs
|
| #define __UTF16ToMB(cs) UTF16To_ ## cs
|
| #define __MBToUTF16(cs) cs ## _ToUTF16
|
|
|
| #define _MBLEN(cs) __MBLEN (cs)
|
| #define _MBTOWC(cs) __MBTOWC (cs)
|
| #define _WCTOMB(cs) __WCTOMB (cs)
|
| #define _MBTOPC(cs) __MBTOPC (cs)
|
| #define _MBSTOWCS(cs) __MBSTOWCS(cs)
|
| #define _WCSTOMBS(cs) __WCSTOMBS(cs)
|
| #define _MBSTOPCS(cs) __MBSTOPCS(cs)
|
| #define _UDCTOMB(cs) __UDCTOMB (cs)
|
| #define _UCSTOMB(cs) __UCSTOMB (cs)
|
| #define _UDCFUNC(cs) __UDCFUNC (cs)
|
| #define _UCSFUNC(cs) __UCSFUNC (cs)
|
| #define _WCGETVAL(cs) __WCGETVAL(cs)
|
| #define _MBGETVAL(cs) __MBGETVAL(cs)
|
| #define _UTF16ToMB(cs) __UTF16ToMB(cs)
|
| #define _MBToUTF16(cs) __MBToUTF16(cs)
|
|
|
| /*
|
| * Miscellaneous macros
|
| */
|
| #define __MBCURMAX(cs) MBCURMAX_ ## cs
|
| #define _MBCURMAX(cs) __MBCURMAX(cs)
|
|
|
| /*
|
| * Character set MB_LEN_MAX macros
|
| */
|
| #define MBCURMAX_big5 2
|
| #define MBCURMAX_hkscs 2
|
| #define MBCURMAX_cp949 2
|
| #define MBCURMAX_dechanyu 4
|
| #define MBCURMAX_dechanzi 2
|
| #define MBCURMAX_deckanji 2
|
| #define MBCURMAX_deckorean 2
|
| #define MBCURMAX_eucjp 3
|
| #define MBCURMAX_euckr 2
|
| #define MBCURMAX_euctw 4
|
| #define MBCURMAX_gb18030 4
|
| #define MBCURMAX_gbk 2
|
| #define MBCURMAX_sdeckanji 3
|
| #define MBCURMAX_sjis 2
|
|
|
| /*
|
| * Dummy pctomb() and pcstombs() routines
|
| */
|
| NA_EIDPROC int __pctomb_mb_ucs4 () { return(-1) ; }
|
| NA_EIDPROC int __pcstombs_mb_ucs4() { return(-1) ; }
|
|
|
| /*
|
| * Dummy cell4_t table
|
| */
|
| static cell4_t dummy_cell4[1] = { 0x0000 }; // initialize it
|
|
|
| #endif
|
|
|
| /*
|
| * Codeset specific macros to access the MB to WC row and cell tables
|
| */
|
| #undef MBCELL2
|
| #undef MBCELL4
|
| #undef MBROW
|
| #undef MBROWSIZE
|
| #undef MBINDEX
|
| #undef MBGETASCII
|
| #undef MBGETVAL
|
| #undef MBDMAP
|
| #undef MBDMAPVAL
|
| #undef MBCURMAX
|
|
|
| #define MBCURMAX _MBCURMAX (CODESET)
|
| #define MBCELL2 _MBCELL2 (CODESET)
|
| #define MBCELL4 _MBCELL4 (CODESET)
|
| #define MBROW _MBROW (CODESET)
|
| #define MBROWSIZE _MBROWSIZE(CODESET)
|
| #define MBDMAP _MBDMAP (CODESET)
|
| #define MBDMAPVAL _MBDMAPVAL(CODESET)
|
| #define MBGETVAL _MBGETVAL (CODESET)
|
| #define MBINDEX(x,y) _MBINDEX (CODESET)(NULL,x,y)
|
| #define MBGETASCII(c) (MBDMAPVAL ? MBCELL2[0][c] \
|
| : MBCELL2[MBROW[ROW(c)]][COL(c)])
|
| /*
|
| * Codeset specific macros to access the WC to MB row and cell tables
|
| */
|
| #undef WCCELL2
|
| #undef WCCELL4
|
| #undef WCROW
|
| #undef WCROWSIZE
|
| #undef WCGETASCII
|
| #undef WCGETVAL
|
| #undef WCISBAD
|
| #undef WCDMAP
|
| #undef WCDMAPVAL
|
|
|
| #define WCCELL2 _WCCELL2 (CODESET)
|
| #define WCCELL4 _WCCELL4 (CODESET)
|
| #define WCROW _WCROW (CODESET)
|
| #define WCROWSIZE _WCROWSIZE(CODESET)
|
| #define WCDMAP _WCDMAP (CODESET)
|
| #define WCDMAPVAL _WCDMAPVAL(CODESET)
|
| #define WCGETVAL _WCGETVAL (CODESET)
|
| #define WCGETASCII(c) (WCDMAPVAL ? WCCELL2[0][c] \
|
| : WCCELL2[WCROW[ROW(c)]][COL(c)])
|
| /*
|
| * Codeset specific MB/WC conversion routine name macros
|
| */
|
| #undef MBLEN
|
| #undef MBTOWC
|
| #undef WCTOMB
|
| #undef MBTOPC
|
| #undef MBSTOWCS
|
| #undef WCSTOMBS
|
| #undef MBSTOPCS
|
| #undef UDCTOMB
|
| #undef UDCFUNC
|
| #undef UTF16ToMB
|
| #undef MBToUTF16
|
|
|
| #define MBLEN _MBLEN (CODESET)
|
| #define MBTOWC _MBTOWC (CODESET)
|
| #define WCTOMB _WCTOMB (CODESET)
|
| #define MBTOPC _MBTOPC (CODESET)
|
| #define PCTOMB _PCTOMB (CODESET)
|
| #define MBSTOWCS _MBSTOWCS(CODESET)
|
| #define WCSTOMBS _WCSTOMBS(CODESET)
|
| #define MBSTOPCS _MBSTOPCS(CODESET)
|
| #define PCSTOMBS _PCSTOMBS(CODESET)
|
| #define UDCTOMB _UDCTOMB (CODESET)
|
| #define UCSTOMB _UCSTOMB (CODESET)
|
| #define UDCFUNC _UDCFUNC (CODESET)
|
| #define UCSFUNC _UCSFUNC (CODESET)
|
| #define UTF16ToMB _UTF16ToMB(CODESET)
|
| #define MBToUTF16 _MBToUTF16(CODESET)
|
|
|
| const static udcfunc_t UDCFUNC = UDCTOMB ;
|
| const static udcfunc_t UCSFUNC = UCSTOMB ;
|
| const static int MBDMAPVAL = MBDMAP ;
|
| const static int WCDMAPVAL = WCDMAP ;
|
|
|
| /*-----------------------[ Internal inline functions ]-----------------------*/
|
|
|
| // #pragma inline (WCGETVAL, MBGETVAL) // BRL & JAC
|
|
|
| /*
|
| * Map a wide character code (UCS) to its multibyte format
|
| */
|
| NA_EIDPROC inline static WChar_t WCGETVAL(WChar_t wc) //JAC
|
| {
|
| int row = ROW(wc) ;
|
| if ((row >= WCROWSIZE) || ((row = WCROW[row]) == UCS2_BAD))
|
| return(BAD) ;
|
| if (WCCELL4 && (row > ROW_MASK))
|
| return((WCCELL4 ? WCCELL4 : dummy_cell4)[MASKROW(row)][COL(wc)]) ;
|
| else
|
| {
|
| WChar_t mb = WCCELL2[row][COL(wc)] ;
|
| return((mb == UCS2_BAD) ? BAD : mb) ;
|
| }
|
| }
|
|
|
| /*
|
| * Map a multibyte index to wide character encoding
|
| */
|
| NA_EIDPROC inline static WChar_t MBGETVAL(int idx) //JAC
|
| {
|
| int row = ROW(idx) ;
|
| if ((row >= MBROWSIZE) || ((row = MBROW[row]) == UCS2_BAD))
|
| return(BAD) ;
|
| if (MBCELL4 && (row > ROW_MASK))
|
| return((MBCELL4 ? MBCELL4 : dummy_cell4)[MASKROW(row)][COL(idx)]) ;
|
| else
|
| {
|
| WChar_t wc = MBCELL2[row][COL(idx)] ; //JAC
|
| return((wc == UCS2_BAD) ? BAD : wc) ;
|
| }
|
| }
|
|
|
| /*--------------------[ Conversion routines start here ]---------------------*/
|
|
|
| #ifdef USING_OPEN_SOURCE_MBLEN
|
| NA_EIDPROC int MBLEN(const char *ts, size_t maxlen, _LC_charmap_t *hdl)
|
| {
|
| uchar_t *s = (uchar_t *)ts ;
|
| #ifdef DONT_NEED_THIS // JAC
|
| int idx, row ;
|
| #else
|
| int idx ;
|
| #endif // DONT_NEED_THIS - JAC
|
|
|
| if ((s == NULL) || (*s == '\0'))
|
| return (0);
|
|
|
| /*
|
| * If maxlen is zero then treat it as an illegal character - same
|
| * as for the non-UCS locale.
|
| */
|
| if (maxlen == (size_t)0)
|
| {
|
| #ifdef DONT_NEED_THIS // JAC
|
| _Seterrno(EILSEQ);
|
| #endif // DONT_NEED_THIS - JAC
|
| return((size_t)-1);
|
| }
|
|
|
| if (isascii(*s))
|
| return(1) ;
|
| idx = MBINDEX(&s, maxlen) ;
|
| if (idx == ERR_INPUT_INCOMPLETE)
|
| return((size_t)-2) ; /* Input incomplete */
|
| else if ((idx == ERR_INVALID_CHAR) ||
|
| (!ISIDXU(idx) && !IS_UCODE(idx) && (MBGETVAL(idx) == BAD)))
|
| {
|
| #ifdef DONT_NEED_THIS // JAC
|
| _Seterrno(EILSEQ) ;
|
| #endif // DONT_NEED_THIS - JAC
|
| return((size_t)-1);
|
| }
|
| return((size_t)(s - (uchar_t *)ts)) ;
|
| }
|
| #endif // USING_OPEN_SOURCE_MBLEN
|
|
|
| NA_EIDPROC size_t MBTOWC(WChar_t *pwc, const char *ts, size_t maxlen, _LC_charmap_t *hdl) // JAC
|
| {
|
| uchar_t *s = (uchar_t *)ts ;
|
| WChar_t wc ; //JAC
|
| #ifdef DONT_NEED_THIS // JAC
|
| int idx, row ;
|
| #else
|
| int idx ;
|
| #endif // DONT_NEED_THIS - JAC
|
|
|
| /*
|
| * If ts == NULL, return non-zero or zero if character encodings
|
| * do or do not have state-dependent encodings
|
| */
|
| if (ts == NULL) return (0); /* No state dependent encodings */
|
|
|
| /*
|
| * If maxlen is zero then treat it as an illegal character - same
|
| * as for the non-UCS locale.
|
| */
|
| if (maxlen == (size_t)0)
|
| {
|
| #ifdef DONT_NEED_THIS // JAC
|
| _Seterrno(EILSEQ);
|
| #endif // DONT_NEED_THIS - JAC
|
| return((size_t)-1);
|
| }
|
|
|
| if (*s == '\0')
|
| {
|
| /* No need to take the hit of a function call */
|
| if (pwc) *pwc = 0;
|
| return (0);
|
| }
|
|
|
| if (isascii(*s))
|
| {
|
| if (pwc) *pwc = MBGETASCII(*s) ;
|
| return(1) ;
|
| }
|
|
|
| #ifdef OUR_CS_GB18030_specific /* Deal with 8431A438 and 9 separately */
|
| if ( (*s == 0x84) && ( *(s+1) == 0x31 ) && (*(s+2) == 0xA4) ) {
|
| if ( *(s+3) == 0x38 ) {
|
| wc = 0x0FFFE;
|
| if (pwc) *pwc = wc ;
|
| return((size_t)(4)) ;
|
| }
|
| if ( *(s+3) == 0x39 ) {
|
| wc = 0x0FFFF;
|
| if (pwc) *pwc = wc ;
|
| return((size_t)(4)) ;
|
| }
|
| }
|
| #endif /* OUR_CS_GB18030_specific */
|
| #if defined(OUR_CS_GBK_specific)
|
| if ( *s == 0x80 ) { /* Handle Euro Sign that GBK defines as 0x80 */
|
| wc = 0x020AC;
|
| if (pwc) *pwc = wc ;
|
| return((size_t)(1)) ; // Return length of input char in bytes.
|
| }
|
| #endif /* OUR_CS_GBK_specific */
|
| #if defined(OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific)
|
| if ( *s == 0xA9 ) { /* Disallow range of UDCs - since not UDCs in GB18030 */
|
| if ( (*(s+1) >= 0x89) && (*(s+1) <= 0x95) )
|
| goto err_exit ;
|
| }
|
| if ( *s == 0xFE ) { /* Disallow range of UDCs - since not UDCs in GB18030 */
|
| if ( (*(s+1) >= 0x50) && (*(s+1) <= 0x9F) )
|
| goto err_exit ;
|
| }
|
| if ( *s == 0xA2 ) { /* Disallow 0xA2E3 UDC - since not UDC in GB18030 */
|
| if ( *(s+1) == 0xE3 )
|
| goto err_exit ;
|
| }
|
| #endif /* (OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific) */
|
|
|
| idx = MBINDEX(&s, maxlen) ;
|
| if (idx == ERR_INPUT_INCOMPLETE)
|
| return((size_t)-2) ; /* Input incomplete */
|
| else if (idx == ERR_INVALID_CHAR)
|
| goto err_exit ;
|
| else if (IS_UCODE(idx))
|
| wc = GET_UCODE(idx) ;
|
| else if (ISIDXU(idx))
|
| wc = IDXU_UCS(idx) ;
|
| else if ((wc = MBGETVAL(idx)) == BAD)
|
| goto err_exit ;
|
|
|
| #if defined(OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific)
|
| /*
|
| * NOTE: Because gb2312 and gb18030 share data tables, it is
|
| * possible that MBGETVAL() returned a gb18030 char. Here
|
| * we explicitly rule those out for gb2312. These rules
|
| * may need changes in the future if more characters are
|
| * added to gb2312.
|
| */
|
| /***************************************
|
| NOTE: Even though the official GB2312 doesn't support the following
|
| 5 characters, we decided to allow them because HP-UX does.
|
| ***************************************/
|
| #if 0
|
| if (wc == 0x0251) goto err_exit ;
|
| if (wc == 0x0261) goto err_exit ;
|
| if (wc == 0x0144) goto err_exit ;
|
| if (wc == 0x0148) goto err_exit ;
|
| if (wc == 0x01F9) goto err_exit ;
|
| #endif
|
| #if defined(OUR_CS_GBK_specific)
|
| /***************************************
|
| NOTE: Even though the official GBK doesn't support User-Defined chars
|
| in the range U+E000 - U+0xE8FF, we decided to allow 0xE000 - 0xE765
|
| because BOTH Java and GB18030 allow them. Java allows a few more,
|
| but don't see how to support those while using GB18030 tables.
|
| ***************************************/
|
| if ( (wc >= 0xE766) && (wc <= 0xE8FF) && (wc != 0xE7C7) ) goto err_exit ;
|
|
|
| #else /* (OUR_CS_GB2312_specific) */
|
| /***************************************
|
| NOTE: Even though the official GB2312 doesn't support 0xA8BC mapping to
|
| U+0xE7C7, we decided to allow it because HP-UX does. The rest of
|
| the characters ruled out by the following 2 lines are not part of
|
| GB2312 and not supported by HP-UX.
|
| ***************************************/
|
| if ( (wc >= 0xE000) && (wc <= 0xFF00) && !(wc == 0xE7C7)) goto err_exit ;
|
| if ( (wc >= 0x2170) && (wc <= 0x2179) ) goto err_exit ;
|
|
|
| /***************************************
|
| NOTE: DEC/OSF code maps 0xA1AA to U+0x2014. So does the SUN mappings for
|
| the GB18030 character set. However, HP-UX, Java, and GNU map it
|
| to U+0x2015. Yuk! HP-China tells us to go with HP-UX's way.
|
| ***************************************/
|
| if ( wc == 0x2014 ) wc = 0x2015;
|
| #endif
|
|
|
| #endif /* defined(OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific) */
|
|
|
| if (pwc) *pwc = wc ;
|
| return((size_t)(s - (uchar_t *)ts)) ;
|
|
|
| err_exit:
|
| #ifdef DONT_NEED_THIS // JAC
|
| _Seterrno(EILSEQ) ;
|
| #endif // DONT_NEED_THIS - JAC
|
| return((size_t)-1);
|
| }
|
|
|
| NA_EIDPROC int WCTOMB(char *s, WChar_t wc, _LC_charmap_t *hdl) // JAC
|
| {
|
| WChar_t mb = 0 ;
|
| #ifdef DONT_NEED_THIS // JAC
|
| int row ;
|
| #endif // DONT_NEED_THIS - JAC
|
|
|
| /*
|
| * If s is NULL, return 0
|
| */
|
| if (s == NULL)
|
| return(0) ;
|
|
|
| #ifdef OUR_CS_GB18030_specific /* Deal with 8431A438 and 9 separately */
|
| if ( wc == 0xFFFE ) {
|
| mb = 0x8431A438;
|
| goto success_exit;
|
| }
|
| if ( wc == 0xFFFF ) {
|
| mb = 0x8431A439;
|
| goto success_exit;
|
| }
|
| #endif /* OUR_CS_GB18030_specific */
|
| if (isascii(wc))
|
| mb = WCGETASCII(wc) ;
|
| else if (UCS_UDC(wc) && UDCFUNC)
|
| mb = (*UDCFUNC)(wc) ;
|
| /*
|
| * UDCFUNC may return 0. In this case, look up the mapping table for
|
| * the correct mb value.
|
| */
|
| if (wc && (mb == 0))
|
| {
|
| mb = WCGETVAL(wc) ;
|
| if ((mb == (WChar_t)BAD) && UCSFUNC) //JAC
|
| mb = (*UCSFUNC)(wc) ;
|
| }
|
| #if defined(OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific)
|
| /*
|
| * NOTE: Because gb2312 and gb18030 share data tables, it is
|
| * possible that WCGETVAL() returned a gb18030 char. Here
|
| * we explicitly rule those out for gb2312. These rules
|
| * may need changes in the future if more characters are
|
| * added to gb2312.
|
| */
|
| #if defined(OUR_CS_GBK_specific)
|
| if ( wc == 0x20AC ) /* Handle Euro Sign that GBK defines as 0x80 */
|
| mb = (WChar_t)(0x0080);
|
| /***************************************
|
| NOTE: Even though the official GBK doesn't support User-Defined chars
|
| in the range U+E000 - U+0xE8FF, we decided to allow 0xE000 - 0xE765
|
| because BOTH Java and GB18030 allow them. Java allows a few more,
|
| but don't see how to support those while using GB18030 tables.
|
| ***************************************/
|
| if ( (wc >= 0xE766) && (wc <= 0xE8FF) && (wc != 0xE7C7) )
|
| mb = (WChar_t)BAD;
|
|
|
| if ( (mb >= 0xA989) && (mb <= 0xA995) ) /* Disallow range of UDCs - since not UDCs in GB18030 */
|
| mb = (WChar_t)BAD;
|
|
|
| if ( (mb >= 0xFE50) && (mb <= 0xFE9F) ) /* Disallow range of UDCs - since not UDCs in GB18030 */
|
| mb = (WChar_t)BAD;
|
|
|
| if ( mb == 0xA2E3 ) /* Disallow 0xA2E3 UDC - since not UDC in GB18030 */
|
| mb = (WChar_t)BAD;
|
|
|
|
|
| #else /* Specific to GB2312 */
|
| if ( ( (wc >= 0xE000) && (wc <= 0xFF00) && !(wc==0xE7C7) ) ||
|
| ( (wc >= 0x2170) && (wc <= 0x2179) ) )
|
| mb = (WChar_t)BAD;
|
|
|
| if ( (wc == 0x2014) || (wc == 0x2015) )
|
| mb = (WChar_t)(0xA1AA);
|
| #endif
|
|
|
| #endif /* defined(OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific) */
|
|
|
| if (mb == (WChar_t)BAD) //JAC
|
| {
|
| #ifdef DONT_NEED_THIS // JAC
|
| _Seterrno(EILSEQ) ;
|
| #endif // DONT_NEED_THIS - JAC
|
| return(-1);
|
| }
|
| if (mb < 0x100)
|
| {
|
| *s = (char)( mb & 0xff ); // JAC
|
| return(1) ;
|
| }
|
| else if (mb < 0x10000)
|
| {
|
| *s++ = (char)( (mb >> 8) & 0xff ); // JAC
|
| *s = (char)( mb & 0xff ); // JAC
|
| return(2) ;
|
| }
|
| else if (mb < 0x1000000)
|
| {
|
| *s++ = (char)( (mb >> 16) & 0xff ); // JAC
|
| *s++ = (char)( (mb >> 8) & 0xff ); // JAC
|
| *s = (char)( mb & 0xff ); // JAC
|
| return(3) ;
|
| }
|
| else
|
| {
|
| #ifdef OUR_CS_GB18030_specific /* Deal with 8431A438 and 9 separately */
|
| success_exit:
|
| #endif /* OUR_CS_GB18030_specific */
|
| *s++ = (char)( (mb >> 24) & 0xff ); // JAC
|
| *s++ = (char)( (mb >> 16) & 0xff ); // JAC
|
| *s++ = (char)( (mb >> 8) & 0xff ); // JAC
|
| *s = (char)( mb & 0xff ); // JAC
|
| return(4) ;
|
| }
|
| }
|
|
|
| #ifdef USING_OPEN_SOURCE_MBSTOWCS
|
| NA_EIDPROC size_t MBSTOWCS(WChar_t *pwcs, const char *ts, size_t n, _LC_charmap_t *hdl) // JAC
|
| {
|
| uchar_t *s = (uchar_t *)ts ;
|
| WChar_t wc ; // JAC
|
| #ifdef DONT_NEED_THIS // JAC
|
| int cnt, idx, row ;
|
| #else
|
| int cnt, idx ;
|
| #endif // DONT_NEED_THIS - JAC
|
|
|
| if (s == NULL)
|
| return(0) ;
|
| /*
|
| * Fix QAR 92292 - UCS-4 locale mbstowcs problem
|
| */
|
| if (*s == '\0')
|
| {
|
| if (pwcs && (n >= 1)) *pwcs = 0 ;
|
| return(0) ;
|
| }
|
|
|
| if (pwcs == NULL)
|
| {
|
| /*
|
| * Count the number of multibyte characters in s
|
| */
|
| for (cnt = 0 ; *s != '\0' ; cnt++)
|
| {
|
| if (isascii(*s))
|
| {
|
| s++ ;
|
| continue ;
|
| }
|
| idx = MBINDEX(&s, MBCURMAX) ;
|
| if (idx < 0)
|
| {
|
| #ifdef DONT_NEED_THIS // JAC
|
| _Seterrno(EILSEQ) ;
|
| #endif // DONT_NEED_THIS - JAC
|
| return((size_t)-1);
|
| }
|
| }
|
| return(cnt) ;
|
| }
|
|
|
| for (cnt = 0 ; (*s != '\0') && ((size_t)cnt < n) ; cnt++) // (size_t) added - JAC
|
| {
|
| if (isascii(*s))
|
| {
|
| *pwcs++ = MBGETASCII(*s) ;
|
| s++ ;
|
| continue ;
|
| }
|
| idx = MBINDEX(&s, MBCURMAX) ;
|
| if (idx < 0)
|
| {
|
| #ifdef DONT_NEED_THIS // JAC
|
| _Seterrno(EILSEQ) ;
|
| #endif // DONT_NEED_THIS - JAC
|
| return((size_t)-1);
|
| }
|
| if (IS_UCODE(idx))
|
| wc = GET_UCODE(idx) ;
|
| else if (ISIDXU(idx))
|
| wc = IDXU_UCS(idx) ;
|
| else if ((wc = MBGETVAL(idx)) == BAD)
|
| {
|
| #ifdef DONT_NEED_THIS // JAC
|
| _Seterrno(EILSEQ) ;
|
| #endif // DONT_NEED_THIS - JAC
|
| return((size_t)-1);
|
| }
|
| *pwcs++ = wc ;
|
| }
|
| if ((size_t)cnt < n) // (size_t) added - JAC
|
| *pwcs = 0 ; /* Terminate wctype string */
|
| return(cnt) ;
|
| }
|
| #endif // USING_OPEN_SOURCE_MBSTOWCS
|
|
|
| #ifdef USING_OPEN_SOURCE_WCSTOMBS
|
| NA_EIDPROC size_t WCSTOMBS(char *s, const WChar_t *pwcs, size_t n, _LC_charmap_t *hdl) // JAC
|
| {
|
| #ifdef DONT_NEED_THIS // JAC
|
| int cnt, len, row ;
|
| #else
|
| int cnt, len ;
|
| #endif // DONT_NEED_THIS - JAC
|
| WChar_t mb, wc ; // JAC
|
|
|
| if (pwcs == NULL)
|
| {
|
| #ifdef DONT_NEED_THIS // JAC
|
| _Seterrno(EILSEQ) ;
|
| #endif // DONT_NEED_THIS - JAC
|
| return((size_t)-1);
|
| }
|
|
|
| for (cnt = 0 ; (wc = *pwcs) != 0 ; cnt += len, pwcs++)
|
| {
|
| mb = 0 ;
|
| if (isascii(wc))
|
| mb = WCGETASCII(wc) ;
|
| else if (UCS_UDC(wc) && UDCFUNC)
|
| mb = (*UDCFUNC)(wc) ;
|
| /*
|
| * UDCFUNC may return 0. In this case, look up the mapping table for
|
| * the correct mb value.
|
| */
|
| if (wc && (mb == 0))
|
| {
|
| mb = WCGETVAL(wc) ;
|
| if ((mb == (WChar_t)BAD) && UCSFUNC) // JAC
|
| mb = (*UCSFUNC)(wc) ;
|
| }
|
|
|
| if (mb == (WChar_t)BAD) // JAC
|
| {
|
| #ifdef DONT_NEED_THIS // JAC
|
| _Seterrno(EILSEQ) ;
|
| #endif // DONT_NEED_THIS - JAC
|
| return((size_t)-1);
|
| }
|
| if (mb < 0x0000100) len = 1 ;
|
| else if (mb < 0x0010000) len = 2 ;
|
| else if (mb < 0x1000000) len = 3 ;
|
| else len = 4 ;
|
|
|
| /*
|
| * Write out the multibyte character if s is defined
|
| */
|
| if (s)
|
| {
|
| if ((size_t)(cnt + len) > n) // (size_t) added - JAC
|
| break ; /* Cannot stored more bytes */
|
| switch (len)
|
| {
|
| case 4: *s++ = (mb >> 24) & 0xff ;
|
| case 3: *s++ = (mb >> 16) & 0xff ;
|
| case 2: *s++ = (mb >> 8) & 0xff ;
|
| case 1: *s++ = mb & 0xff ;
|
| }
|
| }
|
| }
|
| if (s && ((size_t)cnt < n)) // (size_t) added - JAC
|
| *s = '\0' ; /* Terminate the string */
|
| return(cnt) ;
|
| }
|
| #endif // USING_OPEN_SOURCE_WCSTOMBS
|
|
|
| #ifdef USING_OPEN_SOURCE_MBTOPC
|
| NA_EIDPROC size_t
|
| MBTOPC(WChar_t *pwc, char *ts, size_t maxlen, int *err, _LC_charmap_t *hdl) // JAC
|
| {
|
| uchar_t *s=(uchar_t *)ts ; /* Better to work with unsigned char. */
|
| WChar_t wc ; // JAC
|
| #ifdef DONT_NEED_THIS // JAC
|
| int idx, row, len ;
|
| #else
|
| int idx, len ;
|
| #endif // DONT_NEED_THIS - JAC
|
|
|
| /*
|
| * This is very similar to MBTOWC. It has an additional parameter *err.
|
| * If the character is successfully converted return the number of
|
| * bytes in the multibyte character and set *err to 0. If not converted
|
| * due to maxlen too small return 0 and set *err to the no of bytes
|
| * required to convert. If an illegal character return 0, set *err to -1.
|
| */
|
|
|
| *err = 0 ;
|
| /*
|
| * If s is NULL, return 0
|
| */
|
| if (s == NULL)
|
| return(0);
|
|
|
| if (isascii(*s))
|
| {
|
| wc = MBGETASCII(*s) ;
|
| len = 1 ;
|
| }
|
| else
|
| {
|
| idx = MBINDEX (&s, maxlen) ;
|
| if (idx == ERR_INPUT_INCOMPLETE)
|
| {
|
| *err = MBCURMAX ; /* Ask for the maximum MB length */
|
| return(0) ;
|
| }
|
| if (idx < 0)
|
| {
|
| *err = -1 ;
|
| return(0) ; /* Invalid character */
|
| }
|
| if (IS_UCODE(idx))
|
| wc = GET_UCODE(idx) ;
|
| else if (ISIDXU(idx))
|
| wc = IDXU_UCS(idx) ;
|
| else if ((wc = MBGETVAL(idx)) == BAD)
|
| {
|
| *err = -1 ;
|
| return(0) ; /* Invalid character */
|
| }
|
| len = s - (uchar_t *)ts ;
|
| }
|
|
|
| if ((size_t)len > maxlen) // (size_t) added - JAC
|
| {
|
| *err = len ;
|
| return(0) ; /* Not enough buffer */
|
| }
|
| if (pwc) *pwc = wc ;
|
| return((size_t)len);
|
| }
|
| #endif // USING_OPEN_SOURCE_MBTOPC
|
|
|
| #ifdef USING_OPEN_SOURCE_MBSTOPCS
|
| NA_EIDPROC size_t MBSTOPCS(WChar_t *pwcs, size_t pwcs_len, const char *s, size_t s_len, /* JAC */
|
| int stopchr, char **endptr, int *err, _LC_charmap_t *hdl)
|
| {
|
| int pwcs_cnt = 0 ;
|
| #ifdef DONT_NEED_THIS // JAC
|
| int len ;
|
| #endif // DONT_NEED_THIS - JAC
|
| uchar_t *us = (uchar_t *)s ;
|
|
|
| /*
|
| * err is 0 if everything works
|
| */
|
| *err = 0;
|
|
|
| /*
|
| * Stop the processing if there is no more room for process code
|
| * or all the characters in s have been processed.
|
| */
|
| while (((size_t)pwcs_cnt < pwcs_len) && (s_len > 0)) // (size_t) added - JAC
|
| {
|
| /*
|
| * If we hit stopchr in s, Set endpointer to the character after
|
| * the stopchr and break out of the while
|
| */
|
| if (*us == (char) stopchr)
|
| {
|
| us++ ;
|
| break;
|
| }
|
|
|
| /*
|
| * Convert s to process code and increment s by the number
|
| * of bytes. If the conversion failed, set the endpointer
|
| * the the start of the character that failed, and
|
| * break out of the while.
|
| */
|
| if (isascii(*us))
|
| {
|
| pwcs[pwcs_cnt] = MBGETASCII(*us) ;
|
| us++, s_len-- ;
|
| }
|
| else
|
| {
|
| uchar_t *us_old = us ;
|
| WChar_t wc ; // JAC
|
| #ifdef DONT_NEED_THIS // JAC
|
| int idx, row ;
|
| #else
|
| int idx ;
|
| #endif // DONT_NEED_THIS - JAC
|
|
|
| idx = MBINDEX(&us, s_len) ;
|
| if (idx < 0)
|
| {
|
| *err = -1 ;
|
| break ; /* Invalid character */
|
| }
|
| if (IS_UCODE(idx))
|
| wc = GET_UCODE(idx) ;
|
| else if (ISIDXU(idx))
|
| wc = IDXU_UCS(idx) ;
|
| else if ((wc = MBGETVAL(idx)) == BAD)
|
| {
|
| *err = -1 ;
|
| break ; /* Invalid character */
|
| }
|
| if ((s_len -= us - us_old) < 0)
|
| {
|
| *err = -(int)s_len ; /* Need more buffer */ // (int) added - JAC
|
| break ;
|
| }
|
| pwcs[pwcs_cnt] = wc ;
|
| }
|
|
|
| /*
|
| * Increment the process code counter
|
| */
|
| pwcs_cnt++;
|
| }
|
| *endptr = (char *)us ; /* Set the end pointer */
|
| return(pwcs_cnt) ;
|
| }
|
| #endif // USING_OPEN_SOURCE_MBSTOPCS
|
|
|
| #endif /* USE_OUR_MB_WC_DATA_TABLES */
|