blob: a3ecdbd596115939d9c12989df5f673e3731e4b7 [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/*
* HISTORY
* $Log: mb_lconv.c,v $
* Revision 1.1.10.3 2002/03/11 18:48:04
* Fix QAR 02292 - mbstowcs() problem in UCS-4 locales.
*
* Revision 1.1.10.2 2002/01/28 15:20:17
* Fix wctomb() & wcstombs() problem with UDC codepoints.
*
* Revision 1.1.10.1 2001/12/07 15:37:54
* Remove WCBAD and MBBAD and use only BAD to avoid mixing up.
* Checking for UCODE first before checking for UDC index.
* Introduce a dummy_cell4 table to fix locale compilation failure in
* some @ucs4 locales.
* Change conversion table access methods & add support for HKSCS & cp949.
*
* Revision 1.1.6.1 2000/10/16 18:44:56
* COSIX.Zulu to Yankee merge for GB18030 support.
*
* Revision 1.1.4.3 2000/10/09 21:27:09
* Fix 82769 by setting *err to the MB_CUR_MAX for incomplete MB sequence.
*
* Revision 1.1.4.2 2000/10/04 20:43:40
* Fix 82696 by checking for -1 before calling the IS_UCODE() macro.
*
* Revision 1.1.4.1 2000/08/07 14:33:50
* Support the new GB18030 Chinese character set.
* Rename some macros to have a more consistent naming convention.
*
* Revision 1.1.2.1 2000/01/13 20:25:49
* Multibyte methods for @ucs4 locales.
*
* Revision 1.1.3.2 1996/11/22 17:02:38
* $EndLog$
*
* Multibyte UCS-4 locale conversion module containing templates for
* the following locale conversion routines:
*
* - mblen()
* - mbtowc()
* - wctomb()
* - mbtopc()
* - pctomb()
* - mbstowcs()
* - wcstombs()
* - mbstopcs()
* - pcstombs()
*
* This file may be included multiple times to generate different locale
* conversion routines. The macro "CODESET" is assumed to be predefined.
* It contains the name of the codeset to be supported.
*/
#ifdef USE_OUR_MB_WC_DATA_TABLES
/*
// This source file contains low-level routines for converting from
// a character set to/from Unicode (UCS4). These routines are our
// replacements for the corresponding OSF routines and were created
// because the DEC/OSF conversion routines/conversion tables were
// highly dependent on use of the Private Use Area (0xE000 - 0xF7FF)
// that unicode.org has reserved for vendor usage. We don't want to
// make heavy use of the P.U.A. (at least not yet) because we want to
// recognize UCS4 values produced by Java or by HP-UX. Consequently,
// it is better for us to stick with de facto standard Unicode values
// for every character we can.
*/
#define _LC_charmap_t int // JAC
/*
 * Our_mbtowc_big5_ucs4() - decode the single BIG5 character that starts
 * at ts into its UCS4 (4-byte Unicode) value.
 *
 * Arguments: pwc    - where to store the decoded UCS4 value; may be NULL,
 *                     in which case the value is simply discarded.
 *            ts     - input bytes (BIG5 multi-byte text).
 *            maxlen - number of input bytes that may be examined.
 *            hdl    - unused; present only so the signature matches the
 *                     __mbtowc_<cs>_ucs4() routines produced by MBTOWC.
 *
 * Returns:   0 if ts is NULL or the first byte is NUL (*pwc is set to 0
 *            in the latter case), 1 for an ASCII byte, 2 for a two-byte
 *            character found in the lookup table, and (size_t)-1 when
 *            maxlen is 0, the second byte is not available, the code is
 *            outside the table range or the table holds no mapping.
 */
#define Min_BIG5_chr 0xA140 /* Min for using lookup table */
#define Max_BIG5_chr 0xF9FF /* Max for using lookup table */
#define BAD_UCS_VAL 0x0000FFFF
size_t Our_mbtowc_big5_ucs4(WChar_t *pwc, const char *ts, size_t maxlen,
                            _LC_charmap_t *hdl)
{
    const uchar_t *in = (const uchar_t *)ts;
    uchar_t lead;
    int code;
    WChar_t ucs;

    if (ts == NULL)
        return (0);                     /* Nothing to convert */
    if (maxlen == (size_t)0)
        return ((size_t)-1);

    lead = in[0];
    if (lead == '\0') {
        if (pwc) *pwc = 0;
        return (0);
    }
    if (isascii(lead)) {
        if (pwc) *pwc = lead;
        return (1);
    }

    /* Every non-ASCII BIG5 character handled here is two bytes long. */
    if (maxlen < 2)
        return ((size_t)-1);
    code = (lead << 8) | in[1];
    if ((code < Min_BIG5_chr) || (code > Max_BIG5_chr))
        return ((size_t)-1);

    ucs = Our_MS_BIG5_tableF[code - Min_BIG5_chr];
    if (ucs == BAD_UCS_VAL)             /* Table slot without a mapping */
        return ((size_t)-1);

    if (pwc) *pwc = ucs;
    return ((size_t)2);
}
/*
 * Our_mbtowc_sjis_ucs4() - decode the single SJIS character that starts
 * at ts into its UCS4 (4-byte Unicode) value.
 *
 * Arguments: pwc    - where to store the decoded UCS4 value; may be NULL.
 *            ts     - input bytes (SJIS multi-byte text).
 *            maxlen - number of input bytes that may be examined.
 *            hdl    - unused; present only so the signature matches the
 *                     __mbtowc_<cs>_ucs4() routines produced by MBTOWC.
 *
 * Returns:   0 if ts is NULL or the first byte is NUL, the number of
 *            input bytes consumed (1 or 2) on success, (size_t)-1 on an
 *            invalid, unmapped or truncated character.
 *
 * Three cases are distinguished by the lead byte:
 *   0xA1-0xDF  single byte, mapped arithmetically onto U+FF61..U+FF9F;
 *   0xF0-0xF9  two-byte user-defined character, mapped arithmetically
 *              into U+E000.. with 188 code points per lead byte;
 *   otherwise  two-byte character resolved through Our_MS_sjis_tableF.
 */
#define Min_SJIS_chr 0x8140 /* Min for using lookup table */
#define Max_SJIS_chr 0xFC4B /* Max for using lookup table */
size_t Our_mbtowc_sjis_ucs4(WChar_t *pwc, const char *ts, size_t maxlen,
                            _LC_charmap_t *hdl)
{
    const uchar_t *in = (const uchar_t *)ts;
    uchar_t lead;
    int code;
    int trail;
    size_t used;
    WChar_t ucs;

    if (ts == NULL)
        return (0);                     /* Nothing to convert */
    if (maxlen == (size_t)0)
        return ((size_t)-1);

    lead = in[0];
    if (lead == '\0') {
        if (pwc) *pwc = 0;
        return (0);
    }
    if (isascii(lead)) {
        if (pwc) *pwc = lead;
        return (1);
    }

    if ((lead >= 0xA1) && (lead <= 0xDF)) {
        /* Single-byte range, converted arithmetically */
        ucs = 0xFF61 + lead - 0xA1;
        used = 1;
    }
    else {
        if (maxlen < 2)
            return ((size_t)-1);
        code = (lead << 8) | in[1];

        if ((lead >= 0xF0) && (lead <= 0xF9)) {
            /* User-defined character range, converted arithmetically */
            if ((code < 0xF040) || (code > 0xF9FC))
                return ((size_t)-1);
            trail = code & 0xFF;
            if ((trail < 0x40) || (trail > 0xFC) || (trail == 0x7F))
                return ((size_t)-1);    /* No such char */
            ucs = ((code & 0x0F00) >> 8) * 188 + (trail - 0x40) + 0xE000;
            if (trail > 0x7F)
                ucs -= 1;               /* Trail byte 0x7F is never used */
        }
        else {
            if ((code < Min_SJIS_chr) || (code > Max_SJIS_chr))
                return ((size_t)-1);
            ucs = Our_MS_sjis_tableF[code - Min_SJIS_chr];
            if (ucs == BAD_UCS_VAL)
                return ((size_t)-1);
        }
        used = 2;
    }

    if (pwc) *pwc = ucs;
    return (used);
}
/*
 * Our_mbtowc_cp949_ucs4() - decode the single KSC (cp949) character that
 * starts at ts into its UCS4 (4-byte Unicode) value.
 *
 * Arguments: pwc    - where to store the decoded UCS4 value; may be NULL.
 *            ts     - input bytes (KSC multi-byte text).
 *            maxlen - number of input bytes that may be examined.
 *            hdl    - unused; present only so the signature matches the
 *                     __mbtowc_<cs>_ucs4() routines produced by MBTOWC.
 *
 * Returns:   0 if ts is NULL or the first byte is NUL, 1 for an ASCII
 *            byte, 2 for a two-byte character, (size_t)-1 on an invalid,
 *            unmapped or truncated character.
 *
 * NOTES: KSC is short for KS-code, a Korean character set. cp949 is
 *        short for CodePage 949, Microsoft's codepage for the Korean
 *        character set. The two user-defined rows 0xC9A1-0xC9FE and
 *        0xFEA1-0xFEFE are converted arithmetically to U+E000.. and
 *        U+E05E..; everything else goes through Our_MS_KSC_tableF.
 */
#define Min_KSC_chr 0x8141 /* Min for using lookup table */
#define Max_KSC_chr 0xFEFE /* Max for using lookup table */
size_t Our_mbtowc_cp949_ucs4(WChar_t *pwc, const char *ts, size_t maxlen,
                             _LC_charmap_t *hdl)
{
    const uchar_t *in = (const uchar_t *)ts;
    uchar_t lead;
    int code;
    WChar_t ucs;

    if (ts == NULL)
        return (0);                     /* Nothing to convert */
    if (maxlen == (size_t)0)
        return ((size_t)-1);

    lead = in[0];
    if (lead == '\0') {
        if (pwc) *pwc = 0;
        return (0);
    }
    if (isascii(lead)) {
        if (pwc) *pwc = lead;
        return (1);
    }

    /* Every non-ASCII character handled here is two bytes long. */
    if (maxlen < 2)
        return ((size_t)-1);
    code = (lead << 8) | in[1];

    if ((code >= 0xC9A1) && (code <= 0xC9FE)) {
        ucs = code - 0xC9A1 + 0xE000;   /* First user-defined row */
    }
    else if ((code >= 0xFEA1) && (code <= 0xFEFE)) {
        ucs = code - 0xFEA1 + 0xE05E;   /* Second user-defined row */
    }
    else if ((code >= Min_KSC_chr) && (code <= Max_KSC_chr)) {
        ucs = Our_MS_KSC_tableF[code - Min_KSC_chr];
        if (ucs == BAD_UCS_VAL)
            return ((size_t)-1);
    }
    else {
        return ((size_t)-1);
    }

    if (pwc) *pwc = ucs;
    return ((size_t)2);
}
/*
 * Our_mbtowc_eucjp_ucs4() - decode the single EUC-JP character that
 * starts at ts into its UCS4 (4-byte Unicode) value.
 *
 * Arguments: pwc    - where to store the decoded UCS4 value; may be NULL.
 *            ts     - input bytes (EUC-JP multi-byte text).
 *            maxlen - number of input bytes that may be examined.
 *            hdl    - unused; present only so the signature matches the
 *                     __mbtowc_<cs>_ucs4() routines produced by MBTOWC.
 *
 * Returns:   0 if ts is NULL or the first byte is NUL, the number of
 *            input bytes consumed (1, 2 or 3) on success, (size_t)-1 on
 *            an invalid, unmapped or truncated character.
 *
 * Lead byte 0x8E introduces a two-byte character (second byte 0xA1-0xDF)
 * that is converted arithmetically to U+FF61..U+FF9F. Lead byte 0x8F
 * introduces a three-byte character looked up in Our_eucJP_tableF8F.
 * Any other non-ASCII lead byte starts a two-byte character looked up in
 * Our_eucJP_tableF00. Both tables are addressed as
 * (row - 0xA1) * EUCJP_ROW_LEN + (column - 0xA1), with row and column
 * each required to lie in 0xA1..0xFE.
 */
#define EUCJP_ROW_LEN (0xFE - 0xA1 + 1)
#define EUCJP_NUM_ROWS (0xFE - 0xA1 +1)
size_t Our_mbtowc_eucjp_ucs4(WChar_t *pwc, const char *ts, size_t maxlen,
                             _LC_charmap_t *hdl)
{
    const uchar_t *in = (const uchar_t *)ts;
    uchar_t lead;
    int row;
    int col;
    size_t used;
    WChar_t ucs;

    if (ts == NULL)
        return (0);                     /* Nothing to convert */
    if (maxlen == (size_t)0)
        return ((size_t)-1);

    lead = in[0];
    if (lead == '\0') {
        if (pwc) *pwc = 0;
        return (0);
    }
    if (isascii(lead)) {
        if (pwc) *pwc = lead;
        return (1);
    }

    if (lead == 0x8E) {
        /* Characters 0x8EA1 - 0x8EDF: converted arithmetically */
        if (maxlen < 2)
            return ((size_t)-1);
        col = in[1];
        if ((col < 0xA1) || (col > 0xDF))
            return ((size_t)-1);
        ucs = in[1] + 0xFF61 - 0xA1;
        used = 2;
    }
    else if (lead == 0x8F) {
        /* Three-byte character: row and column follow the 0x8F prefix */
        if (maxlen < 3)
            return ((size_t)-1);
        row = in[1];
        col = in[2];
        if ((row < 0xA1) || (row > 0xFE) || (col < 0xA1) || (col > 0xFE))
            return ((size_t)-1);
        ucs = Our_eucJP_tableF8F[(row - 0xA1) * EUCJP_ROW_LEN + (col - 0xA1)];
        if (ucs == BAD_UCS_VAL)
            return ((size_t)-1);
        used = 3;
    }
    else {
        /* Regular two-byte character: the lead byte is the row */
        if (maxlen < 2)
            return ((size_t)-1);
        row = lead;
        col = in[1];
        if ((row < 0xA1) || (row > 0xFE) || (col < 0xA1) || (col > 0xFE))
            return ((size_t)-1);
        ucs = Our_eucJP_tableF00[(row - 0xA1) * EUCJP_ROW_LEN + (col - 0xA1)];
        if (ucs == BAD_UCS_VAL)
            return ((size_t)-1);
        used = 2;
    }

    if (pwc) *pwc = ucs;
    return (used);
}
/*
 * Our_wctomb_big5_ucs4() - encode one UCS4 character as BIG5.
 *
 * Arguments: s   - output buffer; receives 1 or 2 bytes.
 *            wc  - the UCS4 character to convert.
 *            hdl - unused; present only so the signature matches the
 *                  __wctomb_<cs>_ucs4() routines produced by WCTOMB.
 *
 * Returns:   0 if s is NULL, the number of bytes stored (1 or 2) on
 *            success, -1 if wc has no BIG5 representation.
 *
 * NOTE: It is the caller's responsibility to ensure the output buffer
 *       is big enough.
 */
#define Max_BIG5_UCS_val (0xFFFE)
int Our_wctomb_big5_ucs4(char *s, WChar_t wc, _LC_charmap_t *hdl)
{
    WChar_t code = 0;

    if (s == NULL)
        return (0);

    if (isascii(wc)) {
        code = wc;                      /* ASCII maps to itself */
    }
    else if (wc <= Max_BIG5_UCS_val) {
        code = Our_MS_BIG5_tableB[wc];
        if (code == 0xFFFF)             /* Table slot without a mapping */
            code = (WChar_t)BAD;
    }
    else {
        code = (WChar_t)BAD;            /* Beyond the reverse table */
    }

    if (code == (WChar_t)BAD)
        return (-1);

    if (code < 0x100) {
        s[0] = (char)(code & 0xff);
        return (1);
    }

    /* BIG5 has no characters longer than two bytes */
    s[0] = (char)((code >> 8) & 0xff);
    s[1] = (char)(code & 0xff);
    return (2);
}
/*
 * Our_wctomb_cp949_ucs4() - encode one UCS4 character as KSC (cp949).
 *
 * Arguments: s   - output buffer; receives 1 or 2 bytes.
 *            wc  - the UCS4 character to convert.
 *            hdl - unused; present only so the signature matches the
 *                  __wctomb_<cs>_ucs4() routines produced by WCTOMB.
 *
 * Returns:   0 if s is NULL, the number of bytes stored (1 or 2) on
 *            success, -1 if wc has no KSC representation.
 *
 * U+E000..U+E05D and U+E05E..U+E0BB are user-defined characters that
 * are converted arithmetically to 0xC9A1.. and 0xFEA1.. respectively;
 * every other non-ASCII value goes through Our_MS_KSC_tableB.
 *
 * NOTE: It is the caller's responsibility to ensure the output buffer
 *       is big enough.
 */
#define Max_KSC_UCS_val (0xFFFE)
int Our_wctomb_cp949_ucs4(char *s, WChar_t wc, _LC_charmap_t *hdl)
{
    WChar_t code = 0;

    if (s == NULL)
        return (0);

    if (isascii(wc)) {
        code = wc;                      /* ASCII maps to itself */
    }
    else if ((wc >= 0xE000) && (wc <= 0xE05D)) {
        code = wc - 0xE000 + 0xC9A1;    /* First user-defined row */
    }
    else if ((wc >= 0xE05E) && (wc <= 0xE0BB)) {
        code = wc - 0xE05E + 0xFEA1;    /* Second user-defined row */
    }
    else if (wc <= Max_KSC_UCS_val) {
        code = Our_MS_KSC_tableB[wc];
        if (code == 0xFFFF)             /* Table slot without a mapping */
            code = (WChar_t)BAD;
    }
    else {
        code = (WChar_t)BAD;            /* Beyond the reverse table */
    }

    if (code == (WChar_t)BAD)
        return (-1);

    if (code < 0x100) {
        s[0] = (char)(code & 0xff);
        return (1);
    }

    /* KSC has no characters longer than two bytes */
    s[0] = (char)((code >> 8) & 0xff);
    s[1] = (char)(code & 0xff);
    return (2);
}
/*
 * Our_wctomb_sjis_ucs4() - encode one UCS4 character as SJIS.
 *
 * Arguments: s   - output buffer; receives 1 or 2 bytes.
 *            wc  - the UCS4 character to convert.
 *            hdl - unused; present only so the signature matches the
 *                  __wctomb_<cs>_ucs4() routines produced by WCTOMB.
 *
 * Returns:   0 if s is NULL, the number of bytes stored (1 or 2) on
 *            success, -1 if wc has no SJIS representation.
 *
 * U+E000..U+E757 are user-defined characters converted arithmetically:
 * 188 code points per lead byte starting at 0xF040, skipping trail
 * byte 0x7F. Every other non-ASCII value goes through
 * Our_MS_sjis_tableB.
 *
 * NOTE: It is the caller's responsibility to ensure the output buffer
 *       is big enough.
 */
#define Max_SJIS_UCS_val (0xFFFE)
int Our_wctomb_sjis_ucs4(char *s, WChar_t wc, _LC_charmap_t *hdl)
{
    WChar_t code = 0;

    if (s == NULL)
        return (0);

    if (isascii(wc)) {
        code = wc;                      /* ASCII maps to itself */
    }
    else if ((wc >= 0xE000) && (wc <= 0xE757)) {
        /* User-defined character */
        code = 0xF040;
        code += ((wc - 0xE000) / 188) * 0x100;  /* Selects the lead byte */
        code += ((wc - 0xE000) % 188);          /* Offset within the row */
        if ((code & 0xFF) >= 0x7F)
            code += 1;                  /* SJIS ending with 0x7F not used */
    }
    else if (wc <= Max_SJIS_UCS_val) {
        code = Our_MS_sjis_tableB[wc];
        if (code == 0xFFFF)             /* Table slot without a mapping */
            code = (WChar_t)BAD;
    }
    else {
        code = (WChar_t)BAD;            /* Beyond the reverse table */
    }

    if (code == (WChar_t)BAD)
        return (-1);

    if (code < 0x100) {
        s[0] = (char)(code & 0xff);
        return (1);
    }

    /* SJIS has no characters longer than two bytes */
    s[0] = (char)((code >> 8) & 0xff);
    s[1] = (char)(code & 0xff);
    return (2);
}
/*
 * Our_wctomb_eucjp_ucs4() - encode one UCS4 character as EUC-JP.
 *
 * Arguments: s   - output buffer; receives 1, 2 or 3 bytes.
 *            wc  - the UCS4 character to convert.
 *            hdl - unused; present only so the signature matches the
 *                  __wctomb_<cs>_ucs4() routines produced by WCTOMB.
 *
 * Returns:   0 if s is NULL, the number of bytes stored (1, 2 or 3) on
 *            success, -1 if wc has no EUC-JP representation.
 *
 * U+FF61..U+FF9F are converted arithmetically to 0x8EA1..0x8EDF. Other
 * non-ASCII values are looked up in Our_eucJP_tableB; when the matching
 * bit in Our_eucJP_tableB_8F_FB is set, the two-byte table value is
 * prefixed with 0x8F to form a three-byte character.
 *
 * NOTE: It is the caller's responsibility to ensure the output buffer
 *       is big enough.
 */
#define Max_EUCJP_UCS_val (0xFFFE) /* Same upper bound as the sibling reverse tables */
int Our_wctomb_eucjp_ucs4(char *s, WChar_t wc, _LC_charmap_t *hdl)
{
    WChar_t mb = 0 ;

    if (s == NULL)
        return(0) ;

    if (isascii(wc))
        mb = (wc) ;
    else if ( (wc >= 0xFF61) && (wc <= 0xFF9F) )
        mb = wc - 0xFF61 + 0x8EA1; /* Algorithmically convert! */
    else if ( wc <= Max_EUCJP_UCS_val ) {
        mb = Our_eucJP_tableB[wc];
        if ( mb == 0x0000FFFF )
            mb = (WChar_t)BAD;
        /*
         * If Flag bit for this Unicode Value says to prepend 0x8F
         * then do so - but only for a value that really has a mapping,
         * so the BAD marker is never altered. The mask is built from an
         * unsigned 1 because shifting a signed 1 left by 31 is undefined.
         */
        else if ( Our_eucJP_tableB_8F_FB[wc/32] & (1u << (31-(wc%32))) )
            mb |= 0x8F0000;
    }
    else
        mb = (WChar_t)BAD;          /* Beyond the reverse tables: never index them */

    if (mb == (WChar_t)BAD)
        return(-1);

    if (mb < 0x100)
    {
        *s = (char)( mb & 0xff );
        return(1) ;
    }
    else if (mb < 0x10000)
    {
        *s++ = (char)( (mb >> 8) & 0xff );
        *s = (char)( mb & 0xff );
        return(2) ;
    }
    else /* Must be a 3-byte character ... EUCJP has none 4-byte or longer */
    {
        *s++ = (char)( (mb >> 16) & 0xff );
        *s++ = (char)( (mb >> 8) & 0xff );
        *s = (char)( mb & 0xff );
        return(3) ;
    }
}
#else /* USE_OUR_MB_WC_DATA_TABLES */
/*
* One-time section: MB_LCONV_C guards everything up to the matching #endif
* so that these codeset-independent definitions are emitted only on the
* first inclusion of this file.
*/
#ifndef MB_LCONV_C
#define MB_LCONV_C 1
/*
* Generic macros to access the MB to WC row and cell tables
* Double redirection is needed here to fully resolve the macro parameter
* correctly.
*/
#define __MBCELL2(cs) _ ## cs ## _to_ucs_cell2
#define __MBCELL4(cs) _ ## cs ## _to_ucs_cell4
#define __MBROW(cs) _ ## cs ## _to_ucs_row
#define __MBINDEX(cs) __ ## cs ## _index
#define __MBDMAP(cs) cs ## _to_ucs_DMAP
#define __MBDMAPVAL(cs) cs ## _to_ucs_dmap
#define _MBCELL2(cs) __MBCELL2(cs)
#define _MBCELL4(cs) __MBCELL4(cs)
#define _MBROW(cs) __MBROW (cs)
/* Number of elements in the MB to WC row table (array size / element size) */
#define _MBROWSIZE(cs) (sizeof(__MBROW(cs))/sizeof(__MBROW(cs)[0]))
#define _MBINDEX(cs) __MBINDEX (cs)
#define _MBDMAP(cs) __MBDMAP (cs)
#define _MBDMAPVAL(cs) __MBDMAPVAL(cs)
/*
* Generic macros to access the WC to MB row and cell tables
*/
#define __WCCELL2(cs) _ucs_to_ ## cs ## _cell2
#define __WCCELL4(cs) _ucs_to_ ## cs ## _cell4
#define __WCROW(cs) _ucs_to_ ## cs ## _row
#define __WCDMAP(cs) ucs_to_ ## cs ## _DMAP
#define __WCDMAPVAL(cs) ucs_to_ ## cs ## _dmap
#define _WCCELL2(cs) __WCCELL2(cs)
#define _WCCELL4(cs) __WCCELL4(cs)
#define _WCROW(cs) __WCROW (cs)
/* Number of elements in the WC to MB row table (array size / element size) */
#define _WCROWSIZE(cs) (sizeof(__WCROW(cs))/sizeof(__WCROW(cs)[0]))
#define _WCDMAP(cs) __WCDMAP (cs)
#define _WCDMAPVAL(cs) __WCDMAPVAL(cs)
/*
* Generic MB/WC conversion routine name macros
* The double-underscore forms paste the codeset name into an identifier;
* the single-underscore forms exist so that an argument which is itself a
* macro (such as CODESET) is expanded before the pasting happens.
*/
#define __MBLEN(cs) __mblen_ ## cs ## _ucs4
#define __MBTOWC(cs) __mbtowc_ ## cs ## _ucs4
#define __WCTOMB(cs) __wctomb_ ## cs ## _ucs4
#define __MBTOPC(cs) __mbtopc_ ## cs ## _ucs4
#define __MBSTOWCS(cs) __mbstowcs_ ## cs ## _ucs4
#define __WCSTOMBS(cs) __wcstombs_ ## cs ## _ucs4
#define __MBSTOPCS(cs) __mbstopcs_ ## cs ## _ucs4
#define __UDCTOMB(cs) __UDC_to_ ## cs
#define __UCSTOMB(cs) __UCS_to_ ## cs
#define __UDCFUNC(cs) __UDC_to_ ## cs ## _func
#define __UCSFUNC(cs) __UCS_to_ ## cs ## _func
#define __WCGETVAL(cs) __wcgetval_ ## cs
#define __MBGETVAL(cs) __mbgetval_ ## cs
#define __UTF16ToMB(cs) UTF16To_ ## cs
#define __MBToUTF16(cs) cs ## _ToUTF16
#define _MBLEN(cs) __MBLEN (cs)
#define _MBTOWC(cs) __MBTOWC (cs)
#define _WCTOMB(cs) __WCTOMB (cs)
#define _MBTOPC(cs) __MBTOPC (cs)
#define _MBSTOWCS(cs) __MBSTOWCS(cs)
#define _WCSTOMBS(cs) __WCSTOMBS(cs)
#define _MBSTOPCS(cs) __MBSTOPCS(cs)
#define _UDCTOMB(cs) __UDCTOMB (cs)
#define _UCSTOMB(cs) __UCSTOMB (cs)
#define _UDCFUNC(cs) __UDCFUNC (cs)
#define _UCSFUNC(cs) __UCSFUNC (cs)
#define _WCGETVAL(cs) __WCGETVAL(cs)
#define _MBGETVAL(cs) __MBGETVAL(cs)
#define _UTF16ToMB(cs) __UTF16ToMB(cs)
#define _MBToUTF16(cs) __MBToUTF16(cs)
/*
* Miscellaneous macros
*/
#define __MBCURMAX(cs) MBCURMAX_ ## cs
#define _MBCURMAX(cs) __MBCURMAX(cs)
/*
* Character set MB_LEN_MAX macros
* (one entry per supported codeset; selected through _MBCURMAX(CODESET))
*/
#define MBCURMAX_big5 2
#define MBCURMAX_hkscs 2
#define MBCURMAX_cp949 2
#define MBCURMAX_dechanyu 4
#define MBCURMAX_dechanzi 2
#define MBCURMAX_deckanji 2
#define MBCURMAX_deckorean 2
#define MBCURMAX_eucjp 3
#define MBCURMAX_euckr 2
#define MBCURMAX_euctw 4
#define MBCURMAX_gb18030 4
#define MBCURMAX_gbk 2
#define MBCURMAX_sdeckanji 3
#define MBCURMAX_sjis 2
/*
* Dummy pctomb() and pcstombs() routines
* Both always report failure by returning -1.
*/
int __pctomb_mb_ucs4 () { return(-1) ; }
int __pcstombs_mb_ucs4() { return(-1) ; }
/*
* Dummy cell4_t table
* Used as the alternative operand of the "CELL4 ? CELL4 : dummy_cell4"
* expressions in the WCGETVAL/MBGETVAL lookup functions.
*/
static cell4_t dummy_cell4[1] = { 0x0000 }; // initialize it
#endif
/*
* Per-inclusion section: everything below is re-bound to the current
* CODESET each time this file is included, so every codeset specific
* macro is #undef'ed before it is defined again.
*/
/*
* Codeset specific macros to access the MB to WC row and cell tables
*/
#undef MBCELL2
#undef MBCELL4
#undef MBROW
#undef MBROWSIZE
#undef MBINDEX
#undef MBGETASCII
#undef MBGETVAL
#undef MBDMAP
#undef MBDMAPVAL
#undef MBCURMAX
#define MBCURMAX _MBCURMAX (CODESET)
#define MBCELL2 _MBCELL2 (CODESET)
#define MBCELL4 _MBCELL4 (CODESET)
#define MBROW _MBROW (CODESET)
#define MBROWSIZE _MBROWSIZE(CODESET)
#define MBDMAP _MBDMAP (CODESET)
#define MBDMAPVAL _MBDMAPVAL(CODESET)
#define MBGETVAL _MBGETVAL (CODESET)
#define MBINDEX(x,y) _MBINDEX (CODESET)(NULL,x,y)
/*
* ASCII lookup: when the direct-map flag is set the character indexes row 0
* of the cell table directly, otherwise the row table is consulted first.
* The argument is evaluated more than once, so it must have no side effects.
*/
#define MBGETASCII(c) (MBDMAPVAL ? MBCELL2[0][c] \
: MBCELL2[MBROW[ROW(c)]][COL(c)])
/*
* Codeset specific macros to access the WC to MB row and cell tables
*/
#undef WCCELL2
#undef WCCELL4
#undef WCROW
#undef WCROWSIZE
#undef WCGETASCII
#undef WCGETVAL
#undef WCISBAD
#undef WCDMAP
#undef WCDMAPVAL
#define WCCELL2 _WCCELL2 (CODESET)
#define WCCELL4 _WCCELL4 (CODESET)
#define WCROW _WCROW (CODESET)
#define WCROWSIZE _WCROWSIZE(CODESET)
#define WCDMAP _WCDMAP (CODESET)
#define WCDMAPVAL _WCDMAPVAL(CODESET)
#define WCGETVAL _WCGETVAL (CODESET)
/* Same scheme as MBGETASCII, for the WC to MB direction. */
#define WCGETASCII(c) (WCDMAPVAL ? WCCELL2[0][c] \
: WCCELL2[WCROW[ROW(c)]][COL(c)])
/*
* Codeset specific MB/WC conversion routine name macros
*
* PCTOMB, PCSTOMBS, UCSTOMB and UCSFUNC are defined below like the others,
* so they are #undef'ed here as well to keep the two lists in step.
* NOTE(review): no _PCTOMB()/_PCSTOMBS() helper is visible in this part of
* the file, so PCTOMB and PCSTOMBS only work if those are defined elsewhere
* - confirm.
*/
#undef MBLEN
#undef MBTOWC
#undef WCTOMB
#undef MBTOPC
#undef PCTOMB
#undef MBSTOWCS
#undef WCSTOMBS
#undef MBSTOPCS
#undef PCSTOMBS
#undef UDCTOMB
#undef UCSTOMB
#undef UDCFUNC
#undef UCSFUNC
#undef UTF16ToMB
#undef MBToUTF16
#define MBLEN _MBLEN (CODESET)
#define MBTOWC _MBTOWC (CODESET)
#define WCTOMB _WCTOMB (CODESET)
#define MBTOPC _MBTOPC (CODESET)
#define PCTOMB _PCTOMB (CODESET)
#define MBSTOWCS _MBSTOWCS(CODESET)
#define WCSTOMBS _WCSTOMBS(CODESET)
#define MBSTOPCS _MBSTOPCS(CODESET)
#define PCSTOMBS _PCSTOMBS(CODESET)
#define UDCTOMB _UDCTOMB (CODESET)
#define UCSTOMB _UCSTOMB (CODESET)
#define UDCFUNC _UDCFUNC (CODESET)
#define UCSFUNC _UCSFUNC (CODESET)
#define UTF16ToMB _UTF16ToMB(CODESET)
#define MBToUTF16 _MBToUTF16(CODESET)
/*
* Per-codeset bindings used by the conversion routines below:
* UDCFUNC/UCSFUNC are function pointers that the routines test for
* non-null before calling; MBDMAPVAL/WCDMAPVAL are the direct-map flags
* read by MBGETASCII/WCGETASCII.
*/
const static udcfunc_t UDCFUNC = UDCTOMB ;
const static udcfunc_t UCSFUNC = UCSTOMB ;
const static int MBDMAPVAL = MBDMAP ;
const static int WCDMAPVAL = WCDMAP ;
/*-----------------------[ Internal inline functions ]-----------------------*/
// #pragma inline (WCGETVAL, MBGETVAL) // BRL & JAC
/*
 * Map a wide character code (UCS) to its multibyte format.
 *
 * The row table is indexed by ROW(wc); an out-of-range row or a row entry
 * of UCS2_BAD yields BAD. A row entry above ROW_MASK selects the 4-byte
 * cell table (when the codeset has one); otherwise the 2-byte cell table
 * is used and a cell of UCS2_BAD is reported as BAD.
 */
inline static WChar_t WCGETVAL(WChar_t wc) //JAC
{
    int row = ROW(wc) ;
    WChar_t mb ;

    if (row >= WCROWSIZE)
        return(BAD) ;

    row = WCROW[row] ;
    if (row == UCS2_BAD)
        return(BAD) ;

    /* The inner "?:" keeps the subscript expression valid for every codeset. */
    if (WCCELL4 && (row > ROW_MASK))
        return((WCCELL4 ? WCCELL4 : dummy_cell4)[MASKROW(row)][COL(wc)]) ;

    mb = WCCELL2[row][COL(wc)] ;
    if (mb == UCS2_BAD)
        return(BAD) ;
    return(mb) ;
}
/*
 * Map a multibyte index to wide character encoding.
 *
 * The row table is indexed by ROW(idx); an out-of-range row or a row entry
 * of UCS2_BAD yields BAD. A row entry above ROW_MASK selects the 4-byte
 * cell table (when the codeset has one); otherwise the 2-byte cell table
 * is used and a cell of UCS2_BAD is reported as BAD.
 */
inline static WChar_t MBGETVAL(int idx) //JAC
{
    int row = ROW(idx) ;
    WChar_t wc ; //JAC

    if (row >= MBROWSIZE)
        return(BAD) ;

    row = MBROW[row] ;
    if (row == UCS2_BAD)
        return(BAD) ;

    /* The inner "?:" keeps the subscript expression valid for every codeset. */
    if (MBCELL4 && (row > ROW_MASK))
        return((MBCELL4 ? MBCELL4 : dummy_cell4)[MASKROW(row)][COL(idx)]) ;

    wc = MBCELL2[row][COL(idx)] ;
    if (wc == UCS2_BAD)
        return(BAD) ;
    return(wc) ;
}
/*--------------------[ Conversion routines start here ]---------------------*/
#ifdef USING_OPEN_SOURCE_MBLEN
/*
 * MBLEN - length in bytes of the multibyte character that starts at ts.
 *
 * Returns 0 when ts is NULL or points at a NUL byte, 1 for an ASCII byte,
 * otherwise the number of bytes MBINDEX consumed. (size_t)-2 is returned
 * when MBINDEX reports incomplete input, and (size_t)-1 when maxlen is 0,
 * MBINDEX reports an invalid character, or the index has no mapping;
 * both are converted to the int return type. hdl is not used here.
 */
int MBLEN(const char *ts, size_t maxlen, _LC_charmap_t *hdl)
{
    uchar_t *s = (uchar_t *)ts ;
    int idx ;

    if (s == NULL)
        return (0);
    if (*s == '\0')
        return (0);

    /*
     * If maxlen is zero then treat it as an illegal character - same
     * as for the non-UCS locale.
     */
    if (maxlen == (size_t)0)
        goto illegal ;

    if (isascii(*s))
        return(1) ;

    idx = MBINDEX(&s, maxlen) ;     /* Advances s past the character */
    if (idx == ERR_INPUT_INCOMPLETE)
        return((size_t)-2) ;        /* Input incomplete */
    if (idx == ERR_INVALID_CHAR)
        goto illegal ;

    /* Only indexes that are neither IDXU nor UCODE need a table lookup */
    if (!(ISIDXU(idx) || IS_UCODE(idx)) && (MBGETVAL(idx) == BAD))
        goto illegal ;

    return((size_t)(s - (uchar_t *)ts)) ;

illegal:
#ifdef DONT_NEED_THIS // JAC
    _Seterrno(EILSEQ) ;
#endif // DONT_NEED_THIS - JAC
    return((size_t)-1);
}
#endif // USING_OPEN_SOURCE_MBLEN
/*
 * MBTOWC - convert the multibyte character at ts (in the current CODESET)
 * to its UCS-4 wide character.
 *
 * Arguments: pwc    - receives the wide character; may be NULL.
 *            ts     - input bytes.
 *            maxlen - number of input bytes that may be examined.
 *            hdl    - not used in this routine.
 *
 * Returns:   0 if ts is NULL or the first byte is NUL, the number of
 *            input bytes consumed on success, (size_t)-2 when MBINDEX
 *            reports incomplete input and (size_t)-1 for an illegal or
 *            unmapped character (including maxlen == 0).
 *
 * The codeset specific pre-checks look at bytes beyond the first one only
 * when maxlen says those bytes are available; a sequence cut short by
 * maxlen falls through to MBINDEX, which reports it.
 */
size_t MBTOWC(WChar_t *pwc, const char *ts, size_t maxlen, _LC_charmap_t *hdl) // JAC
{
uchar_t *s = (uchar_t *)ts ;
WChar_t wc ; //JAC
#ifdef DONT_NEED_THIS // JAC
int idx, row ;
#else
int idx ;
#endif // DONT_NEED_THIS - JAC
/*
* If ts == NULL, return non-zero or zero if character encodings
* do or do not have state-dependent encodings
*/
if (ts == NULL) return (0); /* No state dependent encodings */
/*
* If maxlen is zero then treat it as an illegal character - same
* as for the non-UCS locale.
*/
if (maxlen == (size_t)0)
{
#ifdef DONT_NEED_THIS // JAC
_Seterrno(EILSEQ);
#endif // DONT_NEED_THIS - JAC
return((size_t)-1);
}
if (*s == '\0')
{
/* No need to take the hit of a function call */
if (pwc) *pwc = 0;
return (0);
}
if (isascii(*s))
{
if (pwc) *pwc = MBGETASCII(*s) ;
return(1) ;
}
#ifdef OUR_CS_GB18030_specific /* Deal with 8431A438 and 9 separately */
/* All four bytes are inspected, so all four must be within maxlen */
if ( (maxlen >= 4) && (*s == 0x84) && ( *(s+1) == 0x31 ) && (*(s+2) == 0xA4) ) {
if ( *(s+3) == 0x38 ) {
wc = 0x0FFFE;
if (pwc) *pwc = wc ;
return((size_t)(4)) ;
}
if ( *(s+3) == 0x39 ) {
wc = 0x0FFFF;
if (pwc) *pwc = wc ;
return((size_t)(4)) ;
}
}
#endif /* OUR_CS_GB18030_specific */
#if defined(OUR_CS_GBK_specific)
if ( *s == 0x80 ) { /* Handle Euro Sign that GBK defines as 0x80 */
wc = 0x020AC;
if (pwc) *pwc = wc ;
return((size_t)(1)) ; // Return length of input char in bytes.
}
#endif /* OUR_CS_GBK_specific */
#if defined(OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific)
/* The checks below read the second byte, so it must be within maxlen */
if ( maxlen >= 2 ) {
if ( *s == 0xA9 ) { /* Disallow range of UDCs - since not UDCs in GB18030 */
if ( (*(s+1) >= 0x89) && (*(s+1) <= 0x95) )
goto err_exit ;
}
if ( *s == 0xFE ) { /* Disallow range of UDCs - since not UDCs in GB18030 */
if ( (*(s+1) >= 0x50) && (*(s+1) <= 0x9F) )
goto err_exit ;
}
if ( *s == 0xA2 ) { /* Disallow 0xA2E3 UDC - since not UDC in GB18030 */
if ( *(s+1) == 0xE3 )
goto err_exit ;
}
}
#endif /* (OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific) */
idx = MBINDEX(&s, maxlen) ;
if (idx == ERR_INPUT_INCOMPLETE)
return((size_t)-2) ; /* Input incomplete */
else if (idx == ERR_INVALID_CHAR)
goto err_exit ;
else if (IS_UCODE(idx))
wc = GET_UCODE(idx) ;
else if (ISIDXU(idx))
wc = IDXU_UCS(idx) ;
else if ((wc = MBGETVAL(idx)) == BAD)
goto err_exit ;
#if defined(OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific)
/*
* NOTE: Because gb2312 and gb18030 share data tables, it is
* possible that MBGETVAL() returned a gb18030 char. Here
* we explicitly rule those out for gb2312. These rules
* may need changes in the future if more characters are
* added to gb2312.
*/
/***************************************
NOTE: Even though the official GB2312 doesn't support the following
5 characters, we decided to allow them because HP-UX does.
***************************************/
#if 0
if (wc == 0x0251) goto err_exit ;
if (wc == 0x0261) goto err_exit ;
if (wc == 0x0144) goto err_exit ;
if (wc == 0x0148) goto err_exit ;
if (wc == 0x01F9) goto err_exit ;
#endif
#if defined(OUR_CS_GBK_specific)
/***************************************
NOTE: Even though the official GBK doesn't support User-Defined chars
in the range U+E000 - U+0xE8FF, we decided to allow 0xE000 - 0xE765
because BOTH Java and GB18030 allow them. Java allows a few more,
but don't see how to support those while using GB18030 tables.
***************************************/
if ( (wc >= 0xE766) && (wc <= 0xE8FF) && (wc != 0xE7C7) ) goto err_exit ;
#else /* (OUR_CS_GB2312_specific) */
/***************************************
NOTE: Even though the official GB2312 doesn't support 0xA8BC mapping to
U+0xE7C7, we decided to allow it because HP-UX does. The rest of
the characters ruled out by the following 2 lines are not part of
GB2312 and not supported by HP-UX.
***************************************/
if ( (wc >= 0xE000) && (wc <= 0xFF00) && !(wc == 0xE7C7)) goto err_exit ;
if ( (wc >= 0x2170) && (wc <= 0x2179) ) goto err_exit ;
/***************************************
NOTE: DEC/OSF code maps 0xA1AA to U+0x2014. So does the SUN mappings for
the GB18030 character set. However, HP-UX, Java, and GNU map it
to U+0x2015. Yuk! HP-China tells us to go with HP-UX's way.
***************************************/
if ( wc == 0x2014 ) wc = 0x2015;
#endif
#endif /* defined(OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific) */
if (pwc) *pwc = wc ;
return((size_t)(s - (uchar_t *)ts)) ;
err_exit:
#ifdef DONT_NEED_THIS // JAC
_Seterrno(EILSEQ) ;
#endif // DONT_NEED_THIS - JAC
return((size_t)-1);
}
/*
* WCTOMB - convert one UCS-4 wide character to its multibyte form in the
* current CODESET.
*
* Arguments: s - output buffer; receives 1 to 4 bytes.
* wc - the wide character to convert.
* hdl - not used in this routine.
*
* Returns: 0 if s is NULL, the number of bytes stored (1-4) on
* success, -1 if wc has no multibyte representation.
*
* The multibyte value is held in "mb" as a number; the number of bytes
* written is chosen from its magnitude, most significant byte first.
*/
int WCTOMB(char *s, WChar_t wc, _LC_charmap_t *hdl) // JAC
{
WChar_t mb = 0 ;
#ifdef DONT_NEED_THIS // JAC
int row ;
#endif // DONT_NEED_THIS - JAC
/*
* If s is NULL, return 0
*/
if (s == NULL)
return(0) ;
#ifdef OUR_CS_GB18030_specific /* Deal with 8431A438 and 9 separately */
/* These two values skip every table lookup and jump straight to the 4-byte writer */
if ( wc == 0xFFFE ) {
mb = 0x8431A438;
goto success_exit;
}
if ( wc == 0xFFFF ) {
mb = 0x8431A439;
goto success_exit;
}
#endif /* OUR_CS_GB18030_specific */
if (isascii(wc))
mb = WCGETASCII(wc) ;
else if (UCS_UDC(wc) && UDCFUNC)
mb = (*UDCFUNC)(wc) ;
/*
* UDCFUNC may return 0. In this case, look up the mapping table for
* the correct mb value.
*/
if (wc && (mb == 0))
{
mb = WCGETVAL(wc) ;
/* Table has no mapping: give the codeset's UCS fallback, if any, a chance */
if ((mb == (WChar_t)BAD) && UCSFUNC) //JAC
mb = (*UCSFUNC)(wc) ;
}
#if defined(OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific)
/*
* NOTE: Because gb2312 and gb18030 share data tables, it is
* possible that WCGETVAL() returned a gb18030 char. Here
* we explicitly rule those out for gb2312. These rules
* may need changes in the future if more characters are
* added to gb2312.
*/
#if defined(OUR_CS_GBK_specific)
if ( wc == 0x20AC ) /* Handle Euro Sign that GBK defines as 0x80 */
mb = (WChar_t)(0x0080);
/***************************************
NOTE: Even though the official GBK doesn't support User-Defined chars
in the range U+E000 - U+0xE8FF, we decided to allow 0xE000 - 0xE765
because BOTH Java and GB18030 allow them. Java allows a few more,
but don't see how to support those while using GB18030 tables.
***************************************/
if ( (wc >= 0xE766) && (wc <= 0xE8FF) && (wc != 0xE7C7) )
mb = (WChar_t)BAD;
if ( (mb >= 0xA989) && (mb <= 0xA995) ) /* Disallow range of UDCs - since not UDCs in GB18030 */
mb = (WChar_t)BAD;
if ( (mb >= 0xFE50) && (mb <= 0xFE9F) ) /* Disallow range of UDCs - since not UDCs in GB18030 */
mb = (WChar_t)BAD;
if ( mb == 0xA2E3 ) /* Disallow 0xA2E3 UDC - since not UDC in GB18030 */
mb = (WChar_t)BAD;
#else /* Specific to GB2312 */
if ( ( (wc >= 0xE000) && (wc <= 0xFF00) && !(wc==0xE7C7) ) ||
( (wc >= 0x2170) && (wc <= 0x2179) ) )
mb = (WChar_t)BAD;
/* Both U+2014 and U+2015 are written as 0xA1AA */
if ( (wc == 0x2014) || (wc == 0x2015) )
mb = (WChar_t)(0xA1AA);
#endif
#endif /* defined(OUR_CS_GB2312_specific) || defined(OUR_CS_GBK_specific) */
if (mb == (WChar_t)BAD) //JAC
{
#ifdef DONT_NEED_THIS // JAC
_Seterrno(EILSEQ) ;
#endif // DONT_NEED_THIS - JAC
return(-1);
}
/* Emit the value big-endian, using as many bytes as its magnitude needs */
if (mb < 0x100)
{
*s = (char)( mb & 0xff ); // JAC
return(1) ;
}
else if (mb < 0x10000)
{
*s++ = (char)( (mb >> 8) & 0xff ); // JAC
*s = (char)( mb & 0xff ); // JAC
return(2) ;
}
else if (mb < 0x1000000)
{
*s++ = (char)( (mb >> 16) & 0xff ); // JAC
*s++ = (char)( (mb >> 8) & 0xff ); // JAC
*s = (char)( mb & 0xff ); // JAC
return(3) ;
}
else
{
/* Entry point for the GB18030 special cases handled at the top */
#ifdef OUR_CS_GB18030_specific /* Deal with 8431A438 and 9 separately */
success_exit:
#endif /* OUR_CS_GB18030_specific */
*s++ = (char)( (mb >> 24) & 0xff ); // JAC
*s++ = (char)( (mb >> 16) & 0xff ); // JAC
*s++ = (char)( (mb >> 8) & 0xff ); // JAC
*s = (char)( mb & 0xff ); // JAC
return(4) ;
}
}
#ifdef USING_OPEN_SOURCE_MBSTOWCS
/*
 * MBSTOWCS - convert the NUL-terminated multibyte string ts into wide
 * characters.
 *
 * Arguments: pwcs - output array; when NULL the characters are only
 *                   counted and nothing is stored.
 *            ts   - multibyte input string.
 *            n    - maximum number of wide characters to store.
 *            hdl  - not used in this routine.
 *
 * Returns:   0 if ts is NULL or empty, otherwise the number of
 *            characters counted/stored (the terminator excluded), or
 *            (size_t)-1 when a character is invalid or has no mapping.
 *            The output is zero-terminated only if fewer than n
 *            characters were stored.
 */
size_t MBSTOWCS(WChar_t *pwcs, const char *ts, size_t n, _LC_charmap_t *hdl) // JAC
{
    uchar_t *s = (uchar_t *)ts ;
    WChar_t wc ; // JAC
    int cnt = 0 ;
    int idx ;

    if (s == NULL)
        return(0) ;

    /*
     * Fix QAR 92292 - UCS-4 locale mbstowcs problem
     * (empty input: store the terminator if there is room, report 0)
     */
    if (*s == '\0')
    {
        if (pwcs && (n >= 1)) *pwcs = 0 ;
        return(0) ;
    }

    if (pwcs == NULL)
    {
        /*
         * Count the number of multibyte characters in s
         */
        while (*s != '\0')
        {
            if (isascii(*s))
                s++ ;
            else
            {
                idx = MBINDEX(&s, MBCURMAX) ;   /* Advances s */
                if (idx < 0)
                    goto bad_char ;
            }
            cnt++ ;
        }
        return(cnt) ;
    }

    while ((*s != '\0') && ((size_t)cnt < n))
    {
        if (isascii(*s))
        {
            wc = MBGETASCII(*s) ;
            s++ ;
        }
        else
        {
            idx = MBINDEX(&s, MBCURMAX) ;       /* Advances s */
            if (idx < 0)
                goto bad_char ;
            if (IS_UCODE(idx))
                wc = GET_UCODE(idx) ;
            else if (ISIDXU(idx))
                wc = IDXU_UCS(idx) ;
            else
            {
                wc = MBGETVAL(idx) ;
                if (wc == BAD)
                    goto bad_char ;
            }
        }
        *pwcs++ = wc ;
        cnt++ ;
    }

    if ((size_t)cnt < n)
        *pwcs = 0 ;     /* Terminate the wide character string */
    return(cnt) ;

bad_char:
#ifdef DONT_NEED_THIS // JAC
    _Seterrno(EILSEQ) ;
#endif // DONT_NEED_THIS - JAC
    return((size_t)-1);
}
#endif // USING_OPEN_SOURCE_MBSTOWCS
#ifdef USING_OPEN_SOURCE_WCSTOMBS
/*
 * WCSTOMBS - convert a zero-terminated wide-character string to multibyte.
 *
 *   s     destination buffer, or NULL to only compute the byte count
 *   pwcs  wide-character source; NULL is an error
 *   n     size of s in bytes
 *   hdl   charmap handle (not referenced directly here; the conversion
 *         macros may use it)
 *
 * Returns the number of bytes produced (excluding the terminator), or
 * (size_t)-1 if pwcs is NULL or a character has no multibyte mapping.
 * When s is non-NULL, conversion stops before the first character that
 * would not fit completely, and a NUL is appended only if room remains.
 */
size_t WCSTOMBS(char *s, const WChar_t *pwcs, size_t n, _LC_charmap_t *hdl) // JAC
{
#ifdef DONT_NEED_THIS // JAC
    int cnt, len, row ;
#else
    int cnt, len ;
#endif // DONT_NEED_THIS - JAC
    int shift ;
    WChar_t mb, wc ; // JAC

    if (pwcs == NULL)
    {
#ifdef DONT_NEED_THIS // JAC
        _Seterrno(EILSEQ) ;
#endif // DONT_NEED_THIS - JAC
        return((size_t)-1);
    }

    cnt = 0 ;
    while ((wc = *pwcs) != 0)
    {
        mb = 0 ;
        if (isascii(wc))
        {
            mb = WCGETASCII(wc) ;
        }
        else if (UCS_UDC(wc) && UDCFUNC)
        {
            mb = (*UDCFUNC)(wc) ;
        }

        /*
         * A zero mb here means nothing has been found yet (UDCFUNC may
         * return 0), so consult the mapping table and, failing that,
         * the optional UCSFUNC hook.
         */
        if (wc && (mb == 0))
        {
            mb = WCGETVAL(wc) ;
            if ((mb == (WChar_t)BAD) && UCSFUNC) // JAC
                mb = (*UCSFUNC)(wc) ;
        }

        if (mb == (WChar_t)BAD) // JAC
        {
#ifdef DONT_NEED_THIS // JAC
            _Seterrno(EILSEQ) ;
#endif // DONT_NEED_THIS - JAC
            return((size_t)-1);
        }

        /*
         * Number of bytes needed to hold the multibyte value.
         */
        len = 1 ;
        if (mb >= 0x0000100) len = 2 ;
        if (mb >= 0x0010000) len = 3 ;
        if (mb >= 0x1000000) len = 4 ;

        /*
         * Emit the bytes, most significant first, when a buffer is given.
         */
        if (s != NULL)
        {
            if ((size_t)(cnt + len) > n) // (size_t) added - JAC
                break ; /* Character does not fit; stop without counting it */
            for (shift = 8 * (len - 1) ; shift >= 0 ; shift -= 8)
                *s++ = (mb >> shift) & 0xff ;
        }

        cnt += len ;
        pwcs++ ;
    }

    /* Terminate the string only if there is still room */
    if ((s != NULL) && ((size_t)cnt < n)) // (size_t) added - JAC
        *s = '\0' ;
    return(cnt) ;
}
#endif // USING_OPEN_SOURCE_WCSTOMBS
#ifdef USING_OPEN_SOURCE_MBTOPC
/*
 * MBTOPC - convert one multibyte character to its process code.
 *
 * Works like MBTOWC with an extra status output:
 *
 *   pwc     receives the converted character; may be NULL
 *   ts      multibyte input; a NULL pointer yields 0 with *err == 0
 *   maxlen  number of input bytes available
 *   err     set to 0 on success, to -1 for an illegal character, or to a
 *           positive byte count when more input is needed (MBCURMAX if
 *           the sequence is incomplete, otherwise the character length)
 *   hdl     charmap handle (not referenced directly here; the conversion
 *           macros may use it)
 *
 * Returns the length in bytes of the converted character, or 0 whenever
 * nothing was converted.
 */
size_t
MBTOPC(WChar_t *pwc, char *ts, size_t maxlen, int *err, _LC_charmap_t *hdl) // JAC
{
    uchar_t *s = (uchar_t *)ts ; /* Work on unsigned bytes */
    WChar_t wc ; // JAC
#ifdef DONT_NEED_THIS // JAC
    int idx, row, len ;
#else
    int idx, len ;
#endif // DONT_NEED_THIS - JAC

    *err = 0 ;

    if (s == NULL)
        return(0);

    if (!isascii(*s))
    {
        idx = MBINDEX (&s, maxlen) ;

        /* The incomplete-input code is tested before the generic error check */
        if (idx == ERR_INPUT_INCOMPLETE)
        {
            *err = MBCURMAX ; /* Ask for the maximum MB length */
            return(0) ;
        }
        if (idx < 0)
        {
            *err = -1 ; /* Invalid character */
            return(0) ;
        }

        if (IS_UCODE(idx))
        {
            wc = GET_UCODE(idx) ;
        }
        else if (ISIDXU(idx))
        {
            wc = IDXU_UCS(idx) ;
        }
        else
        {
            wc = MBGETVAL(idx) ;
            if (wc == BAD)
            {
                *err = -1 ; /* Invalid character */
                return(0) ;
            }
        }

        /* s was moved by MBINDEX; its distance from ts is the length */
        len = s - (uchar_t *)ts ;
    }
    else
    {
        wc = MBGETASCII(*s) ;
        len = 1 ;
    }

    if ((size_t)len > maxlen) // (size_t) added - JAC
    {
        *err = len ; /* Not enough buffer: report the bytes required */
        return(0) ;
    }

    if (pwc != NULL)
        *pwc = wc ;
    return((size_t)len);
}
#endif // USING_OPEN_SOURCE_MBTOPC
#ifdef USING_OPEN_SOURCE_MBSTOPCS
/*
 * MBSTOPCS - convert a multibyte buffer to process codes.
 *
 *   pwcs      destination array of process codes
 *   pwcs_len  capacity of pwcs, in characters
 *   s         multibyte input
 *   s_len     number of input bytes available
 *   stopchr   byte value that ends the conversion when met in the input
 *   endptr    receives the position at which processing stopped
 *   err       set to 0 on success, -1 for an invalid character, or the
 *             number of additional input bytes needed when the last
 *             character extends past s_len
 *   hdl       charmap handle (not referenced directly here; the conversion
 *             macros may use it)
 *
 * Returns the number of process codes stored in pwcs.
 */
size_t MBSTOPCS(WChar_t *pwcs, size_t pwcs_len, const char *s, size_t s_len, /* JAC */
                int stopchr, char **endptr, int *err, _LC_charmap_t *hdl)
{
    int pwcs_cnt = 0 ;
#ifdef DONT_NEED_THIS // JAC
    int len ;
#endif // DONT_NEED_THIS - JAC
    uchar_t *us = (uchar_t *)s ;

    /*
     * err is 0 if everything works
     */
    *err = 0;

    /*
     * Stop the processing if there is no more room for process code
     * or all the characters in s have been processed.
     */
    while (((size_t)pwcs_cnt < pwcs_len) && (s_len > 0)) // (size_t) added - JAC
    {
        /*
         * If we hit stopchr in s, set the end pointer to the character
         * after the stopchr and break out of the while.
         *
         * NOTE(review): *us is a uchar_t while stopchr is narrowed to
         * char; if char is signed, a stopchr >= 0x80 presumably never
         * matches. Left unchanged because callers may rely on it -
         * confirm before altering.
         */
        if (*us == (char) stopchr)
        {
            us++ ;
            break;
        }

        /*
         * Convert the next character of s to process code and advance
         * by the number of bytes it occupies. If the conversion fails,
         * record the reason in *err and break out of the while.
         */
        if (isascii(*us))
        {
            pwcs[pwcs_cnt] = MBGETASCII(*us) ;
            us++, s_len-- ;
        }
        else
        {
            uchar_t *us_old = us ;
            WChar_t wc ; // JAC
            size_t used ;
#ifdef DONT_NEED_THIS // JAC
            int idx, row ;
#else
            int idx ;
#endif // DONT_NEED_THIS - JAC

            idx = MBINDEX(&us, s_len) ;
            if (idx < 0)
            {
                *err = -1 ;
                break ; /* Invalid character */
            }
            if (IS_UCODE(idx))
                wc = GET_UCODE(idx) ;
            else if (ISIDXU(idx))
                wc = IDXU_UCS(idx) ;
            else if ((wc = MBGETVAL(idx)) == BAD)
            {
                *err = -1 ;
                break ; /* Invalid character */
            }

            /*
             * s_len is unsigned, so it must be compared with the number
             * of bytes consumed BEFORE subtracting: a "result < 0" test
             * on a size_t can never be true and the count would wrap.
             */
            used = (size_t)(us - us_old) ;
            if (used > s_len)
            {
                *err = (int)(used - s_len) ; /* Need this many more bytes */
                us = us_old ; /* Point back at the start of the failed character */
                break ;
            }
            s_len -= used ;
            pwcs[pwcs_cnt] = wc ;
        }

        /*
         * Increment the process code counter
         */
        pwcs_cnt++;
    }

    *endptr = (char *)us ; /* Set the end pointer */
    return(pwcs_cnt) ;
}
#endif // USING_OPEN_SOURCE_MBSTOPCS
#endif /* USE_OUR_MB_WC_DATA_TABLES */