blob: 547b152bfa856efd4423f06e4dc30e3b4043b486 [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/*
* HISTORY
* $Log: iconv_gen.c,v $
* Revision 1.1.19.1 2001/12/07 15:37:53
* * Add checking for the CONV_NO_UDC flag to disable mapping to UDC.
* * Merge codes from kernel/bsd.
*
* Revision 1.1.15.2 2000/12/11 19:41:22
* Enable table mapping for private use area character if 0 is returned
* from the UDC function.
*
* Revision 1.1.15.1 2000/10/16 18:44:52
* COSIX.Zulu to Yankee merge for GB18030 support.
*
* Revision 1.1.13.1 2000/08/07 14:33:48
* Support GB18030 and map UTF-8 surrogate pair to the right UCS code.
*
* Revision 1.1.11.2 2000/01/19 19:56:15
* Fix cut & paste error in input_ucs2().
* [2000/01/14 14:38:48 Long_Man]
*
* Revision 1.1.11.1 2000/01/13 20:25:46
* Remove the old conversion routine in favor of the enhanced
* __<codeset>_index() routine, and add algorithmic UDC conversion
* support. Also add a number of conversion routines to support
* special font charset to Unicode conversion.
*
* Revision 1.1.9.4 1998/08/21 17:51:25
* Fix iconv dump problem when converting ISO8859-1 to UCS-2.
* [1998/08/20 18:50:33 Long_Man]
*
* Revision 1.1.9.3 1998/05/04 15:21:21
* Fix typo error in output_ucs2().
* [1998/05/04 14:54:26 Long_Man]
*
* Revision 1.1.9.2 1998/03/26 18:56:28
* Prepend __ to global functions to prevent namespace pollution
* and fix problem in UCS-2 and UCS-4 routines.
* [1998/03/23 14:43:44 Long_Man]
*
* Revision 1.1.5.6 1997/06/17 21:29:46
* Fix QAR 53565 by guarding against the boundary case.
* [1997/06/16 21:37:44 Long_Man]
*
* Revision 1.1.5.5 1997/04/07 19:04:57
* Fix QAR 52035 by adjusting input pointer to failure location.
* [1997/04/03 22:40:55 Long_Man]
*
* Revision 1.1.5.4 1997/02/24 21:01:51
* Fix UCS locale build by not writing BOM if it causes E2BIG error.
* [1997/02/21 21:36:41 Long_Man]
*
* Fix QAR 51653: Missing first character in UCS-2 output if BOM enabled.
* [1997/02/21 14:58:45 Long_Man]
*
* Revision 1.1.5.3 1997/01/07 15:58:47
* Add new converters for cp437 and cp850 codeset support.
* [1996/12/24 16:15:44 Long_Man]
*
* Revision 1.1.5.2 1996/11/22 17:02:32
* Improve execution speed & other minor bug fixes.
* [1996/11/12 17:19:26 Long_Man]
*
* Consolidate UCS iconv converter binaries & support UCS-2.
* [1996/10/28 21:11:39 Long_Man]
*
* Revision 1.1.2.4 1995/07/31 14:37:37
* QAR 35010 - Verify the input buffer contains a full
* wchar_t for processing from UCS-4/UTF-8 to a target
* codeset. If inlen < wchar_t, set errno=EINVAL and
* return ICONV_TRUNC.
* [1995/07/12 14:41:34 Bill_Fountas]
*
* Fix pointer and length settings for E2BIG errors
* [1995/06/27 13:17:11 Bill_Fountas]
*
* Revision 1.1.2.3 1995/06/30 14:45:11
* Fix pointer and length settings for E2BIG errors
* [1995/06/30 13:31:45 Bill_Fountas]
*
* Revision 1.1.2.2 1995/06/07 16:12:48
* Initial iconv Unicode support check in
* [1995/06/02 14:59:11 Kelly_Mulheren]
*
* $EndLog$
*/
#ifndef _KERNEL
#include <string.h>
#else
#include <stddef.h>
#endif
/*#include <sys/malloc.h> //BRL & JAC */
#include "fcconv.h"
#include "multi-byte.h"
/*
* Macros to reverse the byte ordering
*/
/*
 * REVERSE_UCS4_BYTE - swap the four bytes of a 32-bit UCS-4 value.
 *
 * The masks are unsigned so that every shift operates on an unsigned
 * operand. With plain int masks the low byte was shifted into bit 31 of
 * a signed int, which is undefined behaviour when that byte is >= 0x80.
 * The argument is evaluated four times; do not pass an expression with
 * side effects.
 */
#define REVERSE_UCS4_BYTE(ucs4) ((((ucs4) & 0x000000ffU) << 24) | \
(((ucs4) & 0x0000ff00U) << 8) | \
(((ucs4) & 0x00ff0000U) >> 8) | \
(((ucs4) & 0xff000000U) >> 24))
/*
 * REVERSE_UCS2_BYTE - swap the two bytes of a 16-bit UCS-2 value.
 * The argument is evaluated twice.
 */
#define REVERSE_UCS2_BYTE(ucs2) ((((ucs2) & 0x00ff) << 8) | \
(((ucs2) & 0xff00) >> 8))
/*
* Macro to set error status and returned value
*/
#ifdef DONT_NEED_THIS /* JAC */
/*
 * SET_ERR_RETURN(error)
 *
 * Translates one of the internal ERR_* codes held in the caller's
 * `error` variable into the matching errno value and ICONV_* status,
 * then returns that status from the enclosing function. The macro
 * writes to the caller's local `retval`, so the caller must declare it.
 * A code that matches none of the three ERR_* values leaves both
 * `error` and `retval` untouched.
 *
 * The user-space variant also publishes the errno value through
 * _Seterrno(); the kernel variant only returns the status.
 */
#ifndef _KERNEL
#define SET_ERR_RETURN(error) \
{ \
    if (error == ERR_INVALID_CHAR) { \
        error = EILSEQ ; \
        retval = ICONV_INVAL ; \
    } \
    else if (error == ERR_INPUT_INCOMPLETE) { \
        error = EINVAL ; \
        retval = ICONV_TRUNC ; \
    } \
    else if (error == ERR_BUFFER_OVERRUN) { \
        error = E2BIG ; \
        retval = ICONV_OVER ; \
    } \
    _Seterrno(error ) ; \
    return (retval) ; \
}
#else
#define SET_ERR_RETURN(error) \
{ \
    if (error == ERR_INVALID_CHAR) { \
        error = EILSEQ ; \
        retval = ICONV_INVAL ; \
    } \
    else if (error == ERR_INPUT_INCOMPLETE) { \
        error = EINVAL ; \
        retval = ICONV_TRUNC ; \
    } \
    else if (error == ERR_BUFFER_OVERRUN) { \
        error = E2BIG ; \
        retval = ICONV_OVER ; \
    } \
    return (retval) ; \
}
#endif
#endif /* DONT_NEED_THIS - JAC */
/**********
* __from_ucs_exec
*
* Driver routine for converting from UCS/UTF.
**********/
#ifdef USING_OPEN_SOURCE_from_ucs_exec /* JAC */
/*
 * __from_ucs_exec - convert a UCS/UTF input buffer to the target codeset.
 *
 * cd             conversion descriptor; supplies the input reader
 *                (infunc), the optional conversion and UDC functions,
 *                the mapping tables and the substitution settings.
 * in_buff        in/out: current position in the input buffer. A NULL
 *                in_buff makes the function return ICONV_DONE at once.
 * in_bytes_left  in/out: number of input bytes still unread.
 * out_buff       in/out: current position in the output buffer.
 * out_bytes_left in/out: free space left in the output buffer.
 *
 * Returns ICONV_DONE when the whole input was consumed without error;
 * otherwise the error code is handed to SET_ERR_RETURN. On return the
 * four in/out parameters are always updated. The input position is only
 * advanced by the for-loop increment, so after a `break` it still points
 * at the character that could not be converted.
 */
int
__from_ucs_exec (_LC_fcconv_iconv_t *cd,
uchar_t** in_buff , size_t *in_bytes_left ,
uchar_t** out_buff, size_t *out_bytes_left)
{
uchar_t *outptr ; /* Pointer to output buffer */
uchar_t *inptr ; /* Pointer to input buffer */
uchar_t *inptr2 ; /* Pointer to input buffer */
size_t outlen ; /* Number of outbytes left */
size_t inlen ; /* Number of inbytes left */
WChar_t new_char ; /* converted character */ /*JAC */
int inword ; /* low word of input */
unsigned int char_size ; /* Output size of MB chars */ /*BRL & JAC */
int error ; /* Error code */
int retval ; /* Return value */
/* idx is not referenced directly below; presumably it is used by the
 * GET_OVAL expansion - TODO confirm before removing it. */
int idx ;
int nomap ; /* No ASCII mapping */
int d2map ; /* ASCII direct map */
int cfirst ; /* Conversion function first */
int cnext ; /* Conversion function next */
cfunc_t conv ;
if (!in_buff)
return(ICONV_DONE) ;
outptr = *out_buff ;
outlen = *out_bytes_left ;
inptr = *in_buff ;
inlen = *in_bytes_left ;
inptr2 = inptr ;
error = 0 ;
nomap = cd->flags & CONV_ASCII_ONOMAP ;
d2map = cd->flags & CONV_ASCII_ODMAP2 ;
cfirst = cd->flags & CONV_FUNC_FIRST ;
cnext = cd->flags & CONV_FUNC_NEXT ;
conv = (cfunc_t)cd->outfunc ;
/**********
* perform conversion
*
* inptr2 is advanced by infunc; the loop increment then commits the
* consumed bytes to inptr/inlen. A `continue` therefore consumes the
* current character, while a `break` leaves inptr on it.
**********/
for ( ; inlen > 0 ; inlen -= inptr2 - inptr, inptr = inptr2) {
inword = (*cd->infunc)(cd, &inptr2, inlen) ;
/* A negative value from infunc is an error code, not a character */
if (inword < 0) {
error = inword ;
break ;
}
/**********
* Translate input word to a new character
**********/
if ((nomap || d2map) && _ISASCII(inword))
{
/* ASCII shortcut: pass through unchanged, or use the direct table */
new_char = nomap ? inword : cd->ocell2_tab[0][inword] ;
}
else if (UCS_UDC(inword)) {
/*
* Map UCS UDC character to the corresponding multibyte
* character, if applicable, and CONV_NO_UDC flag not defined.
*/
new_char = (cd->udcfunc && !(cd->flags & CONV_NO_UDC))
? (*cd->udcfunc)(inword) : BAD ;
/*
* Do a table mapping if the UDC function returns 0
*/
if (new_char == 0)
{
GET_OVAL(cd, inword, new_char) ;
}
}
else if (cfirst && conv)
{
/*
* Invoke the conversion function first before doing a table
* lookup.
*/
if ((new_char = (*conv)(inword)) == BAD)
{
/* A non-zero maxucs is an upper bound on acceptable input codes */
if (cd->maxucs)
if (inword > cd->maxucs)
{
error = ERR_INVALID_CHAR ; /* Invalid sequence */
break ;
}
GET_OVAL(cd, inword, new_char) ;
}
}
else
{
/* A non-zero maxucs is an upper bound on acceptable input codes */
if (cd->maxucs)
if (inword > cd->maxucs)
{
error = ERR_INVALID_CHAR ; /* Invalid sequence */
break ;
}
GET_OVAL(cd, inword, new_char) ;
/**********
* invoke special conversion function if defined
**********/
if (conv)
{
/*
* If cnext is defined, pass input word through conv function
* if table lookup fails. Otherwise, pass the new character
* to conversion function to modify it.
*/
if (cnext)
{
if (new_char == BAD)
new_char = (*conv)(inword) ;
}
else if (new_char != BAD)
new_char = (*conv)(new_char) ;
}
}
/**********
* valid character?
*
* Fallback order for an unmappable character: default character,
* then default string (an empty one means "drop the character"),
* then silently drop U+FFFD, otherwise fail.
**********/
if (new_char == BAD) {
if (cd->defchar != 0)
new_char = cd->defchar ;
else if (cd->defstr) {
if (cd->defstrlen == 0)
continue ; /* Skip the invalid character */
if ((size_t)cd->defstrlen > outlen) { /*BRL & JAC */
error = ERR_BUFFER_OVERRUN ; /* Output buf overflow */
break ;
}
memcpy(outptr, cd->defstr, cd->defstrlen) ;
outptr += cd->defstrlen ;
outlen -= cd->defstrlen ;
continue ;
}
else if (inword == 0xFFFD) { /* Replacement character ? */
/*
* Skip replacement character, if found
*/
continue ;
}
else {
error = ERR_INVALID_CHAR ;
break ;
}
}
/**********
* calculate the character size in byte
* (the number of bytes needed to hold the highest non-zero byte)
**********/
if ((new_char & 0xffffff00) == 0)
char_size = 1 ;
else if ((new_char & 0xffff0000) == 0)
char_size = 2 ;
else if ((new_char & 0xff000000) == 0)
char_size = 3 ;
else
char_size = 4 ;
/**********
* have we exceeded size of output buffer?
**********/
if (outlen < char_size) {
error = ERR_BUFFER_OVERRUN ;
break ;
}
/**********
* Output the bytes, most significant first. The cases fall through
* on purpose so that exactly char_size bytes are written.
**********/
switch (char_size) {
case 4: *outptr++ = (new_char >> 24) & 0xff;
case 3: *outptr++ = (new_char >> 16) & 0xff;
case 2: *outptr++ = (new_char >> 8) & 0xff;
case 1: *outptr++ = new_char & 0xff;
break ;
}
outlen -= char_size ;
}
/**********
* set output parameters
**********/
*in_buff = inptr ;
*out_buff = outptr ;
*in_bytes_left = inlen ;
*out_bytes_left = outlen ;
if (!error)
return(ICONV_DONE) ;
/* SET_ERR_RETURN uses the locals `error` and `retval` by name */
SET_ERR_RETURN(error) ;
} /* __from_ucs_exec */
#endif /* USING_OPEN_SOURCE_from_ucs_exec // JAC */
/**********
* __sb_to_ucs_exec
*
* Driver routine for converting from single-byte to UCS/UTF.
**********/
#ifdef USING_OPEN_SOURCE_sb_to_ucs_exec /* JAC */
/*
 * __sb_to_ucs_exec - convert a single-byte codeset buffer to UCS/UTF.
 *
 * Each input byte is mapped to a UCS value (directly for ASCII when the
 * descriptor asks for it, through GET_IVAL otherwise) and handed to
 * cd->outfunc, which writes the encoded form and returns the number of
 * bytes it produced or a negative error code.
 *
 * A NULL in_buff returns ICONV_DONE immediately. Otherwise the four
 * in/out parameters are updated on every exit path; after an error the
 * input position is left on the offending byte.
 */
int
__sb_to_ucs_exec(_LC_fcconv_iconv_t *cd,
                 uchar_t** in_buff , size_t *in_bytes_left ,
                 uchar_t** out_buff, size_t *out_bytes_left)
{
    uchar_t *inptr ;    /* Current input position */
    uchar_t *outptr ;   /* Current output position */
    size_t   inlen ;    /* Input bytes not yet consumed */
    size_t   outlen ;   /* Output bytes still free */
    WChar_t  input_ch ; /* Byte being converted */
    WChar_t  new_char ; /* Its UCS value */
    int      nomap ;    /* ASCII passes through unchanged */
    int      d2map ;    /* ASCII goes through the direct table */
    int      error ;    /* Error code, 0 when none */
    int      retval ;   /* outfunc result; also used by SET_ERR_RETURN */

    if (!in_buff)
        return ICONV_DONE;

    outptr = *out_buff ;
    outlen = *out_bytes_left ;
    inptr  = *in_buff ;
    inlen  = *in_bytes_left ;
    d2map  = cd->flags & CONV_ASCII_IDMAP2 ;
    nomap  = cd->flags & CONV_ASCII_INOMAP ;
    error  = 0 ;

    /*
     * One input byte per iteration. `continue` still runs the loop
     * increment (the byte is consumed); `break` does not.
     */
    for ( ; inlen != 0 ; ++inptr, --inlen) {
        input_ch = *inptr ;

        if ((nomap || d2map) && _ISASCII(input_ch)) {
            new_char = nomap ? input_ch : cd->icell2_tab[0][input_ch] ;
        }
        else {
            GET_IVAL(cd, input_ch, new_char) ;

            /* A non-zero maxucs caps the UCS values that may be produced */
            if (cd->maxucs && (new_char != BAD) && (new_char > cd->maxucs))
                new_char = BAD ;

            if (new_char == BAD) {
                if (cd->defucsch) {
                    /* Substitute the default UCS character */
                    new_char = cd->defucsch ;
                }
                else {
                    /* Only an empty default string means "drop it" */
                    if (!(cd->defstr && (cd->defstrlen == 0))) {
                        error = ERR_INVALID_CHAR ;
                        break ;
                    }
                    continue ;
                }
            }
        }

        retval = (*cd->outfunc)(cd, outptr, outlen, new_char) ;
        if (retval < 0) {
            error = retval ;
            break ;
        }
        outlen -= retval ;
        outptr += retval ;
    }

    /* Report progress back to the caller */
    *out_bytes_left = outlen ;
    *out_buff       = outptr ;
    *in_bytes_left  = inlen ;
    *in_buff        = inptr ;

    if (!error)
        return(ICONV_DONE) ;
    SET_ERR_RETURN(error) ;
} /* __sb_to_ucs_exec */
#endif /* USING_OPEN_SOURCE_sb_to_ucs_exec // JAC */
/**********
* __sb_to_sb_exec
*
* Driver routine for converting from single-byte to single-byte via UCS.
**********/
#ifdef USING_OPEN_SOURCE_sb_to_sb_exec /* JAC */
/*
 * __sb_to_sb_exec - convert single-byte to single-byte via UCS.
 *
 * Every input byte goes through two table stages: source byte -> UCS
 * (GET_IVAL) and UCS -> target byte (GET_OVAL). ASCII bytes bypass a
 * stage when the matching CONV_ASCII_* flag is set in cd->flags.
 *
 * When a stage yields BAD the fallback order is: the default character
 * for that stage, an empty default string (drop the byte), a non-empty
 * default string (copied to the output), otherwise ERR_INVALID_CHAR.
 *
 * A NULL in_buff returns ICONV_DONE immediately. Otherwise the four
 * in/out parameters are updated on every exit path; after an error the
 * input position is left on the offending byte.
 *
 * The default string is copied with memcpy(), as __from_ucs_exec already
 * does, instead of the legacy bcopy() which <string.h> does not declare.
 */
int
__sb_to_sb_exec(_LC_fcconv_iconv_t *cd,
                uchar_t** in_buff , size_t *in_bytes_left ,
                uchar_t** out_buff, size_t *out_bytes_left)
{
    uchar_t *inptr ;    /* Pointer to input buffer */
    uchar_t *outptr ;   /* Pointer to output buffer */
    size_t inlen ;      /* Number of inbytes left */
    size_t outlen ;     /* Number of outbytes left */
    WChar_t new_char ;  /* converted character */ /*JAC */
    WChar_t input_ch ;  /* Input character */ /*JAC */
    int retval ;        /* Function return value (used by SET_ERR_RETURN) */
    int error ;         /* Error code */
    int inomap ;        /* No input ASCII mapping */
    int id2map ;        /* Input ASCII direct map */
    int onomap ;        /* No output ASCII mapping */
    int od2map ;        /* Output ASCII direct map */

    if (!in_buff)
        return ICONV_DONE;

    inptr = *in_buff ;
    inlen = *in_bytes_left ;
    outptr = *out_buff ;
    outlen = *out_bytes_left ;
    error = 0 ;
    inomap = cd->flags & CONV_ASCII_INOMAP ;
    id2map = cd->flags & CONV_ASCII_IDMAP2 ;
    onomap = cd->flags & CONV_ASCII_ONOMAP ;
    od2map = cd->flags & CONV_ASCII_ODMAP2 ;

    /**********
     * perform conversion
     **********/
    while (inlen > 0) {
        /*
         * Stage 1: source byte -> UCS
         */
        input_ch = *inptr ;
        if ((inomap || id2map) && _ISASCII(input_ch))
            new_char = inomap ? input_ch : cd->icell2_tab[0][input_ch] ;
        else
        {
            GET_IVAL(cd, input_ch, new_char) ;
            if (new_char == BAD) {
                if (cd->defucsch)
                    new_char = cd->defucsch ;
                else if (cd->defstr && (cd->defstrlen == 0)) {
                    inptr++, inlen-- ;
                    continue ; /* Skip that character */
                }
                else if (cd->defstrlen > 0) {
                    if (outlen < (size_t)cd->defstrlen) {
                        /* Not enough output buffer */
                        error = ERR_BUFFER_OVERRUN ;
                        break ;
                    }
                    /* Copy default string to output */
                    memcpy(outptr, cd->defstr, cd->defstrlen) ;
                    inptr++, inlen-- ;
                    outptr += cd->defstrlen ;
                    outlen -= cd->defstrlen ;
                    continue ;
                }
                else {
                    error = ERR_INVALID_CHAR ;
                    break ;
                }
            }
        }

        /* From here on one output byte will be needed */
        if (((ssize_t)outlen) <= 0) {
            error = ERR_BUFFER_OVERRUN ;
            break ;
        }

        /*
         * Stage 2: UCS -> target byte
         */
        input_ch = new_char ;
        if ((onomap || od2map) && _ISASCII(input_ch))
            new_char = onomap ? input_ch : cd->ocell2_tab[0][input_ch] ;
        else
        {
            GET_OVAL(cd, input_ch, new_char) ;
            if (new_char == BAD) {
                if (cd->defchar)
                    new_char = cd->defchar ;
                else if (cd->defstr && (cd->defstrlen == 0)) {
                    inptr++, inlen-- ;
                    continue ; /* Skip that character */
                }
                else if (cd->defstrlen > 0) {
                    if (outlen < (size_t)cd->defstrlen) {
                        /* Not enough output buffer */
                        error = ERR_BUFFER_OVERRUN ;
                        break ;
                    }
                    /* Copy default string to output */
                    memcpy(outptr, cd->defstr, cd->defstrlen) ;
                    inptr++, inlen-- ;
                    outptr += cd->defstrlen ;
                    outlen -= cd->defstrlen ;
                    continue ;
                }
                else {
                    error = ERR_INVALID_CHAR ;
                    break ;
                }
            }
        }

        /* Store the converted byte and advance both buffers */
        *outptr = new_char ;
        inptr ++, inlen -- ;
        outptr++, outlen-- ;
    }

    /**********
     * set output parameters
     **********/
    *in_buff = inptr ;
    *out_buff = outptr ;
    *in_bytes_left = inlen ;
    *out_bytes_left = outlen ;
    if (!error)
        return(ICONV_DONE) ;
    SET_ERR_RETURN(error) ;
} /* __sb_to_sb_exec */
#endif /* USING_OPEN_SOURCE_sb_to_sb_exec // JAC */
/**********
* __to_ucs_exec
*
* Generic driver routine for converting from any character set to UCS.
* It is assumed that ASCII is a proper subset of the character set except
* for UCS characters.
**********/
#ifdef USING_OPEN_SOURCE_to_ucs_exec /* JAC */
/*
 * __to_ucs_exec - convert a buffer in any source codeset to UCS/UTF.
 *
 * Two loops are used, selected by cd->srccode:
 *   - source is UCS: infunc already yields a UCS-4 value, which is passed
 *     straight to outfunc;
 *   - otherwise: infunc yields a table index that is turned into a UCS-4
 *     value (direct code, UDC range, or GET_IVAL lookup) before outfunc
 *     is called. ASCII bytes skip infunc when either CONV_ASCII_INOMAP
 *     or CONV_ASCII_IDMAP2 is set.
 *
 * A NULL in_buff returns ICONV_DONE immediately. Otherwise the four
 * in/out parameters are updated on every exit path. In both loops the
 * input position is committed only after the character was written, so
 * after an error it still points at the character that failed.
 */
int
__to_ucs_exec(_LC_fcconv_iconv_t *cd,
uchar_t** in_buff , size_t *in_bytes_left ,
uchar_t** out_buff, size_t *out_bytes_left)
{
uchar_t *inptr ; /* Committed input position */
uchar_t *inptr2 ; /* Read-ahead position, advanced by infunc */
uchar_t *outptr ; /* Current output position */
size_t inlen ; /* Input bytes not yet consumed */
size_t outlen ; /* Output bytes still free */
int inword ; /* Value returned by infunc (or the raw ASCII byte) */
WChar_t outword ; /*JAC */
int retval ; /* outfunc result; also used by SET_ERR_RETURN */
int error ; /* Error code */
int chkasc ; /* Check for ASCII */
int inomap ; /* No mapping needed for ASCII */
if (!in_buff)
return ICONV_DONE;
inptr = *in_buff ;
inlen = *in_bytes_left ;
outptr = *out_buff ;
outlen = *out_bytes_left ;
inptr2 = inptr ;
error = 0 ;
chkasc = cd->flags & (CONV_ASCII_INOMAP|CONV_ASCII_IDMAP2) ;
inomap = cd->flags & CONV_ASCII_INOMAP ;
/**********
* Perform conversion
**********/
if (cd->srccode == UCS) while (inlen > 0) {
/*
* infunc will return UCS-4 value
*/
if ((inword = (*cd->infunc)(cd, &inptr2, inlen)) < 0)
{
error = inword ;
break ;
}
retval = (*cd->outfunc)(cd, outptr, outlen, inword) ;
if (retval < 0)
{
error = retval ;
break ;
}
/* Commit the consumed input only after the output was written */
inlen -= inptr2 - inptr ;
inptr = inptr2 ;
outlen -= retval ;
outptr += retval ;
}
/*
* Non-UCS source. The loop increment commits the bytes consumed through
* inptr2, so `continue` drops the current character and `break` leaves
* inptr on it.
*/
else for ( ; inlen > 0 ; inlen -= inptr2 - inptr, inptr = inptr2) {
if (chkasc && _ISASCII(inword = *inptr2))
{
/* ASCII shortcut: consume one byte without calling infunc */
inptr2++ ;
outword = inomap ? inword : cd->icell2_tab[0][inword] ;
}
else
{
/*
* infunc will return UCS-4 table index
*/
if ((inword = (*cd->infunc)(cd, &inptr2, inlen)) < 0)
{
error = inword ;
break ;
}
/*
* Convert table index into UCS-4
*/
if (inword == BAD)
outword = BAD ;
else if (IS_UCODE(inword))
outword = GET_UCODE(inword) ;
else
{
/*
* Check for UDC
*/
if (ISIDXU(inword))
outword = IDXU_UCS(inword) ;
else
{
GET_IVAL(cd, inword, outword)
}
/*
* Restrict output to less than ICONV_MAXUCS, if defined
*/
if (cd->maxucs && (outword != BAD))
if (outword > cd->maxucs)
outword = BAD ;
}
/*
* Unmappable, or a UDC result while CONV_NO_UDC is set: substitute
* the default UCS character, drop the character when the default
* string is empty, otherwise fail.
*/
if ((outword == BAD) ||
((cd->flags & CONV_NO_UDC) && UCS_UDC(outword))) {
if (cd->defucsch != 0)
outword = cd->defucsch ;
else if (cd->defstr && (cd->defstrlen == 0))
continue ; /* Ignore this character */
else
/*
* Invalid character
* Setup indata & adjust error position as
* there may be a E2BIG error before that.
*/
{
error = ERR_INVALID_CHAR ;
break ;
}
}
}
retval = (*cd->outfunc)(cd, outptr, outlen, outword) ;
if (retval < 0)
{
error = retval ;
break ;
}
outlen -= retval ;
outptr += retval ;
}
/**********
* set output parameters
**********/
*in_buff = inptr ;
*out_buff = outptr ;
*in_bytes_left = inlen ;
*out_bytes_left = outlen ;
if (!error)
return(ICONV_DONE) ;
/* SET_ERR_RETURN uses the locals `error` and `retval` by name */
SET_ERR_RETURN(error) ;
} /* __to_ucs_exec */
#endif /* USING_OPEN_SOURCE_to_ucs_exec // JAC */
/**********
* __cs_to_ucs_exec
*
* Special driver routine for converting from character set which may
* not contain ASCII to UCS.
**********/
#ifdef USING_OPEN_SOURCE_cs_to_ucs_exec /* JAC */
/*
 * __cs_to_ucs_exec - convert a codeset that may lack ASCII to UCS/UTF.
 *
 * Unlike __to_ucs_exec there is no ASCII shortcut: every character is
 * read through cd->infunc, turned into a UCS-4 value and written with
 * cd->outfunc.
 *
 * A NULL in_buff returns ICONV_DONE immediately. Otherwise the four
 * in/out parameters are updated on every exit path; after an error the
 * input position is left on the character that failed.
 */
int
__cs_to_ucs_exec(_LC_fcconv_iconv_t *cd,
                 uchar_t** in_buff , size_t *in_bytes_left ,
                 uchar_t** out_buff, size_t *out_bytes_left)
{
    uchar_t *inptr ;   /* Committed input position */
    uchar_t *inptr2 ;  /* Read-ahead position, advanced by infunc */
    uchar_t *outptr ;  /* Current output position */
    size_t   inlen ;   /* Input bytes not yet consumed */
    size_t   outlen ;  /* Output bytes still free */
    int      inword ;  /* Table index returned by infunc */
    WChar_t  outword ; /* UCS-4 value to emit */
    int      error ;   /* Error code, 0 when none */
    int      retval ;  /* outfunc result; also used by SET_ERR_RETURN */

    if (!in_buff)
        return ICONV_DONE;

    outptr = *out_buff ;
    outlen = *out_bytes_left ;
    inptr  = *in_buff ;
    inlen  = *in_bytes_left ;
    inptr2 = inptr ;
    error  = 0 ;

    /*
     * The loop increment commits what infunc consumed, so `continue`
     * drops the current character and `break` leaves inptr on it.
     */
    for ( ; inlen != 0 ; inlen -= inptr2 - inptr, inptr = inptr2) {
        /* infunc returns a UCS-4 table index, or a negative error code */
        inword = (*cd->infunc)(cd, &inptr2, inlen) ;
        if (inword < 0) {
            error = inword ;
            break ;
        }

        /* Turn the table index into a UCS-4 value */
        if (inword == BAD) {
            outword = BAD ;
        }
        else {
            if (ISIDXU(inword))
                outword = IDXU_UCS(inword) ;    /* user-defined character */
            else
            {
                GET_IVAL(cd, inword, outword)
            }
            /* A non-zero maxucs caps the UCS values that may be produced */
            if (cd->maxucs && (outword != BAD) && (outword > cd->maxucs))
                outword = BAD ;
        }

        /* Unmappable, or a UDC result while CONV_NO_UDC is set */
        if ((outword == BAD) ||
            ((cd->flags & CONV_NO_UDC) && UCS_UDC(outword))) {
            if (cd->defucsch != 0) {
                outword = cd->defucsch ;
            }
            else {
                /* Only an empty default string means "ignore it" */
                if (!(cd->defstr && (cd->defstrlen == 0))) {
                    error = ERR_INVALID_CHAR ;
                    break ;
                }
                continue ;
            }
        }

        retval = (*cd->outfunc)(cd, outptr, outlen, outword) ;
        if (retval < 0) {
            error = retval ;
            break ;
        }
        outptr += retval ;
        outlen -= retval ;
    }

    /* Report progress back to the caller */
    *out_bytes_left = outlen ;
    *out_buff       = outptr ;
    *in_bytes_left  = inlen ;
    *in_buff        = inptr ;

    if (!error)
        return(ICONV_DONE) ;
    SET_ERR_RETURN(error) ;
} /* __cs_to_ucs_exec */
#endif /* USING_OPEN_SOURCE_cs_to_ucs_exec // JAC */
/**********
* __mb_to_mb_exec
*
* Driver routine for converting from multi-byte to multi-byte via UCS.
**********/
#ifdef USING_OPEN_SOURCE_mb_to_mb_exec /* JAC */
/*
 * __mb_to_mb_exec - convert multi-byte to multi-byte via UCS.
 *
 * Each character is read with cd->infunc (which yields a table index),
 * turned into a UCS-4 value, mapped to the target codeset and written
 * out most-significant byte first using as many bytes (1..4) as the
 * mapped value needs.
 *
 * When a mapping yields BAD the fallback order is: the default character
 * for that stage, an empty default string (drop the character), a
 * non-empty default string (copied to the output), otherwise
 * ERR_INVALID_CHAR.
 *
 * A NULL in_buff returns ICONV_DONE immediately. Otherwise the four
 * in/out parameters are updated on every exit path; after an error the
 * input position is left on the character that failed.
 *
 * Fixes relative to the previous version:
 *   - `error` is initialised; it used to be read uninitialised when the
 *     whole input converted cleanly.
 *   - a BAD result from the UDC branch now goes through the same fallback
 *     as a failed table lookup instead of being written out as bytes.
 *   - the default string is copied with memcpy(), as __from_ucs_exec
 *     already does, instead of the legacy bcopy().
 */
int
__mb_to_mb_exec(_LC_fcconv_iconv_t *cd,
                uchar_t** in_buff , size_t *in_bytes_left ,
                uchar_t** out_buff, size_t *out_bytes_left)
{
    uchar_t *inptr ;   /* Committed input position */
    uchar_t *inptr2 ;  /* Read-ahead position, advanced by infunc */
    uchar_t *outptr ;  /* Current output position */
    size_t inlen ;     /* Input bytes not yet consumed */
    size_t outlen ;    /* Output bytes still free */
    int inword ;       /* Table index, later the UCS-4 value */
    WChar_t outword ;  /*JAC */
    int char_size;     /* Multi-byte char size */
    int error ;        /* Error code */
    int retval ;       /* Used by SET_ERR_RETURN */
    int inomap ;       /* No input ASCII mapping */
    int id2map ;       /* Input ASCII direct map */
    int onomap ;       /* No output ASCII mapping */
    int od2map ;       /* Output ASCII direct map */
    /* idx is not referenced directly below; presumably it is used by the
     * GET_OVAL expansion - TODO confirm before removing it. */
    int idx ;

    if (!in_buff)
        return ICONV_DONE;

    inptr = *in_buff ;
    inptr2 = inptr ;
    inlen = *in_bytes_left ;
    outptr = *out_buff ;
    outlen = *out_bytes_left ;
    error = 0 ;
    inomap = cd->flags & CONV_ASCII_INOMAP ;
    id2map = cd->flags & CONV_ASCII_IDMAP2 ;
    onomap = cd->flags & CONV_ASCII_ONOMAP ;
    od2map = cd->flags & CONV_ASCII_ODMAP2 ;

    /**********
     * perform conversion
     *
     * The loop increment commits what infunc consumed, so `continue`
     * drops the current character and `break` leaves inptr on it.
     **********/
    for ( ; inlen > 0 ; inlen -= inptr2 - inptr, inptr = inptr2) {
        /*
         * infunc will return UCS-4 table index
         */
        if ((inword = (*cd->infunc)(cd, &inptr2, inlen)) < 0)
        {
            error = inword ;
            break ;
        }
        /*
         * Convert table index into UCS-4
         */
        if ((inomap || id2map) && _ISASCII(inword))
            outword = inomap ? inword : cd->icell2_tab[0][inword] ;
        else
        {
            if (inword == BAD)
                outword = BAD ;
            else if (IS_UCODE(inword))
                outword = GET_UCODE(inword) ;
            else
            {
                /*
                 * Check for UDC
                 */
                if (ISIDXU(inword))
                    outword = IDXU_UCS(inword) ;
                else
                {
                    GET_IVAL(cd, inword, outword)
                }
                /*
                 * Restrict output to less than ICONV_MAXUCS, if defined
                 */
                if (cd->maxucs && (outword != BAD))
                    if (outword > cd->maxucs)
                        outword = BAD ;
            }
            if (outword == BAD) {
                if (cd->defucsch != 0)
                    outword = cd->defucsch ;
                else if (cd->defstr && (cd->defstrlen == 0))
                    continue ; /* Skip this character */
                else if (cd->defstrlen > 0) {
                    if (outlen < (size_t)cd->defstrlen) {
                        error = ERR_BUFFER_OVERRUN ;
                        break ;
                    }
                    memcpy(outptr, cd->defstr, cd->defstrlen) ;
                    outptr += cd->defstrlen ;
                    outlen -= cd->defstrlen ;
                    continue ;
                }
                else
                {
                    /*
                     * Invalid character
                     */
                    error = ERR_INVALID_CHAR ;
                    break ;
                }
            }
        }
        /*
         * Convert UCS-4 into output multibyte character
         */
        inword = outword ;
        if ((onomap || od2map) && _ISASCII(inword))
            outword = onomap ? inword : cd->ocell2_tab[0][inword] ;
        else
        {
            if (UCS_UDC(inword))
            {
                /*
                 * Map UCS UDC character to the corresponding multiple UDC
                 * character, if applicable
                 */
                outword = cd->udcfunc ? (*cd->udcfunc)(inword) : BAD ;
            }
            else
            {
                GET_OVAL(cd, inword, outword) ;
            }
            /*
             * Shared fallback for both branches above, so that BAD is
             * never emitted as character bytes.
             */
            if (outword == BAD) {
                if (cd->defchar)
                    outword = cd->defchar ;
                else if ((cd->defstr) && (cd->defstrlen == 0))
                    continue ; /* Skip this character */
                else if (cd->defstrlen > 0)
                {
                    if (outlen < (size_t)cd->defstrlen) {
                        error = ERR_BUFFER_OVERRUN ;
                        break ;
                    }
                    memcpy(outptr, cd->defstr, cd->defstrlen) ;
                    outptr += cd->defstrlen ;
                    outlen -= cd->defstrlen ;
                    continue ;
                }
                else
                {
                    /*
                     * Invalid character
                     */
                    error = ERR_INVALID_CHAR ;
                    break ;
                }
            }
        }
        /*
         * calculate the character size in byte
         */
        if ((outword & 0xffffff00) == 0)
            char_size = 1 ;
        else if ((outword & 0xffff0000) == 0)
            char_size = 2 ;
        else if ((outword & 0xff000000) == 0)
            char_size = 3 ;
        else
            char_size = 4 ;
        /*
         * Check for output buffer overflow
         */
        if (outlen < (size_t)char_size) {
            error = ERR_BUFFER_OVERRUN ;
            break ;
        }
        /*
         * Output the bytes, most significant first. The cases fall
         * through on purpose so that exactly char_size bytes are written.
         */
        switch (char_size) {
        case 4: *outptr++ = (outword >> 24) & 0xff;
            /* fall through */
        case 3: *outptr++ = (outword >> 16) & 0xff;
            /* fall through */
        case 2: *outptr++ = (outword >> 8) & 0xff;
            /* fall through */
        case 1: *outptr++ = outword & 0xff;
            break ;
        }
        outlen -= char_size ;
    }
    /**********
     * set output parameters
     **********/
    *in_buff = inptr ;
    *out_buff = outptr ;
    *in_bytes_left = inlen ;
    *out_bytes_left = outlen ;
    if (!error)
        return(ICONV_DONE) ;
    SET_ERR_RETURN(error) ;
} /* __mb_to_mb_exec */
#endif /* USING_OPEN_SOURCE_mb_to_mb_exec // JAC */
/************************************************************************/
/* */
/* Macros, constants & type definitions for */
/* UCS-2/UCS-4/UTF-7/UTF-8 input/output routines */
/* */
/************************************************************************/
/*
* Surrogates related macros
*
* The surrogates Area consists of 1024 low-half surrogates and 1024 high-half
* surrogates which are interpreted in pairs to access over a million codes.
* The high surrogate characters are encoded in the range U+D800 -> U+DBFF.
* The high surrogate character is always the first element of a
* surrogate-pair.
* The low surrogate characters are encoded in the range U+DC00 -> U+DFFF.
* The low surrogate character is always the second element of a
* surrogate-pair.
* These code values are drawn from planes 1-16 of group 0 of UCS-4, that is,
* the range of UCS-4 code values 0x010000 - 0x10ffff. The last two planes
* (15 & 16) are reserved for private use.
*/
/* Highest UCS-4 value that a surrogate pair can express */
#define MAX_UCS4_VALUE 0x10ffff
/* The low ten bits of each surrogate carry its share of the code point */
#define SURROGATE_MASK 0x03ff
/* First code unit of the high-surrogate and low-surrogate ranges */
#define HSURROGATE_PL 0xd800
#define LSURROGATE_PL 0xdc00
/* Range tests: everything above the ten payload bits must match the base */
#define IS_HSURROGATE(ucs2) (HSURROGATE_PL == ((ucs2) & ~SURROGATE_MASK))
#define IS_LSURROGATE(ucs2) (LSURROGATE_PL == ((ucs2) & ~SURROGATE_MASK))
/* Split a UCS-4 value in 0x10000..MAX_UCS4_VALUE into its two surrogates */
#define GET_LSURROGATE(ucs4) (LSURROGATE_PL | ((ucs4) & SURROGATE_MASK))
#define GET_HSURROGATE(ucs4) \
    (HSURROGATE_PL | ((((ucs4) - 0x10000) >> 10) & SURROGATE_MASK))
/* Recombine a <high, low> surrogate pair into the UCS-4 value */
#define SURROGATES_TO_UCS4(high, low) \
    (0x10000 + ((((high) & SURROGATE_MASK) << 10) | ((low) & SURROGATE_MASK)))
/* True when a UCS-4 value has to be written as a surrogate pair */
#define NEED_SURROGATE(ucs4) \
    (((ucs4) >= 0x10000) && ((ucs4) <= MAX_UCS4_VALUE))
/*
* UTF-8 related macros
*
* UTF-8 is a variable length encoding of Unicode using 8-bit sequences,
* where the high bits indicate which part of the sequence a byte belongs to.
* The following table shows how the bits in a Unicode value (or surrogate
* pair) are distributed among the bytes in the UTF-8 encoding.
*
* Unicode value 1st byte 2nd byte 3rd byte 4th byte
* ------------- -------- -------- -------- --------
* 000000000gfedcba 0gfedcba
*
* 00000kjihgfedcba 110kjihg 10fedcba
*
* ponmlkjihgfedcba 1110ponm 10lkjihg 10fedcba
*
* 110110jihgfedcba 11110UTS 10RQponm 10lkjihg 10fedcba
* 110111tsrqponmlk
* where UTSRQ = tsrq + 1
*
* The following table shows the format of the first octet of a coded
* character; the free bits available for coding the character are
* indicated by an x. [Note 2]
*
* Octets Binary Bits Free Max. UCS-4
* 1st of 1 0xxxxxxx 7 0000 007F
* 1st of 2 110xxxxx 5 0000 07FF
* 1st of 3 1110xxxx 4 0000 FFFF
* 1st of 4 11110xxx 3 001F FFFF
* 1st of 5 111110xx 2 03FF FFFF
* 1st of 6 1111110x 1 7FFF FFFF
* 2nd .. nth 10xxxxxx 6
*/
/* Smallest UCS-4 value whose UTF-8 form needs the given number of bytes */
#define MIN_2BYTE_UTF8 0x80
#define MIN_3BYTE_UTF8 0x800
#define MIN_4BYTE_UTF8 0x10000
#define MIN_5BYTE_UTF8 0x200000
#define MIN_6BYTE_UTF8 0x4000000

/* Classify a UTF-8 lead byte by the length of the sequence it starts */
#define IS_1BYTE_UTF8(ch) ((ch) < 0x80)
#define IS_2BYTE_UTF8(ch) (0xc0 == ((ch) & 0xe0))
#define IS_3BYTE_UTF8(ch) (0xe0 == ((ch) & 0xf0))
#define IS_4BYTE_UTF8(ch) (0xf0 == ((ch) & 0xf8))
#define IS_5BYTE_UTF8(ch) (0xf8 == ((ch) & 0xfc))
#define IS_6BYTE_UTF8(ch) (0xfc == ((ch) & 0xfe))

/* True when a UCS value fits in at most the given number of UTF-8 bytes */
#define IS_UCS_TO_1B_UTF8(ucs) ((ucs) < MIN_2BYTE_UTF8) /* <= 1 byte UTF-8 */
#define IS_UCS_TO_2B_UTF8(ucs) ((ucs) < MIN_3BYTE_UTF8) /* <= 2 bytes UTF-8 */
#define IS_UCS_TO_3B_UTF8(ucs) ((ucs) < MIN_4BYTE_UTF8) /* <= 3 bytes UTF-8 */
#define IS_UCS_TO_4B_UTF8(ucs) ((ucs) < MIN_5BYTE_UTF8) /* <= 4 bytes UTF-8 */
#define IS_UCS_TO_5B_UTF8(ucs) ((ucs) < MIN_6BYTE_UTF8) /* <= 5 bytes UTF-8 */
#define IS_UCS_TO_6B_UTF8(ucs) ((ucs) < 0x80000000L) /* <= 6 bytes UTF-8 */

/*
 * Decode an n-byte UTF-8 sequence starting at in[0]. The lead byte
 * supplies the top bits and each trail byte six more; the trail bytes
 * are not validated here.
 */
#define RD_1BYTE_UTF8(in) (in)[0]
#define RD_2BYTE_UTF8(in) ((((in)[0] & 0x1f) << 6) \
    | ((in)[1] & 0x3f))
#define RD_3BYTE_UTF8(in) ((((in)[0] & 0x0f) << 12) \
    | (((in)[1] & 0x3f) << 6) \
    | ((in)[2] & 0x3f))
#define RD_4BYTE_UTF8(in) ((((in)[0] & 0x07) << 18) \
    | (((in)[1] & 0x3f) << 12) \
    | (((in)[2] & 0x3f) << 6) \
    | ((in)[3] & 0x3f))
#define RD_5BYTE_UTF8(in) ((((in)[0] & 0x03) << 24) \
    | (((in)[1] & 0x3f) << 18) \
    | (((in)[2] & 0x3f) << 12) \
    | (((in)[3] & 0x3f) << 6) \
    | ((in)[4] & 0x3f))
#define RD_6BYTE_UTF8(in) ((((in)[0] & 0x01) << 30) \
    | (((in)[1] & 0x3f) << 24) \
    | (((in)[2] & 0x3f) << 18) \
    | (((in)[3] & 0x3f) << 12) \
    | (((in)[4] & 0x3f) << 6) \
    | ((in)[5] & 0x3f))

/*
 * Encode ucs as an n-byte UTF-8 sequence through the pointer `out`,
 * which is advanced past the bytes written. Each macro expands to a
 * brace-enclosed block.
 */
#define WR_1BYTE_UTF8(out, ucs) { \
    *(out)++ = (ucs) & 0x7f ; }
#define WR_2BYTE_UTF8(out, ucs) { \
    *(out)++ = 0xc0 | (((ucs) >> 6) & 0x1f) ; \
    *(out)++ = 0x80 | ((ucs) & 0x3f) ; }
#define WR_3BYTE_UTF8(out, ucs) { \
    *(out)++ = 0xe0 | (((ucs) >> 12) & 0x0f) ; \
    *(out)++ = 0x80 | (((ucs) >> 6) & 0x3f) ; \
    *(out)++ = 0x80 | ((ucs) & 0x3f) ; }
#define WR_4BYTE_UTF8(out, ucs) { \
    *(out)++ = 0xf0 | (((ucs) >> 18) & 0x07) ; \
    *(out)++ = 0x80 | (((ucs) >> 12) & 0x3f) ; \
    *(out)++ = 0x80 | (((ucs) >> 6) & 0x3f) ; \
    *(out)++ = 0x80 | ((ucs) & 0x3f) ; }
#define WR_5BYTE_UTF8(out, ucs) { \
    *(out)++ = 0xf8 | (((ucs) >> 24) & 0x03) ; \
    *(out)++ = 0x80 | (((ucs) >> 18) & 0x3f) ; \
    *(out)++ = 0x80 | (((ucs) >> 12) & 0x3f) ; \
    *(out)++ = 0x80 | (((ucs) >> 6) & 0x3f) ; \
    *(out)++ = 0x80 | ((ucs) & 0x3f) ; }
#define WR_6BYTE_UTF8(out, ucs) { \
    *(out)++ = 0xfc | (((ucs) >> 30) & 0x01) ; \
    *(out)++ = 0x80 | (((ucs) >> 24) & 0x3f) ; \
    *(out)++ = 0x80 | (((ucs) >> 18) & 0x3f) ; \
    *(out)++ = 0x80 | (((ucs) >> 12) & 0x3f) ; \
    *(out)++ = 0x80 | (((ucs) >> 6) & 0x3f) ; \
    *(out)++ = 0x80 | ((ucs) & 0x3f) ; }
/*
* Check if the trailing bytes of a UTF-8 byte sequence is a valid one
* The trailing bytes are valid only if it is of the form 10xxxxxx.
* The macro will return ERR_INVALID_CHAR if an invalid byte is found.
*/
/*
 * CHECK_UTF8(in, len)
 *
 * in   pointer to the lead byte of a UTF-8 sequence
 * len  total length of the sequence in bytes
 *
 * Verifies that bytes in[1] .. in[len-1] all have the form 10xxxxxx and
 * makes the ENCLOSING function return ERR_INVALID_CHAR on the first one
 * that does not. Nothing is checked when len <= 1.
 *
 * Both arguments are parenthesised in the expansion so that expressions
 * such as `a ? b : c` or `n + 1` can be passed safely. The macro still
 * expands to a plain brace block, so existing call sites with or
 * without a trailing semicolon keep compiling.
 */
#define CHECK_UTF8(in,len) \
{ \
    if ((len) > 1) { \
        register uchar_t *ptr = (in) + 1 ; \
        register int cnt = (len) - 1 ; \
        while (cnt-- > 0) \
            if ((*ptr++ & 0xc0) != 0x80) \
                return(ERR_INVALID_CHAR) ; \
    } \
}
#ifdef USING_OPEN_SOURCE_UTF7 /* JAC */
/*
* UTF-7 related macros and constants
*
* UTF-7 is a 7-bit form of UCS Transformation Format. UTF-7 depends on some
* definition of US-ASCII character subsets:
*
* Set D (directly encoded characters, derived from RFC 1521) consists of
* the following characters:
* A-Z, a-z, 0-9, "'(),-./:?"
*
* Set O (optional direct characters) consists of the following 20 characters:
* "!\"#$%&*;<=>@[]^_`{|}"
*
* Set B (Modified Base 64) is the set of characters in the Base64 alphabet
* defined in RFC 1521, excluding the pad character "=".
*
*/
/* Characters that enter and leave the base64-encoded (shifted) state */
#define SHIFT_IN '+'
#define SHIFT_OUT '-'
/* Number of characters in each set below (terminating NUL excluded) */
#define SETD_SIZE (sizeof(direct ) - 1)
#define SETO_SIZE (sizeof(optional) - 1)
#define SETS_SIZE (sizeof(spaces ) - 1)
#define SETB_SIZE (sizeof(base64 ) - 1)
/*
 * Bit-buffer helpers. Both macros use the caller's local `table`
 * (fields bitbuffer and bits_in_buffer); READ_N_BITS also needs a local
 * `buffertemp`. Bits are kept left-aligned in a 32-bit buffer.
 *
 * READ_N_BITS(n)     yields the top n bits and removes them.
 * WRITE_N_BITS(x, n) appends the low n bits of x after the bits already
 *                    held.
 *
 * The n-bit mask is built as (1L << n) - 1; the previous ~(-1L << n)
 * left-shifted a negative value, which is undefined behaviour.
 */
#define READ_N_BITS(n) ((buffertemp = (table->bitbuffer >> (32-(n)))), \
(table->bitbuffer <<= (n)), \
(table->bits_in_buffer -= (n)), buffertemp)
#define WRITE_N_BITS(x,n) ((table->bitbuffer |= \
(((x) & ((1L << (n)) - 1)) << (32-(n)-table->bits_in_buffer))),\
table->bits_in_buffer += (n))
/* Set B: the modified base64 alphabet, in encoding order */
static const char base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" ;
/* Set D: characters that are always encoded directly */
static const char direct[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?" ;
/* Set O: characters that may optionally be encoded directly */
static const char optional[] =
"!\"#$%&*;<=>@[]^_`{|}" ;
static const char spaces[] =
" \011\015\012" ; /* Space, tab, return, line feed */
#endif /* USING_OPEN_SOURCE_UTF7 // JAC */
#ifdef USING_OPEN_SOURCE_UTF7 /* JAC */
/*
* UTF-7 conversion structure
*/
/*
 * Per-descriptor UTF-7 state: two lookup tables indexed by 7-bit
 * character code, followed by the running encoder/decoder state.
 * utf7_convtable_init() allocates and fills one of these.
 */
typedef struct utf7_conv
{
/* mustshift[c] is cleared to 0 by utf7_convtable_init() for characters in
 * the direct and space sets, and for the optional set when
 * CONV_UTF7_OPTIONAL is set */
char mustshift[128] ;
/* invbase64[c] is set by utf7_convtable_init() to the position of c in the
 * base64 alphabet */
int invbase64[128] ;
int shifted ; /* Set if in shifted state */
int first ; /* Set for first character after SHIFT_IN */
int wroteone ; /* Set if at least one UCS character was written */
int bits_in_buffer ; /* Number of valid bits in buffer */
/* NOTE(review): READ_N_BITS right-shifts this signed field; confirm that
 * sign extension cannot occur when the top bit is set. */
int bitbuffer ; /* Buffer for base64 bits */
ucs4_t high_surrogate ; /* Contain high surrogate */
ucs4_t last_ucs4 ; /* Record last converted UCS-4 character */
int last_cnvlen ; /* Record last conversion length */
} utf7_conv_t ;
/*
* UTF-7 conversion table initialization routine
*/
/*
 * utf7_convtable_init - allocate and fill the UTF-7 lookup tables.
 *
 * Does nothing when CONV_UTF7_CONVTABLE is already set in cd->flags.
 * Otherwise a utf7_conv_t is allocated, zeroed and initialised so that
 *   - mustshift[c] is 1 for every character except those in the direct
 *     and space sets (and the optional set when CONV_UTF7_OPTIONAL is
 *     set), for which it is 0;
 *   - invbase64[c] is the position of c in the base64 alphabet, or -1
 *     when c is not a base64 character.
 * The table is stored in cd->convtable and CONV_UTF7_CONVTABLE is set.
 *
 * On allocation failure the user-space build prints a message and calls
 * exit(1); the kernel build returns with the flag still clear.
 *
 * The non-zero defaults are now written with an explicit loop in every
 * build. They used to be set with memset() outside _KERNEL only, which
 * left both tables all-zero in kernel builds. Table indices are cast to
 * unsigned char so a signed plain char can never index negatively.
 */
static void
utf7_convtable_init(_LC_fcconv_iconv_t *cd)
{
    int idx ;
    utf7_conv_t *table ;

    if (cd->flags & CONV_UTF7_CONVTABLE)
        return ;
#ifdef _KERNEL
    MALLOC(table, utf7_conv_t *, sizeof(utf7_conv_t),
           M_KERN, M_WAITOK) ;
#else
    table = (utf7_conv_t *)malloc(sizeof(utf7_conv_t)) ;
#endif
    if (table == NULL) {
#ifndef _KERNEL
        perror("Memory allocation fails") ;
        exit (1) ;
#else
        return ;
#endif
    }
    bzero ((char *)table, sizeof(utf7_conv_t)) ;

    /* Defaults: every character must be shifted, none is base64 */
    for (idx = 0 ;
         idx < (int)(sizeof(table->mustshift) / sizeof(table->mustshift[0])) ;
         idx++)
        table->mustshift[idx] = 1 ;
    for (idx = 0 ;
         idx < (int)(sizeof(table->invbase64) / sizeof(table->invbase64[0])) ;
         idx++)
        table->invbase64[idx] = -1 ;

    /* Characters that may appear unencoded */
    for (idx = 0 ; idx < (int)SETD_SIZE ; idx++)
        table->mustshift[(unsigned char)direct[idx]] = 0 ;
    for (idx = 0 ; idx < (int)SETS_SIZE ; idx++)
        table->mustshift[(unsigned char)spaces[idx]] = 0 ;
    if (cd->flags & CONV_UTF7_OPTIONAL)
        for (idx = 0 ; idx < (int)SETO_SIZE ; idx++)
            table->mustshift[(unsigned char)optional[idx]] = 0 ;

    /* Inverse of the base64 alphabet */
    for (idx = 0 ; idx < (int)SETB_SIZE ; idx++)
        table->invbase64[(unsigned char)base64[idx]] = idx ;

    cd->convtable = (caddr_t) table ;
    cd->flags |= CONV_UTF7_CONVTABLE ;
}
#endif /* USING_OPEN_SOURCE_UTF7 // JAC */
/************************************************************************/
/* */
/* UCS-2/UCS-4/UTF-7/UTF-8 input routines */
/* Return: The UCS-4 character or an error code */
/* The in string reference is updated */
/* */
/************************************************************************/
//LCOV_EXCL_START :cnu -- As of 8/30/2011, not used on SQ platform, but may be used on Clients
/*
 * Read one UCS-4 character from the input buffer.
 *
 *   cd  - conversion descriptor; CONV_INPUT_PROCESSED and
 *         CONV_REVERSE_INBYTE in cd->flags are read and updated.
 *   in  - address of the input pointer; advanced past the consumed bytes
 *         only when a character is returned.
 *   len - number of bytes available at *in.
 *
 * While no character has been read yet on this descriptor, leading byte
 * order marks are skipped and select the input byte order.
 *
 * Return: the character, or ERR_INPUT_INCOMPLETE when fewer than
 *         sizeof(ucs4_t) bytes are available.
 */
int __input_ucs4(_LC_fcconv_iconv_t *cd, uchar_t **in, int len)
{
    /* Signed copy of the unit size: a negative len must fail the check */
    const int unit = (int)sizeof(ucs4_t) ;
    ucs4_t *inptr = (ucs4_t *)*in ;
    int first = !(cd->flags & CONV_INPUT_PROCESSED) ;
    WChar_t word ; /*JAC */

    for (;;) {
        if (len < unit)
            return(ERR_INPUT_INCOMPLETE) ;
        word = *inptr++ ;
        len -= unit ;
        if (first && (word == UCS4_BOM)) {
            /* Skip over initial byte order mark */
            cd->flags &= ~CONV_REVERSE_INBYTE ;
            continue ;
        }
        if (first && (word == UCS4_BOM_REVERSE)) {
            cd->flags |= CONV_REVERSE_INBYTE ;
            continue ;
        }
        break ;
    }

    /*
     * Mark the stream as started only once a real character is in hand.
     * If the input ends right after the BOM nothing is consumed (*in is
     * unchanged), so the BOM must still be recognised on the next call.
     */
    cd->flags |= CONV_INPUT_PROCESSED ;

    if (cd->flags & CONV_REVERSE_INBYTE)
        word = REVERSE_UCS4_BYTE(word) ;
    *in = (uchar_t *)inptr ;
    return((int)word) ;
}
//LCOV_EXCL_STOP
/*#endif /* USING_OPEN_SOURCE_input_ucs4 // JAC */
/*
 * Read one character from UCS-2 input, combining a surrogate pair into a
 * single UCS-4 value.
 *
 *   cd  - conversion descriptor; CONV_INPUT_PROCESSED and
 *         CONV_REVERSE_INBYTE in cd->flags are read and updated.
 *   in  - address of the input pointer; advanced past the consumed bytes
 *         only when a character is returned.
 *   len - number of bytes available at *in.
 *
 * While no character has been read yet on this descriptor, leading byte
 * order marks are skipped and select the input byte order.
 *
 * Return: the character, ERR_INPUT_INCOMPLETE when the buffer ends before
 *         a complete character, or ERR_INVALID_CHAR for an unpaired
 *         surrogate.
 */
int __input_ucs2(_LC_fcconv_iconv_t *cd, uchar_t **in, int len)
{
    /* Signed copy of the unit size: a negative len must fail the check */
    const int unit = (int)sizeof(ucs2_t) ;
    ucs2_t *inptr = (ucs2_t *)*in ;
    int first = !(cd->flags & CONV_INPUT_PROCESSED) ;
    WChar_t word ; /*JAC */

    for (;;) {
        if (len < unit)
            return(ERR_INPUT_INCOMPLETE) ;
        word = *inptr++ ;
        len -= unit ;
        if (first && (word == UCS2_BOM)) {
            /* Skip over initial byte order mark */
            cd->flags &= ~CONV_REVERSE_INBYTE ;
            continue ;
        }
        if (first && (word == UCS2_BOM_REVERSE)) {
            cd->flags |= CONV_REVERSE_INBYTE ;
            continue ;
        }
        break ;
    }
    if (cd->flags & CONV_REVERSE_INBYTE)
        word = REVERSE_UCS2_BYTE(word) ;

    /*
     * CONV_INPUT_PROCESSED is deliberately NOT set on the
     * ERR_INPUT_INCOMPLETE returns: nothing is consumed there (*in is
     * unchanged), so a leading BOM has to be recognised again when the
     * caller retries with more data.
     */

    /*
     * convert surrogates pair (<high-surrogate> <low-surrogate>) to UCS-4
     */
    if (IS_LSURROGATE(word)) {
        /* Low surrogate without high surrogate */
        cd->flags |= CONV_INPUT_PROCESSED ;
        return(ERR_INVALID_CHAR) ;
    }
    if (IS_HSURROGATE(word)) {
        WChar_t low_surrogate ; /*JAC */

        if (len < unit)
            return(ERR_INPUT_INCOMPLETE) ; /* Not enough input */
        low_surrogate = *inptr++ ;
        if (cd->flags & CONV_REVERSE_INBYTE)
            low_surrogate = REVERSE_UCS2_BYTE(low_surrogate) ;
        if (!IS_LSURROGATE(low_surrogate)) {
            cd->flags |= CONV_INPUT_PROCESSED ;
            return(ERR_INVALID_CHAR) ;
        }
        word = SURROGATES_TO_UCS4(word, low_surrogate) ;
    }

    cd->flags |= CONV_INPUT_PROCESSED ;
    *in = (uchar_t *)inptr ;
    return((int)word) ;
}
/*
 * Read one character from UTF-8 input.
 *
 *   cd  - conversion descriptor; CONV_INPUT_PROCESSED in cd->flags is
 *         read and set.
 *   in  - address of the input pointer; advanced past the consumed bytes
 *         when a character is returned (and past a leading BOM).
 *   len - number of bytes available at *in.
 *
 * Over-long encodings are rejected.  A high surrogate encoded as a 3-byte
 * sequence must be followed by a 3-byte low surrogate; the two are
 * combined into one UCS-4 value.  A BOM that is the first character read
 * on this descriptor is skipped.
 *
 * Return: the character, ERR_INPUT_INCOMPLETE when the buffer ends before
 *         a complete character, or ERR_INVALID_CHAR for a malformed
 *         sequence.
 */
int __input_utf8(_LC_fcconv_iconv_t *cd, uchar_t **in, int len)
{
    int first = !(cd->flags & CONV_INPUT_PROCESSED) ;
    uchar_t *inptr = *in ;
    int char_size ;
    WChar_t word ; /*JAC */

    /* Nothing to look at: do not touch the lead byte of an empty buffer */
    if (len < 1)
        return(ERR_INPUT_INCOMPLETE) ;

    word = *inptr ;
    if (IS_1BYTE_UTF8(word))
        char_size = 1 ;
    else if (IS_2BYTE_UTF8(word))
        char_size = 2 ;
    else if (IS_3BYTE_UTF8(word))
        char_size = 3 ;
    else if (IS_4BYTE_UTF8(word))
        char_size = 4 ;
//LCOV_EXCL_START : cnu - We don't claim support for 5 or 6 byte long UTF8 chars yet.
    else if (IS_5BYTE_UTF8(word))
        char_size = 5 ;
    else if (IS_6BYTE_UTF8(word))
        char_size = 6 ;
//LCOV_EXCL_STOP
    else
        return(ERR_INVALID_CHAR) ;

    if (len < char_size)
        return(ERR_INPUT_INCOMPLETE) ;
    CHECK_UTF8(inptr, char_size) ;

    switch (char_size)
    {
    /*
     * Over-long UTF-8 sequences are rejected for better security
     */
    case 1: word = RD_1BYTE_UTF8(inptr) ; break ;
    case 2: word = RD_2BYTE_UTF8(inptr) ;
        if (word < MIN_2BYTE_UTF8)
            return(ERR_INVALID_CHAR);
        break ;
    case 3: word = RD_3BYTE_UTF8(inptr) ;
        if (word < MIN_3BYTE_UTF8)
            return(ERR_INVALID_CHAR);
        break ;
    case 4: word = RD_4BYTE_UTF8(inptr) ;
        if (word < MIN_4BYTE_UTF8)
            return(ERR_INVALID_CHAR);
        break ;
//LCOV_EXCL_START : cnu - We don't claim support for 5 or 6 byte long UTF8 chars yet.
    case 5: word = RD_5BYTE_UTF8(inptr) ;
        if (word < MIN_5BYTE_UTF8)
            return(ERR_INVALID_CHAR);
        break ;
    case 6: word = RD_6BYTE_UTF8(inptr) ;
        if (word < MIN_6BYTE_UTF8)
            return(ERR_INVALID_CHAR);
        break ;
//LCOV_EXCL_STOP
    }
    inptr += char_size ;

    /*
     * Properly handle non-BMP characters formed by UTF-8 surrogate pairs
     */
    if ((char_size == 3) && (IS_HSURROGATE(word)))
    {
        WChar_t low_surrogate ; /*JAC */
        int remaining = len - char_size ; /* Bytes left after the high surrogate */

        if (remaining < 1)
            return(ERR_INPUT_INCOMPLETE) ;
        if (!IS_3BYTE_UTF8(*inptr))
            return(ERR_INVALID_CHAR) ;
        /* All three bytes of the low surrogate must be present */
        if (remaining < 3)
            return(ERR_INPUT_INCOMPLETE) ;
        low_surrogate = RD_3BYTE_UTF8(inptr) ;
        if (!IS_LSURROGATE(low_surrogate))
            return(ERR_INVALID_CHAR) ;
        word = SURROGATES_TO_UCS4(word, low_surrogate) ;
        /*
         * Consume the low surrogate as well; otherwise the next call
         * would decode it again and return it as a separate character.
         */
        inptr += 3 ;
    }

    cd->flags |= CONV_INPUT_PROCESSED ;
    *in = inptr ;

    /*
     * Skip BOM if at the beginning of the file
     */
    if (first && (word == UCS4_BOM))
        return(__input_utf8(cd, in, len - char_size)) ;
    return((int)word) ;
}
/*
* Due to the complex nature of UTF-7 to UCS conversion, it is done on a
* character-by-character basis.
*/
#ifdef USING_OPEN_SOURCE_UTF7 /* JAC */
/*
 * Read one character from UTF-7 input.
 *
 *   cd  - conversion descriptor; the UTF-7 table is created on first use
 *         and its state (shifted, bit buffer, pending high surrogate) is
 *         carried across calls through cd->convtable.
 *   in  - address of the input pointer; updated only when a character is
 *         returned.
 *   len - number of bytes available at *in.
 *
 * Return: the character, ERR_NOINPUT when there is no input,
 *         ERR_INPUT_INCOMPLETE when the buffer ends before a character is
 *         complete (also returned for a byte above 0x7f, see note below),
 *         or ERR_INVALID_CHAR for a malformed shifted section.
 *
 * NOTE(review): on the error returns inside the loop *in is left unchanged
 * while the state in the table may already have been modified by the bytes
 * read so far - confirm how callers retry after ERR_INPUT_INCOMPLETE.
 */
int __input_utf7(_LC_fcconv_iconv_t *cd, uchar_t **in, int len)
{
uchar_t *inptr = *in ;
utf7_conv_t *table ;
ucs4_t ch, word ;
int base64value ;
/* Not referenced directly below; presumably scratch space for the
 * READ_N_BITS/WRITE_N_BITS macros - verify against their definitions. */
int buffertemp ;
if (!(cd->flags & CONV_UTF7_CONVTABLE))
utf7_convtable_init(cd) ;
table = (utf7_conv_t *)cd->convtable ;
/*
* If inptr is NULL, it indicates the end of conversion and
* hence it is necessary to check the current states of conversion
* for error, e.g. partially converted character.
*
* NOTE(review): no such state check is performed here; ERR_NOINPUT is
* returned unconditionally.
*/
if ((inptr == NULL) || (len <= 0)) {
return(ERR_NOINPUT) ;
}
while (1)
{
/*
* Incomplete input buffer sequence
*/
if (len < 1)
return(ERR_INPUT_INCOMPLETE) ;
ch = *inptr++ ;
len-- ;
/*
* Only 7-bit bytes can index the lookup tables.
* NOTE(review): ERR_INPUT_INCOMPLETE looks like the wrong code for a
* byte that can never be valid; ERR_INVALID_CHAR may be intended.
*/
if (ch > 0x7f)
return(ERR_INPUT_INCOMPLETE) ;
if (table->shifted) {
base64value = table->invbase64[ch] ;
if (base64value < 0) {
/* A non-base64 character terminates the shifted section */
table->shifted = 0 ;
/*
* If SHIFT_IN is immediately followed by SHIFT_OUT,
* the pair stands for a literal SHIFT_IN character.
*/
if (table->first && (ch == SHIFT_OUT)) {
word = SHIFT_IN ;
table->first = 0 ;
break ;
}
/*
* The section must have produced at least one character and
* any bits left over in the buffer must be zero.
*/
if (!table->wroteone ||
(READ_N_BITS(table->bits_in_buffer) != 0))
return(ERR_INVALID_CHAR) ;
if (ch == SHIFT_OUT)
continue ; /* The SHIFT_OUT character is discarded */
/* Fall through to the non-shifted state */
}
else {
/* Add another 6 bits of base64 value to the buffer */
WRITE_N_BITS(base64value, 6) ;
table->first = 0 ;
/* A complete 16-bit unit is available */
if (table->bits_in_buffer >= 16) {
word = READ_N_BITS(16) ;
table->wroteone = 1 ;
if (table->high_surrogate) {
/* A pending high surrogate must be completed by a low one */
if (!IS_LSURROGATE(word))
return(ERR_INVALID_CHAR) ;
word = SURROGATES_TO_UCS4(table->high_surrogate, word) ;
table->high_surrogate = 0 ;
}
else if (IS_HSURROGATE(word)) {
/* Remember it and keep reading for the low surrogate */
table->high_surrogate = word ;
continue ;
}
break ;
}
continue ; /* Read next byte */
}
}
if (ch == SHIFT_IN) {
/* Start of a shifted section: reset the per-section state */
table->first = 1 ;
table->shifted = 1 ;
table->wroteone = 0 ;
table->bits_in_buffer = 0 ;
continue ; /* Read next byte */
}
/* It must be a directly encoded character */
word = ch ;
break ;
}
*in = (uchar_t *)inptr ;
return((int)word) ;
}
#endif /* USING_OPEN_SOURCE_UTF7 // JAC */
/************************************************************************/
/* */
/* UCS-2/UCS-4/UTF-7/UTF-8 output routines */
/* Return: Number of bytes written to output buffer */
/* */
/************************************************************************/
//LCOV_EXCL_START :cnu -- As of 8/30/2011, not used on SQ platform, but may be used on Clients
/*
 * Write one character as UCS-4, preceded by a byte order mark the first
 * time there is room for both.
 *
 *   cd   - conversion descriptor; CONV_BOM_WRITTEN and
 *          CONV_REVERSE_OUTBYTE in cd->flags are used.
 *   out  - output buffer.
 *   len  - number of bytes available at out.
 *   word - character to write.
 *
 * Return: number of bytes written, or ERR_BUFFER_OVERRUN.  On an error
 *         return neither the buffer nor cd->flags has been modified.
 */
int __output_ucs4(_LC_fcconv_iconv_t *cd, uchar_t *out, int len, ucs4_t word)
{
    /* Signed copy of the unit size: a negative len must fail the check */
    const int unit = (int)sizeof(ucs4_t) ;
    ucs4_t *outptr = (ucs4_t *)out ;
    int emit_bom = 0 ;

    /*
     * For the special case that len == sizeof(ucs4_t)
     * and BOM has not been sent out, don't send out the BOM to avoid
     * sending out E2BIG error unnecessarily.
     */
    if (!(cd->flags & CONV_BOM_WRITTEN) && (len > unit)) {
        emit_bom = 1 ;
        len -= unit ; /* Reserve room for the BOM */
    }
    if (len < unit)
        return(ERR_BUFFER_OVERRUN) ;

    /*
     * Everything fits: only now write the BOM and record the decision.
     * Doing this before the space check would lose the BOM for good when
     * the call then failed with ERR_BUFFER_OVERRUN.
     */
    if (emit_bom)
        *outptr++ = (cd->flags & CONV_REVERSE_OUTBYTE)
                        ? UCS4_BOM_REVERSE : UCS4_BOM ;
    cd->flags |= CONV_BOM_WRITTEN ;

    if (cd->flags & CONV_REVERSE_OUTBYTE)
        word = REVERSE_UCS4_BYTE(word) ;
    *outptr++ = word ;
    return((int)((uchar_t *)outptr - out)) ;
}
//LCOV_EXCL_STOP
/*#endif /* USING_OPEN_SOURCE_output_ucs4 // JAC */
/*
 * Write one character as UCS-2 (as a surrogate pair when required),
 * preceded by a byte order mark the first time there is room for it.
 *
 *   cd   - conversion descriptor; CONV_BOM_WRITTEN and
 *          CONV_REVERSE_OUTBYTE in cd->flags are used.
 *   out  - output buffer.
 *   len  - number of bytes available at out.
 *   word - character to write.
 *
 * Return: number of bytes written, ERR_BUFFER_OVERRUN when the buffer is
 *         too small, or ERR_INVALID_CHAR when word exceeds MAX_UCS4_VALUE.
 *         On an error return neither the buffer nor cd->flags has been
 *         modified.
 */
int __output_ucs2(_LC_fcconv_iconv_t *cd, uchar_t *out, int len, ucs4_t word)
{
    /* Signed copy of the unit size: a negative len must fail the check */
    const int unit = (int)sizeof(ucs2_t) ;
    ucs2_t *outptr = (ucs2_t *)out ;
    int emit_bom = 0 ;

    /*
     * For the special case that len = sizeof(ucs2_t)
     * and BOM has not been sent out, don't send out the BOM to avoid
     * sending out E2BIG error unnecessarily.
     */
    if (!(cd->flags & CONV_BOM_WRITTEN) && (len > unit)) {
        emit_bom = 1 ;
        len -= unit ; /* Reserve room for the BOM */
    }

    /*
     * Not enough output buffer space?
     */
    if (len < unit)
        return(ERR_BUFFER_OVERRUN) ;
    if (word > MAX_UCS4_VALUE)
        return(ERR_INVALID_CHAR) ;
    if (NEED_SURROGATE(word) && (len < 2 * unit))
        return(ERR_BUFFER_OVERRUN) ;

    /*
     * All checks passed: only now write the BOM and record the decision.
     * Doing this earlier would lose the BOM for good whenever one of the
     * checks above failed.
     */
    if (emit_bom)
        *outptr++ = (cd->flags & CONV_REVERSE_OUTBYTE)
                        ? UCS2_BOM_REVERSE : UCS2_BOM ;
    cd->flags |= CONV_BOM_WRITTEN ;

    if (NEED_SURROGATE(word)) {
        ucs4_t high_surrogate ;

        high_surrogate = GET_HSURROGATE(word) ;
        word = GET_LSURROGATE(word) ;
        if (cd->flags & CONV_REVERSE_OUTBYTE)
            high_surrogate = REVERSE_UCS2_BYTE(high_surrogate) ;
        *outptr++ = high_surrogate ;
    }
    if (cd->flags & CONV_REVERSE_OUTBYTE)
        word = REVERSE_UCS2_BYTE(word) ;
    *outptr++ = word ;
    return((int)((uchar_t *)outptr - out)) ;
}
/*
 * Write one character as UTF-8.
 *
 *   cd   - conversion descriptor (not used by this routine).
 *   out  - output buffer.
 *   len  - number of bytes available at out.
 *   word - character to write.
 *
 * Return: number of bytes written, ERR_INVALID_CHAR when the value fits
 *         none of the encoding lengths, or ERR_BUFFER_OVERRUN when the
 *         buffer is too small.
 */
int __output_utf8(_LC_fcconv_iconv_t *cd, uchar_t *out, int len, ucs4_t word)
{
    uchar_t *dst = out ;
    int nbytes ;

    /* Encoding length: the first range test that matches wins, 0 if none */
    nbytes = (IS_UCS_TO_1B_UTF8(word)) ? 1
           : (IS_UCS_TO_2B_UTF8(word)) ? 2
           : (IS_UCS_TO_3B_UTF8(word)) ? 3
           : (IS_UCS_TO_4B_UTF8(word)) ? 4
//LCOV_EXCL_START : cnu - We don't claim support for 5 or 6 byte long UTF8 chars yet.
           : (IS_UCS_TO_5B_UTF8(word)) ? 5
           : (IS_UCS_TO_6B_UTF8(word)) ? 6
//LCOV_EXCL_STOP
           : 0 ;

    if (nbytes == 0)
        return(ERR_INVALID_CHAR) ;

    /* The whole sequence has to fit in the output buffer */
    if (nbytes > len)
        return(ERR_BUFFER_OVERRUN) ;

    switch (nbytes)
    {
    case 1:
        WR_1BYTE_UTF8(dst, word) ;
        break ;
    case 2:
        WR_2BYTE_UTF8(dst, word) ;
        break ;
    case 3:
        WR_3BYTE_UTF8(dst, word) ;
        break ;
    case 4:
        WR_4BYTE_UTF8(dst, word) ;
        break ;
//LCOV_EXCL_START : cnu - We don't claim support for 5 or 6 byte long UTF8 chars yet.
    case 5:
        WR_5BYTE_UTF8(dst, word) ;
        break ;
    case 6:
        WR_6BYTE_UTF8(dst, word) ;
        break ;
//LCOV_EXCL_STOP
    }
    return(nbytes) ;
}
#ifdef USING_OPEN_SOURCE_output_utf7 /* JAC */
/*
 * Write one character as UTF-7.
 *
 *   cd   - conversion descriptor; the UTF-7 table is created on first use
 *          and the shift state and bit buffer are carried across calls
 *          through cd->convtable.  CONV_UTF7_VERBOSE forces an explicit
 *          SHIFT_OUT at the end of every shifted section.
 *   out  - output buffer.
 *   len  - number of bytes available at out.
 *   word - character to write.
 *
 * Characters above 0x7f, and 7-bit characters flagged in mustshift[], are
 * base64 encoded inside a SHIFT_IN ... section; all others are written as
 * themselves, closing an open shifted section first.
 *
 * Return: number of bytes written, ERR_BUFFER_OVERRUN when the buffer is
 *         too small, or ERR_INVALID_CHAR when word cannot be represented.
 *         Nothing is written and no state changes on an error return.
 */
int __output_utf7(_LC_fcconv_iconv_t *cd, uchar_t *out, int len, ucs4_t word)
{
    uchar_t *outptr = out ;
    utf7_conv_t *table ;
    /* Not referenced directly below; presumably scratch space for the
     * READ_N_BITS/WRITE_N_BITS macros, so it is kept under this name. */
    int buffertemp ;
    int needshift ;
    int char_size ;

    if (!(cd->flags & CONV_UTF7_CONVTABLE))
        utf7_convtable_init(cd) ;
    table = (utf7_conv_t *)cd->convtable ;

    needshift = (word > 0x7f) || table->mustshift[word] ;

    /*
     * Required size of output buffer
     */
    if (!needshift) {
        /*
         * Exact size: the character itself, plus - when a shifted section
         * is open - the base64 characters still owed for the buffered
         * bits and a possible SHIFT_OUT.
         */
        char_size = 1 ;
        if (table->shifted) {
            char_size += (table->bits_in_buffer + 5) / 6 ;
            if ((cd->flags & CONV_UTF7_VERBOSE) ||
                (table->invbase64[word] >= 0))
                char_size++ ;
        }
    }
    else if (word < 0x10000)
        char_size = table->shifted ? 3 : 4 ; /* Upper bound */
    else if (NEED_SURROGATE(word))
        char_size = table->shifted ? 6 : 7 ; /* Upper bound */
    else
        return(ERR_INVALID_CHAR) ;

    /*
     * Not enough output buffer space?
     */
    if (len < char_size)
        return(ERR_BUFFER_OVERRUN) ;

    /*
     * Write output bytes
     */
    if (needshift && !table->shifted) {
        *outptr++ = SHIFT_IN ;
        /*
         * Check for the special case of SHIFT_IN character
         */
        if (word == SHIFT_IN) {
            *outptr++ = SHIFT_OUT ;
            return((int)(outptr - out)) ;
        }
        table->shifted = 1 ;
    }

    if (table->shifted) {
        /*
         * Either write the character to the bit buffer,
         * or pad the bit buffer out to a full base64 character.
         */
        if (needshift) {
            if (NEED_SURROGATE(word)) {
                WChar_t hiword ; /*JAC */

                hiword = GET_HSURROGATE(word) ;
                word = GET_LSURROGATE(word) ;
                WRITE_N_BITS(hiword, 16) ;
                /* Make room before the low surrogate is added */
                while (table->bits_in_buffer >= 6)
                    *outptr++ = base64[READ_N_BITS(6)] ;
            }
            WRITE_N_BITS(word, 16) ;
        }
        else {
            int pad = (6 - (table->bits_in_buffer % 6)) % 6 ;

            /* Writing zero bits is a no-op, so skip the macro in that case */
            if (pad > 0) {
                WRITE_N_BITS(0, pad) ;
            }
        }

        /*
         * Flush out as many full base64 characters as possible from the
         * bit buffer.  The test is ">= 6": a buffer holding exactly six
         * bits is a complete base64 character, and after padding it is
         * the last one of the section.
         */
        while (table->bits_in_buffer >= 6)
            *outptr++ = base64[READ_N_BITS(6)] ;

        if (!needshift) {
            /*
             * Write the explicit shift out character if
             * 1) The caller has requested we always do it, or
             * 2) The directly encoded character is in the base64 set.
             */
            if ((cd->flags & CONV_UTF7_VERBOSE ) ||
                (table->invbase64[word] >= 0))
                *outptr++ = SHIFT_OUT ;
            table->shifted = 0 ;
        }
    }

    /*
     * The character can be directly encoded as ASCII; the space for it
     * was already accounted for in char_size above.
     */
    if (!needshift)
        *outptr++ = word ;

    return((int)(outptr - out)) ;
}
#endif /* USING_OPEN_SOURCE_output_utf7 // JAC */