blob: 6823eab2b26f09ac788a0f4642960ef8b48b77cc [file] [log] [blame]
/***************************************************************************
*
* wcodecvt.cpp - definition of codecvt<wchar_t, char, mbstate_t> members
*
* $Id$
*
***************************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*
* Copyright 2001-2006 Rogue Wave Software.
*
**************************************************************************/
#define _RWSTD_LIB_SRC
#include <rw/_defs.h>
// working around a Compaq C++ bug (see PR #26778)
#if __DECCXX_VER >= 60300000 && __DECCXX_VER < 60400000
# include <stdarg.h>
_USING (std::va_list);
// override autoconfigured macro whose value is incorrect
// if <unistd.h> is #included before <iconv.h>
# include <unistd.h>
# define _RWSTD_NO_ICONV_CONST_CHAR
#endif // Compaq C++ 6.3
#include <wchar.h>
#include <limits.h>
#include <locale.h>
#include <stdlib.h> // for MB_CUR_MAX, mblen(), mbtowc()
#include <string.h> // for memcmp()
#include <errno.h>
#if defined (__SUNPRO_CC) && __SUNPRO_CC <= 0x540 \
&& (defined (__SunOS_5_8) || defined (__SunOS_5_9))
// working around SunOS/SunPro header dependencies (see PR #26255)
# undef _TIME_T
#endif // SunPro <= 5.4 && SunOS 5.{8,9}
#include <loc/_codecvt.h>
#include <loc/_locale.h>
#include <loc/_localedef.h>
#include <rw/_traits.h> // for _RWSTD_WCSLEN()
#include "iso2022.h"
#include "setlocale.h"
#include "use_facet.h"
// declare mbrlen() if it's not declared in the system headers
// but is known to be defined in the libc binary
#if defined (_RWSTD_NO_MBRLEN) && !defined (_RWSTD_NO_MBRLEN_IN_LIBC)
# undef _RWSTD_NO_MBRLEN
extern "C" _RWSTD_SIZE_T
mbrlen (const char*, _RWSTD_SIZE_T, _RWSTD_MBSTATE_T*) _LIBC_THROWS();
#endif // _RWSTD_NO_MBRLEN && !_RWSTD_NO_MBRLEN_IN_LIBC
// declare mblen() if it's not declared in the system headers
// but is known to be defined in the libc binary
#if defined (_RWSTD_NO_MBLEN) && !defined (_RWSTD_NO_MBLEN_IN_LIBC)
# undef _RWSTD_NO_MBLEN
extern "C" _RWSTD_SIZE_T
mblen (const char*, _RWSTD_SIZE_T) _LIBC_THROWS();
#endif // _RWSTD_NO_MBLEN && !_RWSTD_NO_MBLEN_IN_LIBC
// declare mbtowc() if it's not declared in the system headers
// but is known to be defined in the libc binary
#if defined (_RWSTD_NO_MBTOWC) && !defined (_RWSTD_NO_MBTOWC_IN_LIBC)
# undef _RWSTD_NO_MBTOWC
extern "C" int
mbtowc (wchar_t*, const char*, _RWSTD_SIZE_T) _LIBC_THROWS();
#endif // _RWSTD_NO_MBTOWC && !_RWSTD_NO_MBTOWC_IN_LIBC
// declare wcsrtombs() if it's not declared in the system headers
// but is known to be defined in the libc binary
#if defined (_RWSTD_NO_WCSRTOMBS) && !defined (_RWSTD_NO_WCSRTOMBS_IN_LIBC)
# undef _RWSTD_NO_WCSRTOMBS
extern "C" _RWSTD_SIZE_T
wcsrtombs (char*, const wchar_t**,
_RWSTD_SIZE_T, _RWSTD_MBSTATE_T*) _LIBC_THROWS();
#endif // _RWSTD_NO_WCSRTOMBS && !_RWSTD_NO_WCSRTOMBS_IN_LIBC
// declare wcrtomb() if it's not declared in the system headers
// but is known to be defined in the libc binary
#if defined (_RWSTD_NO_WCRTOMB) && !defined (_RWSTD_NO_WCRTOMB_IN_LIBC)
# undef _RWSTD_NO_WCRTOMB
extern "C" _RWSTD_SIZE_T
wcrtomb (char*, wchar_t, _RWSTD_MBSTATE_T*) _LIBC_THROWS();
#endif // _RWSTD_NO_WCRTOMB && !_RWSTD_NO_WCRTOMB_IN_LIBC
// declare wctomb() if it's not declared in the system headers
// but is known to be defined in the libc binary
#if defined (_RWSTD_NO_WCTOMB) && !defined (_RWSTD_NO_WCTOMB_IN_LIBC)
# undef _RWSTD_NO_WCTOMB
extern "C" {
_RWSTD_DLLIMPORT int
wctomb (char*, wchar_t) _LIBC_THROWS();
} // extern "C"
#endif // _RWSTD_NO_WCTOMB && !_RWSTD_NO_WCTOMB_IN_LIBC
// utf8 encoding maximum size
#undef _UTF8_MB_CUR_MAX
#define _UTF8_MB_CUR_MAX 6
enum {
// mask to retrieve ISO-2022 encoding type
__rw_2022_mask = 0x000f,
// UCS-4 or UCS-2 (wchar_t width) encoding type
__rw_ucs_be = 0x000010, // big-endian word and byte order
__rw_ucs_le = 0x000040, // little-endian word and byte order
__rw_ucs = 0x0000f0, // platform-endianness
// UCS-4 encoding type
__rw_ucs4_be = 0x000100, // big-endian word and byte order
__rw_ucs4_be_le = 0x000200, // big-endian word, little-endian byte order
__rw_ucs4_le = 0x000400, // little-endian word and byte order
__rw_ucs4_le_be = 0x000800, // little-endian word, big-endian byte order
__rw_ucs4 = 0x000f00, // platform-endianness
// UCS-2 encoding type
__rw_ucs2_be = 0x001000, // big-endian word and byte order
__rw_ucs2_le = 0x004000, // little-endian word and byte order
__rw_ucs2 = 0x00f000, // platform-endianness
// UTF encoding type
__rw_utf8 = 0x010000, // UTF-8
__rw_utf16_be = 0x020000, // UTF-16, big-endian byte order
__rw_utf16_le = 0x040000, // UTF-16, little-endian byte order
__rw_utf16 = 0x060000, // UTF-16, platform endianness
__rw_strict = 0x100000, // do strict UCS validation
__rw_use_libc = 0x200000, // always use libc locale
// mask to retrieve UCS encoding type
__rw_ucs_mask = __rw_ucs_be | __rw_ucs_le | __rw_ucs
| __rw_ucs4_be | __rw_ucs4_le
| __rw_ucs4_be_le | __rw_ucs4_le_be | __rw_ucs4
| __rw_ucs2_be | __rw_ucs2_le | __rw_ucs2,
// mask to retrieve UTF encoding type
__rw_utf_mask = __rw_utf8 | __rw_utf16_be | __rw_utf16_le
};
#define ISO2022_TYPE(x) ((x) & __rw_2022_mask)
// helper macros to retrieve the type of an UCS and UTF encoding
#define UCS_TYPE(x) ((x) & __rw_ucs_mask)
#define UTF_TYPE(x) ((x) & __rw_utf_mask)
#define IS_UCS(x) (UCS_TYPE (x) == __rw_ucs)
#define IS_UCS_BE(x) (UCS_TYPE (x) == __rw_ucs_be)
#define IS_UCS_LE(x) (UCS_TYPE (x) == __rw_ucs_le)
#define IS_UCS_BE_LE(x) (UCS_TYPE (x) == __rw_ucs_be_le)
#define IS_UCS_LE_BE(x) (UCS_TYPE (x) == __rw_ucs_le_be)
#define IS_UTF8(x) (UCS_TYPE (x) == __rw_utf8)
#define IS_UTF16(x) (UCS_TYPE (x) == __rw_utf16)
#define IS_UTF16_BE(x) (UCS_TYPE (x) == __rw_utf16_be)
#define IS_UTF16_LE(x) (UCS_TYPE (x) == __rw_utf16_le)
// evaluates to 1 if the facet should use the libc locale, 0 otherwise
#define USE_LIBC(impl, x) \
((!(impl) || ((x) & __rw_use_libc)) && !(UCS_TYPE (x) || UTF_TYPE (x)))
// _RWSTD_MBSTATE_T macro might expand to char* (on AIX)
typedef _RWSTD_MBSTATE_T StateT;
_RWSTD_NAMESPACE (__rw) {
static const struct {
const char *mod;
int flags;
} __rw_ucsmods[] = {
{ "UCS", __rw_ucs },
{ "UCS-4", __rw_ucs4 },
{ "UCS-2", __rw_ucs2 },
{ "UCS-4-BE", __rw_ucs4_be },
{ "UCS-4-LE", __rw_ucs4_le },
{ "UCS-2-BE", __rw_ucs2_be },
{ "UCS-2-LE", __rw_ucs2_le },
{ "UCS-4-BE-LE", __rw_ucs4_be_le },
{ "UCS-4-LE-BE", __rw_ucs4_le_be },
{ "UCS-BE", __rw_ucs_be },
{ "UCS-LE", __rw_ucs_le }
};
static const _RWSTD_SIZE_T
__rw_n_ucsmods = sizeof __rw_ucsmods / sizeof *__rw_ucsmods;
static inline int
__rw_mbsinit (const StateT *psrc)
{
#ifndef _RWSTD_NO_MBSINIT
return mbsinit (psrc);
#else // if defined (_RWSTD_NO_MBSINIT)
// commented out to work around an HP aCC 1.21 bug
/* static */ const StateT state = StateT ();
return !psrc || 0 == memcmp (psrc, &state, sizeof state);
#endif // _RWSTD_NO_MBSINIT
}
#undef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#undef max
#define max(a,b) ((a) > (b)) ? (a) : (b)
#ifndef _RWSTD_NO_WCHAR_T
// behaves just like mbrlen(), except that if the character pointed to
// by `str' is the NUL character and `emax' is non-zero, the function
// returns 1
static inline _RWSTD_SIZE_T
__rw_libc_mbrlen (_RWSTD_MBSTATE_T &state,
const char *str,
_RWSTD_SIZE_T emax)
{
_RWSTD_ASSERT (0 != str);
if ('\0' == *str)
return 0 < emax;
#ifndef _RWSTD_NO_MBRLEN
// `emax' is the maximum number of elements of type char in `str'
return mbrlen (str, emax, &state);
#elif !defined (_RWSTD_NO_MBLEN)
_RWSTD_UNUSED (state);
return mblen (str, emax);
#else // if defined (_RWSTD_NO_MBLEN)
_RWSTD_UNUSED (state);
// this is bogus but it's the best we can do given the absence
// of libc support for this functionality (more likely than not,
// this code will never end up getting executed because we'll
// never get this far)
const char *next = str;
for (; *next && emax; ++next, --emax);
return next - str;
#endif // _RWSTD_NO_MBRLEN
}
// does a simple transliteration of the UTF-8 encoded character string
static unsigned int
__rw_xlit (const _RW::__rw_codecvt_t* impl,
const char *utf8s, _RWSTD_SIZE_T sz)
{
const unsigned int* const ptbls = impl->get_xliteration_tab ();
unsigned int index = 0;
for (const unsigned int* ptbl = ptbls; sz; --sz, ++utf8s) {
typedef unsigned char UChar;
index = ptbl [UChar (*utf8s)];
if (_RWSTD_UINT_MAX == index)
// transliteration not provided for this character
return 0;
if (index & 0x80000000) {
ptbl = ptbls + 256 * index;
}
else {
// utf-8 string representation should end here
_RWSTD_ASSERT (0 == sz);
}
}
return index & 0x80000000 ? 0 : index;
}
// This returns two result codes: error and ok. The partial error result
// is not returned because there is no way to know whether or not the
// input sequence contains any more valid characters.
static _STD::codecvt_base::result
__rw_libc_do_in (_RWSTD_MBSTATE_T &state,
const char *from,
const char *from_end,
const char* &from_next,
wchar_t *to,
wchar_t *to_limit,
wchar_t* &to_next)
{
_RWSTD_ASSERT (from <= from_end);
_RWSTD_ASSERT (to <= to_limit);
_STD::codecvt_base::result res = _STD::codecvt_base::ok;
_RWSTD_MBSTATE_T save_state = state; // saved state before conversion
_RWSTD_SIZE_T src_len = from_end - from; // source length
_RWSTD_SIZE_T dst_len = to_limit - to; // destination length
const char* psrc = from_next ? from_next : ""; // source
wchar_t* pdst = to_next; // destination
#ifndef _RWSTD_NO_MBSRTOWCS
// mbsrtowcs() requires the input to be a NULL-terminated string
const _RWSTD_SIZE_T ret = mbsrtowcs (pdst, &psrc, dst_len, &state);
#else // if defined (_RWSTD_NO_MBSRTOWCS)
const _RWSTD_SIZE_T ret = _RWSTD_SIZE_MAX;
#endif // _RWSTD_NO_MBSRTOWCS
// if an error occurred during the restartable function
// or if that function is not available
if (_RWSTD_SIZE_MAX == ret) {
// the conversion here (besides the previous failure) is done
// one character at a time because the non-reentrant/restartable
// counterpart of mbsrtowcs() does not provide any information
// about the size of the input that has been processed.
_RWSTD_UNUSED (state);
// restore `psrc' value
psrc = from_next ? from_next : "";
while (dst_len && src_len) {
_RWSTD_SIZE_T tmp;
#ifndef _RWSTD_NO_MBRTOWC
tmp = mbrtowc (pdst, psrc, src_len, &state);
#elif !defined (_RWSTD_NO_MBTOWC)
tmp = mbtowc (pdst, psrc, src_len);
#else
tmp = _RWSTD_SIZE_MAX;
#endif
// error; -1 result comes only from an illegal sequence
if (_RWSTD_SIZE_MAX == tmp) {
res = _STD::codecvt_base::error;
break;
}
// not enough bytes in input to form a valid
// character - translates to an ok result
if (tmp == (_RWSTD_SIZE_T)(-2))
break;
// the multibyte sequence is the NULL character
if (tmp == 0)
tmp++;
// adjust the pointers
psrc += tmp;
src_len -= tmp;
++pdst;
--dst_len;
}
// adjust "next" pointers
from_next = psrc;
to_next = pdst;
}
else {
// the conversion succeeded on the first attempt
if (psrc)
from_next = psrc;
else {
// mbsrtowcs() sets `psrc' to 0 if the conversions
// stops due to the terminating NUL character
const _RWSTD_SIZE_T tmp =
__rw_libc_mbrlen (save_state, from_next, ret);
from_next += tmp;
}
to_next += ret;
}
// if the conversion has exhausted all space in the destination
// range AND there are more COMPLETE characters in the source
// range then we have a "partial" conversion
if (res == _STD::codecvt_base::ok && src_len && !dst_len) {
_RWSTD_MBSTATE_T tmp_state = state;
_RWSTD_SIZE_T tmp = __rw_libc_mbrlen (tmp_state, psrc, src_len);
if (tmp < (_RWSTD_SIZE_T)(-2))
res = _STD::codecvt_base::partial;
}
return res;
}
static _STD::codecvt_base::result
__rw_libc_do_out (_RWSTD_MBSTATE_T &state,
const wchar_t *from,
const wchar_t *from_end,
const wchar_t* &from_next,
char *to,
char *to_limit,
char* &to_next)
{
_RWSTD_ASSERT (from <= from_end);
_RWSTD_ASSERT (to <= to_limit);
// verify that both ranges are valid
_RWSTD_ASSERT (from && from_end || !from && !from_end);
_RWSTD_ASSERT (to && to_limit || !to && !to_limit);
// set the (possibly uninitialized) next pointers
// to point to the beginning of each sequence
from_next = from;
to_next = to;
// save the value of MB_CUR_MAX and avoid repeatedly using
// the macro for efficiency (it may expand to a function call)
const _RWSTD_SIZE_T mb_cur_max =
_RWSTD_STATIC_CAST (_RWSTD_SIZE_T, MB_CUR_MAX);
// the result of conversion
_STD::codecvt_base::result res = _STD::codecvt_base::ok;
// the size of the available space in the destination range
_RWSTD_SIZE_T dst_free =
_RWSTD_STATIC_CAST (_RWSTD_SIZE_T, to_limit - to_next);
// small temporary buffer used when the space in the destination
// buffer is less than MB_CUR_MAX
char buff [_RWSTD_MB_LEN_MAX];
// convert the source sequence one character at a time
for ( ; from_next < from_end; ++from_next) {
// since there is no guarantee that the converted internal
// character would fit in the remaining space in the
// destination buffer, use the small local buffer if
// the remaining space is less that the longest external
// multibyte character (i.e., MB_CUR_MAX)
char* const tmpdst = dst_free < mb_cur_max ? buff : to_next;
// save the state in case it's changed but the converted
// character doesn't fit in the destination buffer
const _RWSTD_MBSTATE_T save_state = state;
// the number of bytes in the resulting multibyte character
// sequence, not including the terminating NUL
_RWSTD_SIZE_T dst_len = 0;
#ifndef _RWSTD_NO_WCRTOMB
// convert the next source character (note that it would be
// unsafe to use wcsrtombs() since the source sequence may
// not be NUL terminated)
dst_len = wcrtomb (tmpdst, *from_next, &state);
#elif !defined (_RWSTD_NO_WCTOMB)
_RWSTD_UNUSED (state);
dst_len = wctomb (tmpdst, *from_next);
#else // error
_RWSTD_UNUSED (state);
dst_len = _RWSTD_SIZE_MAX;
#endif // _RWSTD_NO_WCRTOMB, _RWSTD_NO_WCTOMB
// -1 is returned as an indication of an illegal sequence
if (_RWSTD_SIZE_MAX == dst_len) {
res = _STD::codecvt_base::error;
break;
}
// if the multibyte sequence is the NUL character
// adjust the result by one (i.e., the length of
// the multibyte NUL character)
if (0 == dst_len)
++dst_len;
// if the remaining space in the destination sequence
// is less than MB_CUR_MAX, check to see if the multibyte
// character will fit in there
if (dst_free < mb_cur_max) {
if (dst_free < dst_len) {
// the source character converted to a multibyte
// character whose length in bytes is greater than
// the available space in the destination sequence
// restore the state to the value prior to the last
// conversion and return partial
state = save_state;
res = _STD::codecvt_base::partial;
break;
}
// the destination sequence has sufficient space
// for the multibyte character
memcpy (to_next, buff, dst_len);
}
// advance the destination next pointer one past the end
// of the multibyte character and decrement the size of
// of the available space in the destination sequence
to_next += dst_len;
dst_free -= dst_len;
}
return res;
}
static _STD::codecvt_base::result
__rw_libc_do_unshift (_RWSTD_MBSTATE_T& state, char*& to_next, char* to_limit)
{
// save current state
_RWSTD_MBSTATE_T tmp_state = state;
// use libc locale to obtain the shift sequence
char tmp [_RWSTD_MB_LEN_MAX];
_RWSTD_SIZE_T ret;
#ifndef _RWSTD_NO_WCRTOMB
ret = wcrtomb (tmp, wchar_t (0), &state);
#elif !defined (_RWSTD_NO_WCTOMB)
ret = wctomb (tmp, wchar_t (0));
#else
ret = _RWSTD_SIZE_MAX;
#endif
if (_RWSTD_SIZE_MAX == ret)
return _STD::codecvt_base::error;
if (ret > (_RWSTD_SIZE_T)(to_limit - to_next)) {
// restore the state and return partial
state = tmp_state;
return _STD::codecvt_base::partial;
}
// copy the shift sequence
memcpy (to_next, tmp, ret);
to_next += ret;
return _STD::codecvt_base::ok;
}
// checks a UTF-8 sequence representing a single character
// for validity by performing a number of computationally
// relatively expensive tests; used only in strict mode
static bool
__rw_utf8validate (const char* from, _RWSTD_SIZE_T nbytes)
{
_RWSTD_ASSERT (0 != from);
_RWSTD_ASSERT (1 < nbytes && 7 > nbytes);
const unsigned char* const byte =
_RWSTD_REINTERPRET_CAST (const unsigned char*, from);
// check for overlong sequences with the bit pattern shown below
// 2 bytes: 1100 000x (10xx xxxx)
if (2 == nbytes && (byte [0] & 0xfeU) == 0xfeU)
return false;
// 3 bytes: 1110 0000 100x xxxx (10xx xxxx)
if (3 == nbytes) {
// first detect and reject all overlong sequences
if (0xe0U == byte [0] && (byte [1] & 0xe0U) == 0x80U)
return false;
// detect and reject UTF-16 surrogate pairs, i.e., UCS characters
// in the range [U+D800, U+DFFF] (i.e., inclusive of both ends),
// i.e., "\xed\xa0\x80" through "\xed\xbf\xbf", as well as U+FFFE
// and U+FFFF, i.e., "\xef\xbf\xbe" and "\xef\xbf\xbf"
if (0xedU == byte [0]) {
if (byte [1] >= 0xa0)
return false;
}
else if ( 0xefU == byte [0] && 0xbfU == byte [1]
&& 0xbeU <= byte [2] && 0xbfU >= byte [2])
return false;
}
// 4 bytes: 1111 0000 1000 xxxx (10xx xxxx 10xx xxxx)
if (4 == nbytes && 0xf0U == byte [0] && (byte [1] & 0xf0U) == 0x80U)
return false;
// 5 bytes: 11111 000 1000 0xxx (10xxx xxx 10xx xxxx 10xx xxxx)
if (5 == nbytes && 0xf8U == byte [0] && (byte [1] & 0xf8U) == 0x80U)
return false;
// 6 bytes: 1111 1100 1000 00xx (10xx xxxx 10xx xxxx 10xxx xxx 10xxx xxx)
if (6 == nbytes && 0xfcU == byte [0] && (byte [1] & 0xfcU) == 0x80U)
return false;
for (_RWSTD_SIZE_T i = 1; i < nbytes; ++i) {
if ((byte [i] & ~0x3fU) != 0x80U)
return false; // invalid byte
}
return true;
}
static _STD::codecvt_base::result
__rw_libstd_do_in (const char *from_end,
const char *&from_next,
wchar_t *to_limit,
wchar_t *&to_next,
int flags,
const _RW::__rw_codecvt_t *impl)
{
// use UCS as the internal encoding if `impl' is 0
// (i.e., if the codecvt database is not available)
if (!impl && !UCS_TYPE (flags))
flags |= __rw_ucs;
const bool use_ucs = 0 != UCS_TYPE (flags);
const bool strict_utf = 0 != (flags & __rw_strict);
_STD::codecvt_base::result res;
for (const unsigned* const tab = impl ? impl->n_to_w_tab () : 0; ; ) {
if (from_next == from_end) {
res = _STD::codecvt_base::ok;
break;
}
if (to_next == to_limit) {
res = _STD::codecvt_base::partial;
break;
}
const char* from = from_next;
wchar_t wchar;
if (impl) {
// compute the offset of the multibyte character into
// one of the tables (UCS or wchar_t) where the value
// of the internal character corresponding to the
// multibyte character is stored
const unsigned off = __rw_mbtowco (tab, from, from_end);
if (_RWSTD_UINT_MAX == off) {
// the source sequence forms neither a valid multibyte
// character, nor is it an initial subsequence of one
res = _STD::codecvt_base::error;
break;
}
if (from == from_next) {
// the source sequence forms an incomplete initial
// subsequence of a valid multibyte character
res = _STD::codecvt_base::partial;
break;
}
wchar = use_ucs ? impl->get_ucs4_at_offset (off)
: impl->get_internal_at_offset (off);
}
else {
// use an algorithmic transformation from UTF-8 to UCS-4
_RWSTD_INT32_T wi;
const char* tmp = __rw_utf8toucs4 (&wi, from, from_end);
if (strict_utf && tmp) {
// perform additional expensive UTF-8 validation
const _RWSTD_SIZE_T utf_len = tmp - from;
if (utf_len > 1 && !__rw_utf8validate (from, utf_len))
tmp = 0;
}
from = tmp;
if (!from) {
// the source sequence forms neither a valid UTF-8
// character, nor is it an initial subsequence of one
res = _STD::codecvt_base::error;
break;
}
if (from_next == from) {
// the source sequence forms an incomplete initial
// subsequence of a valid UTF-8 character
res = _STD::codecvt_base::partial;
break;
}
wchar = wchar_t (wi);
}
from_next = from;
*to_next++ = wchar;
}
return res;
}
static _STD::codecvt_base::result
__rw_libstd_do_out (const wchar_t *from,
const wchar_t *from_end,
const wchar_t *&from_next,
char *to,
char *to_limit,
char *&to_next,
int flags,
const _RW::__rw_codecvt_t *impl)
{
// final result of the transformation
_STD::codecvt_base::result res = _STD::codecvt_base::ok;
const bool use_ucs = IS_UCS (flags);
// utf8 temporary buffer
char utfbuf [_UTF8_MB_CUR_MAX + 1];
const unsigned int* const tbl =
impl ? use_ucs ? impl->utf8_to_ext_tab () : impl->w_to_n_tab () : 0;
for (from_next = from, to_next = to; from_next != from_end; ++from_next) {
if (to_next == to_limit) {
res = _STD::codecvt_base::partial;
break;
}
if (flags & __rw_strict) {
// in strict mode check wide character for validity
// (i.e., diagnose surrogate pairs as illegal)
# if _RWSTD_WCHAR_T_SIZE == _RWSTD_CHAR_SIZE
typedef unsigned char WIntT;
# elif _RWSTD_WCHAR_T_SIZE == _RWSTD_SHRT_SIZE
typedef unsigned short WIntT;
# elif _RWSTD_WCHAR_T_SIZE ==_RWSTD_INT_SIZE
typedef unsigned int WIntT;
# elif _RWSTD_WCHAR_T_SIZE ==_RWSTD_LLONG_SIZE
typedef unsigned _RWSTD_LONG_LONG WIntT;
# else
typedef unsigned long WIntT;
# endif
// convert wchar_t to an unsigned integer safe for comaprison
const unsigned long wi = _RWSTD_STATIC_CAST (WIntT, *from_next);
if ( WIntT (0xd800U) <= wi && wi <= WIntT (0xdfffU)
|| WIntT (0xfffeU) <= wi && wi <= WIntT (0xffffU)) {
res = _STD::codecvt_base::error;
break;
}
}
// compute the number of bytes available in the destination sequence
const _RWSTD_SIZE_T bytes_avail = to_limit - to_next;
_RWSTD_SIZE_T utf8_len;
if (impl) {
// encode the wide character value in UTF-8 as if it was UCS
utf8_len = __rw_itoutf8 (*from_next, utfbuf);
// convert the UTF-8 encoded wchar_t value into
// into the external representation
const char* utf = utfbuf;
unsigned off = __rw_mbtowco (tbl, utf, utf + utf8_len);
// FIXME: block below has been disabled but is being compiled
// to avoid syntax regressions; it might produce "unreachable
// code" warnings with some compilers
if (0 /* disbled */ && _RWSTD_UINT_MAX == off) {
// try transliteration
off = __rw_xlit (impl, utfbuf, utf8_len);
if (0 == off) {
res = _STD::codecvt_base::error;
break;
}
}
if (_RWSTD_UINT_MAX == off) {
// the sequence does not form a valid character
res = _STD::codecvt_base::error;
break;
}
if (utf == utfbuf) {
// the next multibyte character position was incomplete
res = _STD::codecvt_base::ok;
break;
}
// get the UTF-8 sequence corresponding to the wide character
utf = _RWSTD_REINTERPRET_CAST (const char*, impl + 1) + off;
// check that there's enough space in the destination sequence
utf8_len = *utf ? strlen (utf) : 1;
if (bytes_avail < utf8_len) {
res = _STD::codecvt_base::partial;
break;
}
// copy the UTF-8 sequence into the destination sequence
memcpy (to_next, utf, utf8_len);
}
else {
// codeset is UTF-8
// external encoding is UTF-8, so no lookup is necessary
// encode the wide char in the destination buffer if it fits
if (bytes_avail < _UTF8_MB_CUR_MAX) {
utf8_len = __rw_itoutf8 (*from_next, utfbuf);
if (bytes_avail < utf8_len) {
res = _STD::codecvt_base::partial;
break;
}
// move from temporary buffer to destination buffer
memcpy (to_next, utfbuf, utf8_len);
}
else
utf8_len = __rw_itoutf8 (*from_next, to_next);
}
to_next += utf8_len;
}
return res;
}
// implements do_length() on top of libc mbrlen()
static _RWSTD_SIZE_T
__rw_libc_do_length (_RWSTD_MBSTATE_T &state,
const char *from,
const char *from_end,
_RWSTD_SIZE_T imax)
{
const char* const from_begin = from;
for ( ; imax && from < from_end; --imax) {
// compute the maximum length (in bytes) of the multibyte
// character sequence starting at `from'
_RWSTD_SIZE_T nbytes = from_end - from;
if (_RWSTD_MB_LEN_MAX < nbytes)
nbytes = _RWSTD_MB_LEN_MAX;
// get the number of bytes used to encode the next multibyte
// character (including NUL), or -1 if the sequence at `from'
// does not form a valid multibyte character (and -2 if it's
// an incomplete subsequence of a valid multibyte character)
nbytes = __rw_libc_mbrlen (state, from, nbytes);
// stop when an invalid or incomplete character is encountered
if (nbytes >= (_RWSTD_SIZE_T)(-2))
break;
from += nbytes;
}
// return the number of bytes (extern_type characters) in the sequence
return from - from_begin;
}
// implements do_length() for UTF-8@UCS
static _RWSTD_SIZE_T
__rw_utf8_do_length (const char *from,
const char *from_end,
_RWSTD_SIZE_T imax,
int flags)
{
_RWSTD_ASSERT (from <= from_end);
const bool strict_utf = 0 != (flags & __rw_strict);
const char *from_next = from;
// count successive utf8 characters - this is a UTF-8 encoding
for ( ; imax && from_next < from_end; --imax) {
// use an algorithmic transformation from UTF-8 to UCS-4
_RWSTD_INT32_T wi;
const char* const next = __rw_utf8toucs4 (&wi, from_next, from_end);
// stop if the character is either invalid or incomplete
if (!next || next == from_next)
break;
_RWSTD_ASSERT (from_next <= next);
_RWSTD_ASSERT (next <= from_end);
// perform additional expensive UTF-8 validation in strict mode
const _RWSTD_SIZE_T utf_len = next - from_next;
if ( strict_utf
&& utf_len > 1 && !__rw_utf8validate (from_next, utf_len))
break;
from_next = next;
}
// return the number of extern characters
return from_next - from;
}
// 22.2.1.5.2 [lib.locale.codecvt.virtuals]
// including the resolution of lwg issue 305
//
// -9- Preconditions: (from<=from_end) well-defined and true; state
// initialized, if at the beginning of a sequence, or else equal
// to the result of converting the preceding characters in the
// sequence.
//
// -9a- Effects: The effect on the state argument is "as if" it called
// do_in(state, from, from_end, from, to, to+max, to) for to pointing
// to a buffer of at least max elements.
//
// -10- Returns: (from_next-from) where from_next is the largest value
// in the range [from,from_end] such that the sequence of values
// in the range [from,from_next) represents max or fewer valid
// complete characters of type internT. The instantiation
// codecvt<char, char, mbstate_t> returns the lesser of max
// and (from_end-from).
// Note that the function returns the number of externT characters
// (i.e., those of type char for the required instantiations).
static _RWSTD_SIZE_T
__rw_libstd_do_length (const char* from,
const char* from_end,
_RWSTD_SIZE_T imax,
int flags,
const _RW::__rw_codecvt_t* impl)
{
// use UCS as the internal encoding if `impl' is 0
// (i.e., if the codecvt database is not available)
if (!impl && !UCS_TYPE (flags))
flags |= __rw_ucs;
const bool use_ucs = 0 != UCS_TYPE (flags);
if (impl) {
const char* from_next = from;
const unsigned char* const limit =
_RWSTD_REINTERPRET_CAST (const unsigned char*, from_end);
const unsigned int* const tbls =
use_ucs ? impl->utf8_to_ext_tab () : impl->n_to_w_tab ();
_RWSTD_ASSERT (tbls);
// `imax' is the maximum number of intern_type characters
for ( ; imax && from_next < from_end; --imax) {
const unsigned char* next =
_RWSTD_REINTERPRET_CAST (const unsigned char*, from_next);
for (const unsigned* tbl = tbls; tbl [*next] & 0x80000000; ) {
// check if the extern character is valid
if (_RWSTD_UINT_MAX == tbl [*next])
return from_next - from;
// the value here is the table index
const unsigned index = tbl [*next] & 0x7fffffff;
tbl = tbls + 256 * index;
++next;
if (next == limit)
return from_next - from;
}
from_next = _RWSTD_REINTERPRET_CAST (const char*, next + 1);
}
// return the number of extern characters
return from_next - from;
}
return __rw_utf8_do_length (from, from_end, imax, flags);
}
} // namespace __rw
_RWSTD_NAMESPACE (_V3_LOCALE) {
_RW::__rw_facet_id codecvt<wchar_t, char, _RWSTD_MBSTATE_T>::id;
/* explicit */ codecvt<wchar_t, char, _RWSTD_MBSTATE_T>::
codecvt (_RWSTD_SIZE_T __ref /* = 0 */)
: _RW::__rw_facet (__ref)
{
// no-op
}
/* virtual */ codecvt_base::result
codecvt<wchar_t, char, _RWSTD_MBSTATE_T>::
do_out (state_type &state,
const intern_type *from,
const intern_type *from_end,
const intern_type *&from_next,
extern_type *to,
extern_type *to_end,
extern_type *&to_next) const
{
// 22.2.1.5.2, p1 - preconditions
_RWSTD_ASSERT (from <= from_end);
_RWSTD_ASSERT (to <= to_end);
// verify that both ranges are valid
_RWSTD_ASSERT (from && from_end || !from && !from_end);
_RWSTD_ASSERT (to && to_end || !to && !to_end);
// next pointers must always be set before returning, even on error
from_next = from;
to_next = to;
#ifdef _RWSTDDEBUG
// verify that the conversion state is valid
const int mbstate_valid = _RW::__rw_mbsinit (&state);
_RWSTD_ASSERT (0 != mbstate_valid);
#else // if !defined (_RWSTDDEBUG)
_RWSTD_UNUSED (state);
#endif // _RWSTDDEBUG
// copy internal sequence to external
for (; from_next != from_end && to_next != to_end; ++from_next, ++to_next)
// prevent conversion problems due to char signedness
*to_next = _RWSTD_STATIC_CAST (unsigned char, *from_next);
// Table 53, and lwg issue 382: do_out() returns partial
// if not all source characters could be converted (e.g.,
// because the destination range is full)
return from_next == from_end ? ok : partial;
}
/* virtual */ codecvt_base::result
codecvt<wchar_t, char, _RWSTD_MBSTATE_T>::
do_in (state_type &state,
const extern_type *from,
const extern_type *from_end,
const extern_type *&from_next,
intern_type *to,
intern_type *to_end,
intern_type *&to_next) const
{
// 22.2.1.5.2, p1 - preconditions
_RWSTD_ASSERT (from <= from_end);
_RWSTD_ASSERT (to <= to_end);
// verify that both ranges are valid
_RWSTD_ASSERT (from && from_end || !from && !from_end);
_RWSTD_ASSERT (to && to_end || !to && !to_end);
// next pointers must always be set before returning, even on error
from_next = from;
to_next = to;
const int mbstate_valid = _RW::__rw_mbsinit (&state);
_RWSTD_ASSERT (0 != mbstate_valid);
if (!mbstate_valid)
return error;
// copy external sequence to internal
for (; from_next != from_end && to_next != to_end;
++from_next, ++to_next) {
// prevent conversion problems due to char signedness
*to_next = _RWSTD_STATIC_CAST (unsigned char, *from_next);
}
return from_next == from_end ? ok : partial;
}
/* virtual */ codecvt_base::result
codecvt<wchar_t, char, _RWSTD_MBSTATE_T>::
do_unshift (state_type &state,
extern_type *to,
extern_type *to_end,
extern_type *&to_next) const
{
// verify that the range is valid
_RWSTD_ASSERT (to <= to_end);
_RWSTD_ASSERT (to && to_end || !to && !to_end);
_RWSTD_UNUSED (to_end);
// next pointer must always be set before returning, even on error
to_next = to;
const int mbstate_valid = _RW::__rw_mbsinit (&state);
_RWSTD_ASSERT (0 != mbstate_valid);
if (!mbstate_valid)
return error;
return noconv;
}
/* virtual */ int
codecvt<wchar_t, char, _RWSTD_MBSTATE_T>::
do_length (state_type &state,
const extern_type *from,
const extern_type *from_end,
_RWSTD_SIZE_T imax) const
{
// 22.2.1.5.2, p9 - preconditions
_RWSTD_ASSERT (from <= from_end);
// verify that the range is valid
_RWSTD_ASSERT (from && from_end || !from && !from_end);
const int mbstate_valid = _RW::__rw_mbsinit (&state);
_RWSTD_ASSERT (0 != mbstate_valid);
if (!mbstate_valid)
return 0;
// 22.2.1.5.2, p10
const _RWSTD_SIZE_T len = from_end - from;
return int (len < imax ? len : imax);
}
// codecvt_byname <wchar,char> specialization
codecvt_byname<wchar_t, char, _RWSTD_MBSTATE_T>::
codecvt_byname (const char *name, _RWSTD_SIZE_T ref)
: codecvt<wchar_t, char, _RWSTD_MBSTATE_T>(ref)
{
_C_flags = _RW::__rw_encoding_from_name (name);
#if 0
// FIXME: reliably detect whether the encoding is really stateless
// when `name' refers to a libc locale name
if (_RW::stateless == (_C_flags & 0xf)) {
const _RW::__rw_setlocale clocale (_C_name, LC_CTYPE);
_C_flags = mbtowc (0, 0, 0) ? _RW::stateful : _RW::stateless;
}
#endif // 0/1
char locale_name [256];
// look for the `@' name modifier in the locale name
const char* mod = strchr (name, '@');
if (mod) {
const char* const mod_nam = mod + 1;
const _RWSTD_SIZE_T mod_len = strlen (mod_nam);
// search for one of the known modifiers
if (mod_len > 2 && !memcmp (mod_nam, "UCS", 3)) {
int flags = 0;
for (_RWSTD_SIZE_T i = 0; i != _RW::__rw_n_ucsmods; ++i) {
if (!strcmp (_RW::__rw_ucsmods [i].mod, mod_nam)) {
flags = _RW::__rw_ucsmods [i].flags;
break;
}
}
if (flags)
_C_flags |= flags;
else
mod = 0;
}
else
mod = 0; // not a "@UCS-" modifier
}
if (mod) {
_RWSTD_ASSERT (long (mod - name) < long (sizeof locale_name));
memcpy (locale_name, name, mod - name);
locale_name [mod - name] = '\0';
name = locale_name;
}
// handle the special names: UTF-8, UTF-16, UTF-16-BE, UTF-16-LE,
// denoting an external UTF encoding with strict validation rules
// but slower processing, and their relaxed but faster equivalents,
// utf-8, utf-16, utf-16-be, utf-16-le
const _RWSTD_SIZE_T name_len = strlen (name);
if (4 < name_len) {
// check whether the UTF- prefix is in lowercase or capital letters
const bool pfx_low = !memcmp (name, "utf-", 4);
const bool pfx_cap = !pfx_low && !memcmp (name, "UTF-", 4);
int flags = 0;
if (pfx_low || pfx_cap) {
if (5 == name_len && '8' == name [4])
flags = __rw_utf8;
else if (!strcmp (name + 4, "16"))
flags = __rw_utf16;
else if (pfx_low) {
if (!strcmp (name + 4, "16-BE"))
flags = __rw_utf16_be;
else if (!strcmp (name + 4, "16-LE"))
flags = __rw_utf16_le;
}
else if (pfx_cap) {
if (!strcmp (name + 4, "16-BE"))
flags = __rw_utf16_be;
else if (!strcmp (name + 4, "16-LE"))
flags = __rw_utf16_le;
}
}
if (flags) {
_C_flags |= flags;
if (pfx_cap)
_C_flags |= __rw_strict ;
*locale_name = '\0';
name = locale_name;
}
}
if ( this->_C_opts & this->_C_use_libc
&& !UCS_TYPE (_C_flags) && !UTF_TYPE (_C_flags))
_C_flags |= __rw_use_libc;
this->_C_set_name (name, _C_namebuf, sizeof _C_namebuf);
}
/* virtual */ codecvt_base::result
codecvt_byname<wchar_t, char, _RWSTD_MBSTATE_T>::
do_in (state_type& state,
const extern_type* from,
const extern_type* from_end,
const extern_type*& from_next,
intern_type* to,
intern_type* to_limit,
intern_type*& to_next) const
{
// 22.2.1.5.2 p1
_RWSTD_ASSERT (from <= from_end);
_RWSTD_ASSERT (to <= to_limit);
// verify that both ranges are valid
_RWSTD_ASSERT (from && from_end || !from && !from_end);
_RWSTD_ASSERT (to && to_limit || !to && !to_limit);
// the standard does not specify the value of
// *next pointers at entry;
from_next = from;
to_next = to;
// do_in result
result res = error;
// test the type of the encoding that the facet is interpreting
switch (ISO2022_TYPE (_C_flags)) {
case _RW::stateless: {
// obtain the mapping of the database file
const _RW::__rw_codecvt_t* const impl =
_RWSTD_STATIC_CAST (const _RW::__rw_codecvt_t*, this->_C_data ());
if (USE_LIBC (impl, _C_flags)) {
// use libc locale
const _RW::__rw_setlocale clocale (_C_name, LC_CTYPE);
#ifndef _RWSTD_NO_MBTOWC
// verify that either the encoding is stateful
// or the state is in its initial shift state
const bool mbstate_valid =
mbtowc (0, 0, 0) || _RW::__rw_mbsinit (&state);
_RWSTD_ASSERT (mbstate_valid);
_RWSTD_UNUSED (mbstate_valid);
#endif // _RWSTD_NO_MBTOWC
res = _RW::__rw_libc_do_in (state,
from, from_end, from_next,
to, to_limit, to_next);
}
else {
// verify that the state is in its initial shift state
const int mbstate_valid = _RW::__rw_mbsinit (&state);
_RWSTD_ASSERT (mbstate_valid);
_RWSTD_UNUSED (mbstate_valid);
res = _RW::__rw_libstd_do_in (from_end, from_next,
to_limit, to_next,
_C_flags, impl);
}
break;
}
case _RW::iso2022_jp:
res = _RW::__rw_iso2022jp_do_in (state, from_next, from_end,
to_next, to_limit);
break;
case _RW::iso2022_jp2:
res = _RW::__rw_iso2022jp2_do_in (state, from_next, from_end,
to_next, to_limit);
break;
case _RW::iso2022_kr:
case _RW::iso2022_cn:
default:
break;
};
return res;
}
/* virtual */ codecvt_base::result
codecvt_byname<wchar_t, char, _RWSTD_MBSTATE_T>::
do_out (state_type &state,
const intern_type *from,
const intern_type *from_end,
const intern_type *&from_next,
extern_type *to,
extern_type *to_limit,
extern_type *&to_next) const
{
// 22.2.1.5.2 p1
_RWSTD_ASSERT (from <= from_end);
_RWSTD_ASSERT (to <= to_limit);
// verify that both ranges are valid
_RWSTD_ASSERT (from && from_end || !from && !from_end);
_RWSTD_ASSERT (to && to_limit || !to && !to_limit);
// the standard does not specify the value of the next pointers on entry
from_next = from;
to_next = to;
// conversion result
codecvt_base::result res = codecvt_base::error;
// test the type of the encoding that the facet is interpreting
switch (ISO2022_TYPE (_C_flags)) {
case _RW::stateless: {
// obtain the mapping of the database file
const _RW::__rw_codecvt_t* impl =
_RWSTD_STATIC_CAST(const _RW::__rw_codecvt_t*, this->_C_data ());
if (USE_LIBC (impl, _C_flags)) {
// use libc locale
const _RW::__rw_setlocale clocale (_C_name, LC_CTYPE);
#ifndef _RWSTD_NO_MBTOWC
// verify that either the encoding is stateful
// or the state is in its initial shift state
const bool mbstate_valid =
mbtowc (0, 0, 0) || _RW::__rw_mbsinit (&state);
_RWSTD_ASSERT (mbstate_valid);
_RWSTD_UNUSED (mbstate_valid);
#endif // _RWSTD_NO_MBTOWC
res = _RW::__rw_libc_do_out (state, from, from_end, from_next,
to, to_limit, to_next);
}
else {
// verify that the state is in its initial shift state
const int mbstate_valid = _RW::__rw_mbsinit (&state);
_RWSTD_ASSERT (mbstate_valid);
_RWSTD_UNUSED (mbstate_valid);
// use own implementation
res = _RW::__rw_libstd_do_out (from, from_end, from_next,
to, to_limit, to_next,
_C_flags, impl);
}
break;
}
case _RW::iso2022_jp:
res = _RW::__rw_iso2022jp_do_out (state, from_next, from_end,
to_next, to_limit);
break;
case _RW::iso2022_jp2:
res = _RW::__rw_iso2022jp2_do_out (state, from_next, from_end,
to_next, to_limit);
break;
case _RW::iso2022_kr:
case _RW::iso2022_cn:
default:
break;
};
return res;
}
/* virtual */ codecvt_base::result
codecvt_byname<wchar_t, char, _RWSTD_MBSTATE_T>::
do_unshift (state_type& state,
extern_type* to,
extern_type* to_limit,
extern_type*& to_next) const
{
// verify that the range is valid
_RWSTD_ASSERT (to <= to_limit);
_RWSTD_ASSERT (to && to_limit || !to && !to_limit);
// the standard does not specify the value of
// to_next pointer at entry;
to_next = to;
// do_unshift result
codecvt_base::result res = codecvt_base::error;
// test the type of the encoding that the facet is interpreting
switch (ISO2022_TYPE (_C_flags)) {
case _RW::stateless:
if (USE_LIBC (_C_impdata, _C_flags)) {
const _RW::__rw_setlocale clocale (_C_name, LC_CTYPE);
if (mbtowc (0, 0, 0) == 0) {
// verify that the state is in its initial shift state
const int mbstate_valid = _RW::__rw_mbsinit (&state);
_RWSTD_ASSERT (mbstate_valid);
res = mbstate_valid ? noconv : error;
}
else
res = _RW::__rw_libc_do_unshift (state, to_next, to_limit);
}
else {
// use our own implementation
// verify that the state is in its initial shift state
const int mbstate_valid = _RW::__rw_mbsinit (&state);
_RWSTD_ASSERT (mbstate_valid);
res = mbstate_valid ? noconv : error;
}
break;
case _RW::iso2022_jp:
res = _RW::__rw_iso2022jp_do_unshift (state, to_next, to_limit);
break;
case _RW::iso2022_jp2:
res = _RW::__rw_iso2022jp2_do_unshift (state, to_next, to_limit);
break;
case _RW::iso2022_kr:
case _RW::iso2022_cn:
default:
break;
};
return res;
}
/* virtual */ int
codecvt_byname<wchar_t, char, _RWSTD_MBSTATE_T>::
do_length (state_type& state,
const extern_type *from,
const extern_type *from_end,
_RWSTD_SIZE_T cmax) const
{
// 22.2.1.5.2 p1
_RWSTD_ASSERT (from <= from_end);
// verify that the range is valid
_RWSTD_ASSERT (from && from_end || !from && !from_end);
_RWSTD_SIZE_T len = 0;
// test the type of the encoding that the facet is interpreting
switch (ISO2022_TYPE (_C_flags)) {
case _RW::stateless: {
// obtain the mapping of the database file
const _RW::__rw_codecvt_t* impl =
_RWSTD_STATIC_CAST(const _RW::__rw_codecvt_t*, this->_C_data ());
if (USE_LIBC (impl, _C_flags)) {
// use libc locale
const _RW::__rw_setlocale clocale (this->_C_name, LC_CTYPE);
#ifndef _RWSTD_NO_MBTOWC
// verify that either the encoding is stateful
// or the state is in its initial shift state
const bool mbstate_valid =
mbtowc (0, 0, 0) || _RW::__rw_mbsinit (&state);
_RWSTD_ASSERT (mbstate_valid);
_RWSTD_UNUSED (mbstate_valid);
#endif // _RWSTD_NO_MBTOWC
len = _RW::__rw_libc_do_length (state, from, from_end, cmax);
}
else {
// verify that the state is in its initial shift state
const int mbstate_valid = _RW::__rw_mbsinit (&state);
_RWSTD_ASSERT (mbstate_valid);
_RWSTD_UNUSED (mbstate_valid);
len = _RW::__rw_libstd_do_length (from, from_end,
cmax, _C_flags, impl);
}
break;
}
case _RW::iso2022_jp:
len = _RW::__rw_iso2022jp_do_length (state, from, from_end, cmax);
break;
case _RW::iso2022_jp2:
len = _RW::__rw_iso2022jp2_do_length (state, from, from_end, cmax);
break;
case _RW::iso2022_kr:
case _RW::iso2022_cn:
default:
break;
};
return int (len);
}
/* virtual */ int
codecvt_byname<wchar_t, char, _RWSTD_MBSTATE_T>::
do_encoding () const _THROWS (())
{
// -7- Returns: -1 if the encoding of the extern_type sequence is
// state-dependent; else the constant number of extern_type
// characters needed to produce an internal character;
// or 0 if this number is not a constant 227).
// _____________
// Footnote 227: If encoding() yields -1, then more than max_length()
// extern_type elements may be consumed when producing a single internT
// character, and additional extern_type elements may appear at the end
// of a sequence after those that yield the final intern_type character.
int enc = 0; // variable-width stateless encoding
// test the type of the encoding that the facet is interpreting
switch (ISO2022_TYPE (_C_flags)) {
case _RW::stateless: {
// obtain the database file mapping
const _RW::__rw_codecvt_t* impl =
_RWSTD_STATIC_CAST(const _RW::__rw_codecvt_t*, this->_C_data ());
if (USE_LIBC (impl, _C_flags)) {
// 7.20.7.2 of C99: if first arg is 0, mbtowc() returns non-zero
// or 0 whether or not the encoding is stateful or stateless
const _RW::__rw_setlocale clocale (_C_name, LC_CTYPE);
if (mbtowc (0, 0, 0)) {
// in a stateful encoding, the number of extern_type
// elements required to produce a single intern_type
// character is indeterminate
enc = -1;
}
else {
// in a stateless encoding, the number of extern_type
// elements required to produce a single intern_type
// character is guaranteed to be 1 only when MB_CUR_MAX
// is also 1, otherwise it's most likely to be variable
// (e.g., EUC, UTF-8, etc.)
if (1 == MB_CUR_MAX)
enc = 1;
}
}
else {
// use own implementation
// 22.2.1.5.2 p6
if (impl)
enc = 1 < impl->mb_cur_max ? 0 : 1;
else
enc = 0;
}
break;
}
case _RW::iso2022_jp:
enc = _RW::__rw_iso2022jp_do_encoding ();
break;
case _RW::iso2022_jp2:
enc = _RW::__rw_iso2022jp2_do_encoding ();
break;
case _RW::iso2022_kr:
case _RW::iso2022_cn:
default:
break;
};
return enc;
}
/* virtual */ int
codecvt_byname<wchar_t, char, _RWSTD_MBSTATE_T>::
do_max_length () const _THROWS (())
{
// returns the max value do_length (s, from, from_end, 1) can return
// for any valid range [from, from_end) - see LWG issue 74 (a DR)
_RWSTD_SIZE_T max_len = 0;
// test the type of the encoding that the facet is interpreting
switch (ISO2022_TYPE (_C_flags)) {
case _RW::stateless: {
// obtain the database file mapping
const _RW::__rw_codecvt_t* impl =
_RWSTD_STATIC_CAST(const _RW::__rw_codecvt_t*, this->_C_data ());
if (USE_LIBC (impl, _C_flags)) {
// use libc locale
const _RW::__rw_setlocale clocale (_C_name, LC_CTYPE);
max_len = _RWSTD_STATIC_CAST (_RWSTD_SIZE_T, MB_CUR_MAX);
}
else {
// use own implementation
if (IS_UTF8 (_C_flags))
max_len = _UTF8_MB_CUR_MAX;
else if (impl)
max_len = _RWSTD_STATIC_CAST (_RWSTD_SIZE_T, impl->mb_cur_max);
}
break;
}
case _RW::iso2022_jp:
max_len = _RW::__rw_iso2022jp_do_max_length ();
break;
case _RW::iso2022_jp2:
max_len = _RW::__rw_iso2022jp2_do_max_length ();
break;
case _RW::iso2022_kr:
case _RW::iso2022_cn:
default:
break;
};
return int (max_len);
}
#endif // _RWSTD_NO_WCHAR_T
} // namespace _V3_LOCALE
#ifndef _RWSTD_NO_WCHAR_T
# define TARGS_W <wchar_t, char, _RWSTD_MBSTATE_T>
_RWSTD_DEFINE_FACET_FACTORY (static, codecvt, TARGS_W, wcodecvt);
_RWSTD_SPECIALIZE_USE_FACET (wcodecvt);
#endif // _RWSTD_NO_WCHAR_T