src/iso2022.cpp - stdcxx - Git at Google

 /***************************************************************************
  *
  * iso2022.cpp
  *
  * $Id$
  *
  ***************************************************************************
  *
  * Licensed to the Apache Software  Foundation (ASF) under one or more
  * contributor  license agreements.  See  the NOTICE  file distributed
  * with  this  work  for  additional information  regarding  copyright
  * ownership.   The ASF  licenses this  file to  you under  the Apache
  * License, Version  2.0 (the  License); you may  not use  this file
  * except in  compliance with the License.   You may obtain  a copy of
  * the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the  License is distributed on an  "AS IS" BASIS,
  * WITHOUT  WARRANTIES OR CONDITIONS  OF ANY  KIND, either  express or
  * implied.   See  the License  for  the  specific language  governing
  * permissions and limitations under the License.
  *
  * Copyright 1994-2008 Rogue Wave Software, Inc.
  *
  **************************************************************************/

 #define _RWSTD_LIB_SRC

 #include <rw/_defs.h>

 #include <string.h>     // for size_t, strxxx()
 #include <iosfwd>       // for mbstate_t

 #include <loc/_codecvt.h>
 #include <loc/_localedef.h>

 #include "iso2022.h"
 #include "locale_body.h"


 // Registration numbers of the supported character sets; each value is
 // also used as the index of the character set's entry in the
 // 256-element __rw_chset_map table
 // NOTE(review): the values appear to match the ISO-IR registry numbers
 // (e.g., 6 for ASCII, 100 for ISO-8859-1) -- confirm against the registry
 #define CHSET_ANSI_X_3_4_1968             6
 #define CHSET_JIS_X_0201_KANA            13
 #define CHSET_JIS_X_0201_ROMAN           14
 #define CHSET_JIS_X_0208_1978            42
 #define CHSET_JIS_X_0208_1983            87
 #define CHSET_JIS_X_0212_1990           159
 #define CHSET_GB2312_1980                58
 #define CHSET_KSC_5601_1987             149
 #define CHSET_ISO_8859_1                100
 #define CHSET_ISO_8859_7                126

 // Indices of the codeset conversion databases in the __rw_db_map array
 // (index 0 is reserved for "no database"); the order matches the
 // canonical names stored in __rw_db_map
 #define DBIDX_ANSI_X_3_4_1968             1
 #define DBIDX_ISO_8859_1                  2
 #define DBIDX_ISO_8859_2                  3
 #define DBIDX_ISO_8859_3                  4
 #define DBIDX_ISO_8859_4                  5
 #define DBIDX_ISO_8859_5                  6
 #define DBIDX_ISO_8859_6                  7
 #define DBIDX_ISO_8859_7                  8
 #define DBIDX_ISO_8859_8                  9
 #define DBIDX_EUC_JP                     10
 #define DBIDX_EUC_KR                     11
 #define DBIDX_GB2312                     12

 // Designating escape sequences emitted for each character set; every
 // sequence starts with the escape character (0x1B) and is 3 or 4 bytes
 // long; ISO_2022_JP_SET_SS2 is the 2-byte single-shift 2 function that
 // is written in front of each character taken from the G2 set
 #define ISO_2022_JP_SET_ASCII              "\x1B(B"
 #define ISO_2022_JP_SET_JIS201_ROMAN       "\x1B(J"
 #define ISO_2022_JP_SET_JIS201_KANA        "\x1B(I"
 #define ISO_2022_JP_SET_JIS208_78          "\x1B$@"
 #define ISO_2022_JP_SET_JIS208_83          "\x1B$B"
 #define ISO_2022_JP_SET_GB2312_80          "\x1B$A"
 #define ISO_2022_JP_SET_KSC5601_87         "\x1B$(C"
 #define ISO_2022_JP_SET_JIS212_90          "\x1B$(D"
 #define ISO_2022_JP_SET_ISO_8859_1         "\x1B.A"
 #define ISO_2022_JP_SET_ISO_8859_7         "\x1B.F"
 #define ISO_2022_JP_SET_SS2                "\x1BN"


 // NOTE(review): presumably the number of bytes one character occupies in
 // the respective external (database) encoding; none of these macros is
 // referenced in this part of the file -- confirm against their users
 #define ASCII_CHAR_LEN                          1

 #define EUCJP_ASCII_CHAR_LEN                    1
 #define EUCJP_JISX0201_ROMAN_CHAR_LEN           1
 #define EUCJP_JISX0201_KANA_CHAR_LEN            2
 #define EUCJP_JISX0208_CHAR_LEN                 2
 #define EUCJP_JISX0212_CHAR_LEN                 3

 #define GB2312_CHAR_LEN                         2
 #define KSC5601_CHAR_LEN                        2

 #define ISO_8859_1_CHAR_LEN                     1
 #define ISO_8859_7_CHAR_LEN                     1


 // Values of the bytes that follow the escape character in an escape
 // sequence; the byte selects the class of the character set being
 // designated (single-byte 94/96-character sets, multibyte sets) and
 // the code element (G0-G3) it is designated to

 // single-byte, 94-character sets
 #define ISO_2022_SET_SB94_G0                 0x28
 #define ISO_2022_SET_SB94_G1                 0x29
 #define ISO_2022_SET_SB94_G2                 0x2A
 #define ISO_2022_SET_SB94_G3                 0x2B

 // single-byte, 96-character sets
 #define ISO_2022_SET_SB96_G1                 0x2D
 #define ISO_2022_SET_SB96_G2                 0x2E
 #define ISO_2022_SET_SB96_G3                 0x2F

 // introducer of a multibyte character set designation
 #define ISO_2022_SET_MB                      0x24

 // multibyte, 94-character sets (byte following ISO_2022_SET_MB)
 #define ISO_2022_SET_MB94_G0                 0x28
 #define ISO_2022_SET_MB94_G1                 0x29
 #define ISO_2022_SET_MB94_G2                 0x2A
 #define ISO_2022_SET_MB94_G3                 0x2B

 // multibyte, 96-character sets (byte following ISO_2022_SET_MB)
 #define ISO_2022_SET_MB96_G1                 0x2D
 #define ISO_2022_SET_MB96_G2                 0x2E
 #define ISO_2022_SET_MB96_G3                 0x2F

 // single-shift 2 function (second byte of the ESC N sequence)
 #define ISO_2022_SET_SS2                     0x4E

 // the escape character that starts every escape sequence
 #define ESCAPE_CHAR                          0x1B

 // fallback definition used only when _UTF8_MB_CUR_MAX has not already
 // been defined
 // NOTE(review): presumably the maximum number of bytes in one UTF-8
 // encoded character -- confirm
 #if !defined (_UTF8_MB_CUR_MAX)
 #  define _UTF8_MB_CUR_MAX                      6
 #endif


 // shorthands for the std::codecvt_base::result values returned by the
 // conversion helpers in this file
 #define CODECVT_ERROR   _STD::codecvt_base::error
 #define CODECVT_OK      _STD::codecvt_base::ok
 #define CODECVT_PARTIAL _STD::codecvt_base::partial


 _RWSTD_NAMESPACE (__rw) {


 // Class of a character set as selected by an escape sequence:
 //   sb94 - single-byte, 94-character set
 //   sb96 - single-byte, 96-character set
 //   mb94 - multibyte, 94-character set
 //   mb96 - multibyte, 96-character set (no such set is registered here)
 enum __rw_codeset_type_t {
     sb94 = 0, sb96, mb94, mb96
 };

 // State of the ISO-2022 conversions; objects of this type live in the
 // static __rw_iso2022_states array and are referred to by the index
 // stored in a std::mbstate_t object
 struct __rw_iso2022_state_t {
     // 0 - unused, 1 - used, default 0
     // (set when the slot is allocated, cleared when it is deallocated)
     unsigned used:1;

     // invoked code elements: valid values 1-4, invalid 0, default 0
     // (the value minus one is used as an index into g_map)
     unsigned gl_map:3;
     unsigned gr_map:3;

     // shift state: 0 - one character, 1 - two characters, default 0
     // (same representation as __rw_chset_map_t::width)
     unsigned shift_state:1;

     // single-shift 2 and 3 functions
     // (non-zero when the respective single shift is in effect)
     unsigned sshift2:1;
     unsigned sshift3:1;

     // equivalent of G0-G3 code elements
     // (each element holds the registration number, CHSET_xxx, of the
     // character set designated to that code element)
     unsigned char g_map [4];    // default 0
 };

 // Mapping between the code of the character set and the name;
 // element type of the __rw_chset_map table, which is indexed by
 // the character set's registration number
 struct __rw_chset_map_t
 {
     const char*   name;         // codeset canonical name
     unsigned char index;        // database index
     // the width is stored as the number of bytes per character minus
     // one (users of the table add 1 to obtain the byte count)
     unsigned char width;        // encoding width
     const char*   seq;          // designating sequence
     // length in bytes, not counting the terminating NUL
     unsigned int  seqlen;       // designating sequence length
 };

 // Database file mappings; element type of the __rw_db_map array
 // whose pv and size members are filled in lazily, the first time
 // the database is requested
 struct __rw_db_map_t
 {
     const void* pv;           // pointer to mapping
     size_t      size;         // size of mapping
     const char* name;         // canonical name
 };


 // make sure there is just one copy of the empty string used
 // in the tables below (space optimization in case the compiler
 // doesn't collapse them automatically); an empty name marks
 // an unused table entry
 static const char __rw_empty[] = "";


 // Array of mappings: code registration number to name
 //
 // The table is indexed by the registration number of the character
 // set (the CHSET_xxx constants); each used entry holds, in order:
 //   - the canonical name of the codeset database,
 //   - the index of the database in __rw_db_map (DBIDX_xxx),
 //   - the encoding width stored as bytes per character minus one,
 //   - the designating escape sequence and its length in bytes.
 // Entries for unsupported registration numbers are EMPTY, i.e., they
 // have an empty name, and zero for all other members.
 static const __rw_chset_map_t __rw_chset_map [256] = {

 // initializer of an unused entry
 #define EMPTY   __rw_empty, 0, 0, 0, 0

     {/* 0x00 */ EMPTY }, {/* 0x01 */ EMPTY }, {/* 0x02 */ EMPTY },
     {/* 0x03 */ EMPTY }, {/* 0x04 */ EMPTY }, {/* 0x05 */ EMPTY },

     // CHSET_ANSI_X_3_4_1968
     {
         /* 0x06 */ "ANSI_X3.4-1968", DBIDX_ANSI_X_3_4_1968, 0,
         ISO_2022_JP_SET_ASCII,
         sizeof (ISO_2022_JP_SET_ASCII) - 1U
     },

     {/* 0x07 */ EMPTY }, {/* 0x08 */ EMPTY }, {/* 0x09 */ EMPTY },
     {/* 0x0a */ EMPTY }, {/* 0x0b */ EMPTY }, {/* 0x0c */ EMPTY },

     // CHSET_JIS_X_0201_KANA
     {
         /* 0x0d */ "EUC-JP", DBIDX_EUC_JP, 0,
         ISO_2022_JP_SET_JIS201_KANA,
         sizeof (ISO_2022_JP_SET_JIS201_KANA) - 1U
     },

     // CHSET_JIS_X_0201_ROMAN
     {
         /* 0x0e */ "EUC-JP", DBIDX_EUC_JP, 0,
         ISO_2022_JP_SET_JIS201_ROMAN,
         sizeof (ISO_2022_JP_SET_JIS201_ROMAN) - 1U
     },

     {/* 0x0f */ EMPTY }, {/* 0x10 */ EMPTY }, {/* 0x11 */ EMPTY },
     {/* 0x12 */ EMPTY }, {/* 0x13 */ EMPTY }, {/* 0x14 */ EMPTY },
     {/* 0x15 */ EMPTY }, {/* 0x16 */ EMPTY }, {/* 0x17 */ EMPTY },
     {/* 0x18 */ EMPTY }, {/* 0x19 */ EMPTY }, {/* 0x1a */ EMPTY },
     {/* 0x1b */ EMPTY }, {/* 0x1c */ EMPTY }, {/* 0x1d */ EMPTY },
     {/* 0x1e */ EMPTY }, {/* 0x1f */ EMPTY }, {/* 0x20 */ EMPTY },
     {/* 0x21 */ EMPTY }, {/* 0x22 */ EMPTY }, {/* 0x23 */ EMPTY },
     {/* 0x24 */ EMPTY }, {/* 0x25 */ EMPTY }, {/* 0x26 */ EMPTY },
     {/* 0x27 */ EMPTY }, {/* 0x28 */ EMPTY }, {/* 0x29 */ EMPTY },

     // CHSET_JIS_X_0208_1978
     {
         /* 0x2a */ "EUC-JP", DBIDX_EUC_JP, 1,
         ISO_2022_JP_SET_JIS208_78,
         sizeof (ISO_2022_JP_SET_JIS208_78) - 1U
     },

     {/* 0x2b */ EMPTY }, {/* 0x2c */ EMPTY }, {/* 0x2d */ EMPTY },
     {/* 0x2e */ EMPTY }, {/* 0x2f */ EMPTY }, {/* 0x30 */ EMPTY },
     {/* 0x31 */ EMPTY }, {/* 0x32 */ EMPTY }, {/* 0x33 */ EMPTY },
     {/* 0x34 */ EMPTY }, {/* 0x35 */ EMPTY }, {/* 0x36 */ EMPTY },
     {/* 0x37 */ EMPTY }, {/* 0x38 */ EMPTY }, {/* 0x39 */ EMPTY },

     // CHSET_GB2312_1980
     {
         /* 0x3a */ "GB2312", DBIDX_GB2312, 1,
         ISO_2022_JP_SET_GB2312_80,
         sizeof (ISO_2022_JP_SET_GB2312_80) - 1U
     },

     {/* 0x3b */ EMPTY },
     {/* 0x3c */ EMPTY }, {/* 0x3d */ EMPTY }, {/* 0x3e */ EMPTY },
     {/* 0x3f */ EMPTY }, {/* 0x40 */ EMPTY }, {/* 0x41 */ EMPTY },
     {/* 0x42 */ EMPTY }, {/* 0x43 */ EMPTY }, {/* 0x44 */ EMPTY },
     {/* 0x45 */ EMPTY }, {/* 0x46 */ EMPTY }, {/* 0x47 */ EMPTY },
     {/* 0x48 */ EMPTY }, {/* 0x49 */ EMPTY }, {/* 0x4a */ EMPTY },
     {/* 0x4b */ EMPTY }, {/* 0x4c */ EMPTY }, {/* 0x4d */ EMPTY },
     {/* 0x4e */ EMPTY }, {/* 0x4f */ EMPTY }, {/* 0x50 */ EMPTY },
     {/* 0x51 */ EMPTY }, {/* 0x52 */ EMPTY }, {/* 0x53 */ EMPTY },
     {/* 0x54 */ EMPTY }, {/* 0x55 */ EMPTY }, {/* 0x56 */ EMPTY },

     // CHSET_JIS_X_0208_1983
     {
         /* 0x57 */ "EUC-JP", DBIDX_EUC_JP, 1,
         ISO_2022_JP_SET_JIS208_83,
         sizeof (ISO_2022_JP_SET_JIS208_83) - 1U
     },

     {/* 0x58 */ EMPTY }, {/* 0x59 */ EMPTY }, {/* 0x5a */ EMPTY },
     {/* 0x5b */ EMPTY }, {/* 0x5c */ EMPTY }, {/* 0x5d */ EMPTY },
     {/* 0x5e */ EMPTY }, {/* 0x5f */ EMPTY }, {/* 0x60 */ EMPTY },
     {/* 0x61 */ EMPTY }, {/* 0x62 */ EMPTY }, {/* 0x63 */ EMPTY },

     // CHSET_ISO_8859_1
     {
         /* 0x64 */ "ISO-8859-1", DBIDX_ISO_8859_1, 0,
         ISO_2022_JP_SET_ISO_8859_1,
         sizeof (ISO_2022_JP_SET_ISO_8859_1) - 1U
     },

     {/* 0x65 */ EMPTY }, {/* 0x66 */ EMPTY }, {/* 0x67 */ EMPTY },
     {/* 0x68 */ EMPTY }, {/* 0x69 */ EMPTY }, {/* 0x6a */ EMPTY },
     {/* 0x6b */ EMPTY }, {/* 0x6c */ EMPTY }, {/* 0x6d */ EMPTY },
     {/* 0x6e */ EMPTY }, {/* 0x6f */ EMPTY }, {/* 0x70 */ EMPTY },
     {/* 0x71 */ EMPTY }, {/* 0x72 */ EMPTY }, {/* 0x73 */ EMPTY },
     {/* 0x74 */ EMPTY }, {/* 0x75 */ EMPTY }, {/* 0x76 */ EMPTY },
     {/* 0x77 */ EMPTY }, {/* 0x78 */ EMPTY }, {/* 0x79 */ EMPTY },
     {/* 0x7a */ EMPTY }, {/* 0x7b */ EMPTY }, {/* 0x7c */ EMPTY },
     {/* 0x7d */ EMPTY },

     // CHSET_ISO_8859_7
     {
         /* 0x7e */ "ISO-8859-7", DBIDX_ISO_8859_7, 0,
         ISO_2022_JP_SET_ISO_8859_7,
         sizeof (ISO_2022_JP_SET_ISO_8859_7) - 1U
     },

     {/* 0x7f */ EMPTY }, {/* 0x80 */ EMPTY }, {/* 0x81 */ EMPTY },
     {/* 0x82 */ EMPTY }, {/* 0x83 */ EMPTY }, {/* 0x84 */ EMPTY },
     {/* 0x85 */ EMPTY }, {/* 0x86 */ EMPTY }, {/* 0x87 */ EMPTY },
     {/* 0x88 */ EMPTY }, {/* 0x89 */ EMPTY }, {/* 0x8a */ EMPTY },
     {/* 0x8b */ EMPTY }, {/* 0x8c */ EMPTY }, {/* 0x8d */ EMPTY },
     {/* 0x8e */ EMPTY }, {/* 0x8f */ EMPTY }, {/* 0x90 */ EMPTY },
     {/* 0x91 */ EMPTY }, {/* 0x92 */ EMPTY }, {/* 0x93 */ EMPTY },
     {/* 0x94 */ EMPTY },

     // CHSET_KSC_5601_1987
     {
         /* 0x95 */ "EUC-KR", DBIDX_EUC_KR, 1,
         ISO_2022_JP_SET_KSC5601_87,
         sizeof (ISO_2022_JP_SET_KSC5601_87) - 1U
     },

     {/* 0x96 */ EMPTY }, {/* 0x97 */ EMPTY }, {/* 0x98 */ EMPTY },
     {/* 0x99 */ EMPTY }, {/* 0x9a */ EMPTY }, {/* 0x9b */ EMPTY },
     {/* 0x9c */ EMPTY }, {/* 0x9d */ EMPTY }, {/* 0x9e */ EMPTY },

     // CHSET_JIS_X_0212_1990
     {
         /* 0x9f */ "EUC-JP", DBIDX_EUC_JP, 1,
         ISO_2022_JP_SET_JIS212_90,
         sizeof (ISO_2022_JP_SET_JIS212_90) - 1U
     },

     {/* 0xa0 */ EMPTY }, {/* 0xa1 */ EMPTY }, {/* 0xa2 */ EMPTY },
     {/* 0xa3 */ EMPTY }, {/* 0xa4 */ EMPTY }, {/* 0xa5 */ EMPTY },
     {/* 0xa6 */ EMPTY }, {/* 0xa7 */ EMPTY }, {/* 0xa8 */ EMPTY },
     {/* 0xa9 */ EMPTY }, {/* 0xaa */ EMPTY }, {/* 0xab */ EMPTY },
     {/* 0xac */ EMPTY }, {/* 0xad */ EMPTY }, {/* 0xae */ EMPTY },
     {/* 0xaf */ EMPTY }, {/* 0xb0 */ EMPTY }, {/* 0xb1 */ EMPTY },
     {/* 0xb2 */ EMPTY }, {/* 0xb3 */ EMPTY }, {/* 0xb4 */ EMPTY },
     {/* 0xb5 */ EMPTY }, {/* 0xb6 */ EMPTY }, {/* 0xb7 */ EMPTY },
     {/* 0xb8 */ EMPTY }, {/* 0xb9 */ EMPTY }, {/* 0xba */ EMPTY },
     {/* 0xbb */ EMPTY }, {/* 0xbc */ EMPTY }, {/* 0xbd */ EMPTY },
     {/* 0xbe */ EMPTY }, {/* 0xbf */ EMPTY }, {/* 0xc0 */ EMPTY },
     {/* 0xc1 */ EMPTY }, {/* 0xc2 */ EMPTY }, {/* 0xc3 */ EMPTY },
     {/* 0xc4 */ EMPTY }, {/* 0xc5 */ EMPTY }, {/* 0xc6 */ EMPTY },
     {/* 0xc7 */ EMPTY }, {/* 0xc8 */ EMPTY }, {/* 0xc9 */ EMPTY },
     {/* 0xca */ EMPTY }, {/* 0xcb */ EMPTY }, {/* 0xcc */ EMPTY },
     {/* 0xcd */ EMPTY }, {/* 0xce */ EMPTY }, {/* 0xcf */ EMPTY },
     {/* 0xd0 */ EMPTY }, {/* 0xd1 */ EMPTY }, {/* 0xd2 */ EMPTY },
     {/* 0xd3 */ EMPTY }, {/* 0xd4 */ EMPTY }, {/* 0xd5 */ EMPTY },
     {/* 0xd6 */ EMPTY }, {/* 0xd7 */ EMPTY }, {/* 0xd8 */ EMPTY },
     {/* 0xd9 */ EMPTY }, {/* 0xda */ EMPTY }, {/* 0xdb */ EMPTY },
     {/* 0xdc */ EMPTY }, {/* 0xdd */ EMPTY }, {/* 0xde */ EMPTY },
     {/* 0xdf */ EMPTY }, {/* 0xe0 */ EMPTY }, {/* 0xe1 */ EMPTY },
     {/* 0xe2 */ EMPTY }, {/* 0xe3 */ EMPTY }, {/* 0xe4 */ EMPTY },
     {/* 0xe5 */ EMPTY }, {/* 0xe6 */ EMPTY }, {/* 0xe7 */ EMPTY },
     {/* 0xe8 */ EMPTY }, {/* 0xe9 */ EMPTY }, {/* 0xea */ EMPTY },
     {/* 0xeb */ EMPTY }, {/* 0xec */ EMPTY }, {/* 0xed */ EMPTY },
     {/* 0xee */ EMPTY }, {/* 0xef */ EMPTY }, {/* 0xf0 */ EMPTY },
     {/* 0xf1 */ EMPTY }, {/* 0xf2 */ EMPTY }, {/* 0xf3 */ EMPTY },
     {/* 0xf4 */ EMPTY }, {/* 0xf5 */ EMPTY }, {/* 0xf6 */ EMPTY },
     {/* 0xf7 */ EMPTY }, {/* 0xf8 */ EMPTY }, {/* 0xf9 */ EMPTY },
     {/* 0xfa */ EMPTY }, {/* 0xfb */ EMPTY }, {/* 0xfc */ EMPTY },
     {/* 0xfd */ EMPTY }, {/* 0xfe */ EMPTY }, {/* 0xff */ EMPTY }
 };


 // Mapping pointer - size of mapping - canonical name
 //
 // The array is indexed by the DBIDX_xxx constants; the pointer and
 // the size of each mapping are initially zero and are filled in by
 // __rw_get_encoding_database() the first time the database is
 // requested; entries with an empty name do not denote any database
 static __rw_db_map_t __rw_db_map [32] = {
     { 0, 0, __rw_empty   }, { 0, 0, "ANSI_X3.4-1968" },
     { 0, 0, "ISO-8859-1" }, { 0, 0, "ISO-8859-2" },
     { 0, 0, "ISO-8859-3" }, { 0, 0, "ISO-8859-4" },
     { 0, 0, "ISO-8859-5" }, { 0, 0, "ISO-8859-6" },
     { 0, 0, "ISO-8859-7" }, { 0, 0, "ISO-8859-8" },
     { 0, 0, "EUC-JP" },     { 0, 0, "EUC-KR" },
     { 0, 0, "GB2312" },     { 0, 0, __rw_empty },
     { 0, 0, __rw_empty },   { 0, 0, __rw_empty },
     { 0, 0, __rw_empty },   { 0, 0, __rw_empty },
     { 0, 0, __rw_empty },   { 0, 0, __rw_empty },
     { 0, 0, __rw_empty },   { 0, 0, __rw_empty },
     { 0, 0, __rw_empty },   { 0, 0, __rw_empty },
     { 0, 0, __rw_empty },   { 0, 0, __rw_empty },
     { 0, 0, __rw_empty },   { 0, 0, __rw_empty },
     { 0, 0, __rw_empty },   { 0, 0, __rw_empty },
     { 0, 0, __rw_empty },   { 0, 0, __rw_empty }
 };


 // Determines the type of the encoding denoted by `name': a name of
 // the form "ISO-2022-<suffix>" where <suffix> is one of "JP", "JP-2",
 // "KR", or "CN" yields the corresponding iso2022_xxx constant; any
 // other name (including one with an unrecognized suffix) is reported
 // as stateless; the comparison is case-sensitive
 int __rw_encoding_from_name (const char* name)
 {
     _RWSTD_ASSERT (0 != name);

     static const char prefix [] = "ISO-2022-";

     // length of the prefix without the terminating NUL
     const size_t prefix_len = sizeof prefix - 1U;

     int encoding = stateless;

     if (0 == strncmp (prefix, name, prefix_len)) {

         // the part of the name that selects the ISO-2022 flavor
         const char* const suffix = name + prefix_len;

         if (!strcmp (suffix, "JP-2"))
             encoding = iso2022_jp2;
         else if (!strcmp (suffix, "JP"))
             encoding = iso2022_jp;
         else if (!strcmp (suffix, "KR"))
             encoding = iso2022_kr;
         else if (!strcmp (suffix, "CN"))
             encoding = iso2022_cn;
     }

     return encoding;
 }


 //  Array of ISO-2022 encoding states; the array has ISO_2022_ARRAY_SIZE
 //  entries: 255 when std::mbstate_t is one byte wide and 511 otherwise
 //
 //  The usage of the structs in the array:
 //  - the "allocated" structs are marked as used;
 //  - the index of the struct in the array is stored in the std::mbstate_t
 //  object used by the codecvt facet;
 //  - the std::mbstate_t values are shifted from the real iso2022_state
 //  struct index by one so that a value of zero in an std::mbstate_t
 //  variable indicates a new conversion for which a state has to be
 //  allocated
 //
 //  ISO_2022_STATE_INDEX_T is the integer type through which the index
 //  is read from and written to the std::mbstate_t object
 #if _RWSTD_MBSTATE_T_SIZE == 1
 #  define ISO_2022_ARRAY_SIZE     255
 #  define ISO_2022_STATE_INDEX_T  unsigned char
 #else
 #  define ISO_2022_ARRAY_SIZE     511
 #  define ISO_2022_STATE_INDEX_T  unsigned short
 #endif


 // array of available states (limits the number of distinct mbstate_t
 // objects with unique value that may be simultaneously valid);
 // being static, the array starts out zeroed, i.e., all states unused
 static __rw_iso2022_state_t __rw_iso2022_states [ISO_2022_ARRAY_SIZE];


 // Reserves the first unused element of the __rw_iso2022_states array
 // by marking it as used and returns its index, or returns -1 when all
 // elements are in use; no member other than `used' is modified, any
 // encoding-specific initialization is left to the caller
 static inline int
 __rw_allocate_state ()
 {
     _RWSTD_MT_CLASS_GUARD (__rw_iso2022_state_t);

     // find the first slot that is not in use
     int slot = 0;

     while (slot < ISO_2022_ARRAY_SIZE && __rw_iso2022_states [slot].used)
         ++slot;

     if (!(slot < ISO_2022_ARRAY_SIZE))
         return -1;

     __rw_iso2022_states [slot].used = 1;

     return slot;
 }


 // deallocates state and makes it available for future conversions;
 // if `initial_only' is non-zero succeeds only if the `iso_state'
 // argument represents an initial shift state, i.e., if the character
 // set invoked in GL is ANSI_X3.4-1968 and no single shift is pending
 //
 //   iso_state     - element of the states array to release
 //   state         - the mbstate_t object that refers to `iso_state'
 //   initial_only  - when true, release only from an initial shift state
 //
 // On success both `state' and `iso_state' are zeroed out; otherwise
 // neither object is modified.
 static void
 __rw_deallocate_state (__rw_iso2022_state_t &iso_state,
                             _RWSTD_MBSTATE_T     &state,
                             bool                  initial_only)
 {
     _RWSTD_MT_CLASS_GUARD (__rw_iso2022_state_t);

     if (initial_only) {
         // test the state object for return to initial shift state
         const int i = iso_state.gl_map - 1;
         _RWSTD_ASSERT (i >= 0 && i < 4);

         // gl_map is a 3-bit field whose value may be 0 (invalid) or
         // greater than 4; never index g_map out of bounds, not even
         // when assertions are compiled out: a state with no valid
         // invocation cannot be shown to be in the initial shift state
         if (i < 0 || i > 3)
             return;

         if (   iso_state.g_map [i] != CHSET_ANSI_X_3_4_1968
             || iso_state.sshift2
             || iso_state.sshift3)
             return;

         // proceed only if `iso_state' is in an initial shift state
     }

     // zero out both structures; clearing `iso_state' also resets its
     // `used' flag and so returns the slot to the pool
     memset (&state, 0, sizeof state);
     memset (&iso_state, 0, sizeof iso_state);
 }


 // Returns a pointer to the ISO-2022 state associated with the mbstate_t
 // object `state', allocating (and initializing) one when `state' holds
 // zero, i.e., when it denotes the beginning of a new conversion.
 //
 //   state  - holds the index of the associated state plus one, or zero;
 //            updated when a new state is allocated
 //   enc    - encoding type, determines the initial designation given
 //            to a newly allocated state
 //
 // Returns 0 when no state could be allocated or when `state' holds
 // a value that does not refer to any element of the states array.
 static __rw_iso2022_state_t*
 __rw_get_iso2022_state (_RWSTD_MBSTATE_T& state, int enc)
 {
     // retrieve the array index
     ISO_2022_STATE_INDEX_T* const pc =
         _RWSTD_REINTERPRET_CAST (ISO_2022_STATE_INDEX_T*, &state);

     int n = *pc;

     _RWSTD_ASSERT (n >= 0 && n <= ISO_2022_ARRAY_SIZE);

     // guard against a corrupt or foreign mbstate_t value even when
     // assertions are compiled out
     if (n < 0 || n > ISO_2022_ARRAY_SIZE)
         return 0;

     // a non-zero value refers to a state already in use: return it
     // as is so that the designations and shift state established by
     // preceding conversions are carried over to this one
     if (n != 0)
         return &__rw_iso2022_states [n - 1];

     // a value of zero in std::mbstate_t indicates conversion anew,
     // calling for a state allocation
     n = __rw_allocate_state ();

     // the case when the allocation was not possible
     if (n == -1)
         return 0;

     // store the state index (shifted by one, see above)
     *pc = _RWSTD_STATIC_CAST (ISO_2022_STATE_INDEX_T, n + 1);

     __rw_iso2022_state_t* const pstate = &__rw_iso2022_states [n];

     // initialize the new state according to the encoding type
     switch (enc) {
     case iso2022_jp:
     case iso2022_jp2:
         // initially ANSI_X3.4-1968 is designated to G0 and invoked in GL
         pstate->g_map [0] = CHSET_ANSI_X_3_4_1968;
         pstate->gl_map = 1;
         break;
     default:
         break;
     }

     return pstate;
 }


 // Maps the final byte of an escape sequence designating a single-byte
 // 94-character set to the registration number of the character set;
 // returns 0 for a final byte that is not recognized. The final bytes
 // of the different classes of character sets overlap but are distinct
 // within each class, hence one function per class.
 static inline
 unsigned char __rw_sb94_encoding_reg (unsigned char c)
 {
     if (0x42 == c)
         return CHSET_ANSI_X_3_4_1968;    /* ANSI X-3.4/1968     */

     if (0x49 == c)
         return CHSET_JIS_X_0201_KANA;    /* JIS X 0201 Katakana */

     if (0x4a == c)
         return CHSET_JIS_X_0201_ROMAN;   /* JIS X 0201 Roman    */

     // not a recognized designator
     return 0;
 }


 // Maps the final byte of an escape sequence designating a single-byte
 // 96-character set to the registration number of the character set;
 // returns 0 for a final byte that is not recognized.
 static inline
 unsigned char __rw_sb96_encoding_reg (unsigned char c)
 {
     const unsigned char reg =
           0x41 == c ? CHSET_ISO_8859_1     /* ISO-8859-1 */
         : 0x46 == c ? CHSET_ISO_8859_7     /* ISO-8859-7 */
         : 0;                               /* unknown    */

     return reg;
 }


 // Maps the final byte of an escape sequence designating a multibyte
 // 94-character set to the registration number of the character set;
 // returns 0 for a final byte that is not recognized.
 static inline
 unsigned char __rw_mb94_encoding_reg (unsigned char c)
 {
     // the recognized final bytes form the contiguous range [0x40, 0x44]
     static const unsigned char regs [] = {
         CHSET_JIS_X_0208_1978,    /* 0x40: JIS X 0208-1978 */
         CHSET_GB2312_1980,        /* 0x41: GB 2312-80      */
         CHSET_JIS_X_0208_1983,    /* 0x42: JIS X 0208-1983 */
         CHSET_KSC_5601_1987,      /* 0x43: KSC 5601 Korean */
         CHSET_JIS_X_0212_1990     /* 0x44: JIS X 0212-1990 */
     };

     if (c < 0x40 || 0x44 < c)
         return 0;

     return regs [c - 0x40];
 }


 // Counterpart of the functions above for multibyte 96-character sets:
 // no such character set is recognized so the result is always 0,
 // regardless of the final byte passed in.
 static inline
 unsigned char __rw_mb96_encoding_reg (unsigned char)
 {
     const unsigned char no_registration = 0;

     return no_registration;
 }


 // Returns the registration number of the character set identified by
 // its class (`type') and by the final byte of its designating escape
 // sequence (`code'); the lookup is delegated to the function handling
 // the given class. Returns 0 when the combination is not recognized.
 static inline
 unsigned char __rw_get_encoding_reg (__rw_codeset_type_t type,
                                      unsigned char code)
 {
     unsigned char reg = 0;

     if (sb94 == type)
         reg = __rw_sb94_encoding_reg (code);
     else if (sb96 == type)
         reg = __rw_sb96_encoding_reg (code);
     else if (mb94 == type)
         reg = __rw_mb94_encoding_reg (code);
     else if (mb96 == type)
         reg = __rw_mb96_encoding_reg (code);

     return reg;
 }


 #if 0   // unused

 // Returns the canonical codeset name stored in __rw_chset_map for
 // the registration number `n', or 0 when the entry has an empty name
 static inline
 const char* __rw_get_encoding_name (unsigned char n)
 {
     const char* const name = __rw_chset_map [n].name;

     if (0 == name [0])
         return 0;

     return name;
 }


 //  Maps a combination of character set class and designator code to
 //  an index in the array of codecvt database mappings, or to 0 when
 //  the combination is not recognized. The database mappings are
 //  installed in the array of mappings like this:
 //    1  - ANSI.X-3.4-1968
 //  2-9  - ISO-8859-1, 2, 3, 4, 5, 6, 7, 8
 //   10  - EUC-JP
 //   11  - EUC-KR
 //   12  - GB2312-1980
 static inline
 unsigned char __rw_get_encoding_dbindex (__rw_codeset_type_t t,
                                          unsigned char code)
 {
     const unsigned char reg = __rw_get_encoding_reg (t, code);

     // a registration of 0 denotes an unrecognized character set
     return 0 == reg ? 0 : __rw_chset_map [reg].index;
 }

 #endif   // 0/1


 // Returns a pointer to the codeset conversion database with the index
 // `n' (one of the DBIDX_xxx constants) in the __rw_db_map array. The
 // database is obtained from __rw_get_facet_data() the first time it
 // is requested and the result is cached in the array.
 //
 // Returns 0 when `n' is out of range, when it denotes an entry with
 // no name, or when the lookup itself yields 0.
 static inline
 const void* __rw_get_encoding_database (unsigned char n)
 {
     // `n' may be as large as 255 but the array is much smaller
     const size_t nslots = sizeof __rw_db_map / sizeof *__rw_db_map;

     if (nslots <= n)
         return 0;

     // if already mapped return it
     if (__rw_db_map [n].pv)
         return __rw_db_map [n].pv;

     // a wrong index would point to a struct bearing no name
     if (__rw_db_map [n].name [0] == 0)
         return 0;

     // if not, obtain it from __rw_get_facet_data
     int cat = __rw_get_cat ((__rw_facet::_C_wcodecvt_byname + 1) / 2);
     __rw_db_map [n].pv =
         __rw_get_facet_data (cat, __rw_db_map[n].size, 0,
                              __rw_db_map [n].name);

     return __rw_db_map [n].pv;
 }


 // Insert designating sequence in the destination buffer; the encoding is
 // given by the last parameter and is currently one of the following:
 //   - ISO_2022-JP
 //   - ISO_2022-JP2
 //
 //   state   - conversion state, updated to reflect the designation
 //   to      - next free position in the destination buffer, advanced
 //             past the bytes written
 //   to_end  - end of the destination buffer
 //   reg     - registration number of the character set to designate
 //   enc     - encoding type
 //
 // The ISO-8859-1 and ISO-8859-7 character sets are designated to G2
 // and invoked with a single-shift 2 function; all other character sets
 // are designated to G0.
 //
 // Returns:
 //    std::codecvt_base::ok      if the sequence has been inserted (or if
 //                               nothing needed to be inserted)
 //    std::codecvt_base::partial if the destination buffer is too small;
 //                               neither `to' nor `state' is modified
 //    std::codecvt_base::error   if the character set is not known or is
 //                               not part of the encoding `enc'
 static _STD::codecvt_base::result
 __rw_iso2022jp_designate (__rw_iso2022_state_t& state,
                           char*&                to,
                           char*                 to_end,
                           unsigned char         reg,
                           int                   enc)
 {
     // length of a designating sequence in ISO-2022-JP and
     // ISO-2022-JP-2 is 3 or 4 elements
     size_t len   = sizeof (ISO_2022_JP_SET_ASCII) - 1U;
     size_t sslen = 0;

     // register designation
     bool g2 = false;

     const char* esc = 0;
     const char* ss  = 0;

     switch (reg) {
     case CHSET_ANSI_X_3_4_1968:
         esc = ISO_2022_JP_SET_ASCII;
         break;
     case CHSET_JIS_X_0201_ROMAN:
         esc = ISO_2022_JP_SET_JIS201_ROMAN;
         break;
     case CHSET_JIS_X_0208_1978:
         esc = ISO_2022_JP_SET_JIS208_78;
         break;
     case CHSET_JIS_X_0208_1983:
         esc = ISO_2022_JP_SET_JIS208_83;
         break;
     case CHSET_GB2312_1980:
         esc = (enc == iso2022_jp)?0:ISO_2022_JP_SET_GB2312_80;
         break;
     case CHSET_ISO_8859_1:
         esc   = (enc == iso2022_jp)?0:ISO_2022_JP_SET_ISO_8859_1;
         ss    = ISO_2022_JP_SET_SS2;
         sslen = sizeof (ISO_2022_JP_SET_SS2) - 1U;
         g2    = true;
         break;
     case CHSET_ISO_8859_7:
         esc   = (enc == iso2022_jp)?0:ISO_2022_JP_SET_ISO_8859_7;
         ss    = ISO_2022_JP_SET_SS2;
         sslen = sizeof (ISO_2022_JP_SET_SS2) - 1U;
         g2    = true;
         break;

     // only JIS X 0212-1990 and KSC5601-1987 are 4 elements long
     case CHSET_JIS_X_0212_1990:
         if (enc != iso2022_jp) {
             esc = ISO_2022_JP_SET_JIS212_90;
             len = sizeof (ISO_2022_JP_SET_JIS212_90) - 1U;
         }
         break;

     case CHSET_KSC_5601_1987:
         if (enc != iso2022_jp) {
             esc = ISO_2022_JP_SET_KSC5601_87;
             len = sizeof (ISO_2022_JP_SET_KSC5601_87) - 1U;
         }
         break;

     default:
         return CODECVT_ERROR;
     }

     // no designating sequence means that the character set, while
     // known, is not part of the encoding (it is available only in
     // ISO-2022-JP-2): fail instead of copying from a null pointer
     if (0 == esc)
         return CODECVT_ERROR;

     // space available in the destination buffer
     const size_t avail = _RWSTD_STATIC_CAST (size_t, to_end - to);

     if (g2) {
         if (state.sshift2 == 1) {
             // there is already a correct designation and a single
             // shift function inserted in the destination buffer
             return CODECVT_OK;
         }

         // the designation is inserted only if the character set
         // is not already designated to G2
         const bool designate = reg != state.g_map [2];

         // make sure that everything fits before writing anything or
         // modifying the state so that a partial result leaves both
         // the buffer and the state intact
         const size_t need = (designate ? len : 0) + sslen;

         if (avail < need)
             return CODECVT_PARTIAL;

         _RWSTD_ASSERT (to && ss);

         // insert the designation
         if (designate) {
             memcpy (to, esc, len);
             to += len;

             // adjust the state
             state.g_map [2] = reg;
         }

         // insert the single shift function
         memcpy (to, ss, sslen);
         to += sslen;

         // adjust the single shift indicator
         state.sshift2 = 1;
     } else {

         if (avail < len)
             return CODECVT_PARTIAL;

         _RWSTD_ASSERT (0 != to);

         memcpy (to, esc, len);
         to += len;

         // adjust the state: the set is designated to G0, invoked
         // in GL, and determines the width of subsequent characters
         state.g_map [0]   = reg;
         state.gl_map      = 1;
         state.shift_state = __rw_chset_map [reg].width;
     }
     return CODECVT_OK;
 }

 // Process one escape sequence in an ISO-2022 stream;
 //
 // Upon entry:
 //   - from  - the byte at `from' is skipped without being examined and
 //             the sequence is parsed starting with the byte that follows
 //             it, i.e., `from' is expected to point at the ESCAPE_CHAR;
 //   - enc   - encoding type.
 //
 // Recognized are the single-shift 2 function and the designations of
 // single-byte 94-character sets to G0, of single-byte 96-character
 // sets to G2, and of multibyte 94-character sets.
 //
 // Returns:
 //    std::codecvt_base::ok     if a complete sequence has been processed,
 //                              in which case `from' is advanced past it
 //                              and `state' is updated, or if the source
 //                              ends before the end of the sequence, in
 //                              which case neither is modified
 //    std::codecvt_base::error  if the sequence is not recognized or if
 //                              it designates a character set that is
 //                              not part of the encoding `enc'
 static _STD::codecvt_base::result
 __rw_iso2022_escape (__rw_iso2022_state_t& state,
                      const char*&          from,
                      const char*           from_end,
                      int                   enc)
 {
     // cursor over the bytes of the sequence
     const char* cur = from + 1;

     // test for end of sequence
     if (cur == from_end)
         return CODECVT_OK;

     const int intro = *cur;

     if (ISO_2022_SET_SS2 == intro) {
         // Single-Shift 2 invokes G2 in GL for one character;
         // set the single-shift 2 flag in the state structure...
         state.sshift2 = 1;

         // ...and adjust the source pointer
         from = cur + 1;
         return CODECVT_OK;
     }

     // determine the class of the character set being designated
     __rw_codeset_type_t type = sb94;

     if (ISO_2022_SET_SB94_G0 == intro) {
         type = sb94;
     }
     else if (ISO_2022_SET_SB96_G2 == intro) {
         type = sb96;
     }
     else if (ISO_2022_SET_MB == intro) {
         type = mb94;

         // test for end of sequence
         if (cur + 1 == from_end)
             return CODECVT_OK;

         // advance one more; this designation is 4 bytes long
         if (ISO_2022_SET_MB94_G0 == cur [1])
             ++cur;
     }
     else {
         return CODECVT_ERROR;
     }

     // move on to the final byte of the sequence
     ++cur;

     if (cur == from_end)
         return CODECVT_OK;

     // retrieve the registration code of the character set
     const unsigned char reg = __rw_get_encoding_reg (type, *cur);
     ++cur;

     if (0 == reg)
         return CODECVT_ERROR;

     // the character sets below are not part of ISO-2022-JP
     const bool jp2_only =
            CHSET_JIS_X_0212_1990 == reg || CHSET_GB2312_1980 == reg
         || CHSET_KSC_5601_1987   == reg || CHSET_ISO_8859_1  == reg
         || CHSET_ISO_8859_7      == reg;

     if (iso2022_jp == enc && jp2_only)
         return CODECVT_ERROR;

     // designation is always in G0 unless the type is sb96
     const int element = sb96 == type ? 2 : 0;
     state.g_map [element] = reg;

     // however, the invocation is always in GL for ISO-2022-xxx!!
     state.gl_map      = 1;
     state.shift_state = __rw_chset_map [reg].width;

     // adjust the source pointer
     from = cur;
     return CODECVT_OK;
 }

 /****************************************************************************/

 // ISO-2022-JP conversion from ANSI_X3.4-1968
 // Converts one character: if the character set is not the one currently
 // designated to G0 its designating sequence is written first, followed
 // by the bytes of the character with their high bits cleared.
 // Returns:
 //    std::codecvt_base::ok      in case the conversion succeeded (also
 //                               when only the designating sequence was
 //                               written and it filled the buffer)
 //    std::codecvt_base::partial for partial conversions
 //    std::codecvt_base::error   erroneous sequence
 static _STD::codecvt_base::result
 __rw_ascii_to_iso2022 (__rw_iso2022_state_t& state,
                        const char*&          from,
                        const char*           from_end,
                        char*&                to,
                        char*                 to_end,
                        int                   enc)
 {
     // the registration of the character set being converted from
     const unsigned char charset = CHSET_ANSI_X_3_4_1968;

     // the table stores the width as the number of bytes minus one
     const unsigned int nbytes = __rw_chset_map [charset].width + 1;

     _RWSTD_ASSERT (from_end-from >= int(nbytes));
     _RWSTD_UNUSED (from_end);

     // check the designation sequence
     if (charset != state.g_map [0]) {
         // work on a copy so that `to' changes only on success
         char* out = to;

         // insert a designation sequence in this place
         const _STD::codecvt_base::result res =
             __rw_iso2022jp_designate (state, out, to_end, charset, enc);

         if (CODECVT_OK != res)
             return res;

         state.gl_map      = 1;
         state.g_map [0]   = charset;
         state.shift_state = __rw_chset_map [charset].width;

         // adjust destination pointer
         to = out;

         if (to_end == to)
             return res;
     }

     // FIXME - check the validity of this test
     if (to_end - to < int (nbytes))
         return CODECVT_PARTIAL;

     // copy the character, one or two bytes, as 7-bit values
     for (unsigned int i = 0; i != nbytes; ++i)
         *to++ = char (*from++ & 0x7f);

     return CODECVT_OK;
 }


 // ISO-2022-JP conversion from KSC5601-1987
 // Converts one character: if the character set is not the one currently
 // designated to G0 its designating sequence is written first, followed
 // by the bytes of the character with their high bits cleared.
 // Returns:
 //    std::codecvt_base::ok      in case the conversion succeeded (also
 //                               when only the designating sequence was
 //                               written and it filled the buffer)
 //    std::codecvt_base::partial for partial conversions
 //    std::codecvt_base::error   erroneous sequence
 static _STD::codecvt_base::result
 __rw_ksc5601_to_iso2022 (__rw_iso2022_state_t& state,
                          const char*&        from,
                          const char*         from_end,
                          char*&              to,
                          char*               to_end,
                          int                 enc)
 {
     // the registration of the character set being converted from
     const unsigned char charset = CHSET_KSC_5601_1987;

     // the table stores the width as the number of bytes minus one
     const unsigned int nbytes = __rw_chset_map [charset].width + 1;

     _RWSTD_ASSERT(from_end-from >= int(nbytes));
     _RWSTD_UNUSED(from_end);

     // check the designation sequence
     if (charset != state.g_map [0]) {
         // work on a copy so that `to' changes only on success
         char* out = to;

         // insert a designation sequence in this place
         const _STD::codecvt_base::result res =
             __rw_iso2022jp_designate (state, out, to_end, charset, enc);

         if (CODECVT_OK != res)
             return res;

         state.gl_map      = 1;
         state.g_map [0]   = charset;
         state.shift_state = __rw_chset_map [charset].width;

         // adjust destination pointer
         to = out;

         if (to_end == to)
             return res;
     }

     // FIXME - check the validity of this test
     if (to_end - to < int (nbytes))
         return CODECVT_PARTIAL;

     // copy the character, one or two bytes, as 7-bit values
     for (unsigned int i = 0; i != nbytes; ++i)
         *to++ = char (*from++ & 0x7f);

     return CODECVT_OK;
 }


 // ISO-2022-JP conversion from GB2312-1980
 // Converts one character: if the character set is not the one currently
 // designated to G0 its designating sequence is written first, followed
 // by the bytes of the character with their high bits cleared.
 // Returns:
 //    std::codecvt_base::ok      in case the conversion succeeded (also
 //                               when only the designating sequence was
 //                               written and it filled the buffer)
 //    std::codecvt_base::partial for partial conversions
 //    std::codecvt_base::error   erroneous sequence
 static _STD::codecvt_base::result
 __rw_gb2312_to_iso2022 (__rw_iso2022_state_t& state,
                         const char*&          from,
                         const char*           from_end,
                         char*&                to,
                         char*                 to_end,
                         int                   enc)
 {
     // the registration of the character set being converted from
     const unsigned char charset = CHSET_GB2312_1980;

     // the table stores the width as the number of bytes minus one
     const unsigned int nbytes = __rw_chset_map [charset].width + 1;

     _RWSTD_ASSERT(from_end-from >= int(nbytes));
     _RWSTD_UNUSED(from_end);

     // check the designation sequence
     if (charset != state.g_map [0]) {
         // work on a copy so that `to' changes only on success
         char* out = to;

         // insert a designation sequence in this place
         const _STD::codecvt_base::result res =
             __rw_iso2022jp_designate (state, out, to_end, charset, enc);

         if (CODECVT_OK != res)
             return res;

         state.gl_map      = 1;
         state.g_map [0]   = charset;
         state.shift_state = __rw_chset_map [charset].width;

         // adjust destination pointer
         to = out;

         if (to_end == to)
             return res;
     }

     // FIXME - check the validity of this test
     if (to_end - to < int(nbytes))
         return CODECVT_PARTIAL;

     // copy the character, one or two bytes, as 7-bit values
     for (unsigned int i = 0; i != nbytes; ++i)
         *to++ = char (*from++ & 0x7f);

     return CODECVT_OK;
 }


 // ISO-2022-JP conversion from ISO-8859-7
 // Emits one character.  __rw_iso2022jp_designate() is invoked on every
 // call, writing straight through `to'; G2 is then recorded as holding
 // ISO-8859-7.  Source bytes are stored with the high bit cleared and
 // both single shift flags are reset once the character is written.
 // Returns:
 //    std::codecvt_base::ok      in case the conversion succeeded
 //    std::codecvt_base::partial when the destination cannot hold the
 //                               character
 //    any other value            as reported by __rw_iso2022jp_designate()
 static _STD::codecvt_base::result
 __rw_iso88597_to_iso2022 (__rw_iso2022_state_t& state,
                           const char*&          from,
                           const char*           from_end,
                           char*&                to,
                           char*                 to_end,
                           int                   enc)
 {
     // registration number of the character set and the number of
     // bytes one of its characters occupies in the output
     unsigned char charset = CHSET_ISO_8859_7;
     unsigned int  nbytes  = __rw_chset_map [charset].width + 1;

     // the caller guarantees a complete source character
     _RWSTD_ASSERT (from_end - from >= int (nbytes));
     _RWSTD_UNUSED (from_end);

     // always call designate for this one; designation will at least
     // insert a single shift function in the destination buffer
     const _STD::codecvt_base::result designated =
         __rw_iso2022jp_designate (state, to, to_end, charset, enc);

     if (CODECVT_OK != designated)
         return designated;

     // G2 now holds this character set
     state.g_map [2] = charset;

     // FIXME - check the validity of this test
     if (to_end - to < int (nbytes))
         return CODECVT_PARTIAL;

     // one byte is always stored, a second one only for wide sets
     const unsigned int ncopy = nbytes > 1 ? 2 : 1;
     for (unsigned int i = 0; i < ncopy; ++i)
         *to++ = char (*from++ & 0x7f);

     // the single shift applied to this character only
     state.sshift3 = 0;
     state.sshift2 = 0;

     return CODECVT_OK;
 }


 // ISO-2022-JP conversion from ISO-8859-1
 // Emits one character.  __rw_iso2022jp_designate() is invoked on every
 // call, writing straight through `to'; G2 is then recorded as holding
 // ISO-8859-1.  Source bytes are stored with the high bit cleared and
 // both single shift flags are reset once the character is written.
 // Returns:
 //    std::codecvt_base::ok      in case the conversion succeeded
 //    std::codecvt_base::partial when the destination cannot hold the
 //                               character
 //    any other value            as reported by __rw_iso2022jp_designate()
 static _STD::codecvt_base::result
 __rw_iso88591_to_iso2022 (__rw_iso2022_state_t& state,
                           const char*&          from,
                           const char*           from_end,
                           char*&                to,
                           char*                 to_end,
                           int                   enc)
 {
     // registration number of the character set and the number of
     // bytes one of its characters occupies in the output
     unsigned char charset = CHSET_ISO_8859_1;
     unsigned int  nbytes  = __rw_chset_map [charset].width + 1;

     // the caller guarantees a complete source character
     _RWSTD_ASSERT (from_end - from >= int (nbytes));
     _RWSTD_UNUSED (from_end);

     // always call designate for this one; designation will at least
     // insert a single shift function in the destination buffer
     const _STD::codecvt_base::result designated =
         __rw_iso2022jp_designate (state, to, to_end, charset, enc);

     if (CODECVT_OK != designated)
         return designated;

     // G2 now holds this character set
     state.g_map [2] = charset;

     // FIXME - check the validity of this test
     if (to_end - to < int (nbytes))
         return CODECVT_PARTIAL;

     // first byte, high bit stripped
     *to = char (*from & 0x7f);
     ++to;
     ++from;

     if (nbytes > 1) {
         // second byte, high bit stripped
         *to = char (*from & 0x7f);
         ++to;
         ++from;
     }

     // the single shift applied to this character only
     state.sshift3 = 0;
     state.sshift2 = 0;

     return CODECVT_OK;
 }


 // ISO-2022-JP conversion from packed EUC-JP
 // Converts one character whose EUC-JP bytes start at `from'.
 //
 // The first source byte selects the target character set:
 //    0x8e        prefix byte (skipped), JIS X 0201 Katakana follows
 //    0x8f        prefix byte (skipped), JIS X 0212-1990 follows
 //    above 0x80  JIS X 0208-1978
 //    otherwise   ANSI_X3.4-1968
 //
 // When G0 does not currently hold the selected set, a designation
 // sequence is written first and the state is updated.  The character
 // bytes are then stored with their high bit cleared.
 //
 // Returns:
 //    std::codecvt_base::ok      in case the conversion succeeded; also
 //                               when the designation filled the output
 //                               exactly or the source character is
 //                               incomplete (nothing more is written)
 //    std::codecvt_base::partial when the destination cannot hold the
 //                               whole character
 //    any other value            as reported by __rw_iso2022jp_designate()
 static _STD::codecvt_base::result
 __rw_eucjp_to_iso2022 (__rw_iso2022_state_t& state,
                        const char*&          from,
                        const char*           from_end,
                        char*&                to,
                        char*                 to_end,
                        int                   enc)
 {
     int width = 0;
     unsigned char reg = 0;

     _STD::codecvt_base::result ret;

     // classify the character by its first byte
     unsigned char c = *_RWSTD_REINTERPRET_CAST(const unsigned char*, from);
     switch (c) {
     case 0x8e:
         // JIS X 0201 Katakana
         reg   = CHSET_JIS_X_0201_KANA;
         from++;
         break;

     case 0x8f:
         // JIS X 0212 1990
         reg   = CHSET_JIS_X_0212_1990;
         from++;
         break;

     default:
         if (c > 0x80) {
             // JIS X 0208-1978
             reg   = CHSET_JIS_X_0208_1978;
         } else {
             // JIS X 0201 Roman/ANSI_X3.4-1968
             reg   = CHSET_ANSI_X_3_4_1968;
         }
         break;
     }

     // width in elements of the ISO-2022 output
     width = __rw_chset_map [reg].width + 1;

     // check the designation sequence
     if (state.g_map [0] != reg) {
         // work on a copy so that `to' is untouched if designation fails
         char* pd = to;

         // insert a designation sequence in this place
         ret = __rw_iso2022jp_designate (state, pd, to_end, reg, enc);
         if (ret != CODECVT_OK)
             return ret;

         // adjust destination pointer
         to = pd;

         state.gl_map = 1;
         state.g_map [0] = reg;
         state.shift_state = __rw_chset_map [reg].width;

         // NOTE(review): this reports success although the character
         // itself has not been written - confirm callers cope with it
         if (to == to_end)
             return ret;
     }

     // nothing to insert if the source character is incomplete
     if (width > from_end - from)
         return CODECVT_OK;

     // the destination must hold all `width' bytes of the character;
     // testing against the table's width member (one less than the byte
     // count) let a two-byte character be written with a single byte of
     // room left, i.e. one byte past to_end
     if (to_end - to < width)
         return CODECVT_PARTIAL;

     *to++ = char (*from++ & 0x7f);
     if (width > 1)
         *to++ = char (*from++ & 0x7f);

     return CODECVT_OK;
 }


 // ISO-2022-JP conversion to GB2312-1980
 // Converts one character by copying two bytes with the high bit set.
 // Nothing is consumed or written when fewer than GB2312_CHAR_LEN source
 // bytes are available.  The destination is not bounds-checked here.
 // Returns:
 //    std::codecvt_base::ok      always
 static inline
 _STD::codecvt_base::result
 __rw_iso2022_to_gb2312 (const char*& from,
                         const char*  from_end,
                         char*&       to,
                         int          /* reg */,
                         int          /* enc */)
 {
     // incomplete character: leave both pointers alone
     if (from_end - from < GB2312_CHAR_LEN)
         return CODECVT_OK;

     // lead and trail byte, each with the highest bit switched on
     to [0] = char (from [0] | 0x80);
     to [1] = char (from [1] | 0x80);

     from += 2;
     to   += 2;

     return CODECVT_OK;
 }


 // ISO-2022-JP conversion to KSC5601
 // Converts one character by copying two bytes with the high bit set.
 // Nothing is consumed or written when fewer than KSC5601_CHAR_LEN source
 // bytes are available.  The destination is not bounds-checked here.
 // Returns:
 //    std::codecvt_base::ok      always
 static inline
 _STD::codecvt_base::result
 __rw_iso2022_to_ksc5601 (const char*& from,
                          const char*  from_end,
                          char*&       to,
                          int          /* reg */,
                          int          /* enc */)
 {
     // incomplete character: leave both pointers alone
     if (from_end - from < KSC5601_CHAR_LEN)
         return CODECVT_OK;

     // lead and trail byte, each with the highest bit switched on
     to [0] = char (from [0] | 0x80);
     to [1] = char (from [1] | 0x80);

     from += 2;
     to   += 2;

     return CODECVT_OK;
 }


 // ISO-2022-JP conversion to ISO-8859-1
 // Converts one character by copying a single byte with the high bit set.
 // Nothing is consumed or written when fewer than ISO_8859_1_CHAR_LEN
 // source bytes are available.  The destination is not bounds-checked.
 // Returns:
 //    std::codecvt_base::ok      always
 static inline
 _STD::codecvt_base::result
 __rw_iso2022_to_iso88591 (const char*& from,
                           const char*  from_end,
                           char*&       to,
                           int          /* reg */,
                           int          /* enc */)
 {
     // incomplete character: leave both pointers alone
     if (from_end - from < ISO_8859_1_CHAR_LEN)
         return CODECVT_OK;

     // the single byte, with the highest bit switched on
     *to = char (*from | 0x80);

     ++from;
     ++to;

     return CODECVT_OK;
 }


 // ISO-2022-JP conversion to ISO-8859-7
 // Converts one character by copying a single byte with the high bit set.
 // Nothing is consumed or written when fewer than ISO_8859_7_CHAR_LEN
 // source bytes are available.  The destination is not bounds-checked.
 // Returns:
 //    std::codecvt_base::ok      always
 static inline
 _STD::codecvt_base::result
 __rw_iso2022_to_iso88597 (const char*& from,
                           const char*  from_end,
                           char*&       to,
                           int          /* reg */,
                           int          /* enc */)
 {
     // incomplete character: leave both pointers alone
     if (from_end - from < ISO_8859_7_CHAR_LEN)
         return CODECVT_OK;

     // the single byte, with the highest bit switched on
     *to = char (*from | 0x80);

     ++from;
     ++to;

     return CODECVT_OK;
 }


 // ISO-2022-JP conversion to packed EUC-JP
 // Converts one character of the character set registered as `reg'.
 // Nothing is consumed or written when the source holds fewer bytes than
 // the character set requires.  The destination is not bounds-checked.
 // Returns:
 //    std::codecvt_base::ok      in case the conversion succeeded, or the
 //                               source character is incomplete
 //    std::codecvt_base::error   for an unsupported registration, or one
 //                               that is not permitted when `enc' is
 //                               iso2022_jp
 static _STD::codecvt_base::result
 __rw_iso2022_to_eucjp (const char*&          from,
                        const char*           from_end,
                        char*&                to,
                        int                   reg,
                        int                   enc)
 {
     // number of source bytes making up one character of this set
     const int nbytes = __rw_chset_map [reg].width + 1;

     // incomplete character: leave both pointers alone
     if (from_end - from < nbytes)
         return CODECVT_OK;

     // JIS X 0201 Kana and JIS X 0212-1990 belong to ISO-2022-JP-2 only
     const bool jp2_only =
         CHSET_JIS_X_0201_KANA == reg || CHSET_JIS_X_0212_1990 == reg;

     if (jp2_only && iso2022_jp == enc)
         return CODECVT_ERROR;

     switch (reg) {
     case CHSET_ANSI_X_3_4_1968:
     case CHSET_JIS_X_0201_ROMAN:
         // one byte in EUC-JP, copied verbatim
         *to++ = *from++;
         return CODECVT_OK;

     case CHSET_JIS_X_0201_KANA:
         // two bytes in EUC-JP: the 0x8e prefix, then the character
         *to++ = '\x8e';
         *to++ = char (*from++ | 0x80);
         return CODECVT_OK;

     case CHSET_JIS_X_0212_1990:
         // 0x8f prefix; the two character bytes are stored below
         *to++ = '\x8f';
         break;

     case CHSET_JIS_X_0208_1978:
     case CHSET_JIS_X_0208_1983:
         // no prefix; the two character bytes are stored below
         break;

     default:
         return CODECVT_ERROR;
     }

     // two character bytes with the highest bit switched on
     *to++ = char (*from++ | 0x80);
     *to++ = char (*from++ | 0x80);

     return CODECVT_OK;
 }


 //  Converts  one  character  from  the  external  representation  to  the
 //  intermediary encoding  that is later  used in retrieving  the internal
 //  representation of that character
 //
 //  Any escape sequences at the start of the input are processed first by
 //  __rw_iso2022_escape().  The character that follows is then converted
 //  according to the active registration, which is also stored in `reg':
 //  G2 when a single shift 2 is pending, G0 otherwise.
 //
 //  Returns:
 //     the result of __rw_iso2022_escape() when an escape sequence is in
 //     error or was not consumed;
 //     std::codecvt_base::ok when the input ends right after the escape
 //     sequences (no character is converted);
 //     otherwise the result of the character set specific conversion
 static _STD::codecvt_base::result
 __rw_iso2022_to_interm (__rw_iso2022_state_t& state,
                         const char*&          from,
                         const char*           from_end,
                         char*&                to,
                         unsigned char&        reg,
                         int                   enc)
 {
     _STD::codecvt_base::result res = CODECVT_OK;

     // consume leading escape sequences; `from' is compared against
     // `from_end' before every dereference because an escape sequence
     // may end exactly at the end of the input
     bool first = true;
     for (; from != from_end && *from == ESCAPE_CHAR; first = false) {
         // clear up the single shift functions set before
         if (first == false)
             state.sshift2 = state.sshift3 = 0;

         const char* tmp = from;
         res = __rw_iso2022_escape (state, from, from_end, enc);

         // the detected escape sequence has not been consumed because it
         // was incomplete or an error has been returned
         if (res != CODECVT_OK || tmp == from)
             return res;
     }

     // no character follows the escape sequence(s); return before the
     // single shift flags are touched so that a pending single shift
     // (presumably set by __rw_iso2022_escape) survives to the next call
     if (from == from_end)
         return res;

     // FIXME - process control characters (*from <= 0x20)

     // if any single shift sequence has been selected, then clear up the
     // flag and adjust the registration code accordingly
     if (state.sshift2 == 1) {
         state.sshift2 = 0;
         // registration code switches to whatever code has been selected
         // in G2 (one of ISO-8859-1 and ISO-8859-7)
         reg = state.g_map [2];
     } else {
         // refresh the reg value
         reg = state.g_map [0];
     }

     // ISO-2022-JP and ISO-2022-JP-2 use G0 exclusively
     switch (reg) {
     case CHSET_ANSI_X_3_4_1968:
     case CHSET_JIS_X_0201_KANA:
     case CHSET_JIS_X_0201_ROMAN:
     case CHSET_JIS_X_0208_1978:
     case CHSET_JIS_X_0208_1983:
     case CHSET_JIS_X_0212_1990:
         res = __rw_iso2022_to_eucjp (from, from_end, to, reg, enc);
         break;

     case CHSET_GB2312_1980:
         res = __rw_iso2022_to_gb2312 (from, from_end, to, reg, enc);
         break;

     case CHSET_KSC_5601_1987:
         res = __rw_iso2022_to_ksc5601 (from, from_end, to, reg, enc);
         break;

     case CHSET_ISO_8859_1:
         res = __rw_iso2022_to_iso88591 (from, from_end, to, reg, enc);
         break;

     case CHSET_ISO_8859_7:
         res = __rw_iso2022_to_iso88597 (from, from_end, to, reg, enc);
         break;

     default:
         // unknown registration: nothing is converted, as before
         break;
     }

     // clear any single shift functions, if any
     state.sshift2 = state.sshift3 = 0;

     // propagate the outcome of the conversion; returning a hard-coded
     // ok here turned conversion errors into silent successes
     return res;
 }


 // Converts one wide character to the external encoding described by the
 // database `impl'.
 //
 // The character is first turned into UTF-8 with __rw_itoutf8(); the
 // UTF-8 bytes then drive a walk through the database's utf8_to_ext_tab()
 // lookup tables.  The final table entry, offset by the size of the
 // database header, locates a NUL-terminated byte sequence inside the
 // database that is copied to the destination.
 //
 // Returns:
 //    std::codecvt_base::ok      on success, or when either range is empty
 //    std::codecvt_base::partial when the destination fills up while the
 //                               sequence is being copied
 //    std::codecvt_base::error   when the lookup fails
 static _STD::codecvt_base::result
 __rw_ucs4_to_eucjp (const wchar_t*& from,
                     const wchar_t*  from_end,
                     char*&          to,
                     char*           to_end,
                     const _RW::__rw_codecvt_t* impl)
 {
     typedef unsigned char UChar;

     // byte-wise view of the database, used to read the stored sequence
     const char* const base =
         _RWSTD_REINTERPRET_CAST (const char*, impl);

     // scratch space for the UTF-8 form of the character
     char  utf8 [_UTF8_MB_CUR_MAX];
     char* pos = utf8;

     // start of the first lookup table
     const unsigned int* const first_tab = impl->utf8_to_ext_tab ();
     _RWSTD_ASSERT (first_tab);

     // nothing to read or nowhere to write
     if (from_end == from || to_end == to)
         return CODECVT_OK;

     size_t remaining = __rw_itoutf8 (*from, pos);

     // entries with the top bit set refer to another table
     unsigned int entry = first_tab [UChar (*pos)];

     while (entry & 0x80000000) {
         // FIXME - these are error conditions: an all-ones entry, or
         // more table levels than bytes available
         if (_RWSTD_UINT_MAX == entry || 0 == remaining--)
             return CODECVT_ERROR;

         // follow the link and index it with the next UTF-8 byte
         const unsigned int* const next_tab =
             first_tab + 256 * (entry & 0x7fffffff);

         entry = next_tab [UChar (*++pos)];
     }

     // copy the NUL-terminated sequence found past the database header
     for (size_t off = entry + sizeof (_RW::__rw_codecvt_t);
          base [off]; ++off) {

         if (to_end == to)
             return CODECVT_PARTIAL;

         *to++ = base [off];
     }

     ++from;
     return CODECVT_OK;
 }


 // Converts one wide character to an intermediary multibyte encoding by
 // probing a fixed list of encoding databases in order; the first database
 // whose lookup tables contain the character wins.
 //
 // On success the encoded bytes are appended at `to', `from' is advanced
 // past the character and the registration code paired with the winning
 // database is returned.  Returns 0 when no database maps the character,
 // in which case neither `from' nor `to' is modified.
 //
 // NOTE(review): `to' is written without any bound; callers must provide
 // a buffer large enough for the longest stored sequence.
 static unsigned char /* registration code */
 __rw_ucs4_to_interm (  const wchar_t*& from,
                        char*&          to)
 {
     typedef unsigned char UChar;

     // databases to probe, in order of preference, each paired with the
     // registration code reported to the caller on a match
     static const struct {
         UChar db;
         UChar reg;
     } db_array [] = {
         { DBIDX_ANSI_X_3_4_1968, CHSET_ANSI_X_3_4_1968 },
         { DBIDX_EUC_JP         , CHSET_JIS_X_0212_1990 },
         { DBIDX_EUC_KR         , CHSET_KSC_5601_1987   },
         { DBIDX_GB2312         , CHSET_GB2312_1980     },
         { DBIDX_ISO_8859_7     , CHSET_ISO_8859_7      },
         { DBIDX_ISO_8859_1     , CHSET_ISO_8859_1      }
     };

     // utf8 temporary buffer
     char tmp [_UTF8_MB_CUR_MAX];

     for (size_t i = 0; i < sizeof db_array / sizeof *db_array; ++i) {
         // restart at the first UTF-8 byte for every database
         char* ptmp = tmp;

         // obtain the database mapping; databases that are not
         // available are skipped
         const void* pdb = __rw_get_encoding_database (db_array [i].db);
         if (pdb == 0)
             continue;

         // cast the database map pointer to the correct type
         const _RW::__rw_codecvt_t* impl =
             _RWSTD_STATIC_CAST(const _RW::__rw_codecvt_t*, pdb);

         // the "raw access" pointer used in finding the external repr.
         const char* impl_raw = _RWSTD_REINTERPRET_CAST(const char*,impl);

         // tbls will point to the first lookup table
         const unsigned int* tbls = impl->utf8_to_ext_tab ();
         const unsigned int* tbl  = tbls;
         _RWSTD_ASSERT(tbls);

         // UTF-8 form of the character; the returned count limits how
         // many table levels may be followed below
         size_t ret = __rw_itoutf8 (*from, ptmp);

         // walk the tables: an entry with the top bit set links to the
         // next table, an all-ones entry means "no mapping"
         bool success = true;
         unsigned int wc = tbl [UChar (*ptmp)];
         while (wc & 0x80000000) {
             if (wc == _RWSTD_UINT_MAX || ret-- == 0) {
                 success = false;
                 break;
             }

             // get next table
             wc &= 0x7fffffff;
             tbl = tbls + 256 * wc;
             wc = tbl [UChar (*++ptmp)];
         }

         // continue on the next lookup
         if (!success)
             continue;

         // store the encoding sequence at destination; it is the
         // NUL-terminated byte string located `wc' bytes past the
         // database header
         size_t offset = wc + sizeof (_RW::__rw_codecvt_t);
         while (impl_raw [offset])
             *to++ = impl_raw [offset++];

         // advance source pointer to next position
         from++;

         // Find the registration code of the encoding and return it
         return db_array [i].reg;
     }

     // invalid registration code
     return 0;
 }


 // does the conversion of one character to internal representation
 //
 // The external bytes are first converted to an intermediary encoding by
 // __rw_iso2022_to_interm() (which also consumes escape sequences), then
 // looked up in the n_to_w_tab() tables of the database selected through
 // __rw_chset_map [reg].index.
 //
 // `from' and `to' are advanced only when something was consumed or
 // produced.  `to_end' is not used: the destination is not bounds-checked
 // here, so the caller must guarantee room for one wide character.
 //
 // Returns:
 //    std::codecvt_base::ok      when a character was converted, when only
 //                               escape sequences were consumed, or when
 //                               the input is too short for a character
 //    std::codecvt_base::error   when no database is available or the
 //                               lookup fails
 //    otherwise whatever __rw_iso2022_to_interm() reported
 //
 // NOTE(review): the last argument of __rw_deallocate_state() presumably
 // restricts deallocation to states that are back in the initial shift
 // state - confirm against its definition.
 static _STD::codecvt_base::result
 __rw_iso2022_to_ucs4 (_RWSTD_MBSTATE_T&     state,
                       __rw_iso2022_state_t* iso_state,
                       const char*&          from,
                       const char*           from_end,
                       wchar_t*&             to,
                       wchar_t*              /* to_end */,
                       int                   enc)
 {
     _STD::codecvt_base::result res;

     // the registration code of the character set
     unsigned char reg   = iso_state->g_map [0];
     int           width = iso_state->shift_state + 1;

     // test input width
     if (from_end - from < width) {
         // test and deallocate state
         if (iso_state->g_map [0] == CHSET_ANSI_X_3_4_1968)
             __rw_deallocate_state (*iso_state, state, false);

         return CODECVT_OK;
     }

     // temporary buffer to accommodate the intermediary encoding
     char tmp_buffer [8];
     char* tmp_to = tmp_buffer;
     char* tmp_to_end;

     // work on a copy; `from' is committed only on success
     const char* from_next  = from;

     // convert one character at a time and note that the registration
     // code is actually modified by the following call in the case
     // of single shift functions
     res = __rw_iso2022_to_interm (*iso_state, from_next, from_end,
                                   tmp_to, reg, enc);
     // bail out on failure, or when nothing was consumed or produced
     if (res != CODECVT_OK ||
         (from_next == from && tmp_to == tmp_buffer)) {

         __rw_deallocate_state (*iso_state, state, CODECVT_ERROR != res);

         return res;
     }

     // the input sequence started with an escape sequence
     if (tmp_to == tmp_buffer) {
         from = from_next;
         return res;
     }

     // delimit the intermediary bytes and rewind to their start
     tmp_to_end = tmp_to;
     tmp_to = tmp_buffer;

     // get the proper database for decoding input
     const void* pdb = __rw_get_encoding_database (__rw_chset_map [reg].index);
     if (pdb == 0) {
         __rw_deallocate_state (*iso_state, state, false);
         return CODECVT_ERROR;
     }
     const _RW::__rw_codecvt_t* pimpl =
         _RWSTD_STATIC_CAST(const _RW::__rw_codecvt_t*, pdb);

     // retrieve the lookup table start
     const unsigned int* tbls = pimpl->n_to_w_tab ();
     const unsigned int* tbl  = tbls;
     _RWSTD_ASSERT(tbls);

     // wider character representations will be looked upon in the
     // database tables; an entry with the top bit set links to the next
     // table, an all-ones entry marks an invalid sequence
     unsigned int wc = tbl [*(unsigned char*)tmp_to];
     while (wc & 0x80000000) {
         if (wc == _RWSTD_UINT_MAX) {
             __rw_deallocate_state (*iso_state, state, false);
             return CODECVT_ERROR;
         }

         // wc is table index
         tbl = tbls + 256*(wc & 0x7fffffff);
         tmp_to++;

         // we have reached end of EUC-JP temporary buffer and we found
         // here an incomplete sequence; the incomplete sequence here
         // is an error
         if (tmp_to == tmp_to_end) {
             __rw_deallocate_state (*iso_state, state, false);
             return CODECVT_ERROR;
         }

         wc = tbl [*(unsigned char*)tmp_to];
     }

     // update source pointer and store the value at destination
     from = from_next;
     *to++ = pimpl->get_ucs4_at_offset(wc);

     return CODECVT_OK;
 }

 /****************************************************************************/

 // Conversion from ISO-2022-JP to UCS-4
 // Converts characters one at a time with __rw_iso2022_to_ucs4() until
 // either range is exhausted, a conversion fails, or a conversion makes
 // no progress.  `from' and `to' are advanced past everything that was
 // converted.
 // Returns:
 //    std::codecvt_base::error   when no conversion state is available
 //    std::codecvt_base::ok      when the loop ran to completion or a
 //                               conversion made no progress
 //    otherwise the first result other than ok reported by
 //    __rw_iso2022_to_ucs4()
 _STD::codecvt_base::result
 __rw_iso2022jp_do_in (_RWSTD_MBSTATE_T& state,
                       const char*&      from,
                       const char*       from_end,
                       wchar_t*&         to,
                       wchar_t*          to_end)
 {
     // the iso2022 state
     __rw_iso2022_state_t* const iso_state =
         __rw_get_iso2022_state (state, iso2022_jp);

     if (0 == iso_state)
         return CODECVT_ERROR;

     while (from != from_end && to != to_end) {
         // tentative positions, committed after a successful step
         const char* src = from;
         wchar_t*    dst = to;

         const _STD::codecvt_base::result status =
             __rw_iso2022_to_ucs4 (state, iso_state, src,
                                   from_end, dst, to_end, iso2022_jp);

         if (CODECVT_OK != status)
             return status;

         // an ok result with no conversion
         if (src == from && dst == to)
             return CODECVT_OK;

         from = src;
         to   = dst;
     }

     // deallocate state if the conversion has gone back to initial state
     __rw_deallocate_state (*iso_state, state, true);

     return CODECVT_OK;
 }


 // Conversion from UCS-4 to ISO-2022-JP
 // Each wide character is converted in two steps: to EUC-JP through the
 // EUC-JP database (__rw_ucs4_to_eucjp), then from EUC-JP to ISO-2022-JP
 // (__rw_eucjp_to_iso2022).  `from' and `to' are advanced only after both
 // steps succeeded for a character.
 // Returns:
 //    std::codecvt_base::error   when no conversion state or no EUC-JP
 //                               database is available
 //    std::codecvt_base::ok      when the loop ran to completion
 //    otherwise the result of the step that failed or made no progress
 _STD::codecvt_base::result
 __rw_iso2022jp_do_out (_RWSTD_MBSTATE_T& state,
                        const wchar_t*&   from,
                        const wchar_t*    from_end,
                        char*&            to,
                        char*             to_end)
 {
     // the iso2022 state
     __rw_iso2022_state_t* const iso_state =
         __rw_get_iso2022_state (state, iso2022_jp);

     if (0 == iso_state)
         return CODECVT_ERROR;

     // use the EUC-JP database for a full lookup
     const void* const db = __rw_get_encoding_database (DBIDX_EUC_JP);
     if (0 == db) {
         __rw_deallocate_state (*iso_state, state, false);
         return CODECVT_ERROR;
     }

     const _RW::__rw_codecvt_t* const codecvt_db =
         _RWSTD_STATIC_CAST (const _RW::__rw_codecvt_t*, db);
     _RWSTD_ASSERT (codecvt_db);

     // holds the EUC-JP form of one character
     char scratch [8];

     while (from != from_end && to != to_end) {

         // step 1: UCS-4 to EUC-JP
         const wchar_t* wsrc    = from;
         char*          euc_end = scratch;

         _STD::codecvt_base::result res =
             __rw_ucs4_to_eucjp (wsrc, from_end, euc_end,
                                 scratch + sizeof scratch, codecvt_db);

         const bool no_euc = wsrc == from && euc_end == scratch;

         if (CODECVT_OK != res || no_euc) {
             __rw_deallocate_state (*iso_state, state, CODECVT_ERROR != res);
             return res;
         }

         // step 2: EUC-JP to ISO-2022
         const char* euc_src = scratch;
         char*       dst     = to;

         res = __rw_eucjp_to_iso2022 (*iso_state, euc_src,
                                      euc_end, dst, to_end, iso2022_jp);

         const bool no_output = euc_src == scratch && dst == to;

         if (CODECVT_OK != res || no_output) {
             __rw_deallocate_state (*iso_state, state, CODECVT_ERROR != res);
             return res;
         }

         // commit both positions
         from = wsrc;
         to   = dst;
     }

     // deallocate state if the conversion has gone back to
     // initial state
     __rw_deallocate_state (*iso_state, state, true);

     return CODECVT_OK;
 }


 // Writes the sequence that returns the output to ANSI_X3.4-1968.
 // Nothing is written when the character set currently invoked in GL is
 // already ANSI_X3.4-1968.
 // Returns:
 //    std::codecvt_base::error   when no conversion state is available
 //    std::codecvt_base::ok      when no designation was necessary
 //    otherwise the result of __rw_iso2022jp_designate()
 //
 // NOTE(review): unlike the other callers of __rw_iso2022jp_designate()
 // this function does not record the new designation in the state
 // afterwards - confirm that the designate function takes care of it.
 _STD::codecvt_base::result
 __rw_iso2022jp_do_unshift (_RWSTD_MBSTATE_T& state,
                            char*& to, char* to_end)
 {
     // the iso2022 state
     __rw_iso2022_state_t* const iso_state =
         __rw_get_iso2022_state (state, iso2022_jp);

     if (0 == iso_state)
         return CODECVT_ERROR;

     // character set currently invoked in GL
     const bool in_ascii =
         CHSET_ANSI_X_3_4_1968 == iso_state->g_map [iso_state->gl_map - 1];

     _STD::codecvt_base::result res = CODECVT_OK;

     if (!in_ascii)
         res = __rw_iso2022jp_designate (*iso_state, to, to_end,
                                         CHSET_ANSI_X_3_4_1968, iso2022_jp);

     // deallocate a state if the conversion has gone back to
     // initial shift state and operation succeeded, OR, if
     // the result is error
     __rw_deallocate_state (*iso_state, state, CODECVT_ERROR != res);

     return res;
 }


 // Counts the complete characters found in [from, from_end), stopping
 // after at most `max' of them.
 //
 // Escape sequences are processed by __rw_iso2022_escape() and are not
 // counted.  Every other character occupies shift_state + 1 bytes, as
 // recorded in the conversion state.  Counting stops at the first
 // erroneous or incomplete escape sequence and at the first incomplete
 // character.
 //
 // Returns the number of characters counted; 0 when no conversion state
 // is available.
 size_t
 __rw_iso2022jp_do_length (_RWSTD_MBSTATE_T& state,
                           const char* from, const char* from_end,
                           size_t max)
 {
     _RWSTD_ASSERT(from <= from_end);

     // the iso2022 state
     __rw_iso2022_state_t* iso_state =
         __rw_get_iso2022_state (state, iso2022_jp);

     // the result is a count, not a codecvt_base::result: without a
     // state no character can be accounted for
     if (iso_state == 0)
         return 0;

     size_t ret = 0;
     _STD::codecvt_base::result res = CODECVT_OK;

     // the loop condition guards every dereference of `from', including
     // the one that follows a consumed escape sequence
     while (max && from_end - from) {

         if (*from == ESCAPE_CHAR) {
             const char* tmp = from;
             res = __rw_iso2022_escape (*iso_state, from, from_end, iso2022_jp);

             // stop counting at an erroneous or incomplete escape
             // sequence instead of treating its bytes as characters
             if (res != CODECVT_OK || from == tmp)
                 break;

             continue;
         }

         // incomplete trailing character
         if (from_end - from < int(iso_state->shift_state) + 1)
             break;

         ret++, max--;
         from += iso_state->shift_state + 1;
     }

     // deallocate a state if the conversion has gone back to initial shift
     // state
     __rw_deallocate_state (*iso_state, state, CODECVT_ERROR != res);

     return ret;
 }


 // Value reported for codecvt::max_length() by the ISO-2022-JP facet.
 size_t
 __rw_iso2022jp_do_max_length ()
 {
     const size_t longest = 2;

     return longest;
 }


 // Value reported for codecvt::encoding() by the ISO-2022-JP facet;
 // -1 is the value the standard assigns to state-dependent encodings.
 int __rw_iso2022jp_do_encoding ()
 {
     const int state_dependent = -1;

     return state_dependent;
 }

 // Value reported for codecvt::always_noconv() by the ISO-2022-JP facet:
 // a conversion is always required.
 bool __rw_iso2022jp_do_always_noconv ()
 {
     const bool identity_conversion = false;

     return identity_conversion;
 }

 /****************************************************************************/

 // Conversion from ISO-2022-JP-2 to UCS-4
 // Converts characters one at a time with __rw_iso2022_to_ucs4() until
 // either range is exhausted, a conversion fails, or a conversion makes
 // no progress.  `from' and `to' are advanced past everything that was
 // converted.
 // Returns:
 //    std::codecvt_base::error   when no conversion state is available
 //    std::codecvt_base::ok      when the loop ran to completion or a
 //                               conversion made no progress
 //    otherwise the first result other than ok reported by
 //    __rw_iso2022_to_ucs4()
 //
 // NOTE(review): the state is obtained with iso2022_jp while characters
 // are converted with iso2022_jp2 - presumably intentional, confirm.
 _STD::codecvt_base::result
 __rw_iso2022jp2_do_in (_RWSTD_MBSTATE_T& state,
                        const char*&      from,
                        const char*       from_end,
                        wchar_t*&         to,
                        wchar_t*          to_end)
 {
     // the iso2022 state
     __rw_iso2022_state_t* const iso_state =
         __rw_get_iso2022_state (state, iso2022_jp);

     if (0 == iso_state)
         return CODECVT_ERROR;

     // tentative positions; they are committed by the loop's increment
     // expression after every successful step and therefore always equal
     // `from' and `to' at the top of an iteration
     const char* src = from;
     wchar_t*    dst = to;

     for ( ; from != from_end && to != to_end; from = src, to = dst) {

         const _STD::codecvt_base::result status =
             __rw_iso2022_to_ucs4 (state, iso_state, src,
                                   from_end, dst, to_end, iso2022_jp2);

         if (CODECVT_OK != status)
             return status;

         // an ok result with no conversion
         if (src == from && dst == to)
             return status;
     }

     // deallocate state if the conversion has gone back to initial state
     __rw_deallocate_state (*iso_state, state, true);

     return CODECVT_OK;
 }


 // Conversion from UCS-4 to ISO-2022-JP-2
 // Each wide character is converted in two steps: to an intermediary
 // encoding chosen by __rw_ucs4_to_interm(), then to ISO-2022-JP-2 by the
 // converter that matches the returned registration code.  `from' and
 // `to' are advanced only after both steps succeeded for a character.
 // Returns:
 //    std::codecvt_base::error   when no conversion state is available,
 //                               when no database maps the character, or
 //                               when the registration code is unknown
 //    std::codecvt_base::ok      when the loop ran to completion
 //    otherwise the result of the converter that failed or made no
 //    progress
 _STD::codecvt_base::result
 __rw_iso2022jp2_do_out (_RWSTD_MBSTATE_T& state,
                        const wchar_t*&    from,
                        const wchar_t*     from_end,
                        char*&             to,
                        char*              to_end)
 {
     // the iso2022 state
     __rw_iso2022_state_t* iso_state =
         __rw_get_iso2022_state (state, iso2022_jp);

     if (iso_state == 0)
         return CODECVT_ERROR;

     // destination buffer to accommodate the intermediate encoding
     char tmp  [8];

     // Loop until the source buffer is consumed, an error occurs, or
     // the destination buffer reaches capacity
     while (from_end - from && to_end - to) {
         _STD::codecvt_base::result res;

         // convert the UCS-4 value to intermediary encoding
         const wchar_t* ps   = from;

         char* tmps  = tmp;

         unsigned char reg =
             __rw_ucs4_to_interm (ps, tmps);
         if (reg == 0) {
             // deallocate state
             __rw_deallocate_state (*iso_state, state, false);

             // error condition - none of the databases contained a mapping
             // of the character
             return CODECVT_ERROR;
         }

         char* pd = to;

         // convert the intermediary value to ISO-2022; the intermediary
         // bytes occupy [tmp, tmps)
         const char* tmpe  = tmps;
         const char* ctmps = tmp;

         // the registration of the encoding dictates how the conversion is
         // to be performed further
         switch (reg) {
         case CHSET_ANSI_X_3_4_1968:
             res = __rw_ascii_to_iso2022     (*iso_state, ctmps, tmpe,
                                              pd, to_end, iso2022_jp2);
             break;
         case CHSET_JIS_X_0212_1990:
             res = __rw_eucjp_to_iso2022     (*iso_state, ctmps, tmpe,
                                              pd, to_end, iso2022_jp2);
             break;
         case CHSET_KSC_5601_1987:
             res = __rw_ksc5601_to_iso2022   (*iso_state, ctmps, tmpe,
                                              pd, to_end, iso2022_jp2);
             break;
         case CHSET_GB2312_1980:
             res = __rw_gb2312_to_iso2022    (*iso_state, ctmps, tmpe,
                                              pd, to_end, iso2022_jp2);
             break;
         case CHSET_ISO_8859_7:
             res = __rw_iso88597_to_iso2022  (*iso_state, ctmps, tmpe,
                                              pd, to_end, iso2022_jp2);
             break;
         case CHSET_ISO_8859_1:
             res = __rw_iso88591_to_iso2022  (*iso_state, ctmps, tmpe,
                                              pd, to_end, iso2022_jp2);
             break;
         default:
             // unknown registration: release the state exactly like the
             // other error paths of this function before bailing out
             __rw_deallocate_state (*iso_state, state, false);
             return CODECVT_ERROR;
         }

         // stop on failure, or when nothing was consumed or produced
         if (res != CODECVT_OK || (ctmps==tmp && pd==to)) {

             __rw_deallocate_state (*iso_state, state, CODECVT_ERROR != res);

             return res;
         }

         //adjust source pointer
         from = ps;
         to   = pd;

         // after a successful insertion the single shift functions
         // must be cleared
         iso_state->sshift2 = iso_state->sshift3 = 0;

     }

     // deallocate state if the conversion has gone back to
     // initial state
     __rw_deallocate_state (*iso_state, state, true);

     return CODECVT_OK;
 }


 // ISO-2022-JP-2 flavour of the unshift operation.
 //
 //   state   conversion state, handed through unchanged
 //   to      current position in the destination buffer (by reference)
 //   to_end  one past the end of the destination buffer
 //
 // The work is delegated in full to the ISO-2022-JP implementation and
 // whatever it reports is returned to the caller as is.
 _STD::codecvt_base::result
 __rw_iso2022jp2_do_unshift (_RWSTD_MBSTATE_T& state,
                             char*& to, char* to_end)
 {
     const _STD::codecvt_base::result unshift_res =
         __rw_iso2022jp_do_unshift (state, to, to_end);

     return unshift_res;
 }


 // Counts the characters encoded in the ISO-2022-JP-2 sequence
 // [from, from_end).
 //
 //   state     conversion state used to look up the ISO-2022 shift state
 //   from      beginning of the external (encoded) sequence
 //   from_end  one past the end of the external sequence
 //   max       maximum number of characters to count
 //
 // Counting stops when max characters have been seen, when the input is
 // exhausted, when the remaining bytes do not make up a whole character,
 // or when an escape sequence cannot be consumed.
 //
 // Returns the number of complete characters counted; 0 when no ISO-2022
 // state could be obtained for the given conversion state.
 //
 // NOTE(review): the value returned is a count of characters, not of the
 // bytes consumed, and escape sequences do not contribute to it;
 // std::codecvt::do_length is specified in terms of consumed external
 // characters -- confirm what the callers expect before changing this.
 size_t
 __rw_iso2022jp2_do_length (_RWSTD_MBSTATE_T& state,
                            const char* from, const char* from_end,
                            size_t max)
 {
     _RWSTD_ASSERT(from <= from_end);

     // the iso2022 state
     // NOTE(review): the state is requested as iso2022_jp although the
     // escape sequences below are parsed as iso2022_jp2 -- confirm that
     // this is intended
     __rw_iso2022_state_t* iso_state =
         __rw_get_iso2022_state (state, iso2022_jp);

     // the result of this function is a length, not a codecvt_base::result;
     // without a state nothing can be measured
     if (iso_state == 0)
         return 0;

     size_t ret = 0;
     _STD::codecvt_base::result res = CODECVT_OK;

     while (max && from != from_end) {

         // consume any run of escape sequences; never look at *from
         // once the end of the input has been reached
         bool first = true;
         for (; from != from_end && *from == ESCAPE_CHAR; first = false) {
             // clear up the single shift functions set before
             if (first == false)
                 iso_state->sshift2 = iso_state->sshift3 = 0;

             const char* tmp = from;
             res = __rw_iso2022_escape (*iso_state, from, from_end,
                                        iso2022_jp2);

             // the detected escape sequence has not been consumed because it
             // was incomplete
             // NOTE(review): this path returns without calling
             // __rw_deallocate_state -- confirm the state is meant to be
             // kept in this case
             if (res != CODECVT_OK || tmp == from)
                 return ret;
         }

         // the input ended right after an escape sequence
         if (from == from_end)
             break;

         // FIXME - process control characters (*from <= 0x20)

         // the registration for all character sets other than the ISO's
         // is stored in the first register, G0.
         unsigned char reg = iso_state->g_map [0];
         unsigned int  width;
         if (iso_state->sshift2 == 1) {
             // a character selected via single shift 2 takes one byte
             width = 1;
         } else {
             width = __rw_chset_map [reg].width + 1;
         }

         // not enough bytes left for a whole character
         if (from_end - from < int (width))
             break;

         ret++, max--;
         from += width;

         // clean up the single shift function
         iso_state->sshift2 = iso_state->sshift3 = 0;

     }

     // deallocate a state if the conversion has gone back to initial shift
     // state
     __rw_deallocate_state (*iso_state, state, CODECVT_ERROR != res);

     return ret;
 }


 // Reports the fixed maximum length used for ISO-2022-JP-2; the value
 // is a constant and always equals 2.
 size_t
 __rw_iso2022jp2_do_max_length ()
 {
     const size_t max_len = 2;

     return max_len;
 }


 // Reports the encoding indicator for ISO-2022-JP-2; always -1.
 // NOTE(review): presumably surfaced through codecvt::encoding(), where
 // -1 denotes a state-dependent encoding -- confirm against the caller.
 int __rw_iso2022jp2_do_encoding ()
 {
     const int encoding_indicator = -1;

     return encoding_indicator;
 }


 // Tells whether ISO-2022-JP-2 conversions are always no-ops; they are
 // not, so the answer is unconditionally false.
 bool __rw_iso2022jp2_do_always_noconv ()
 {
     const bool never_converts = false;

     return never_converts;
 }

 }   // namespace __rw