| /*************************************************************************** |
| * |
| * 22.codecvt.out.cpp - test exercising the std::codecvt::out() |
| * |
| * $Id$ |
| * |
| *************************************************************************** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed |
| * with this work for additional information regarding copyright |
| * ownership. The ASF licenses this file to you under the Apache |
| * License, Version 2.0 (the "License"); you may not use this file |
| * except in compliance with the License. You may obtain a copy of |
| * the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
| * implied. See the License for the specific language governing |
| * permissions and limitations under the License. |
| * |
| * Copyright 2005-2006 Rogue Wave Software. |
| * |
| **************************************************************************/ |
| |
| #ifdef __SUNPRO_CC |
| // working around a SunPro/SunOS bug (PR #26255) |
| # include <time.h> |
| #endif // __SUNPRO_CC |
| |
| #include <locale> // for codecvt |
| |
| #include <climits> // for MB_LEN_MAX |
| #include <clocale> // for LC_CTYPE, setlocale() |
| #include <cstdio> // for sprintf() |
| #include <cstdlib> // for MB_CUR_MAX, free(), size_t |
| #include <cstring> // for strcpy(), strlen() |
| #include <cwchar> // for codecvt |
| |
| #include <driver.h> // for rw_test(), ... |
| #include <file.h> // for rw_fwrite() |
| #include <rw_locale.h> // for rw_locales(), rw_set_locale_root() |
| #include <rw_printf.h> // for rw_sprintf() |
| #include <valcmp.h> // for rw_strcmp() |
| |
| /****************************************************************************/ |
| |
// Root of the locale directory (RWSTD_LOCALE_ROOT).  The pointer starts
// out null and is assigned in main() rather than initialized here in
// order to avoid a Solaris 7 putenv() bug (PR #30017).
const char* locale_root = 0;
| |
| |
| // creates a table-based multibyte locale |
| static const char* |
| create_locale () |
| { |
| char cm_fname [1024]; |
| std::sprintf (cm_fname, "%s%c%s", locale_root, _RWSTD_PATH_SEP, "charmap"); |
| |
| static const char charmap[] = { |
| "<code_set_name> test_charmap \n" |
| "<comment_char> % \n" |
| "<escape_char> / \n" |
| "<mb_cur_min> 1 \n" |
| "<mb_cur_max> 9 \n" |
| "CHARMAP \n" |
| "<U0000> /x30 0 \n" |
| "<U0001> /x31 1 \n" |
| "<U0002> /x32/x32 22 \n" |
| "<U0003> /x33/x33/x33 333 \n" |
| "<U0004> /x34/x34/x34/x34 4444 \n" |
| "<U0005> /x35/x35/x35/x35/x35 55555 \n" |
| "<U0006> /x36/x36/x36/x36/x36/x36 666666 \n" |
| "<U0007> /x37/x37/x37/x37/x37/x37/x37 7777777 \n" |
| "<U0008> /x38/x38/x38/x38/x38/x38/x38/x38 88888888 \n" |
| "<U0009> /x39/x39/x39/x39/x39/x39/x39/x39/x39 999999999 \n" |
| "<U0010> /x41 A \n" |
| "<U0011> /x42 B \n" |
| "<U0012> /x43 C \n" |
| "<U0013> /x44 D \n" |
| "<U0014> /x45 E \n" |
| "<U0015> /x46 F \n" |
| "END CHARMAP \n" |
| }; |
| |
| if (std::size_t (-1) == rw_fwrite (cm_fname, charmap)) |
| return 0; |
| |
| char src_fname [1024]; |
| std::sprintf (src_fname, "%s%c%s", locale_root, _RWSTD_PATH_SEP, "source"); |
| |
| if (std::size_t (-1) == rw_fwrite (src_fname, "LC_CTYPE\nEND LC_CTYPE\n")) |
| return 0; |
| |
| // invoke localedef to create the named locale |
| // silence the following warnings: |
| // 701: no compatible locale found |
| // 702: member of portable character set <x> not found |
| // in the character map |
| // 706: iconv_open() failed |
| const char* const locname = |
| rw_localedef ("-w701 -w702 -w706", |
| src_fname, cm_fname, "mb_cur_max-9"); |
| |
| return locname; |
| } |
| |
| /****************************************************************************/ |
| |
| // finds a multibyte character that is `bytes' long if `bytes' is less |
| // than or equal to MB_CUR_MAX, or the longest multibyte sequence in |
| // the current locale, and sets `wchar' to the wide character that |
| // corresponds to it |
| // returns `mbchar' on success, 0 on failure to find a multibute |
| // character of the specified length |
| static const char* |
| get_mb_char (wchar_t *wchar, char *mbchar, std::size_t bytes) |
| { |
| RW_ASSERT (0 != wchar); |
| RW_ASSERT (0 != mbchar); |
| |
| *mbchar = '\0'; |
| |
| if (0 == bytes) |
| return mbchar; |
| |
| const bool exact = bytes <= MB_CUR_MAX; |
| |
| if (!exact) |
| bytes = MB_CUR_MAX; |
| |
| wchar_t wc; |
| |
| // search the first 64K characters sequentially |
| for (wc = wchar_t (1); wc != wchar_t (0xffff); ++wc) { |
| |
| if ( int (bytes) == std::wctomb (mbchar, wc) |
| && int (bytes) == std::mblen (mbchar, bytes)) { |
| // NUL-terminate the multibyte character of the requested length |
| mbchar [bytes] = '\0'; |
| *wchar = wc; |
| break; |
| } |
| |
| *mbchar = '\0'; |
| } |
| |
| #if 2 < _RWSTD_WCHAR_SIZE |
| |
| // if a multibyte character of the requested size is not found |
| // in the low 64K range, try to find one using a random search |
| if (wchar_t (0xffff) == wc) { |
| |
| // iterate only so many times to prevent an infinite loop |
| // in case when MB_CUR_MAX is greater than the longest |
| // multibyte character |
| for (int i = 0; i != 0x100000; ++i) { |
| |
| wc = 0; |
| |
| typedef unsigned char UChar; |
| |
| // set wc to a random value (rand() returns a value |
| // less than or equal to RAND_MAX so the loop makes |
| // sure all bits are initialized) |
| for (int j = 0; j < int (sizeof wc); ++j) { |
| wc <<= 8; |
| wc |= wchar_t (UChar (std::rand ())); |
| } |
| |
| if ( int (bytes) == std::wctomb (mbchar, wc) |
| && int (bytes) == std::mblen (mbchar, bytes)) { |
| // NUL-terminate the multibyte character |
| mbchar [bytes] = '\0'; |
| *wchar = wc; |
| break; |
| } |
| |
| *mbchar = '\0'; |
| } |
| } |
| |
| #endif // 2 < _RWSTD_WCHAR_SIZE |
| |
| // return 0 on failure to find a sequence exactly `bytes' long |
| return !exact || bytes == std::strlen (mbchar) ? mbchar : 0; |
| } |
| |
| /****************************************************************************/ |
| |
// A wide character together with its NUL-terminated multibyte encoding.
struct WideCode
{
    wchar_t wchar;

    // MB_LEN_MAX bytes for the longest possible multibyte character
    // plus one for the terminating NUL that get_mb_char() stores right
    // after the character
    char mbchar [MB_LEN_MAX + 1];
};

// array of MB_LEN_MAX wide/multibyte character pairs
typedef WideCode mb_char_array_t [MB_LEN_MAX];
| |
| // fills consecutive elemenets of the `mb_chars' array with wide |
| // and multibyte characters between 1 and MB_CUR_MAX bytes long |
| // for the given locale returns the number of elements populated |
| // (normally, MB_CUR_MAX) |
| static std::size_t |
| get_mb_chars (mb_char_array_t mb_chars) |
| { |
| RW_ASSERT (0 != mb_chars); |
| |
| const char* mbc = |
| get_mb_char (&mb_chars [0].wchar, |
| mb_chars [0].mbchar, |
| std::size_t (-1)); |
| |
| if (!mbc) { |
| std::fprintf (stderr, "*** unable to find any multibyte characters " |
| "in locale \"%s\" with MB_CUR_MAX = %u\n", |
| std::setlocale (LC_CTYPE, 0), MB_CUR_MAX); |
| return 0; |
| } |
| |
| std::size_t mb_cur_max = std::strlen (mbc); |
| |
| if (MB_LEN_MAX < mb_cur_max) |
| mb_cur_max = MB_LEN_MAX; |
| |
| // fill each element of `mb_chars' a multibyte character |
| // of the corresponding length |
| for (std::size_t i = mb_cur_max; i; --i) { |
| |
| const std::size_t inx = i - 1; |
| |
| // try to generate a multibyte character `i' bytes long |
| mbc = get_mb_char (&mb_chars [inx].wchar, mb_chars [inx].mbchar, i); |
| |
| if (0 == mbc) { |
| if (i < mb_cur_max) { |
| std::fprintf (stderr, "*** unable to find %u-byte characters " |
| "in locale \"%s\" with MB_CUR_MAX = %u\n", |
| i + 1, std::setlocale (LC_CTYPE, 0), MB_CUR_MAX); |
| mb_cur_max = 0; |
| break; |
| } |
| --mb_cur_max; |
| } |
| } |
| |
| return mb_cur_max; |
| } |
| |
| /****************************************************************************/ |
| |
| // finds the multibyte locale with the largest MB_CUR_MAX value and |
| // fills consecutive elemenets of the `mb_chars' array with multibyte |
| // characters between 1 and MB_CUR_MAX bytes long for such a locale |
| static const char* |
| find_mb_locale (std::size_t *mb_cur_max, mb_char_array_t mb_chars) |
| { |
| RW_ASSERT (0 != mb_cur_max); |
| RW_ASSERT (0 != mb_chars); |
| |
| if (2 > MB_LEN_MAX) { |
| std::fprintf (stderr, "MB_LEN_MAX = %d, giving up\n", MB_LEN_MAX); |
| return 0; |
| } |
| |
| static const char *mb_locale_name; |
| |
| char saved_locale_name [1024]; |
| std::strcpy (saved_locale_name, std::setlocale (LC_CTYPE, 0)); |
| |
| *mb_cur_max = 0; |
| |
| // iterate over all installed locales |
| for (const char *name = rw_locales (); name && *name; |
| name += std::strlen (name) + 1) { |
| |
| if (std::setlocale (LC_CTYPE, name)) { |
| |
| // try to generate a set of multibyte characters |
| // with lengths from 1 and MB_CUR_MAX (or less) |
| const std::size_t cur_max = get_mb_chars (mb_chars); |
| |
| if (*mb_cur_max < cur_max) { |
| *mb_cur_max = cur_max; |
| mb_locale_name = name; |
| |
| // break when we've found a multibyte locale |
| // with the longest possible encoding |
| if (MB_LEN_MAX == *mb_cur_max) |
| break; |
| } |
| } |
| } |
| |
| if (*mb_cur_max < 2) { |
| std::fprintf (stderr, "*** unable to find a full set of multibyte " |
| "characters in locale \"%s\" with MB_CUR_MAX = %u " |
| "(computed)", mb_locale_name, *mb_cur_max); |
| mb_locale_name = 0; |
| } |
| else { |
| // (re)generate the multibyte characters for the saved locale |
| // as they may have been overwritten in subsequent iterations |
| // of the loop above (while searching for a locale with greater |
| // value of MB_CUR_MAX) |
| std::setlocale (LC_CTYPE, mb_locale_name); |
| get_mb_chars (mb_chars); |
| } |
| |
| std::setlocale (LC_CTYPE, saved_locale_name); |
| |
| return mb_locale_name; |
| } |
| |
| /****************************************************************************/ |
| |
// Maps a codecvt_base::result value to its name for use in diagnostics.
// Any value other than error, ok, or partial yields "noconv".
inline const char* codecvt_result (std::codecvt_base::result res)
{
    switch (res) {
    case std::codecvt_base::error:
        return "error";

    case std::codecvt_base::ok:
        return "ok";

    case std::codecvt_base::partial:
        return "partial";

    default:
        return "noconv";
    }
}
| |
| |
| template <class internT> |
| void test_out (int line, |
| const char* tname, |
| const std::mbstate_t *pstate, |
| const std::codecvt<internT, char, std::mbstate_t> &cvt, |
| const internT *src, |
| std::size_t src_len, |
| std::size_t src_off, |
| const char *res, |
| std::size_t res_len, |
| std::size_t res_off, |
| std::codecvt_base::result result) |
| { |
| static const std::mbstate_t initial_state = std::mbstate_t (); |
| |
| std::mbstate_t state = pstate ? *pstate : initial_state; |
| |
| // create and invalidate a buffer for the destination sequence |
| char buf [1024]; |
| std::memset (buf, -1, sizeof buf); |
| |
| // set up from, from_end, and from_next arguments |
| const internT* const from = src; |
| const internT* const from_end = from + src_len; |
| const internT* from_next = 0; |
| |
| // set up to, to_end, and to_next arguments |
| char* const to = buf; |
| char* const to_end = to + res_len; |
| char* to_next = 0; |
| |
| // call codecvt::out () with the arguments above |
| const std::codecvt_base::result cvtres = |
| cvt.out (state, |
| from, from_end, from_next, |
| to, to_end, to_next); |
| |
| // format a string describing the function call above |
| static char fcall [4096]; |
| std::memset (fcall, 0, sizeof fcall); |
| |
| rw_sprintf (fcall, |
| "codecvt<%s, char, mbstate_t>::out(state, " |
| "from = %{*.*Ac}, from + %td, from + %td, " |
| "to = %{#*s}, to + %d, to + %d)", |
| tname, int (sizeof *from), int (src_len), from, |
| from_end - from, from_next - from, |
| int (to_next - to), to, to_end - to, to_next - to); |
| |
| RW_ASSERT (std::strlen (fcall) < sizeof fcall); |
| |
| // verify the expected result of the conversion |
| rw_assert (cvtres == result, __FILE__, line, |
| "line %d: %s == %s, got %s", |
| __LINE__, fcall, |
| codecvt_result (result), |
| codecvt_result (cvtres)); |
| |
| // verify that the from_next pointer is set just past the last |
| // successfully converted character in the source sequence |
| rw_assert (from_next == from + src_off, __FILE__, line, |
| "line %d: %s: from_next == from + %d, got from + %d", |
| __LINE__, fcall, |
| src_off, from_next - from); |
| |
| // verify that the to_next pointer is set just past the last |
| // external character in the converted (destination) sequence |
| rw_assert (to_next == to + res_off, __FILE__, line, |
| "line %d: %s: to_next == to + %d, got to + %d", |
| __LINE__, fcall, |
| res_off, to_next - to); |
| |
| // compare the converted sequence against the expected result |
| rw_assert (0 == rw_strncmp (to, res, res_off), __FILE__, line, |
| "line %d: %s: expected %{#*s}, got %{#*s}", |
| __LINE__, fcall, |
| int (res_off), res, int (to_next - to), to); |
| |
| // verify that the function didn't write past the end |
| // of the destination buffer |
| rw_assert (char (-1) == to [res_off], 0, line, |
| "line %d: %s: expected %{#lc}, got %{#lc} " |
| "at end of destination buffer (offset %zu)", |
| __LINE__, fcall, |
| -1, to [res_off], res_off); |
| |
| // verify that the conversion state is as expected |
| rw_assert (!pstate || !std::memcmp (pstate, &state, sizeof state), |
| __FILE__, line, |
| "line %d: %s: unexpected conversion state", |
| __LINE__, fcall); |
| } |
| |
| /****************************************************************************/ |
| |
| typedef std::codecvt<char, char, std::mbstate_t> Codecvt; |
| typedef std::codecvt_byname<char, char, std::mbstate_t> CodecvtByname; |
| |
| |
| static void |
| test_codecvt (const Codecvt *pcvt = 0) |
| { |
| if (0 == pcvt) |
| rw_info (0, 0, __LINE__, |
| "std::codecvt<char, char, mbstate_t>::out(" |
| "state_type&, " |
| "const intern_type*, const intern_type*, " |
| "const intern_type*, " |
| "extern_type*, extern_type*, exterm_type*&)"); |
| |
| const std::locale classic = std::locale::classic (); |
| |
| const Codecvt &cvt = pcvt ? *pcvt : std::use_facet<Codecvt>(classic); |
| |
| #undef TEST |
| #define TEST(from, from_end_off, from_next_off, \ |
| res, res_end_off, res_next_off, result) \ |
| test_out (__LINE__, "char", 0, cvt, \ |
| from, from_end_off, from_next_off, \ |
| res, res_end_off, res_next_off, \ |
| std::codecvt_base::result) |
| |
| // +---------------------------- source sequence before conversion |
| // | +--------------------- (from_end - from) before conversion |
| // | | +------------------ (from_next - from) after conversion |
| // | | | +--------------- destination sequence after conversion |
| // | | | | +-------- (to_end - to) before conversion |
| // | | | | | +----- (to_next - to) after conversion |
| // | | | | | | +-- conversion result |
| // | | | | | | | |
| // V V V V V V V |
| TEST ("", 0, 0, "", 0, 0, noconv); |
| TEST ("a", 1, 0, "", 0, 0, noconv); |
| TEST ("b", 1, 0, "", 1, 0, noconv); |
| TEST ("ab", 2, 0, "", 0, 0, noconv); |
| TEST ("bc", 2, 0, "", 1, 0, noconv); |
| TEST ("cd", 2, 0, "", 2, 0, noconv); |
| TEST ("abc", 3, 0, "", 0, 0, noconv); |
| TEST ("\0", 1, 0, "", 0, 0, noconv); |
| } |
| |
| /****************************************************************************/ |
| |
| static void |
| test_codecvt_byname () |
| { |
| rw_info (0, 0, __LINE__, |
| "std::codecvt_byname<char, char, mbstate_t>::out(" |
| "state_type&, const intern_type*, const intern_type*, " |
| "const intern_type*, " |
| "extern_type*, extern_type*, exterm_type*&)"); |
| |
| const CodecvtByname cvt (""); |
| |
| test_codecvt (&cvt); |
| } |
| |
| /****************************************************************************/ |
| |
| #ifndef _RWSTD_NO_WCHAR_T |
| |
| typedef std::codecvt<wchar_t, char, std::mbstate_t> WCodecvt; |
| typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> WCodecvtByname; |
| |
| |
| static void |
| test_wcodecvt () |
| { |
| rw_info (0, 0, __LINE__, |
| "std::codecvt<wchar_t, char, mbstate_t>::out(" |
| "state_type&, " |
| "const intern_type*, const intern_type*, " |
| "const intern_type*, " |
| "extern_type*, extern_type*, exterm_type*&)"); |
| |
| const std::locale classic = std::locale::classic (); |
| |
| const WCodecvt &cvt = std::use_facet<WCodecvt>(classic); |
| |
| #undef TEST |
| #define TEST(from, from_end_off, from_next_off, \ |
| res, res_end_off, res_next_off, result) \ |
| test_out (__LINE__, "wchar_t", 0, cvt, \ |
| from, from_end_off, from_next_off, \ |
| res, res_end_off, res_next_off, \ |
| std::codecvt_base::result) |
| |
| // +------------------------------- source sequence before conversion |
| // | +---------------------- (from_end - from) before conversion |
| // | | +------------------- (from_next - from) after conversion |
| // | | | +---------------- destination sequence |
| // | | | | +-------- (to_end - to) before conversion |
| // | | | | | +----- (to_next - to) after conversion |
| // | | | | | | +-- conversion result |
| // | | | | | | | |
| // V V V V V V V |
| TEST (L"", 0, 0, "", 0, 0, ok); |
| TEST (L"a", 1, 1, "a", 1, 1, ok); |
| TEST (L"ab", 2, 2, "ab", 2, 2, ok); |
| TEST (L"abc", 3, 3, "abc", 3, 3, ok); |
| TEST (L"\0", 1, 1, "\0", 1, 1, ok); |
| TEST (L"\x80", 1, 1, "\x80", 1, 1, ok); |
| TEST (L"\xff", 1, 1, "\xff", 1, 1, ok); |
| TEST (L"a\0", 2, 2, "a\0", 2, 2, ok); |
| TEST (L"a\0b", 3, 3, "a\0b", 3, 3, ok); |
| TEST (L"\0\0", 2, 2, "\0\0", 2, 2, ok); |
| |
| TEST (L"123", 3, 0, "", 0, 0, partial); |
| TEST (L"234", 3, 1, "2", 1, 1, partial); |
| TEST (L"345", 3, 2, "34", 2, 2, partial); |
| TEST (L"456", 3, 3, "456", 3, 3, ok); |
| TEST (L"567", 3, 3, "567", 4, 3, ok); |
| TEST (L"6789", 3, 3, "678", 4, 3, ok); |
| } |
| |
| /****************************************************************************/ |
| |
| // exercises an algorithmic multibyte encoding |
| static void |
| test_wcodecvt_byname_algorithmic () |
| { |
| rw_info (0, 0, __LINE__, "locale (\"UTF-8@UCS\") [algorithmic encoding]"); |
| |
| // lowercase utf ==> relaxed checking (i.e., some, but not all, |
| // invalid UTF-8 sequence are accepted) |
| const WCodecvtByname cvt_relaxd ("utf-8@UCS"); |
| |
| // capital UTF ==> strict checking |
| const WCodecvtByname cvt_strict ("UTF-8@UCS"); |
| |
| #undef STRICT |
| #define STRICT(from, from_end_off, from_next_off, \ |
| res, res_end_off, res_next_off, result) \ |
| test_out (__LINE__, "wchar_t", 0, cvt_strict, \ |
| from, from_end_off, from_next_off, \ |
| res, res_end_off, res_next_off, \ |
| std::codecvt_base::result) |
| |
| #undef RELAXD |
| #define RELAXD(from, from_end_off, from_next_off, \ |
| res, res_end_off, res_next_off, result) \ |
| test_out (__LINE__, "wchar_t", 0, cvt_relaxd, \ |
| from, from_end_off, from_next_off, \ |
| res, res_end_off, res_next_off, \ |
| std::codecvt_base::result) |
| |
| #undef TEST |
| #define TEST(from, from_end_off, from_next_off, \ |
| res, res_end_off, res_next_off, result) \ |
| STRICT (from, from_end_off, from_next_off, \ |
| res, res_end_off, res_next_off, result); \ |
| RELAXD (from, from_end_off, from_next_off, \ |
| res, res_end_off, res_next_off, result) |
| |
| // +------------------------------- source sequence before conversion |
| // | +---------------------- (from_end - from) before conversion |
| // | | +------------------- (from_next - from) after conversion |
| // | | | +---------------- destination sequence |
| // | | | | +-------- (to_end - to) before conversion |
| // | | | | | +----- (to_next - to) after conversion |
| // | | | | | | +-- conversion result |
| // | | | | | | | |
| // V V V V V V V |
| TEST (L"", 0, 0, "", 0, 0, ok); |
| TEST (L"a", 1, 1, "a", 1, 1, ok); |
| TEST (L"ab", 2, 2, "ab", 2, 2, ok); |
| TEST (L"abc", 3, 3, "abc", 3, 3, ok); |
| TEST (L"\0", 1, 1, "\0", 1, 1, ok); |
| TEST (L"a\0", 2, 2, "a\0", 2, 2, ok); |
| TEST (L"a\0b", 3, 3, "a\0b", 3, 3, ok); |
| |
| TEST (L"123", 3, 0, "", 0, 0, partial); |
| TEST (L"234", 3, 1, "2", 1, 1, partial); |
| TEST (L"345", 3, 2, "34", 2, 2, partial); |
| TEST (L"456", 3, 3, "456", 3, 3, ok); |
| TEST (L"567", 3, 3, "567", 4, 3, ok); |
| |
| // exercise multibyte sequences |
| TEST (L"\x100", 1, 0, "\xc4\x80", 0, 0, partial); |
| TEST (L"\x101", 1, 0, "\xc4\x81", 1, 0, partial); |
| TEST (L"\x102", 1, 1, "\xc4\x82", 2, 2, ok); |
| |
| TEST (L"\x103x", 2, 0, "\xc4\x83", 0, 0, partial); |
| TEST (L"\x104y", 2, 1, "\xc4\x84", 2, 2, partial); |
| TEST (L"\x105z", 2, 2, "\xc4\x85z", 3, 3, ok); |
| TEST (L"\x106zz", 2, 2, "\xc4\x86z", 4, 3, ok); |
| |
| TEST (L"\x0901", 1, 0, "\xe0\xa4\x81", 0, 0, partial); |
| TEST (L"\x0902", 1, 0, "\xe0\xa4\x82", 1, 0, partial); |
| TEST (L"\x0903", 1, 0, "\xe0\xa4\x83", 2, 0, partial); |
| TEST (L"\x0904", 1, 1, "\xe0\xa4\x84", 3, 3, ok); |
| |
| TEST (L"\x0905\x0916", 2, 1, "\xe0\xa4\x85\xe0\xa4\x96", 4, 3, partial); |
| TEST (L"\x0906\x0917", 2, 1, "\xe0\xa4\x86\xe0\xa4\x97", 5, 3, partial); |
| TEST (L"\x0907\x0918", 2, 2, "\xe0\xa4\x87\xe0\xa4\x98", 6, 6, ok); |
| |
| // U+D800 to U+DFFF (UTF-16 surrogates) as well as U+FFFE and U+FFFF |
| // must not occur in normal UCS-4 data and should be treated like |
| // malformed or overlong sequences. |
| |
| STRICT (L"\xd800", 1, 0, "", 6, 0, error); |
| STRICT (L"\xd801", 1, 0, "", 6, 0, error); |
| STRICT (L"A\xd802", 2, 1, "A", 6, 1, error); |
| STRICT (L"AB\xd803", 3, 2, "AB", 6, 2, error); |
| STRICT (L"ABC\xd804", 4, 3, "ABC", 6, 3, error); |
| STRICT (L"\xdffe", 1, 0, "", 6, 0, error); |
| STRICT (L"\xdfff", 1, 0, "", 6, 0, error); |
| |
| TEST (L"\xe000", 1, 1, "\xee\x80\x80", 6, 3, ok); |
| TEST (L"\xd7ff", 1, 1, "\xed\x9f\xbf", 6, 3, ok); |
| |
| STRICT (L"\xd800", 1, 0, "", 6, 0, error); |
| STRICT (L"\xd801", 1, 0, "", 6, 0, error); |
| STRICT (L"\xdffe", 1, 0, "", 6, 0, error); |
| STRICT (L"\xdfff", 1, 0, "", 6, 0, error); |
| |
| TEST (L"\xe000", 1, 1, "\xee\x80\x80", 6, 3, ok); |
| |
| // verify that surrogate pairs are accepted in relaxed mode |
| RELAXD (L"\xd800", 1, 1, "\xed\xa0\x80", 6, 3, ok); |
| RELAXD (L"\xd801", 1, 1, "\xed\xa0\x81", 6, 3, ok); |
| RELAXD (L"\xdffe", 1, 1, "\xed\xbf\xbe", 6, 3, ok); |
| RELAXD (L"\xdfff", 1, 1, "\xed\xbf\xbf", 6, 3, ok); |
| } |
| |
| /****************************************************************************/ |
| |
| // exercises a table-based multibyte encoding |
| static void |
| test_wcodecvt_byname_table_based () |
| { |
| // create a locale from a generated character set description file |
| // where L'\1' maps to "1", L'\2' to "22", ..., and L'\x10' to "A", |
| // L'\x11' to "B", L'\x12' to "C", etc. |
| const char* const locname = create_locale (); |
| |
| if (!locname) { |
| rw_error (0, 0, __LINE__, "failed to create a locale database"); |
| return; |
| } |
| |
| std::locale loc; |
| |
| _TRY { |
| loc = std::locale (locname); |
| } |
| _CATCH (...) { |
| rw_error (0, 0, __LINE__, "locale(\"%s\") unexpectedly threw", locname); |
| return; |
| } |
| |
| const WCodecvt &cvt_table = std::use_facet<WCodecvt>(loc); |
| |
| rw_info (0, 0, __LINE__, "locale (\"%s\") [table-based encoding]", locname); |
| |
| #undef TEST |
| #define TEST(from, from_end_off, from_next_off, \ |
| res, res_end_off, res_next_off, result) \ |
| test_out (__LINE__, "wchar_t", 0, cvt_table, \ |
| from, from_end_off, from_next_off, \ |
| res, res_end_off, res_next_off, \ |
| std::codecvt_base::result) |
| |
| TEST (L"", 0, 0, "", 0, 0, ok); |
| TEST (L"\x10", 1, 1, "A", 1, 1, ok); |
| TEST (L"\x11\x12", 2, 2, "BC", 2, 2, ok); |
| TEST (L"\x12\x13\x14", 3, 3, "CDE", 3, 3, ok); |
| |
| TEST (L"\1\2\3\4\5\6", 1, 1, "1", 21, 1, ok); |
| TEST (L"\1\2\3\4\5\6", 2, 1, "1", 1, 1, partial); |
| TEST (L"\1\2\3\4\5\6", 2, 1, "1", 2, 1, partial); |
| TEST (L"\1\2\3\4\5\6", 2, 2, "122", 3, 3, ok); |
| TEST (L"\1\2\3\4\5\6", 2, 2, "122", 21, 3, ok); |
| TEST (L"\1\2\3\4\5\6", 3, 2, "122", 3, 3, partial); |
| TEST (L"\1\2\3\4\5\6", 3, 2, "122", 4, 3, partial); |
| TEST (L"\1\2\3\4\5\6", 3, 2, "122", 5, 3, partial); |
| TEST (L"\1\2\3\4\5\6", 3, 3, "122333", 6, 6, ok); |
| TEST (L"\1\2\3\4\5\6", 4, 3, "122333", 7, 6, partial); |
| TEST (L"\1\2\3\4\5\6", 3, 3, "122333", 21, 6, ok); |
| TEST (L"\1\2\3\4\5\6", 4, 4, "1223334444", 21, 10, ok); |
| TEST (L"\1\2\3\4\5\6", 5, 5, "122333444455555", 21, 15, ok); |
| TEST (L"\6\5\4\3\2\1", 6, 3, "666666555554444", 15, 15, partial); |
| TEST (L"\6\5\4\3\2\1", 6, 3, "666666555554444", 16, 15, partial); |
| |
| // exercise the ability to detect invalid characters (e.g., '*') |
| TEST (L"*\3\4\5\6\7", 6, 0, "", 27, 0, error); |
| TEST (L"\2*\4\5\6\7", 6, 0, "", 1, 0, partial); |
| TEST (L"\2*\4\5\6\7", 6, 1, "22", 2, 2, partial); |
| TEST (L"\2*\4\5\6\7", 6, 1, "22", 27, 2, error); |
| TEST (L"\2*\4\5\6\7", 6, 1, "22", 27, 2, error); |
| TEST (L"\2\3*\5\6\7", 6, 2, "22333", 27, 5, error); |
| TEST (L"\2\3\4*\6\7", 6, 3, "223334444", 27, 9, error); |
| } |
| |
| /****************************************************************************/ |
| |
| static void |
| make_strings (const char *pat, std::size_t patsize, |
| wchar_t *wstr, char *mbstr, |
| const mb_char_array_t mb_chars) |
| { |
| RW_ASSERT (0 != pat); |
| RW_ASSERT (0 != wstr); |
| RW_ASSERT (0 != mbstr); |
| RW_ASSERT (0 != mb_chars); |
| |
| typedef unsigned char UChar; |
| |
| *mbstr = '\0'; |
| |
| for (const char *s = pat; s != pat + patsize; ++s, ++wstr) { |
| |
| if ('%' == *s) { |
| |
| ++s; |
| |
| RW_ASSERT ('0' <= *s && *s <= char ('0' + MB_LEN_MAX)); |
| |
| std::size_t char_inx = *s - '0'; |
| |
| *wstr = mb_chars [char_inx].wchar; |
| |
| std::strcpy (mbstr, mb_chars [char_inx].mbchar); |
| mbstr += std::strlen (mbstr); |
| } |
| else { |
| *wstr = UChar (*s); |
| *mbstr++ = *s; |
| *mbstr = '\0'; |
| } |
| } |
| |
| *wstr = L'\0'; |
| } |
| |
| /****************************************************************************/ |
| |
| // exercises a libc-based multibyte encoding |
| static void |
| test_wcodecvt_byname_libc_based () |
| { |
| // find `mb_cur_max' multibyte characters in increasing length |
| // from 1 to mb_cur_max bytes long |
| |
| // i.e., initialize the first (N + 1) elements of mb_chars as follows: |
| // [0].mbstr = "0"; // where "0" is a single byte character |
| // [1].mbstr = "11"; // where "11" is a two-byte character |
| // [2].mbstr = "222"; // where "222" is a three-byte character |
| // ... |
| // [N].mbstr = "NNN...N"; // where "NNN...N" is an N-byte character |
| |
| std::size_t mb_cur_max = 0; |
| |
| mb_char_array_t mb_chars; |
| |
| const char* const locname = find_mb_locale (&mb_cur_max, mb_chars); |
| |
| if (0 == locname) { |
| rw_warn (0, 0, __LINE__, "unable to find a multibyte locale"); |
| return; |
| } |
| |
| std::locale loc; |
| |
| _TRY { |
| loc = std::locale (locname); |
| } |
| _CATCH (...) { |
| rw_error (0, 0, __LINE__, "locale(\"%s\") unexpectedly threw", locname); |
| return; |
| } |
| |
| const WCodecvt &cvt_libc = std::use_facet<WCodecvt>(loc); |
| |
| rw_info (0, 0, __LINE__, "locale (\"%s\") [libc-based encoding, " |
| "single-byte characters]", locname); |
| |
| ////////////////////////////////////////////////////////////////// |
| // exercise sequences containing single-byte characters |
| |
| #undef TEST |
| #define TEST(from, from_end_off, from_next_off, \ |
| res, res_end_off, res_next_off, result) \ |
| test_out (__LINE__, "wchar_t", 0, cvt_libc, \ |
| from, from_end_off, from_next_off, \ |
| res, res_end_off, res_next_off, \ |
| std::codecvt_base::result) |
| |
| // +----------------------------------- source sequence (from) |
| // | +------------------------ from_end offset from from |
| // | | +--------------------- expected from_next offset |
| // | | | +------------------ expected destination sequence |
| // | | | | +-------- to_end offset from to |
| // | | | | | +----- expected to_next offset |
| // | | | | | | +-- expected result (to) |
| // | | | | | | | |
| // V V V V V V V |
| TEST (L"", 0, 0, "", 0, 0, ok); |
| TEST (L"a", 1, 1, "a", 1, 1, ok); |
| TEST (L"b", 1, 1, "b", 2, 1, ok); |
| TEST (L"c", 1, 1, "c", 3, 1, ok); |
| TEST (L"d", 1, 1, "d", 4, 1, ok); |
| TEST (L"e", 1, 1, "e", 5, 1, ok); |
| TEST (L"f", 1, 1, "f", 6, 1, ok); |
| TEST (L"ab", 2, 2, "ab", 6, 2, ok); |
| TEST (L"abc", 3, 3, "abc", 6, 3, ok); |
| TEST (L"abcd", 4, 4, "abcd", 6, 4, ok); |
| TEST (L"abcde", 5, 5, "abcde", 6, 5, ok); |
| TEST (L"abcdef", 6, 6, "abcdef", 6, 6, ok); |
| |
| TEST (L"\n", 1, 1, "\n", 1, 1, ok); |
| TEST (L"\n\377", 1, 1, "\n", 1, 1, ok); |
| |
| // exercise embedded NULs |
| TEST (L"\0abcdef", 7, 7, "\0abcdef", 7, 7, ok); |
| TEST (L"a\0bcdef", 7, 7, "a\0bcdef", 7, 7, ok); |
| TEST (L"ab\0cdef", 7, 7, "ab\0cdef", 7, 7, ok); |
| TEST (L"abc\0def", 7, 7, "abc\0def", 7, 7, ok); |
| TEST (L"abcd\0ef", 7, 7, "abcd\0ef", 7, 7, ok); |
| TEST (L"abcde\0f", 7, 7, "abcde\0f", 7, 7, ok); |
| TEST (L"abcdef\0", 7, 7, "abcdef\0", 7, 7, ok); |
| |
| TEST (L"ab\0cd\0ef", 8, 8, "ab\0cd\0ef", 8, 8, ok); |
| |
| ////////////////////////////////////////////////////////////////// |
| // exercise sequences containing 2-byte characters |
| |
| if (mb_cur_max < 2) { |
| rw_warn (0, 0, __LINE__, "no multibyte characters found"); |
| return; |
| } |
| |
| size_t bufsize = 0; |
| char *sequences = 0; |
| |
| // verify the length of each character |
| for (std::size_t i = 0; i < mb_cur_max; ++i) { |
| const std::size_t mb_len = std::strlen (mb_chars [i].mbchar); |
| |
| if (i + 1 != mb_len) { |
| rw_assert (0, 0, __LINE__, |
| "unexpected multibyte character length: " |
| "%u, expected %u", mb_len, i + 1); |
| return; |
| } |
| |
| rw_asnprintf (&sequences, &bufsize, |
| "%{+}%s{ %{#lc}, %{#s} }", |
| i ? ", " : "", |
| mb_chars [i].wchar, |
| mb_chars [i].mbchar); |
| } |
| |
| rw_info (0, 0, __LINE__, |
| "locale (\"%s\") [libc-based encoding, " |
| "MB_CUR_MAX = %u, multi-byte characters: %s]", |
| locname, mb_cur_max, (const char*)sequences); |
| |
| std::free (sequences); |
| |
| #ifdef _RWSTD_OS_SUNOS |
| |
| if (0 == std::strcmp ("5.7", _RWSTD_OS_RELEASE)) { |
| |
| rw_warn (0, 0, __LINE__, "skipping tests due to a SunOS 5.7 libc bug"); |
| return; |
| } |
| |
| #endif // _RWSTD_OS_SUNOS |
| |
| wchar_t wstr [256]; |
| char mbstr [256]; |
| |
| #undef TEST |
| #define TEST(pat, from_end_off, from_next_off, \ |
| res_end_off, res_next_off, result) \ |
| make_strings (pat, sizeof pat - 1, \ |
| wstr, mbstr, mb_chars); \ |
| test_out (__LINE__, "wchar_t", 0, cvt_libc, \ |
| wstr, from_end_off, from_next_off, \ |
| mbstr, res_end_off, res_next_off, \ |
| std::codecvt_base::result) |
| |
| // %N for N in [0, MB_CUR_MAX) represents a wide character |
| // whose multibyte representation is (N + 1) bytes long |
| // any other (narrow) character, including the NUL, is |
| // widened to a wchar_t as if by an ordinary cast |
| |
| // +------------------------------ source sequence (from) |
| // | +-------------- initial (from_end - from) |
| // | | +----------- expected (from_next - from) |
| // | | | +-------- initial (to_limit - to) |
| // | | | | +----- expected (to_next - to) |
| // | | | | | +-- expected conversion result |
| // | | | | | | |
| // V V V V V V |
| TEST ("%0", 0, 0, 0, 0, ok); |
| TEST ("%0", 1, 0, 0, 0, partial); |
| TEST ("%0", 1, 1, 1, 1, ok); |
| TEST ("%1", 1, 0, 0, 0, partial); |
| TEST ("%1", 1, 0, 1, 0, partial); |
| TEST ("%1", 1, 1, 2, 2, ok); |
| |
| TEST ("a%1", 2, 1, 2, 1, partial); |
| TEST ("b%1", 2, 2, 3, 3, ok); |
| |
| TEST ("%1%1", 2, 0, 1, 0, partial); |
| TEST ("%1%1", 2, 1, 2, 2, partial); |
| TEST ("%1%1", 2, 1, 3, 2, partial); |
| TEST ("%1%1", 2, 2, 4, 4, ok); |
| |
| TEST ("%1X%1Y%1Z", 6, 0, 1, 0, partial); |
| TEST ("%1X%1Y%1Z", 6, 1, 2, 2, partial); |
| TEST ("%1X%1Y%1Z", 6, 2, 3, 3, partial); |
| TEST ("%1X%1Y%1Z", 6, 2, 4, 3, partial); |
| TEST ("%1X%1Y%1Z", 6, 3, 5, 5, partial); |
| TEST ("%1X%1Y%1Z", 6, 4, 6, 6, partial); |
| TEST ("%1X%1Y%1Z", 6, 4, 7, 6, partial); |
| TEST ("%1X%1Y%1Z", 6, 5, 8, 8, partial); |
| TEST ("%1X%1Y%1Z", 6, 6, 9, 9, ok); |
| |
| // exercise embedded NULs |
| TEST ("\0", 1, 1, 1, 1, ok); |
| TEST ("\0X", 2, 2, 2, 2, ok); |
| TEST ("\0X\0", 3, 3, 3, 3, ok); |
| TEST ("\0X\0Y", 4, 4, 4, 4, ok); |
| TEST ("\0X\0Y\0", 5, 5, 5, 5, ok); |
| TEST ("\0X\0Y\0Z", 6, 6, 6, 6, ok); |
| TEST ("\0XYZ\0", 5, 5, 5, 5, ok); |
| |
| TEST ("%1\0", 2, 2, 3, 3, ok); |
| TEST ("\0%1", 2, 2, 3, 3, ok); |
| |
| TEST ("%1\0%1", 3, 3, 5, 5, ok); |
| TEST ("\0%1\0", 3, 3, 4, 4, ok); |
| |
| TEST ("%1\0%1\0%1\0", 6, 2, 3, 3, partial); |
| TEST ("%1\0%1\0%1\0", 6, 3, 5, 5, partial); |
| TEST ("%1\0%1\0%1\0", 6, 4, 6, 6, partial); |
| TEST ("%1\0%1\0%1\0", 6, 5, 8, 8, partial); |
| TEST ("%1\0%1\0%1\0", 6, 6, 9, 9, ok); |
| |
| // exercise consecutive embedded NULs |
| TEST ("\0\0%1\0\0%1\0\0", 8, 8, 10, 10, ok); |
| TEST ("\0\0\0\0\0\0\0XY", 9, 9, 9, 9, ok); |
| |
| ////////////////////////////////////////////////////////////////// |
| // exercise sequences containing 3-byte characters |
| |
| if (mb_cur_max < 3) |
| return; |
| |
| TEST ("%2", 1, 0, 0, 0, partial); |
| TEST ("%2", 1, 0, 1, 0, partial); |
| TEST ("%2", 1, 0, 2, 0, partial); |
| TEST ("%2", 1, 1, 3, 3, ok); |
| |
| TEST ("%2%1", 2, 0, 0, 0, partial); |
| TEST ("%2%1", 2, 0, 1, 0, partial); |
| TEST ("%2%1", 2, 0, 2, 0, partial); |
| TEST ("%2%1", 2, 1, 3, 3, partial); |
| TEST ("%2%1", 2, 1, 4, 3, partial); |
| TEST ("%2%1", 2, 2, 5, 5, ok); |
| |
| TEST ("%2%2", 2, 0, 0, 0, partial); |
| TEST ("%2%2", 2, 0, 1, 0, partial); |
| TEST ("%2%2", 2, 0, 2, 0, partial); |
| TEST ("%2%2", 2, 1, 3, 3, partial); |
| TEST ("%2%2", 2, 1, 4, 3, partial); |
| TEST ("%2%2", 2, 1, 5, 3, partial); |
| TEST ("%2%2", 2, 2, 6, 6, ok); |
| |
| TEST ("%2%1%2", 3, 0, 0, 0, partial); |
| TEST ("%2%1%2", 3, 0, 1, 0, partial); |
| TEST ("%2%1%2", 3, 0, 2, 0, partial); |
| TEST ("%2%1%2", 3, 1, 3, 3, partial); |
| TEST ("%2%1%2", 3, 1, 4, 3, partial); |
| TEST ("%2%1%2", 3, 2, 5, 5, partial); |
| TEST ("%2%1%2", 3, 2, 6, 5, partial); |
| TEST ("%2%1%2", 3, 2, 7, 5, partial); |
| TEST ("%2%1%2", 3, 3, 8, 8, ok); |
| |
| TEST ("%2%1%2%0%1%2$", 6, 6, 14, 14, ok); |
| |
| // exercise embedded NULs |
| TEST ("\0%2$", 2, 2, 4, 4, ok); |
| TEST ("%2\0$", 2, 2, 4, 4, ok); |
| TEST ("\0\0%2$", 3, 3, 5, 5, ok); |
| TEST ("%2\0\0$", 3, 3, 5, 5, ok); |
| TEST ("\0\0%2\0%2$", 5, 5, 9, 9, ok); |
| TEST ("\0%2\0\0%2$", 5, 5, 9, 9, ok); |
| TEST ("\0%2\0%2\0$", 5, 5, 9, 9, ok); |
| TEST ("%2\0\0%2\0$", 5, 5, 9, 9, ok); |
| TEST ("%2\0%2\0\0$", 5, 5, 9, 9, ok); |
| |
| ////////////////////////////////////////////////////////////////// |
| // exercise sequences containing 4-byte characters |
| |
| if (mb_cur_max < 4) |
| return; |
| |
| TEST ("%3", 1, 0, 0, 0, partial); |
| TEST ("%3", 1, 0, 1, 0, partial); |
| TEST ("%3", 1, 0, 2, 0, partial); |
| TEST ("%3", 1, 0, 3, 0, partial); |
| TEST ("%3", 1, 1, 4, 4, ok); |
| |
| TEST ("%3%3", 2, 0, 3, 0, partial); |
| TEST ("%3%3", 2, 1, 4, 4, partial); |
| TEST ("%3%3", 2, 1, 5, 4, partial); |
| TEST ("%3%3", 2, 1, 6, 4, partial); |
| TEST ("%3%3", 2, 1, 7, 4, partial); |
| TEST ("%3%3", 2, 2, 8, 8, ok); |
| |
| TEST ("%3G%3", 3, 2, 8, 5, partial); |
| TEST ("%3G%3", 3, 3, 9, 9, ok); |
| |
| TEST ("%3%1%3", 3, 2, 9, 6, partial); |
| TEST ("%3%1%3", 3, 3, 10, 10, ok); |
| |
| TEST ("%3%2%1%0%1%2%3", 7, 7, 19, 19, ok); |
| |
| // 4+1+3+1+2+1+1+1+2+1+3+1+4 = 25 bytes (13 wchar_t) |
| TEST ("%3\0%2\0%1\0%0\0%1\0%2\0%3$", 13, 13, 25, 25, ok); |
| // 4+1+1+3+2+1+1+1+2+1+3+4+1 = 25 bytes (13 wchar_t) |
| TEST ("%3\0\0%2%1\0%0\0%1\0%2%3\0$", 13, 13, 25, 25, ok); |
| // 1+4+3+1+2+1+1+1+2+1+3+4+1 = 25 bytes (13 wchar_t) |
| TEST ("\0%3%2\0%1\0\0%0%1\0%2%3\0$", 13, 13, 25, 25, ok); |
| // 1+4+3+1+2+1+2+1+3+4+1+1+1 = 25 bytes (13 wchar_t) |
| TEST ("\0%3%2\0%1%0%1\0%2%3\0\0\0$", 13, 13, 25, 25, ok); |
| // 1+1+1+1+1+1+4+3+2+1+2+3+4 = 25 bytes (13 wchar_t) |
| TEST ("\0\0\0\0\0\0%3%2%1%0%1%2%3$", 13, 13, 25, 25, ok); |
| } |
| |
| /****************************************************************************/ |
| |
| static void |
| test_wcodecvt_byname () |
| { |
| rw_info (0, 0, __LINE__, |
| "std::codecvt_byname<wchar_t, char, mbstate_t>::out(" |
| "state_type&, " |
| "const intern_type*, const intern_type*, " |
| "const intern_type*, " |
| "extern_type*, extern_type*, exterm_type*&)"); |
| |
| test_wcodecvt_byname_algorithmic (); |
| test_wcodecvt_byname_table_based (); |
| test_wcodecvt_byname_libc_based (); |
| } |
| |
| #endif // _RWSTD_NO_WCHAR_T |
| |
| /****************************************************************************/ |
| |
| static int |
| run_test (int, char**) |
| { |
| // set up RWSTD_LOCALE_ROOT and other environment variables |
| // here as opposed to at program startup to work around a |
| // SunOS 5.7 bug in putenv() (PR #30017) |
| locale_root = rw_set_locale_root (); |
| |
| test_codecvt (); |
| test_codecvt_byname (); |
| |
| #ifndef _RWSTD_NO_WCHAR_T |
| |
| // exercise wchar_t specializations |
| test_wcodecvt (); |
| test_wcodecvt_byname (); |
| |
| #else // if defined (_RWSTD_NO_WCHAR_T) |
| |
| rw_warn (0, 0, __LINE__, "_RWSTD_NO_WCHAR_T #defined, cannot test"); |
| |
| #endif // _RWSTD_NO_WCHAR_T |
| |
| return 0; |
| } |
| |
| /****************************************************************************/ |
| |
| int main (int argc, char *argv[]) |
| { |
| return rw_test (argc, argv, __FILE__, |
| "lib.locale.codecvt.virtuals", |
| 0 /* no comment */, |
| run_test, |
| "", |
| (void*)0 /* sentinel */); |
| } |