blob: 22cb02e7715c4e1fce1215cf8af3c7174d65f517 [file] [log] [blame]
/***************************************************************************
*
* 22.locale.ctype.narrow.cpp - tests exercising the narrow() and widen()
* member functions of the ctype facet
*
* $Id$
*
***************************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*
* Copyright 2001-2008 Rogue Wave Software, Inc.
*
**************************************************************************/
// DESCRIPTION: The test iterates over a subset of locales installed
// on a machine, exercising the narrow() and widen() members of the
// C++ ctype facet and, when one of the checked conditions fails,
// comparing the outcome against that of the corresponding C library
// character classification and conversion functions.
#include <rw/_defs.h>
#if defined __linux__
// on Linux define _XOPEN_SOURCE to get CODESET defined in <langinfo.h>
# define _XOPEN_SOURCE 500 /* Single Unix conformance */
// bring __int32_t into scope (otherwise <wctype.h> fails to compile)
# include <sys/types.h>
#endif // __linux__
// see Onyx PR #28150
#if defined (__SUNPRO_CC) && __SUNPRO_CC <= 0x540
# include <wchar.h>
#endif // defined (__SUNPRO_CC) && __SUNPRO_CC <= 0x540
#include <locale>
#include <climits>
#include <clocale>
#include <cstring>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cwchar> // for WEOF, btowc(), wctob()
#include <cwctype> // for iswxxx()
#if !defined (_WIN32)
# if !defined (LC_MESSAGES)
# define LC_MESSAGES _RWSTD_LC_MESSAGES
# endif // LC_MESSAGES
# include <langinfo.h>
#endif // _WIN32
#include <rw_driver.h>
#include <rw_file.h> // for SLASH
#include <rw_locale.h> // for rw_locale_query()
/**************************************************************************/
// the root of the locale directory (RWSTD_LOCALE_ROOT)
// not set here to avoid Solaris 7 putenv() bug (PR #30017)
// NOTE(review): not referenced anywhere else in the visible part of
// this file -- confirm whether it is still needed
const char* locale_root;
// NOTE(review): NLOOPS and MAX_STR_SIZE are not referenced anywhere
// else in the visible part of this file -- confirm whether still needed
#define NLOOPS 25
#define MAX_STR_SIZE 16
// the query passed to rw_locale_query() by BEGIN_LOCALE_LOOP to obtain
// the list of locales to exercise when locale_list hasn't been set
#define LOCALES "{{en-US,de-DE,fr-FR,es-ES}-*-{ISO-8859-*,UTF-8,CP125?}," \
"{ja-JP-*-{EUC-JP,SHIFT_JIS,UTF-8,CP125?}}," \
"{zh-CN-*-{GB*,UTF-8,CP125?}}," \
"{ru-RU-*-{KOI*,UTF-8,CP125?}}}"
// list of locale names, each terminated by a NUL, iterated over by
// BEGIN_LOCALE_LOOP; assigned from rw_opt_locales in run_test(int, char**)
// and, when still 0 at the time of the first loop, from rw_locale_query()
const char* locale_list = 0;
// BEGIN_LOCALE_LOOP opens a loop over every name in locale_list (first
// obtaining the list from rw_locale_query() if locale_list is 0). The
// list is walked by skipping past the terminating NUL of each name and
// the loop ends at the first empty name. For each name the macro:
//   - constructs a std::locale named `loc' from the name,
//   - binds `ctc' to the ctype<char> and `ctp' to the ctype<charT>
//     facet of that locale (a type named charT must be in scope at
//     the point of use), and
//   - starts an inner loop that runs loop_cntrl from 0 through num - 1;
//     the statement following the macro becomes the body of that loop.
// The macro leaves the outer for loop and a _TRY block open; they must
// be closed by a matching END_LOCALE_LOOP.
#define BEGIN_LOCALE_LOOP(num, locname, loop_cntrl) \
if (!locale_list) locale_list = rw_locale_query (LC_CTYPE, LOCALES); \
for (const char* locname = locale_list; \
locname && *locname; locname += std::strlen (locname) + 1) { \
_TRY { \
const std::locale loc (locname); \
const std::ctype<char> &ctc = \
_STD_USE_FACET (std::ctype<char>, loc); \
_RWSTD_UNUSED (ctc); \
const std::ctype<charT> &ctp = \
_STD_USE_FACET (std::ctype<charT>, loc); \
for (int loop_cntrl = 0; loop_cntrl < int (num); loop_cntrl++)
// END_LOCALE_LOOP closes the _TRY block and the outer for loop opened
// by BEGIN_LOCALE_LOOP; any exception thrown while processing a locale
// is caught and reported as a failed assertion that names the locale,
// after which the loop moves on to the next locale name
#define END_LOCALE_LOOP(locname) \
} \
_CATCH (...) { \
rw_assert (0, 0, __LINE__, \
"locale (\"%s\") threw an exception", locname); \
} \
}
// for notational convenience
// UChar is used throughout this file to convert char values to
// unsigned char before they are passed to the C library functions
typedef unsigned char UChar;
// shorthands for the std::ctype_base character classification masks
#define ALPHA std::ctype_base::alpha
#define UPPER std::ctype_base::upper
#define LOWER std::ctype_base::lower
#define DIGIT std::ctype_base::digit
#define SPACE std::ctype_base::space
#define CNTRL std::ctype_base::cntrl
#define PUNCT std::ctype_base::punct
#define XDIGIT std::ctype_base::xdigit
#define GRAPH std::ctype_base::graph
#define PRINT std::ctype_base::print
// wrapper functions for the c library char and wchar_t functions
// Computes the subset of the classification bits requested in `mask'
// that the C library <cctype> functions report for the narrow
// character `ch'.
//
// When `locname' is non-null the LC_CTYPE category of the global C
// locale is temporarily switched to the named locale and reinstated
// before returning; an empty mask is returned when the locale cannot
// be set. When `locname' is null the current C locale is used as is.
std::ctype_base::mask libc_mask (int mask, char ch, const char *locname)
{
    // name of the LC_CTYPE locale in effect on entry; only filled in
    // (and used) when a locale switch has been requested
    char saved_locale [256];

    const bool switch_locale = 0 != locname;

    if (switch_locale) {
        std::strcpy (saved_locale, std::setlocale (LC_CTYPE, 0));

        // nothing has been changed, so there is nothing to restore
        if (!std::setlocale (LC_CTYPE, locname))
            return std::ctype_base::mask ();
    }

    // pass the character to libc as the value of an unsigned char
    const int uc = UChar (ch);

    int bits = 0;

    if ((mask & ALPHA) && (std::isalpha)(uc))
        bits |= ALPHA;

    if ((mask & CNTRL) && (std::iscntrl)(uc))
        bits |= CNTRL;

    if ((mask & DIGIT) && (std::isdigit)(uc))
        bits |= DIGIT;

    if ((mask & GRAPH) && (std::isgraph)(uc))
        bits |= GRAPH;

    if ((mask & LOWER) && (std::islower)(uc))
        bits |= LOWER;

    if ((mask & PRINT) && (std::isprint)(uc))
        bits |= PRINT;

    if ((mask & PUNCT) && (std::ispunct)(uc))
        bits |= PUNCT;

    if ((mask & SPACE) && (std::isspace)(uc))
        bits |= SPACE;

    if ((mask & UPPER) && (std::isupper)(uc))
        bits |= UPPER;

    if ((mask & XDIGIT) && (std::isxdigit)(uc))
        bits |= XDIGIT;

    if (switch_locale)
        std::setlocale (LC_CTYPE, saved_locale);

    return std::ctype_base::mask (bits);
}
// Returns true if the C library classifies the narrow character `ch'
// as belonging to at least one of the categories in `mask' (see
// libc_mask() for the meaning of `locname').
inline bool libc_is (std::ctype_base::mask mask, char ch, const char *locname)
{
    return 0 != (libc_mask (mask, ch, locname) & mask);
}
// Narrow-to-narrow string "conversion": copies the NUL-terminated
// string `src' (including the terminator) into `dst' and returns
// `dst'. When either pointer is null, or when both refer to the
// same buffer, nothing is copied and `src' is returned.
const char* narrow (char *dst, const char *src)
{
    const bool nothing_to_copy = !src || !dst || src == dst;

    if (nothing_to_copy)
        return src;

    return std::strcpy (dst, src);
}
// Narrow-to-narrow string "widening": duplicates the NUL-terminated
// string `src', terminator included, in `dst' and returns `dst'.
// If there is nothing to do (a null argument, or source and
// destination are one and the same) `src' is returned untouched.
const char* widen (char *dst, const char *src)
{
    if (src && dst && src != dst) {
        // number of bytes to copy, counting the terminating NUL
        const std::size_t nbytes = std::strlen (src) + 1;

        std::memcpy (dst, src, nbytes);

        return dst;
    }

    return src;
}
// Trivial char-to-char overload of widen(): the first argument only
// selects the overload and the locale name is ignored; the character
// is returned unchanged.
char widen (char /* tag */, char ch, const char* /* locname */)
{
    const char same = ch;
    return same;
}
// Trivial char-to-char overload of narrow(): the locale name is
// ignored and the character is returned unchanged.
char narrow (char ch, const char* /* locname */)
{
    const char same = ch;
    return same;
}
#ifndef _RWSTD_NO_WCHAR_T
// forward declaration of the wide character overload of libc_is()
// defined further down in this file; without it the call in cond1()
// that passes a wchar_t would silently bind to the char overload
// (the only one declared so far) and narrow the wide character
bool libc_is (std::ctype_base::mask, wchar_t, const char*);
#endif   // _RWSTD_NO_WCHAR_T

// cond1() verifies condition [1] in test_narrow_widen()
// below using libc functions
//
// The LC_CTYPE category of the global C locale is temporarily set to
// `locname' and reinstated before returning. The function returns
// false when the locale cannot be set. Otherwise it returns true when
// `ch' has no single-byte wide equivalent (the conversion yields WEOF),
// when libc classifies `ch' as a member of `mask', or when libc does
// not classify the widened character as a member of `mask'.
bool cond1 (std::ctype_base::mask mask, char ch, const char *locname)
{
    // save the name of the current locale before it is replaced
    char curlocname [256];
    std::strcpy (curlocname, std::setlocale (LC_CTYPE, 0));

    if (0 == std::setlocale (LC_CTYPE, locname))
        return false;

#ifdef __SUNPRO_CC
    // working around a SunPro bug (PR #28150)
    using std::wint_t;
#endif   // __SUNPRO_CC

    // convert the narrow character to its wide equivalent, if any
#ifndef _RWSTD_NO_BTOWC
    const std::wint_t wc = std::btowc (UChar (ch));
#elif !defined (_RWSTD_NO_MBSTOWCS)
    wchar_t tmp;
    const std::wint_t wc = 1 == std::mbstowcs (&tmp, &ch, 1) ? tmp : WEOF;
#else
    const std::wint_t wc = WEOF;
#endif   // _RWSTD_NO_BTOWC, _RWSTD_NO_MBSTOWCS

    // the locale has already been set above, hence the null locale
    // name in the calls to libc_is()
    const bool result =
        WEOF == wc || libc_is (mask, ch, 0) || !libc_is (mask, wchar_t (wc), 0);

    std::setlocale (LC_CTYPE, curlocname);

    return result;
}
// cond3() overloads verify condition [3] in test_narrow_widen()
// below using libc functions
//
// This overload handles ctype<char>: none of its arguments is
// examined and the condition is unconditionally reported as holding.
bool cond3 (std::ctype_base::mask /* mask */,
            char                  /* ch */,
            const char*           /* locname */)
{
    const bool holds = true;
    return holds;
}
#ifndef _RWSTD_NO_WCHAR_T
// Converts the NUL-terminated wide string `src' to a multibyte string
// using wcstombs() and returns a pointer to the result. The result is
// stored in `dst' or, when `dst' is null, in a function-local static
// buffer. Returns 0 when `src' is null; when the conversion fails the
// result is set to the empty string.
// NOTE(review): the conversion limit is the size of the static buffer
// even when the caller supplies `dst' -- callers presumably provide
// at least that much space; verify.
const char* narrow (char *dst, const wchar_t *src)
{
    static char fallback [4096];

    if (0 == src)
        return 0;

    char* const out = dst ? dst : fallback;

    std::size_t count = std::wcslen (src);

    _RWSTD_ASSERT (count < sizeof fallback);

    count = std::wcstombs (out, src, sizeof fallback / sizeof *fallback);

    // wcstombs() returns (size_t)-1 on a conversion error
    if (std::size_t (-1) == count)
        *out = 0;

    return out;
}
// Converts the NUL-terminated multibyte string `src' to a wide string
// using mbstowcs() and returns a pointer to the result. The result is
// stored in `dst' or, when `dst' is null, in a function-local static
// buffer. Returns 0 when `src' is null; when the conversion fails the
// result is set to the empty string.
// NOTE(review): the conversion limit is the element count of the
// static buffer even when the caller supplies `dst' -- callers
// presumably provide at least that much space; verify.
const wchar_t* widen (wchar_t *dst, const char *src)
{
    static wchar_t fallback [4096];

    if (0 == src)
        return 0;

    wchar_t* const out = dst ? dst : fallback;

    std::size_t count = std::strlen (src);

    _RWSTD_ASSERT (count < sizeof fallback /sizeof *fallback);

    count = std::mbstowcs (out, src, sizeof fallback / sizeof *fallback);

    // mbstowcs() returns (size_t)-1 on a conversion error
    if (std::size_t (-1) == count)
        *out = 0;

    return out;
}
// Computes the subset of the classification bits requested in `mask'
// that the C library <cwctype> functions report for the wide
// character `ch'.
//
// When `locname' is non-null the LC_CTYPE category of the global C
// locale is temporarily switched to the named locale and reinstated
// before returning; an empty mask is returned when the locale cannot
// be set. When `locname' is null the current C locale is used as is.
std::ctype_base::mask libc_mask (int mask, wchar_t ch, const char *locname)
{
    // name of the LC_CTYPE locale in effect on entry; only filled in
    // (and used) when a locale switch has been requested
    char saved_locale [256];

    const bool switch_locale = 0 != locname;

    if (switch_locale) {
        std::strcpy (saved_locale, std::setlocale (LC_CTYPE, 0));

        // nothing has been changed, so there is nothing to restore
        if (!std::setlocale (LC_CTYPE, locname))
            return std::ctype_base::mask ();
    }

    int bits = 0;

    if ((mask & ALPHA) && (std::iswalpha)(ch))
        bits |= ALPHA;

    if ((mask & CNTRL) && (std::iswcntrl)(ch))
        bits |= CNTRL;

    if ((mask & DIGIT) && (std::iswdigit)(ch))
        bits |= DIGIT;

    if ((mask & GRAPH) && (std::iswgraph)(ch))
        bits |= GRAPH;

    if ((mask & LOWER) && (std::iswlower)(ch))
        bits |= LOWER;

    if ((mask & PRINT) && (std::iswprint)(ch))
        bits |= PRINT;

    if ((mask & PUNCT) && (std::iswpunct)(ch))
        bits |= PUNCT;

    if ((mask & SPACE) && (std::iswspace)(ch))
        bits |= SPACE;

    if ((mask & UPPER) && (std::iswupper)(ch))
        bits |= UPPER;

    if ((mask & XDIGIT) && (std::iswxdigit)(ch))
        bits |= XDIGIT;

    if (switch_locale)
        std::setlocale (LC_CTYPE, saved_locale);

    return std::ctype_base::mask (bits);
}
// Returns true if the C library classifies the wide character `ch'
// as belonging to at least one of the categories in `mask' (see
// libc_mask() for the meaning of `locname').
bool libc_is (std::ctype_base::mask mask, wchar_t ch, const char *locname)
{
    return 0 != (libc_mask (mask, ch, locname) & mask);
}
// Widens the narrow character `ch' using the C library with the
// LC_CTYPE category of the global C locale temporarily set to
// `locname'. The first argument only selects the overload. When the
// locale cannot be set the character is returned converted via
// unsigned char instead.
wchar_t widen (wchar_t, char ch, const char *locname)
{
    // remember the locale in effect so that it can be reinstated
    char saved_locale [256];
    std::strcpy (saved_locale, std::setlocale (LC_CTYPE, 0));

    if (!std::setlocale (LC_CTYPE, locname))
        return UChar (ch);

    wchar_t wide;

#ifndef _RWSTD_NO_BTOWC

    wide = std::btowc (UChar (ch));

#elif !defined (_RWSTD_NO_MBTOWC)

    // anything other than a single-byte conversion is treated as failure
    if (1 != std::mbtowc (&wide, &ch, 1))
        wide = wchar_t (WEOF);

#else

    wide = UChar (ch);

#endif   // _RWSTD_NO_BTOWC, _RWSTD_NO_MBTOWC

    if (0 != locname)
        std::setlocale (LC_CTYPE, saved_locale);

    return wide;
}
// Narrows the wide character `ch' using the C library with the
// LC_CTYPE category of the global C locale temporarily set to
// `locname'. When the locale cannot be set the character is returned
// converted via unsigned char instead.
char narrow (wchar_t ch, const char *locname)
{
    // remember the locale in effect so that it can be reinstated
    char saved_locale [256];
    std::strcpy (saved_locale, std::setlocale (LC_CTYPE, 0));

    if (!std::setlocale (LC_CTYPE, locname))
        return UChar (ch);

    // large enough for any multibyte character wctomb() may produce;
    // only the first byte is ever returned
    char bytes [MB_LEN_MAX];

#ifndef _RWSTD_NO_WCTOB

    bytes [0] = std::wctob (ch);

#elif !defined (_RWSTD_NO_WCTOMB)

    // anything other than a single-byte conversion is treated as failure
    if (1 != std::wctomb (bytes, ch))
        bytes [0] = '\377';

#else

    bytes [0] = char (ch);

#endif   // _RWSTD_NO_WCTOB, _RWSTD_NO_WCTOMB

    if (0 != locname)
        std::setlocale (LC_CTYPE, saved_locale);

    return bytes [0];
}
// Verifies condition [3] in test_narrow_widen() for wide characters
// using libc functions.
//
// The LC_CTYPE category of the global C locale is temporarily set to
// `locname' and reinstated before returning. The function returns
// false when the locale cannot be set. Otherwise it returns true when
// `ch' has no single-byte equivalent, when libc does not classify the
// narrowed character as a member of `mask', or when libc classifies
// the wide character `ch' as a member of `mask'.
bool cond3 (std::ctype_base::mask mask, wchar_t ch, const char *locname)
{
    // save the name of the current locale before it is replaced
    char curlocname [256];
    std::strcpy (curlocname, std::setlocale (LC_CTYPE, 0));

    if (0 == std::setlocale (LC_CTYPE, locname))
        return false;

#ifndef _RWSTD_NO_WCTOB
    const int byte = std::wctob (ch);
#elif !defined (_RWSTD_NO_WCTOMB)
    char buf [MB_LEN_MAX];
    // convert through unsigned char (as wctob() does) so that a byte
    // with the high bit set, '\377' in particular, can't be mistaken
    // for EOF where plain char is a signed type
    const int byte = 1 == std::wctomb (buf, ch) ? int (UChar (buf [0])) : EOF;
#else
    const int byte = EOF;
#endif   // _RWSTD_NO_WCTOB, _RWSTD_NO_WCTOMB

    // the locale has already been set above, hence the null locale
    // name in the calls to libc_is()
    const bool result =
        EOF == byte || !libc_is (mask, char (byte), 0) || libc_is (mask, ch, 0);

    std::setlocale (LC_CTYPE, curlocname);

    return result;
}
#endif // _RWSTD_NO_WCHAR_T
/**************************************************************************/
// Exercises ctype<charT>::narrow() and widen() in each of the locales
// iterated over by BEGIN_LOCALE_LOOP, verifying the conditions listed
// below. When one of the conditions [1] through [3] fails to hold for
// the C++ facets the test only reports a failure if the analogous
// condition holds when evaluated using the C library (cond1(), cond3(),
// and the narrow()/widen() helpers defined above). The first argument
// only selects charT; `cname' is the name of charT used in diagnostics.
template <class charT>
void test_narrow_widen (charT, const char *cname)
{
    // 22.2.1.1.2, p11 requires that the conditions below hold for all
    // facets ctc and ct whose types are ctype<char> and ctype<charT>,
    // respectively:
    // [1] (ctc.is (M, c) || !ct.is (M, ctc.do_widen (c))) holds for
    //     all narrow characters c
    //     i.e., narrow characters that are NOT members of a certain
    //     category may not belong to the same category when widened
    //     Note: this implies that some sort of code conversion may
    //     be necessary in order to implement a conforming do_widen()
    // 22.2.1.1.2, p13 requires that:
    // [2] (ct.do_widen (ct.do_narrow (c, dfault)) == c) holds unless
    //     (ct.do_narrow (c, dfault) == dfault) holds
    // [3] (ct.is (M, c) || !ctc.is (M, ct.do_narrow (c, dfault))) holds
    //     unless (ct.do_narrow(c, dfault) == dfault) holds
    //
    //     C99: each of the iswxxx() functions returns true for each
    //     wide character that corresponds (as if by a call to the
    //     wctob() function) to a single-byte character for which the
    //     corresponding character classification function from 7.4.1
    //     returns true, except that the iswgraph() and iswpunct()
    //     functions may differ with respect to wide characters other
    //     than L' ' that are both printing and white-space wide
    //     characters.
    //
    // [4] (ct.do_narrow (c, default) - '0') evaluates to the digit
    //     value of the character for all c for which ct.is(digit, c)
    //     returns true

    rw_info (0, 0, __LINE__,
             "std::ctype<%s>::narrow(%1$s), widen(char)",
             cname);

    rw_info (0, 0, __LINE__,
             "std::ctype<%s>::narrow(const %1$s*, const %1$s*, char*), "
             "widen(const char*, const char*, %1$s*)", cname);

#define STR(x) #x

    // verify condition [1] above; if it fails, verify that
    // the same condition also fails to hold when using the
    // corresponding libc functions
    // (cname is passed once for each %s directive; a positional
    // %1$s would refer to the first argument, i.e., the character)
#define COND1(what)                                                     \
    if (!(ctc.is (what, c) || !ctp.is (what, ctp.widen (c)))) {         \
        rw_assert (!cond1 (what, c, locname), 0, __LINE__,              \
                   "ctype<char>::is (" STR (what) ", %{#lc})"           \
                   " || !ctype<%s>::is (" STR (what) ", "               \
                   "ctype<%s>::widen (%{#lc}) = %{#lc})"                \
                   " returned false in locale(%#s)",                    \
                   c, cname, cname, c, ctp.widen (c), locname);         \
    } else (void)0

    // verify condition [3] above; if it fails, verify that
    // the same condition also fails to hold when using the
    // corresponding libc functions
    // (the diagnostic reports the result of narrow() called with
    // the same default character as the one used in the condition)
#define COND3(what)                                                     \
    if (   ctp.narrow (ch, dfault) != dfault                            \
        && !(ctp.is (what, ch) || !ctc.is (what, ctp.narrow (ch, dfault)))) { \
        rw_assert (!cond3 (what, ch, locname), 0, __LINE__,             \
                   "ctype<%s>::is (" STR (what) ", %{#lc})"             \
                   " || !ctype<char>::is (" STR (what) ", "             \
                   "ctype<%1$s>::narrow (%{#lc}, %{#c}) = %{#lc})"      \
                   " returned false in locale(%#s)", cname, ch,         \
                   ch, dfault, ctp.narrow (ch, dfault),                 \
                   locname);                                            \
    } else (void)0

    // save the name of the global C locale in effect before the loop
    // to be able to verify that the test doesn't change it
    char c_locname [256];
    std::strcpy (c_locname, std::setlocale (LC_ALL, 0));

    // NOTE(review): the loop below runs i from 0 through UCHAR_MAX - 1,
    // so the character with the value UCHAR_MAX is not subjected to the
    // per-character checks -- confirm whether this is intended
    BEGIN_LOCALE_LOOP (UCHAR_MAX, locname, i) {

#if defined (_RWSTD_OS_SUNOS) && _RWSTD_OS_MAJOR == 5 && _RWSTD_OS_MINOR <= 10

        // avoid a libc SIGSEGV in mbtowc() in zh_HK and zh_TW
        // locales encoded using the BIG5 codeset (see bug #603)
        if (   0 == std::strncmp ("zh_HK.BIG5", locname, 10)
            || 0 == std::strncmp ("zh_TW.BIG5", locname, 10))
            continue;

#endif   // SunOS <= 5.10

        {
            // verify that the global C locale stays unchanged
            const char* const curlocname = std::setlocale (LC_ALL, 0);

            rw_assert (!std::strcmp (c_locname, curlocname), 0, __LINE__,
                       "setlocale(LC_ALL, 0) == \"%s\", got \"%s\"",
                       c_locname, curlocname);
        }

        if (0 == i)
            rw_info (0, 0, __LINE__, "std::ctype<%s> in locale(%#s)",
                     cname, locname);

        const char  c  = char (i);
        const charT ch = charT (i);

        // verify that condition [1] holds
        COND1 (ALPHA);
        COND1 (CNTRL);
        COND1 (DIGIT);
        COND1 (GRAPH);
        COND1 (LOWER);
        COND1 (PRINT);
        COND1 (PUNCT);
        COND1 (SPACE);
        COND1 (UPPER);
        COND1 (XDIGIT);

        // verify that condition [2] holds
        // (use a default character other than the one being tested)
        char dfault = c ? '\0' : '\1';

        const charT ret = ctp.widen (ctp.narrow (ch, dfault));

        if (ret != charT (dfault) && ret != ch) {
            rw_assert (ch != widen (ch, narrow (ch, locname), locname),
                       0, __LINE__,
                       "ctype<%s>::widen (ctype<%1$s>::narrow "
                       "(%{#lc}, %{#c})) == %{#c}; got %{#c} "
                       "in locale (%#s)",
                       cname, ch, dfault, ch, ret, locname);
        }

        // finally verify that condition [3] holds
        COND3 (ALPHA);
        COND3 (CNTRL);
        COND3 (DIGIT);
        COND3 (GRAPH);
        COND3 (LOWER);
        COND3 (PRINT);
        COND3 (PUNCT);
        COND3 (SPACE);
        COND3 (UPPER);
        COND3 (XDIGIT);

        // now perform a relatively simple sanity check on the 3-argument
        // overloads of narrow() and widen(). Make sure that the 3-argument
        // overloads return the same value that the other overload produces
        // Only do this once for each locale, i.e., on the first iteration
        // of the loop over characters.
        if (i == 0) {
            // arrays of all tested narrow and wide characters
            charT wide_chars   [UCHAR_MAX + 1];
            char  narrow_chars [UCHAR_MAX + 1];
            charT narrow_in    [UCHAR_MAX + 1];
            char  widen_in     [UCHAR_MAX + 1];

            // zero out the last element to allow printing
            wide_chars   [UCHAR_MAX] = charT ();
            narrow_chars [UCHAR_MAX] = char ();
            narrow_in    [UCHAR_MAX] = charT ();
            widen_in     [UCHAR_MAX] = char ();

            // set the `dfault' character to something unlikely
            // but other than '\0'
            dfault = '\377';

            // compute the expected values one character at a time
            for (unsigned j = 0; j <= UCHAR_MAX; j++) {
                wide_chars   [j] = ctp.widen (char (j));
                narrow_chars [j] = ctp.narrow (wide_chars [j], dfault);
                narrow_in    [j] = ctp.widen (char (j));
                widen_in     [j] = char (j);
            }

            charT widen_out  [UCHAR_MAX + 1];
            char  narrow_out [UCHAR_MAX + 1];

            widen_out  [UCHAR_MAX] = charT ();
            narrow_out [UCHAR_MAX] = char ();

            // narrow source buffer into the destination
            // and compare with expected values
            ctp.narrow (narrow_in,
                        narrow_in + UCHAR_MAX + 1,
                        dfault,
                        narrow_out);

            bool success =
                !std::memcmp (narrow_chars, narrow_out, sizeof narrow_chars);

            rw_assert (success, 0, __LINE__,
                       "ctype<%s>::narrow (%{*.*Ac}\", ... , %{#c}) "
                       "== %{.*Ac}, got %{.*Ac} in locale (%#s)", cname,
                       int (sizeof *narrow_in), UCHAR_MAX, narrow_in, dfault,
                       UCHAR_MAX, narrow_chars, UCHAR_MAX, narrow_out,
                       locname);

            // widen source buffer into the destination
            // and compare with expected values
            ctp.widen (widen_in,
                       widen_in + UCHAR_MAX + 1,
                       widen_out);

            success = !std::memcmp (wide_chars, widen_out, sizeof wide_chars);

            rw_assert (success, 0, __LINE__,
                       "ctype<%s>::widen (%{.*Ac}, ...) == "
                       "%{*.*Ac}, got %{*.*Ac} in locale (%#s)",
                       cname, UCHAR_MAX, widen_in,
                       int (sizeof *wide_chars), UCHAR_MAX, wide_chars,
                       int (sizeof *wide_chars), UCHAR_MAX, widen_out,
                       locname);
        }

    } END_LOCALE_LOOP (locname);
}
/**************************************************************************/
// exercise the behavior of the libc-based C++ locale implementation
// by running the narrow()/widen() tests for the character type charT;
// `cname' is the name of charT used in diagnostics
template <class charT>
void test_libc (charT, const char *cname)
{
    // value-initialized object used only to select the charT overload
    const charT tag = charT ();

    test_narrow_widen (tag, cname);
}
/**************************************************************************/
// exercise the behavior of our own C++ locale implementation
// (nothing is tested here at present; the function only issues
// a warning to that effect naming the character type `cname')
template <class charT>
void test_libstd (charT, const char *cname)
{
    rw_warn (0, 0, __LINE__,
             "stdcxx implementation of "
             "std::ctype<%s> not exercised",
             cname);
}
/**************************************************************************/
// Runs all tests for the character type charT; `cname' is the name
// of charT used in diagnostics. The first argument only selects charT.
template <class charT>
void run_test (charT, const char *cname)
{
    if (0) {
        // never executed: makes sure has_facet and use_facet
        // compile when instantiated on ctype_byname<charT>
        _STD_HAS_FACET (std::ctype_byname<charT>, std::locale ());
        _STD_USE_FACET (std::ctype_byname<charT>, std::locale ());
    }

    // value-initialized object used only to select the charT overloads
    const charT tag = charT ();

    // first the libc-based C++ locale implementation...
    test_libc (tag, cname);

    // ...followed by our own C++ locale implementation
    test_libstd (tag, cname);
}
/**************************************************************************/
// Test driver callback: runs the tests for char and, when wchar_t
// support is enabled, for wchar_t. Both arguments are ignored.
// Always returns 0.
static int
run_test (int, char**)
{
    // point the global locale_list at the array of NUL-separated
    // locale names given on the command line via the --locales=...
    // option, if any; otherwise it becomes 0, in which case the
    // list gets generated on demand
    locale_list = rw_opt_locales;

    const char narrow_tag = char ();

    run_test (narrow_tag, "char");

#ifndef _RWSTD_NO_WCHAR_T

    const wchar_t wide_tag = wchar_t ();

    run_test (wide_tag, "wchar_t");

#endif   // _RWSTD_NO_WCHAR_T

    return 0;
}
/**************************************************************************/
// Program entry point: hands control to the test driver, registering
// run_test() as the test function and the rw_opt_setlocales handler
// for the "-locales=" command line option, and returns the driver's
// result.
int main (int argc, char *argv[])
{
    const int status =
        rw_test (argc, argv, __FILE__,
                 "lib.category.ctype",
                 "narrow and widen",
                 run_test,
                 "|-locales= ",
                 &rw_opt_setlocales,
                 (void*)0 /* sentinel */);

    return status;
}