| /*************************************************************************** |
| * |
| * 22.locale.collate.cpp -- tests for collate-facet member functions |
| * |
| * $Id$ |
| * |
| *************************************************************************** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed |
| * with this work for additional information regarding copyright |
| * ownership. The ASF licenses this file to you under the Apache |
| * License, Version 2.0 (the "License"); you may not use this file |
| * except in compliance with the License. You may obtain a copy of |
| * the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
| * implied. See the License for the specific language governing |
| * permissions and limitations under the License. |
| * |
| * Copyright 1994-2008 Rogue Wave Software. |
| * |
| **************************************************************************/ |
| |
| #include <locale> // for collate, locale |
| #include <string> // for string |
| |
| #include <algorithm> // for sort and unique |
| #include <climits> // for UCHAR_MAX |
| #include <clocale> // for LC_COLLATE, setlocale |
| #include <cstdlib> // for exit() |
| #include <cstdio> // for fprintf() |
| #include <cstring> // for strcmp(), strcoll(), ... |
| #include <cwchar> // for wcscoll() |
| |
| #include <rw_driver.h> |
| #include <rw_environ.h> |
| #include <rw_file.h> |
| #include <rw_locale.h> |
| #include <rw_process.h> |
| |
// placeholder for a weight that is not expected to appear in the
// transformed string (see test_weight_val)
#define IGNORE 0

// number of elements (including the terminating NUL) of the random
// strings generated in check_libc_locale
#define STR_SIZE 16

// NOTE(review): not referenced anywhere in the visible source
#define LOCNAME_SIZE 256

// SLASH is the directory separator as a string literal so that it can
// be concatenated with other literals at compile time
#if _RWSTD_PATH_SEP == '/'
#  define SLASH "/"
#else
#  define SLASH "\\"
#endif

// strings declared extern to work around a SunPro bug (PR #28124)
// NOTE(review): nothing below is declared extern any more; the remark
// above looks stale -- confirm before relying on it
// get the source root
// RELPATH is not referenced in the visible source; TESTS_ETC_PATH is
// appended to $TOPDIR to locate the sorted input files in check_libstd
#define RELPATH "etc" SLASH "nls"
#define TESTS_ETC_PATH "tests" SLASH "etc"

// the root of the locale directory (RWSTD_LOCALE_ROOT)
// LOCALE_ROOT is the name of the environment variable reported in
// diagnostics when constructing a locale throws
#define LOCALE_ROOT "RWSTD_LOCALE_ROOT"
// NOTE(review): not referenced in the visible source; zero-initialized
// because it has static storage duration
const char* locale_root;
| |
| /**************************************************************************/ |
| |
// These overloads are necessary in our template
// functions so that we can make a single function call regardless
// of the charT we are using
| |
// Collates two NUL-terminated narrow strings with std::strcoll() in
// the current C locale and normalizes the result to exactly -1, 0,
// or +1 so that it can be compared with collate::compare().
int c_strcoll (const char* s1, const char* s2)
{
    const int order = std::strcoll (s1, s2);

    if (order > 0)
        return 1;

    return order < 0 ? -1 : 0;
}
| |
// Transforms the NUL-terminated narrow string `from' with std::strxfrm().
//
//   to    destination buffer, or 0 (together with a size of 0) to only
//         query the length of the transformed string
//   from  NUL-terminated source string
//   size  number of elements available in `to'
//
// Returns the length of the transformed string. When the whole result
// was stored in `to' the length is measured with strlen(); otherwise
// the value reported by strxfrm() is returned unchanged.
std::size_t c_xfrm (char* to, const char* from, std::size_t size)
{
    char safety_buf [8] = { 0 };

    if (0 == to && 0 == size) {
        // prevent buggy implementations (such as MSVC 8) from trying
        // to write to the destination buffer even though it's 0 and
        // its size is zero (see stdcxx-69)
        to = safety_buf;
    }

    std::size_t n = std::strxfrm (to, from, size);

    // when strxfrm() returns a value that is not less than size the
    // contents of the destination are indeterminate (and possibly not
    // NUL-terminated), so the buffer must only be measured when the
    // complete result is known to have been stored
    if (to && to != safety_buf && n < size)
        n = std::strlen (to);

    return n;
}
| |
// Narrow-character overload: returns the number of characters that
// precede the terminating NUL of `s1'.
std::size_t c_strlen (const char* s1)
{
    const std::size_t length = std::strlen (s1);

    return length;
}
| |
// Narrow-to-narrow overload: copies the NUL-terminated string `src'
// (including the terminator) into `dst' and returns `dst'. When either
// pointer is null, or both refer to the same buffer, nothing is copied
// and `src' is returned.
const char* narrow (char* dst, const char* src)
{
    const bool must_copy = src && dst && src != dst;

    if (must_copy) {
        const std::size_t nbytes = std::strlen (src) + 1;

        std::memcpy (dst, src, nbytes);
        return dst;
    }

    return src;
}
| |
// Narrow-to-narrow overload of widen(): no conversion is necessary, so
// `src' is simply copied (with its terminating NUL) into `dst'.
// Returns `dst' after a copy, or `src' when either pointer is null or
// both point to the same buffer.
const char* widen (char* dst, const char* src)
{
    if (!src || !dst)
        return src;

    if (dst == src)
        return src;

    const std::size_t length = std::strlen (src);

    // length + 1 so that the terminating NUL is copied as well
    std::memcpy (dst, src, length + 1);

    return dst;
}
| |
| #if !defined (_RWSTD_NO_WCHAR_T) |
| |
// Collates two NUL-terminated wide strings with std::wcscoll() in the
// current C locale and normalizes the result to exactly -1, 0, or +1.
int c_strcoll (const wchar_t* s1, const wchar_t* s2)
{
    const int order = std::wcscoll (s1, s2);

    if (order > 0)
        return 1;

    return order < 0 ? -1 : 0;
}
| |
// Transforms the NUL-terminated wide string `from' with std::wcsxfrm().
//
//   to    destination buffer, or 0 (together with a size of 0) to only
//         query the length of the transformed string
//   from  NUL-terminated source string
//   size  number of elements available in `to'
//
// Returns the length of the transformed string. When the whole result
// was stored in `to' the length is measured with wcslen(); otherwise
// the value reported by wcsxfrm() is returned unchanged.
std::size_t c_xfrm (wchar_t* to, const wchar_t* from, std::size_t size)
{
    std::size_t n = 0;

#if !defined (_MSC_VER) || _MSC_VER > 1200

    wchar_t safety_buf [8] = { 0 };

    if (0 == to && 0 == size) {
        // prevent buggy implementations (such as MSVC 8) from trying
        // to write to the destination buffer even though it's 0 and
        // its size is zero (see stdcxx-69)
        to = safety_buf;
    }

    n = std::wcsxfrm (to, from, size);

    // when wcsxfrm() returns a value that is not less than size the
    // contents of the destination are indeterminate (and possibly not
    // NUL-terminated), so the buffer must only be measured when the
    // complete result is known to have been stored
    if (to && to != safety_buf && n < size)
        n = std::wcslen (to);

#else   // MSVC 6 and prior

    // working around an MSVC 6.0 libc bug (PR #26437)
    if (to) {
        std::wcsxfrm (to, from, size);
        n = std::wcslen (to);
    }
    else {
        // no destination: transform into a local scratch buffer
        // and report the length of what was stored there
        wchar_t tmp [1024];

        n = std::wcslen (from);
        _RWSTD_ASSERT (n < sizeof tmp / sizeof *tmp);

        std::wcscpy (tmp, from);
        std::wcsxfrm (tmp, from, sizeof tmp / sizeof *tmp);

        n = std::wcslen (tmp);
    }

#endif   // MSVC 6

    return n;
}
| |
// Wide-character overload: returns the number of wide characters that
// precede the terminating NUL of `s1'.
std::size_t c_strlen (const wchar_t* s1)
{
    const std::size_t length = std::wcslen (s1);

    return length;
}
| |
| const wchar_t* widen (wchar_t* dst, const char* src) |
| { |
| static wchar_t buf [4096]; |
| |
| if (!src) |
| return 0; |
| |
| if (!dst) |
| dst = buf; |
| |
| std::size_t len = std::strlen (src); |
| |
| _RWSTD_ASSERT (len < sizeof buf /sizeof *buf); |
| |
| len = std::mbstowcs (dst, src, sizeof buf / sizeof *buf); |
| |
| if (std::size_t (-1) == len) |
| *dst = 0; |
| |
| return dst; |
| } |
| |
| const char* narrow (char* dst, const wchar_t* src) |
| { |
| static char buf [4096]; |
| |
| if (!src) |
| return 0; |
| |
| if (!dst) |
| dst = buf; |
| |
| std::size_t len = std::wcslen (src); |
| |
| _RWSTD_ASSERT (len < sizeof buf); |
| |
| len = std::wcstombs (dst, src, sizeof buf / sizeof *buf); |
| |
| if (std::size_t (-1) == len) |
| *dst = 0; |
| |
| return dst; |
| } |
| |
| #endif //_RWSTD_NO_WCHAR_T |
| |
| /**************************************************************************/ |
| |
// Fills `str' with size - 1 pseudo-random characters obtained from
// std::rand() followed by a terminating NUL. Each character lies in
// the range [' ', '~'), i.e., it is a printable ASCII character other
// than the tilde. Nothing is written when `size' is zero.
template <class charT>
void
gen_str (charT* str, std::size_t size)
{
    if (0 == size)
        return;

    // lowest character generated and the number of distinct values
    const int first_char = ' ';
    const int nchars     = '~' - ' ';

    charT* const last = str + (size - 1);

    for (charT* next = str; next != last; ++next)
        *next = first_char + std::rand () % nchars;

    // NUL-terminate the generated sequence
    *last = charT ();
}
| |
| /**************************************************************************/ |
| |
| template <class charT> |
| void |
| check_libc_locale (const char* charTname, char const* locname, |
| int (&nfail) [3]) |
| { |
| typedef std::char_traits<charT> traits_type; |
| typedef std::allocator<charT> allocator_type; |
| typedef std::basic_string <charT, traits_type, allocator_type> string_type; |
| |
| std::locale loc (locname); |
| |
| const std::collate<charT> &co = |
| _STD_USE_FACET (std::collate<charT>, loc); |
| |
| co._C_opts |= co._C_use_libc; |
| co._C_opts &= ~co._C_use_libstd; |
| |
| for (int nloops = 0; nloops < 10; ++nloops) { |
| |
| charT str1 [STR_SIZE] = { 0 }; |
| charT str2 [STR_SIZE] = { 0 }; |
| |
| // generate two random NUL-terminated strings |
| gen_str (str1, sizeof str1 / sizeof *str1); |
| gen_str (str2, sizeof str2 / sizeof *str2); |
| |
| // call transform on the generated string |
| // not including the terminating NUL |
| string_type out = co.transform ( |
| str1, str1 + sizeof str1 / sizeof *str1 - 1); |
| |
| // get the size of the buffer needed to hold the |
| // transformed string (with the terminating NUL) |
| std::size_t size = 1U + c_xfrm (0, str1, 0); |
| |
| // prevent errors caused by huge return values (e.g., MSVC) |
| if (size > STR_SIZE * 64) |
| size = 0; |
| |
| string_type c_out; |
| |
| if (size) { |
| c_out.resize (size); |
| |
| // call the C-library transform function |
| size = c_xfrm (&c_out [0], str1, size); |
| |
| if (size > STR_SIZE * 64) |
| size = 0; |
| |
| // shrink to fit (chop off the terminating NUL) |
| c_out.resize (size); |
| } |
| |
| if (out != c_out) |
| ++nfail [0]; |
| |
| // make sure the output is the same |
| rw_assert (out == c_out, __FILE__, __LINE__, |
| "%d. collate<%s>::transform(%{*.*Ac}, ...) " |
| "== %{*.*Ac}, got %{*.*Ac} in locale(\"%s\")", |
| nloops, charTname, |
| sizeof (charT), sizeof str1 / sizeof *str1, str1, |
| sizeof (charT), c_out.size (), c_out.c_str (), |
| sizeof (charT), out.size (), out.c_str (), |
| locname); |
| |
| // now call compare on the two generated strings |
| int ret1 = co.compare ( |
| str1, str1 + sizeof str1 / sizeof *str1, |
| str2, str2 + sizeof str2 / sizeof *str2); |
| |
| // call the C-library comparison function |
| int ret2 = c_strcoll (str1, str2); |
| |
| if (ret1 != ret2) |
| ++nfail [1]; |
| |
| // make sure the results are the same |
| rw_assert (ret1 == ret2, __FILE__, __LINE__, |
| "%d. collate<%s>::compare(%{*.*Ac}, ..., " |
| "%{*.*Ac}, ...) == %d, got %d in locale(\"%s\")", |
| nloops, charTname, |
| sizeof (charT), sizeof str1 / sizeof *str1, str1, |
| sizeof (charT), sizeof str2 / sizeof *str2, str2, |
| ret2, ret1, locname); |
| |
| // two strings that compare identically must hash |
| // identically as well. Calling hash on the same string is |
| // not very conclusive but generating strings that have exactly |
| // the same weights is not possible without knowing all the |
| // weight orderings |
| const long hashNum1 = |
| co.hash (str1, str1 + sizeof str1 / sizeof *str1); |
| |
| const long hashNum2 = |
| co.hash (str1, str1 + sizeof str1 / sizeof *str1); |
| |
| if (hashNum1 != hashNum2) |
| ++nfail [2]; |
| |
| rw_assert (hashNum1 == hashNum2, __FILE__, __LINE__, |
| "%d. collate<%s>::hash(%{*.*Ac}, ...) == %d, " |
| "got %d in locale(\"%s\")", |
| nloops, charTname, |
| sizeof (charT), sizeof str1 / sizeof *str1, str1, |
| hashNum1, hashNum2, locname); |
| } |
| } |
| |
| template <class charT> |
| void |
| check_libc (const char* charTname) |
| { |
| // the libc implementation of the library should act the same as |
| // the c-library. Go through all the locales, generate some random |
| // strings and make sure that the following holds true: |
| // transform acts like strxfrm and wcsxfrm, |
| // compare acts like strcoll and wcscoll |
| |
| rw_info (0, __FILE__, __LINE__, |
| "libc std::collate<%s>::transform ()", charTname); |
| |
| rw_info (0, __FILE__, __LINE__, |
| "libc std::collate<%s>::compare ()", charTname); |
| |
| rw_info (0, __FILE__, __LINE__, |
| "std::collate<%s>::hash ()", charTname); |
| |
| int nfail [3] = { 0 }; |
| char curlocname [256]; |
| |
| for (const char* locname = rw_locales (LC_COLLATE); |
| *locname; locname += std::strlen (locname) + 1) { |
| |
| std::strcpy (curlocname, std::setlocale (LC_COLLATE, 0)); |
| |
| if (0 == std::setlocale (LC_COLLATE, locname)) |
| continue; |
| |
| int max = MB_CUR_MAX; |
| |
| if (max == 1) { |
| // |
| // FIXME test variable length multibyte encodings |
| // |
| _TRY { |
| check_libc_locale<charT> (charTname, locname, nfail); |
| } |
| _CATCH (...) { |
| rw_assert (false, __FILE__, __LINE__, |
| "locale(\"%s\") threw an exception", |
| locname); |
| } |
| } |
| |
| std::setlocale (LC_COLLATE, curlocname); |
| } |
| |
| |
| rw_assert (0 == nfail [0], __FILE__, __LINE__, |
| "collate<%s>::transform () failed %d times", |
| charTname, nfail [0]); |
| |
| rw_assert (0 == nfail [1], __FILE__, __LINE__, |
| "collate<%s>::compare () failed %d times", |
| charTname, nfail [1]); |
| |
| rw_assert (0 == nfail [2], __FILE__, __LINE__, |
| "collate<%s>::hash () failed %d times", |
| charTname, nfail [2]); |
| } |
| |
| /**************************************************************************/ |
| |
// Builds the sources of a synthetic locale -- a character map covering
// the portable character set and an LC_COLLATE section -- and passes
// them to rw_create_locale(), whose result is returned unchanged
// (callers treat a null result as failure to create the locale).
static const char*
make_test_locale ()
{
    // Create a synthetic locale to exercise as many different parts
    // of the collate standard as possible.

    static const char charmap [] = {
        //
        // The portable character set
        //
        "<code_set_name> \"UTF-8\"\n"
        "<mb_cur_max> 1\n<mb_cur_min> 1\n"
        "CHARMAP\n"
        "<NUL> \\x00\n<SOH> \\x01\n<STX> \\x02\n<ETX> \\x03\n<EOT> \\x04\n"
        "<ENQ> \\x05\n<ACK> \\x06\n<BEL> \\x07\n"
        "<backspace> \\x08\n<tab> \\x09\n<newline> \\x0a\n"
        "<vertical-tab> \\x0b\n<form-feed> \\x0c\n"
        "<carriage-return> \\x0d\n"
        "<SO> \\x0e\n<SI> \\x0f\n<DLE> \\x10\n<DC1> \\x11\n<DC2> \\x12\n"
        "<DC3> \\x13\n<DC4> \\x14\n<NAK> \\x15\n<SYN> \\x16\n<ETB> \\x17\n"
        "<CAN> \\x18\n<EM> \\x19\n<SUB> \\x1a\n<ESC> \\x1b\n<IS4> \\x1c\n"
        "<IS3> \\x1d\n<IS2> \\x1e\n<IS1> \\x1f\n"
        "<space> \\x20\n"
        "<exclamation-mark> \\x21\n"
        "<quotation-mark> \\x22\n"
        "<number-sign> \\x23\n"
        "<dollar-sign> \\x24\n"
        "<percent-sign> \\x25\n"
        "<ampersand> \\x26\n"
        "<apostrophe> \\x27\n"
        "<left-parenthesis> \\x28\n"
        "<right-parenthesis> \\x29\n"
        "<asterisk> \\x2a\n"
        "<plus-sign> \\x2b\n"
        "<comma> \\x2c\n"
        "<hyphen> \\x2d\n"
        "<period> \\x2e\n"
        "<slash> \\x2f\n"
        "<zero> \\x30\n<one> \\x31\n<two> \\x32\n<three> \\x33\n"
        "<four> \\x34\n<five> \\x35\n<six> \\x36\n<seven> \\x37\n"
        "<eight> \\x38\n<nine> \\x39\n"
        "<colon> \\x3a\n"
        "<semicolon> \\x3b\n"
        "<less-than-sign> \\x3c\n"
        "<equals-sign> \\x3d\n"
        "<greater-than-sign> \\x3e\n"
        "<question-mark> \\x3f\n"
        "<commercial-at> \\x40\n"
        "<A> \\x41\n<B> \\x42\n<C> \\x43\n<D> \\x44\n<E> \\x45\n<F> \\x46\n"
        "<G> \\x47\n<H> \\x48\n<I> \\x49\n<J> \\x4a\n<K> \\x4b\n<L> \\x4c\n"
        "<M> \\x4d\n<N> \\x4e\n<O> \\x4f\n<P> \\x50\n<Q> \\x51\n<R> \\x52\n"
        "<S> \\x53\n<T> \\x54\n<U> \\x55\n<V> \\x56\n<W> \\x57\n<X> \\x58\n"
        "<Y> \\x59\n<Z> \\x5a\n"
        "<left-square-bracket> \\x5b\n"
        "<backslash> \\x5c\n"
        "<right-square-bracket> \\x5d\n"
        "<circumflex> \\x5e\n"
        "<underscore> \\x5f\n"
        "<grave-accent> \\x60\n"
        "<a> \\x61\n<b> \\x62\n<c> \\x63\n<d> \\x64\n<e> \\x65\n<f> \\x66\n"
        "<g> \\x67\n<h> \\x68\n<i> \\x69\n<j> \\x6a\n<k> \\x6b\n<l> \\x6c\n"
        "<m> \\x6d\n<n> \\x6e\n<o> \\x6f\n<p> \\x70\n<q> \\x71\n<r> \\x72\n"
        "<s> \\x73\n<t> \\x74\n<u> \\x75\n<v> \\x76\n<w> \\x77\n<x> \\x78\n"
        "<y> \\x79\n<z> \\x7a\n"
        "<left-brace> \\x7b\n"
        "<vertical-line> \\x7c\n"
        "<right-brace> \\x7d\n"
        "<tilde> \\x7e\n"
        "<DEL> \\x7f\n"
        "END CHARMAP\n\n"
    };

    // the text of a locale definition that exercises as many
    // different parts of the collate standard as possible
    const char lc_collate [] = {
        "LC_COLLATE\n"
        "script <ALL_FORWARD>\n"
        "collating-element <er> from \"<e><r>\"\n"
        "collating-element <ic> from \"ic\"\n"
        "collating-symbol <LETTER>\n"
        "collating-symbol <COLLATING_ELEMENT>\n"
        "collating-symbol <DIGIT>\n"

        "order_start forward;backward;forward,position\n"
        "<LETTER>\n"
        "<COLLATING_ELEMENT>\n"
        "<DIGIT>\n"

        "<a> <a> <LETTER> IGNORE\n"
        "<b> <b> <LETTER> IGNORE\n"

        // "<c>" will have a non-ignored position ordering
        "<c> <c> <LETTER> <c>\n"

        // try giving "<d>" a many-to-one weight
        "<d> \"<d><a>\" <LETTER> IGNORE\n"

        // try giving "<e>" a decimal value weight
        "<e> \\d139 <LETTER> IGNORE\n"

        // try giving "<f>" an octal value weight
        "<f> \\36 <LETTER> IGNORE\n"

        // try giving "<g>" a hex value weight
        "<g> \\x3A <LETTER> IGNORE\n"

        "<zero> <zero> <DIGIT> IGNORE\n"
        "<one> <one> <DIGIT> <zero>\n"
        "<two> <two> <DIGIT> IGNORE\n"
        "<three> <three> <DIGIT> IGNORE\n"
        "<er> <a> <COLLATING_ELEMENT> IGNORE\n"

        // the <ic> collating element will be equivalent to the letter <c>
        "<ic> <c> <LETTER> <c>\n"
        "UNDEFINED IGNORE IGNORE IGNORE\n"

        "order_end\n"

        // define a section in which all of the orders are forward orders
        "order_start <ALL_FORWARD>;forward;forward;forward\n"
        "<h>\n<i>\n<j>\n<k>\n"
        "order_end\n"

        // reorder the elements in the <ALL_FORWARD> section to appear
        // after the letter "<g>"
        "reorder-after <g>\n"
        "<h>\n<i>\n<j>\n<k>\n"

        // try to reorder "<a>" after "<b>"
        "reorder-after <b>\n"
        "<a> <a> <LETTER> IGNORE\n"
        "reorder-end\n"

        "\nEND LC_COLLATE\n"
    };

    return rw_create_locale (charmap, lc_collate);
}
| |
| /**************************************************************************/ |
| |
// Forward declarations of the helpers called by
// check_libstd_test_locale() and defined further below.

// checks that str1 and str2 hash to the same value
template <class charT>
void
test_hash (const char*                charTname,
           const std::collate<charT>& co,
           const char*                str1,
           const char*                str2);

// checks that comparing str1 with str2 yields expected_val
template <class charT>
void
test_string (const char*                charTname,
             const std::collate<charT>& co,
             const char*                str1,
             const char*                str2,
             int                        expected_val);

// checks the transformed form of ch against the given weights
template <class charT>
void
test_weight_val (const char*                charTname,
                 const std::collate<charT>& co,
                 charT                      ch,
                 int                        w1a,
                 int                        w1b,
                 int                        w2,
                 int                        w3,
                 bool                       w3_is_fp);
| |
| template <class charT> |
| void |
| check_libstd_test_locale (const char* charTname) |
| { |
| rw_info (0, __FILE__, __LINE__, |
| "libstd std::collate<%s>::transform () " |
| "collate test database", charTname); |
| rw_info (0, __FILE__, __LINE__, |
| "libstd std::collate<%s>::compare () collate test " |
| "database", charTname); |
| rw_info (0, __FILE__, __LINE__, |
| "libstd std::collate<%s>::hash () collate test " |
| "database", charTname); |
| |
| const char* const locname = make_test_locale (); |
| if (locname) { |
| |
| std::locale loc; |
| |
| _TRY { |
| loc = std::locale (locname); |
| } |
| _CATCH (...) { |
| const char* const var = std::getenv (LOCALE_ROOT); |
| |
| rw_assert (false, __FILE__, __LINE__, |
| "std::locale(\"%s\") unexpectedly threw " |
| "an exception; " LOCALE_ROOT "=%s", |
| locname, var ? var : "(null)"); |
| return; |
| } |
| |
| const std::collate<charT> &co = |
| _STD_USE_FACET (std::collate<charT>, loc); |
| co._C_opts |= co._C_use_libstd; |
| co._C_opts &= ~co._C_use_libc; |
| |
| #define IGNORE 0 |
| |
| // first lets make sure that each character was given the |
| // correct weight for each level. |
| |
| #undef TEST |
| #define TEST(ch, w0, w1, w2, w3, w3_is_fp) \ |
| test_weight_val (charTname, co, charT (ch), w0, w1, w2, w3, w3_is_fp) |
| |
| TEST ('a', 6, IGNORE, 2, IGNORE, true); |
| TEST ('b', 5, IGNORE, 2, IGNORE, true); |
| TEST ('c', 7, IGNORE, 2, 7, true); |
| TEST ('d', 8, 6, 2, IGNORE, true); |
| TEST ('e', 139, IGNORE, 2, IGNORE, true); |
| TEST ('f', 30, IGNORE, 2, IGNORE, true); |
| TEST ('g', 58, IGNORE, 2, IGNORE, true); |
| TEST ('h', 12, IGNORE, 12, 12, false); |
| TEST ('i', 13, IGNORE, 13, 13, false); |
| TEST ('j', 14, IGNORE, 14, 14, false); |
| TEST ('k', 15, IGNORE, 15, 15, false); |
| TEST ('0', 16, IGNORE, 4, IGNORE, true); |
| TEST ('1', 17, IGNORE, 4, 16, true); |
| TEST ('2', 18, IGNORE, 4, IGNORE, true); |
| TEST ('3', 19, IGNORE, 4, IGNORE, true); |
| TEST ('l', IGNORE, IGNORE, IGNORE, IGNORE, true); |
| |
| // make sure that strings collate the way we expect them to |
| |
| // a should collate greater then b |
| test_string (charTname, co, "a", "b", 1) ; |
| |
| // the collating element "er" should collate after 'a' and 'b' |
| // but before 'c' |
| test_string (charTname, co, "er", "a", 1); |
| test_string (charTname, co, "er", "b", 1); |
| test_string (charTname, co, "er", "c", -1); |
| |
| // the collating element "ic" should be equivalent to the letter 'c' |
| test_string (charTname, co, "ic", "c", 0); |
| |
| |
| // two strings that compare identically must hash |
| // identically as well. |
| // since ic and c are equivalent elements string they should hash |
| // the same |
| test_hash (charTname, co, "c", "ic"); |
| } |
| else |
| rw_assert (false, __FILE__, __LINE__, |
| "unable to create a locale database"); |
| } |
| |
| /**************************************************************************/ |
| |
| template <class charT> |
| void |
| test_hash (const char* charTname, const std::collate<charT>& co, |
| const char* str1, const char* str2) |
| { |
| // convert narrow string to a (possibly) wide representation |
| charT wstrbuf [256]; |
| charT wstrbuf2 [256]; |
| |
| const charT* const wstr = widen (wstrbuf, str1); |
| const charT* const wstr2 = widen (wstrbuf2, str2); |
| |
| long hashNum1 = co.hash (wstr, wstr + c_strlen (wstr)); |
| long hashNum2 = co.hash (wstr2, wstr2 + c_strlen (wstr2)); |
| |
| if (hashNum1 != hashNum2) { |
| rw_assert (false, __FILE__, __LINE__, |
| "collate<%s>::hash(%s, ...) returned %d and\n " |
| "collate<%s>::hash(%s, ...) returned %d", |
| charTname, str1, |
| hashNum1, charTname, str2, hashNum2); |
| } |
| } |
| |
| /**************************************************************************/ |
| |
| template <class charT> |
| void |
| test_string (const char* charTname, const std::collate<charT>& co, |
| const char* str1, const char* str2, |
| int expected_val) |
| { |
| // convert narrow string to a (possibly) wide representation |
| charT wstrbuf [256]; |
| charT wstrbuf2 [256]; |
| |
| const charT* const wstr = widen (wstrbuf, str1); |
| const charT* const wstr2 = widen (wstrbuf2, str2); |
| |
| int ret = co.compare (wstr, wstr + c_strlen (wstr), |
| wstr2, wstr2 + c_strlen(wstr2)); |
| if (ret != expected_val) |
| rw_assert (false, __FILE__, __LINE__, |
| "libstd std::collate<%s>::compare" |
| "(%s, ..., %s, ...) == %d, got %d", |
| charTname, str1, str2, expected_val, ret); |
| } |
| |
| /**************************************************************************/ |
| |
| template <class charT> |
| void |
| test_weight_val (const char* charTname, const std::collate<charT>& co, |
| charT ch, int w1a, int w1b, int w2, int w3, bool w3_is_fp) |
| { |
| int w [3][2] = { { w1a, w1b }, { w2, IGNORE }, { w3, IGNORE } }; |
| |
| typedef std::char_traits<charT> Traits; |
| typedef std::allocator<charT> Alloc; |
| typedef std::basic_string <charT, Traits, Alloc> String; |
| |
| // construct an expected transformed string out of the weight arguments |
| String expected; |
| |
| if (sizeof (charT) == sizeof (char)) { |
| for (int i = 0; i < 3; ++i) { |
| for (int k = 0; k < 2; ++k) { |
| if (w [i][k] != IGNORE) { |
| while (w [i][k] > _RWSTD_CHAR_MAX) { |
| expected += charT (_RWSTD_CHAR_MAX); |
| w [i][k] -= _RWSTD_CHAR_MAX; |
| } |
| expected += charT (w [i][k]); |
| } |
| else if (i == 2 && k == 0 && w3_is_fp) |
| expected += charT (_RWSTD_CHAR_MAX); |
| } |
| |
| // mark the end of the pass |
| expected += charT (1); |
| } |
| } |
| else { |
| for (int i = 0; i < 3; ++i) { |
| for (int k = 0; k < 2; ++k) { |
| if (w [i][k] != IGNORE) { |
| expected += charT (w [i][k]); |
| } |
| else if (i == 2 && k == 0 && w3_is_fp) |
| expected += charT (_RWSTD_WCHAR_MAX); |
| } |
| |
| expected += charT (1); |
| } |
| } |
| |
| // get the transformed string |
| const String actual = co.transform (&ch, &ch + 1); |
| |
| // make sure the strings are equal |
| rw_assert (expected == actual, __FILE__, __LINE__, |
| "collate<%s>::transform (\"%c\", ...) == %{*.*Ac}, " |
| "got %{*.*Ac}", charTname, ch, sizeof (charT), |
| expected.size (), expected.c_str (), sizeof (charT), |
| actual.size (), actual.c_str ()); |
| } |
| |
| /**************************************************************************/ |
| |
| template <class charT> |
| void |
| check_libstd (const char* charTname) |
| { |
| rw_info (0, __FILE__, __LINE__, |
| "libstd std::collate<%s>::transform () sorting " |
| "file test", charTname); |
| |
| rw_info (0, __FILE__, __LINE__, |
| "libstd std::collate<%s>::compare () sorting " |
| "file test", charTname); |
| |
| |
| // This test works by using a series of sorted input files |
| // we randomize the words in the input files and sort them using |
| // the proper locale's collate facet. This test will automatically |
| // generate the required locales. |
| |
| static const char* const locales[][3] = { |
| // |
| // +-- locale name |
| // | +-- character set |
| // | | +-- input file name |
| // | | | |
| // V V V |
| { "cs_CZ", "ISO-8859-2", "collate.cs_CZ.in" }, // Czech, Czech Rep. |
| { "da_DK", "ISO-8859-1", "collate.da_DK.in" }, // Danish, Denmark |
| { "en_US", "ISO-8859-1", "collate.en_US.in" }, // English, US |
| { "hr_HR", "ISO-8859-2", "collate.hr_HR.in" }, // Hungarian, Hungary |
| { "sv_SE", "ISO-8859-1", "collate.sv_SE.in" }, // Swedish, Sweden |
| { "th_TH", "TIS-620", "collate.th_TH.in" } // Thai, Thailand |
| }; |
| |
| const std::size_t nlocales = sizeof locales / sizeof *locales; |
| |
| typedef std::char_traits<charT> Traits; |
| typedef std::allocator<charT> Allocator; |
| typedef std::basic_string<charT, Traits, Allocator> String; |
| |
| for (std::size_t i = 0; i < nlocales; ++i) { |
| |
| const char* const locname = |
| rw_localedef ("-w --no_position", |
| locales [i][0], locales [i][1], 0); |
| |
| if (locname) { |
| |
| std::locale loc; |
| |
| _TRY { |
| loc = std::locale (locname); |
| } |
| _CATCH (...) { |
| const char* const var = std::getenv (LOCALE_ROOT); |
| |
| rw_assert (false, __FILE__, __LINE__, |
| "std::locale(\"%s\") unexpectedly threw " |
| "an exception; " LOCALE_ROOT "=%s", |
| locname, var ? var : "(null)"); |
| continue; |
| } |
| |
| const std::collate<charT> &co = |
| _STD_USE_FACET (std::collate<charT>, loc); |
| |
| co._C_opts |= co._C_use_libstd; |
| co._C_opts &= ~co._C_use_libc; |
| |
| typedef std::codecvt<charT, char, std::mbstate_t> CodeCvt; |
| |
| const CodeCvt &cvt = _STD_USE_FACET (CodeCvt, loc); |
| |
| cvt._C_opts |= cvt._C_use_libstd; |
| cvt._C_opts &= ~cvt._C_use_libc; |
| |
| // 'in' holds the strings from the input file and is there |
| // sorting will take place. |
| String in [1000]; |
| |
| // out holds the strings located in the output file |
| String out [1000]; |
| |
| const char* in_path = std::getenv ("TOPDIR"); |
| if (!in_path || !*in_path) { |
| std::fprintf (stderr, "TOPDIR not defined or empty"); |
| std::exit (1); |
| } |
| |
| std::string path (in_path); |
| path += SLASH TESTS_ETC_PATH SLASH; |
| path += locales [i][2]; |
| |
| std::FILE* const f = std::fopen (path.c_str (), "r"); |
| if (!f) { |
| rw_assert (false, __FILE__, __LINE__, |
| "file \"%s\" could not be opened", path.c_str ()); |
| break; |
| } |
| |
| std::size_t j = 0; |
| |
| while (1) { |
| |
| char next_line [256]; |
| |
| if (0 != std::fgets (next_line, 256, f)) { |
| |
| std::size_t line_len = std::strlen (next_line); |
| |
| // get rid of the newline character |
| next_line [--line_len] = '\0'; |
| |
| // convert from external to internal encoding |
| // (both of which might be the same type) |
| charT to [256]; |
| const char* from_next; |
| charT* to_next; |
| |
| static std::mbstate_t initial; |
| std::mbstate_t mbs = initial; |
| |
| const std::codecvt_base::result res = |
| cvt.in (mbs, |
| next_line, next_line + line_len + 1, |
| from_next, |
| to, to + sizeof to / sizeof *to, |
| to_next); |
| |
| if (cvt.ok == res) { |
| in [j] = to; |
| out [j] = to; |
| } |
| else if (cvt.noconv == res) { |
| in [j] = (charT*)next_line; |
| out [j] = (charT*)next_line; |
| } |
| |
| j++; |
| } |
| else |
| break; |
| } |
| // close the file |
| std::fclose (f); |
| |
| // now bubble sort the items in the array |
| std::size_t idx; |
| std::size_t idx2; |
| String tmp; |
| String tmp2; |
| |
| bool flipped; |
| |
| if (j > 1) { |
| idx = 1; |
| do { |
| flipped = false; |
| for (idx2 = j - 1; idx2 >= idx; --idx2) { |
| |
| const std::size_t idx1 = idx2 - 1; |
| |
| if (co.compare (in [idx1].c_str (), |
| in [idx1].c_str () + in [idx1].size (), |
| in [idx2].c_str (), |
| in [idx2].c_str () + in [idx2].size ()) |
| > 0) { |
| in [idx1].swap (in [idx2]); |
| flipped = true; |
| } |
| } |
| } while (++idx < j && flipped); |
| } |
| |
| // the items are sorted now lets make sure that they are sorted |
| // the same way they are sorted in the output file. |
| std::size_t nfail = 0; |
| |
| for (std::size_t k = 0; k < j; ++k) { |
| |
| if (in [k] != out [k]) { |
| |
| nfail++; |
| |
| rw_assert (false, __FILE__, __LINE__, |
| "%{S} != %{S} at line %u of %s", |
| &out [k], &in [k], |
| k + 1, locales [i][2]); |
| |
| } |
| } |
| |
| rw_assert (!nfail, __FILE__, __LINE__, |
| "collate<%s>::compare() failed %d times", |
| charTname, nfail); |
| } |
| } |
| } |
| |
| /**************************************************************************/ |
| |
| |
| template <class charT> |
| void |
| check_hash_eff (const char* charTname) |
| { |
| // test effectiveness of hash function |
| rw_info (0, __FILE__, __LINE__, |
| "std::collate<%s>::hash () -- effectiveness", charTname); |
| |
| // since the same hash algorithm is used for both byname and non-byname |
| // facets, simply set up a std::locale that uses the "C" locale |
| std::locale loc ("C"); |
| const std::collate<charT> &co = |
| _STD_USE_FACET (std::collate<charT>, loc); |
| |
| |
| int nfail = 0; |
| |
| charT s[100]; |
| bool next = true; |
| |
| // generate `N' unique strings and hash them, storing each value |
| static const std::size_t N = 100; |
| long hashed [N] = { 0 }; |
| |
| std::size_t k; |
| for (k = 1; k != N && next; ++k) { |
| // generate a unique string |
| gen_str (s, k); |
| |
| // compute hash value |
| hashed [k] = co.hash (s, s + std::char_traits<charT>::length(s)); |
| } |
| |
| // sort hashed values, then remove all duplicates |
| std::sort (hashed, hashed + k); |
| k = std::unique (hashed, hashed + k) - hashed; |
| |
| // assert that the probability of a collision is less than 1% |
| // according to 22.2.4.1, p3, the likelihood should be very small, |
| // approaching 1.0 / numeric_limits<unsigned long>::max() |
| if (N - k > N /100) { |
| nfail++; |
| rw_assert (false, __FILE__, __LINE__, |
| "collate<%s>::do_hash (const char_type*, " |
| "const char_type*); " |
| "probability of collision %f", |
| charTname, double (N - k) / N); |
| } |
| |
| rw_assert (!nfail, __FILE__, __LINE__, |
| "collate<%s>::do_hash () failed %d times", charTname, |
| nfail); |
| |
| } |
| |
| /**************************************************************************/ |
| |
// Checks the consistency of the collate<charT> facet of the locale
// named `locname' on two character sequences that may contain embedded
// NULs:
//   - compare() must agree with a lexicographical comparison of the
//     results of transform() on the same sequences, and
//   - compare() must return 0 if and only if the sequences are equal.
//
//   charTname   name of charT used in diagnostics
//   locname     name of the locale to construct
//   s1, s1_len  first sequence and its length
//   s2, s2_len  second sequence and its length
//
// Constructing std::locale may throw; the exception is not handled here.
template <class charT>
void
check_NUL_collate (const char* charTname, const char* locname,
                   const charT* s1, std::size_t s1_len,
                   const charT* s2, std::size_t s2_len)
{
    std::locale loc (locname);

    typedef std::collate<charT>           Collate;
    typedef typename Collate::string_type String;

    const Collate &col = std::use_facet<Collate> (loc);

    const String x1 = col.transform (s1, s1 + s1_len);
    const String x2 = col.transform (s2, s2 + s2_len);

    const int colcmp = col.compare (s1, s1 + s1_len, s2, s2 + s2_len);

    // normalize the result of basic_string::compare() to -1, 0, or +1
    int lexcmp = x1.compare (x2);
    lexcmp = lexcmp < -1 ? -1 : 1 < lexcmp ? 1 : lexcmp;

    rw_assert (colcmp == lexcmp, __FILE__, __LINE__,
               "collate<%s>::compare (%{*.*Ac}, %{*.*Ac}) = %d, "
               "lexicographical comparison of transformed strings = %d, "
               "mismatch in locale (\"%s\")", charTname,
               sizeof (charT), s1_len, s1,
               sizeof (charT), s2_len, s2,
               colcmp, lexcmp, locname);

    // compare the sequences as strings of charT rather than of char
    // so that wide characters are not narrowed before the comparison
    const bool eq = String (s1, s1 + s1_len) == String (s2, s2 + s2_len);

    rw_assert (bool (colcmp) != eq, __FILE__, __LINE__,
               "collate<%s>::compare (%{*.*Ac}, %{*.*Ac}) = %d, "
               "lexicographical compare = %s, mismatch in locale (\"%s\")",
               charTname,
               sizeof (charT), s1_len, s1,
               sizeof (charT), s2_len, s2, colcmp,
               (eq ? "true" : "false"), locname);
}
| |
| static void |
| check_NUL_collate (const char* charTname, const char* locname, char) |
| { |
| #define T(s, t) \ |
| check_NUL_collate (charTname, locname, \ |
| s, sizeof s / sizeof *s - 1, \ |
| t, sizeof t / sizeof *t - 1) |
| |
| T ("", ""); |
| T ("", "\0"); |
| T ("", "\0\0"); |
| T ("\0", ""); |
| T ("\0", "\0"); |
| T ("\0", "\0\0"); |
| T ("a", "\0"); |
| T ("a", "\0a"); |
| T ("a", "a\0"); |
| T ("a", "a\0\0"); |
| T ("a\0", "a"); |
| T ("a\0", "a\0"); |
| T ("a\0", "a\0\0"); |
| T ("\0a", ""); |
| T ("\0a", "\0"); |
| T ("\0a", "\0a"); |
| T ("\0a", "\0a\0"); |
| T ("a\0\0b", ""); |
| T ("a\0\0b", "a"); |
| T ("a\0\0b", "ab"); |
| T ("a\0\0b", "a\0"); |
| T ("a\0\0b", "a\0\0"); |
| T ("a\0\0b", "a\0b"); |
| T ("a\0\0b", "a\0\0b"); |
| } |
| |
| #if !defined (_RWSTD_NO_WCHAR_T) |
| |
| static void |
| check_NUL_collate (const char* charTname, const char* locname, wchar_t) |
| { |
| T (L"", L""); |
| T (L"", L"\0"); |
| T (L"", L"\0\0"); |
| T (L"\0", L""); |
| T (L"\0", L"\0"); |
| T (L"\0", L"\0\0"); |
| T (L"a", L"\0"); |
| T (L"a", L"\0a"); |
| T (L"a", L"a\0"); |
| T (L"a", L"a\0\0"); |
| T (L"a\0", L"a"); |
| T (L"a\0", L"a\0"); |
| T (L"a\0", L"a\0\0"); |
| T (L"\0a", L""); |
| T (L"\0a", L"\0"); |
| T (L"\0a", L"\0a"); |
| T (L"\0a", L"\0a\0"); |
| T (L"a\0\0b", L""); |
| T (L"a\0\0b", L"a"); |
| T (L"a\0\0b", L"ab"); |
| T (L"a\0\0b", L"a\0"); |
| T (L"a\0\0b", L"a\0\0"); |
| T (L"a\0\0b", L"a\0b"); |
| T (L"a\0\0b", L"a\0\0b"); |
| T (L"a\0\0b\0", L"a\0\0b"); |
| T (L"a\0\0b\0\0", L"a\0\0b"); |
| T (L"a\0\0b\0\0", L"a\0\0b\0"); |
| T (L"a\0\0b\0\0", L"a\0\0bc"); |
| |
| #undef T |
| } |
| |
| #endif // _RWSTD_NO_WCHAR_T |
| |
| template <class charT> |
| void |
| check_NUL (const char* charTname) |
| { |
| // Verify that the collate facet correctly handles character |
| // sequences with embedded NULs. |
| |
| rw_info (0, __FILE__, __LINE__, |
| "std::collate<%s>::compare () with embedded NUL's", charTname); |
| |
| size_t i = 0; |
| |
| for (const char* locname = rw_locales (LC_COLLATE); |
| *locname; locname += std::strlen (locname) + 1) { |
| try { |
| check_NUL_collate (charTname, locname, charT ()); |
| } |
| catch (...) { |
| } |
| } |
| } |
| |
| /**************************************************************************/ |
| |
| template <class charT> |
| void |
| do_test (const char* charTname) |
| { |
| check_libstd_test_locale<charT> (charTname); |
| check_libstd<charT> (charTname); |
| check_libc<charT> (charTname); |
| check_NUL<charT> (charTname); |
| check_hash_eff<charT> (charTname); |
| } |
| |
| static int |
| run_test (int /*argc*/, char* /*argv*/ []) |
| { |
| do_test<char> ("char"); |
| |
| #if !defined (_RWSTD_NO_WCHAR_T) |
| do_test<wchar_t> ("wchar_t"); |
| #endif // _RWSTD_NO_WCHAR_T |
| |
| return 0; |
| } |
| |
| int |
| main (int argc, char* argv []) |
| { |
| return rw_test (argc, argv, __FILE__, |
| "[lib.category.collate]", |
| "22.2.4 The collate category", |
| run_test, "", 0); |
| } |
| |