tests/localization/22.locale.collate.cpp - stdcxx - Git at Google

 /***************************************************************************
  *
  * 22.locale.collate.cpp -- tests for collate-facet member functions
  *
  * $Id$
  *
  ***************************************************************************
  *
  * Licensed to the Apache Software  Foundation (ASF) under one or more
  * contributor  license agreements.  See  the NOTICE  file distributed
  * with  this  work  for  additional information  regarding  copyright
  * ownership.   The ASF  licenses this  file to  you under  the Apache
  * License, Version  2.0 (the  "License"); you may  not use  this file
  * except in  compliance with the License.   You may obtain  a copy of
  * the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the  License is distributed on an  "AS IS" BASIS,
  * WITHOUT  WARRANTIES OR CONDITIONS  OF ANY  KIND, either  express or
  * implied.   See  the License  for  the  specific language  governing
  * permissions and limitations under the License.
  *
  * Copyright 1994-2008 Rogue Wave Software.
  *
  **************************************************************************/

 #include <locale>     // for collate, locale
 #include <string>     // for string

 #include <algorithm>  // for sort and unique
 #include <climits>    // for UCHAR_MAX
 #include <clocale>    // for LC_COLLATE, setlocale
 #include <cstdlib>    // for exit()
 #include <cstdio>     // for fprintf()
 #include <cstring>    // for strcmp(), strcoll(), ...
 #include <cwchar>     // for wcscoll()

 #include <rw_driver.h>
 #include <rw_environ.h>
 #include <rw_file.h>
 #include <rw_locale.h>
 #include <rw_process.h>

 #define IGNORE    0
 #define STR_SIZE  16
 #define LOCNAME_SIZE  256

 #if _RWSTD_PATH_SEP == '/'
 #  define SLASH                 "/"
 #else
 #  define SLASH                 "\\"
 #endif

 // strings declared extern to work around a SunPro bug (PR #28124)
 // get the source root
 #define RELPATH                 "etc" SLASH "nls"
 #define TESTS_ETC_PATH          "tests" SLASH "etc"

 // the root of the locale directory (RWSTD_LOCALE_ROOT)
 #define LOCALE_ROOT             "RWSTD_LOCALE_ROOT"
 const char* locale_root;

 /**************************************************************************/

 // These overloads are necessary in our template
 // functions so that we can make a single function call reguardless
 // of the charT we are using

 int c_strcoll (const char* s1, const char* s2)
 {
     const int ret = std::strcoll(s1, s2);
     return ret ? ret > 0 ? 1 : -1 : 0;
 }

 std::size_t c_xfrm (char* to, const char* from, std::size_t size)
 {
     char safety_buf [8] = { 0 };

     if (0 == to && 0 == size) {
         // prevent buggy implementations (such as MSVC 8) from trying
         // to write to the destination buffer even though it's 0 and
         // its size is zero (see stdcxx-69)
         to = safety_buf;
     }

     std::size_t n = std::strxfrm (to, from, size);

     if (to && to != safety_buf)
         n = std::strlen (to);

     return n;
 }

 std::size_t c_strlen (const char* s1)
 {
     return std::strlen (s1);
 }

 const char* narrow (char* dst, const char* src)
 {
     if (src == dst || !src || !dst)
         return src;

     std::memcpy (dst, src, std::strlen (src) + 1);
     return dst;
 }

 const char* widen (char* dst, const char* src)
 {
     if (src == dst || !src || !dst)
         return src;

     std::memcpy (dst, src, std::strlen (src) + 1);
     return dst;
 }

 #if !defined (_RWSTD_NO_WCHAR_T)

 int c_strcoll (const wchar_t* s1, const wchar_t* s2)
 {
     const int ret = std::wcscoll(s1, s2);
     return ret ? ret > 0 ? 1 : -1 : 0;
 }

 std::size_t c_xfrm (wchar_t* to, const wchar_t* from, std::size_t size)
 {
     std::size_t n = 0;

 #if !defined (_MSC_VER) || _MSC_VER > 1200

     wchar_t safety_buf [8] = { 0 };

     if (0 == to && 0 == size) {
         // prevent buggy implementations (such as MSVC 8) from trying
         // to write to the destination buffer even though it's 0 and
         // its size is zero (see stdcxx-69)
         to = safety_buf;
     }

     n = std::wcsxfrm (to, from, size);

     if (to && to != safety_buf)
         n = std::wcslen (to);

 #else   // MSVC 6 and prior

     // working around an MSVC 6.0 libc bug (PR #26437)
     if (to) {
         std::wcsxfrm (to, from, size);
         n = std::wcslen (to);
     }
     else {
         wchar_t tmp [1024];

         n = std::wcslen (from);
         _RWSTD_ASSERT (n < sizeof tmp / sizeof *tmp);

         std::wcscpy (tmp, from);
         std::wcsxfrm (tmp, from, sizeof tmp / sizeof *tmp);

         n = std::wcslen (tmp);
     }

 #endif   // MSVC 6

     return n;
 }

 std::size_t c_strlen (const wchar_t* s1)
 {
     return std::wcslen (s1);
 }

 const wchar_t* widen (wchar_t* dst, const char* src)
 {
     static wchar_t buf [4096];

     if (!src)
         return 0;

     if (!dst)
         dst = buf;

     std::size_t len = std::strlen (src);

     _RWSTD_ASSERT (len < sizeof buf /sizeof *buf);

     len = std::mbstowcs (dst, src, sizeof buf / sizeof *buf);

     if (std::size_t (-1) == len)
         *dst = 0;

     return dst;
 }

 const char* narrow (char* dst, const wchar_t* src)
 {
     static char buf [4096];

     if (!src)
         return 0;

     if (!dst)
         dst = buf;

     std::size_t len = std::wcslen (src);

     _RWSTD_ASSERT (len < sizeof buf);

     len = std::wcstombs (dst, src, sizeof buf / sizeof *buf);

     if (std::size_t (-1) == len)
         *dst = 0;

     return dst;
 }

 #endif   //_RWSTD_NO_WCHAR_T

 /**************************************************************************/

 template <class charT>
 void
 gen_str (charT* str, std::size_t size)
 {
     // generate a random string with the given size
     if (!size)
         return;

     // use ASCII characters in the printable range
     for (std::size_t i = 0; i < size - 1; ++i)
         str [i] = ' ' + std::rand () % ('~' - ' ');

     str [size - 1] = charT ();
 }

 /**************************************************************************/

 template <class charT>
 void
 check_libc_locale (const char* charTname, char const* locname,
                    int (&nfail) [3])
 {
     typedef std::char_traits<charT> traits_type;
     typedef std::allocator<charT> allocator_type;
     typedef std::basic_string <charT, traits_type, allocator_type> string_type;

     std::locale loc (locname);

     const std::collate<charT> &co =
         _STD_USE_FACET (std::collate<charT>, loc);

     co._C_opts |= co._C_use_libc;
     co._C_opts &= ~co._C_use_libstd;

     for (int nloops = 0; nloops < 10; ++nloops) {

         charT str1 [STR_SIZE] = { 0 };
         charT str2 [STR_SIZE] = { 0 };

         // generate two random NUL-terminated strings
         gen_str (str1, sizeof str1 / sizeof *str1);
         gen_str (str2, sizeof str2 / sizeof *str2);

         // call transform on the generated string
         // not including the terminating NUL
         string_type out = co.transform (
             str1, str1 + sizeof str1 / sizeof *str1 - 1);

         // get the size of the buffer needed to hold the
         // transformed string (with the terminating NUL)
         std::size_t size = 1U + c_xfrm (0, str1, 0);

         // prevent errors caused by huge return values (e.g., MSVC)
         if (size > STR_SIZE * 64)
             size = 0;

         string_type c_out;

         if (size) {
             c_out.resize (size);

             // call the C-library transform function
             size = c_xfrm (&c_out [0], str1, size);

             if (size > STR_SIZE * 64)
                 size = 0;

             // shrink to fit (chop off the terminating NUL)
             c_out.resize (size);
         }

         if (out != c_out)
             ++nfail [0];

         // make sure the output is the same
         rw_assert (out == c_out, __FILE__, __LINE__,
                    "%d. collate<%s>::transform(%{*.*Ac}, ...) "
                    "== %{*.*Ac}, got %{*.*Ac} in locale(\"%s\")",
                    nloops, charTname,
                    sizeof (charT), sizeof str1 / sizeof *str1, str1,
                    sizeof (charT), c_out.size (), c_out.c_str (),
                    sizeof (charT),   out.size (),   out.c_str (),
                    locname);

         // now call compare on the two generated strings
         int ret1 = co.compare (
             str1, str1 + sizeof str1 / sizeof *str1,
             str2, str2 + sizeof str2 / sizeof *str2);

         // call the C-library comparison function
         int ret2 = c_strcoll (str1, str2);

         if (ret1 != ret2)
             ++nfail [1];

         // make sure the results are the same
         rw_assert (ret1 == ret2, __FILE__, __LINE__,
                    "%d. collate<%s>::compare(%{*.*Ac}, ..., "
                    "%{*.*Ac}, ...) == %d, got %d in locale(\"%s\")",
                    nloops, charTname,
                    sizeof (charT), sizeof str1 / sizeof *str1, str1,
                    sizeof (charT), sizeof str2 / sizeof *str2, str2,
                    ret2, ret1, locname);

         // two strings that compare identically must hash
         // identically as well.  Calling hash on the same string is
         // not very conclusive but generating strings that have exactly
         // the same weights is not possible without knowing all the
         // weight orderings
         const long hashNum1 =
             co.hash (str1, str1 + sizeof str1 / sizeof *str1);

         const long hashNum2 =
             co.hash (str1, str1 + sizeof str1 / sizeof *str1);

         if (hashNum1 != hashNum2)
             ++nfail [2];

         rw_assert (hashNum1 == hashNum2, __FILE__, __LINE__,
                    "%d. collate<%s>::hash(%{*.*Ac}, ...) == %d, "
                    "got %d in locale(\"%s\")",
                    nloops, charTname,
                    sizeof (charT), sizeof str1 / sizeof *str1, str1,
                    hashNum1, hashNum2, locname);
     }
 }

 template <class charT>
 void
 check_libc (const char* charTname)
 {
     // the libc implementation of the library should act the same as
     // the c-library.  Go through all the locales, generate some random
     // strings and make sure that the following holds true:
     // transform acts like strxfrm and wcsxfrm,
     // compare acts like strcoll and wcscoll

     rw_info (0, __FILE__, __LINE__,
              "libc std::collate<%s>::transform ()", charTname);

     rw_info (0, __FILE__, __LINE__,
              "libc std::collate<%s>::compare ()", charTname);

     rw_info (0, __FILE__, __LINE__,
              "std::collate<%s>::hash ()", charTname);

     int nfail [3] = { 0 };
     char curlocname [256];

     for (const char* locname = rw_locales (LC_COLLATE);
          *locname; locname += std::strlen (locname) + 1) {

         std::strcpy (curlocname, std::setlocale (LC_COLLATE, 0));

         if (0 == std::setlocale (LC_COLLATE, locname))
             continue;

         int max = MB_CUR_MAX;

         if (max == 1) {
             //
             // FIXME test variable length multibyte encodings
             //
             _TRY {
                 check_libc_locale<charT> (charTname, locname, nfail);
             }
             _CATCH (...) {
                 rw_assert (false, __FILE__, __LINE__,
                            "locale(\"%s\") threw an exception",
                            locname);
             }
         }

         std::setlocale (LC_COLLATE, curlocname);
     }


     rw_assert (0 == nfail [0], __FILE__, __LINE__,
                "collate<%s>::transform () failed %d times",
                charTname, nfail [0]);

     rw_assert (0 == nfail [1], __FILE__, __LINE__,
                "collate<%s>::compare () failed %d times",
                charTname, nfail [1]);

     rw_assert (0 == nfail [2], __FILE__, __LINE__,
                "collate<%s>::hash () failed %d times",
                charTname, nfail [2]);
 }

 /**************************************************************************/

 static const char*
 make_test_locale ()
 {
     // Create a synthetic locale to exercises as many different parts
     // of the collate standard as possible.

     static const char charmap [] = {
         //
         // The portable character set
         //
         "<code_set_name> \"UTF-8\"\n"
         "<mb_cur_max> 1\n<mb_cur_min> 1\n"
         "CHARMAP\n"
         "<NUL> \\x00\n<SOH> \\x01\n<STX> \\x02\n<ETX> \\x03\n<EOT> \\x04\n"
         "<ENQ> \\x05\n<ACK> \\x06\n<BEL> \\x07\n"
         "<backspace> \\x08\n<tab> \\x09\n<newline> \\x0a\n"
         "<vertical-tab> \\x0b\n<form-feed> \\x0c\n"
         "<carriage-return> \\x0d\n"
         "<SO> \\x0e\n<SI> \\x0f\n<DLE> \\x10\n<DC1> \\x11\n<DC2> \\x12\n"
         "<DC3> \\x13\n<DC4> \\x14\n<NAK> \\x15\n<SYN> \\x16\n<ETB> \\x17\n"
         "<CAN> \\x18\n<EM> \\x19\n<SUB> \\x1a\n<ESC> \\x1b\n<IS4> \\x1c\n"
         "<IS3> \\x1d\n<IS2> \\x1e\n<IS1> \\x1f\n"
         "<space> \\x20\n"
         "<exclamation-mark> \\x21\n"
         "<quotation-mark> \\x22\n"
         "<number-sign> \\x23\n"
         "<dollar-sign> \\x24\n"
         "<percent-sign> \\x25\n"
         "<ampersand> \\x26\n"
         "<apostrophe> \\x27\n"
         "<left-parenthesis> \\x28\n"
         "<right-parenthesis> \\x29\n"
         "<asterisk> \\x2a\n"
         "<plus-sign> \\x2b\n"
         "<comma> \\x2c\n"
         "<hyphen> \\x2d\n"
         "<period> \\x2e\n"
         "<slash> \\x2f\n"
         "<zero> \\x30\n<one> \\x31\n<two> \\x32\n<three> \\x33\n"
         "<four> \\x34\n<five> \\x35\n<six> \\x36\n<seven> \\x37\n"
         "<eight> \\x38\n<nine> \\x39\n"
         "<colon> \\x3a\n"
         "<semicolon> \\x3b\n"
         "<less-than-sign> \\x3c\n"
         "<equals-sign> \\x3d\n"
         "<greater-than-sign> \\x3e\n"
         "<question-mark> \\x3f\n"
         "<commercial-at> \\x40\n"
         "<A> \\x41\n<B> \\x42\n<C> \\x43\n<D> \\x44\n<E> \\x45\n<F> \\x46\n"
         "<G> \\x47\n<H> \\x48\n<I> \\x49\n<J> \\x4a\n<K> \\x4b\n<L> \\x4c\n"
         "<M> \\x4d\n<N> \\x4e\n<O> \\x4f\n<P> \\x50\n<Q> \\x51\n<R> \\x52\n"
         "<S> \\x53\n<T> \\x54\n<U> \\x55\n<V> \\x56\n<W> \\x57\n<X> \\x58\n"
         "<Y> \\x59\n<Z> \\x5a\n"
         "<left-square-bracket> \\x5b\n"
         "<backslash> \\x5c\n"
         "<right-square-bracket> \\x5d\n"
         "<circumflex> \\x5e\n"
         "<underscore> \\x5f\n"
         "<grave-accent> \\x60\n"
         "<a> \\x61\n<b> \\x62\n<c> \\x63\n<d> \\x64\n<e> \\x65\n<f> \\x66\n"
         "<g> \\x67\n<h> \\x68\n<i> \\x69\n<j> \\x6a\n<k> \\x6b\n<l> \\x6c\n"
         "<m> \\x6d\n<n> \\x6e\n<o> \\x6f\n<p> \\x70\n<q> \\x71\n<r> \\x72\n"
         "<s> \\x73\n<t> \\x74\n<u> \\x75\n<v> \\x76\n<w> \\x77\n<x> \\x78\n"
         "<y> \\x79\n<z> \\x7a\n"
         "<left-brace> \\x7b\n"
         "<vertical-line> \\x7c\n"
         "<right-brace> \\x7d\n"
         "<tilde> \\x7e\n"
         "<DEL> \\x7f\n"
         "END CHARMAP\n\n"
     };

     // create a temporary locale definition file that exercises as
     // many different parts of the collate standard as possible
     const char lc_collate [] = {
         "LC_COLLATE\n"
         "script <ALL_FORWARD>\n"
         "collating-element <er> from \"<e><r>\"\n"
         "collating-element <ic> from \"ic\"\n"
         "collating-symbol <LETTER>\n"
         "collating-symbol <COLLATING_ELEMENT>\n"
         "collating-symbol <DIGIT>\n"

         "order_start forward;backward;forward,position\n"
         "<LETTER>\n"
         "<COLLATING_ELEMENT>\n"
         "<DIGIT>\n"

         "<a> <a> <LETTER> IGNORE\n"
         "<b> <b> <LETTER> IGNORE\n"

         // "<c>" will have a non-ignored position ordering
         "<c> <c> <LETTER> <c>\n"

         // try giving "<d>" a many-to-one weight
         "<d> \"<d><a>\" <LETTER> IGNORE\n"

         // try giving "<e>" a decimal value weight
         "<e> \\d139 <LETTER> IGNORE\n"

         // try giving "<f>" an octal value weight
         "<f> \\36 <LETTER> IGNORE\n"

         // try giving "<g>" a hex value weight
         "<g> \\x3A <LETTER> IGNORE\n"

         "<zero> <zero> <DIGIT> IGNORE\n"
         "<one> <one> <DIGIT> <zero>\n"
         "<two> <two> <DIGIT> IGNORE\n"
         "<three> <three> <DIGIT> IGNORE\n"
         "<er> <a> <COLLATING_ELEMENT> IGNORE\n"

         // the <ic> collating element will be equivalent to the letter <c>
         "<ic> <c> <LETTER> <c>\n"
         "UNDEFINED IGNORE IGNORE IGNORE\n"

         "order_end\n"

         // define a section in which all of the orders are forward orders
         "order_start <ALL_FORWARD>;forward;forward;forward\n"
         "<h>\n<i>\n<j>\n<k>\n"
         "order_end\n"

         // reorder the elementes in the <ALL_FORWARD> section to appear
         // after the letter "<g>"
         "reorder-after <g>\n"
         "<h>\n<i>\n<j>\n<k>\n"

         // try to reorder "<a>" after "<b>"
         "reorder-after <b>\n"
         "<a> <a> <LETTER> IGNORE\n"
         "reorder-end\n"

         "\nEND LC_COLLATE\n"
     };

     return rw_create_locale (charmap, lc_collate);
 }

 /**************************************************************************/

 template <class charT>
 void
 test_hash (const char*, const std::collate<charT>&,
            const char*, const char*);

 template <class charT>
 void
 test_string (const char*, const std::collate<charT>&,
              const char*, const char*, int);

 template <class charT>
 void
 test_weight_val (const char*, const std::collate<charT>&,
                  charT, int, int, int, int, bool);

 template <class charT>
 void
 check_libstd_test_locale (const char* charTname)
 {
     rw_info (0, __FILE__, __LINE__,
              "libstd std::collate<%s>::transform () "
              "collate test database", charTname);
     rw_info (0, __FILE__, __LINE__,
              "libstd std::collate<%s>::compare () collate test "
              "database", charTname);
     rw_info (0, __FILE__, __LINE__,
              "libstd std::collate<%s>::hash () collate test "
              "database", charTname);

     const char* const locname = make_test_locale ();
     if (locname) {

         std::locale loc;

         _TRY {
             loc = std::locale (locname);
         }
         _CATCH (...) {
             const char* const var = std::getenv (LOCALE_ROOT);

             rw_assert (false, __FILE__, __LINE__,
                        "std::locale(\"%s\") unexpectedly threw "
                        "an exception; " LOCALE_ROOT "=%s",
                        locname, var ? var : "(null)");
             return;
         }

         const std::collate<charT> &co =
             _STD_USE_FACET (std::collate<charT>, loc);
         co._C_opts |=  co._C_use_libstd;
         co._C_opts &= ~co._C_use_libc;

 #define IGNORE 0

         // first lets make sure that each character was given the
         // correct weight for each level.

 #undef TEST
 #define TEST(ch, w0, w1, w2, w3, w3_is_fp)                              \
         test_weight_val (charTname, co, charT (ch), w0, w1, w2, w3, w3_is_fp)

         TEST ('a',       6, IGNORE,      2, IGNORE, true);
         TEST ('b',       5, IGNORE,      2, IGNORE, true);
         TEST ('c',       7, IGNORE,      2,      7, true);
         TEST ('d',       8,      6,      2, IGNORE, true);
         TEST ('e',     139, IGNORE,      2, IGNORE, true);
         TEST ('f',      30, IGNORE,      2, IGNORE, true);
         TEST ('g',      58, IGNORE,      2, IGNORE, true);
         TEST ('h',      12, IGNORE,     12,     12, false);
         TEST ('i',      13, IGNORE,     13,     13, false);
         TEST ('j',      14, IGNORE,     14,     14, false);
         TEST ('k',      15, IGNORE,     15,     15, false);
         TEST ('0',      16, IGNORE,      4, IGNORE, true);
         TEST ('1',      17, IGNORE,      4,     16, true);
         TEST ('2',      18, IGNORE,      4, IGNORE, true);
         TEST ('3',      19, IGNORE,      4, IGNORE, true);
         TEST ('l',  IGNORE, IGNORE, IGNORE, IGNORE, true);

         // make sure that strings collate the way we expect them to

         // a should collate greater then b
         test_string (charTname, co, "a", "b", 1) ;

         // the collating element "er" should collate after 'a' and 'b'
         // but before 'c'
         test_string (charTname, co, "er", "a", 1);
         test_string (charTname, co, "er", "b", 1);
         test_string (charTname, co, "er", "c", -1);

         // the collating element "ic" should be equivalent to the letter 'c'
         test_string (charTname, co, "ic", "c", 0);


         // two strings that compare identically must hash
         // identically as well.
         // since ic and c are equivalent elements string they should hash
         // the same
         test_hash (charTname, co, "c", "ic");
     }
     else
         rw_assert (false, __FILE__, __LINE__,
                    "unable to create a locale database");
 }

 /**************************************************************************/

 template <class charT>
 void
 test_hash (const char* charTname, const std::collate<charT>& co,
            const char* str1, const char* str2)
 {
     // convert narrow string to a (possibly) wide representation
     charT wstrbuf [256];
     charT wstrbuf2 [256];

     const charT* const wstr = widen (wstrbuf, str1);
     const charT* const wstr2 = widen (wstrbuf2, str2);

     long hashNum1 = co.hash (wstr, wstr + c_strlen (wstr));
     long hashNum2 = co.hash (wstr2, wstr2 + c_strlen (wstr2));

     if (hashNum1 != hashNum2) {
         rw_assert (false, __FILE__, __LINE__,
                    "collate<%s>::hash(%s, ...) returned %d and\n "
                    "collate<%s>::hash(%s, ...) returned %d",
                    charTname, str1,
                    hashNum1, charTname, str2, hashNum2);
     }
 }

 /**************************************************************************/

 template <class charT>
 void
 test_string (const char* charTname, const std::collate<charT>& co,
              const char* str1, const char* str2,
              int expected_val)
 {
     // convert narrow string to a (possibly) wide representation
     charT wstrbuf [256];
     charT wstrbuf2 [256];

     const charT* const wstr = widen (wstrbuf, str1);
     const charT* const wstr2 = widen (wstrbuf2, str2);

     int ret = co.compare (wstr, wstr + c_strlen (wstr),
                           wstr2, wstr2 +  c_strlen(wstr2));
     if (ret != expected_val)
         rw_assert (false, __FILE__, __LINE__,
                    "libstd std::collate<%s>::compare"
                    "(%s, ..., %s, ...) == %d, got %d",
                    charTname, str1, str2, expected_val, ret);
 }

 /**************************************************************************/

 template <class charT>
 void
 test_weight_val (const char* charTname, const std::collate<charT>& co,
                  charT ch, int w1a, int w1b, int w2, int w3, bool w3_is_fp)
 {
     int w [3][2] = { { w1a, w1b }, { w2, IGNORE }, { w3, IGNORE } };

     typedef std::char_traits<charT>                  Traits;
     typedef std::allocator<charT>                    Alloc;
     typedef std::basic_string <charT, Traits, Alloc> String;

     // construct an expected transformed string out of the weight arguments
     String expected;

     if (sizeof (charT) == sizeof (char)) {
         for (int i = 0; i < 3; ++i) {
             for (int k = 0; k < 2; ++k) {
                 if (w [i][k] != IGNORE) {
                     while (w [i][k] > _RWSTD_CHAR_MAX) {
                         expected += charT (_RWSTD_CHAR_MAX);
                         w [i][k] -= _RWSTD_CHAR_MAX;
                     }
                     expected += charT (w [i][k]);
                 }
                 else if (i == 2 && k == 0 && w3_is_fp)
                     expected += charT (_RWSTD_CHAR_MAX);
             }

             // mark the end of the pass
             expected += charT (1);
         }
     }
     else {
         for (int i = 0; i < 3; ++i) {
             for (int k = 0; k < 2; ++k) {
                 if (w [i][k] != IGNORE) {
                     expected += charT (w [i][k]);
                 }
                 else if (i == 2 && k == 0 && w3_is_fp)
                     expected += charT (_RWSTD_WCHAR_MAX);
             }

             expected += charT (1);
         }
     }

     // get the transformed string
     const String actual = co.transform (&ch, &ch + 1);

     // make sure the strings are equal
     rw_assert (expected == actual, __FILE__, __LINE__,
                "collate<%s>::transform (\"%c\", ...) == %{*.*Ac}, "
                "got %{*.*Ac}", charTname, ch, sizeof (charT),
                expected.size (), expected.c_str (), sizeof (charT),
                actual.size (), actual.c_str ());
 }

 /**************************************************************************/

 template <class charT>
 void
 check_libstd (const char* charTname)
 {
     rw_info (0, __FILE__, __LINE__,
              "libstd std::collate<%s>::transform () sorting "
              "file test", charTname);

     rw_info (0, __FILE__, __LINE__,
              "libstd std::collate<%s>::compare () sorting "
              "file test", charTname);


     // This test works by using a series of sorted input files
     // we randomize the words in the input files and sort them using
     // the proper locale's collate facet.  This test will automatically
     // generate the required locales.

     static const char* const locales[][3] = {
         //
         // +-- locale name
         // |        +-- character set
         // |        |             +-- input file name
         // |        |             |
         // V        V             V
         { "cs_CZ", "ISO-8859-2", "collate.cs_CZ.in" },   // Czech, Czech Rep.
         { "da_DK", "ISO-8859-1", "collate.da_DK.in" },   // Danish, Denmark
         { "en_US", "ISO-8859-1", "collate.en_US.in" },   // English, US
         { "hr_HR", "ISO-8859-2", "collate.hr_HR.in" },   // Hungarian, Hungary
         { "sv_SE", "ISO-8859-1", "collate.sv_SE.in" },   // Swedish, Sweden
         { "th_TH", "TIS-620",    "collate.th_TH.in" }    // Thai, Thailand
     };

     const std::size_t nlocales = sizeof locales / sizeof *locales;

     typedef std::char_traits<charT>                     Traits;
     typedef std::allocator<charT>                       Allocator;
     typedef std::basic_string<charT, Traits, Allocator> String;

     for (std::size_t i = 0; i < nlocales; ++i) {

         const char* const locname =
             rw_localedef ("-w --no_position",
                           locales [i][0], locales [i][1], 0);

         if (locname) {

             std::locale loc;

             _TRY {
                 loc = std::locale (locname);
             }
             _CATCH (...) {
                 const char* const var = std::getenv (LOCALE_ROOT);

                 rw_assert (false, __FILE__, __LINE__,
                            "std::locale(\"%s\") unexpectedly threw "
                            "an exception; " LOCALE_ROOT "=%s",
                            locname, var ? var : "(null)");
                 continue;
             }

             const std::collate<charT> &co =
                 _STD_USE_FACET (std::collate<charT>, loc);

             co._C_opts |= co._C_use_libstd;
             co._C_opts &= ~co._C_use_libc;

             typedef std::codecvt<charT, char, std::mbstate_t> CodeCvt;

             const CodeCvt &cvt = _STD_USE_FACET (CodeCvt, loc);

             cvt._C_opts |= cvt._C_use_libstd;
             cvt._C_opts &= ~cvt._C_use_libc;

             // 'in' holds the strings from the input file and is there
             // sorting will take place.
             String in [1000];

             // out holds the strings located in the output file
             String out [1000];

             const char* in_path = std::getenv ("TOPDIR");
             if (!in_path || !*in_path) {
                 std::fprintf (stderr, "TOPDIR not defined or empty");
                 std::exit (1);
             }

             std::string path (in_path);
             path += SLASH TESTS_ETC_PATH SLASH;
             path += locales [i][2];

             std::FILE* const f = std::fopen (path.c_str (), "r");
             if (!f) {
                 rw_assert (false, __FILE__, __LINE__,
                            "file \"%s\" could not be opened", path.c_str ());
                 break;
             }

             std::size_t j = 0;

             while (1) {

                 char next_line [256];

                 if (0 != std::fgets (next_line, 256, f)) {

                     std::size_t line_len = std::strlen (next_line);

                     // get rid of the newline character
                     next_line [--line_len] = '\0';

                     // convert from external to internal encoding
                     // (both of which might be the same type)
                     charT to [256];
                     const char* from_next;
                     charT*      to_next;

                     static std::mbstate_t initial;
                     std::mbstate_t mbs = initial;

                     const std::codecvt_base::result res =
                         cvt.in (mbs,
                                 next_line, next_line + line_len + 1,
                                 from_next,
                                 to, to + sizeof to / sizeof *to,
                                 to_next);

                     if (cvt.ok == res) {
                         in [j]  = to;
                         out [j] = to;
                     }
                     else if (cvt.noconv == res) {
                         in [j]  = (charT*)next_line;
                         out [j] = (charT*)next_line;
                     }

                     j++;
                 }
                 else
                     break;
             }
             // close the file
             std::fclose (f);

             // now bubble sort the items in the array
             std::size_t idx;
             std::size_t idx2;
             String tmp;
             String tmp2;

             bool flipped;

             if (j > 1) {
                 idx = 1;
                 do {
                     flipped = false;
                     for (idx2 = j - 1; idx2 >= idx; --idx2) {

                         const std::size_t idx1 = idx2 - 1;

                         if (co.compare (in [idx1].c_str (),
                                         in [idx1].c_str () + in [idx1].size (),
                                         in [idx2].c_str (),
                                         in [idx2].c_str () + in [idx2].size ())
                             > 0) {
                             in [idx1].swap (in [idx2]);
                             flipped = true;
                         }
                     }
                 } while (++idx < j && flipped);
             }

             // the items are sorted now lets make sure that they are sorted
             // the same way they are sorted in the output file.
             std::size_t nfail = 0;

             for (std::size_t k = 0; k < j; ++k) {

                 if (in [k] != out [k]) {

                     nfail++;

                     rw_assert (false, __FILE__, __LINE__,
                                "%{S} != %{S} at line %u of %s",
                                &out [k], &in [k],
                                k + 1, locales [i][2]);

                 }
             }

             rw_assert (!nfail, __FILE__, __LINE__,
                        "collate<%s>::compare() failed %d times",
                        charTname, nfail);
         }
     }
 }

 /**************************************************************************/


 template <class charT>
 void
 check_hash_eff (const char* charTname)
 {
     // test effectiveness of hash function
     rw_info (0, __FILE__, __LINE__,
              "std::collate<%s>::hash () -- effectiveness", charTname);

     // since the same hash algorithm is used for both byname and non-byname
     // facets, simply set up a std::locale that uses the "C" locale
     std::locale loc ("C");
     const std::collate<charT> &co =
         _STD_USE_FACET (std::collate<charT>, loc);


     int nfail = 0;

     charT s[100];
     bool next = true;

     // generate `N' unique strings and hash them, storing each value
     static const std::size_t N = 100;
     long hashed [N] = { 0 };

     std::size_t k;
     for (k = 1; k != N && next; ++k) {
         // generate a unique string
         gen_str (s, k);

         // compute hash value
         hashed [k] = co.hash (s, s + std::char_traits<charT>::length(s));
     }

     // sort hashed values, then remove all duplicates
     std::sort (hashed, hashed + k);
     k = std::unique (hashed, hashed + k) - hashed;

     // assert that the probability of a collision is less than 1%
     // according to 22.2.4.1, p3, the likelihood should be very small,
     // approaching 1.0 / numeric_limits<unsigned long>::max()
     if (N - k > N /100) {
         nfail++;
         rw_assert (false, __FILE__, __LINE__,
                    "collate<%s>::do_hash (const char_type*, "
                    "const char_type*); "
                    "probability of collision %f",
                    charTname, double (N - k) / N);
     }

     rw_assert (!nfail, __FILE__, __LINE__,
                "collate<%s>::do_hash () failed %d times", charTname,
                nfail);

 }

 /**************************************************************************/

 template <class charT>
 void
 check_NUL_collate (const char* charTname, const char* locname,
                    const charT* s1, size_t s1_len,
                    const charT* s2, size_t s2_len)
 {
     std::locale loc (locname);

     typedef typename std::collate<charT> Collate;
     typedef typename Collate::string_type String;

     const Collate &col = std::use_facet<Collate> (loc);

     const String x1 = col.transform (s1, s1 + s1_len);
     const String x2 = col.transform (s2, s2 + s2_len);

     const int colcmp = col.compare (s1, s1 + s1_len, s2, s2 + s2_len);

     int lexcmp = x1.compare (x2);
     lexcmp = lexcmp < -1 ? -1 : 1 < lexcmp ? 1 : lexcmp;

     rw_assert (colcmp == lexcmp, __FILE__, __LINE__,
                "collate<%s>::compare (%{*.*Ac}, %{*.*Ac}) = %d, "
                "lexicographical comparison of transformed strings = %d, "
                "mismatch in locale (\"%s\")", charTname,
                sizeof (charT), s1_len, s1,
                sizeof (charT), s2_len, s2,
                colcmp, lexcmp, locname);

     const bool eq =
         std::string (s1, s1 + s1_len) ==
         std::string (s2, s2 + s2_len);

     rw_assert (bool (colcmp) != eq, __FILE__, __LINE__,
                "collate<%s>::compare (%{*.*Ac}, %{*.*Ac}) = %d, "
                "lexicographical compare = %s, mismatch in locale (\"%s\")",
                charTname,
                sizeof (charT), s1_len, s1,
                sizeof (charT), s2_len, s2, colcmp,
                (eq ? "true" : "false"), locname);
 }

 static void
 check_NUL_collate (const char* charTname, const char* locname, char)
 {
 #define T(s, t)                                     \
     check_NUL_collate (charTname, locname,          \
                        s, sizeof s / sizeof *s - 1, \
                        t, sizeof t / sizeof *t - 1)

     T ("", "");
     T ("", "\0");
     T ("", "\0\0");
     T ("\0", "");
     T ("\0", "\0");
     T ("\0", "\0\0");
     T ("a", "\0");
     T ("a", "\0a");
     T ("a", "a\0");
     T ("a", "a\0\0");
     T ("a\0", "a");
     T ("a\0", "a\0");
     T ("a\0", "a\0\0");
     T ("\0a", "");
     T ("\0a", "\0");
     T ("\0a", "\0a");
     T ("\0a", "\0a\0");
     T ("a\0\0b", "");
     T ("a\0\0b", "a");
     T ("a\0\0b", "ab");
     T ("a\0\0b", "a\0");
     T ("a\0\0b", "a\0\0");
     T ("a\0\0b", "a\0b");
     T ("a\0\0b", "a\0\0b");
 }

 #if !defined (_RWSTD_NO_WCHAR_T)

 static void
 check_NUL_collate (const char* charTname, const char* locname, wchar_t)
 {
     T (L"", L"");
     T (L"", L"\0");
     T (L"", L"\0\0");
     T (L"\0", L"");
     T (L"\0", L"\0");
     T (L"\0", L"\0\0");
     T (L"a", L"\0");
     T (L"a", L"\0a");
     T (L"a", L"a\0");
     T (L"a", L"a\0\0");
     T (L"a\0", L"a");
     T (L"a\0", L"a\0");
     T (L"a\0", L"a\0\0");
     T (L"\0a", L"");
     T (L"\0a", L"\0");
     T (L"\0a", L"\0a");
     T (L"\0a", L"\0a\0");
     T (L"a\0\0b", L"");
     T (L"a\0\0b", L"a");
     T (L"a\0\0b", L"ab");
     T (L"a\0\0b", L"a\0");
     T (L"a\0\0b", L"a\0\0");
     T (L"a\0\0b", L"a\0b");
     T (L"a\0\0b", L"a\0\0b");
     T (L"a\0\0b\0", L"a\0\0b");
     T (L"a\0\0b\0\0", L"a\0\0b");
     T (L"a\0\0b\0\0", L"a\0\0b\0");
     T (L"a\0\0b\0\0", L"a\0\0bc");

 #undef T
 }

 #endif // _RWSTD_NO_WCHAR_T

 template <class charT>
 void
 check_NUL (const char* charTname)
 {
     // Verify that the collate facet correctly handles character
     // sequences with embedded NULs.

     rw_info (0, __FILE__, __LINE__,
              "std::collate<%s>::compare () with embedded NUL's", charTname);

     size_t i = 0;

     for (const char* locname = rw_locales (LC_COLLATE);
          *locname; locname += std::strlen (locname) + 1) {
         try {
             check_NUL_collate (charTname, locname, charT ());
         }
         catch (...) {
         }
     }
 }

 /**************************************************************************/

 template <class charT>
 void
 do_test (const char* charTname)
 {
     check_libstd_test_locale<charT> (charTname);
     check_libstd<charT> (charTname);
     check_libc<charT> (charTname);
     check_NUL<charT> (charTname);
     check_hash_eff<charT> (charTname);
 }

 static int
 run_test (int /*argc*/, char* /*argv*/ [])
 {
     do_test<char> ("char");

 #if !defined (_RWSTD_NO_WCHAR_T)
     do_test<wchar_t> ("wchar_t");
 #endif   // _RWSTD_NO_WCHAR_T

     return 0;
 }

 int
 main (int argc, char* argv [])
 {
     return rw_test (argc, argv, __FILE__,
                     "[lib.category.collate]",
                     "22.2.4 The collate category",
                     run_test, "", 0);
 }