2012-11-4 Liviu Nicoara <lnicoara@apache.org>
Fixes to collate facet and test enhancements:
* src/collate.cpp (__rw_strnxfrm): corrected processing of
embedded NULs. (__rw_wcsnxfrm): same (duplicated code).
(collate_byname<wchar_t>::do_compare): fixed string comparison
return values, re-implemented the wcscoll-based comparison.
* tests/localization/22.locale.collate.cpp: implemented a simpler
collation test for strings with embedded NULs.
git-svn-id: https://svn.apache.org/repos/asf/stdcxx/branches/4.2.x@1405545 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/collate.cpp b/src/collate.cpp
index a08f1c9..d615df3 100644
--- a/src/collate.cpp
+++ b/src/collate.cpp
@@ -488,99 +488,100 @@
while (nchars) {
- // using a C-style cast instead of static_cast to avoid
- // a gcc 2.95.2 bug causing an error on some platforms:
- // static_cast from `void *' to `const char *'
- const char* const last = (const char*)memchr (src, '\0', nchars);
+ if (src [0]) {
- if (0 == last) {
+ // using a C-style cast instead of static_cast to avoid
+ // a gcc 2.95.2 bug causing an error on some platforms:
+ // static_cast from `void *' to `const char *'
+ const char* const last = (const char*)memchr (src, '\0', nchars);
- // no NUL found in the initial portion of the source string
- // that fits into the local temporary buffer; copy as many
- // characters as fit into the buffer
+ if (0 == last) {
- if (bufsize <= nchars) {
- if (pbuf != buf)
- delete[] pbuf;
- pbuf = new char [nchars + 1];
+ // no NUL found in the initial portion of the source string
+ // that fits into the local temporary buffer; copy as many
+ // characters as fit into the buffer
+
+ if (bufsize <= nchars) {
+ if (pbuf != buf)
+ delete[] pbuf;
+ pbuf = new char [nchars + 1];
+ }
+
+ psrc = pbuf;
+ memcpy (psrc, src, nchars);
+
+ // append a terminating NUL and decrement the number
+ // of characters that remain to be processed
+ psrc [nchars] = '\0';
+ src += nchars;
+ nchars = 0;
+ }
+ else {
+ // terminating NUL found in the source buffer
+ nchars -= (last - src) + 1;
+ psrc = _RWSTD_CONST_CAST (char*, src);
+ src += (last - src) + 1;
}
- psrc = pbuf;
- memcpy (psrc, src, nchars);
+#ifdef _RWSTD_OS_SUNOS
+ // Solaris 10u5 on AMD64 overwrites memory past the end of
+ // just_in_case_buf[8], to avoid this, pass a null pointer
+ char* const just_in_case_buf = 0;
+#else
+ // provide a destination buffer to strxfrm() in case
+ // it's buggy (such as MSVC's) and tries to write to
+ // the buffer even if it's 0
+ char just_in_case_buf [8];
+#endif // _RWSTD_OS_SUNOS
- // append a terminating NUL and decrement the number
- // of characters that remain to be processed
- psrc [nchars] = '\0';
- src += nchars;
- nchars = 0;
+ const size_t dst_size = strxfrm (just_in_case_buf, psrc, 0);
+
+ // check for strxfrm() errors
+ if (0 == (dst_size << 1)) {
+ if (pbuf != buf)
+ delete[] pbuf;
+
+ return _STD::string ();
+ }
+
+ size_t res_size = res.size ();
+
+ _TRY {
+ // resize the result string to fit itself plus the result
+ // of the transformation including the terminating NUL
+ // appended by strxfrm()
+ res.resize (res_size + dst_size + 1);
+ }
+ _CATCH (...) {
+ if (pbuf != buf)
+ delete[] pbuf;
+ _RETHROW;
+ }
+
+ strxfrm (&res [0] + res_size, psrc, dst_size + 1);
+ res.resize (res.size () - !last);
}
else {
- // terminating NUL found in the source buffer
- nchars -= (last - src) + 1;
- psrc = _RWSTD_CONST_CAST (char*, src);
- src += (last - src) + 1;
- }
+ // count and append the consecutive NULs embedded in the
+ // input string
-#ifdef _RWSTD_OS_SUNOS
- // Solaris 10u5 on AMD64 overwrites memory past the end of
- // just_in_case_buf[8], to avoid this, pass a null pointer
- char* const just_in_case_buf = 0;
-#else
- // provide a destination buffer to strxfrm() in case
- // it's buggy (such as MSVC's) and tries to write to
- // the buffer even if it's 0
- char just_in_case_buf [8];
-#endif
+ size_t i = 0;
+ for (; i < nchars && 0 == src [i]; ++i) ;
- const size_t dst_size = strxfrm (just_in_case_buf, psrc, 0);
+ _TRY {
+ // resize the result string to fit itself plus the
+ // embedded NULs
+ res.resize (res.size () + i);
+ }
+ _CATCH (...) {
+ if (pbuf != buf)
+ delete[] pbuf;
+ _RETHROW;
+ }
- // check for strxfrm() errors
- if (0 == (dst_size << 1)) {
- if (pbuf != buf)
- delete[] pbuf;
-
- return _STD::string ();
- }
-
- size_t res_size = res.size ();
-
- _TRY {
- // resize the result string to fit itself plus the result
- // of the transformation including the terminatin NUL
- // appended by strxfrm()
- res.resize (res_size + dst_size + 1);
- }
- _CATCH (...) {
- if (pbuf != buf)
- delete[] pbuf;
- _RETHROW;
- }
-
- // transfor the source string up to the terminating NUL
- size_t xfrm_size =
- strxfrm (&res [0] + res_size, psrc, dst_size + 1);
-
-#if defined _MSC_VER && _MSC_VER < 1400
- // compute the correct value that should have been returned from
- // strxfrm() after the transformation has completed (MSVC strxfrm()
- // returns a bogus result; see PR #29935)
- xfrm_size = strlen (&res [0] + res_size);
-#endif // MSVC < 8.0
-
- // increment the size of the result string by the number
- // of transformed characters excluding the terminating NUL
- // if strxfrm() transforms the empty string into the empty
- // string, keep the terminating NUL, otherwise drop it
- res_size += xfrm_size + (last && !*psrc && !xfrm_size);
-
- _TRY {
- res.resize (res_size);
- }
- _CATCH (...) {
- if (pbuf != buf)
- delete[] pbuf;
- _RETHROW;
+ nchars -= i;
+ src += i;
}
}
@@ -702,99 +703,102 @@
while (nchars) {
- typedef _STD::char_traits<wchar_t> Traits;
+ if (src [0]) {
- const wchar_t* const last = Traits::find (src, nchars, L'\0');
+ typedef _STD::char_traits<wchar_t> Traits;
- if (0 == last) {
+ const wchar_t* const last = Traits::find (src, nchars, L'\0');
- // no NUL found in the initial portion of the source string
- // that fits into the local temporary buffer; copy as many
- // characters as fit into the buffer
+ if (0 == last) {
- if (bufsize <= nchars) {
- if (pbuf != buf)
- delete[] pbuf;
- pbuf = new wchar_t [nchars + 1];
+ // no NUL found in the initial portion of the source string
+ // that fits into the local temporary buffer; copy as many
+ // characters as fit into the buffer
+
+ if (bufsize <= nchars) {
+ if (pbuf != buf)
+ delete[] pbuf;
+ pbuf = new wchar_t [nchars + 1];
+ }
+
+ psrc = pbuf;
+ memcpy (psrc, src, nchars * sizeof *psrc);
+
+ // append a terminating NUL and decrement the number
+ // of characters that remain to be processed
+ psrc [nchars] = 0;
+ src += nchars;
+ nchars = 0;
+ }
+ else {
+
+ // terminating NUL found in the source buffer
+ nchars -= (last - src) + 1;
+ psrc = _RWSTD_CONST_CAST (wchar_t*, src);
+ src += (last - src) + 1;
}
- psrc = pbuf;
- memcpy (psrc, src, nchars * sizeof *psrc);
+#ifdef _RWSTD_OS_SUNOS
+ // just in case Solaris wcsxfrm() has the same bug
+ // as its strxfrm() (see above)
+ wchar_t* const just_in_case_buf = 0;
+#else
+ // provide a destination buffer to wcsxfrm() in case
+ // it's buggy (such as MSVC's) and tries to write to
+ // the buffer even if it's 0
+ wchar_t just_in_case_buf [8];
+#endif
- // append a terminating NUL and decrement the number
- // of characters that remain to be processed
- psrc [nchars] = 0;
- src += nchars;
- nchars = 0;
+ const size_t dst_size =
+ _RWSTD_WCSXFRM (just_in_case_buf, psrc, 0);
+
+ // check for wcsxfrm() errors
+ if (_RWSTD_SIZE_MAX == dst_size) {
+ if (pbuf != buf)
+ delete[] pbuf;
+
+ return _STD::wstring ();
+ }
+
+ size_t res_size = res.size ();
+
+ _TRY {
+ // resize the result string to fit itself plus the result
+ // of the transformation including the terminating NUL
+ // appended by wcsxfrm()
+ res.resize (res_size + dst_size + 1);
+ }
+ _CATCH (...) {
+ if (pbuf != buf)
+ delete[] pbuf;
+ _RETHROW;
+ }
+
+ // transform the source string up to the terminating NUL
+ _RWSTD_WCSXFRM (&res [0] + res_size, psrc, dst_size + 1);
+ res.resize (res.size () - !last);
}
else {
- // terminating NUL found in the source buffer
- nchars -= (last - src) + 1;
- psrc = _RWSTD_CONST_CAST (wchar_t*, src);
- src += (last - src) + 1;
- }
+ // count and append the consecutive NULs embedded in the
+ // input string
-#ifdef _RWSTD_OS_SUNOS
- // just in case Solaris wcsxfrm() has the same bug
- // as its strxfrm() (see above)
- wchar_t* const just_in_case_buf = 0;
-#else
- // provide a destination buffer to strxfrm() in case
- // it's buggy (such as MSVC's) and tries to write to
- // the buffer even if it's 0
- wchar_t just_in_case_buf [8];
-#endif
+ size_t i = 0;
+ for (; i < nchars && 0 == src [i]; ++i) ;
- const size_t dst_size =
- _RWSTD_WCSXFRM (just_in_case_buf, psrc, 0);
+ _TRY {
+ // resize the result string to fit itself plus the
+ // embedded NULs
+ res.resize (res.size () + i);
+ }
+ _CATCH (...) {
+ if (pbuf != buf)
+ delete[] pbuf;
+ _RETHROW;
+ }
- // check for wcsxfrm() errors
- if (_RWSTD_SIZE_MAX == dst_size) {
- if (pbuf != buf)
- delete[] pbuf;
-
- return _STD::wstring ();
- }
-
- size_t res_size = res.size ();
-
- _TRY {
- // resize the result string to fit itself plus the result
- // of the transformation including the terminatin NUL
- // appended by strxfrm()
- res.resize (res_size + dst_size + 1);
- }
- _CATCH (...) {
- if (pbuf != buf)
- delete[] pbuf;
- _RETHROW;
- }
-
- // transfor the source string up to the terminating NUL
- size_t xfrm_size =
- _RWSTD_WCSXFRM (&res [0] + res_size, psrc, dst_size + 1);
-
-# if defined _MSC_VER && _MSC_VER < 1400
- // compute the correct value that should have been returned from
- // strxfrm() after the transformation has completed (MSVC strxfrm()
- // returns a bogus result; see PR #29935)
- xfrm_size = Traits::length (&res [0] + res_size);
-# endif // MSVC < 8.0
-
- // increment the size of the result string by the number
- // of transformed characters excluding the terminating NUL
- // if strxfrm() transforms the empty string into the empty
- // string, keep the terminating NUL, otherwise drop it
- res_size += xfrm_size + (last && !*psrc && !xfrm_size);
-
- _TRY {
- res.resize (res_size);
- }
- _CATCH (...) {
- if (pbuf != buf)
- delete[] pbuf;
- _RETHROW;
+ nchars -= i;
+ src += i;
}
}
@@ -1136,43 +1140,94 @@
const string_type s2 = do_transform (low2, high2);
// FIXME: optimize
- return s1.compare (s2);
+ const int cmp = s1.compare (s2);
+
+ // adjust return value
+ return cmp < 0 ? -1 : cmp ? 1 : 0;
}
#ifndef _RWSTD_NO_WCSCOLL
// use the system C library to compare the strings
-
_RW::__rw_setlocale clocale (this->_C_name, _RWSTD_LC_COLLATE);
- const size_t len1 = high1 - low1;
- const size_t len2 = high2 - low2;
- const size_t len = len1 + len2;
+ size_t len1 = high1 - low1;
+ size_t len2 = high2 - low2;
- // small local buffer
- wchar_t local_buffer [256];
- const size_t bufsize = sizeof local_buffer / sizeof *local_buffer;
+ if (0 == len1 || 0 == len2)
+ return len1 ? 1 : len2 ? -1 : 0;
- // allocate only if local buffer is too small
- wchar_t* const wbuf =
- len + 2 >= bufsize ? new wchar_t [len + 2] : local_buffer;
+ // attempt to use a small buffer
+ wchar_t wbuf [256], *pwbuf = wbuf;
+ const size_t bufsize = sizeof wbuf / sizeof *wbuf;
- // copy and null-terminate first sequence
- char_traits<wchar_t>::copy (wbuf, low1, len1);
- wbuf [len1] = '\0';
+ wchar_t* pwbuf1 = high1 [-1] ? wbuf : const_cast< wchar_t* > (low1);
+ wchar_t* pwbuf2 = high2 [-1] ? wbuf : const_cast< wchar_t* > (low2);
+
+ size_t len =
+ (pwbuf1 == wbuf ? (len1 + 1) : 0) +
+ (pwbuf2 == wbuf ? (len2 + 1) : 0);
- // append and null-terminate first sequence
- char_traits<wchar_t>::copy (wbuf + len1 + 1, low2, len2);
- wbuf [len1 + 1 + len2] = '\0';
+ if (len >= bufsize)
+ pwbuf = new wchar_t [len];
+
+ wchar_t* ptmp = pwbuf;
- // compare sequences using wcscoll()
- const int result = wcscoll (wbuf, wbuf + len1 + 1);
+ // only copy non NUL-terminated buffers
+ if (pwbuf1 == wbuf) {
+ pwbuf1 = pwbuf;
- // deallocate only if allocated
- if (wbuf != local_buffer)
- delete[] wbuf;
+ // copy and null-terminate first sequence
+ char_traits<wchar_t>::copy (pwbuf1, low1, len1);
+ pwbuf1 [len1] = '\0';
- return result ? result > 0 ? 1 : -1 : 0;
+ ptmp = pwbuf + len1 + 1;
+ }
+
+ if (pwbuf2 == wbuf) {
+ pwbuf2 = ptmp;
+
+ // append and null-terminate second sequence
+ char_traits<wchar_t>::copy (pwbuf2, low2, len2);
+ pwbuf2 [len2] = '\0';
+ }
+
+ int cmp = 0;
+
+ for (; len1 && len2;) {
+
+ for (; len1 && len2 && 0 == pwbuf1 [0] && 0 == pwbuf2 [0];
+ ++pwbuf1, ++pwbuf2, --len1, --len2) ;
+
+ // compare sequences using wcscoll, stopping at first NUL
+ cmp = wcscoll (pwbuf1, pwbuf2);
+
+ if (cmp) {
+ if (pwbuf != wbuf)
+ delete [] pwbuf;
+ return cmp > 0 ? 1 : -1;
+ }
+
+ // if they compared equal, they may have embedded NULs
+ size_t n = _RWSTD_WCSLEN (pwbuf1);
+
+ len1 -= n;
+ pwbuf1 += n;
+
+ n = _RWSTD_WCSLEN (pwbuf2);
+
+ len2 -= n;
+ pwbuf2 += n;
+ }
+
+ // adjust return value
+ if (0 == cmp)
+ cmp = len1 ? 1 : len2 ? -1 : 0;
+
+ if (pwbuf != wbuf)
+ delete [] pwbuf;
+
+ return cmp;
#else // if defined (_RWSTD_NO_WCSCOLL)
@@ -1180,7 +1235,10 @@
const string_type s1 = do_transform (low1, high1);
const string_type s2 = do_transform (low2, high2);
- return s1.compare (s2);
+ const int cmp = s1.compare (s2);
+
+ // adjust return value
+ return cmp < 0 ? -1 : cmp ? 1 : 0;
#endif // _RWSTD_NO_WCSCOLL
diff --git a/tests/localization/22.locale.collate.cpp b/tests/localization/22.locale.collate.cpp
index 4051c45..a8ba71e 100644
--- a/tests/localization/22.locale.collate.cpp
+++ b/tests/localization/22.locale.collate.cpp
@@ -116,7 +116,7 @@
return dst;
}
-#ifndef _RWSTD_NO_WCHAR_T
+#if !defined (_RWSTD_NO_WCHAR_T)
int c_strcoll (const wchar_t* s1, const wchar_t* s2)
{
@@ -1029,65 +1029,119 @@
template <class charT>
void
-check_NUL_locale (const char* charTname, const char* locname)
+check_NUL_collate (const char* charTname, const char* locname,
+ const charT* s1, size_t s1_len,
+ const charT* s2, size_t s2_len)
{
std::locale loc (locname);
- charT s [STR_SIZE];
- gen_str (s, STR_SIZE);
-
- charT buf [2][STR_SIZE];
-
- std::memcpy (buf [0], s, sizeof s);
- std::memcpy (buf [1], s, sizeof s);
-
- //
- // Verify that first buffer compares more:
- // |--------0----| = buf [0]
- // |----0--------| = buf [1]
- //
- buf [0][4] = charT ();
- buf [1][3] = charT ();
-
- typedef std::collate<charT> Collate;
+ typedef typename std::collate<charT> Collate;
+ typedef typename Collate::string_type String;
const Collate &col = std::use_facet<Collate> (loc);
- int cmp = col.compare (
- buf [0], buf [0] + sizeof buf [0] / sizeof *buf [0],
- buf [1], buf [1] + sizeof buf [1] / sizeof *buf [1]);
+ const String x1 = col.transform (s1, s1 + s1_len);
+ const String x2 = col.transform (s2, s2 + s2_len);
- rw_assert (cmp > 0, __FILE__, __LINE__,
- "collate<%s>::compare (%{*.*Ac}, %{*.*Ac}) "
- " > 0, failed in locale (\"%s\")", charTname,
- sizeof (charT), sizeof buf [0] / sizeof *buf [0], buf [0],
- sizeof (charT), sizeof buf [1] / sizeof *buf [1], buf [1],
- locname);
+ const int colcmp = col.compare (s1, s1 + s1_len, s2, s2 + s2_len);
- std::memcpy (buf [0], s, sizeof s);
- std::memcpy (buf [1], s, sizeof s);
+ int lexcmp = x1.compare (x2);
+ lexcmp = lexcmp < -1 ? -1 : 1 < lexcmp ? 1 : lexcmp;
+
+ rw_assert (colcmp == lexcmp, __FILE__, __LINE__,
+ "collate<%s>::compare (%{*.*Ac}, %{*.*Ac}) = %d, "
+ "lexicographical comparison of transformed strings = %d, "
+ "mismatch in locale (\"%s\")", charTname,
+ sizeof (charT), s1_len, s1,
+ sizeof (charT), s2_len, s2,
+ colcmp, lexcmp, locname);
- //
- // Verify that first compare less:
- // |----0---0----| = buf [0]
- // |----0--------| = buf [1]
- //
- buf [0][3] = charT ();
- buf [0][5] = charT ();
- buf [1][3] = charT ();
+ const bool eq =
+ std::string (s1, s1 + s1_len) ==
+ std::string (s2, s2 + s2_len);
- cmp = col.compare (
- buf [0], buf [0] + sizeof buf [0] / sizeof *buf [0],
- buf [1], buf [1] + sizeof buf [1] / sizeof *buf [1]);
-
- rw_assert (cmp < 0, __FILE__, __LINE__,
- "collate<%s>::compare (%{*.*Ac}, ..., %{*.*Ac}, ...) "
- " < 0, failed in locale (\"%s\")", charTname,
- sizeof (charT), sizeof buf [0] / sizeof *buf [0], buf [0],
- sizeof (charT), sizeof buf [1] / sizeof *buf [1], buf [1],
- locname);
+ rw_assert (bool (colcmp) != eq, __FILE__, __LINE__,
+ "collate<%s>::compare (%{*.*Ac}, %{*.*Ac}) = %d, "
+ "lexicographical compare = %s, mismatch in locale (\"%s\")",
+ charTname,
+ sizeof (charT), s1_len, s1,
+ sizeof (charT), s2_len, s2, colcmp,
+ (eq ? "true" : "false"), locname);
}
+static void
+check_NUL_collate (const char* charTname, const char* locname, char)
+{
+#define T(s, t) \
+ check_NUL_collate (charTname, locname, \
+ s, sizeof s / sizeof *s - 1, \
+ t, sizeof t / sizeof *t - 1)
+
+ T ("", "");
+ T ("", "\0");
+ T ("", "\0\0");
+ T ("\0", "");
+ T ("\0", "\0");
+ T ("\0", "\0\0");
+ T ("a", "\0");
+ T ("a", "\0a");
+ T ("a", "a\0");
+ T ("a", "a\0\0");
+ T ("a\0", "a");
+ T ("a\0", "a\0");
+ T ("a\0", "a\0\0");
+ T ("\0a", "");
+ T ("\0a", "\0");
+ T ("\0a", "\0a");
+ T ("\0a", "\0a\0");
+ T ("a\0\0b", "");
+ T ("a\0\0b", "a");
+ T ("a\0\0b", "ab");
+ T ("a\0\0b", "a\0");
+ T ("a\0\0b", "a\0\0");
+ T ("a\0\0b", "a\0b");
+ T ("a\0\0b", "a\0\0b");
+}
+
+#if !defined (_RWSTD_NO_WCHAR_T)
+
+static void
+check_NUL_collate (const char* charTname, const char* locname, wchar_t)
+{
+ T (L"", L"");
+ T (L"", L"\0");
+ T (L"", L"\0\0");
+ T (L"\0", L"");
+ T (L"\0", L"\0");
+ T (L"\0", L"\0\0");
+ T (L"a", L"\0");
+ T (L"a", L"\0a");
+ T (L"a", L"a\0");
+ T (L"a", L"a\0\0");
+ T (L"a\0", L"a");
+ T (L"a\0", L"a\0");
+ T (L"a\0", L"a\0\0");
+ T (L"\0a", L"");
+ T (L"\0a", L"\0");
+ T (L"\0a", L"\0a");
+ T (L"\0a", L"\0a\0");
+ T (L"a\0\0b", L"");
+ T (L"a\0\0b", L"a");
+ T (L"a\0\0b", L"ab");
+ T (L"a\0\0b", L"a\0");
+ T (L"a\0\0b", L"a\0\0");
+ T (L"a\0\0b", L"a\0b");
+ T (L"a\0\0b", L"a\0\0b");
+ T (L"a\0\0b\0", L"a\0\0b");
+ T (L"a\0\0b\0\0", L"a\0\0b");
+ T (L"a\0\0b\0\0", L"a\0\0b\0");
+ T (L"a\0\0b\0\0", L"a\0\0bc");
+
+#undef T
+}
+
+#endif // _RWSTD_NO_WCHAR_T
+
template <class charT>
void
check_NUL (const char* charTname)
@@ -1101,9 +1155,9 @@
size_t i = 0;
for (const char* locname = rw_locales (LC_COLLATE);
- *locname; locname += std::strlen (locname) + 1, ++i) {
+ *locname; locname += std::strlen (locname) + 1) {
try {
- check_NUL_locale<charT> (charTname, locname);
+ check_NUL_collate (charTname, locname, charT ());
}
catch (...) {
}
@@ -1128,14 +1182,13 @@
{
do_test<char> ("char");
-#if defined (_RWSTD_NO_WCHAR_T)
+#if !defined (_RWSTD_NO_WCHAR_T)
do_test<wchar_t> ("wchar_t");
#endif // _RWSTD_NO_WCHAR_T
return 0;
}
-
int
main (int argc, char* argv [])
{