blob: 16873ec27331a44d199c92376047288120c143b2 [file] [log] [blame]
/***************************************************************************
*
* collate.cpp - specializations of collate facet
*
* $Id$
*
***************************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*
* Copyright 2001-2008 Rogue Wave Software, Inc.
*
**************************************************************************/
#define _RWSTD_LIB_SRC
// Work around a gcc bug (PR #29570) by disabling the use of extern
// templates, but only for gcc 3.0 through 3.3.0. The major version
// test must guard both alternatives: without the parentheses the
// second alternative would also match any later compiler whose minor
// version is 3 and whose patch level is 0 (e.g., gcc 4.3.0).
#if    3 == __GNUG__ \
    && (   3 > __GNUC_MINOR__ \
        || (3 == __GNUC_MINOR__ && 1 > __GNUC_PATCHLEVEL__))
#  include <rw/_config.h>
#  ifndef _RWSTD_NO_EXTERN_TEMPLATE
#    define _RWSTD_NO_EXTERN_TEMPLATE
#  endif
#endif   // gcc >= 3.0 && gcc < 3.3.1
#include <rw/_defs.h>
#include <limits> // for numeric_limits
#include <limits.h>
#include <stdlib.h> // for wcstombs()
#include <string.h> // for memchr(), memcpy()
#ifndef _RWSTD_NO_WCHAR_H
# include <wchar.h> // for wcscoll(), wcsxfrm(), wmemcmp()
#endif // _RWSTD_NO_WCHAR_H
#include <loc/_collate.h>
#include <loc/_locale.h>
#include <loc/_localedef.h>
#include "locale_body.h"
#include "podarray.h" // for __rw_pod_array
#include "setlocale.h" // for __rw_setlocale
// define _RWSTD_MB_MAX to the greater of MB_LEN_MAX and 8,
// where 8 is the maximum length necessary to encode a Unicode
// character in UTF-8
#if _RWSTD_MB_LEN_MAX < 8
# define _RWSTD_MB_MAX 8
#else
# define _RWSTD_MB_MAX _RWSTD_MB_LEN_MAX
#endif // _RWSTD_MB_LEN_MAX
#if defined (_RWSTD_NO_WCSCOLL) && !defined (_RWSTD_NO_WCSCOLL_IN_LIBC)
extern "C" {
// declare if not declared in the system header(s)
int wcscoll (const wchar_t*, const wchar_t*) _LIBC_THROWS ();
# undef _RWSTD_NO_WCSCOLL
} // extern "C"
#endif // _RWSTD_NO_WCSCOLL && !_RWSTD_NO_WCSCOLL_IN_LIBC
#ifdef _RWSTD_NO_WCSXFRM
# ifndef _RWSTD_NO_WCSXFRM_IN_LIBC
extern "C" {
// declare if not declared in the system header(s)
_RWSTD_SIZE_T wcsxfrm (wchar_t*, const wchar_t*, _RWSTD_SIZE_T) _LIBC_THROWS ();
# define _RWSTD_WCSXFRM wcsxfrm
# undef _RWSTD_NO_WCSXFRM
} // extern "C"
# else
# define _RWSTD_WCSXFRM _RW::__rw_wcsxfrm
# endif // _RWSTD_NO_WCSXFRM_IN_LIBC
#else
# define _RWSTD_WCSXFRM wcsxfrm
#endif // _RWSTD_NO_WCSXFRM
#if defined (_RWSTD_NO_WCSTOMBS) && !defined (_RWSTD_NO_WCSTOMBS_IN_LIBC)
extern "C" {
// declare if not declared in the system header(s)
_RWSTD_DLLIMPORT _RWSTD_SIZE_T
wcstombs (char*, const wchar_t*, _RWSTD_SIZE_T) _LIBC_THROWS ();
# undef _RWSTD_NO_WCSTOMBS
} // extern "C"
#endif // _RWSTD_NO_WCSTOMBS && !_RWSTD_NO_WCSTOMBS_IN_LIBC
// for convenience
typedef unsigned char UChar;
_RWSTD_NAMESPACE (__rw) {
#undef min
#undef max
// computes LC_XXX category from a numeric facet id, returns the
// LC_XXX category for standard facets, LC_ALL for all others
int __rw_get_cat (int);
#ifndef _RWSTD_NO_WCHAR_T
// Appends to the wide string `out' the weights stored in the first
// impl->longest_weight slots of the `weights' array. Slots holding
// 0 or _RWSTD_UINT_MAX (treated as IGNORE weights) are skipped; every
// other weight is appended as a single wchar_t.
_RWSTD_INTERNAL void
__rw_append_weight (const _RW::__rw_collate_t *impl,
                    const unsigned *weights,
                    _STD::wstring &out)
{
    for (int n = 0; n < impl->longest_weight; ++n) {

        const unsigned int weight = weights [n];

        // nothing is appended for an IGNORE weight
        if (0 == weight || _RWSTD_UINT_MAX == weight)
            continue;

        out += wchar_t (weight);
    }
}
#endif // _RWSTD_NO_WCHAR_T
// Appends to the narrow string `out' the weights stored in the first
// impl->longest_weight slots of the `weights' array. Slots holding
// 0 or _RWSTD_UINT_MAX (treated as IGNORE weights) are skipped.
// A weight greater than _RWSTD_CHAR_MAX is spread across several
// characters: as many _RWSTD_CHAR_MAX characters as it takes to
// reduce the weight to a value no greater than _RWSTD_CHAR_MAX,
// followed by one character holding the remainder.
_RWSTD_INTERNAL void
__rw_append_weight (const _RW::__rw_collate_t *impl,
                    const unsigned int *weights,
                    _STD::string &out)
{
    for (int n = 0; n < impl->longest_weight; ++n) {

        unsigned weight = weights [n];

        // nothing is appended for an IGNORE weight
        if (0 == weight || _RWSTD_UINT_MAX == weight)
            continue;

        // peel off _RWSTD_CHAR_MAX at a time until the rest fits
        // into a single char
        for ( ; _RWSTD_CHAR_MAX < weight; weight -= _RWSTD_CHAR_MAX)
            out += char (_RWSTD_CHAR_MAX);

        // what's left of the weight now fits into a char
        out += char (weight);
    }
}
// Appends to `out' the weights of the sequence of collating elements
// [start, end), making one pass over the sequence for each of the
// impl->num_weights levels.
//
//   impl   collation data; num_weights gives the number of passes and
//          longest_weight the number of weight slots per pass
//   start  first element of an array of pointers, each referring to
//          the weight record of one collating element
//   end    one past the last element of the same array
//   out    string the weights are appended to
//
// The first word of every weight record holds a 2-bit ordering for
// each pass (the most significant pair belongs to the first pass);
// the words that follow hold longest_weight weights per pass. Within
// a pass, each run of adjacent elements with the same ordering is
// processed as a group and followed by a character with the value 1.
template <class _STR_T>
static void
__rw_process_offsets (_RW::__rw_collate_t *impl,
                      const unsigned **start,
                      const unsigned **end,
                      _STR_T &out)
{
    typedef typename _STR_T::value_type CharT;

    // nothing to do for an empty sequence; compare the positions
    // rather than dereferencing `end', which doesn't refer to
    // a valid element
    if (start == end)
        return;

    // process the offset list for each pass.
    for (unsigned int pass = 0; pass < impl->num_weights; pass++) {

        // amount by which to shift the first word of a weight record
        // to bring the ordering for this pass into the low two bits
        const unsigned int shift_value =
            (impl->num_weights - (pass + 1)) * 2;

        const unsigned int** cur_start = start;
        const unsigned int** cur_end   = start;

        do {
            // calculate the order from the bitmask order in front of each
            // character's weight information. Each character may have
            // different orderings although most likely there will be groups
            // of characters with the same ordering.
            const unsigned int cur_ord =
                (**cur_end >> shift_value) & 0x00000003;

            // extend the group over all adjacent elements that share
            // the same ordering
            while (   cur_end != end
                   && ((**cur_end >> shift_value) & 0x00000003) == cur_ord) {
                cur_end++;
            }

            const unsigned int** cur;

            switch (cur_ord) {

            case 0:
                // forward weight type
                for (cur = cur_start; cur != cur_end; ++cur) {
                    const unsigned int* const weightp =
                        *cur + 1 + pass * impl->longest_weight;

                    // qualify the call to make sure the function
                    // is found during lookup when declared static
                    _RW::__rw_append_weight (impl, weightp, out);
                }
                break;

            case 1:
                // backward weight type; decrement before use so that
                // the pointer never moves to before the group's start
                for (cur = cur_end; cur != cur_start; ) {
                    --cur;

                    const unsigned int* const weightp =
                        *cur + 1 + pass * impl->longest_weight;

                    // qualify the call to make sure the function
                    // is found during lookup when declared static
                    _RW::__rw_append_weight (impl, weightp, out);
                }
                break;

            case 2:
                // forward,position weight type.
                // The string with a non-IGNOREd element after the
                // fewest IGNORED elements should collate first
                for (cur = cur_start; cur != cur_end; ++cur) {
                    const unsigned int* const weightp =
                        *cur + 1 + pass * impl->longest_weight;

                    // output the largest character value for every
                    // IGNORE weight that we see. The first string that
                    // has a non-ignore value will collate first
                    if (*weightp == 0) {
                        const CharT ign =
                            _STD::numeric_limits<CharT>::max ();
                        out += ign;
                    }
                    else {
                        // qualify the call to make sure the function
                        // is found during lookup when declared static
                        _RW::__rw_append_weight (impl, weightp, out);
                    }
                }
                break;

            case 3:
                // backward,position weight type; same as above except
                // that the group is traversed from its end to its start
                for (cur = cur_end; cur != cur_start; ) {
                    --cur;

                    const unsigned int* const weightp =
                        *cur + 1 + pass * impl->longest_weight;

                    // non-ignore value will collate first
                    if (*weightp == 0) {
                        const CharT ign =
                            _STD::numeric_limits<CharT>::max ();
                        out += ign;
                    }
                    else {
                        // qualify the call to make sure the function
                        // is found during lookup when declared static
                        _RW::__rw_append_weight (impl, weightp, out);
                    }
                }
                break;
            }

            // append a 1 to designate the end of the group
            out += CharT (1);

            cur_start = cur_end;

        } while (cur_end != end);
    }
}
#ifndef _RWSTD_NO_WCHAR_T
static int
__rw_get_w_ce_offset (const __rw_collate_t *impl,
const wchar_t** cur_char,
const wchar_t* end)
{
// obtain the offset of a wide collating element. If no collating
// elements can be made with the current position of the string
// then return -1
if (impl->largest_ce > 1) {
unsigned int cur_tab = 0;
char utf8_enc [_RWSTD_MB_MAX];
const unsigned int* tab;
while (*cur_char < end) {
// convert the next wchar_t character to a utf8 encoded character
const _RWSTD_SIZE_T nbytes =
_RW::__rw_itoutf8 (**cur_char, utf8_enc);
for (_RWSTD_SIZE_T i = 0; i < nbytes; i++) {
const unsigned c1 = impl->get_first_char_in_w_ce_tab (cur_tab);
const unsigned c2 = impl->get_last_char_in_w_ce_tab (cur_tab);
if (UChar (utf8_enc [i]) < c1 || UChar (utf8_enc [i]) > c2)
return -1;
tab = impl->get_w_ce_tab (cur_tab);
const unsigned int next_off = tab [UChar (utf8_enc [i]) - c1];
if (next_off == _RWSTD_UINT_MAX) {
return -1;
}
if (next_off & 0x80000000) {
cur_tab = next_off &~ 0x80000000;
continue;
}
return next_off;
}
(*cur_char)++;
}
}
return -1;
}
static int
__rw_get_wchar_offset (const __rw_collate_t *impl,
const wchar_t** cur_char,
const wchar_t* end)
{
// get the offset of the weight information for the current wide character
unsigned cur_tab = 0;
char utf8_enc [_RWSTD_MB_MAX];
while (*cur_char < end) {
// convert the next wchar_t character to a utf8 encoded character
const _RWSTD_SIZE_T nbytes =
_RW::__rw_itoutf8 (**cur_char, utf8_enc);
for (_RWSTD_SIZE_T i = 0; i < nbytes; i++) {
const unsigned c1 = impl->get_first_char_in_w_tab (cur_tab);
if (UChar (utf8_enc [i]) < c1)
return -1;
const unsigned* const tab = impl->get_w_tab (cur_tab);
const unsigned int next_off = tab [UChar (utf8_enc [i]) - c1];
if (next_off == _RWSTD_UINT_MAX) {
return -1;
}
if (next_off & 0x80000000) {
cur_tab = next_off &~ 0x80000000;
continue;
}
return next_off;
}
(*cur_char)++;
}
return -1;
}
#endif // _RWSTD_NO_WCHAR_T
// Obtains the offset of the narrow collating element that begins at
// *cur_char by walking the narrow collating element tables one
// character at a time.
//
//   impl      collation data
//   cur_char  in/out: position in the string; advanced past every
//             character that led to another table
//   end       one past the last character of the string
//
// Returns the offset stored in the table, or -1 when no collating
// element can be made starting at the current position (also when
// impl->largest_ce does not exceed 1).
static int
__rw_get_n_ce_offset (const __rw_collate_t *impl,
                      const char **cur_char,
                      const char *end)
{
    // nothing to look up unless largest_ce is greater than 1
    if (!(impl->largest_ce > 1))
        return -1;

    unsigned table = 0;

    for ( ; *cur_char < end; ++*cur_char) {

        const unsigned byte  = UChar (**cur_char);
        const unsigned first = impl->get_first_char_in_n_ce_tab (table);
        const unsigned last  = impl->get_last_char_in_n_ce_tab (table);

        // the character falls outside the range covered by the table
        if (byte < first || last < byte)
            return -1;

        const unsigned int entry = impl->get_n_ce_tab (table)[byte - first];

        // unused table slot
        if (_RWSTD_UINT_MAX == entry)
            return -1;

        // an entry without the high bit set is the result
        if (!(entry & 0x80000000))
            return entry;

        // otherwise the remaining bits select the next table, to be
        // indexed with the next character
        table = entry & ~0x80000000;
    }

    return -1;
}
// Obtains the offset of the weight information for the narrow
// (possibly multibyte) character at *cur_char by walking the narrow
// character tables one byte at a time.
//
//   impl      collation data
//   cur_char  in/out: position in the string; advanced past every
//             byte that led to another table
//   end       one past the last byte of the string
//
// Returns the offset stored in the table, or -1 when a byte is less
// than the first character covered by the current table, when the
// table slot is unused, or when the end of the string is reached.
static int
__rw_get_char_offset (const __rw_collate_t *impl,
                      const char** cur_char,
                      const char* end)
{
    unsigned int cur_tab = 0;

    while (*cur_char < end) {

        const unsigned byte = UChar (**cur_char);

        // look up the first character of the table just once
        const unsigned c1 = impl->get_first_char_in_n_tab (cur_tab);

        // the byte precedes the range covered by the table
        if (byte < c1)
            return -1;

        const unsigned int* const tab = impl->get_n_tab (cur_tab);

        const unsigned int next_off = tab [byte - c1];

        // unused table slot
        if (next_off == _RWSTD_UINT_MAX)
            return -1;

        // an entry without the high bit set is the result
        if (!(next_off & 0x80000000))
            return next_off;

        // otherwise the remaining bits select the next table, to be
        // indexed with the next byte
        (*cur_char)++;
        cur_tab = next_off &~ 0x80000000;
    }

    return -1;
}
// Returns true if the character sequence starting at `from' does not
// form a valid complete multibyte character, i.e., when the result of
// __rw_mbtowco() for the sequence has its high bit (0x80000000) set.
static inline bool
__rw_is_invalid (const unsigned int* first_tab, const char *from)
{
    if (__rw_mbtowco (first_tab, from, 0) & 0x80000000)
        return true;

    return false;
}
// same as strxfrm() except that it takes the number of characters
// in an array that may contain embedded NULs; these are inserted
// into the transformed string
//
//   src     array of nchars characters to transform
//   nchars  number of characters in the array, including any
//           embedded NULs
//
// The array is processed one NUL-delimited segment at a time in the
// current C locale. Returns the concatenation of the transformed
// segments, or the empty string when the error check on the result
// of strxfrm() trips.
static _STD::string
__rw_strnxfrm (const char *src, _RWSTD_SIZE_T nchars)
{
    _STD::string res;

    char buf [256];

    // buffer for a NUL-terminated copy of the last segment; either
    // buf or dynamically allocated storage owned by this function
    char *pbuf = buf;

    _RWSTD_SIZE_T bufsize = sizeof buf;

    // NUL-terminated segment to transform in this iteration
    char *psrc = buf;

    while (nchars) {

        // using a C-style cast instead of static_cast to avoid
        // a gcc 2.95.2 bug causing an error on some platforms:
        //   static_cast from `void *' to `const char *'
        const char* const last = (const char*)memchr (src, '\0', nchars);

        if (0 == last) {

            // no NUL found in what remains of the source array:
            // make a NUL-terminated copy of it, in the local buffer
            // if it fits or in dynamically allocated storage otherwise
            if (bufsize <= nchars) {
                if (pbuf != buf)
                    delete[] pbuf;
                pbuf = new char [nchars + 1];
            }

            psrc = pbuf;
            memcpy (psrc, src, nchars);

            // append a terminating NUL and decrement the number
            // of characters that remain to be processed
            psrc [nchars] = '\0';
            src          += nchars;
            nchars        = 0;
        }
        else {

            // terminating NUL found in the source buffer: transform
            // the segment in place and skip over the NUL
            nchars -= (last - src) + 1;
            psrc    = _RWSTD_CONST_CAST (char*, src);
            src    += (last - src) + 1;
        }

        // provide a destination buffer to strxfrm() in case
        // it's buggy (such as MSVC's) and tries to write to
        // the buffer even if it's 0
        char just_in_case_buf [8];

        const _RWSTD_SIZE_T dst_size = strxfrm (just_in_case_buf, psrc, 0);

        // check for strxfrm() errors
        // NOTE(review): this test is true only when dst_size is 0 or
        // has just its most significant bit set, so an empty segment
        // is treated as an error here while __rw_wcsnxfrm() tests
        // for _RWSTD_SIZE_MAX -- confirm the intent
        if (0 == (dst_size << 1)) {

            // release the copy of the source, if any, before returning
            if (pbuf != buf)
                delete[] pbuf;

            return _STD::string ();
        }

        _RWSTD_SIZE_T res_size = res.size ();

        _TRY {
            // resize the result string to fit itself plus the result
            // of the transformation including the terminating NUL
            // appended by strxfrm()
            res.resize (res_size + dst_size + 1);
        }
        _CATCH (...) {
            if (pbuf != buf)
                delete[] pbuf;
            _RETHROW;
        }

        // transform the source string up to the terminating NUL
        _RWSTD_SIZE_T xfrm_size =
            strxfrm (&res [0] + res_size, psrc, dst_size + 1);

#if defined _MSC_VER && _MSC_VER < 1400
        // compute the correct value that should have been returned from
        // strxfrm() after the transformation has completed (MSVC strxfrm()
        // returns a bogus result; see PR #29935)
        xfrm_size = strlen (&res [0] + res_size);
#endif   // MSVC < 8.0

        // increment the size of the result string by the number
        // of transformed characters excluding the terminating NUL
        // if strxfrm() transforms the empty string into the empty
        // string, keep the terminating NUL, otherwise drop it
        res_size += xfrm_size + (last && !*psrc && !xfrm_size);

        _TRY {
            res.resize (res_size);
        }
        _CATCH (...) {
            if (pbuf != buf)
                delete[] pbuf;
            _RETHROW;
        }
    }

    if (pbuf != buf)
        delete[] pbuf;

    return res;
}
#ifndef _RWSTD_NO_WCHAR_T
# ifdef _RWSTD_NO_WCSXFRM
// implements wcsxfrm() using wcstombs() and strxfrm() on platforms
// such as some versions of BSD where the function isn't defined in
// the C Standard Library
//
//   dst      destination buffer of at least dstsize elements; may be
//            null only when dstsize is 0
//   src      NUL-terminated wide string to transform; must be non-null
//   dstsize  size of the destination buffer in wide characters
//
// The source string is narrowed with wcstombs(), the multibyte string
// is transformed with strxfrm(), and every byte (not character) of
// the result is widened into dst. Returns the length of the whole
// transformed string excluding the terminating NUL; dst holds the
// NUL-terminated result only when the returned value is less than
// dstsize. Returns _RWSTD_SIZE_MAX on failure.
static _RWSTD_SIZE_T
__rw_wcsxfrm (wchar_t *dst, const wchar_t *src, _RWSTD_SIZE_T dstsize)
{
    // src must be non-null
    _RWSTD_ASSERT (0 != src);

    // dst is permitted to be null only when (dstsize == 0)
    _RWSTD_ASSERT (0 == dstsize || dst);

#  ifndef _RWSTD_NO_WCSTOMBS

    // convert wide string to a multibyte string before transforming it
    // using strxfrm() and widening the result into the destination buffer

    const _RWSTD_SIZE_T srclen = _RWSTD_WCSLEN (src);

    // upper bound on the number of bytes (excluding the terminating
    // NUL) needed to narrow the whole source string regardless of
    // the size of the destination buffer
    const _RWSTD_SIZE_T needbytes = srclen * MB_LEN_MAX;

    char narrow_buf [256];

    // use the local buffer only if it has room for needbytes bytes
    // plus the terminating NUL
    char* const nbuf =
        sizeof narrow_buf <= needbytes ? new char [needbytes + 1] : narrow_buf;

    _RWSTD_SIZE_T result = _RWSTD_SIZE_MAX;

    const _RWSTD_SIZE_T nmbchars = wcstombs (nbuf, src, needbytes);

    if (_RWSTD_SIZE_MAX != nmbchars) {

        // wcstombs() doesn't append a NUL when it stores as many
        // bytes as it was allowed to (including when that's 0)
        nbuf [nmbchars] = '\0';

        // try to transform into a small local buffer first
        char xfrm_buf [sizeof narrow_buf * 8];

        char          *xbuf     = xfrm_buf;
        _RWSTD_SIZE_T  xbufsize = sizeof xfrm_buf;

        result = strxfrm (xbuf, nbuf, xbufsize);

        if (_RWSTD_SIZE_MAX != result && xbufsize <= result) {

            // the local buffer isn't big enough: allocate one of
            // exactly the required size and repeat the transformation
            xbufsize = result + 1;

            _TRY {
                xbuf = new char [xbufsize];
            }
            _CATCH (...) {
                // don't leak the multibyte buffer
                if (nbuf != narrow_buf)
                    delete[] nbuf;
                _RETHROW;
            }

            result = strxfrm (xbuf, nbuf, xbufsize);

            // fail if the second transformation doesn't fit either
            if (xbufsize <= result)
                result = _RWSTD_SIZE_MAX;
        }

        if (_RWSTD_SIZE_MAX != result && dstsize) {

            // widen the bytes (not characters) of the transformed string
            // if the transformation was successful and the size of the
            // destination buffer is non-zero; include the terminating
            // NUL when the whole result fits
            const _RWSTD_SIZE_T ncopy =
                result < dstsize ? result + 1 : dstsize;

            for (_RWSTD_SIZE_T i = 0; i != ncopy; ++i)
                dst [i] = wchar_t (UChar (xbuf [i]));
        }

        // free the transformation buffer if dynamically allocated
        if (xbuf != xfrm_buf)
            delete[] xbuf;
    }

    // free the multibyte buffer if dynamically allocated
    if (nbuf != narrow_buf)
        delete[] nbuf;

    return result;

#  else   // if defined (_RWSTD_NO_WCSTOMBS)

    _RWSTD_UNUSED (dst);
    _RWSTD_UNUSED (src);
    _RWSTD_UNUSED (dstsize);

    // fail when there is no way to convert a wchar_t array
    // to a multibyte string
    return _RWSTD_SIZE_MAX;

#  endif   // _RWSTD_NO_WCSTOMBS
}
# endif // _RWSTD_NO_WCSXFRM
// same as wcsxfrm() except that it takes the number of characters
// in an array that may contain embedded NULs; these are inserted
// into the transformed string
//
//   src     array of nchars wide characters to transform
//   nchars  number of characters in the array, including any
//           embedded NULs
//
// The array is processed one NUL-delimited segment at a time in the
// current C locale. Returns the concatenation of the transformed
// segments, or the empty string when the transformation function
// reports an error by returning _RWSTD_SIZE_MAX.
static _STD::wstring
__rw_wcsnxfrm (const wchar_t *src, _RWSTD_SIZE_T nchars)
{
    _STD::wstring res;

    wchar_t buf [256];

    // buffer for a NUL-terminated copy of the last segment; either
    // buf or dynamically allocated storage owned by this function
    wchar_t *pbuf = buf;

    _RWSTD_SIZE_T bufsize = sizeof buf / sizeof *buf;

    // NUL-terminated segment to transform in this iteration
    wchar_t *psrc = buf;

    while (nchars) {

        typedef _STD::char_traits<wchar_t> Traits;

        const wchar_t* const last = Traits::find (src, nchars, L'\0');

        if (0 == last) {

            // no NUL found in what remains of the source array:
            // make a NUL-terminated copy of it, in the local buffer
            // if it fits or in dynamically allocated storage otherwise
            if (bufsize <= nchars) {
                if (pbuf != buf)
                    delete[] pbuf;
                pbuf = new wchar_t [nchars + 1];
            }

            psrc = pbuf;
            memcpy (psrc, src, nchars * sizeof *psrc);

            // append a terminating NUL and decrement the number
            // of characters that remain to be processed
            psrc [nchars] = 0;
            src          += nchars;
            nchars        = 0;
        }
        else {

            // terminating NUL found in the source buffer: transform
            // the segment in place and skip over the NUL
            nchars -= (last - src) + 1;
            psrc    = _RWSTD_CONST_CAST (wchar_t*, src);
            src    += (last - src) + 1;
        }

        // provide a destination buffer to wcsxfrm() in case
        // it's buggy (such as MSVC's) and tries to write to
        // the buffer even if it's 0
        wchar_t just_in_case_buf [8];

        const _RWSTD_SIZE_T dst_size =
            _RWSTD_WCSXFRM (just_in_case_buf, psrc, 0);

        // check for wcsxfrm() errors
        if (_RWSTD_SIZE_MAX == dst_size) {

            // release the copy of the source, if any, before returning
            if (pbuf != buf)
                delete[] pbuf;

            return _STD::wstring ();
        }

        _RWSTD_SIZE_T res_size = res.size ();

        _TRY {
            // resize the result string to fit itself plus the result
            // of the transformation including the terminating NUL
            // appended by wcsxfrm()
            res.resize (res_size + dst_size + 1);
        }
        _CATCH (...) {
            if (pbuf != buf)
                delete[] pbuf;
            _RETHROW;
        }

        // transform the source string up to the terminating NUL
        _RWSTD_SIZE_T xfrm_size =
            _RWSTD_WCSXFRM (&res [0] + res_size, psrc, dst_size + 1);

#  if defined _MSC_VER && _MSC_VER < 1400
        // compute the correct value that should have been returned from
        // wcsxfrm() after the transformation has completed (MSVC strxfrm()
        // returns a bogus result; see PR #29935)
        xfrm_size = Traits::length (&res [0] + res_size);
#  endif   // MSVC < 8.0

        // increment the size of the result string by the number
        // of transformed characters excluding the terminating NUL
        // if wcsxfrm() transforms the empty string into the empty
        // string, keep the terminating NUL, otherwise drop it
        res_size += xfrm_size + (last && !*psrc && !xfrm_size);

        _TRY {
            res.resize (res_size);
        }
        _CATCH (...) {
            if (pbuf != buf)
                delete[] pbuf;
            _RETHROW;
        }
    }

    if (pbuf != buf)
        delete[] pbuf;

    return res;
}
#endif // _RWSTD_NO_WCHAR_T
// Computes a hash value for the character sequence [lo, hi).
//
// Peter Weinberger's generic hashing algorithm, adapted by Andrew
// Binstock from a version by Allen Holub (see Andrew Binstock,
// "Hashing Revisited", Dr. Dobb's Journal, April 1996)
//
// The computation is carried out in unsigned long to avoid shifting
// negative values and overflowing a signed type. The top eighth of
// the bits of the result is always clear so the returned value is
// non-negative. Note that for sequences long enough to set the most
// significant bit of the intermediate value the result may differ
// from that of an implementation that uses signed arithmetic.
template <class _CharT>
long __rw_hash (const _CharT *lo, const _CharT *hi) _THROWS (())
{
    typedef _STD::char_traits<_CharT> _Traits;

    const int long_bits     = _RWSTD_CHAR_BIT * int (sizeof (long));
    const int one_eighth    = long_bits / 8;
    const int three_fourths = long_bits * 3 / 4;

    // mask for the most significant eighth of the bits
    // subexpression parenthesized to prevent MSVC 13.00 warning C4554
    const unsigned long hi_bits = ~0UL << (long_bits - one_eighth);

    unsigned long res = 0;

    for ( ; lo != hi; ++lo) {

        res =   (res << one_eighth)
              + _RWSTD_STATIC_CAST (unsigned long, _Traits::to_int_type (*lo));

        // fold the bits shifted into the top eighth back into
        // the lower part of the value and clear them
        const unsigned long tmp = res & hi_bits;

        if (tmp)
            res = (res ^ (tmp >> three_fourths)) & ~hi_bits;
    }

    return _RWSTD_STATIC_CAST (long, res);
}
#ifndef _RWSTD_NO_INSTANTIATE
#  ifndef _RWSTD_NO_EXPLICIT_FUNC_INSTANTIATION
// explicitly instantiate for compilers with automatic template
// instantiation that don't emit symbols for implicitly instantiated
// templates even if they are completely defined in .cpp files
// (e.g., Compaq C++)
template long __rw_hash (const char*, const char*) _THROWS(());
#    ifndef _RWSTD_NO_WCHAR_T
template long __rw_hash (const wchar_t*, const wchar_t*) _THROWS(());
#    endif   // _RWSTD_NO_WCHAR_T
#  endif   // _RWSTD_NO_EXPLICIT_FUNC_INSTANTIATION
#endif   // _RWSTD_NO_INSTANTIATE
} // namespace __rw
_RWSTD_NAMESPACE (std) {
// definition of the static id data member of the collate<char>
// specialization
_RW::__rw_facet_id collate<char>::id;
// the destructor does nothing; it is defined out of line to avoid
// generating a vtable in each translation unit that uses the class
/* virtual */ collate<char>::
~collate () /* nothrow */
{
// no-op
}
// Compares the sequences [__lo1, __hi1) and [__lo2, __hi2) byte by
// byte using memcmp() over their common length. When neither differs
// from the other within the common length the shorter sequence
// orders first. Returns -1, 0, or +1.
int collate<char>::
do_compare (const char_type *__lo1, const char_type *__hi1,
            const char_type *__lo2, const char_type *__hi2) const
{
    _RWSTD_ASSERT (__lo1 <= __hi1 && __lo2 <= __hi2);

    const _RWSTD_SIZE_T __n1 = __hi1 - __lo1;
    const _RWSTD_SIZE_T __n2 = __hi2 - __lo2;

    // length of the shorter of the two sequences
    const _RWSTD_SIZE_T __common = __n1 < __n2 ? __n1 : __n2;

    const int __res = memcmp (__lo1, __lo2, __common);

    // normalize the result of memcmp()
    if (__res < 0)
        return -1;

    if (__res > 0)
        return 1;

    // the common parts are the same, let the lengths decide
    if (__n1 < __n2)
        return -1;

    return __n2 < __n1 ? +1 : 0;
}
// Returns a string that holds a copy of the characters in the range
// [__lo, __hi); no transformation is applied to the characters.
collate<char>::string_type collate<char>::
do_transform (const char_type *__lo, const char_type *__hi) const
{
    _RWSTD_ASSERT (0 != __lo);
    _RWSTD_ASSERT (__lo <= __hi);

    // number of characters in the range
    const size_t __len = size_t (__hi - __lo);

    return string_type (__lo, __len);
}
// Returns the hash value computed by __rw_hash() over the result of
// do_transform (__lo, __hi).
long collate<char>::
do_hash (const char_type *__lo, const char_type *__hi) const
{
    // hash the transformed sequence rather than the original so that
    // keys that transform equally will hash equally, as per
    // 22.2.4.1.2, p3
    const string_type __xfrm = do_transform (__lo, __hi);

    const char_type* const __first = __xfrm.data ();
    const char_type* const __last  = __first + __xfrm.length ();

    return _RW::__rw_hash (__first, __last);
}
// the destructor does nothing; it is defined out of line to avoid
// generating a vtable in each translation unit that uses the class
/* virtual */ collate_byname<char>::
~collate_byname () /* nothrow */
{
// no-op
}
// Compares the sequences [low1, high1) and [low2, high2) by comparing
// the results of do_transform() for each. Returns -1, 0, or 1.
int collate_byname<char>::
do_compare (const char* low1, const char* high1,
            const char* low2, const char* high2) const
{
    // FIXME: optimize: doing a full transformation of the two
    //        strings is not necessary, it might be quicker to only do
    //        a partial transformation
    const string_type xfrm1 (do_transform (low1, high1));
    const string_type xfrm2 (do_transform (low2, high2));

    const int order = xfrm1.compare (xfrm2);

    // normalize
    if (order < 0)
        return -1;

    return order ? 1 : 0;
}
// Transforms the sequence [low, high) into a sort key.
//
// When the facet has no collation data of its own, or when it has
// been told to use the C library (_C_use_libc), the transformation
// is delegated to __rw_strnxfrm() with the C locale set to _C_name.
// Otherwise the weight record of every collating element or character
// in the sequence is looked up in the collation data and the weights
// are assembled by __rw_process_offsets().
//
// Returns the empty string when a character without weights is
// determined to be invalid.
collate_byname<char>::string_type
collate_byname<char>::
do_transform (const char* low, const char* high) const
{
    _RWSTD_ASSERT (low <= high);

    const int ccvt_cat = _RW::__rw_get_cat ((_C_wcodecvt_byname + 1) / 2);

    _RW::__rw_collate_t* const impl =
        _RWSTD_CONST_CAST (_RW::__rw_collate_t*,
            _RWSTD_STATIC_CAST (const _RW::__rw_collate_t*, _C_data ()));

    if (!impl || (this->_C_opts & this->_C_use_libc)) {
        // set the global libc locale in a thread-safe way
        const _RW::__rw_setlocale clocale (_C_name, _RWSTD_LC_COLLATE);

        return _RW::__rw_strnxfrm (low, high - low);
    }

    _RWSTD_ASSERT (0 != impl);

    // the maximum size that we could need to hold all the weight offsets
    // is high - low
    _RW::__rw_pod_array<const unsigned int*, 1024> indexes;

    // first go through the string getting a weight offset for
    // each character, in the process check for collating elements.
    for (const char* tmp_lo = low; tmp_lo < high; tmp_lo++) {

        // try a collating element first, then a single character
        const char* tmp_lo2 = tmp_lo;

        int ret = _RW::__rw_get_n_ce_offset (impl, &tmp_lo2, high);

        if (ret == -1) {
            tmp_lo2 = tmp_lo;
            ret     = _RW::__rw_get_char_offset (impl, &tmp_lo2, high);
        }

        const unsigned int *pwt;

        if (ret != -1) {
            // we found a collating element or a character
            pwt = impl->get_weight (ret);
        }
        else {
            // The character is not explicitly defined, but it
            // may have been defined implicitly by UNDEFINED.
            // because we cannot tell the difference between
            // an UNDEFINED character and an invalid character
            // in the collate database we must use the codecvt
            // database to discover this information
            if (!impl->undefined_optimization)
                continue;

            _RWSTD_SIZE_T size;

            const _RW::__rw_codecvt_t *cvt =
                _RWSTD_STATIC_CAST (const _RW::__rw_codecvt_t*,
                                    _RW::__rw_get_facet_data (
                                        ccvt_cat, size, _C_name,
                                        impl->codeset_name ()));

            // return an empty string for an invalid character;
            // constructing the string from a null pointer (as
            // `return 0' would) has undefined behavior
            if (_RW::__rw_is_invalid (cvt->n_to_w_tab (), tmp_lo2))
                return string_type ();

            pwt = impl->get_weight (impl->undefined_weight_idx);
        }

        indexes.append (&pwt, 1);

        // skip over any additional characters consumed by the lookup
        tmp_lo = tmp_lo2;
    }

    // now process the weights
    string_type out;

    _RW::__rw_process_offsets (impl, indexes.data (),
                               indexes.data () + indexes.size (), out);

    return out;
}
#ifndef _RWSTD_NO_WCHAR_T
// definition of the static id data member of the collate<wchar_t>
// specialization
_RW::__rw_facet_id collate<wchar_t>::id;
// the destructor does nothing; it is defined out of line to avoid
// generating a vtable in each translation unit that uses the class
/* virtual */ collate<wchar_t>::
~collate () /* nothrow */
{
// no-op
}
// Compares the sequences [__lo1, __hi1) and [__lo2, __hi2) element
// by element over their common length, using wmemcmp() where it is
// available and a loop over the int_type values of the characters
// otherwise. When neither differs from the other within the common
// length the shorter sequence orders first. Returns -1, 0, or +1.
int collate<wchar_t>::
do_compare (const char_type *__lo1, const char_type *__hi1,
            const char_type *__lo2, const char_type *__hi2) const
{
    _RWSTD_ASSERT (__lo1 <= __hi1 && __lo2 <= __hi2);

    const _RWSTD_SIZE_T __n1 = __hi1 - __lo1;
    const _RWSTD_SIZE_T __n2 = __hi2 - __lo2;

    // length of the shorter of the two sequences
    const _RWSTD_SIZE_T __common = __n1 < __n2 ? __n1 : __n2;

#ifndef _RWSTD_NO_WMEMCMP

    const int __res = wmemcmp (__lo1, __lo2, __common);

    // normalize the result of wmemcmp()
    if (__res < 0)
        return -1;

    if (__res > 0)
        return 1;

#else   // if defined (_RWSTD_NO_WMEMCMP)

    typedef string_type::traits_type _Traits;
    typedef _Traits::int_type        _Int;

    for (_RWSTD_SIZE_T __i = 0; __i != __common; ++__i) {

        // avoid arithmetic on unknown char types
        const _Int __c1 = _Traits::to_int_type (__lo1 [__i]);
        const _Int __c2 = _Traits::to_int_type (__lo2 [__i]);

        // use int_type to prevent signed versus unsigned char comparison
        if (_Traits::eq_int_type (__c1, __c2))
            continue;

        return __c1 < __c2 ? -1 : 1;
    }

#endif   // _RWSTD_NO_WMEMCMP

    // the common parts are the same, let the lengths decide
    if (__n1 < __n2)
        return -1;

    return __n2 < __n1 ? +1 : 0;
}
// Returns a string that holds a copy of the characters in the range
// [__lo, __hi); no transformation is applied to the characters.
collate<wchar_t>::string_type collate<wchar_t>::
do_transform (const char_type *__lo, const char_type *__hi) const
{
    _RWSTD_ASSERT (0 != __lo);
    _RWSTD_ASSERT (__lo <= __hi);

    // number of characters in the range
    const size_t __len = size_t (__hi - __lo);

    return string_type (__lo, __len);
}
// Returns the hash value computed by __rw_hash() over the result of
// do_transform (__lo, __hi).
long collate<wchar_t>::
do_hash (const char_type *__lo, const char_type *__hi) const
{
    // hash the transformed sequence rather than the original so that
    // keys that transform equally will hash equally, as per
    // 22.2.4.1.2, p3
    const string_type __xfrm = do_transform (__lo, __hi);

    const char_type* const __first = __xfrm.data ();
    const char_type* const __last  = __first + __xfrm.length ();

    return _RW::__rw_hash (__first, __last);
}
// the destructor does nothing; it is defined out of line to avoid
// generating a vtable in each translation unit that uses the class
/* virtual */ collate_byname<wchar_t>::
~collate_byname () /* nothrow */
{
// no-op
}
// Compares the sequences [low1, high1) and [low2, high2).
//
// When the facet has been told to use its own implementation
// (_C_use_libstd), or when wcscoll() is not available, the results of
// do_transform() for the two sequences are compared. Otherwise
// NUL-terminated copies of both sequences are compared with wcscoll()
// in the C locale named by _C_name.
//
// Returns -1, 0, or 1 on every path, the same as the char
// specialization of the facet.
int collate_byname<wchar_t>::
do_compare (const wchar_t* low1, const wchar_t* high1,
            const wchar_t* low2, const wchar_t* high2) const
{
    if (this->_C_opts & this->_C_use_libstd) {
        const string_type s1 = do_transform (low1, high1);
        const string_type s2 = do_transform (low2, high2);

        // FIXME: optimize
        const int cmp = s1.compare (s2);

        // normalize
        return cmp < 0 ? -1 : cmp ? 1 : 0;
    }

#ifndef _RWSTD_NO_WCSCOLL

    // use the system C library to compare the strings

    _RW::__rw_setlocale clocale (this->_C_name, _RWSTD_LC_COLLATE);

    const _RWSTD_SIZE_T len1 = high1 - low1;
    const _RWSTD_SIZE_T len2 = high2 - low2;
    const _RWSTD_SIZE_T len  = len1 + len2;

    // small local buffer
    wchar_t local_buffer [256];
    const _RWSTD_SIZE_T bufsize = sizeof local_buffer / sizeof *local_buffer;

    // allocate only if local buffer is too small to hold both
    // sequences along with their terminating NULs
    wchar_t* const wbuf =
        len + 2 >= bufsize ? new wchar_t [len + 2] : local_buffer;

    // copy and null-terminate first sequence
    char_traits<wchar_t>::copy (wbuf, low1, len1);
    wbuf [len1] = '\0';

    // append and null-terminate second sequence
    char_traits<wchar_t>::copy (wbuf + len1 + 1, low2, len2);
    wbuf [len1 + 1 + len2] = '\0';

    // compare sequences using wcscoll()
    const int result = wcscoll (wbuf, wbuf + len1 + 1);

    // deallocate only if allocated
    if (wbuf != local_buffer)
        delete[] wbuf;

    // normalize
    return result ? result > 0 ? 1 : -1 : 0;

#else   // if defined (_RWSTD_NO_WCSCOLL)

    // transform strings first and compare the transformed results
    const string_type s1 = do_transform (low1, high1);
    const string_type s2 = do_transform (low2, high2);

    const int cmp = s1.compare (s2);

    // normalize
    return cmp < 0 ? -1 : cmp ? 1 : 0;

#endif   // _RWSTD_NO_WCSCOLL
}
// Transforms the sequence [low, high) into a sort key.
//
// When the facet has no collation data of its own, or when it has
// been told to use the C library (_C_use_libc), the transformation
// is delegated to __rw_wcsnxfrm() with the C locale set to _C_name.
// Otherwise the weight record of every collating element or character
// in the sequence is looked up in the collation data and the weights
// are assembled by __rw_process_offsets().
//
// Returns the empty string when a character without weights is
// determined to be invalid.
collate_byname<wchar_t>::string_type
collate_byname<wchar_t>::
do_transform (const wchar_t* low, const wchar_t* high) const
{
    _RWSTD_ASSERT (low <= high);

    const int ccvt_cat = _RW::__rw_get_cat ((_C_wcodecvt_byname + 1) / 2);

    _RW::__rw_collate_t *impl =
        _RWSTD_CONST_CAST (_RW::__rw_collate_t*, _RWSTD_STATIC_CAST (
            const _RW::__rw_collate_t*, _C_data ()));

    if (!impl || (this->_C_opts & this->_C_use_libc)) {
        // set the global libc locale in a thread-safe way
        const _RW::__rw_setlocale clocale (_C_name, _RWSTD_LC_COLLATE);

        return _RW::__rw_wcsnxfrm (low, high - low);
    }

    _RWSTD_ASSERT (0 != impl);

    // the maximum size that we could need to hold all the weight offsets
    // is high - low
    _RW::__rw_pod_array<const unsigned int*, 1024> indexes;

    // first go through the string getting a weight offset for
    // each character, in the process check for collating elements.
    for (const wchar_t* tmp_lo = low; tmp_lo < high; tmp_lo++) {

        // try a collating element first, then a single character
        const wchar_t* tmp_lo2 = tmp_lo;

        int ret = _RW::__rw_get_w_ce_offset (impl, &tmp_lo2, high);

        if (ret == -1) {
            tmp_lo2 = tmp_lo;
            ret     = _RW::__rw_get_wchar_offset (impl, &tmp_lo2, high);
        }

        const unsigned int *pwt;

        if (ret != -1) {
            // we found a collating element or a character
            pwt = impl->get_weight (ret);
        }
        else {
            // The character is not explicitly defined, but it
            // may have been defined implicitly by UNDEFINED.
            // because we cannot tell the difference between
            // an UNDEFINED character and an invalid character
            // in the collate database we must use the codecvt
            // database to discover this information
            if (!impl->undefined_optimization)
                continue;

            _RWSTD_SIZE_T size;

            const _RW::__rw_codecvt_t *cvt =
                _RWSTD_STATIC_CAST (const _RW::__rw_codecvt_t*,
                                    _RW::__rw_get_facet_data (
                                        ccvt_cat, size, _C_name,
                                        impl->codeset_name ()));

            // NUL-terminated UTF-8 encoding of the character
            char tmp [_RWSTD_MB_MAX];

            const _RWSTD_SIZE_T nbytes =
                _RW::__rw_itoutf8 (*tmp_lo2, tmp);

            tmp [nbytes] = '\0';

            // return an empty string for an invalid character;
            // constructing the string from a null pointer (as
            // `return 0' would) has undefined behavior
            if (_RW::__rw_is_invalid (cvt->w_to_n_tab (), tmp))
                return string_type ();

            pwt = impl->get_weight (impl->undefined_weight_idx);
        }

        indexes.append (&pwt, 1);

        // skip over any additional characters consumed by the lookup
        tmp_lo = tmp_lo2;
    }

    // now process the weights
    string_type out;

    _RW::__rw_process_offsets (impl, indexes.data (),
                               indexes.data () + indexes.size (), out);

    return out;
}
#endif // _RWSTD_NO_WCHAR_T
} // namespace std