blob: 07d21f29c96124db34f9ce1fe9815cc5b6bba62a [file] [log] [blame]
/************************************************************************
*
* locale.cpp - definitions of locale helpers
*
* $Id$
*
************************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*
* Copyright 2001-2008 Rogue Wave Software, Inc.
*
**************************************************************************/
// expand _TEST_EXPORT macros
#define _RWSTD_TEST_SRC
#include <rw_locale.h>
#include <rw_environ.h> // for rw_putenv()
#include <rw_file.h> // for SHELL_RM_RF, rw_tmpnam
#include <rw_process.h> // for rw_system()
#include <rw_printf.h> // for rw_snprintf()
#include <rw_fnmatch.h> // for rw_fnmatch()
#include <rw_braceexp.h> // for rw_shell_expand()
#include <rw_driver.h> // for rw_error()
#ifdef _RWSTD_OS_LINUX
# ifdef _RWSTD_NO_PURE_C_HEADERS
// on Linux define _XOPEN_SOURCE to get CODESET defined in <langinfo.h>
// (avoid this hackery when using pure "C" headers, i.e., with the EDG
// eccp demo)
# define _XOPEN_SOURCE 500 /* Single Unix conformance */
// bring __int32_t into scope (otherwise <wctype.h> fails to compile)
# include <sys/types.h>
# endif
#endif // Linux
#include <fcntl.h>
#include <sys/stat.h> // for stat
#ifndef _WIN32
# include <unistd.h>
# include <sys/wait.h> // for WIFEXITED(), WIFSIGNALED(), WTERMSIG()
#else
# include <io.h>
# ifdef _MSC_VER
# include <crtdbg.h> // for _malloc_dbg()
# endif
#endif
#include <ios> // for ios::*
#include <limits> // for numeric_limits
#include <locale> // for money_base::pattern
#include <assert.h> // for assert
#include <limits.h> // for PATH_MAX
#include <locale.h> // for LC_XXX macros, setlocale
#include <stdarg.h> // for va_copy, va_list, ...
#include <stdio.h> // for fgets, remove, sprintf, ...
#include <stdlib.h> // for getenv, free, malloc, realloc
#include <string.h> // for strcat, strcpy, strlen, ...
#include <ctype.h>
#include <wchar.h> // for wcslen, ...
#ifndef _WIN32
# include <clocale>
# ifndef LC_MESSAGES
# define LC_MESSAGES _RWSTD_LC_MESSAGES
# endif // LC_MESSAGES
# define EXE_SUFFIX ""
#else // if Windows
# define _RWSTD_NO_LANGINFO
# define EXE_SUFFIX ".exe"
#endif // _WIN32
#ifndef _RWSTD_NO_LANGINFO
# include <langinfo.h>
#endif
#if !defined (PATH_MAX) || PATH_MAX < 128 || 4096 < PATH_MAX
// deal with undefined, bogus, or excessive values
# undef PATH_MAX
# define PATH_MAX 1024
#endif
#define TOPDIR "TOPDIR" /* the TOPDIR environment variable */
#define BINDIR "BINDIR" /* the BINDIR environment variable */
#if _RWSTD_PATH_SEP == '/'
# define SLASH "/"
# define IS_ABSOLUTE_PATHNAME(path) (_RWSTD_PATH_SEP == *(path))
#else
# define SLASH "\\"
# define IS_ABSOLUTE_PATHNAME(path) \
( ( 'A' <= *(path) && 'Z' >= *(path) \
|| 'a' <= *(path) && 'z' >= *(path)) \
&& ':' == (path)[1] \
&& _RWSTD_PATH_SEP == (path)[2])
#endif
// relative paths to the etc/nls directory and its subdirectories
#define RELPATH "etc" SLASH "nls"
#define TESTS_ETC_PATH "tests" SLASH "etc"
// extension of the catalog file
#ifndef _WIN32
# define RW_CAT_EXT ".cat"
#else
# define RW_CAT_EXT ".dll"
#endif
/**************************************************************************/
// Runs the `locale' command located in the directory named by the
// BINDIR environment variable (or in ../bin when BINDIR is not set),
// passing it `args'. When `fname' is non-null the standard output
// of the command is redirected into the file of that name.
// Returns the value returned by rw_system().
_TEST_EXPORT int
rw_locale (const char *args, const char *fname)
{
    // use BINDIR to determine the location of the locale command
    const char* bindir = getenv (BINDIR);
    if (!bindir)
        bindir = ".." SLASH "bin";

    // treat a null argument list the same as an empty one
    if (!args)
        args = "";

    // redirect the output only when the caller named a file
    if (fname)
        return rw_system ("%s%slocale%s %s >%s",
                          bindir, SLASH, EXE_SUFFIX, args, fname);

    return rw_system ("%s%slocale%s %s",
                      bindir, SLASH, EXE_SUFFIX, args);
}
/**************************************************************************/
_TEST_EXPORT const char*
rw_localedef (const char *args,
const char* src, const char *charmap, const char *locname)
{
assert (src && charmap);
// create a fully qualified pathname of the locale database
// when (locname == 0), the pathname is computed by appending
// the name of the character map file `charmap' to the name
// of the locale definition file `src'
// otherwise, when `locname' is not a pathname, the pathname
// of the locale database is formed by appending `locname'
// to the name of the locale root directory
static char locale_path [PATH_MAX];
const char* locale_root = getenv (LOCALE_ROOT_ENVAR);
if (!locale_root)
locale_root = ".";
assert ( strlen (locale_root)
+ strlen (src)
+ strlen (charmap)
+ 2 < sizeof locale_path);
strcpy (locale_path, locale_root);
if (locname) {
if (strchr (locname, _RWSTD_PATH_SEP))
strcpy (locale_path, locname);
else {
strcat (locale_path, SLASH);
strcat (locale_path, locname);
}
}
else {
// compute the locale pathname from `src', `charmap',
// and `locale_root'
strcpy (locale_path, locale_root);
strcat (locale_path, SLASH);
const char *slash = strrchr (src, _RWSTD_PATH_SEP);
slash = slash ? slash + 1 : src;
strcat (locale_path, src);
strcat (locale_path, ".");
slash = strrchr (charmap, _RWSTD_PATH_SEP);
slash = slash ? slash + 1 : charmap;
strcat (locale_path, slash);
}
// check to see if the locale database already exists and
// if so, return immediately the locale filename to the caller
#if !defined (_MSC_VER)
struct stat sb;
if (!stat (locale_path, &sb)) {
#else
struct _stat sb;
if (!_stat (locale_path, &sb)) {
#endif
return strrchr (locale_path, _RWSTD_PATH_SEP) + 1;
}
// otherwise, try to create the locale database
// fallback for when TOPDIR is unset or empty
char topdir_path_buf [] = __FILE__;
// use TOPDIR to determine the root of the source tree
const char* topdir = getenv (TOPDIR);
if (!topdir || !*topdir) {
// try to get TOPDIR from __FILE__
char* const slash = strrchr (topdir_path_buf, _RWSTD_PATH_SEP);
if (slash) {
slash [-1] = '\0';
topdir = topdir_path_buf;
}
}
if (!topdir || !*topdir) {
rw_error (0, __FILE__, __LINE__,
"the environment variable %s is %s",
TOPDIR, topdir ? "empty" : "undefined");
return 0;
}
// use BINDIR to determine the location of the localedef command
const char* bindir = getenv (BINDIR);
if (!bindir)
bindir = ".." SLASH "bin";
// if `src' is relative pathname (or a filename) construct the fully
// qualified absolute pathname to the locale definition file from it
char src_path [PATH_MAX];
if (!IS_ABSOLUTE_PATHNAME (src)) {
strcpy (src_path, topdir);
strcat (src_path, SLASH RELPATH SLASH "src" SLASH);
strcat (src_path, src);
// if the file doesn't exist, see if there is a file
// with that name in the locale root directory (e.g.,
// a temporary file)
FILE* const file_exists = fopen (src_path, "r");
if (file_exists)
fclose (file_exists);
else {
strcpy (src_path, locale_root);
strcat (src_path, SLASH);
strcat (src_path, src);
}
src = src_path;
}
char charmap_path [PATH_MAX];
if (!IS_ABSOLUTE_PATHNAME (charmap)) {
strcpy (charmap_path, topdir);
strcat (charmap_path, SLASH RELPATH SLASH "charmaps" SLASH);
strcat (charmap_path, charmap);
// if the file doesn't exist, see if there is a file
// with that name in the locale root directory (e.g.,
// a temporary file)
FILE* const file_exists = fopen (charmap_path, "r");
if (file_exists)
fclose (file_exists);
else {
strcpy (charmap_path, locale_root);
strcat (charmap_path, SLASH);
strcat (charmap_path, charmap);
}
charmap = charmap_path;
}
if (!args)
args = "";
const int ret = rw_system ("%s%slocaledef%s %s -c -f %s -i %s %s",
bindir, SLASH, EXE_SUFFIX, args,
charmap, src, locale_path);
// return the unqualified locale file name on success or 0 on failure
return ret ? (char*)0 : strrchr (locale_path, _RWSTD_PATH_SEP) + 1;
}
/**************************************************************************/
extern "C" {

// name of the temporary locale root directory; passed to rw_tmpnam()
// as the destination buffer by rw_set_locale_root()
static char rw_locale_root [PATH_MAX];

// atexit() handler that removes the directory named by rw_locale_root
// along with the temporary locale databases created by the test
static void atexit_rm_locale_root ()
{
    // turn off errors and notes while cleaning up, keeping the values
    // returned by rw_enable() so that they can be handed back below
    const bool saved_error = rw_enable (rw_error, false);
    const bool saved_note  = rw_enable (rw_note , false);

    rw_system (SHELL_RM_RF "%s", rw_locale_root);

    rw_enable (rw_note , saved_note);
    rw_enable (rw_error, saved_error);
}

}
// Creates a temporary directory for the files created by the test,
// points the LOCALE_ROOT_ENVAR environment variable at it, and
// registers an atexit() handler that removes the directory.
// Returns the name of the directory or 0 on failure.
_TEST_EXPORT const char*
rw_set_locale_root ()
{
    // set any additional environment variables defined in
    // the RW_PUTENV environment variable (if it exists)
    rw_putenv (0);

    // obtain a unique name for the temporary directory
    const char* const tmpdir = rw_tmpnam (rw_locale_root);
    if (0 == tmpdir)
        return 0;

    // compose the "<variable>=<directory>" assignment
    char assignment [sizeof LOCALE_ROOT_ENVAR + sizeof rw_locale_root] =
        LOCALE_ROOT_ENVAR "=";

    strcat (assignment, tmpdir);

    // remove the temporary file rw_tmpnam() may have created
    const int rm_status = rw_system (SHELL_RM_RF " %s", tmpdir);

#ifdef _WIN32
    // ignore errors on WIN32 where the DEL command fails
    // even with /Q /S when the files don't exist
    (void)rm_status;
#else
    // assume a sane implementation of SHELL_RM_RF
    if (rm_status)
        return 0;
#endif   // _WIN32

    if (0 != rw_system ("mkdir %s", tmpdir))
        return 0;

    // set the environment variable naming the directory
    // where std::locale looks for locale database files
    rw_putenv (assignment);

    rw_error (0 == atexit (atexit_rm_locale_root), __FILE__, __LINE__,
              "atexit(atexit_rm_locale_root) failed: %m");

    return tmpdir;
}
/**************************************************************************/
// Returns a list of the names of the locales reported by `locale -a'
// (optionally filtered through grep with the expression `grep_exp')
// as a sequence of NUL-terminated strings terminated by an empty
// string. When `loc_cat' is not _UNUSED_CAT only names accepted by
// setlocale (loc_cat, name) are included and a name is dropped when
// setlocale() returns the same string as for the previously accepted
// name. When `prepend_c_loc' is true the "C" locale is placed at the
// front of the list and not repeated.
//
// The list is stored in a static, dynamically allocated buffer that
// is reused (and overwritten) by subsequent calls; a call with the
// same category as the last one returns the cached list. On failure
// the function returns the name of the "C" locale.
_TEST_EXPORT char*
rw_locales (int loc_cat, const char* grep_exp, bool prepend_c_loc)
{
    static char deflocname [3] = "C\0";
    static char* slocname = 0;

    static const size_t grow_size = 5120;
    static size_t size       = 0;           // number of bytes used in the array
    static size_t total_size = grow_size;   // the size of the array
    static int    last_cat   = loc_cat;     // last category

#ifndef _MSC_VER
#  define _QUIET_MALLOC(n)   malloc(n)
#  define _QUIET_FREE(p)     free(p)
#else
    // prevent allocation from causing failures in tests that
    // keep track of storage allocated in _NORMAL_BLOCKS
#  define _QUIET_MALLOC(n)   _malloc_dbg (n, _CLIENT_BLOCK, 0, 0)
#  define _QUIET_FREE(p)     _free_dbg (p, _CLIENT_BLOCK);
#endif

    // allocate first time through
    if (!slocname) {
        slocname = _RWSTD_STATIC_CAST (char*, _QUIET_MALLOC (total_size));

        if (!slocname)
            return deflocname;

        *slocname = '\0';
    }

    // return immediately if buffer is already initialized
    if (*slocname && loc_cat == last_cat)
        return slocname;

    // remember the category we were last called with
    last_cat = loc_cat;

    // start with an empty list so that names collected by a previous
    // call (for a different category) are neither counted nor returned
    size      = 0;
    *slocname = '\0';

    char* save_localename = 0;
    char  namebuf [PATH_MAX];

    if (loc_cat != _UNUSED_CAT) {
        // copy the locale name, the original may be overwritten by libc
        const char* const curname = setlocale (loc_cat, 0);

        if (curname && strlen (curname) < sizeof namebuf)
            save_localename = strcpy (namebuf, curname);
    }

    const char* const fname = rw_tmpnam (0);

    if (!fname) {
        return deflocname;   // error
    }

    // make sure that grep_exp is <= 80
    if (grep_exp && 80 < strlen (grep_exp)) {
        abort ();
    }

    // execute a shell command and redirect its output into the file
    const int exit_status =
          grep_exp && *grep_exp
        ? rw_system ("locale -a | grep \"%s\" > %s", grep_exp, fname)
        : rw_system ("locale -a > %s", fname);

    if (exit_status) {
        // don't leave the temporary file behind
        remove (fname);
        return deflocname;   // error
    }

    // open file containing the list of installed locales
    FILE* const file = fopen (fname, "r");

    if (file) {

        char linebuf [256];

        // even simple locale names can be very long (e.g., on HP-UX,
        // where a locale name always consists of the names of all
        // categories, such as "C C C C C C")
        char last_name [256];
        *last_name = '\0';

        // put the C locale at the front
        if (prepend_c_loc) {
            strcpy (slocname, deflocname);
            size += strlen (deflocname) + 1;
        }

        // if successful, construct a char array with the locales
        while (fgets (linebuf, int (sizeof linebuf), file)) {

            // the length of the line including the trailing newline
            // equals the size of the name including its terminating NUL
            const size_t linelen = strlen (linebuf);

            linebuf [linelen ? linelen - 1 : 0] = '\0';

            // an empty name would prematurely terminate the list
            if ('\0' == *linebuf)
                continue;

            // don't allow C locale to be in the list again
            // if we put it at the front of the locale list
            if (prepend_c_loc && !strcmp (linebuf, deflocname))
                continue;

#ifdef _RWSTD_OS_SUNOS

            const char iso_8859_pfx[] = "iso_8859_";

            // avoid locales named common and iso_8859_* on SunOS
            // since they are known to cause setlocale() to fail
            if (   !strcmp ("common", linebuf)
                || sizeof iso_8859_pfx <= linelen
                && !memcmp (iso_8859_pfx, linebuf, sizeof iso_8859_pfx - 1))
                continue;

#endif   // _RWSTD_OS_SUNOS

            // make sure there is room for the name, its terminating
            // NUL, and the empty string that terminates the list
            if (total_size < size + linelen + 1) {
                const size_t new_total = total_size + grow_size;

                char* const tmp =
                    _RWSTD_STATIC_CAST (char*, _QUIET_MALLOC (new_total));

                if (!tmp)
                    break;

                // copy only the bytes that are actually in use
                memcpy (tmp, slocname, size);
                _QUIET_FREE (slocname);

                slocname   = tmp;
                total_size = new_total;
            }

#ifdef _WIN64

            // prevent a hang (OS/libc bug?)
            strcpy (slocname + size, linebuf);
            size += linelen;

#else   // if !defined (_WIN64)
            if (loc_cat != _UNUSED_CAT) {

                // set the C locale to verify that the name is valid
                const char *name = setlocale (loc_cat, linebuf);

                // if it is and if the actual locale name different
                // from the last one, append it to the list
                if (name && strcmp (last_name, name)) {
                    strcpy (slocname + size, linebuf);
                    size += linelen;

                    // save the last locale name
                    assert (strlen (name) < sizeof last_name);
                    strcpy (last_name, name);
                }
            }
            else {
                strcpy (slocname + size, linebuf);
                size += linelen;
            }

#endif   // _WIN64

        }

        // terminate the list with an empty string
        slocname [size] = '\0';

        // close before removing
        fclose (file);
    }

    // restore the original locale if it was successfully saved
    if (save_localename)
        setlocale (loc_cat, save_localename);

    remove (fname);

    return *slocname ? slocname : deflocname;
}
/**************************************************************************/
// Searches for a wide character whose multibyte representation in
// the current locale is exactly `bytes' long and stores the NUL-
// terminated multibyte character in `buf'. When `bytes' is greater
// than MB_CUR_MAX the search is for a character MB_CUR_MAX bytes long.
// Returns `buf' (set to the empty string when `bytes' is 0 or when
// the search was for MB_CUR_MAX bytes and found nothing), or 0 when
// a character of exactly the requested length could not be found.
static const char*
_get_mb_char (char *buf, size_t bytes)
{
    _RWSTD_ASSERT (0 != buf);

    *buf = '\0';

    if (0 == bytes)
        return buf;

    const bool exact = bytes <= size_t (MB_CUR_MAX);

    if (!exact)
        bytes = MB_CUR_MAX;

    const int want = int (bytes);

    // search the first 64K characters sequentially
    wchar_t wc = wchar_t (1);

    while (wc != wchar_t (0xffff)) {

        if (want == wctomb (buf, wc) && want == mblen (buf, bytes)) {
            // NUL-terminate the multibyte character of the requested length
            buf [bytes] = '\0';
            break;
        }

        *buf = '\0';
        ++wc;
    }

#if 2 < _RWSTD_WCHAR_SIZE

    // if a multibyte character of the requested size is not found
    // in the low 64K range, try to find one using a random search
    if (wchar_t (0xffff) == wc) {

        // limit the number of attempts to prevent an infinite loop
        // in case when MB_CUR_MAX is greater than the longest
        // multibyte character
        for (int left = 0x100000; 0 != left; --left) {

            wc = wchar_t (rand ());

            if (RAND_MAX < 0x10000) {
                wc <<= 16;
                wc |=  wchar_t (rand ());
            }

            if (want == wctomb (buf, wc) && want == mblen (buf, bytes)) {
                // NUL-terminate the multibyte character
                buf [bytes] = '\0';
                break;
            }

            *buf = '\0';
        }
    }

#endif   // 2 < _RWSTD_WCHAR_SIZE

    // return 0 on failure to find a sequence exactly `bytes' long
    if (exact && bytes != strlen (buf))
        return 0;

    return buf;
}
// Fills the elements of `mb_chars' with multibyte characters of the
// current locale so that element [N - 1] holds a character N bytes
// long, for N from the length of the longest character found (capped
// at _RWSTD_MB_LEN_MAX) down to 1. Returns the value of `mb_cur_max'
// computed by the function (0 when no multibyte character is found).
_TEST_EXPORT size_t
rw_get_mb_chars (rw_mbchar_array_t mb_chars)
{
    _RWSTD_ASSERT (0 != mb_chars);

    // look for the longest multibyte character first
    const char* const longest = _get_mb_char (mb_chars [0], size_t (-1));

    if (0 == rw_note (0 != longest, __FILE__, __LINE__,
                      "failed to find any multibyte characters "
                      "in locale \"%s\" with MB_CUR_MAX = %u",
                      setlocale (LC_CTYPE, 0), MB_CUR_MAX))
        return 0;

    size_t mb_cur_max = strlen (longest);

    if (_RWSTD_MB_LEN_MAX < mb_cur_max)
        mb_cur_max = _RWSTD_MB_LEN_MAX;

    // fill each element of `mb_chars' with a multibyte character
    // of the corresponding length, starting with the longest
    for (size_t len = mb_cur_max; 0 != len; --len) {

        // try to generate a multibyte character `len' bytes long
        if (0 != _get_mb_char (mb_chars [len - 1], len))
            continue;

        // zh_CN.gb18030 and zh_TW.euctw on Linux are examples
        // of multibyte locales where MB_CUR_MAX == 4 but,
        // apparently, no 3-byte characters
        if (0 == rw_note (mb_cur_max <= len, __FILE__, __LINE__,
                          "failed to find %u-byte characters "
                          "in locale \"%s\" with MB_CUR_MAX = %u",
                          len, setlocale (LC_CTYPE, 0), MB_CUR_MAX)) {
            mb_cur_max = 0;
            break;
        }

        --mb_cur_max;
    }

    return mb_cur_max;
}
// Stores in `wbuf' up to `bufsize' wide characters selected by the
// value wctomb() returns for them in the current locale:
//   - when `nbytes' is 0, characters for which wctomb() returns
//     a positive value
//   - otherwise, characters for which wctomb() returns exactly `nbytes'
//     (with a negative `nbytes' this selects characters that wctomb()
//     rejects)
// The values 0 through 65535 are tried in order first; where wchar_t
// is wider than 2 bytes random values are tried afterwards.
// Returns the number of wide characters stored.
_TEST_EXPORT size_t
rw_get_wchars (wchar_t *wbuf, size_t bufsize, int nbytes /* = 0 */)
{
    if (0 == bufsize)
        return 0;

    char mbuf [_RWSTD_MB_LEN_MAX];

    size_t count = 0;

    for (int code = 0; code < 65536; ++code) {

        // convert the wide character to determine whether it is
        // valid and if so, the length of its multibyte representation
        const wchar_t wc  = wchar_t (code);
        const int     len = wctomb (mbuf, wc);

        const bool wanted = nbytes ? nbytes == len : 0 < len;

        if (!wanted)
            continue;

        wbuf [count++] = wc;

        if (count == bufsize)
            return count;
    }

#if 2 < _RWSTD_WCHAR_SIZE

    // try to find the remaining wide characters by a random
    // search, limiting the number of attempts to prevent an
    // infinite loop
    for (int left = 0x100000; 0 != left; --left) {

        // make a wide character with a random bit pattern
        wchar_t wc = wchar_t (rand ());

        if (RAND_MAX < 0x10000) {
            wc <<= 16;
            wc |=  wchar_t (rand ());
        }

        const int len = wctomb (mbuf, wc);

        const bool wanted = nbytes ? nbytes == len : 0 < len;

        if (!wanted)
            continue;

        wbuf [count++] = wc;

        if (count == bufsize)
            return count;
    }

#endif   // 2 < _RWSTD_WCHAR_SIZE

    return count;
}
// Iterates over the locales returned by rw_locales() looking for the
// one for which rw_get_mb_chars() returns the greatest value. On
// success stores that value in `*mb_cur_max', fills `mb_chars' with
// the multibyte characters generated in that locale, and returns the
// locale name (a pointer into the list returned by rw_locales()).
// Returns 0 when no locale with a computed value of at least 2 could
// be found. The LC_CTYPE locale in effect on entry is restored
// before returning.
_TEST_EXPORT const char*
rw_find_mb_locale (size_t            *mb_cur_max,
                   rw_mbchar_array_t  mb_chars)
{
    _RWSTD_ASSERT (0 != mb_cur_max);
    _RWSTD_ASSERT (0 != mb_chars);

    if (2 > _RWSTD_MB_LEN_MAX) {
        rw_warn (0, __FILE__, __LINE__, "MB_LEN_MAX = %d, giving up",
                 _RWSTD_MB_LEN_MAX);
        return 0;
    }

    static const char *mb_locale_name;

    // forget the name found by any previous call so that
    // a stale value is never reported or returned
    mb_locale_name = 0;

    *mb_cur_max = 0;

    // save the name of the current locale, verifying that
    // it fits in the buffer before copying it
    char saved_locale_name [1024];

    const char* const cur_locale = setlocale (LC_CTYPE, 0);

    if (!cur_locale || sizeof saved_locale_name <= strlen (cur_locale)) {
        rw_warn (0, __FILE__, __LINE__,
                 "unable to save the name of the current locale, giving up");
        return 0;
    }

    strcpy (saved_locale_name, cur_locale);

    // iterate over all installed locales
    for (const char *name = rw_locales (_RWSTD_LC_CTYPE, 0); name && *name;
         name += strlen (name) + 1) {

        if (setlocale (LC_CTYPE, name)) {

            // try to generate a set of multibyte characters
            // with lengths from 1 and MB_CUR_MAX (or less)
            const size_t cur_max = rw_get_mb_chars (mb_chars);

            if (*mb_cur_max < cur_max) {
                *mb_cur_max    = cur_max;
                mb_locale_name = name;

                // break when we've found a multibyte locale
                // with the longest possible encoding
                if (_RWSTD_MB_LEN_MAX == *mb_cur_max)
                    break;
            }
        }
    }

    if (*mb_cur_max < 2) {
        rw_warn (0, __FILE__, __LINE__,
                 "failed to find a full set of multibyte "
                 "characters in locale \"%s\" with MB_CUR_MAX = %u "
                 "(computed)", mb_locale_name, *mb_cur_max);
        mb_locale_name = 0;
    }
    else {
        // (re)generate the multibyte characters for the saved locale
        // as they may have been overwritten in subsequent iterations
        // of the loop above (while searching for a locale with greater
        // value of MB_CUR_MAX)
        setlocale (LC_CTYPE, mb_locale_name);
        rw_get_mb_chars (mb_chars);
    }

    setlocale (LC_CTYPE, saved_locale_name);

    return mb_locale_name;
}
/**************************************************************************/
// Writes the text `locale' into the file locale.src and the text
// `charmap' into the file charmap.src, both in the directory returned
// by rw_set_locale_root(), and invokes rw_localedef() on the two files
// to create a locale named "test-locale". Returns the locale name on
// success or 0 on failure. Once a locale has been created subsequent
// calls return its name without doing anything else.
_TEST_EXPORT const char*
rw_create_locale (const char *charmap, const char *locale)
{
    // only one locale is enough (avoid invoking localedef more than once)
    static const char* locname;

    if (locname)
        return locname;

    // set up RWSTD_LOCALE_ROOT and other environment variables
    const char* const locale_root = rw_set_locale_root ();

    if (0 == locale_root)
        return 0;

    // create a temporary locale definition file
    char srcfname [PATH_MAX];
    if (rw_snprintf (srcfname, PATH_MAX, "%s%slocale.src",
                     locale_root, SLASH) < 0) {
        return 0;
    }

    FILE *fout = fopen (srcfname, "w");

    if (!fout) {
        // use the same directive as for the character map file below
        rw_error (0, __FILE__, __LINE__,
                  "fopen(%#s, \"w\") failed: %m", srcfname);
        return 0;
    }

    fprintf (fout, "%s", locale);

    fclose (fout);

    // create a temporary character map file
    char cmfname [PATH_MAX];
    if (rw_snprintf (cmfname, PATH_MAX, "%s%scharmap.src",
                     locale_root, SLASH) < 0) {
        return 0;
    }

    fout = fopen (cmfname, "w");

    if (!fout) {
        rw_error (0, __FILE__, __LINE__,
                  "fopen(%#s, \"w\") failed: %m", cmfname);
        return 0;
    }

    fprintf (fout, "%s", charmap);

    fclose (fout);

    locname = "test-locale";

    // process the locale definition file and character map;
    // forget the name on failure so that the next call tries again
    if (0 == rw_localedef ("-w", srcfname, cmfname, locname))
        locname = 0;

    return locname;
}
/**************************************************************************/
// list of locale names set by rw_opt_setlocales(): a sequence of
// NUL-terminated strings terminated by an empty string, or 0
static const char*
_rw_locale_names;

// read-only view of the list exported to tests
_TEST_EXPORT const char* const&
rw_opt_locales = _rw_locale_names;


// Command line option handler. When called with argc == 1 and a null
// argv [0] stores the address of the help text in argv [0]. Otherwise
// expects argv [0] to have the form "<option>=<names>" where <names>
// is a list of locale names separated by commas or spaces, splits the
// list into a sequence of NUL-terminated strings, and makes it
// available via rw_opt_locales. Returns 0 on success and non-zero
// on failure.
_TEST_EXPORT int
rw_opt_setlocales (int argc, char* argv[])
{
    if (1 == argc && argv && 0 == argv [0]) {
        static const char helpstr[] = {
            "Use the locales specified by the space-separated list of "
            "locale names given by <arg>.\n"
        };

        argv [0] = _RWSTD_CONST_CAST (char*, helpstr);

        return 0;
    }

    // there is nothing to parse without an option string
    if (0 == argv || 0 == argv [0])
        return 1;

    // the option requires an equals sign followed by an optional argument
    char *args = strchr (argv [0], '=');

    RW_ASSERT (0 != args);

    // fail gracefully when the assertion above is compiled out
    if (0 == args)
        return 1;

    // small static buffer should be sufficient in most cases
    static char buffer [256];

    const size_t len = strlen (++args);

    // dynamically allocate a bigger buffer when the small buffer
    // isn't big enough (let the dynamically allocated buffer leak)
    char* const locale_names =
        sizeof buffer < len + 2 ? (char*)malloc (len + 2) : buffer;

    if (0 == locale_names)
        return 1;

    // terminate the list with two NULs
    locale_names [len]     = '\0';
    locale_names [len + 1] = '\0';

    memcpy (locale_names, args, len);

    // replace each separator with a NUL
    for (char *next = locale_names; ; ) {
        next = strpbrk (next, ", ");
        if (next)
            *next++ = '\0';
        else
            break;
    }

    _rw_locale_names = locale_names;

    // return 0 on success
    return 0;
}
/**************************************************************************/
// Writes the messages in `catalog' to the file `catname' and runs
// gencat on it to produce a catalog whose name is `catname' with its
// last extension replaced by RW_CAT_EXT; the file `catname' is then
// removed. `catalog' is a sequence of message sets terminated by an
// empty string, each set being a sequence of NUL-terminated messages
// terminated by an empty string. Returns -1 when `catname' cannot be
// opened for writing, otherwise the value returned by rw_system().
_TEST_EXPORT int
rw_create_catalog (const char * catname, const char * catalog)
{
    RW_ASSERT (catname && catalog);

    FILE* const out = fopen (catname, "w");

    if (0 == out)
        return -1;

#ifndef _WIN32

    // sets and the messages within each set are numbered from 1
    int setno = 1;

    while (*catalog) {
        fprintf (out, "$set %d This is Set %d\n", setno, setno);

        for (int msgno = 1; *catalog; ++msgno) {
            fprintf (out, "%d %s\n", msgno, catalog);
            catalog += strlen (catalog) + 1;
        }

        // skip over the empty string terminating the set
        ++catalog;
        ++setno;
    }

#else   // if defined (_WIN32)

    fprintf (out, "STRINGTABLE\nBEGIN\n");

    // messages are numbered consecutively across all sets
    int msgno = 1;

    while (*catalog) {

        while (*catalog) {
            fprintf (out, "%d \"%s\"\n", msgno, catalog);
            catalog += strlen (catalog) + 1;
            ++msgno;
        }

        // skip over the empty string terminating the set
        ++catalog;
    }

    fprintf (out, "END\n");

#endif   // _WIN32

    fclose (out);

    // make a copy of the file name without its last extension
    const size_t namelen = strlen (catname);

    char* const stem = new char [namelen + 1];
    memcpy (stem, catname, namelen + 1);

    char* const dot = strrchr (stem, '.');
    if (dot)
        *dot = '\0';

    const int status = rw_system ("gencat %s" RW_CAT_EXT " %s",
                                  stem, catname);

    delete[] stem;

    remove (catname);

    return status;
}
// returns true if `ch' is a whitespace character according to
// isspace(); the argument is converted to unsigned char first
// so that negative char values are passed in the valid range
inline bool
_rw_isspace (char ch)
{
    const unsigned char uch = (unsigned char)ch;

    return isspace (uch) != 0;
}
// returns the result of toupper() for `ch' converted to char; the
// argument is converted to unsigned char first so that negative
// char values are passed in the valid range
inline char
_rw_toupper (char ch)
{
    const unsigned char uch = (unsigned char)ch;

    return toupper (uch);
}
// returns the result of tolower() for `ch' converted to char; the
// argument is converted to unsigned char first so that negative
// char values are passed in the valid range
inline char
_rw_tolower (char ch)
{
    const unsigned char uch = (unsigned char)ch;

    return tolower (uch);
}
// our locale database is a big array of these
// each entry pairs the name of a locale as accepted by setlocale()
// with the canonical name computed for it by _rw_all_locales()
struct _rw_locale_entry {
char locale_name [64]; // English_United States.1252
char canonical_name [32]; // en-US-1-1252
// link to the next entry of the list threaded through the array
// (0 in the last entry); set by _rw_reset_locales() and modified
// by rw_locale_query() as entries are taken off the list
struct _rw_locale_entry* next;
};
// an array of `count' locale entries
struct _rw_locale_array {
_rw_locale_entry* entries;
_RWSTD_SIZE_T count;
};
// an element of a lookup table mapping a native name to a canonical
// name; entries are ordered by their `native' member
// by _rw_lookup_comparator()
struct _rw_lookup_entry_t {
const char* native;
const char* canonical;
};
extern "C" {
static int
_rw_lookup_comparator (const void* _lhs, const void* _rhs)
{
const _rw_lookup_entry_t* lhs = (const _rw_lookup_entry_t*)_lhs;
const _rw_lookup_entry_t* rhs = (const _rw_lookup_entry_t*)_rhs;
return strcmp (lhs->native, rhs->native);
}
} // extern "C"
// table mapping native names to canonical names; the table owns two
// buffers, `entries_' and `table_data_', which it releases with
// free() when destroyed (they are filled in by load_from_file(),
// defined elsewhere)
struct _rw_lookup_table_t {

    // constructs an empty table
    _rw_lookup_table_t ()
        : entries_ (0), count_ (0), table_data_ (0) { }

    // releases the storage owned by the table (free() ignores
    // null pointers so no checks are necessary)
    ~_rw_lookup_table_t () {
        free (entries_);
        entries_ = 0;
        count_   = 0;

        free (table_data_);
        table_data_ = 0;
    }

    bool load_from_file (const char* path, const char* file, int upper_or_lower);

    // returns a pointer to the first entry of the table
    const _rw_lookup_entry_t* get_entries () const { return entries_; }

    // returns the number of entries in the table
    size_t get_num_entries () const { return count_; }

    // returns the canonical name for `name', or 0 when `name' is null
    const char* get_canonical_name (const char* name) const;

private:

    _rw_lookup_entry_t* entries_;
    size_t              count_;
    char*               table_data_;

    // copying is intentionally disabled (declared but not defined)
    _rw_lookup_table_t (const _rw_lookup_table_t& rhs);
    _rw_lookup_table_t& operator= (const _rw_lookup_table_t& rhs);
};
// Links each entry of the array `a' to the one that follows it and
// sets the `next' pointer of the last entry to 0 so that the list
// threaded through the array includes all locales again. Does
// nothing when the array is null or empty.
static void
_rw_reset_locales (_rw_locale_array* a)
{
    // avoid indexing before the beginning of an empty array
    if (!a || !a->entries || 0 == a->count)
        return;

    const size_t last = a->count - 1;

    for (size_t i = 0; i < last; ++i)
        a->entries [i].next = &a->entries [i + 1];

    a->entries [last].next = 0;
}
//
// this function gets a list of all of the locales that are installed. it
// only queries the system once and caches the result for use in future
// requests.
//
// the list is obtained by running `locale -a'. each name accepted by
// setlocale (LC_CTYPE, name), other than "C" and "POSIX", is given
// a canonical name of the form <language>-<COUNTRY>-<MB_CUR_MAX>-<CODESET>
// and names whose canonical name duplicates that of an earlier entry
// are dropped. when the list cannot be obtained (or is empty) the
// result is a single entry for the "C" locale.
//
// note that the function changes the LC_CTYPE locale and does not
// restore it.
//
static _rw_locale_array
_rw_all_locales ()
{
    static _rw_locale_array result;

    // if we have already collected the locales, reuse the collection
    if (result.entries && result.count != 0) {
        _rw_reset_locales (&result);
        return result;
    }

    static _rw_locale_entry fallback = {
        "C", "C", 0
    };

    result.entries = &fallback;
    result.count   = 1;

    const char* const fname = rw_tmpnam (0);
    if (!fname) {
        return result;
    }

    const int status = rw_system ("locale -a > %s", fname);
    if (status) {
        // don't leave the temporary file behind
        remove (fname);
        return result;
    }

    FILE* file = fopen (fname, "r");
    if (file) {

        // looks to be the first time, get a list of all locales
        const size_t entry_size = sizeof (_rw_locale_entry);
        const size_t grow_size  = 64;

        _rw_locale_entry* entries  = 0;
        size_t            capacity = 0;
        size_t            size     = 0;

        // load the native to canonical lookup table
        _rw_lookup_table_t languages_map;
        _rw_lookup_table_t countries_map;
        _rw_lookup_table_t encodings_map;

        // fallback for when TOPDIR is unset or empty
        char topdir_path_buf [] = __FILE__;

        // use TOPDIR to determine the root of the source tree
        const char* topdir = getenv (TOPDIR);
        if (!topdir || !*topdir) {
            // try to get TOPDIR from __FILE__
            // NOTE(review): this truncates the pathname one character
            // before the last separator, which does not look like it
            // yields the root of the source tree -- confirm the intent
            char* const slash = strrchr (topdir_path_buf, _RWSTD_PATH_SEP);
            if (slash && slash != topdir_path_buf) {
                slash [-1] = '\0';
                topdir = topdir_path_buf;
            }
        }

        if (!topdir || !*topdir) {
            rw_error (0, __FILE__, __LINE__,
                      "the environment variable %s is %s",
                      TOPDIR, topdir ? "empty" : "undefined");
        }
        else {
            // we should be loading this from some other well
            // known path so we don't depend on $TOPDIR. sadly
            // __FILE__ is not an absolute path on msvc

            char path [PATH_MAX];
            strcpy (path, topdir);
            strcat (path, SLASH RELPATH SLASH);

            // load mapping from local to canonical names
            languages_map.load_from_file (path, "languages", -1);
            countries_map.load_from_file (path, "countries",  1);
            encodings_map.load_from_file (path, "encodings",  1);
        }

        char locale [128];

        while (fgets (locale, int (sizeof (locale)), file)) {

            // ensure sufficient space in array
            if (! (size < capacity)) {
                capacity += grow_size;

                _rw_locale_entry* new_entries =
                    _RWSTD_STATIC_CAST(_rw_locale_entry*,
                        _QUIET_MALLOC (entry_size * capacity));
                if (!new_entries) {
                    break;
                }

                // there is nothing to copy the first time through
                if (entries)
                    memcpy (new_entries, entries, entry_size * size);

                // deallocate the old buffer
                _QUIET_FREE (entries);

                entries = new_entries;
            }

            // grab entry to update
            _rw_locale_entry* const entry = &entries [size];
            entry->next = 0;

            // strip the trailing newline
            const size_t len = strlen (locale);
            locale [len ? len - 1 : 0] = '\0';

            // make sure that the named locale is one that we can use
            if (!setlocale (LC_CTYPE, locale)) {

                rw_note (0, __FILE__, __LINE__,
                         "setlocale() failed for '%s'", locale);

                continue;
            }
            // is not an alias for the C or POSIX locale
            else if (!strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
                continue;   // we don't do C/POSIX locale
            }
            // has a name that is short enough for our buffer
            else if (sizeof (entry->locale_name) < len) {

                rw_note (0, __FILE__, __LINE__,
                         "locale name '%s' was to long for fixed buffer",
                         locale);

                continue;   // locale name didn't fit, so we skip it
            }

#ifndef _RWSTD_NO_LANGINFO
            // get the uppercase name of the codeset of the locale,
            // truncating it if it doesn't fit in the buffer
            char codeset [40];

            size_t i = 0;
            for (const char* charset = nl_langinfo (CODESET);
                 *charset && i + 1 < sizeof codeset;
                 ++charset) {
                codeset [i++] = _rw_toupper (*charset);
            }

            codeset [i] = '\0';
#endif

            // copy the locale name
            strcpy (entry->locale_name, locale);

            // attempt to split line into parts
            char* extension = strrchr (locale, '@');
            if (extension) {
                *extension++ = '\0';
            }

            char* encoding = strrchr (locale, '.');
            if (encoding) {
                *encoding++ = '\0';

                for (int n = 0; encoding [n]; ++n)
                    encoding [n] = _rw_toupper (encoding [n]);
            }

            char* country = strrchr (locale, '_');
            if (country) {
                *country++ = '\0';

                for (int n = 0; country [n]; ++n)
                    country [n] = _rw_toupper (country [n]);
            }

            char* language = locale;

            for (int n = 0; language [n]; ++n)
                language [n] = _rw_tolower (language [n]);

            // use mapping databases to find the canonical
            // names for each part of the locale name; fall
            // back on the part itself when there is no mapping

            const char* planguage =
                languages_map.get_canonical_name (language);
            if (!planguage)
                planguage = language;

            const char* pcountry =
                countries_map.get_canonical_name (country);
            if (!pcountry)
                pcountry = country;

#ifndef _RWSTD_NO_LANGINFO

            const char* pencoding =
                encodings_map.get_canonical_name (codeset);
            if (!pencoding)
                pencoding = codeset;

#else

            const char* pencoding =
                encodings_map.get_canonical_name (encoding);
            if (!pencoding)
                pencoding = encoding;

#endif

            // require that all three parts are present; locales
            // lacking any of them are silently skipped
            if (!planguage || !*planguage)
                continue;

            if (!pcountry || !*pcountry)
                continue;

            if (!pencoding || !*pencoding)
                continue;

            // format MB_CUR_MAX separately so that the length
            // of the canonical name can be computed up front
            char mbmax [16];
            sprintf (mbmax, "%d", int (MB_CUR_MAX));

            // the three dashes account for the separators
            const size_t canonical_len =
                  strlen (planguage) + strlen (pcountry)
                + strlen (mbmax)     + strlen (pencoding) + 3;

            if (! (canonical_len < sizeof entry->canonical_name)) {

                rw_note (0, __FILE__, __LINE__,
                         "canonical name of locale '%s' was too long "
                         "for fixed buffer", entry->locale_name);

                continue;   // canonical name didn't fit, so we skip it
            }

            // the canonical name for lookup
            sprintf (entry->canonical_name, "%s-%s-%s-%s",
                     planguage, pcountry, mbmax, pencoding);

            //
            // eliminate locales that are duplicates according to
            // canonical name. we do this because the setlocale()
            // doesn't seem to tell us about aliases.
            //

            bool duplicate = false;

            // search backward as matches are more likely to be near
            // the back
            for (size_t e = size; 0 != e; --e) {

                if (!strcmp (entries [e-1].canonical_name,
                             entry->canonical_name)) {
                    duplicate = true;
                    break;
                }
            }

            if (!duplicate)
                size += 1;
        }

        fclose (file);

        // use the collected entries, if any, in place of the fallback
        if (size) {
            result.entries = entries;
            result.count   = size;
        }
        else
            _QUIET_FREE (entries);
    }

    // delete temp file whether or not it could be opened
    remove (fname);

    // link each node to the next. if the array is sorted,
    // the list will be sorted.
    _rw_reset_locales (&result);

    return result;
}
// Returns the names of up to `wanted' installed locales (all when
// `wanted' is 0) whose canonical names match `query' and, when
// `loc_cat' is not _UNUSED_CAT, that are accepted by
// setlocale (loc_cat, name).
//
// The query has the form <language>-<COUNTRY>-<MB_CUR_LEN>-<CODESET>;
// it is expanded by rw_shell_expand() and each expansion is matched
// against the canonical locale names with rw_fnmatch(). A null query
// matches any locale.
//
// The result is a sequence of NUL-terminated locale names terminated
// by an empty string. It is stored in a static buffer that is
// overwritten by the next call. Returns 0 when the query cannot be
// expanded or when no buffer has ever been allocated. The LC_ALL
// locale in effect on entry is restored before returning.
_TEST_EXPORT char*
rw_locale_query (int loc_cat, const char* query, size_t wanted)
{
    // the null query string will return any locale
    if (!query)
        query = "*";

    if (!wanted)
        wanted = _RWSTD_SIZE_MAX;

    char buf [256];

    // get a brace expanded representation of query, each expansion
    // is a null terminated string. the entire buffer is also null
    // terminated
    char* res = rw_shell_expand (query, 0, buf, sizeof (buf), '\0');
    if (!res)
        return 0;

    // cache the locale name so we can restore later, this must happen
    // before _rw_all_locales() because that function just changes the
    // locale without restoring it
    char save_locale [PATH_MAX];

    const char* const cur_locale = setlocale (LC_ALL, 0);

    const bool locale_saved =
        cur_locale && strlen (cur_locale) < sizeof save_locale;

    if (locale_saved)
        strcpy (save_locale, cur_locale);

    const _rw_locale_array all = _rw_all_locales ();

    // the buffer is retained across calls and reused
    static char*  string   = 0;
    static size_t length   = 0;
    static size_t capacity = 0;

    // start with an empty list so that the names returned
    // by a previous call aren't included in the result
    length = 0;

    // dummy head of the list of locales not (yet) accepted
    _rw_locale_entry rejects;
    rejects.canonical_name [0] = '\0';
    rejects.locale_name [0] = '\0';
    rejects.next = all.entries;

    // for each expansion of the query
    size_t count = 0;
    for (const char* name = res; *name; name += strlen (name) + 1)
    {
        _rw_locale_entry* dummy = &rejects;

        // linear search for matches in the reject list
        while (dummy->next)
        {
            const _rw_locale_entry* entry = dummy->next;

            // see if we found a match
            if (rw_fnmatch (name, entry->canonical_name, 0)) {

                // not a match, advance past it leaving it in the
                // rejects list and move along to next one
                dummy = dummy->next;
                continue;
            }

            // remove the accepted entry from the reject list
            // so we will not include it again
            dummy->next = entry->next;

            // if the user requested locales from a specific category
            if (loc_cat != _UNUSED_CAT) {

                // make sure that the matching locale has the specified
                // locale category and that we can use it.
                if (!setlocale (loc_cat, entry->locale_name)) {

                    // if we can't use it, then bail. this effectively
                    // removes the locale from the rejects list and
                    // doesn't add it to the accepted list.
                    continue;
                }
            }

            const size_t add_length = strlen (entry->locale_name) + 1;
            const size_t new_length = length + add_length;

            // grow buffer if necessary, making room for the name
            // and for the two NULs that terminate the list
            if (capacity < new_length + 2) {

                size_t new_capacity = capacity;

                while (new_capacity < new_length + 2)
                    new_capacity += 256;

                char* new_string =
                    _RWSTD_STATIC_CAST(char*, _QUIET_MALLOC (new_capacity));
                if (!new_string) {

                    // setup to get out of outer loop
                    count = wanted;

                    // get out of inner loop
                    break;
                }

                // there is nothing to copy the first time through
                if (string)
                    memcpy (new_string, string, length);

                _QUIET_FREE (string);

                string   = new_string;
                capacity = new_capacity;
            }

            // append the name, and update the length
            memcpy (string + length, entry->locale_name, add_length);

            length = new_length;

            count += 1;
            if (! (count < wanted))
                break;
        }

        if (! (count < wanted))
            break;
    }

    // restore the previous locale if it was successfully saved
    if (locale_saved)
        setlocale (LC_ALL, save_locale);

    // deallocate the shell expand buffer if needed
    if (res != buf)
        free (res);

    // double null terminated
    if (string) {
        string [length+0] = '\0';
        string [length+1] = '\0';
    }

    return string;
}
const char*
_rw_lookup_table_t::get_canonical_name (const char* name) const
{
if (!name)
return 0; // don't search for null string
const _rw_lookup_entry_t entry = { name, 0 };
const _rw_lookup_entry_t* found =
(const _rw_lookup_entry_t*)bsearch (&entry,
entries_,
count_,
sizeof (_rw_lookup_entry_t),
_rw_lookup_comparator);
if (found)
return found->canonical;
return 0;
}
// Populates the table from the text file whose name is formed by
// appending `name' to `path'.
//
//   path            leading part of the file name (the two strings
//                   are concatenated as is, no separator is inserted)
//   name            trailing part of the file name
//   upper_or_lower  negative to convert every line to lower case,
//                   positive to convert it to upper case, zero to
//                   leave the text unchanged
//
// File format: empty lines and lines starting with '#' are ignored.
// A line that starts with a non-whitespace character begins with a
// canonical name, followed by whitespace and a comma separated list
// of native names. A line that starts with whitespace adds native
// names to the most recently seen canonical name.
//
// On success the entries are sorted with _rw_lookup_comparator and
// stored in entries_/count_; table_data_ keeps the file contents
// that the entries point into. Returns true on success, false when
// the table is already loaded, the file name doesn't fit in PATH_MAX
// characters, the file cannot be opened or read, or memory cannot be
// allocated.
bool
_rw_lookup_table_t::load_from_file (const char* path, const char* name, int upper_or_lower)
{
    if (entries_)
        return false;   // should never happen

    char filename [PATH_MAX];

    // make sure the concatenated name (and its terminating null)
    // fits in the buffer before copying anything into it
    const size_t path_len = strlen (path);
    const size_t name_len = strlen (name);

    if (   !(path_len < sizeof filename)
        || !(name_len < sizeof filename - path_len)) {
        rw_error (0, __FILE__, __LINE__,
                  "file name %s%s is too long", path, name);
        return false;
    }

    memcpy (filename, path, path_len);
    memcpy (filename + path_len, name, name_len + 1);

    FILE* const file = fopen (filename, "rb");
    if (!file) {
        rw_error (0, __FILE__, __LINE__,
                  "failed to open the file %s", filename);
        return false;
    }

    // get the size of the file in bytes; ftell() returns -1 on error
    long file_size = -1;
    if (0 == fseek (file, 0, SEEK_END))
        file_size = ftell (file);

    if (file_size < 0 || 0 != fseek (file, 0, SEEK_SET)) {
        fclose (file);
        return false;
    }

    const size_t table_data_size = (size_t)file_size;

    // one extra character for the terminating null
    char* const table_data = (char*)malloc (table_data_size + 1);
    if (!table_data) {
        fclose (file);
        return false;
    }

    // read the entire file into the data buffer
    const size_t bytes_read =
        fread (table_data, 1, table_data_size, file);

    // everything below works on the in-memory copy
    fclose (file);

    if (bytes_read != table_data_size) {
        free (table_data);
        return false;
    }

    // null terminate
    table_data [bytes_read] = '\0';

    const size_t entry_size = sizeof (_rw_lookup_entry_t);

    _rw_lookup_entry_t* entries = 0;
    size_t capacity = 0;
    size_t size     = 0;

    // canonical name that the native names being parsed map to
    const char* canonical_name = 0;

    // process the buffer one line at a time, splitting it
    // in place into null-terminated tokens
    for (size_t offset = 0; offset < bytes_read; /**/) {

        char* key = table_data + offset;

        const size_t len = strcspn (key, "\r\n");
        key [len] = '\0';

        // skip the newline if it is there
        offset += len + 1;

        // special handling for empty lines and comments
        if (!*key || *key == '#')
            continue;

        // make upper or lower case as requested
        if (upper_or_lower < 0) {
            for (char* s = key; *s; ++s)
                *s = _rw_tolower (*s);
        }
        else if (0 < upper_or_lower) {
            for (char* s = key; *s; ++s)
                *s = _rw_toupper (*s);
        }

        // if first character of new line is not whitespace, then
        // we have a new canonical name token
        if (!_rw_isspace (*key)) {

            canonical_name = key;

            // increment key past canonical name
            for (/**/; *key; ++key)
                if (_rw_isspace (*key))
                    break;
        }

        // kill whitespace (this also terminates the canonical name)
        while (_rw_isspace (*key))
            *key++ = '\0';

        // key points to first non-whitespace after canonical name
        while (*key) {

            // key is first non-whitespace character, which is the
            // next native name we should record
            char* const native_name = key;

            // find first comma character, that is the end of the
            // native name
            while (*key && *key != ',')
                ++key;

            // remember where the name ends before the comma is
            // overwritten so that trailing whitespace can be found
            char* name_end = key;

            // if we found a comma, setup next name
            if (*key)
                *key++ = '\0';

            // kill any whitespace between the name and the comma
            // (or the end of the line)
            while (name_end != native_name && _rw_isspace (name_end [-1]))
                *--name_end = '\0';

            // kill whitespace after comma
            while (_rw_isspace (*key))
                *key++ = '\0';

            // ensure we have enough entries
            if (! (size < capacity)) {

                const size_t new_capacity = capacity + 64;

                _rw_lookup_entry_t* const new_entries =
                    (_rw_lookup_entry_t*)malloc (entry_size * new_capacity);
                if (!new_entries) {
                    free (entries);
                    free (table_data);
                    return false;
                }

                // entries is null the first time around
                if (size)
                    memcpy (new_entries, entries, entry_size * size);

                free (entries);

                entries  = new_entries;
                capacity = new_capacity;
            }

            // add the new mapping entry
            _rw_lookup_entry_t* const entry = &entries [size];
            entry->native    = native_name;
            entry->canonical = canonical_name;

            // increment number of entries
            size += 1;
        }
    }

    // sort the entries by native name for efficient searching
    // (entries is null when the file defines no mappings)
    if (size)
        qsort (entries, size, entry_size, _rw_lookup_comparator);

    // setup the table for return
    entries_    = entries;
    count_      = size;
    table_data_ = table_data;

    return true;
}