blob: 547e09c470a1d3b3b8cc6a7507d3d6905b0ec08c [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
*****************************************************************************
*
* File: CharInfo.cpp
* Description: The implementation for the CharInfo class. This class defines
* and provides features/information about character sets,
* collations and coercibility that are supported by SQL/MX.
*
*
* Created: 7/8/98
* Modified: $Date: 2006/11/01 01:38:09 $ (GMT)
* Language: C++
* Status: $State: Exp $
*
*
*
*****************************************************************************
*/
#include "NAWinNT.h"
#include "BaseTypes.h"
#include "charinfo.h"
#include "ComASSERT.h"
#include "ComMPLoc.h"
#include "dfs2rec.h" // for LOCALE stuff: REC_xBYTE_LOCALE_*
#include "SQLCLIdev.h" // for LOCALE stuff: SQLCHARSETCODE_*
#include "str.h"
#include "wstr.h"
#include "SQLTypeDefs.h"
#include "CmpMessage.h"
#include "CmpConnection.h"
#include "CmpContext.h"
#include "CmpCommon.h"
#include "CliSemaphore.h"
using namespace std;
//****************************************************************************
// CHARSET stuff
//****************************************************************************
// Symbolic flags used for the "supported" columns of some rows in the
// character set table below (IF_WIDE is also tested in debug builds by
// the isCharSet*Supported() methods).
#define IF_WIDE TRUE
#define IF_NSK FALSE
// One row of the static character set table (mapCSArray).
struct mapCS {
CharInfo::CharSet cs; // enum value described by this row
const char* name; // character set name string
size_t namelen; // len(SQLCHARSETSTRING_xxx)
NABoolean supported; // returned by CharInfo::isCharSetSupported()
NABoolean fully_supported; // returned by CharInfo::isCharSetFullySupported()
Int32 minBytesPerChar; // returned by CharInfo::minBytesPerChar()
Int32 maxBytesPerChar; // returned by CharInfo::maxBytesPerChar()
const char* replacementChar; // returned by CharInfo::getReplacementCharacter(); NULL in some rows
};
// Arranged as an array, starting with CHARSET_MIN and ending with CHARSET_MAX.
// The accessors in this file index it directly with (cs - CHARSET_MIN), so
// the rows must stay in enum order with no gaps (the /*n*/ markers give the
// expected enum value of each row).
// The "len" column is the length used by CharInfo::getCharSetEnum() for its
// prefix comparison against "name"; it is maintained by hand here.
// NOTE(review): presumably each "len" equals strlen() of the corresponding
// SQLCHARSETSTRING_xxx macro -- the macro values are not visible here; verify
// when adding or renaming a row.
static const struct mapCS mapCSArray[] = {
// CharSet enum value string value of charset name supp. fully min / max rep.
// len supp. bytes/char char
{ /*-2*/ CharInfo::KSC5601_MP, SQLCHARSETSTRING_KSC5601, 7, IF_NSK, IF_NSK, 2, 2, NULL },
{ /*-1*/ CharInfo::KANJI_MP, SQLCHARSETSTRING_KANJI, 5, IF_NSK, IF_NSK, 2, 2, NULL },
{ /* 0*/ CharInfo::UnknownCharSet, SQLCHARSETSTRING_UNKNOWN, 9, FALSE, FALSE, 1, 1, "?" },
{ /* 1*/ CharInfo::ISO88591, SQLCHARSETSTRING_ISO88591, 8, TRUE, TRUE, 1, 1, "?" },
{ /* 2*/ CharInfo::ISO88592, SQLCHARSETSTRING_ISO88592, 8, FALSE, FALSE, 1, 1, "?" },
{ /* 3*/ CharInfo::ISO88593, SQLCHARSETSTRING_ISO88593, 8, FALSE, FALSE, 1, 1, "?" },
{ /* 4*/ CharInfo::ISO88594, SQLCHARSETSTRING_ISO88594, 8, FALSE, FALSE, 1, 1, "?" },
{ /* 5*/ CharInfo::ISO88595, SQLCHARSETSTRING_ISO88595, 8, FALSE, FALSE, 1, 1, "?" },
{ /* 6*/ CharInfo::ISO88596, SQLCHARSETSTRING_ISO88596, 8, FALSE, FALSE, 1, 1, "?" },
{ /* 7*/ CharInfo::ISO88597, SQLCHARSETSTRING_ISO88597, 8, FALSE, FALSE, 1, 1, "?" },
{ /* 8*/ CharInfo::ISO88598, SQLCHARSETSTRING_ISO88598, 8, FALSE, FALSE, 1, 1, "?" },
{ /* 9*/ CharInfo::ISO88599, SQLCHARSETSTRING_ISO88599, 8, FALSE, FALSE, 1, 1, "?" },
{ /*10*/ CharInfo::SJIS, SQLCHARSETSTRING_SJIS, 4, TRUE, FALSE, 1, 2, "?" },
{ /*11*/ CharInfo::UCS2, SQLCHARSETSTRING_UCS2, 4, IF_WIDE, IF_WIDE,2, 2, "\xff\xfd" },
{ /*12*/ CharInfo::EUCJP, SQLCHARSETSTRING_EUCJP, 5, TRUE, FALSE, 1, 3, "?" },
{ /*13*/ CharInfo::BIG5, SQLCHARSETSTRING_BIG5, 4, TRUE, FALSE, 1, 2, "?" },
{ /*14*/ CharInfo::GB18030, SQLCHARSETSTRING_GB18030, 7, FALSE, FALSE, 1, 4, "?" },
{ /*15*/ CharInfo::UTF8, SQLCHARSETSTRING_UTF8, 4, TRUE, TRUE, 1, 4, "\xef\xbf\xbd" },
{ /*16*/ CharInfo::KSC5601, SQLCHARSETSTRING_MB_KSC5601, 10, TRUE, FALSE, 1, 2, "?" },
{ /*17*/ CharInfo::GB2312, SQLCHARSETSTRING_GB2312, 6, TRUE, FALSE, 1, 2, "?" },
{ /*18*/ CharInfo::GBK, SQLCHARSETSTRING_GBK, 3, TRUE, FALSE, 1, 2, "?" },
};
// Number of rows in mapCSArray.
#define SIZEOF_CS (sizeof(mapCSArray)/sizeof(mapCS))
// Return the name string for a character set enum value.
// Values outside [CHARSET_MIN, CHARSET_MAX], and UnknownCharSet itself,
// yield "" when retUnknownAsBlank is set and SQLCHARSETSTRING_UNKNOWN
// otherwise.
const char* CharInfo::getCharSetName(CharSet cs, NABoolean retUnknownAsBlank)
{
  // Known charset: read the name straight out of the table.
  if (cs >= CHARSET_MIN && cs <= CHARSET_MAX && cs != CharInfo::UnknownCharSet)
    return mapCSArray[cs-CHARSET_MIN].name;

  if (retUnknownAsBlank)
    return "";
  return SQLCHARSETSTRING_UNKNOWN;
}
// Map a character set name to its CharSet enum value.
//
// name  NUL-terminated name, optionally followed by blanks (e.g. both
//       "SJIS" and "SJIS " match). A NULL pointer, an empty string or a
//       string starting with a blank is treated as "no name".
//
// Returns the matching enum value, CharInfo::SQL_TEXT for the
// SQLCHARSETSYNONYM_SQL_TEXT alias, or CharInfo::UnknownCharSet when
// nothing matches.
CharInfo::CharSet CharInfo::getCharSetEnum(const char* name)
{
  // Guard against NULL before dereferencing; then the fastpath for an
  // empty name or one that is all blanks (begins w/ ' ').
  if (name == NULL || *name == '\0' || *name == ' ')
    return CharInfo::UnknownCharSet;

  for (size_t i = 0; i < SIZEOF_CS; i++) {
    const mapCS &entry = mapCSArray[i];

    // fastpath: caller passed the very string stored in the table
    if (name == entry.name)
      return entry.cs;

    // Can't use plain old strcmp here, because we want both
    //   "SJIS" and "SJIS "
    // to be matched (see smdio/CmColumnsRow.cpp): compare the first
    // namelen characters and require end-of-string or a blank after them.
    // ##Note that this will fail if given an Ansi delimited identifier
    // ##(in a USER-DEFINED collation name, of course) w/ an embedded space!
    // ##
    // ##As no user-defined collations are allowed in NSK Rel 1,
    // ##we are not fixing this now...
    const size_t len = entry.namelen;
    if (strncmp(name, entry.name, len) == 0 &&
        (name[len] == '\0' || name[len] == ' '))   //##fails if "My Coll"!
      return entry.cs;
  }

  // handle alias names for charsets, those are the exception
  const char *alias = SQLCHARSETSYNONYM_SQL_TEXT;
  const size_t aliasLen = strlen(alias);
  if (strncmp(name, alias, aliasLen) == 0 &&
      (name[aliasLen] == '\0' || name[aliasLen] == ' '))
    return CharInfo::SQL_TEXT;

  return CharInfo::UnknownCharSet;
}
// Report the "supported" flag of a character set from the table.
// Values outside [CHARSET_MIN, CHARSET_MAX] are never supported.
NABoolean CharInfo::isCharSetSupported(CharSet cs)
{
  if (!(cs >= CHARSET_MIN && cs <= CHARSET_MAX))
    return FALSE;

  const mapCS &row = mapCSArray[cs-CHARSET_MIN];

  // Special for running regress/fullstack/TEST001 on NSK:
  // in debug builds the NCHAR_SJIS_DEBUG environment variable forces
  // multi-byte charsets (SJIS or UNICODE) on when IF_WIDE is off.
#ifdef _DEBUG
  if (IF_WIDE == FALSE &&
      row.maxBytesPerChar > 1 &&
      getenv("NCHAR_SJIS_DEBUG"))
    return TRUE;
#endif

  return row.supported;
}
// Report the "fully_supported" flag of a character set from the table.
// Values outside [CHARSET_MIN, CHARSET_MAX] are never fully supported.
NABoolean CharInfo::isCharSetFullySupported(CharSet cs)
{
  if (!(cs >= CHARSET_MIN && cs <= CHARSET_MAX))
    return FALSE;

  const mapCS &row = mapCSArray[cs-CHARSET_MIN];

  // Special for running regress/fullstack/TEST001 on NSK:
  // in debug builds the NCHAR_SJIS_DEBUG environment variable forces
  // multi-byte charsets (SJIS or UNICODE) on when IF_WIDE is off.
#ifdef _DEBUG
  if (IF_WIDE == FALSE &&
      row.maxBytesPerChar > 1 &&
      getenv("NCHAR_SJIS_DEBUG"))
    return TRUE;
#endif

  return row.fully_supported;
}
// TRUE for ISO88591, UNICODE, UTF8 and any charset for which
// is_NCHAR_MP() holds; FALSE otherwise.
NABoolean CharInfo::isHexFormatSupported(CharSet cs) {
  if (cs == CharInfo::ISO88591)
    return TRUE;
  if (cs == CharInfo::UNICODE)
    return TRUE;
  if (cs == CharInfo::UTF8)
    return TRUE;
  if (is_NCHAR_MP(cs))
    return TRUE;
  return FALSE;
}
// TRUE when cs is one of the character sets accepted as a terminal
// character set (the list below); FALSE otherwise.
NABoolean CharInfo::isTerminalCharSetSupported(CharSet cs) {
  switch (cs)
    {
    case CharInfo::ISO88591:
    case CharInfo::SJIS:
    case CharInfo::EUCJP:
    case CharInfo::BIG5:
    case CharInfo::GB18030:
    case CharInfo::GB2312:
    case CharInfo::GBK:
    case CharInfo::KSC5601:
    case CharInfo::UTF8:
      return TRUE;
    default:
      return FALSE;
    }
}
// Only UTF8 and UNICODE are accepted as message character sets.
NABoolean CharInfo::isMsgCharSetSupported(CharSet cs) {
  if (cs == CharInfo::UTF8)
    return TRUE;
  if (cs == CharInfo::UNICODE)
    return TRUE;
  return FALSE;
}
// see TESTCHARSET in CmpMain.cpp
//
// Debug-only helper: flips the "supported" flag of the table row for cs
// and traces the change on cerr. Does nothing in non-debug builds, and
// leaves the table alone for UnknownCharSet or a value with no row.
//
// NOTE(review): the flag is flipped by casting away const on an element of
// the static const table; writing to an object defined const is undefined
// behavior in C++. Making the table non-const would be the clean fix.
void CharInfo::toggleCharSetSupport(CharSet cs)
{
#ifdef _DEBUG
  // Locate the row for cs; slot == SIZEOF_CS means "not found".
  size_t slot = 0;
  while (slot < SIZEOF_CS && cs != mapCSArray[slot].cs)
    slot++;

  cerr << "toggleCharSetSupport: " << getCharSetName(cs) << " ";

  if (cs == UnknownCharSet || slot >= SIZEOF_CS)
    {
      cerr << "*not* toggled: "<< (Int32)cs << ", " << slot << endl;
      return;
    }

  cerr << "toggled from " << mapCSArray[slot].supported
       << " to " << !mapCSArray[slot].supported << endl;

  NABoolean *nonconstSupported = (NABoolean *)&mapCSArray[slot].supported;
  *nonconstSupported = !*nonconstSupported;
#endif
}
// for R2 FCS.
//
// Return the encoding used for character set x: SJIS, KANJI_MP and
// KSC5601_MP are reported as ISO88591; every other value (including
// ISO88591 and UNICODE) is returned unchanged.
CharInfo::CharSet CharInfo::getEncoding(const CharInfo::CharSet x)
{
  const NABoolean mapsToIso = (x == CharInfo::SJIS ||
                               x == CharInfo::KANJI_MP ||
                               x == CharInfo::KSC5601_MP);
  if (mapsToIso)
    return CharInfo::ISO88591;
  return x;
}
// Smallest number of bytes one character of cs can occupy, per the table.
// cs must lie in [CHARSET_MIN, CHARSET_MAX]; this is only asserted.
Int32 CharInfo::minBytesPerChar(CharSet cs)
{
  ComASSERT(cs >= CHARSET_MIN && cs <= CHARSET_MAX);
  const mapCS &row = mapCSArray[cs-CHARSET_MIN];
  return row.minBytesPerChar;
}
// Largest number of bytes one character of cs can occupy, per the table.
// cs must lie in [CHARSET_MIN, CHARSET_MAX]; this is only asserted.
Int32 CharInfo::maxBytesPerChar(CharSet cs)
{
  ComASSERT(cs >= CHARSET_MIN && cs <= CHARSET_MAX);
  const mapCS &row = mapCSArray[cs-CHARSET_MIN];
  return row.maxBytesPerChar;
}
// FS type code for a fixed-length character column of the given charset:
// REC_BYTE_F_DOUBLE for UCS2, REC_BYTE_F_ASCII for everything else.
Int32 CharInfo::getFSTypeFixedChar(CharSet cs)
{
  return (cs == UCS2) ? REC_BYTE_F_DOUBLE : REC_BYTE_F_ASCII;
}
// FS type code for an ANSI varying character column of the given charset:
// REC_BYTE_V_ANSI_DOUBLE for UCS2, REC_BYTE_V_ANSI for everything else.
Int32 CharInfo::getFSTypeANSIChar(CharSet cs)
{
  return (cs == UCS2) ? REC_BYTE_V_ANSI_DOUBLE : REC_BYTE_V_ANSI;
}
// Replacement character string recorded in the table for cs (NULL for the
// rows that have none). cs must lie in [CHARSET_MIN, CHARSET_MAX]; this is
// only asserted.
const char* CharInfo::getReplacementCharacter(CharSet cs)
{
  ComASSERT(cs >= CHARSET_MIN && cs <= CHARSET_MAX);
  const mapCS &row = mapCSArray[cs-CHARSET_MIN];
  return row.replacementChar;
}
// A charset is variable-width when its minimum and maximum bytes per
// character differ. Values outside the table range yield FALSE.
NABoolean CharInfo::isVariableWidthMultiByteCharSet(CharSet cs)
{
  if (cs < CHARSET_MIN || cs > CHARSET_MAX)
    return FALSE;

  const mapCS &row = mapCSArray[cs-CHARSET_MIN];
  return (row.minBytesPerChar != row.maxBytesPerChar);
}
// Validate the code points of a wide-character string.
//
// inputStr  characters to check (may be NULL)
// inputLen  number of characters in inputStr
// cs        character set the string is claimed to be in
//
// Returns TRUE for a NULL/empty input. For UNICODE, returns TRUE only if
// every character is a valid UCS2 code point. For any other charset with
// non-empty input, returns FALSE.
NABoolean
CharInfo::checkCodePoint(const NAWchar* inputStr, Int32 inputLen, CharInfo::CharSet cs)
{
  if (inputStr == NULL || inputLen <= 0)
    return TRUE;

  if (cs != CharInfo::UNICODE)
    return FALSE;

  const NAWchar *stop = inputStr + inputLen;
  for (const NAWchar *cur = inputStr; cur != stop; ++cur)
    {
      if (!unicode_char_set::isValidUCS2CodePoint(*cur))
        return FALSE;
    }
  return TRUE;
}
//****************************************************************************
// COLLATION stuff: CollationInfo methods
//
// The design here, particularly the 4-part name, is because we need to
// allow the Parser to lookup names, and it has no notion of
// applying defaults to a possibly qualified identifier
// (and, builtin/predefined system collations are 1-part and must take
// precedence!). This should probably be changed so that Parser need
// do no lookup at all, that collation names be resolved later, in Binder!
//****************************************************************************
// Build a collation descriptor.
//
// h      heap used to copy the name (unused when the flags say not to copy)
// co     collation enum value
// name   collation name; must not be NULL
// flags  collation flags; NO_ALLOC_AND_COPY_IN_CTOR makes the object keep
//        the caller's pointer instead of a private copy
// siz    optional array[SIZEARRAY_SIZE]: siz[0] is the number of synonym
//        offsets, siz[1] the expected name length, siz[2..] the offsets
CollationInfo::CollationInfo(CollHeap *h,
                             CharInfo::Collation co,
                             const char *name,
                             CollationFlags flags,
                             size_t *siz)  /* array[SIZEARRAY_SIZE] */
  : co_(co), flags_(flags)
{
  ComASSERT(name);
  namelen_ = strlen(name);      // allowed to be 0 if siz[] not passed in

  if (!siz)
    {
      // No synonym information supplied: all offsets are "none".
      synonymOffset_[0] = synonymOffset_[1] = synonymOffset_[2] = 0;
    }
  else
    {
      size_t cnt = siz[0];
      ComASSERT(cnt >= 0 && cnt < MAX_NAME_PARTS);
      ComASSERT(namelen_ > 0 && namelen_ == siz[1]);
      // Copy the supplied offsets and zero-fill the remaining slots.
      for (size_t slot = 0; slot < OFFSETARRAY_SIZE; slot++)
        {
          if (slot < cnt)
            synonymOffset_[slot] = siz[slot+2];
          else
            synonymOffset_[slot] = 0;
        }
    }

  if (!(flags_ & NO_ALLOC_AND_COPY_IN_CTOR))
    {
      // Keep a private, NUL-terminated copy of the name on heap h.
      char *copy = new (h) char[namelen_+1];
      strcpy(copy, name);
      name_ = copy;
    }
  else
    name_ = name;               // share the caller's string
}
// Display hook for a single collation entry; intentionally empty in this
// file (called by nothing here).
void CollationInfo::display() const
{
}
//
// Create an empty collation DB on heap h and, unless this object is the
// builtin DB or there is no current compiler context, register it in the
// context's collation DB list.
CollationDB::CollationDB(CollHeap *h)
  : CollationDBSupertype(h), heap_(h), refreshNeeded_(TRUE)
{
  if (this != CharInfo::builtinCollationDB_ && cmpCurrentContext != NULL)
    cmpCurrentContext->getCollationDBList()->insert(this);
}
// Create a collation DB on heap h pre-populated with the "count" entries of
// the array starting at co (pointers into that array are inserted, not
// copies). Unless this object is the builtin DB or there is no current
// compiler context, the new DB is registered in the context's DB list.
CollationDB::CollationDB(CollHeap *h, const CollationInfo *co, size_t count)
  : CollationDBSupertype(h), heap_(h), refreshNeeded_(!!count)
{
  for (size_t k = 0; k < count; k++)
    CollationDBSupertype::insert(co + k);

  if (this != CharInfo::builtinCollationDB_ && cmpCurrentContext != NULL)
    cmpCurrentContext->getCollationDBList()->insert(this);
}
// Tear down a collation DB: clear its contents and unregister it from the
// current context's collation DB list. The builtin DB is left untouched.
CollationDB::~CollationDB()
{
  if (this == CharInfo::builtinCollationDB_) return;

  clearAndReset();

  // The constructors only register the DB when a current context exists,
  // so apply the same guard here instead of dereferencing a NULL context.
  if (cmpCurrentContext != NULL)
    cmpCurrentContext->getCollationDBList()->remove(this);
}
// Per-DB display hook, invoked by CollationDB::Display() for every DB in
// the current context's list; intentionally empty in this file.
void CollationDB::display() const
{
}
void CollationDB::Display()
{
CollationDBList *CDBlist = cmpCurrentContext->getCollationDBList();
CollIndex i, n = CDBlist->entries();
for (i = 0; i < n; i++)
(*CDBlist)[i]->display();
}
//****************************************************************************
// COLLATION stuff: CollationDB data and methods
//
// Collations may be simple 1-part names for system predefined collations;
// they are also allowed to be user-defined, hence qualified names
// (3-part Ansi, or 4-part NSK).
//
// The insert methods and their static data are implemented in
// ../optimizer/SchemaDB.cpp instead of here, because
// one method uses a QualifiedName, which is defined in ../optimizer --
// #include and DLL-link problems occur if we try to implement
// here in ../common.
// The self-maintaining CDB-chain and the static CharInfo::getCollation*()
// caller interface make this work.
//****************************************************************************
// Return the collation DB that follows this one in the current context's
// DB list, or NULL if this DB is not in the list, is the last one, or
// there is no current context.
inline
CollationDB * CollationDB::nextCDB() const
{
  // Without a context there is no chain to walk (the constructors skip
  // registration in that case), so avoid dereferencing a NULL context.
  if (cmpCurrentContext == NULL)
    return NULL;

  // If this is in the CDB chain [should always be true -- defensive prog'ing],
  // return the next CDB in the chain, if there is one.
  CollationDBList *CDBlist = cmpCurrentContext->getCollationDBList();
  CollIndex i = CDBlist->index((CollationDB *)this);
  if (i != NULL_COLL_INDEX)                             // [defensive prog'ing]
    for (CollIndex n = CDBlist->entries(); ++i < n; )
      if ((*CDBlist)[i] && (*CDBlist)[i] != this)       // [defensive prog'ing]
        return (*CDBlist)[i];
  return NULL;
}
// Find the descriptor for collation co. This DB is searched first; on a
// miss the lookup continues in the next DB of the chain. Returns NULL when
// no DB in the chain knows the collation.
const CollationInfo* CollationDB::getCollationInfo(CharInfo::Collation co) const
{
  const CollIndex count = entries();
  for (CollIndex pos = 0; pos < count; pos++)
    {
      if (at(pos)->co_ == co)
        return at(pos);
    }

  // Not here: delegate to the rest of the chain, if any.
  CollationDB *rest = nextCDB();
  if (rest == NULL)
    return NULL;
  return rest->getCollationInfo(co);
}
// Name of collation co. For UNKNOWN_COLLATION, or a collation no DB in the
// chain knows, returns "" if retUnknownAsBlank is set and
// SQLCOLLATIONSTRING_UNKNOWN otherwise.
const char* CollationDB::getCollationName(CharInfo::Collation co,
                                          NABoolean retUnknownAsBlank) const
{
  const CollationInfo *found = NULL;
  if (co != CharInfo::UNKNOWN_COLLATION)
    found = getCollationInfo(co);

  if (found)
    return found->name_;
  if (retUnknownAsBlank)
    return "";
  return SQLCOLLATIONSTRING_UNKNOWN;
}
// Flags of collation co, or ALL_NEGATIVE_SYNTAX_FLAGS when the collation
// is not known to any DB in the chain.
Int32 CollationDB::getCollationFlags(CharInfo::Collation co) const
{
  const CollationInfo *found = getCollationInfo(co);
  if (!found)
    return CollationInfo::ALL_NEGATIVE_SYNTAX_FLAGS;
  return found->flags_;
}
// Look up a collation by name in this DB and, on a miss, in the following
// DBs of the chain.
//
// name       collation name; only its first namlen characters are compared
// formatNSK  selects which user-defined names are eligible (see below)
// namlen     length of name without trailing blanks; must be nonzero
//
// Returns the collation's enum value, or CharInfo::UNKNOWN_COLLATION if no
// DB in the chain has a matching name.
//
// We need the namlen arg here, unlike CharInfo::getCharSetEnum(),
// because whereas CHARSETs are SQL simple identifiers
// (must begin with a Latin letter, a regular not "delimited" identifier),
// COLLATIONs are SQL identifiers
// (can be delimited and contain spaces).
//
// So we must check that any spaces are *trailing* spaces only.
//
// We can't use plain old strcmp here, because we want both
// "SJIS" and "SJIS "
// to be matched. The public caller CharInfo::getCollationEnum()
// inputs to us a correct namlen in either case (4, for the SJIS example).
//
CharInfo::Collation CollationDB::getCollationEnum(const char* name,
NABoolean formatNSK,
size_t namlen) const
{
ComASSERT(namlen);
CollIndex n = entries();
for (CollIndex i = 0; i < n; i++) {
const CollationInfo *map = at(i);
if (name == map->name_) // fastpath: pointers identical
return map->co_;
// If we want NSK format and this i'th name is not NSK, or
// if we don't want NSK fmt and this name is NSK,
// then skip this name.
// Exception is that the builtin collations are always compared.
if (map->co_ >= CharInfo::FIRST_USER_DEFINED_COLLATION &&
formatNSK XOR HasMPLocPrefix(map->name_))
continue;
// Compare "name" against the full stored name (off == 0) and then against
// each suffix of it that starts at a recorded synonym offset.
size_t off = 0;
for (size_t j = 0; ; j++) {
size_t len = map->namelen_ - off;
if (len == namlen) {
// Same length: this is the only candidate in this entry; match or give up.
if (strncmp(name, &map->name_[off], len) == 0)
return map->co_;
else break;
}
// Candidate already shorter than the requested name: stop on this entry.
// NOTE(review): presumably offsets increase, so later suffixes are shorter
// still -- confirm against the code that fills synonymOffset_.
if (len < namlen) break;
// All offset slots consumed (also keeps the index below in bounds).
if (j == CollationInfo::OFFSETARRAY_SIZE) break;
off = map->synonymOffset_[j];
// An offset of 0 marks "no further synonym".
if (off == 0) break;
} // loop j
} // loop i
// Not found in this DB: continue with the next DB in the chain, if any.
CollationDB *next = nextCDB();
return next ? next->getCollationEnum(name, formatNSK, namlen)
: CharInfo::UNKNOWN_COLLATION;
} // CollationDB::getCollationEnum()
//****************************************************************************
// COLLATION stuff: CharInfo methods
//****************************************************************************
// Shorthand for the flags of the builtin entries below.
// STATIC_STR: the entry keeps a pointer to the literal name instead of
// allocating a copy (NO_ALLOC_AND_COPY_IN_CTOR), which is why a NULL heap
// can be passed to the constructors here.
// NOTE(review): STATIC_NEG presumably combines the "all negative" syntax
// flags with the same no-copy behavior -- confirm in the CollationInfo
// declaration.
#define STATIC_STR CollationInfo::NO_ALLOC_AND_COPY_IN_CTOR
#define STATIC_NEG CollationInfo::ALL_NEGATIVE_PLUS_STATIC
// The builtin (system predefined) collations. CharInfo::builtinCollationDB()
// populates the builtin collation DB with pointers to these entries.
static const CollationInfo mapCOArray[] = {
CollationInfo(NULL, CharInfo::DefaultCollation, SQLCOLLATIONSTRING_DEFAULT,
STATIC_STR),
CollationInfo(NULL, CharInfo::CZECH_COLLATION, SQLCOLLATIONSTRING_CZECH,
STATIC_STR),
CollationInfo(NULL, CharInfo::CZECH_COLLATION_CI, SQLCOLLATIONSTRING_CZECH_CI,
STATIC_STR),
CollationInfo(NULL, CharInfo::UNKNOWN_COLLATION, SQLCOLLATIONSTRING_UNKNOWN,
STATIC_NEG)
};
// Number of entries in mapCOArray.
#define SIZEOF_CO (sizeof(mapCOArray)/sizeof(CollationInfo))
// Builtin collation DB; NULL until CharInfo::builtinCollationDB() first
// creates it.
const CollationDB *CharInfo::builtinCollationDB_ = NULL;
// Map a collation name to its Collation enum value.
//
// name       collation name, possibly padded with trailing blanks/NULs;
//            a NULL pointer is treated as an empty name
// formatNSK  nonzero to look for NSK-format user-defined names
// namlen     number of characters of name to consider, or 0 to use
//            strlen(name)
//
// Returns CharInfo::UNKNOWN_COLLATION for a NULL, empty or all-blank name,
// otherwise the result of the lookup in the collation DB chain.
CharInfo::Collation CharInfo::getCollationEnum(const char* name,
                                               NABoolean formatNSK,
                                               size_t namlen)
{
  if (name == NULL)
    return CharInfo::UNKNOWN_COLLATION;

  if (namlen == 0)
    namlen = strlen(name);

  // Strip trailing blanks (and NUL padding) so that "SJIS" and "SJIS "
  // both reach the DB lookup with the same length. This is done whether
  // the length was passed in or computed with strlen(), because the DB
  // lookup compares lengths exactly.
  while (namlen > 0 &&
         (name[namlen-1] == ' ' || name[namlen-1] == '\0'))
    namlen--;

  if (namlen == 0)              // fastpath: if name is empty
    return CharInfo::UNKNOWN_COLLATION;

  // Collapse any nonzero formatNSK to single bit, for XOR
  return builtinCollationDB()->getCollationEnum(name, !!formatNSK, namlen);
}
// Name of collation co, looked up starting at the builtin collation DB.
// See CollationDB::getCollationName() for the meaning of retUnknownAsBlank.
const char* CharInfo::getCollationName(Collation co,
                                       NABoolean retUnknownAsBlank)
{
  const CollationDB *db = builtinCollationDB();
  return db->getCollationName(co, retUnknownAsBlank);
}
// Flags of collation co, looked up starting at the builtin collation DB.
Int32 CharInfo::getCollationFlags(Collation co)
{
  const CollationDB *db = builtinCollationDB();
  return db->getCollationFlags(co);
}
//****************************************************************************
// COERCIBILITY stuff
//****************************************************************************
// Human-readable text for a coercibility value; "unknown" for any value
// not listed below.
const char* CharInfo::getCoercibilityText(Coercibility ce)
{
  // These are not keywords, not tokens, not part of Ansi syntax.
  // They are part of Ansi concepts; cf. Ansi 4.2.3.
  if (ce == IMPLICIT)
    return "implicit";
  if (ce == EXPLICIT)
    return "explicit";
  if (ce == COERCIBLE)
    return "coercible";
  if (ce == NO_COLLATING_SEQUENCE)
    return "no-collating-sequence";
  return "unknown";
}
// "Which coercibility wins?"
// Returns 0 if they're equal, 1 if the first one wins, 2 if the second.
// This follows the strict Ansi precedence of
// COERCIBLE < IMPLICIT < NO_COLLATING_SEQUENCE < EXPLICIT
//
// ## (As an aside, note that CharType::computeCoAndCo()
// ## could be pulled out into a static CharInfo:: method placed here.)
//
Int32 CharInfo::compareCoercibility(CharInfo::Coercibility ce1,
                                    CharInfo::Coercibility ce2)
{
  if (ce1 == ce2)
    return 0;

  // COERCIBLE is the weakest: whichever side has it yields to the other.
  if (ce1 == CharInfo::COERCIBLE) return 2;
  if (ce2 == CharInfo::COERCIBLE) return 1;

  // Remaining values that can win, strongest first. The side holding the
  // first of these that appears is the winner.
  static const CharInfo::Coercibility strongestFirst[] =
    { CharInfo::EXPLICIT, CharInfo::NO_COLLATING_SEQUENCE };
  for (size_t k = 0; k < sizeof(strongestFirst)/sizeof(strongestFirst[0]); k++)
    {
      if (ce1 == strongestFirst[k]) return 1;
      if (ce2 == strongestFirst[k]) return 2;
    }

  ComASSERT(FALSE);             // ceN IMPLICIT already handled above!
  return -1;
}
//****************************************************************************
// LOCALE stuff
//****************************************************************************
// Optional override for the locale charset name; defined as NULL here, so
// getLocaleCharSetAsString() derives the name from findLocaleCharSet().
const char* const CharInfo::localeCharSet_ = NULL;
// Charset code of the locale; this implementation always reports
// SQLCHARSETCODE_ISO88591.
Lng32 CharInfo::findLocaleCharSet()
{
return SQLCHARSETCODE_ISO88591;
}
// Name of the locale's character set. If localeCharSet_ is set it is
// returned as is; otherwise the code from findLocaleCharSet() is mapped to
// its name, with SQLCHARSETSTRING_UNKNOWN for any unlisted code.
const char* CharInfo::getLocaleCharSetAsString()
{
  if (localeCharSet_)
    return localeCharSet_;

  const char *csName;
  switch ( findLocaleCharSet() )
    {
    case SQLCHARSETCODE_ISO88591:
      csName = SQLCHARSETSTRING_ISO88591;
      break;
    case SQLCHARSETCODE_UCS2:
      csName = SQLCHARSETSTRING_UNICODE;
      break;
    case SQLCHARSETCODE_SJIS:
      csName = SQLCHARSETSTRING_SJIS;
      break;
    default:
      csName = SQLCHARSETSTRING_UNKNOWN;
      break;
    }
  return csName;
}
// Target character type for the locale; this implementation always
// reports REC_SBYTE_LOCALE_F.
Int32 CharInfo::getTargetCharTypeFromLocale()
{
return REC_SBYTE_LOCALE_F;
}
// Upper bound, in bytes, on the size of a string after converting it from
// sourceCS to targetCS.
//
// sourceCS          character set of the input
// sourceLenInBytes  length of the input in bytes
// targetCS          character set of the output
Int32 CharInfo::getMaxConvertedLenInBytes(CharSet sourceCS,
                                          Int32 sourceLenInBytes,
                                          CharSet targetCS)
{
  // Same charset: nothing is converted, the length is unchanged.
  if (sourceCS == targetCS)
    return sourceLenInBytes;

  // Tighter bounds for two conversions into UTF-8.
  if (targetCS == UTF8)
    {
      switch (sourceCS)
        {
        case UCS2:
          // Each 2-byte UCS2 character needs at most 3 bytes in UTF-8.
          // A UTF-16 surrogate pair (4 bytes) becomes one 4-byte UTF-8
          // character, which is below that ratio and so not the worst case.
          return (3 * sourceLenInBytes) / 2;

        case ISO88591:
          // Upper-half ISO8859-1 characters take 2 bytes in UTF-8 and
          // none takes more.
          return 2 * sourceLenInBytes;

        default:
          break;
        }
    }

  // General case: assume the input holds as many characters as possible
  // and each converts to the longest character of the target charset.
  // NOTE: This also works for UTF8 to UCS2 conversions and back, even
  // though those conversions really treat UCS2 as UTF16.
  const Int32 maxSourceChars = sourceLenInBytes / minBytesPerChar(sourceCS);
  return maxSourceChars * maxBytesPerChar(targetCS);
}
// Return the builtin collation DB, creating it from mapCOArray on first
// use. Creation is serialized with globalSemaphore and the pointer is
// re-checked after the semaphore is obtained, so only one DB is created.
// NOTE(review): the first, unlocked read of builtinCollationDB_ is a plain
// pointer read -- confirm this is acceptable on all supported platforms.
const CollationDB *CharInfo::builtinCollationDB()
{
  if (CharInfo::builtinCollationDB_ == NULL)
    {
      globalSemaphore.get();
      // Another caller may have created it while we waited.
      if (CharInfo::builtinCollationDB_ == NULL)
        CharInfo::builtinCollationDB_ =
          new CollationDB(NULL, mapCOArray, SIZEOF_CO);
      globalSemaphore.release();
    }
  return CharInfo::builtinCollationDB_;
}