| /********************************************************************** |
| // @@@ START COPYRIGHT @@@ |
| // |
| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // |
| // @@@ END COPYRIGHT @@@ |
| **********************************************************************/ |
| /* -*-C++-*- |
| ***************************************************************************** |
| * |
| * File: CharInfo.cpp |
| * Description: The implementation for the CharInfo class. This class defines |
| * and provides features/information about character sets, |
| * collations and coercibility that are supported by SQL/MX. |
| * |
| * |
| * Created: 7/8/98 |
| * Modified: $Date: 2006/11/01 01:38:09 $ (GMT) |
| * Language: C++ |
| * Status: $State: Exp $ |
| * |
| * |
| * |
| ***************************************************************************** |
| */ |
| |
| #include "NAWinNT.h" |
| |
| #include "BaseTypes.h" |
| #include "charinfo.h" |
| #include "ComASSERT.h" |
| #include "ComMPLoc.h" |
| #include "dfs2rec.h" // for LOCALE stuff: REC_xBYTE_LOCALE_* |
| #include "SQLCLIdev.h" // for LOCALE stuff: SQLCHARSETCODE_* |
| #include "str.h" |
| #include "wstr.h" |
| #include "SQLTypeDefs.h" |
| #include "CmpMessage.h" |
| #include "CmpConnection.h" |
| #include "CmpContext.h" |
| #include "CmpCommon.h" |
| |
| |
| |
| #ifdef NA_STD_NAMESPACE |
| using namespace std; |
| #endif |
| |
| //**************************************************************************** |
| // CHARSET stuff |
| //**************************************************************************** |
| |
| |
| |
// IF_WIDE is the "supported" flag used for the UCS2 row of the charset table:
// TRUE only in builds that define NA_WIDE_CHARACTER.
#ifndef NA_WIDE_CHARACTER
#define IF_WIDE FALSE
#else
#define IF_WIDE TRUE
#endif

// IF_NSK is the "supported" flag used for the *_MP rows of the charset table.
#define IF_NSK FALSE
| |
| struct mapCS { |
| CharInfo::CharSet cs; |
| const char* name; |
| size_t namelen; // len(SQLCHARSETSTRING_xxx) |
| NABoolean supported; |
| NABoolean fully_supported; |
| Int32 minBytesPerChar; |
| Int32 maxBytesPerChar; |
| const char* replacementChar; |
| }; |
| |
| // Arranged as an array, starting with CHARSET_MIN and ending with CHARSET_MAX |
| static const struct mapCS mapCSArray[] = { |
| // CharSet enum value string value of charset name supp. fully min / max rep. |
| // len supp. bytes/char char |
| { /*-2*/ CharInfo::KSC5601_MP, SQLCHARSETSTRING_KSC5601, 7, IF_NSK, IF_NSK, 2, 2, NULL }, |
| { /*-1*/ CharInfo::KANJI_MP, SQLCHARSETSTRING_KANJI, 5, IF_NSK, IF_NSK, 2, 2, NULL }, |
| { /* 0*/ CharInfo::UnknownCharSet, SQLCHARSETSTRING_UNKNOWN, 9, FALSE, FALSE, 1, 1, "?" }, |
| |
| { /* 1*/ CharInfo::ISO88591, SQLCHARSETSTRING_ISO88591, 8, TRUE, TRUE, 1, 1, "?" }, |
| { /* 2*/ CharInfo::ISO88592, SQLCHARSETSTRING_ISO88592, 8, FALSE, FALSE, 1, 1, "?" }, |
| { /* 3*/ CharInfo::ISO88593, SQLCHARSETSTRING_ISO88593, 8, FALSE, FALSE, 1, 1, "?" }, |
| { /* 4*/ CharInfo::ISO88594, SQLCHARSETSTRING_ISO88594, 8, FALSE, FALSE, 1, 1, "?" }, |
| { /* 5*/ CharInfo::ISO88595, SQLCHARSETSTRING_ISO88595, 8, FALSE, FALSE, 1, 1, "?" }, |
| { /* 6*/ CharInfo::ISO88596, SQLCHARSETSTRING_ISO88596, 8, FALSE, FALSE, 1, 1, "?" }, |
| { /* 7*/ CharInfo::ISO88597, SQLCHARSETSTRING_ISO88597, 8, FALSE, FALSE, 1, 1, "?" }, |
| { /* 8*/ CharInfo::ISO88598, SQLCHARSETSTRING_ISO88598, 8, FALSE, FALSE, 1, 1, "?" }, |
| { /* 9*/ CharInfo::ISO88599, SQLCHARSETSTRING_ISO88599, 8, FALSE, FALSE, 1, 1, "?" }, |
| { /*10*/ CharInfo::SJIS, SQLCHARSETSTRING_SJIS, 4, TRUE, FALSE, 1, 2, "?" }, |
| { /*11*/ CharInfo::UCS2, SQLCHARSETSTRING_UCS2, 4, IF_WIDE, IF_WIDE,2, 2, "\xff\xfd" }, |
| { /*12*/ CharInfo::EUCJP, SQLCHARSETSTRING_EUCJP, 5, TRUE, FALSE, 1, 3, "?" }, |
| { /*13*/ CharInfo::BIG5, SQLCHARSETSTRING_BIG5, 4, TRUE, FALSE, 1, 2, "?" }, |
| { /*14*/ CharInfo::GB18030, SQLCHARSETSTRING_GB18030, 7, FALSE, FALSE, 1, 4, "?" }, |
| { /*15*/ CharInfo::UTF8, SQLCHARSETSTRING_UTF8, 4, TRUE, TRUE, 1, 4, "\xef\xbf\xbd" }, |
| { /*16*/ CharInfo::KSC5601, SQLCHARSETSTRING_MB_KSC5601, 10, TRUE, FALSE, 1, 2, "?" }, |
| { /*17*/ CharInfo::GB2312, SQLCHARSETSTRING_GB2312, 6, TRUE, FALSE, 1, 2, "?" }, |
| { /*18*/ CharInfo::GBK, SQLCHARSETSTRING_GBK, 3, TRUE, FALSE, 1, 2, "?" }, |
| }; |
| |
| static const size_t SIZEOF_CS = sizeof(mapCSArray)/sizeof(mapCS); |
| |
| const char* CharInfo::getCharSetName(CharSet cs, NABoolean retUnknownAsBlank) |
| { |
| if (cs >= CHARSET_MIN && cs <= CHARSET_MAX) |
| { |
| if (cs != CharInfo::UnknownCharSet) |
| return mapCSArray[cs-CHARSET_MIN].name; |
| } |
| |
| return retUnknownAsBlank ? "" : SQLCHARSETSTRING_UNKNOWN; |
| } |
| |
| CharInfo::CharSet CharInfo::getCharSetEnum(const char* name) |
| { |
| if (*name == '\0' || *name == ' ') // fastpath: if name is empty |
| return CharInfo::UnknownCharSet; // or all blanks (begins w/ ' ') |
| |
| for (size_t i = 0; i < SIZEOF_CS; i++) { |
| const mapCS *map = &mapCSArray[i]; |
| if (name == map->name) // fastpath: pointers identical |
| return map->cs; |
| else { |
| // Can't use plain old strcmp here, because we want both |
| // "SJIS" and "SJIS " |
| // to be matched (see smdio/CmColumnsRow.cpp). |
| // ##Note that this will fail if given an Ansi delimited identifier |
| // ##(in a USER-DEFINED collation name, of course) w/ an embedded space! |
| // ## |
| // ##As no user-defined collations are allowed in NSK Rel 1, |
| // ##we are not fixing this now... |
| // |
| size_t len = map->namelen; |
| if (strncmp(name, map->name, len) == 0 && |
| (name[len] == '\0' || name[len] == ' ')) //##fails if "My Coll"! |
| return map->cs; |
| } |
| } |
| |
| // handle alias names for charsets, those are the exception |
| const char *alias = SQLCHARSETSYNONYM_SQL_TEXT; |
| size_t aliasLen = strlen(alias); |
| |
| if (strncmp(name, alias, aliasLen) == 0 && |
| (name[aliasLen] == '\0' || name[aliasLen] == ' ')) |
| { |
| return CharInfo::SQL_TEXT; |
| } |
| |
| return CharInfo::UnknownCharSet; |
| } |
| |
| NABoolean CharInfo::isCharSetSupported(CharSet cs) |
| { |
| if (cs >= CHARSET_MIN && cs <= CHARSET_MAX) |
| { |
| // Special for running regress/fullstack/TEST001 on NSK: |
| #ifdef NA_DEBUG_C_RUNTIME |
| if (IF_WIDE == FALSE && |
| mapCSArray[cs-CHARSET_MIN].maxBytesPerChar > 1 && // SJIS or UNICODE |
| getenv("NCHAR_SJIS_DEBUG")) |
| return TRUE; |
| #endif |
| return mapCSArray[cs-CHARSET_MIN].supported; |
| } |
| |
| return FALSE; |
| } |
| |
| NABoolean CharInfo::isCharSetFullySupported(CharSet cs) |
| { |
| if (cs >= CHARSET_MIN && cs <= CHARSET_MAX) |
| { |
| // Special for running regress/fullstack/TEST001 on NSK: |
| #ifdef NA_DEBUG_C_RUNTIME |
| if (IF_WIDE == FALSE && |
| mapCSArray[cs-CHARSET_MIN].maxBytesPerChar > 1 && // SJIS or UNICODE |
| getenv("NCHAR_SJIS_DEBUG")) |
| return TRUE; |
| #endif |
| return mapCSArray[cs-CHARSET_MIN].fully_supported; |
| } |
| |
| return FALSE; |
| } |
| |
| NABoolean CharInfo::isHexFormatSupported(CharSet cs) { |
| return ( (cs == CharInfo::ISO88591) || (cs == CharInfo::UNICODE) || |
| (cs == CharInfo::UTF8) || |
| (is_NCHAR_MP(cs)) |
| ); |
| } |
| |
| NABoolean CharInfo::isTerminalCharSetSupported(CharSet cs) { |
| return ((cs == CharInfo::ISO88591) || (cs == CharInfo::SJIS) || |
| (cs == CharInfo::EUCJP) || (cs == CharInfo::BIG5) || |
| (cs == CharInfo::GB18030) || (cs == CharInfo::GB2312) || |
| (cs == CharInfo::GBK ) || |
| (cs == CharInfo::KSC5601) || (cs == CharInfo::UTF8)) ; |
| } |
| |
| NABoolean CharInfo::isMsgCharSetSupported(CharSet cs) { |
| return ( (cs == CharInfo::UTF8) || (cs == CharInfo::UNICODE) ); |
| } |
| |
| //LCOV_EXCL_START :rfi |
| // see TESTCHARSET in CmpMain.cpp |
| void CharInfo::toggleCharSetSupport(CharSet cs) |
| { |
| #ifdef NA_DEBUG_C_RUNTIME |
| size_t i; |
| for (i = 0; i < SIZEOF_CS; i++) |
| if (cs == mapCSArray[i].cs) |
| break; |
| cerr << "toggleCharSetSupport: " << getCharSetName(cs) << " "; |
| if (cs == UnknownCharSet || i >= SIZEOF_CS) |
| cerr << "*not* toggled: "<< (Int32)cs << ", " << i << endl; |
| else { |
| cerr << "toggled from " << mapCSArray[i].supported |
| << " to " << !mapCSArray[i].supported << endl; |
| NABoolean *nonconstSupported = (NABoolean *)&mapCSArray[i].supported; |
| *nonconstSupported = !*nonconstSupported; |
| } |
| #endif |
| } |
| //LCOV_EXCL_STOP |
| |
| // for R2 FCS. |
| CharInfo::CharSet CharInfo::getEncoding(const CharInfo::CharSet x) |
| { |
| switch (x) |
| { |
| case CharInfo::ISO88591: |
| case CharInfo::UNICODE: |
| return x; |
| break; |
| |
| case CharInfo::SJIS: |
| case CharInfo::KANJI_MP: |
| case CharInfo::KSC5601_MP: |
| return CharInfo::ISO88591; |
| break; |
| |
| default: |
| return x; |
| break; |
| } |
| } |
| |
| Int32 CharInfo::minBytesPerChar(CharSet cs) |
| { |
| ComASSERT(cs >= CHARSET_MIN && cs <= CHARSET_MAX); |
| |
| return mapCSArray[cs-CHARSET_MIN].minBytesPerChar; |
| } |
| |
| Int32 CharInfo::maxBytesPerChar(CharSet cs) |
| { |
| ComASSERT(cs >= CHARSET_MIN && cs <= CHARSET_MAX); |
| |
| return mapCSArray[cs-CHARSET_MIN].maxBytesPerChar; |
| } |
| |
| Int32 CharInfo::getFSTypeFixedChar(CharSet cs) |
| { |
| if (cs == UCS2) |
| return REC_BYTE_F_DOUBLE; |
| |
| return REC_BYTE_F_ASCII; |
| } |
| |
| |
| Int32 CharInfo::getFSTypeANSIChar(CharSet cs) |
| { |
| if (cs == UCS2) |
| return REC_BYTE_V_ANSI_DOUBLE; |
| |
| return REC_BYTE_V_ANSI; |
| } |
| |
| const char* CharInfo::getReplacementCharacter(CharSet cs) |
| { |
| ComASSERT(cs >= CHARSET_MIN && cs <= CHARSET_MAX); |
| |
| return mapCSArray[cs-CHARSET_MIN].replacementChar; |
| } |
| |
| NABoolean CharInfo::isVariableWidthMultiByteCharSet(CharSet cs) |
| { |
| if (cs >= CHARSET_MIN && cs <= CHARSET_MAX) |
| { |
| return (mapCSArray[cs-CHARSET_MIN].minBytesPerChar != |
| mapCSArray[cs-CHARSET_MIN].maxBytesPerChar); |
| } |
| |
| return FALSE; |
| } |
| |
| NABoolean |
| CharInfo::checkCodePoint(const NAWchar* inputStr, Int32 inputLen, CharInfo::CharSet cs) |
| { |
| if (!inputStr || (inputLen <= 0) ) return TRUE; |
| |
| if (cs == CharInfo::UNICODE) { |
| for (Int32 i = 0; i < inputLen; i++) { |
| if (!unicode_char_set::isValidUCS2CodePoint(inputStr[i])) |
| return FALSE; |
| } |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| |
| |
| //**************************************************************************** |
| // COLLATION stuff: CollationInfo methods |
| // |
| // The design here, particularly the 4-part name, is because we need to |
| // allow the Parser to lookup names, and it has no notion of |
| // applying defaults to a possibly qualified identifier |
| // (and, builtin/predefined system collations are 1-part and must take |
| // precedence!). This should probably be changed so that Parser need |
| // do no lookup at all, that collation names be resolved later, in Binder! |
| //**************************************************************************** |
| |
| CollationInfo::CollationInfo(CollHeap *h, |
| CharInfo::Collation co, |
| const char *name, |
| CollationFlags flags, |
| size_t *siz) /* array[SIZEARRAY_SIZE] */ |
| : co_(co), flags_(flags) |
| { |
| ComASSERT(name); |
| namelen_ = strlen(name); // allowed to be 0 if siz[] not passed in |
| if (siz) { |
| //LCOV_EXCL_START :cnu -- As of 8/30/2011, no support in SQ for Collations |
| size_t cnt = siz[0]; |
| #pragma nowarn(270) // warning elimination |
| ComASSERT(cnt >= 0 && cnt < MAX_NAME_PARTS); |
| #pragma warn(270) // warning elimination |
| ComASSERT(namelen_ > 0 && namelen_ == siz[1]); |
| for (size_t off = 0; off < OFFSETARRAY_SIZE; off++) { |
| synonymOffset_[off] = (off < cnt) ? siz[off+2] : 0; |
| } |
| //LCOV_EXCL_STOP |
| } else |
| synonymOffset_[0] = synonymOffset_[1] = synonymOffset_[2] = 0; |
| if (flags_ & NO_ALLOC_AND_COPY_IN_CTOR) |
| name_ = name; |
| else { |
| name_ = new (h) char[namelen_+1]; |
| strcpy((char *)name_, name); |
| } |
| } |
| |
| |
| |
| |
| //LCOV_EXCL_START :cnu -- As of 8/30/2011, no support in SQ for Collations |
| void CollationInfo::display() const |
| { |
| } |
| //LCOV_EXCL_STOP |
| // |
| CollationDB::CollationDB(CollHeap *h) |
| : CollationDBSupertype(h), heap_(h), refreshNeeded_(TRUE) |
| { |
| if (this == &CharInfo::builtinCollationDB_) return; |
| cmpCurrentContext->getCollationDBList()->insert(this); |
| } |
| |
| CollationDB::CollationDB(CollHeap *h, const CollationInfo *co, size_t count) |
| : CollationDBSupertype(h), heap_(h), refreshNeeded_(!!count) |
| { |
| while (count--) CollationDBSupertype::insert(co++); |
| if (this == &CharInfo::builtinCollationDB_) return; |
| cmpCurrentContext->getCollationDBList()->insert(this); |
| } |
| |
| CollationDB::~CollationDB() |
| { |
| if (this == &CharInfo::builtinCollationDB_) return; |
| clearAndReset(); |
| cmpCurrentContext->getCollationDBList()->remove(this); |
| } |
| |
| |
| //LCOV_EXCL_START :cnu -- As of 8/30/2011, no support in SQ for Collations |
| void CollationDB::display() const |
| { |
| } |
| //LCOV_EXCL_STOP |
| |
| void CollationDB::Display() |
| { |
| CollationDBList *CDBlist = cmpCurrentContext->getCollationDBList(); |
| CollIndex i, n = CDBlist->entries(); |
| for (i = 0; i < n; i++) |
| (*CDBlist)[i]->display(); |
| } |
| |
| //**************************************************************************** |
| // COLLATION stuff: CollationDB data and methods |
| // |
| // Collations may be simple 1-part names for system predefined collations; |
| // they are also allowed to be user-defined, hence qualified names |
| // (3-part Ansi, or 4-part NSK). |
| // |
| // The insert methods and their static data are implemented in |
| // ../optimizer/SchemaDB.cpp instead of here, because |
| // one method uses a QualifiedName, which is defined in ../optimizer -- |
| // #include and DLL-link problems occur if we try to implement |
| // here in ../common. |
| // The self-maintaining CDB-chain and the static CharInfo::getCollation*() |
| // caller interface make this work. |
| //**************************************************************************** |
| |
| inline |
| CollationDB * CollationDB::nextCDB() const |
| { |
| // If this is in the CDB chain [should always be true -- defensive prog'ing], |
| // return the next CDB in the chain, if there is one. |
| CollationDBList *CDBlist = cmpCurrentContext->getCollationDBList(); |
| |
| CollIndex i = CDBlist->index((CollationDB *)this); |
| if (i != NULL_COLL_INDEX) // [defensive prog'ing] |
| for (CollIndex n = CDBlist->entries(); ++i < n; ) |
| if ((*CDBlist)[i] && (*CDBlist)[i] != this) // [defensive prog'ing] |
| return (*CDBlist)[i]; |
| |
| return NULL; |
| } |
| |
| const CollationInfo* CollationDB::getCollationInfo(CharInfo::Collation co) const |
| { |
| CollIndex i, n = entries(); |
| for (i = 0; i < n; i++) |
| if (co == at(i)->co_) |
| return at(i); |
| |
| CollationDB *next = nextCDB(); |
| return next ? next->getCollationInfo(co) : NULL; |
| } |
| |
| const char* CollationDB::getCollationName(CharInfo::Collation co, |
| NABoolean retUnknownAsBlank) const |
| { |
| if (co != CharInfo::UNKNOWN_COLLATION) { |
| const CollationInfo *ci = getCollationInfo(co); |
| if (ci) return ci->name_; |
| } |
| |
| return retUnknownAsBlank ? "" : SQLCOLLATIONSTRING_UNKNOWN; |
| } |
| |
| Int32 CollationDB::getCollationFlags(CharInfo::Collation co) const |
| { |
| const CollationInfo *ci = getCollationInfo(co); |
| if (ci) return ci->flags_; |
| |
| return CollationInfo::ALL_NEGATIVE_SYNTAX_FLAGS; |
| } |
| |
| // We need the namlen arg here, unlike CharInfo::getCharSetEnum(), |
| // because whereas CHARSETs are SQL simple identifiers |
| // (must begin with a Latin letter, a regular not "delimited" identifier), |
| // COLLATIONs are SQL identifiers |
| // (can be delimited and contain spaces). |
| // |
| // So we must check that any spaces are *trailing* spaces only. |
| // |
| // The smdio/CmColumnsRow.cpp and sqlcat/ReadTableDef.cpp |
| // callers do not have a '\0'-terminated string, |
| // hence our needing this length arg to pass in. |
| // |
| // We can't use plain old strcmp here, because we want both |
| // "SJIS" and "SJIS " |
| // to be matched. The public caller CharInfo::getCollationEnum() |
| // inputs to us a correct namlen in either case (4, for the SJIS example). |
| // |
| CharInfo::Collation CollationDB::getCollationEnum(const char* name, |
| NABoolean formatNSK, |
| size_t namlen) const |
| { |
| ComASSERT(namlen); |
| |
| CollIndex n = entries(); |
| for (CollIndex i = 0; i < n; i++) { |
| const CollationInfo *map = at(i); |
| |
| if (name == map->name_) // fastpath: pointers identical |
| return map->co_; |
| |
| // If we want NSK format and this i'th name is not NSK, or |
| // if we don't want NSK fmt and this name is NSK, |
| // then skip this name. |
| // Exception is that the builtin collations are always compared. |
| if (map->co_ >= CharInfo::FIRST_USER_DEFINED_COLLATION && |
| formatNSK XOR HasMPLocPrefix(map->name_)) |
| continue; |
| |
| size_t off = 0; |
| for (size_t j = 0; ; j++) { |
| size_t len = map->namelen_ - off; |
| if (len == namlen) { |
| if (strncmp(name, &map->name_[off], len) == 0) |
| return map->co_; |
| else break; |
| } |
| if (len < namlen) break; |
| if (j == CollationInfo::OFFSETARRAY_SIZE) break; |
| off = map->synonymOffset_[j]; |
| if (off == 0) break; |
| } // loop j |
| |
| } // loop i |
| |
| CollationDB *next = nextCDB(); |
| return next ? next->getCollationEnum(name, formatNSK, namlen) |
| : CharInfo::UNKNOWN_COLLATION; |
| |
| } // CollationDB::getCollationEnum() |
| |
| //**************************************************************************** |
| // COLLATION stuff: CharInfo methods |
| //**************************************************************************** |
| |
| #define STATIC_STR CollationInfo::NO_ALLOC_AND_COPY_IN_CTOR |
| #define STATIC_NEG CollationInfo::ALL_NEGATIVE_PLUS_STATIC |
| static const CollationInfo mapCOArray[] = { |
| CollationInfo(NULL, CharInfo::DefaultCollation, SQLCOLLATIONSTRING_DEFAULT, |
| STATIC_STR), |
| CollationInfo(NULL, CharInfo::CZECH_COLLATION, SQLCOLLATIONSTRING_CZECH, |
| STATIC_STR), |
| CollationInfo(NULL, CharInfo::CZECH_COLLATION_CI, SQLCOLLATIONSTRING_CZECH_CI, |
| STATIC_STR), |
| CollationInfo(NULL, CharInfo::UNKNOWN_COLLATION, SQLCOLLATIONSTRING_UNKNOWN, |
| STATIC_NEG) |
| }; |
| static const size_t SIZEOF_CO = sizeof(mapCOArray)/sizeof(CollationInfo); |
| const CollationDB CharInfo::builtinCollationDB_(NULL, mapCOArray, SIZEOF_CO); |
| |
| CharInfo::Collation CharInfo::getCollationEnum(const char* name, |
| NABoolean formatNSK, |
| size_t namlen) |
| { |
| if (namlen == 0) |
| namlen = strlen(name); |
| else { |
| const char *n = &name[namlen-1]; |
| for (; |
| name < n && (*n == ' ' || *n == '\0'); |
| n--) |
| ; |
| namlen = (*n == ' ' || *n == '\0') ? 0 : n - name + 1; |
| } |
| if (namlen == 0) // fastpath: if name is empty |
| return CharInfo::UNKNOWN_COLLATION; |
| |
| // Collapse any nonzero formatNSK to single bit, for XOR |
| return builtinCollationDB_.getCollationEnum(name, !!formatNSK, namlen); |
| } |
| |
| const char* CharInfo::getCollationName(Collation co, |
| NABoolean retUnknownAsBlank) |
| { |
| return builtinCollationDB_.getCollationName(co, retUnknownAsBlank); |
| } |
| |
| Int32 CharInfo::getCollationFlags(Collation co) |
| { |
| return builtinCollationDB_.getCollationFlags(co); |
| } |
| |
| //**************************************************************************** |
| // COERCIBILITY stuff |
| //**************************************************************************** |
| |
| //LCOV_EXCL_START :cnu -- As of 8/30/2011, no support in SQ for Collations |
| const char* CharInfo::getCoercibilityText(Coercibility ce) |
| { |
| // These are not keywords, not tokens, not part of Ansi syntax. |
| // They are part of Ansi concepts; cf. Ansi 4.2.3. |
| switch (ce) { |
| case IMPLICIT: return "implicit"; |
| case EXPLICIT: return "explicit"; |
| case COERCIBLE: return "coercible"; |
| case NO_COLLATING_SEQUENCE: return "no-collating-sequence"; |
| default: return "unknown"; |
| } |
| } |
| //LCOV_EXCL_STOP |
| |
| // "Which coercibility wins?" |
| // Returns 0 if they're equal, 1 if the first one wins, 2 if the second. |
| // This follows the strict Ansi precedence of |
| // COERCIBLE < IMPLICIT < NO_COLLATING_SEQUENCE < EXPLICIT |
| // |
| // ## (As an aside, note that CharType::computeCoAndCo() |
| // ## could be pulled out into a static CharInfo:: method placed here.) |
| // |
| //LCOV_EXCL_START :cnu -- As of 8/30/2011, no support in SQ for Collations |
| Int32 CharInfo::compareCoercibility(CharInfo::Coercibility ce1, |
| CharInfo::Coercibility ce2) |
| { |
| if (ce1 == ce2) return 0; |
| |
| if (ce1 == CharInfo::COERCIBLE) return 2; // 1 yields to 2 |
| if (ce2 == CharInfo::COERCIBLE) return 1; // 2 yields to 1 |
| |
| if (ce1 == CharInfo::EXPLICIT) return 1; |
| if (ce2 == CharInfo::EXPLICIT) return 2; |
| |
| if (ce1 == CharInfo::NO_COLLATING_SEQUENCE) return 1; |
| if (ce2 == CharInfo::NO_COLLATING_SEQUENCE) return 2; |
| |
| ComASSERT(FALSE); // ceN IMPLICIT already handled above! |
| return -1; |
| } |
| //LCOV_EXCL_STOP |
| |
| |
| //**************************************************************************** |
| // LOCALE stuff |
| //**************************************************************************** |
| |
| const char* const CharInfo::localeCharSet_ = NULL; |
| |
| Lng32 CharInfo::findLocaleCharSet() |
| { |
| return SQLCHARSETCODE_ISO88591; |
| |
| |
| } |
| |
| //LCOV_EXCL_START :cnu -- As of 8/30/2011, only caller is #if'd out |
| const char* CharInfo::getLocaleCharSetAsString() |
| { |
| if (!localeCharSet_) { |
| switch ( findLocaleCharSet() ) { |
| case SQLCHARSETCODE_ISO88591: |
| return SQLCHARSETSTRING_ISO88591; |
| |
| case SQLCHARSETCODE_UCS2: |
| return SQLCHARSETSTRING_UNICODE; |
| |
| case SQLCHARSETCODE_SJIS: |
| return SQLCHARSETSTRING_SJIS; |
| |
| default: |
| return SQLCHARSETSTRING_UNKNOWN; |
| } |
| } else |
| return localeCharSet_; |
| |
| #pragma nowarn(203) // warning elimination |
| return SQLCHARSETSTRING_UNKNOWN; |
| #pragma warn(203) // warning elimination |
| } |
| //LCOV_EXCL_STOP |
| |
| Int32 CharInfo::getTargetCharTypeFromLocale() |
| { |
| |
| |
| return REC_SBYTE_LOCALE_F; |
| } |
| |
| |
| |
| Int32 CharInfo::getMaxConvertedLenInBytes(CharSet sourceCS, |
| Int32 sourceLenInBytes, |
| CharSet targetCS) |
| { |
| if (sourceCS == targetCS) |
| { |
| // trivial case, no conversion |
| return sourceLenInBytes; |
| } |
| else if (targetCS == UTF8) |
| { |
| // optimize some cases where we can exploit knowledge about |
| // the UTF-8 encoding |
| |
| if (sourceCS == UCS2) |
| { |
| // Worst case is each 2 byte UCS2 char resulting in a 3 byte |
| // UTF8 char. Note that no UCS2 char requires a 4 byte UTF8 |
| // representation. |
| // 2 UTF-16 surrogate pairs (4 bytes) will be translated to |
| // a 4-byte UTF-8 character, which is not the worst case. |
| return 3 * sourceLenInBytes/2; |
| } |
| |
| if (sourceCS == ISO88591) |
| { |
| // Worst case is all upper-half ISO characters, which |
| // result in 2 byte UTF-8 characters. No ISO8859-1 character |
| // takes up more than 2 bytes in UTF-8. |
| return 2 * sourceLenInBytes; |
| } |
| } |
| |
| // General case, assume max number of chars in input, |
| // and all of them convert to the longest output char. |
| // NOTE: This also works for UTF8 to UCS2 conversions |
| // and back, even though those conversions really |
| // treat UCS2 as UTF16. |
| return ((sourceLenInBytes/minBytesPerChar(sourceCS)) * |
| maxBytesPerChar(targetCS)); |
| } |