| /********************************************************************** |
| // @@@ START COPYRIGHT @@@ |
| // |
| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // |
| // @@@ END COPYRIGHT @@@ |
| **********************************************************************/ |
| /* -*-C++-*- |
| ************************************************************************** |
| * |
| * File: NAString.cpp |
| * Description: Utility string functions (basic/exportable). |
| * (See NAString2.cpp for more funx.) |
| * Created: 06/07/94 |
| * Language: C++ |
| * |
| * |
| ************************************************************************** |
| */ |
| |
| #include <ctype.h> |
| #include "BaseTypes.h" |
| // #include "ComAnsiNamePart.h" |
| #include "ComASSERT.h" |
| #include "ComMPLoc.h" |
| #include "ComOperators.h" |
| #include "ComSmallDefs.h" |
| #include "str.h" |
| #include "ComRtUtils.h" |
| #include "sqlcli.h" |
| #include "charinfo.h" |
| #include "csconvert.h" |
| #include "nawstring.h" |
| |
| #define SQLPARSERGLOBALSCMN__INITIALIZE |
| #define SQLPARSERGLOBALS_FLAGS |
| #include "SqlParserGlobals.h" |
| |
| |
| // Include key word header for IsSqlReservedWord(). |
| #include "ComResWords.h" |
| |
| // The timing loop in ToAnsiIdentifier below was run |
| // with this flag (using hashtable to check for reserved keywords) |
| // and without it (using strstr on one long char array to check). |
| // The winner is WITHOUT this flag; the strstr approach has marginally |
| // better average performance than the hashtable, plus it takes only |
| // half the time on a keyword hit, plus there is no initialization cost. |
| // Plus it has a smaller data size and smaller code size (smaller executable |
| // object/less disk space) and less algorithmic complexity. |
| // |
| // quantify'ing arkcmp's compilation of tpc-c queries shows the strstr-based |
| // IsSqlReservedWord taking 4.8% of total arkcmp elapsed time. A binary |
| // search implementation of IsSqlReservedWord shrinks this down to 0.01% |
| // of total arkcmp elapsed time. |
| |
| #include "NAString.h" |
| #include "ComDistribution.h" |
| |
// The "special" characters of SQL_TEXT: space, double quote, percent, etc.,
// as listed in Ansi 5.1, plus the Tdm extension of backslash.
//
// The character NON_SQL_TEXT_CHAR ('@', from our .h file) must *not*
// appear in this array: that internal special char is used to guarantee
// a unique parseable name in internally-generated text
// (unique in that it cannot conflict with any externally legal identifier).
//
// NOTE(review): no use of this array is visible in this part of the file;
// confirm where it is referenced before changing its contents.
//
static const char specialSQL_TEXT[] = " \"%&'()*+,-./:;<=>?[]_|\\";
| |
| #include "ReservedInternalNames.cpp" |
| |
// -----------------------------------------------------------------------
// The NAString_isoMappingCS memory cache for use by routines
// ToInternalIdentifier() and ToAnsiIdentifier[2|3]() in modules
// w:/common/NAString[2].cpp.  These routines currently cannot
// access SqlParser_ISO_MAPPING directly due to the complex
// build hierarchy.
//
// The initial value SQLCHARSETCODE_UNKNOWN marks the cache as "not yet
// fetched": NAString_getIsoMapCS() fills it in on demand, and
// NAString_setIsoMapCS() overwrites it unconditionally.
// NOTE(review): THREAD_P is presumably a thread-local storage qualifier,
// which would make this a per-thread cache -- confirm against its definition.
// -----------------------------------------------------------------------
static THREAD_P SQLCHARSET_CODE NAString_isoMappingCS = SQLCHARSETCODE_UNKNOWN;
| |
| // ----------------------------------------------------------------------- |
| Lng32 NAString_getIsoMapCS() |
| { |
| if (NAString_isoMappingCS != SQLCHARSETCODE_UNKNOWN) |
| return (Lng32)NAString_isoMappingCS; |
| NAString_isoMappingCS = (SQLCHARSET_CODE)ComRtGetIsoMappingEnum(); |
| return (Lng32)NAString_isoMappingCS; |
| } |
| |
| // ----------------------------------------------------------------------- |
| void NAString_setIsoMapCS(Lng32 isoMappingCS) |
| { |
| // ComASSERT(isoMappingCS == (Lng32)SQLCHARSETCODE_ISO88591 || |
| // isoMappingCS == (Lng32)SQLCHARSETCODE_SJIS || |
| // isoMappingCS == (Lng32)SQLCHARSETCODE_UTF8); |
| NAString_isoMappingCS = (SQLCHARSET_CODE)isoMappingCS; |
| } |
| |
| // ----------------------------------------------------------------------- |
| NABoolean isUpperIsoMapCS(unsigned char c) |
| { |
| { |
| return isUpper8859_1((NAWchar)c); |
| } |
| return FALSE; // dead code |
| } |
| |
| // ----------------------------------------------------------------------- |
| NABoolean isAlphaIsoMapCS(unsigned char c) |
| { |
| { |
| return isAlpha8859_1((NAWchar)c); |
| } |
| return FALSE; // dead code |
| } |
| |
| // ----------------------------------------------------------------------- |
| NABoolean isAlNumIsoMapCS(unsigned char c) |
| { |
| { |
| return isAlNum8859_1((NAWchar)c); |
| } |
| return FALSE; // dead code |
| } |
| |
| // ----------------------------------------------------------------------- |
| void NAStringUpshiftIsoMapCS(NAString &ns) |
| { |
| { |
| ns.toUpper8859_1(); |
| } |
| } |
| |
| // ----------------------------------------------------------------------- |
| // convertNAString() |
| // Note that this allocates memory. |
| // ----------------------------------------------------------------------- |
| char *convertNAString(const NAString& ns, CollHeap *heap, NABoolean wideNull) |
| { |
| size_t len = ns.length(); |
| size_t nullSpaceLen = 0; |
| char* buf; |
| |
| if (wideNull == TRUE) |
| nullSpaceLen = sizeof(NAWchar); |
| else |
| nullSpaceLen = 1; |
| |
| if (heap) |
| buf = new (heap) char[len + nullSpaceLen]; |
| else { |
| buf = new char[len + nullSpaceLen]; |
| #ifndef NDEBUG |
| cerr << "Possible memory leak: convertNAString called with NULL heap\n"; |
| #endif |
| } |
| str_cpy_all(buf, ns.data(), len); |
| if (wideNull == TRUE) |
| ((NAWchar *)buf)[len / sizeof(NAWchar)] = L'\0'; |
| else |
| buf[len] = '\0'; |
| return buf; |
| } |
| |
| // ----------------------------------------------------------------------- |
| // Returns TRUE if the string consists entirely of whitespace |
| // (at least one space or tab, and nothing else), |
| // FALSE if string is empty (null) or contains a non-white character. |
| // ----------------------------------- |
| NABoolean IsNAStringSpace(const NAString& ns) |
| { |
| if (ns.isNull()) |
| return FALSE; |
| return IsNAStringSpaceOrEmpty(ns); |
| } |
| |
| // ----------------------------------- |
| // Returns TRUE if the string consists entirely of whitespace |
| // (zero or more spaces or tabs, and nothing else), including none (empty str). |
| // ----------------------------------------------------------------------- |
| NABoolean IsNAStringSpaceOrEmpty(const NAString& ns) |
| { |
| StringPos len = ns.length(); |
| for (StringPos i = 0; i < len; i++) |
| if (!isSpace8859_1((unsigned char)ns[i])) |
| return FALSE; |
| return TRUE; |
| } |
| |
| // ----------------------------------------------------------------------- |
| // Returns TRUE if the string contains only 7-bit ASCII characters or |
| // if the string is empty. |
| // ----------------------------------------------------------------------- |
| NABoolean NAStringHasOnly7BitAsciiChars(const NAString& ns) |
| { |
| StringPos len = ns.length(); |
| for (StringPos i = 0; i < len; i++) |
| if ( ((unsigned char)ns[i]) > 127 ) |
| return FALSE; |
| return TRUE; |
| } |
| // ----------------------------------------------------------------------- |
| // Returns TRUE if the string contains only 7-bit ASCII characters |
| // between '0' and '9' OR if the string is empty. |
| // ----------------------------------------------------------------------- |
| NABoolean NAStringHasOnlyDecimalDigitAsciiChars(const NAString& ns) |
| { |
| StringPos len = ns.length(); |
| for (StringPos i = 0; i < len; i++) |
| if ( ((unsigned char)ns[i]) < '0' OR |
| ((unsigned char)ns[i]) > '9' ) |
| return FALSE; |
| return TRUE; |
| } |
| |
| // ----------------------------------------------------------------------- |
| // upshift a string (no funny locale stuff, just do it) |
| // ----------------------------------------------------------------------- |
| void NAStringUpshiftASCII(NAString& ns) |
| { |
| ns.toUpper(); |
| } |
| |
| // ----------------------------------------------------------------------- |
| // decode a number from a prefix of an NAString |
| // ----------------------------------------------------------------------- |
| Lng32 NAStringToLong(const NAString &ns) |
| { |
| Lng32 result; |
| sscanf(ns.data(),"%d",&result); |
| return result; |
| } |
| |
| double NAStringToReal(const NAString &ns) |
| { |
| float result; |
| sscanf(ns.data(),"%g",&result); |
| return result; |
| } |
| |
| |
| NAString LongToNAString(Lng32 l) |
| { |
| char resultstr[100]; |
| sprintf(resultstr,"%d",l); |
| return NAString(resultstr); |
| } |
| |
| NAString UnsignedToNAString(UInt32 u) |
| { |
| char resultstr[100]; |
| sprintf(resultstr,"%u",u); |
| return NAString(resultstr); |
| } |
| |
| NAString Int64ToNAString(Int64 l) |
| { |
| char resultstr[100]; |
| convertInt64ToAscii(l, resultstr); |
| return NAString(resultstr); |
| } |
| |
| NAString RealToNAString(double d) |
| { |
| char resultstr[200]; |
| sprintf(resultstr,"%G",d); |
| return NAString(resultstr); |
| } |
| |
| NAString &replaceAll(NAString &source, const NAString &searchFor, |
| const NAString &replaceWith) |
| { |
| size_t indexOfReplace = NA_NPOS; |
| indexOfReplace = source.index(searchFor); |
| if (indexOfReplace != NA_NPOS) |
| { |
| // Replace all occurences of searchFor with replaceWith. When no |
| // more occurences are found or end of string is reached, index() |
| // will return NA_NPOS. |
| while (indexOfReplace != NA_NPOS) |
| { |
| source.replace(indexOfReplace, searchFor.length(), |
| replaceWith); |
| // Find index of next occurence to replace. |
| indexOfReplace = |
| source.index(searchFor, indexOfReplace + replaceWith.length()); |
| } |
| } |
| |
| return source; |
| } |
| |
| // --------------------------------------------------------------------- |
| // Hash function for NAString types in NAKeyLookup |
| // --------------------------------------------------------------------- |
| ULng32 hashKey(const NAString& str) |
| { |
| return str.hash(); |
| } |
| |
| // --------------------------------------------------------------------- |
| // Look up names that start with a '$' or an '=' sign |
| // |
| // Right now, DEFINEs are simulated by environment variables |
| // (that might be useful for an OSS process on NSK as well) |
| // --------------------------------------------------------------------- |
| NAString LookupDefineName(const NAString &ns, NABoolean iterate) |
| { |
| const Int32 itermax = 100; // detect self-referencing env vars |
| Int32 iterlimit = iterate ? itermax : 1; |
| Int32 iterations = 0; |
| |
| NAString delimIdent; |
| const char *defineName = NULL; |
| const char *mappedName = ns.data(); |
| |
| // If the name is like $"abc", convert it to $abc and then do the lookup. |
| if ((mappedName[0] == '$' OR mappedName[0] == '=') AND |
| mappedName[1] == '"') |
| { |
| delimIdent = &mappedName[1]; |
| if (!ToInternalIdentifier(delimIdent, FALSE)) |
| { |
| delimIdent.prepend(mappedName[0]); |
| mappedName = delimIdent.data(); |
| } |
| } |
| |
| while (mappedName AND |
| (mappedName[0] == '$' OR mappedName[0] == '=') AND |
| iterations++ < iterlimit) |
| { |
| defineName = mappedName; |
| mappedName = getenv(&mappedName[1]); |
| } |
| |
| // could raise an exception if iterations >= itermax |
| |
| if (mappedName) |
| return NAString(mappedName); |
| else |
| // couldn't map name, return unresolved name |
| return NAString(defineName); |
| } |
| |
| // --------------------------------------------------------------------- |
| // Convert a NAString member of a QualifiedName, CorrName, or ColRefName |
| // from the canonical internal format required by Binder |
| // into the external delimited-identifier ANSI format. |
| // That is, |
| // A2C returns as A2C |
| // a2c "a2c" |
| // 12C "12C" |
| // A+C "A+C" |
| // A"C" "A""C" |
| // |
| // The required internal format is achieved by Parser (et alia) having |
| // previously called the companion function below, ToInternalIdentifier. |
| // --------------------------------------------------------------------- |
| |
| // look up sqlText in the ReservedWords table; return TRUE iff id is |
| // an ANSI, PotentialANSI, or Tandem reserved word. |
| NABoolean IsSqlReservedWord(const char *sqlText) |
| { |
| return ComResWords::isSqlReservedWord(sqlText,0); |
| } |
| |
| NABoolean IsCIdentifier(const char *id) |
| { |
| // trim whitespace first, if necessary |
| // Note that we allow identifiers starting with an underscore |
| for (size_t i=0; id[i] != 0; i++) |
| { |
| char c = id[i]; |
| |
| if (!(c >= 'A' && c <= 'Z' || |
| c >= 'a' && c <= 'z' || |
| c == '_' || |
| c >= '0' && c <= '9' && i > 0)) |
| return FALSE; |
| } |
| return TRUE; |
| } |
| |
| NABoolean /*NAString::*/setMPLoc() |
| { |
| if (!SqlParser_Initialized() ) |
| return TRUE; |
| else |
| return FALSE; |
| } |
| |
| |
| NAString ToAnsiIdentifier(const NAString &ns, NABoolean assertShort) |
| { |
| size_t nsLen = ns.length(); |
| |
| // Zero-length INTERNAL identifiers are fabricated by Parser and Binder; |
| // they're okay (it's only zero-length EXTERNAL ones that're illegal). |
| if (nsLen == 0) |
| return NAString(); |
| |
| // Assert various checks were previously done when converting the original |
| // external identifier (in some previous call to ToInternalIdentifier). |
| const Int32 SMAX=2048; |
| NAWString internalFormatNameInUCS2; |
| ComAnsiNameToUCS2 ( ns // in - const ComString & internalFormatName |
| , internalFormatNameInUCS2 // out - NAWString & |
| ); |
| if ((Int32) internalFormatNameInUCS2.length() > |
| (Int32) (assertShort ? ComMAX_1_PART_INTERNAL_UCS2_NAME_LEN_IN_NAWCHARS : SMAX)) |
| { |
| ComASSERT(0); |
| return NAString(); |
| } |
| |
| char buf[SMAX]; |
| size_t len; |
| |
| ToAnsiIdentifier3(ns.data(), ns.length(), buf, SMAX, &len, NAString_getIsoMapCS()); |
| if (len == 0) |
| return NAString(); |
| else |
| { |
| const NAString &nas = NAString(buf, len); |
| return nas; |
| } |
| } |
| |
| // --------------------------------------------------------------------- |
| // Helper function for ToInternalIdentifier: |
| // put the integer count of characters scanned into the first character |
| // of the return string. (We know the count will fit into a char, and we |
| // know the string does have a first char (is nonempty), so this is safe.) |
| // SqlParser uses this info for pretty syntax error messaging. |
| // --------------------------------------------------------------------- |
| static Lng32 illegalCharInIdentifier(NAString &ansiIdent, |
| size_t i, size_t countOfRemoved) |
| { |
| ansiIdent[(size_t)0] = i + countOfRemoved; |
| return -3127; |
| } |
| |
| // --------------------------------------------------------------------- |
| // The inverse of ToAnsiIdentifier -- but note that this function is the one |
| // called first, and does some essential checking that the above relies on. |
| // |
| // The purpose of this function is to convert NAStrings containing |
| // Ansi-format regular or delimited identifiers to our internal format |
| // required by Binder (RETDesc) lookups, which is the same format as |
| // in the catalog metadata tables. |
| // |
| // Leading blanks are removed, and then, |
| // if the string begins with a double quote, then this function does: |
| // - there are supposed to be double quotes surrounding the string |
| // and they are removed |
| // - any embedded double quotes (i.e., two consecutive dquotes) |
| // are turned into just one dquote |
| // - silently change tabs to spaces, just as a courtesy |
| // (officially by ANSI, tabs are illegal even in delimited identifiers |
| // because they are not a character in the SQL_TEXT default character set |
| // specification) |
| // - allow all characters to pass in delimited identifiers |
| // except the @ prefix as it being used internally by the compiler |
| // to generate unique table names for internal use. Please look at the |
| // contents of file w:/sqlshare/ReservedInternalNames.cpp for other |
| // prefix strings with embedded @ (e.g., OLD@) that are reserved for |
| // internal use. @ is allowed in delimited identifiers otherwise. |
| // - We now accept ^ in delimited identifiers if it is not a prefix. |
| // We used to disallow ^ unless acceptCircumflex is true. |
| // |
| // If the string does not begin with a double quote, then |
| // - remove trailing blanks (spaces AND tabs) |
| // - verify there are no illegal characters for a REGULAR identifier |
| // - uppercase the contents unless flagged not to |
| // - ensure that no regular identifier matches an Ansi reserved word |
| // |
| // Return value is a SqlCode value (of a message with no parameters) if error, |
| // and zero (0) if no error. |
| // It is caller's job to insert any error condition into a ComDiags. |
| // |
| // Efficiency: this function saves on space at the cost of some time. |
| // The calls to RWCString.remove() probably take linear time as a function |
| // of string length on each call. A faster version of this function |
| // would establish a transformed string in a separate buffer and then |
| // copy it back into the original. |
| // |
| // $$$ Kludge NLS (National Language Support) 7-APR-2007 $$$ We used |
| // to not accept the 7-bit ASCII characters @, /, ^, and \, in ANSI SQL |
| // delimited identifiers specified by customers, but we do allow |
| // many other 8-bit byte values between the two double quotes; |
| // our Japanese customers take advantage of this lack of restriction |
| // and put their Japanese multibyte characters in delimited identifiers. |
| // The MXCMP program treats the identifier as if it contains a string |
| // of ISO 8859-1 characters. |
| // Note that currently, the target columns in the metadata |
| // tables containing internally-formatted identifiers have the |
| // CHAR(128) CHARACTER SET ISO88591 data type. |
| // This kludge workaround works for most cases, but there are about |
| // 5-10% of the Japanese Shift-JIS characters rejected by MXCMP because |
| // the lower byte of their two-byte multibyte characters contains binary |
| // value equivalent to the representation of 7-bit ASCII characters @, ^, |
| // or \. |
| // |
| // These restrictions have been lifted. \, @, and ^ now can appear |
| // within delimited identifiers. $, @, and ^ reserved for internal |
| // use when they are a prefix. A few other prefixes with @ embedded |
| // are reserved for internal use also. |
| // |
| // We now allow the forward slash ( i.e., / ) character to appear |
| // within a delimited identifier. Character / is guaranteed to be a |
| // standalone one-byte character in Shift-JIS and EUC-JP and any |
| // other character set that is a superset of the 7-bit ASCII |
| // character set (e.g., UTF-8). |
| // --------------------------------------------------------------------- |
| Lng32 ToInternalIdentifier( NAString &ansiIdent |
| , Int32 upCase |
| , NABoolean acceptCircumflex // VO: Fix genesis solution 10-040204-2957 |
| , UInt16 pv_flags // call-by-value parameter (pv_)flags |
| ) |
| { |
| size_t i; // unsigned: beware "i--" when (i==0)! |
| |
| // Remove leading blanks (spaces AND tabs). |
| // SqlLexer/Parser do not pass in leading blanks, but we cannot trust |
| // the SchemaDB caller, nor the Catman-constructed-on-the-fly names |
| // of ComAnsiNamePart. |
| // |
| // Lines [RW] fix a RogueWave memory leak in RWCString::operator[]. |
| // |
| const char *sptr = ansiIdent.data(); // [RW] added this line |
| size_t len = ansiIdent.length(); |
| for (i = 0; i < len; i++) |
| if (isSpace8859_1((unsigned char)*sptr)) // [RW] |
| sptr++; |
| else |
| break; |
| if (i) { |
| ansiIdent.remove(0,i); |
| len = ansiIdent.length(); |
| } |
| |
| if (len == 0) |
| return -3004; // An ident must contain at least one character |
| |
| size_t countOfRemoved = i; |
| i = 0; |
| // Handle double quotes or backquotes as delimited identifiers. |
| // Backquotes are used for hive objects. |
| // An error will be returned later if they are used for traf objects. |
| NABoolean isDquote = (ansiIdent[i] == '"'); |
| if ((ansiIdent[i] != '"') && |
| (ansiIdent[i] != '`')) { // REGULAR identifier |
| |
| // ANSI 5.2 SR 13 + 14 and 8.2 SR 3a say that trailing spaces are |
| // insignificant in equality-testing of identifiers, so remove them |
| // (and tabs as well, as a courtesy). |
| // |
| // This loop transforms 'ABC ' into 'ABC' (NOT the same as delimited loop!) |
| // We also know it won't be empty, thanks to the check above. |
| // |
| for (i = len; i > 0; ) { |
| i--; |
| if (!isSpace8859_1((unsigned char)ansiIdent[i])) |
| break; |
| } |
| if (++i < len) { |
| ansiIdent.remove(i); |
| len = ansiIdent.length(); |
| } |
| |
| // ComASSERT(ComGetNameInterfaceCharSet() == SQLCHARSETCODE_UTF8); |
| NABoolean has7BitAsciiCharsOnly = NAStringHasOnly7BitAsciiChars(ansiIdent); |
| NABoolean isLatin1 = FALSE; |
| const Int32 SMAX = 2048; |
| char latin1Buf[SMAX+1]; |
| char *pFirstUntranslatedChar = NULL; |
| if (NOT IsNAStringSpaceOrEmpty(ansiIdent) AND NOT has7BitAsciiCharsOnly) |
| { |
| // Check to see if ansiIdent contains English and Western European characters only (including |
| // those in the upper half of the ISO88591 character set). Note that ansiIdent contains |
| // UTF-8 encoding values. |
| UInt32 outLen = 0; |
| UInt32 translatedCharCount = 0; |
| Int32 retCode = UTF8ToLocale ( cnv_version1 |
| , (const char*) ansiIdent.data() |
| , (const Int32) ansiIdent.length() |
| , (const char*) latin1Buf |
| , (const Int32) SMAX+1 |
| , (cnv_charset) cnv_ISO88591 |
| , (char* &) pFirstUntranslatedChar |
| , (UInt32 *) &outLen // unsigned int *output_data_len_p |
| , (const Int32) TRUE // const int addNullAtEnd_flag |
| , (const Int32) FALSE // const int allow_invalids |
| , (UInt32 *) &translatedCharCount // unsigned int * translated_char_cnt_p |
| , (const char*) NULL // const char *substitution_char_p |
| ); |
| if (retCode == 0) // success - i.e., ansiIdent contains characters that can be support by ISO 8859-1 |
| { |
| isLatin1 = TRUE; |
| len = translatedCharCount; |
| } |
| } |
| if (NOT has7BitAsciiCharsOnly AND NOT isLatin1) |
| { |
| // Regular identifiers can contains characters supported by ISO 8859-1 standard only. |
| size_t pos = 0; |
| if (pFirstUntranslatedChar != NULL AND (pFirstUntranslatedChar - ansiIdent.data()) > 0) |
| pos = pFirstUntranslatedChar - ansiIdent.data(); |
| return illegalCharInIdentifier(ansiIdent, pos, countOfRemoved); |
| } |
| |
| // First character of a regular identifier must be alpha |
| // (or '\' or '$' if NSK name). |
| // (User-input names under ANSI or SHORTANSI do not allow NSK format.) |
| // |
| i = 0; |
| unsigned char c = (unsigned char)ansiIdent[i]; |
| if (isLatin1) |
| c = (unsigned char)latin1Buf[i]; |
| |
| if (isAlphaIsoMapCS(c) || |
| (c == '$' && ((pv_flags & NASTRING_REGULAR_IDENT_WITH_DOLLAR_PREFIX) != 0))) { |
| |
| // Subsequent characters must be alphanumeric or the underscore. |
| // |
| while (++i < len) { |
| if (isLatin1) |
| c = (unsigned char)latin1Buf[i]; |
| else |
| c = (unsigned char)ansiIdent[i]; |
| if (NOT isAlNumIsoMapCS(c) && c != '_') { |
| return illegalCharInIdentifier(ansiIdent, i, countOfRemoved); |
| } |
| } |
| |
| if (upCase) { |
| if (isLatin1) |
| { |
| NAString ns = latin1Buf; |
| ns.toUpper8859_1(); |
| memcpy(latin1Buf, ns.data(), ns.length()+1); |
| } |
| else |
| NAStringUpshiftIsoMapCS(ansiIdent); |
| } |
| |
| // Reserved words cannot be regular identifiers |
| // |
| if (IsSqlReservedWord(ansiIdent)) |
| { |
| return -3128; |
| } |
| } else if ((c == '\\') || |
| (c == '$' && ((pv_flags & NASTRING_ALLOW_NSK_GUARDIAN_NAME_FORMAT) != 0) |
| && ((pv_flags & NASTRING_DELIM_IDENT_WITH_DOLLAR_PREFIX) == 0))) { |
| // ComASSERT(NAStringHasOnly7BitAsciiChars(ansiIdent) AND NOT isLatin1); |
| // For now, allow Guardian style names in ANSI mode as well. This was the |
| // old behavior since this method was not being called for Guardian names. |
| // The MX Reference manual is also a bit ambiguous in this regard. |
| // Need to get a resolution on this issue soon. RMW 11-20-2000. |
| // |
| // } else if ((c == '\\' || c == '$') && |
| // (!SqlParser_Initialized() || SqlParser_NAMETYPE == DF_NSK)) { |
| // |
| // User can enter \ or $ at beginning of ident *only* if NAMETYPE NSK; |
| // if SHORTANSI, their input is Ansi only (no \ or $), |
| // and it is only the *output*, after SHORTANSI name resolution, |
| // that may contain \ and $. |
| // |
| //## We should really call the left-to-right ComMPLoc ctor here, |
| //## allowing only a valid MP format |
| //## (one of ComMPLoc::SYS, VOL, or FILE). |
| // |
| |
| // Allow the patterns: |
| // \[a-zA-Z][a-zA-Z0-9]*.$[a-zA-Z][a-zA-Z0-9]* |
| // $[a-zA-Z][a-zA-Z0-9]* |
| // |
| i++; |
| if (c == '\\') { |
| |
| // Must start with an ascii char. |
| // |
| if((i >= len) || (NOT isAlphaIsoMapCS((unsigned char)ansiIdent[i++]))) { |
| return illegalCharInIdentifier(ansiIdent, i - 1, countOfRemoved); |
| } |
| |
| while ((i < len) && isAlNumIsoMapCS((unsigned char)ansiIdent[i])) { |
| i++; |
| } |
| |
| // Expecting a ".$" |
| // |
| if((i >= len) || ((unsigned char)ansiIdent[i++] != '.')) { |
| return illegalCharInIdentifier(ansiIdent, i - 1, countOfRemoved); |
| } |
| |
| if((i >= len) || ((unsigned char)ansiIdent[i++] != '$')) { |
| return illegalCharInIdentifier(ansiIdent, i - 1, countOfRemoved); |
| } |
| } |
| |
| // After the '$' |
| // |
| while (i < len) { |
| if (NOT isAlNumIsoMapCS((unsigned char)ansiIdent[i++])) { |
| return illegalCharInIdentifier(ansiIdent, i - 1, countOfRemoved); |
| } |
| } |
| |
| // Note that for NSK style names, it is not necessary to call |
| // IsSqlReservedWord() since there are no reserved identifiers |
| // starting with \ or $. |
| // |
| if (upCase) { |
| NAStringUpshiftIsoMapCS(ansiIdent); |
| } |
| |
| } else { |
| // Invalid first character. |
| // |
| return illegalCharInIdentifier(ansiIdent, i, countOfRemoved); |
| } |
| |
| if (isLatin1) |
| { |
| char utf8Buf[SMAX+1]; |
| char * p1stUnstranslatedChar = NULL; |
| UInt32 utf8StrLenInBytes = 0; |
| UInt32 charCount = 0; |
| Int32 returnCode = LocaleToUTF8(cnv_version1 |
| , (const char*) latin1Buf |
| , (const Int32) len |
| , (const char*) utf8Buf |
| , (const Int32) SMAX+1 |
| , cnv_ISO88591 |
| , p1stUnstranslatedChar // char * & first_untranslated_char |
| , &utf8StrLenInBytes // unsigned int * output_data_len_p |
| , (const Int32)TRUE // const int addNullAtEnd_flag |
| , &charCount // unsigned int * translated_char_cnt_p |
| ); |
| // Note that utf8StrLenInBytes includes the NULL terminator added at the end |
| // (addNullAtEnd_flag was set to TRUE in the above call). |
| // ComASSERT(returnCode == 0); |
| ansiIdent = utf8Buf; |
| } |
| |
| } // end REGULAR identifier |
| else { |
| |
| UInt32 state = (isDquote ? 1 : 3); |
| ansiIdent.remove(0,1); // remove initial dquote |
| countOfRemoved++; |
| |
| const char *sptr = ansiIdent.data(); |
| |
| len = ansiIdent.length(); |
| if (len <= 1) // A delimited ident must contain at least one character |
| return -3004; // plus an ending double-quote. |
| i = 0; |
| unsigned char c = (unsigned char)ansiIdent[i]; |
| |
| // Kludge NLS Notes: When \ character is the first character in a |
| // Japanese multibyte identifier, it is really |
| // the standalone one-byte character and is not |
| // the lower byte of a multibyte character. In |
| // Shift-JIS character set, the \ backslash is |
| // actually displayed as and representing the |
| // Japanese Yen money unit symbol. |
| // |
| // The $ character is guaranteed to be the |
| // one-byte standalone character in Shift-JIS |
| // and all other character sets that are |
| // supersets of the 7-bit ASCII character set. |
| |
| // "\SYS.$VOL" -- special handling because '\' and '$' are not Ansi-special |
| |
| if ((c == '\\') || |
| (c == '$' && ((pv_flags & NASTRING_ALLOW_NSK_GUARDIAN_NAME_FORMAT) != 0) |
| && ((pv_flags & NASTRING_DELIM_IDENT_WITH_DOLLAR_PREFIX) == 0))) { |
| // For now, allow Guardian stlye names in ANSI mode as well. This was the |
| // old behavior since this method was not being called for Guardian names. |
| // The MX Reference manual is also a bit ambiguous in this regard. |
| // Need to get a resolution on this issue soon. RMW 11-20-2000. |
| // |
| // if ((c == '\\' || c == '$') && |
| // (!SqlParser_Initialized() || SqlParser_NAMETYPE == DF_NSK)) { |
| // |
| // User can enter \ or $ at beginning of ident *only* if NAMETYPE NSK; |
| // if SHORTANSI, their input is Ansi only (no \ or $), |
| // and it is only the *output*, after SHORTANSI name resolution, |
| // that may contain \ and $. |
| // |
| //## We should really call the left-to-right ComMPLoc ctor here, |
| //## allowing only a valid MP format |
| //## (one of ComMPLoc::SYS, VOL, or FILE). |
| // |
| |
| // Allow the patterns: |
| // \[A-Z][A-Z0-9]*.$[A-Z][A-Z0-9]* |
| // $[A-Z][A-Z0-9]* |
| // |
| i++; |
| if (c == '\\') { |
| |
| // Must start with an ascii char. |
| // |
| if((i >= len) || (NOT isUpperIsoMapCS((unsigned char)ansiIdent[i++]))) { |
| return illegalCharInIdentifier(ansiIdent, i - 1, countOfRemoved); |
| } |
| |
| while ((i < len) && |
| (isUpperIsoMapCS((unsigned char)ansiIdent[i]) || |
| isDigit8859_1((unsigned char)ansiIdent[i]))) { |
| i++; |
| } |
| |
| // Expecting a ".$" |
| // |
| if((i >= len) || ((unsigned char)ansiIdent[i++] != '.')) { |
| return illegalCharInIdentifier(ansiIdent, i - 1, countOfRemoved); |
| } |
| |
| if((i >= len) || ((unsigned char)ansiIdent[i++] != '$')) { |
| return illegalCharInIdentifier(ansiIdent, i - 1, countOfRemoved); |
| } |
| } |
| |
| // After the '$' |
| // |
| while ((i < len) && |
| (isUpperIsoMapCS((unsigned char)ansiIdent[i]) || |
| isDigit8859_1((unsigned char)ansiIdent[i]))) { |
| i++; |
| } |
| |
| // Expecting a '"' character in the last position. |
| // |
| if (((unsigned char)ansiIdent[i] != '"') || (i != len - 1)) { |
| return illegalCharInIdentifier(ansiIdent, i, countOfRemoved); |
| } |
| |
| ansiIdent.remove(i, 1); |
| countOfRemoved++; |
| |
| } else |
| { |
| //## [RW memleak -- should replace ansiIdent[i] with sptr references, |
| //## refreshing sptr after every remove() or other length modification..] |
| while (i < ansiIdent.length()) { |
| |
| unsigned char c = (unsigned char)ansiIdent[i]; |
| |
| if (i == 0) { // first character |
| if ( ( c == '^' AND NOT acceptCircumflex ) |
| // ---- Allow $ to appear in delimited identifers to support routine action names. |
| // OR ( c == '$' AND ((pv_flags & NASTRING_DELIM_IDENT_WITH_DOLLAR_PREFIX) == 0) ) |
| ) |
| return illegalCharInIdentifier(ansiIdent, i, countOfRemoved); |
| if ( NOT Get_SqlParser_Flags(ALLOW_FUNNY_IDENTIFIER) AND |
| isDelimitedIdentifierReservedForInternalUse(ansiIdent.data(), |
| ansiIdent.length()) ) { |
| for ( ; i <= ansiIdent.length(); i++ ) { // look for the first '@' |
| if ( ansiIdent[i] == NON_SQL_TEXT_CHAR ) |
| break; |
| } // for |
| return illegalCharInIdentifier(ansiIdent, i, countOfRemoved); |
| } // if funny identifier not allowed and specified name is funny |
| } // if is first character in name |
| |
| // Notes: The following logic will not mess up our |
| // Japanese customer's ANSI SQL names. All 32 |
| // control characters, i.e., single-byte values |
| // ranges from 0x00 through 0x1F inclusively, are |
| // standalone characters in Shift-JIS character set |
| // and any character sets that are supersets of the |
| // 7-bit ASCII character set. The Tab character is |
| // a control character. Just keep the current |
| // behavior unless our Japanese customers complain |
| // about this "tab to space" conversion. |
| // |
| if (isSpace8859_1(c) && c != ' ') { |
| ansiIdent[i] = ' '; // tab becomes space |
| c = ' '; // tab is now space |
| } |
| |
| if (NOT isAlNumIsoMapCS(c)) { |
| // |
| // Notes: '/' is guaranteed to always be a |
| // single-byte standalone character |
| // in any multibyte character sets |
| // so it is okay to disallow it; our |
| // Japanese customer will not complain. |
| // |
| // JC: Fix genesis solution 10-040304-3817 |
| // Don't allow '/' in a delimited identifier |
| // |
| // ### SAP POC ### 11/21/2008 ### BEGIN |
| // We now accept the forward slash in delimited names as |
| // required by SAP POC. Comment out the following 3 lines of code. |
| // |
| // ### if (c == '/') { |
| // ### return illegalCharInIdentifier(ansiIdent, i, countOfRemoved); |
| // ### } |
| // |
| // ### SAP POC ### 11/21/2008 ### END |
| // |
| // Notes: The restriction of @ and \ in |
| // delimited names has been loosen. |
| // |
| } // if (NOT isAlNumIsoMapCS(c)) |
| |
| switch (state) { |
| case 1: |
| if (c == '"') { |
| ansiIdent.remove(i,1); |
| countOfRemoved++; |
| state = 2; |
| } else |
| i++; |
| break; |
| case 2: |
| if (c == '"') |
| state = 1; |
| else if (c != ' ') // tab became space |
| return illegalCharInIdentifier(ansiIdent, i, countOfRemoved); |
| i++; |
| break; |
| case 3: |
| if (c == '`') { |
| ansiIdent.remove(i,1); |
| countOfRemoved++; |
| state = 4; |
| } else |
| i++; |
| break; |
| case 4: |
| if (c == '`') |
| state = 3; |
| else if (c != ' ') // tab became space |
| return illegalCharInIdentifier(ansiIdent, i, countOfRemoved); |
| i++; |
| break; |
| default: |
| ComASSERT(FALSE); |
| } // switch |
| } // while |
| |
| if ((isDquote && (state != 2)) || |
| (NOT isDquote && (state != 4))) |
| return illegalCharInIdentifier(ansiIdent, i, countOfRemoved); |
| |
| // ANSI 5.2 SR 13 + 14 and 8.2 SR 3a say that trailing spaces |
| // are insignificant in equality-testing of identifiers, so |
| // remove them. NB: length() and resize(i) have i one greater |
| // than operator[i] positions. |
| // |
| // This loop transforms '" ABC " ' into ' ABC'. |
| // We must check that '" " ' is rejected as an empty string. //" |
| // |
| NABoolean empty = TRUE; |
| for (i = ansiIdent.length(); i > 0; ) { |
| --i; |
| if (ansiIdent[i] != ' ') { // tab became space |
| empty = FALSE; |
| break; |
| } |
| } |
| if (empty) |
| return -3004; // A delimited ident must contain at least one character |
| ansiIdent.resize(++i); |
| } |
| |
| } // end DELIMITED identifier |
| |
| if (!Get_SqlParser_Flags(ALLOW_FUNNY_IDENTIFIER)) |
| { |
| if (ansiIdent.length() > ComMAX_1_PART_INTERNAL_UTF8_NAME_LEN_IN_BYTES) |
| return -3118; // Identifier too long. |
| |
| // allocate plenty of room to avoid buffer overrun |
| NAWchar internalNameInUCS2[ComMAX_1_PART_INTERNAL_UTF8_NAME_LEN_IN_BYTES + 1 + 16]; |
| internalNameInUCS2[0] = NAWCHR('\0'); |
| Int32 iErrorCode = |
| ComAnsiNameToUCS2 ( (const char *) ansiIdent.data() // in - const char * |
| , (NAWchar *) internalNameInUCS2 // out - NAWchar * outBuf |
| , (Int32) (ComMAX_1_PART_INTERNAL_UTF8_NAME_LEN_IN_BYTES + 1 + 8) // in - outBufSizeInNAWchars |
| , FALSE // do not fill the remainder of the output buffer with spaces |
| ); |
| if (iErrorCode != 0 || NAWstrlen(internalNameInUCS2) == 0) |
| return -13001; // An internal error occurred. The SQL statement could not be translated. |
| if (NAWstrlen(internalNameInUCS2) > ComMAX_1_PART_INTERNAL_UCS2_NAME_LEN_IN_NAWCHARS) |
| return -3118; // Identifier too long. |
| } |
| |
| return 0; // no error |
| |
| } // ToInternalIdentifier |
| |
// ---------------------------------------------------------------------
// Convert the external-format (quoted) string literal supplied by the
// user into the internal-format string: the enclosing single quotes are
// dropped and every doubled single quote ('') collapses into one (').
// The input is assumed to be a syntactically valid literal; only debug
// assertions check that assumption.
//
//   internalStr  (out) receives the unquoted text; any previous content
//                      is discarded
//   quotedStr    (in)  the literal, including its enclosing quotes
// ---------------------------------------------------------------------
#if 0 /* Needed for possible future enhancement -- see caller in CatRoutinePassThroughParamList.cpp */
void ToInternalString(NAString &internalStr, const NAString &quotedStr)
{
  const char *extStr = quotedStr.data();
  const size_t extLen = strlen(extStr);
  ComASSERT(extLen >= 2 AND
            extStr[0] EQU '\'' AND
            extStr[extLen - 1] EQU '\'');
  internalStr = "";

  // Nothing between the quotes (or, in a release build, a malformed
  // literal too short to have both quotes): the result is empty.  This
  // also keeps "extLen - 1" below from wrapping around.
  if (extLen <= 2) return;

  for (StringPos i = 1; i < extLen - 1; i++)
    {
      // Append instead of assigning through operator[]: internalStr was
      // just emptied, so an index into it would lie past its end.
      internalStr += extStr[i];
      if (extStr[i] EQU '\'')
        {
          i++;  // skip the second quote of the '' pair
          ComASSERT(extStr[i] EQU '\'');
        }
    }
}
#endif
| |
| // --------------------------------------------------------------------- |
| // Converted the internal-format string literal used by the parser to |
| // the external-format (quoted) string used by the user. This routine |
| // assumes that the syntax of the input internal-format string is |
| // already valid so it does not perform any checking. The default behavior |
| // is to turn each single-quote (') into a double single-quote ('') and enclose |
| // the entire string in single quotes ('....'). Pass in FALSE as the third |
| // parameter to duplicate existing single quotes without enclosing the entire |
| // string in single quotes. |
| // --------------------------------------------------------------------- |
| void ToQuotedString( NAString "edStr |
| , const NAString &internalStr |
| , NABoolean encloseInQuotes ) |
| { |
| if (encloseInQuotes) quotedStr = '\''; |
| |
| for (StringPos i = 0; i < internalStr.length(); i++) |
| { |
| quotedStr += internalStr[i]; |
| if (internalStr[i] EQU '\'') quotedStr += '\''; |
| } |
| if (encloseInQuotes) quotedStr += '\''; |
| } |
| |
| // --------------------------------------------------------------------- |
| // bsearchStrcmp() is used by bsearch() within tokIsFuncOrParenKeyword() |
| // to compare two strings. |
| static Int32 bsearchStrcmp(const void *s1, const void *s2) |
| { |
| return (strcmp((char*)s1, *((char**)s2))); |
| } |
| |
| // Used by PrettifySqlText() -- depends on its having upcased unquoted tokens. |
| static NABoolean tokIsFuncOrParenKeyword(const NAString &sqlText, |
| size_t pos, size_t prevpos) |
| { |
| NAString tok(" "); // space in front |
| tok += &sqlText.data()[pos]; |
| ComASSERT(tok[tok.length()-1] == ' '); // and space after |
| |
| if (tok == " AND " || tok == " OR ") |
| return FALSE; |
| |
| // Derived table correlation names are not keywords, but we want to treat |
| // them like a paren-keyword (no space between word and lparen): |
| // SELECT COUNT(*) FROM (SELECT ...) corr(colRename,...) |
| // SELECT COUNT(*) FROM (SELECT ...) AS corr(colRename,...) |
| // But we want a space in this context: |
| // CREATE VIEW vw(col) AS (SELECT ...); |
| if (pos >= 2 && sqlText[pos - 1] == ' ') |
| { |
| if (sqlText[pos - 2] == ')' && tok != " FROM " && tok != " AS ") |
| return TRUE; |
| if (pos >= 5) |
| if (sqlText[pos - 5] == ')' && |
| sqlText[pos - 4] == ' ' && |
| sqlText[pos - 3] == 'A' && |
| sqlText[pos - 2] == 'S') |
| return TRUE; |
| } |
| |
| // Ansi reserved-word function names, tandem-extensions, and other keywords. |
| // These must be in alphabetical order. Order is checked for DEBUG builds. |
| // There must also be a trailing space for each keyword. |
| // Stored procedure names (e.g. EXPLAIN) are deliberately not in this list |
| // nor are such tokens as CHECK, PRIMARY KEY, REFERENCES, VALUES, ... |
| // Some expression names (e.g. CASE, COALESCE, NULLIF) are. |
| // |
| static const char *keywords[] = |
| { |
| "ABS ", // Tandem-extension |
| "ACOS ", // Tandem-extension |
| "ASC ", // Collation name |
| "ASCII ", // Tandem-extension |
| "ASIN ", // Tandem-extension |
| "ATAN ", // Tandem-extension |
| "ATAN2 ", // Tandem-extension |
| "AVG ", // ANSI |
| "BIT ", // Datatype with scales/precisions/length |
| "BIT_LENGTH ", // ANSI |
| "CASE ", // ANSI |
| "CAST ", // ANSI |
| "CEILING ", // Tandem-extension |
| "CHAR ", // Datatype with scales/precisions/length |
| "CHARACTER ", // Datatype with scales/precisions/length |
| "CHARACTER_LENGTH ", // ANSI |
| "CHAR_LENGTH ", // ANSI |
| "COALESCE ", // ANSI |
| "CODE_VALUE ", // Tandem-extension |
| "CONCAT ", // Tandem-extension |
| "CONVERT ", // ANSI |
| "CONVERTFROMHEX ", // Tandem-extension |
| "CONVERTTIMESTAMP ", // Tandem-extension |
| "CONVERTTOHEX ", // Tandem-extension |
| "COS ", // Tandem-extension |
| "COSH ", // Tandem-extension |
| "COUNT ", // ANSI |
| "CRC32 ", // Trafodion extension |
| "CURDATE ", // Tandem-extension |
| "CURRENT ", // ANSI |
| "CURRENT_DATE ", // ANSI |
| "CURRENT_TIME ", // ANSI |
| "CURRENT_TIMESTAMP ", // ANSI |
| "CURRENT_USER ", // ANSI |
| "CURTIME ", // Tandem-extension |
| "DATEFORMAT ", // Tandem-extension |
| "DAY ", // Datatype with scales/precisions/length |
| "DAYNAME ", // Tandem-extension |
| "DAYOFMONTH ", // Tandem-extension |
| "DAYOFWEEK ", // Tandem-extension |
| "DAYOFYEAR ", // Tandem-extension |
| "DEC ", // Datatype with scales/precisions/length |
| "DECIMAL ", // Datatype with scales/precisions/length |
| "DEGREES ", // Tandem-extension |
| "DESC ", // Collation name |
| "ENCODE_KEY ", // Tandem-extension |
| "EXP ", // Tandem-extension |
| "EXTEND ", // ANSI |
| "EXTERNAL ", // Collation name |
| "EXTRACT ", // ANSI |
| "FIRSTDAYOFYEAR ", // Tandem-extension |
| "FLOAT ", // Datatype with scales/precisions/length |
| "FLOOR ", // Tandem-extension |
| "GROUP_CONCAT", // MySQL-extension |
| "HASHPARTFUNC ", // Tandem-extension |
| "HOUR ", // Datatype with scales/precisions/length |
| "JSON_OBJECT_FIELD_TEXT" //json_object_field_text |
| "JULIANTIMESTAMP ", // Tandem-extension |
| "LCASE ", // Tandem-extension |
| "LOCATE ", // Tandem-extension |
| "LOG ", // Tandem-extension |
| "LOG10 ", // Tandem-extension |
| "LOWER ", // ANSI |
| "LPAD ", // Tandem-extension |
| "LTRIM ", // Tandem-extension |
| "MAX ", // ANSI |
| "MD5 ", // Trafodion extension |
| "MIN ", // ANSI |
| "MINUTE ", // Datatype with scales/precisions/length |
| "MOD ", // Tandem-extension |
| "MONTH ", // Datatype with scales/precisions/length |
| "MONTHNAME ", // Tandem-extension |
| "NCHAR ", // Datatype with scales/precisions/length |
| "NOW ", // Tandem-extension |
| "NULLIF ", // ANSI |
| "NUMERIC ", // Datatype with scales/precisions/length |
| "OCTET_LENGTH ", // ANSI |
| "OS_USERID ", // Tandem-extension |
| "PI ", // Tandem-extension |
| "PIC 9 ", // Cobol datatype directly supported by SQLMX DDL |
| "PICTURE 9 ", // Cobol datatype directly supported by SQLMX DDL |
| "POSITION ", // ANSI |
| "POWER ", // Tandem-extension |
| "QUARTER ", // Tandem-extension |
| "RADIANS ", // Tandem-extension |
| "RAND ", // Tandem-extension |
| "REPEAT ", // Tandem-extension |
| "ROUND ", // Tandem-extension |
| "ROUNDROBINPARTFUNC ", // Tandem-extension |
| "RPAD ", // Tandem-extension |
| "RTRIM ", // Tandem-extension |
| "SECOND ", // Datatype with scales/precisions/length |
| "SESSION_USER ", // ANSI |
| "SHA ", // Trafodion extension |
| "SHA1 ", // Trafodion extension |
| "SHA2 ", // Trafodion extension |
| "SIGN ", // Tandem-extension |
| "SIN ", // Tandem-extension |
| "SINH ", // Tandem-extension |
| "SQRT ", // Tandem-extension |
| "STDDEV ", // Tandem-extension |
| "SUBSTRING ", // ANSI |
| "SUM ", // ANSI |
| "SYS_GUID ", // Oracle-extension |
| "TAN ", // Tandem-extension |
| "TANH ", // Tandem-extension |
| "TIME ", // Datatype with scales/precisions/length |
| "TIMESTAMP ", // Datatype with scales/precisions/length |
| "TRANSLATE ", // ANSI |
| "TRIM ", // ANSI |
| "TRUNCATE ", // Tandem-extension |
| "UCASE ", // Tandem-extension |
| "UPPER ", // ANSI |
| "UPSHIFT ", // ANSI |
| "USER ", // ANSI |
| "VARCHAR ", // Datatype with scales/precisions/length |
| "VARIANCE ", // Tandem-extension |
| "VARNCHAR ", // Datatype with scales/precisions/length |
| "VARYING ", // Datatype with scales/precisions/length |
| "WEEK ", // Tandem-extension |
| "YEAR ", // Datatype with scales/precisions/length |
| }; |
| |
| #ifdef _DEBUG |
| // Only check the order of the above keywords in debug mode. |
| static NABoolean checked_order = FALSE; |
| if (!checked_order) |
| { |
| for (Int32 i = 1; i < (sizeof(keywords) / sizeof(keywords[0])); i++) |
| { |
| if (::strcmp(keywords[i], keywords[i - 1]) <= 0) |
| { |
| char err_buf[128]; |
| sprintf(err_buf, "keywords %s and %s are out of order", |
| keywords[i], keywords[i-1]); |
| ABORT(err_buf); |
| } |
| } |
| checked_order = TRUE; |
| } |
| #endif |
| |
| // Return true if this is a keyword |
| if (bsearch(tok.data() + 1, keywords, (sizeof(keywords) / sizeof(keywords[0])), |
| sizeof(char*), bsearchStrcmp)) |
| return TRUE; |
| |
| // PICTURE or PIC (Cobol datatype directly supported by SQLMX DDL). |
| if (prevpos) prevpos--; |
| NAString prevtok(&sqlText.data()[prevpos]); |
| NABoolean pic = FALSE; |
| if (prevtok.length() > 9) |
| { |
| prevtok.remove(9); |
| pic = prevtok == " PICTURE "; |
| } |
| if (!pic && prevtok.length() > 5) |
| { |
| prevtok.remove(5); |
| pic = prevtok == " PIC "; |
| } |
| if (pic) |
| { |
| if (tok == " B " || tok == " X " || |
| tok == " S9 " || tok == " S 9 " || tok == " SV9 " || tok == " V9 ") |
| return TRUE; |
| } |
| if (tok == " V9 ") // PICTURE S V9(nnn) |
| { |
| if (prevtok.length() > 3) |
| { |
| prevtok.remove(3); |
| if (prevtok == " S ") |
| { |
| if (prevpos >= 8) |
| { |
| prevtok = &sqlText.data()[prevpos-8]; |
| prevtok.remove(9); |
| if (prevtok == " PICTURE ") return TRUE; |
| } |
| if (prevpos >= 4) |
| { |
| prevtok = &sqlText.data()[prevpos-4]; |
| prevtok.remove(5); |
| if (prevtok == " PIC ") return TRUE; |
| } |
| } |
| } |
| } |
| |
| return FALSE; |
| } |
| |
// This is cloned from sqlcomp/parser.C's stringScanWillTerminateInParser,
// though it serves a different purpose.
// It compresses multiple blanks into a single space, uppercases unquoted
// characters, and inserts or removes single spaces around tokens
// (it does not, of course, do these things within quoted text, for either
// ' or " quoting).  //"
//
//   sqlText              (in/out) the text to prettify; it is overwritten
//                        with the result, even when an error is returned
//   nationalCharSetName  (in) either a null pointer or a character set name
//                        that starts with '_'; when supplied, an N that
//                        introduces a string literal (preceded by a space or
//                        a left paren) is replaced by this name
//
// Returns 0 on success, or -15005 when the text ends inside a quoted
// section (unmatched quote).
//
Lng32 PrettifySqlText(NAString &sqlText, const char *nationalCharSetName)
{
  // Last character written to result so far, or '\0' if result is empty.
#define prevResultChar (result.length() ? result[result.length() - 1] : '\0')
#define prevResultCharIs(c) (prevResultChar == c)

  // Either this is NOT passed in (null pointer), OR it has the Ansi character
  // for charset introducer, the underscore.
  ComASSERT(!nationalCharSetName || *nationalCharSetName == '_');

  NAString result;
  // toktype is the class of the token currently being scanned; prevtoktype
  // is the class that was current when the last unquoted space was emitted.
  enum TokType { SPACE, ALPHA, DELIM, DIGIT, LPAREN, PUNC, UNARYOP }
    toktype = SPACE, prevtoktype = SPACE;
  char prev = ' ';          // will remove leading blanks
  char quote_seen = '\0';   // the closing character being waited for, if any
  // Offsets in result where the current and the previous alphabetic token
  // start.
  size_t alphapos = 0, prevalphapos = 0;

  for (const char *s = sqlText.data(); *s; s++)
    {
      char curr = *s;
      if (quote_seen)
        if (*s == quote_seen)
          quote_seen = '\0';
        else
          { /*consume quoted character*/ }
      else if (*s == '\'' || *s == '"')
        {
          quote_seen = *s;
          if (toktype != DELIM)  // initial, not embedded, quote
            {
              // Put a space in front of initial quote,
              // unless it is a national, bit, or hex string literal (Ansi 5.3)
              if (prev != ' ')
                if (*s == '"')
                  result.append(" ");
                else if (prev != '_' && strchr(specialSQL_TEXT, prev))
                  result.append(" ");
                else if (prev == 'N' && nationalCharSetName)
                  if (result.length() >= 2)
                    if (result[result.length()-2] == ' ' ||
                        result[result.length()-2] == '(') {
                      // Here we have ' N' or '(N' preceding an initial squote.
                      // Replace the N with the actual cs name
                      // (which the caller must provide with the correct
                      // underscore introducer, e.g. "_KANJI").
                      result.remove(result.length() - 1);
                      result.append(nationalCharSetName);
                    }
              toktype = DELIM;  // delim-ident or string literal...
            }
        }
      else if (isSpace8859_1((unsigned char)*s))
        curr = ' ';  // convert unquoted tab/newline to space

      if (quote_seen || *s == '\'' || *s == '"')  // in quotes or on ending quote
        {
          // Quoted text is copied through byte for byte.
          result.append(s, 1);
        }
      else if (curr == ' ')
        {
          if (prev == ' ')
            { /*throw away the subsequent spaces; remain in whatever toktype*/ }
          else
            {
              result.append(" ");  // append ourself, a space
              prevtoktype = toktype;
              toktype = SPACE;
            }
        }
      else  // unquoted and not a space
        {
          // The "effective" token type looks through a separating space to
          // the token that came before it.
          TokType efftoktype = toktype != SPACE ? toktype : prevtoktype;

          // Put a space before the first letter of an identifier/hostvar/param,
          // before the first digit of a number (but not a digit in an ident),
          // before the first lparen of a series of lparens.
          //
          NABoolean isInLatin1ExtendedHalf = FALSE;
          NAWchar tmpBuf[10];
          if ((UInt32)curr >= 0x80)  // is not a 7-bit ASCII character
            {
              // Decode one character, treating the input as UTF-8, to find
              // out whether it falls in the range 0x80..0xFF.
              char * p1stUnstranslatedChar = NULL;
              UInt32 iOutLenInBytesIncludingNull = 0;
              UInt32 iTranslatedCharCount = 0;
              Int32 cnvErrStatus = LocaleToUTF16
                ( cnv_version1                  // in  - const enum cnv_version version
                , s                             // in  - const char *in_bufr
                , strlen(s)                     // in  - const int in_len
                , (const char *) tmpBuf         // out - const char *out_bufr
                , 10*BYTES_PER_NAWCHAR          // in  - const int out_bufr_size_in_bytes
                , cnv_UTF8                      // in  - enum cnv_charset charset of source
                , p1stUnstranslatedChar         // out - char * & first_untranslated_char
                , &iOutLenInBytesIncludingNull  // out - unsigned int *output_data_len_p
                , 0                             // in  - const int cnv_flags
                , (Int32) TRUE                  // in  - const int addNullAtEnd_flag
                , &iTranslatedCharCount         // out - unsigned int * translated_char_cnt_p
                , 1                             // in  - unsigned int max_chars_to_convert
                );
              // NOTE: No errors should be possible -- string has been converted before.
              // ComASSERT(cnvErrStatus == 0 && iTranslatedCharCount == 1);
              if (cnvErrStatus EQU 0 AND (UInt32)tmpBuf[0] >= 0x80 AND (UInt32)tmpBuf[0] <= 0xFF)
                {
                  isInLatin1ExtendedHalf = TRUE;
                  curr = (unsigned char)tmpBuf[0];
                  s++;  // The character stored in curr requires two bytes in UTF-8 encoding value
                }
            }
          if (isAlphaIsoMapCS((unsigned char)curr) || curr == '_' || curr == ':' ||
              curr == '?' || curr == '$' || curr == '\\')  // non-Ansi '\nsk.$vol' extension
            {
              if (toktype != ALPHA)
                {
                  toktype = ALPHA;

                  // 123.E+10 and 123. E+10 are numeric.
                  // ABC.E+10 is ABC.E + 10 (efftoktype is alpha, thus E is).
                  // Note: need to test both prevResultChar and efftoktype here
                  // since rparen sets what can become our efftoktype to DIGIT;
                  // probably unnecessary since
                  // A)E+10 is not legal (derived-table-rename plus number?).
                  if (curr == 'E' || curr == 'e')
                    if (s[1] == '+' || s[1] == '-' || isDigit8859_1((unsigned char)s[1]))
                      if (isDigit8859_1((unsigned char)prevResultChar) || prevResultChar == '.')
                        if (efftoktype == DIGIT)
                          {
                            // Avoid getting into toktype PUNC next loop iter:
                            if (s[1] == '+' || s[1] == '-')
                              {
                                result.append("E");
                                curr = *++s;
                              }
                            toktype = DIGIT;
                          }

                  // Still ALPHA (not an exponent): a new alphabetic token
                  // starts here, so record where.
                  if (toktype == ALPHA)
                    {
                      if (prev != ' ') result.append(" ");
                      prevalphapos = alphapos;
                      alphapos = result.length();
                    }
                }
            }
          else if (isDigit8859_1((unsigned char)curr))
            {
              if (toktype != DIGIT && toktype != ALPHA)
                {
                  if (prev != ' ') result.append(" ");
                  toktype = DIGIT;
                }
            }
          else if (curr == '(')
            {
              if (toktype != LPAREN)
                {
                  if (prev != ' ') result.append(" ");
                  toktype = LPAREN;
                  // No space between a function/paren keyword and its lparen.
                  if (prevResultCharIs(' '))  // note: wrong to test (prev==' ')
                    if (tokIsFuncOrParenKeyword(result, alphapos, prevalphapos))
                      result.remove(result.length() - 1);
                }
            }
          else if (curr == '.')
            {
              if (efftoktype == ALPHA || efftoktype == DELIM)
                {
                  // Qualified name: no space before the dot.
                  if (prevResultCharIs(' '))  // note: wrong to test (prev==' ')
                    result.remove(result.length() - 1);
                }
              else if (toktype != DIGIT)  // not efftoktype!
                {
                  // A leading dot starts a number.
                  if (prev != ' ') result.append(" ");
                  toktype = DIGIT;
                }
            }
          else if (curr == ')' || curr == ',' || curr == ';')
            {
              // Remove any preceding space for this kind of punctuation.
              if (prevResultCharIs(' '))  // note: wrong to test (prev==' ')
                result.remove(result.length() - 1);

              // Set prevtoktype for UNARYOP determination.
              // Note that input of   (alpha)-1,(digit)+-1,isp()- -1
              // will thus display as (alpha) - 1, (digit) + -1, isp () - -1
              prevtoktype = curr == ')' ? DIGIT : SPACE;
              toktype = SPACE;
            }
          else
            {
              // Currently not dealing with Sql characters '&', '[', ']'
              // because we don't expect them to appear in the text this
              // procedure ever encounters.
              if (toktype != PUNC)
                {
                  // Put a space before first of a new series of punc
                  // and before subsequent unary minuses in a series of uminus
                  // (i.e. "x + - -y", not "x + --y", as "--" is Sql comment).
                  // Note: wrong to test (prev=='-'); see curr/prev set below.
                  if (prev != ' ' ||
                      (curr == '-' && prevResultCharIs('-')))
                    result.append(" ");

                  // '+ or -' is unary if it follows a reserved word, a comma,
                  // an LPAREN (but not an rparen!), or any PUNC or UNARYOP.
                  // E.g., select +1 + -2 from t where -3 < -col - (-5 + 6) - 7;
                  if (curr == '+' || curr == '-')
                    {
                      if (efftoktype == ALPHA)
                        {
                          NAString tok;
                          tok += &result.data()[alphapos];
                          tok[tok.length()-1] = 0;  // cut off the trailing space
                          toktype = IsSqlReservedWord(tok.data()) ? UNARYOP : PUNC;
                        }
                      else
                        toktype = (efftoktype != DELIM && efftoktype != DIGIT) ?
                                  UNARYOP : PUNC;
                    }
                  else
                    {
                      toktype = PUNC;

                      // If "CREATE ... LOCATION /G/directory ...",
                      // need to treat from the slash thru the next space
                      // as if they were quoted -- don't insert spaces
                      // and don't uppercase.
                      if (curr == '/' && efftoktype == ALPHA)
                        {
                          NAString tok(&result.data()[alphapos]);
                          if (tok == "LOCATION ")
                            {
                              tok = result;
                              tok.remove(7);  // first 7 characters of the text
                              if (tok == "CREATE ")
                                quote_seen = ' ';  // will consume till ' '
                            }
                        }
                    }
                }
              else if (curr == '+' || curr == '-')
                {
                  // Put a space before a unary operator.
                  if (!prevResultCharIs(' ')) result.append(" ");
                  toktype = UNARYOP;
                }
            }

          // Preceding spaces have been dealt with; now add the current char.
          if (isInLatin1ExtendedHalf)
            {
              // Uppercase the character in its ISO 8859-1 form, then convert
              // the result back to UTF-8 before appending it.
              NAString ns(curr);
              ns.toUpper8859_1();
              curr = (char)ns.data()[0];

              char utf8Buf[20];
              char * p1stUnstranslatedChar = NULL;
              UInt32 utf8OutLenInBytesIncludingNull = 0;
              UInt32 charCount = 0;
              Int32 returnCode = LocaleToUTF8
                ( cnv_version1
                , (const char*) ns.data()          // source
                , (const Int32) ns.length()        // source len in bytes - should be 1
                , (const char*) utf8Buf            // output buffer for target
                , (const Int32) 20                 // output buffer size in bytes
                , cnv_ISO88591                     // source char set
                , p1stUnstranslatedChar            // char * & first_untranslated_char
                , &utf8OutLenInBytesIncludingNull  // unsigned int * output_data_len_p in bytes including '\0' terminator
                , (const Int32)TRUE                // const int addNullAtEnd_flag
                , &charCount                       // unsigned int * translated_char_cnt_p - should be 1
                );
              ComASSERT(returnCode == 0 && charCount == 1 && utf8OutLenInBytesIncludingNull == 3);

              // Exclude the NULL terminator added at the end (addNullAtEnd_flag was set to TRUE in the above call)
              // from the count.
              UInt32 utf8StrLenInBytes = 0;
              if ((Int32)utf8OutLenInBytesIncludingNull >= CharInfo::minBytesPerChar(CharInfo::UTF8))
                utf8StrLenInBytes = utf8OutLenInBytesIncludingNull - CharInfo::minBytesPerChar(CharInfo::UTF8);

              ComASSERT(utf8StrLenInBytes > 0);
              result.append(utf8Buf, utf8StrLenInBytes);
            }
          else if ((UInt32)curr < 0x80)  // is a 7-bit ASCII character
            {
              curr = (char)toupper(curr);
              result.append(&curr, 1);
            }
          // NOTE(review): an unquoted byte >= 0x80 that was not recognized
          // above as a character in 0x80..0xFF matches neither branch, so
          // nothing is appended for it -- confirm this is intended.

          // This kind of punctuation does not want spaces following it --
          // except, do not collapse "* *" (as in "CONTROL TABLE * * RESET;")
          // into "**" exponentiation operator.
          // (Note that '?' does not appear here:  Tdm named parameters
          // must have the name immediately following the '?', to distinguish
          // from Ansi unnamed params.  Also, Tdm '\nsk.$vol' punc is never
          // allowed with trailing spaces on input, so no need here to
          // scan ahead and remove it.)
          // Remain in same toktype (space/lparen/alpha/etc).

          if (curr == '.' || curr == '(' || curr == ':' ||
              toktype == PUNC || toktype == UNARYOP)
            {
              if (curr != '*')
                while (isSpace8859_1((unsigned char)s[1])) s++;  // throw away following spaces

              // Set curr to space, which next sets prev to space, which
              // in the next loop iter will prevent a space from being appended.
              // Thus, CAT.SCH.TBL.FLTCOL > 1.5 + -7 will display correctly.
              if (toktype != PUNC) curr = ' ';
            }

        }  // unquoted and not a space

      // Remember the (possibly rewritten) current character for the next
      // iteration's spacing decisions.
      prev = curr;

    }  // loop over sqlText

  // The caller's text is replaced before the unmatched-quote check, so it
  // is modified even when an error is returned.
  sqlText = result;

  if (quote_seen) {
    return -15005;  // Unmatched quote
  }

  size_t len = sqlText.length();
  if (len) {
    len--;
    if (sqlText[len] == ' ') sqlText.remove(len--);  // trim final space
    // These lines commented out because space already removed before ';' above...
    //   if (sqlText[len] == ';' && len--)
    //     if (sqlText[len] == ' ') sqlText.remove(len,1);  // trim space before ';'
  }

  return 0;  // no error

} // PrettifySqlText
| |
| // SQL/MX Regression Test Support |
| // Calculate an increased max output line length to accommodate schema names |
| // longer than 'SCH', that are used when regression test suites are executed |
| // concurrently. The number of characters that schemaName is longer than |
| // 'SCH' is referred to as the "excess character count" in the following |
| // description. The increase beyond maxLineLen is calculated as the excess |
| // character count times the number of times schemaName occurs in the search |
| // substring of sqlText starting at pos. The search substring length is |
| // increased by the excess character count each time schemaName is found in |
| // the search substring. |
| // |
| size_t adjustedMaxLen(const NAString &sqlText, size_t pos, size_t maxLineLen, |
| const char *schemaName) |
| { |
| const size_t SCH_LEN = 3; // Number of chars in 'SCH' |
| size_t schemaNameLen = schemaName ? strlen(schemaName) : 0; |
| if (schemaNameLen > SCH_LEN) |
| { |
| size_t excessCharCount = schemaNameLen - SCH_LEN; |
| size_t sqlTextLen = sqlText.length(); |
| size_t maxSearchPos = pos + maxLineLen - 1; |
| size_t occurs = 0; |
| while (TRUE) |
| { |
| if (maxSearchPos >= sqlTextLen) |
| maxSearchPos = sqlTextLen - 1; |
| pos = sqlText.index(schemaName, pos, NAString::ignoreCase); |
| if ((pos == NA_NPOS) || (pos > maxSearchPos)) |
| break; |
| occurs++; |
| pos += schemaNameLen; |
| maxSearchPos += excessCharCount; |
| } |
| return maxLineLen + (occurs * excessCharCount); |
| } |
| else |
| return maxLineLen; |
| } |
| |
// Called by SHOWDDL command (CmpDescribe.C).
// Inserts linebreaks at word boundaries in order to keep tokens whole --
// to make it easier for a user to cut SHOWDDL output text and paste it
// into SQLCI as a new command.
//
// The optional schemaName argument provides SQL/MX regression test
// support.  When a schemaName is provided, the maximum output line
// length is adjusted so that lines are broken in the same logical place
// they would be if the default schema was 'SCH'.
//
//   sqlText      (in/out) text to break; replaced by the line-broken text
//   showddlView  (in) when TRUE, a standalone word AS ends the current line
//                and the remainder of the text is formatted by a recursive
//                call with deeper indentation
//   maxlen       (in) maximum line length, prefix included (the prefix
//                length is subtracted from it below); 0 or a value not
//                exceeding pfxlen is raised to pfxlen + 1
//   pfxlen       (in) number of pfxchar characters that start every line
//                after the first
//   pfxinitlen   (in) number of pfxchar characters that start the first line
//   pfxchar      (in) the prefix fill character
//   schemaName   (in) optional schema name handed to adjustedMaxLen()
//   commentOut   (in) when TRUE and pfxchar is not '-', both prefixes are
//                made at least 2 long and their first two characters
//                become "--"
//
// Returns the function's running count (cnt) of characters on the line
// being built when the scan ends; if the AS handling recursed, this is the
// value returned by the recursive call.
//
size_t LineBreakSqlText(NAString &sqlText,
                        NABoolean showddlView,
                        size_t maxlen,
                        size_t pfxlen,
                        size_t pfxinitlen,
                        char pfxchar,
                        const char * schemaName,
                        NABoolean commentOut)
{
  if (commentOut && pfxchar != '-')
    {
      // Make sure that the prefixes have enough room
      // for the leading "--" comment prefix.
      if (pfxlen < 2)
        pfxlen += (2 - pfxlen);
      if (pfxinitlen < 2)
        pfxinitlen += (2 - pfxinitlen);
    }
  // The initial line can be indented differently from subsequent lines.
  if (maxlen == 0 || maxlen <= pfxlen) maxlen = pfxlen + 1;
  size_t maxinitlen = (maxlen <= pfxinitlen) ? pfxinitlen + 1 : maxlen;
  // From here on maxlen and maxinitlen count text characters only.
  maxlen -= pfxlen;
  maxinitlen -= pfxinitlen;
  size_t maxcurrlen = adjustedMaxLen(sqlText, 0, maxinitlen, schemaName);

  NAString result(pfxchar, pfxinitlen);  // starts with the first-line prefix
  NAString pfx(pfxchar, pfxlen);         // prefix of every following line
  if (commentOut && pfxchar != '-')
    {
      result[(size_t)0] = '-';
      result[(size_t)1] = '-';
      pfx[(size_t)0] = '-';
      pfx[(size_t)1] = '-';
    }
  pfx.prepend("\n");  // pfx is now the complete linebreak: newline + prefix

  NABoolean showddlViewAS = FALSE;
  char quote_seen = '\0';
  // cnt:   characters copied onto the current output line so far
  // space: length of result just after the latest unquoted space that is
  //        usable as a break point (0 if none)
  // dot[]: lengths of result just after the most recent unquoted dots of
  //        the current token, newest first (0 = unused slot)
  size_t cnt = 0, space = 0, dot[3];  // C.S.T.COL ref has max 3 dots
  size_t sqlNextPos = 0;              // offset in sqlText of the next char
  dot[0] = 0;
  dot[1] = 0;
  sqlText += "\n";  // sentinel (newline, for "--")
  for (const char *s = sqlText.data(); *s; s++)
    {
      if (quote_seen)
        if (*s == quote_seen)
          quote_seen = '\0';
        else
          { /*consume quoted character*/ }
      else if (*s == '\'' || *s == '"')
        quote_seen = *s;
      else if (*s == '-' && s[1] == '-')  // SQL comment: "--" to eol
        quote_seen = '\n';

      result.append(s, 1);
      cnt++;
      sqlNextPos++;
      if (!quote_seen)
        if (isSpace8859_1((unsigned char)*s))  // sentinel ensures we get here
          {
            if (showddlView)  // look for keyword "AS"
              if ((space && result.length() - space == 3) ||
                  (!space && cnt == 3))
                if (result[result.length()-3] == 'A' &&
                    result[result.length()-2] == 'S')
                  showddlViewAS = TRUE;

            if (cnt < maxcurrlen)
              {
                // Still fits: remember this space as a possible break point.
                space = result.length();  // thus space > pfxlen
              }
            else
              {
                if (cnt > maxcurrlen + 1 && space > 0)  // linebreak on space
                  {
                    // Turn the last remembered space into a linebreak; the
                    // text after it becomes the start of the new line.
                    result.replace(space - 1, 1, pfx);
                    cnt = result.length() - space - pfxlen;
                    space = result.length();
                    Int32 i=0;
                    for (; i<3; i++)
                      if (!dot[i]) break;
                      else dot[i] += pfxlen;  // replaced 1 with p+1
                    maxcurrlen = adjustedMaxLen(sqlText, sqlNextPos - cnt,
                                                maxlen, schemaName);
                    // fall through to while loop
                  }

                while (cnt > maxcurrlen + 1)
                  {
                    // The token by itself is too long for a line: look for
                    // a dot inside it that is close enough to break after.
                    NABoolean dotfound = FALSE;
                    size_t dotdiff;
                    Int32 i = 0;
                    for (i=0; i<3; i++)
                      if (!dot[i]) break;
                      else
                        {
                          // Distance from the start of the line to the dot.
                          dotdiff = dot[i] - (result.length() - cnt);
                          if (dotdiff <= maxcurrlen) { dotfound = TRUE; break; }
                        }
                    if (dotfound)  // linebreak after dot
                      {            // and don't forget to dot your i's!
                        result.insert(dot[i], pfx);
                        cnt -= dotdiff;
                        dot[i] = 0;
                        for ( ; i--; ) dot[i] += pfxlen + 1;  // inserted p+1
                      }
                    else  // linebreak after too-long unspaced token
                      {
                        result.remove(result.length() - 1);
                        result += pfx;
                        cnt = space = 0;
                      }
                    maxcurrlen = adjustedMaxLen(sqlText, sqlNextPos - cnt, maxlen,
                                                schemaName);
                  }  // while still too long

                if (cnt >= maxcurrlen)
                  {
                    // The line is full: replace the space just appended by
                    // a linebreak.
                    result.remove(result.length() - 1);
                    result += pfx;
                    cnt = space = 0;
                    maxcurrlen = adjustedMaxLen(sqlText, sqlNextPos, maxlen,
                                                schemaName);
                  }
                else if (cnt && result[result.length() - 1] == ' ')
                  space = result.length();

              }  // need to linebreak

            // A space ends the token, so its dots are no longer candidates.
            dot[0] = 0;

            if (showddlViewAS)  // it was the keyword "AS"
              {
                // Strip trailing spaces/prefix characters and make sure the
                // text so far ends with a bare newline.
                size_t i;
                for (i = result.length() - 1;
                     result[i] == ' ' || result[i] == pfxchar; i--)
                  result.remove(i);
                if (result[i] != '\n') result += "\n";  // newline, no pfx chars

                if (result.length() <= maxinitlen + 1)
                  pfxlen = pfxinitlen;  // still the first line

                // Format everything after AS separately, indented further,
                // and append it as is.
                NAString queryText(++s);  // get past the current space
                cnt = LineBreakSqlText(queryText, FALSE,
                                       maxlen+pfxlen, pfxlen+4, pfxlen+2,
                                       pfxchar, schemaName);
                result += queryText;
                s = &sqlText.data()[sqlText.length() - 1];  // will exit loop
              }

          }  // unquoted space

        else if ((*s == '.') &&
                 (!isDigit8859_1((unsigned char)s[1])))  // ignore dots in numeric constants
          {
            // Remember where this dot is; older dots shift down one slot.
            dot[2] = dot[1];
            dot[1] = dot[0];
            dot[0] = result.length();
          }  // unquoted dot

    }  // loop over sqlText

  sqlText = result;
  TrimNAStringSpace(sqlText, FALSE, TRUE);  // remove trailing sentinel char
  return cnt;

} // LineBreakSqlText
| |
| void GetSimplePosixFilename(NAString &filename, NABoolean doLower) |
| { |
| // Remove any preceding directory path |
| const char *fslash = strrchr(filename, '/'), |
| *bslash = strrchr(filename, '\\'), |
| *dirpathpunc; |
| if (fslash && bslash) |
| dirpathpunc = fslash > bslash ? fslash : bslash; |
| else |
| dirpathpunc = fslash ? fslash : bslash; |
| if (dirpathpunc) filename = ++dirpathpunc; |
| |
| #ifdef NA_CASE_INSENSITIVE_FILENAMES |
| if (doLower) filename.toLower(); |
| #endif |
| |
| } // GetSimplePosixFilename |
| |
| void FUNNY_ANSI_IDENT_REMOVE_PREFIX(NAString &str, const char *pfx) |
| { |
| // Say str is "PACKED__@T" and pfx is PACKED__@ |
| str.remove(1, strlen(pfx)); // str is now "T" (dquotes kept) |
| ToInternalIdentifier(str); // str is now T |
| str = ToAnsiIdentifier(str); // str remains T |
| assert(!str.isNull()); |
| } |
| |
| NAString Latin1StrToUTF8(const NAString & latin1Str, NAMemory * heap) |
| { |
| if (latin1Str.isNull()) |
| return NAString(); |
| |
| char buffer[3008]; // allocate a few extra bytes to make me feel better |
| char * target = &buffer[0]; |
| bool isBufferAllocatedFromProcessHeap(FALSE); |
| Lng32 targetBufferLen = (Lng32)(latin1Str.length() * 4 /* SQL_UTF8_CHAR_MAXSIZE */ + 2); |
| if ( targetBufferLen > 3000 ) |
| { |
| isBufferAllocatedFromProcessHeap = TRUE; |
| target = new (heap) char[targetBufferLen + 2]; // allocate a couple extra bytes ... |
| } |
| |
| char * p1stUnstranslatedChar = NULL; |
| UInt32 utf8StrLenInBytes = 0; |
| UInt32 charCount = 0; // number of characters translated/converted |
| Int32 errorCode = LocaleToUTF8 ( cnv_version1 |
| , latin1Str.data() // in - const char * srcStr |
| , (Int32)latin1Str.length() // in - const int srcStrLen |
| , (const char*)target // out - const char * bufferForTargetStr |
| , (Int32)targetBufferLen // in - const in targetBufferSizeInBytes |
| , cnv_ISO88591 // in - cnv_charset srcCharset |
| , p1stUnstranslatedChar // out - char* & first_untranslated_char |
| , &utf8StrLenInBytes // out - unsigned int * output_data_len_p |
| , (const Int32)TRUE // in - const int addNullAtEnd_flag |
| , &charCount // out - unsigned int * translated_char_cnt_p |
| ); |
| // Exclude the NULL terminator added to the end (addNullAtEnd_flag was set to TRUE in the above call) |
| // from the count. |
| if ((Int32)utf8StrLenInBytes >= CharInfo::minBytesPerChar(CharInfo::UTF8)) |
| utf8StrLenInBytes -= (UInt32)CharInfo::minBytesPerChar(CharInfo::UTF8); |
| else |
| utf8StrLenInBytes = 0; |
| NAString result; |
| if (utf8StrLenInBytes > 0) |
| result.append(target, (size_t)utf8StrLenInBytes); |
| if (isBufferAllocatedFromProcessHeap) |
| NADELETEBASIC(target, heap); |
| return result; |
| } |
| |
| // ----------------------------------------------------------------------- |
| // StatementHeap-related stuff |
| // ----------------------------------------------------------------------- |
| |
// File-local (static) NAMemory pointer for the statement heap, starting out
// as the NASTRING_UNINIT_HEAP_PTR sentinel value.
// NOTE(review): presumably assigned a real heap by code further down in this
// section -- the code that reads/writes it is outside this view; confirm.
| static NAMemory *TheStatementHeap = NASTRING_UNINIT_HEAP_PTR; |
| |