blob: 8ade55ba3b8d7c692346cc1bee3d86a8ae46bbe8 [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
*****************************************************************************
*
* File: parser.C
* Description:
*
*
* Created: 8/30/1996
* Modified: $ $Date: 2007/03/08 02:22:32 $ (GMT)
* Language: C++
* Status: $State: Exp $
*
*
*
*
*****************************************************************************
*/
#include <ctype.h>
#include <wchar.h>
#include "NAWinNT.h"
#include "arkcmp_parser_defs.h"
#define SQLPARSERGLOBALS_CONTEXT_AND_DIAGS
#define SQLPARSERGLOBALS_FLAGS
#define SQLPARSERGLOBALS_LEX_AND_PARSE
#define SQLPARSERGLOBALS_NADEFAULTS
#define SQLPARSERGLOBALS_NAMES_AND_TOKENS
#include "SqlParserGlobals.h"
#include "NLSConversion.h"
#include "csconvert.h"
#include "ulexer.h"
#include "CmpContext.h"
#include "CmpStatement.h"
#include "CmpErrLog.h"
#include "HvRoles.h"
#include "NAExit.h"
#include "NAMemory.h"
#include "ParserMsg.h"
#include "parser.h"
#include "QueryText.h"
#include "RelExeUtil.h"
#include "RelMisc.h" // for RelRoot
#include "RelStoredProc.h" // for RelInternalSP
#include "SchemaDB.h"
#include "SqlciError.h"
#include "StmtNode.h" // for StmtQuery and for ItemColRef.h classes
#include "str.h"
#include "CompException.h" // for CmpInternalException
#include "ComCextdecs.h"
#include "CmpSeabaseDDL.h"
#include "StmtDDLonHiveObjects.h"
#include "logmxevent.h"
// Stream-insertion operator for a ComDiagsArea. Only declared here; the
// definition is not in the visible part of this file. Parser::parseSQL uses
// it to dump *SqlParser_Diags to cerr when an EHBreakException is caught.
ostream &operator<<(ostream &dest, const ComDiagsArea& da);
// Fallback "a parser reset is still pending" flag. Parser::parseSQL reads and
// writes it only when the Parser has no CmpContext (cmpContext() == NULL);
// otherwise the equivalent flag kept on the CmpContext is used instead.
// NOTE(review): THREAD_P presumably makes this thread-local -- confirm.
static THREAD_P NABoolean resetIsNeeded = FALSE;
// Resets lexer/parser state.
//
// on_entry_reset_was_needed:
//   TRUE  - we are being called at the start of a parse because the previous
//           parse was left abnormally (by asserting). Only the lexer is
//           reset; the global parser flags are left exactly as the current
//           caller stack has set them (ARKCMP_EXCEPTION_EPILOGUE() would
//           already have cleared any stale ones).
//   FALSE - normal call at the end of a parse: the parser flags are cleared
//           unless the caller manages that itself (DELAYED_RESET), and the
//           dictionary of WITH clauses is emptied.
void Parser::reset(NABoolean on_entry_reset_was_needed)
{
  // The lexer is reset unconditionally.
  ResetLexer();

  if (!on_entry_reset_was_needed)
    {
      // Callers of parseDML (in particular sql_parse as called from
      // arkcmp/cmpmod.cpp) get their flags reset here, *except* when the
      // caller has asked for a delayed reset (e.g. CmpMain::sqlcomp +
      // sqlcompCleanup, which keeps the flags alive for the whole
      // compilation, in particular for ComObjectName calls).
      if (!Get_SqlParser_Flags(DELAYED_RESET))
        {
          Set_SqlParser_Flags(0);
        }

      if (with_clauses_ != NULL)
        {
          with_clauses_->clear();
        }
    }
}
// Hash function over NAString keys; the Parser constructor hands its address
// to the NAHashDictionary that stores the WITH clauses.
ULng32 cmmHashFunc_NAString(const NAString& str)
{
  const ULng32 hashValue = (ULng32) NAString::hash(str);
  return hashValue;
}
// Constructs a Parser and makes it the current one (SqlParser_CurrentParser),
// remembering the previously current parser so the destructor can restore it.
//
// cmpContext: compiler context to parse under; may be NULL. When it supplies
//             a statement heap that heap is used for all parser allocations,
//             otherwise the parser creates (and later deletes) its own heap.
Parser::Parser(const CmpContext* cmpContext)
  : hasOlapFunctions_(NULL),
    hasTDFunctions_(NULL)
{
  cmpContext_ = const_cast<CmpContext*>(cmpContext);

  // Try to borrow the statement heap of the context, if there is one.
  NABoolean borrowedStatementHeap = FALSE;
  if (cmpContext_)
    {
      wHeap_ = cmpContext_->statementHeap();
      borrowedStatementHeap = (wHeap_ != NULL);
    }

  if (borrowedStatementHeap)
    {
      hasInternalHeap_ = FALSE;
    }
  else
    {
      // Optional upper limit (in KB) for the private heap - currently only
      // used to test the setjmp/longjmp logic.
      size_t heapUpperLimit = 0;
      char* limitFromEnv = getenv("MEMORY_LIMIT_PARSERSWP_UPPER_KB");
      if (limitFromEnv != NULL)
        heapUpperLimit = (size_t) 1024 * atol(limitFromEnv);

      // Give the parser its own heap so that its allocations cannot leak.
      hasInternalHeap_ = TRUE;
      wHeap_ = new NAHeap("Cmp Parser Heap",
                          NAMemory::DERIVED_FROM_SYS_HEAP,
                          524288,
                          heapUpperLimit);
      wHeap_->setErrorCallback(&CmpErrLog::CmpErrLogCallback);
    }

  // Push this parser onto the chain of current parsers.
  prevParser_ = SqlParser_CurrentParser;
  SqlParser_CurrentParser = this;

  // No lexer and no input text yet.
  lexer = NULL;
  inputBuf_ = NULL;
  charset_ = CharInfo::UnknownCharSet;
  initialInputCharSet_ = CharInfo::UnknownCharSet;
  wInputBuf_ = NULL;
  internalExpr_ = NORMAL_TOKEN;

  // Cache the defaults the parser consults.
  modeSpecial1_ = (CmpCommon::getDefault(MODE_SPECIAL_1) == DF_ON);
  modeSpecial4_ = (CmpCommon::getDefault(MODE_SPECIAL_4) == DF_ON);

  defaultColCharset_ = CharInfo::UnknownCharSet;
  NAString defaultColCharsetName =
    CmpCommon::getDefaultString(TRAF_DEFAULT_COL_CHARSET);
  if (! defaultColCharsetName.isNull())
    defaultColCharset_ = CharInfo::getCharSetEnum(defaultColCharsetName);

  // The function-tracking lists were constructed without a heap above;
  // attach the parser heap now that it is known.
  hasOlapFunctions_.setHeap(wHeap_);
  hasTDFunctions_.setHeap(wHeap_);
  clearHasOlapFunctions();

  HQCKey_ = NULL;

  // Dictionary of WITH clauses, keyed by name.
  Lng32 initialDictSize = 10;
  with_clauses_ =
    new (wHeap_) NAHashDictionary<NAString,RelExpr>(&cmmHashFunc_NAString,
                                                     initialDictSize,
                                                     TRUE,
                                                     wHeap_);

  hiveDDLInfo_ = new (wHeap_) HiveDDLInfo();
}
// Destroys the lexer, releases the parser's memory and re-instates the parser
// that was current before this one was constructed.
Parser::~Parser()
{
  delete lexer;

  if (!hasInternalHeap_)
    {
      // The input buffers live on the heap of the current statement, which
      // the parser does not own, so they are released individually.
      NADELETE(inputBuf_, charBuf, wHeap_);
      NADELETE(wInputBuf_, NAWcharBuf, wHeap_);
    }
  else
    {
      // The heap was created by the constructor: deleting it releases the
      // heap together with the memory associated with it.
      delete wHeap_;
    }

  SqlParser_CurrentParser = prevParser_;
}
// Returns the compiler context this parser was constructed with. The
// constructor accepts a NULL context, so the result may be NULL.
CmpContext* Parser::cmpContext()
{
  CmpContext* const owningContext = cmpContext_;
  return owningContext;
}
// Length of the string held in inputBuf_, or 0 when there is no buffer, the
// buffer is empty, or the recorded string length is not positive.
//
// getBufSize() is deliberately NOT used as the answer: it is the capacity of
// the buffer, which may hold the string, its optional trailing null
// terminator(s) and then arbitrary garbage. The result therefore relies on
// getStrLen() being correct.
size_t Parser::inputStrLen()
{
  if (inputBuf_ == NULL)
    return 0;

  if (inputBuf_->getBufSize() <= 0)
    return 0;

  if (inputBuf_->getStrLen() <= 0)
    return 0;

  return (size_t)inputBuf_->getStrLen();
}
// Length of the string held in wInputBuf_, or 0 when there is no buffer, the
// buffer is empty, or the recorded string length is not positive.
//
// As for the narrow buffer, getBufSize() is only the capacity of the buffer;
// the result relies on getStrLen() being correct.
size_t Parser::wInputStrLen()
{
  if (wInputBuf_ == NULL)
    return 0;

  if (wInputBuf_->getBufSize() <= 0)
    return 0;

  if (wInputBuf_->getStrLen() <= 0)
    return 0;

  return (size_t)wInputBuf_->getStrLen();
}
// Normalizes the tail of inputBuf_ so that the text ends in a single
// semicolon immediately followed by a null byte, and updates the buffer's
// recorded string length accordingly.
//
// Working backwards from the recorded string length it overwrites with null
// bytes: trailing white space and null bytes; a terminating "--" (both minus
// signs) plus the white space in front of it. If the remaining text already
// ends in a run of semicolons/white space, only the leftmost semicolon of
// that run is kept; otherwise a semicolon is appended. An empty or missing
// buffer becomes ";".
//
// Returns TRUE when inputBuf_ had to be (re)allocated (any pointer previously
// obtained from inputBuf_->data() is then stale), FALSE when the existing
// buffer was fixed up in place.
NABoolean Parser::fixupParserInputBufAndAppendSemicolon()
{
const unsigned char nullchar('\0');
// NOTE(review): spacechar is not referenced anywhere in this function.
const unsigned char spacechar(' ');
const unsigned char semicolon(';');
const unsigned char minuschar('-');
// No buffer, or nothing in it: start over with a small buffer holding ";".
if (inputBuf_ == NULL || inputBuf_->getBufSize() <= 0 || inputStrLen() <= 0)
{
NADELETE(inputBuf_, charBuf, wHeap());
inputBuf_ = new(wHeap()) charBuf(16 /* int newBufSize */, wHeap());
inputBuf_->data()[0] = semicolon; inputBuf_->setStrLen(1);
inputBuf_->data()[1] = nullchar;
return TRUE; // inputBuf_ is (re)allocated
}
// Note that inputBuf_->getBufSize() returns the size of the buffer
// (including the trailing null characters)
Int32 bufferSize = inputBuf_->getBufSize();
unsigned char *s = inputBuf_->data();
// exclude the trailing null characters from the count
Int32 initialInputStrLen = inputStrLen();
Int32 i = initialInputStrLen - 1; // position of the last elements in the buffer
// replace trailing white space with null characters; on exit i indexes the
// last character that is neither white space nor null (or is -1)
while (i >= 0 && (isSpace8859_1(s[i]) || s[i] == nullchar))
s[i--] = nullchar;
// special case! Check for the terminating -- and replace them with null characters
if ( i >= 1 && s[i] == minuschar && s[i-1] == minuschar)
{
s[i--] = nullchar; // replace minus with null character
s[i--] = nullchar; // replace minus with null character
// replace white space before the -- with null characters
while (i >= 0 && (isSpace8859_1(s[i]) || s[i] == nullchar))
s[i--] = nullchar;
}
// if there are multiple trailing white space and semicolons,
// keep the leftmost semicolon and replace the characters to
// the right of that semicolon with null characters
if (i >= 0)
{
// Scan left over the trailing run of white space / semicolons / nulls.
// Because each match overwrites the previous one, semicolonPos and
// nullcharPos end up at the LEFTMOST semicolon and null of that run.
Int32 ix = i; Int32 nullcharPos = -1; Int32 semicolonPos = -1;
for (; ix >= 0 && (isSpace8859_1(s[ix]) || s[ix] == semicolon || s[ix] == nullchar); ix--)
{
if (s[ix] == semicolon)
semicolonPos = ix;
else if (s[ix] == nullchar)
nullcharPos = ix;
}
if (semicolonPos != -1) // semicolon found
{
if (nullcharPos != -1 && nullcharPos < semicolonPos) // null chars before the semicolon
{
s[nullcharPos] = semicolon; // replace the null char with a semicolon
semicolonPos = nullcharPos;
}
// The string now ends at the kept semicolon.
inputBuf_->setStrLen(semicolonPos+1);
if (semicolonPos+1 < bufferSize)
{
s[semicolonPos+1] = nullchar;
}
else if (semicolonPos+1 == bufferSize)
{
// cannot append a null char to the existing buffer because there is
// no more room left in the buffer - Allocate a new (bigger) buffer.
charBuf *pNewCharBuf = new(wHeap()) charBuf(bufferSize + 16 /* Int32 newBufferSize */, wHeap());
memcpy((void*)pNewCharBuf->data(), (const void *)inputBuf_->data(), bufferSize/*in_bytes*/);
pNewCharBuf->data()[bufferSize] = nullchar;
pNewCharBuf->setStrLen(bufferSize);
NADELETE(inputBuf_, charBuf, wHeap());
inputBuf_ = pNewCharBuf;
return TRUE; // inputBuf_ is (re)allocated
}
return FALSE;
}
}
// Everything was trimmed away: the statement becomes just ";".
if (i == -1)
{
if (bufferSize >= 2) // buffer has enough room for a semicolon and a null character
{
s[0] = semicolon; inputBuf_->setStrLen(1);
s[1] = nullchar;
return FALSE;
}
else
{
NADELETE(inputBuf_, charBuf, wHeap());
inputBuf_ = new(wHeap()) charBuf(16 /* int newBufSize */, wHeap());
s = inputBuf_->data(); // note that inputBuf_->data() now points to a new location
s[0] = semicolon; inputBuf_->setStrLen(1);
s[1] = nullchar;
return TRUE; // inputBuf_ is (re)allocated
}
}
// The text does not end in a semicolon: append one.
if (i >= 0 && s[i] != semicolon)
{
if (i + 2 < bufferSize)
{
// There is enough room to add a semicolon and a null terminator
s[i+1] = semicolon; inputBuf_->setStrLen(i+2);
s[i+2] = nullchar;
return FALSE;
}
// --- Do not have enough space in the buffer to add/append a semicolon ---
// Allocate new buffer
// The text is staged in an NAString first because the old buffer is
// deleted before the new one is allocated.
NAString newInputStr(wHeap());
newInputStr.append((const char *)s, (size_t)(i+1)); // i+1 == number of bytes that survived the trimming above
newInputStr.append(semicolon);
// NAString is always null terminated and the null terminator is excluded from
// the count returned by the NAString length() method.
Int32 newInputStrLen = (Int32)newInputStr.length();
unsigned char * pNewInputStr = (unsigned char *)newInputStr.data();
NADELETE(inputBuf_, charBuf, wHeap());
inputBuf_ = new(wHeap()) charBuf ( newInputStrLen + 16 // int newBufferSize
, wHeap()
);
// !!! IMPORTANT !!! inputBuf_ and inputBuf_->data() now have new pointer values
// Note that inputBuf_->getBufSize() is now == newInputStr.length() + 16
s = inputBuf_->data();
// newInputStrLen + 1 so that the NAString's null terminator is copied too
memcpy((void*)s, (void*)pNewInputStr, (size_t)(newInputStrLen + 1));
inputBuf_->setStrLen(newInputStrLen);
inputBuf_->data()[inputBuf_->getStrLen()] = nullchar;
return TRUE; // inputBuf_ is (re)allocated
}
return FALSE;
}
// Wide-character (NAWchar) counterpart of fixupParserInputBufAndAppendSemicolon.
//
// Normalizes the tail of wInputBuf_ so that the text ends in a single
// semicolon immediately followed by a null NAWchar, and updates the buffer's
// recorded string length accordingly:
//   - trailing white space and null characters are overwritten with nulls;
//   - a terminating "--" and the white space in front of it are overwritten
//     with nulls;
//   - if the remaining text ends in a run of semicolons/white space, only the
//     leftmost semicolon of that run is kept;
//   - otherwise a semicolon is appended;
//   - an empty or missing buffer becomes ";".
//
// Returns TRUE when wInputBuf_ had to be (re)allocated (any pointer
// previously obtained from wInputBuf_->data() is then stale), FALSE when the
// existing buffer was fixed up in place.
//
// All lengths and positions in this function are counted in NAWchars.
NABoolean Parser::fixupParserWInputBufAndAppendSemicolon()
{
  const NAWchar nullchar(0);
  const NAWchar semicolon(';');
  const NAWchar minuschar('-');

  // No buffer, or nothing in it: start over with a small buffer holding ";".
  if (wInputBuf_ == NULL || wInputBuf_->getBufSize() <= 0 || wInputStrLen() <= 0)
    {
      NADELETE(wInputBuf_, NAWcharBuf, wHeap());
      wInputBuf_ = new(wHeap()) NAWcharBuf(16 /* int newBufSize */, wHeap());
      wInputBuf_->data()[0] = semicolon; wInputBuf_->setStrLen(1);
      wInputBuf_->data()[1] = nullchar;
      return TRUE; // wInputBuf_ is (re)allocated
    }

  // Note that wInputBuf_->getBufSize() returns the size of the buffer
  // (including the trailing null characters)
  Int32 bufferSize = wInputBuf_->getBufSize();
  NAWchar *s = wInputBuf_->data();

  // exclude the trailing null characters from the count
  Int32 initialWInputStrLen = wInputStrLen();
  Int32 i = initialWInputStrLen - 1; // position of the last element in the buffer

  // replace trailing white space with null characters; on exit i indexes the
  // last character that is neither white space nor null (or is -1)
  while (i >= 0 && (isSpace8859_1(s[i]) || s[i] == nullchar))
    s[i--] = nullchar;

  // special case! Check for the terminating -- and replace them with null characters
  if ( i >= 1 && s[i] == minuschar && s[i-1] == minuschar)
    {
      s[i--] = nullchar; // replace minus with null character
      s[i--] = nullchar; // replace minus with null character
      // replace white space before the -- with null characters
      while (i >= 0 && (isSpace8859_1(s[i]) || s[i] == nullchar))
        s[i--] = nullchar;
    }

  // if there are multiple trailing white space and semicolons,
  // keep the leftmost semicolon and replace the characters to
  // the right of that semicolon with null characters
  if (i >= 0)
    {
      // Scan left over the trailing run of white space / semicolons / nulls.
      // Each match overwrites the previous one, so semicolonPos and
      // nullcharPos end up at the LEFTMOST semicolon and null of that run.
      Int32 ix = i; Int32 nullcharPos = -1; Int32 semicolonPos = -1;
      for (; ix >= 0 && (isSpace8859_1(s[ix]) || s[ix] == semicolon || s[ix] == nullchar); ix--)
        {
          if (s[ix] == semicolon)
            semicolonPos = ix;
          else if (s[ix] == nullchar)
            nullcharPos = ix;
        }

      if (semicolonPos != -1) // semicolon found
        {
          if (nullcharPos != -1 && nullcharPos < semicolonPos) // null chars before the semicolon
            {
              s[nullcharPos] = semicolon; // replace the null char with a semicolon
              semicolonPos = nullcharPos;
            }

          // The string now ends at the kept semicolon.
          wInputBuf_->setStrLen(semicolonPos+1);
          if (semicolonPos+1 < bufferSize)
            {
              s[semicolonPos+1] = nullchar;
            }
          else if (semicolonPos+1 == bufferSize)
            {
              // cannot append a null char to the existing buffer because there is
              // no more room left in the buffer - Allocate a new (bigger) buffer.
              NAWcharBuf *pNewNAWCharBuf =
                new(wHeap()) NAWcharBuf(bufferSize + 4 /* Int32 newBufferSize */, wHeap());
              NAWstrncpy(pNewNAWCharBuf->data(), wInputBuf_->data(), bufferSize/*in_NAWchars*/);
              pNewNAWCharBuf->data()[bufferSize/*in_NAWchars*/] = nullchar;
              pNewNAWCharBuf->setStrLen(bufferSize/*in_NAWchars*/);
              NADELETE(wInputBuf_, NAWcharBuf, wHeap());
              wInputBuf_ = pNewNAWCharBuf;
              return TRUE; // wInputBuf_ is (re)allocated
            }
          return FALSE;
        }
    }

  // Everything was trimmed away: the statement becomes just ";".
  if (i == -1)
    {
      if (bufferSize >= 2) // buffer has enough room for a semicolon and a null character
        {
          s[0] = semicolon; wInputBuf_->setStrLen(1);
          s[1] = nullchar;
          return FALSE;
        }
      else
        {
          NADELETE(wInputBuf_, NAWcharBuf, wHeap());
          wInputBuf_ = new(wHeap()) NAWcharBuf(16 /* int newBufSize */, wHeap());
          s = wInputBuf_->data(); // note that wInputBuf_->data() now points to a new location
          s[0] = semicolon; wInputBuf_->setStrLen(1);
          s[1] = nullchar;
          return TRUE; // wInputBuf_ is (re)allocated
        }
    }

  // The text does not end in a semicolon: append one.
  if (i >= 0 && s[i] != semicolon)
    {
      if (i + 2 < bufferSize)
        {
          // There is enough room to add a semicolon and a null terminator
          s[i+1] = semicolon; wInputBuf_->setStrLen(i+2);
          s[i+2] = nullchar;
          return FALSE;
        }

      // --- Do not have enough space in the buffer to add/append a semicolon ---
      // Allocate a new buffer. The text is staged in an NAWString first
      // because the old buffer is deleted before the new one is allocated.
      NAWString newInputStr(wHeap());
      // Copy exactly the i+1 NAWchars that survived the trimming above. The
      // count must come from the wide buffer: inputStrLen() is the byte
      // length of the *narrow* buffer, which may be NULL (length 0) or hold a
      // different number of elements, so using it here would truncate the
      // statement or read past the wide text.
      newInputStr.append((const NAWchar *)s, (size_t)(i+1));
      newInputStr.append(semicolon);

      // NAWString is always NAWchar null terminated and the null terminator is
      // excluded from the count returned by the NAWString length() method.
      Int32 newInputStrLen = (Int32)newInputStr.length();
      const NAWchar * pNewInputStr = newInputStr.data();

      NADELETE(wInputBuf_, NAWcharBuf, wHeap());
      wInputBuf_ = new(wHeap()) NAWcharBuf ( newInputStrLen + 16 // int newBufSize
                                           , wHeap()
                                           );
      // !!! IMPORTANT !!! wInputBuf_ and wInputBuf_->data() now have new pointer values
      // Note that wInputBuf_->getBufSize() is now == newInputStr.length() + 16
      s = wInputBuf_->data();
      // newInputStrLen + 1 so that the NAWString's null terminator is copied too
      NAWstrncpy(s, pNewInputStr, (size_t)(newInputStrLen + 1));
      wInputBuf_->setStrLen(newInputStrLen);
      wInputBuf_->data()[wInputBuf_->getStrLen()] = nullchar;
      return TRUE; // wInputBuf_ is (re)allocated
    }

  return FALSE;
}
// Converts inCharBuf, encoded in character set inStrCharSet, to a wide
// buffer by calling csetToUnicode(), and reports conversion failures in
// CmpCommon::diags().
//
// outCharCount / outErrorByteOff may be NULL; local scratch variables are
// substituted so the callee always has somewhere to write.
//
// Returns whatever csetToUnicode() returned when outErrorCode is 0. On any
// error a diagnostic is added (-2109 for an invalid character, -2110 for a
// buffer overrun; any other code trips PARSERASSERT), outNAWcharBuf is set to
// NULL and NULL is returned.
static NAWcharBuf* parserCharSetToUTF16(const charBuf& inCharBuf,
                                        CollHeap* heap,
                                        NAWcharBuf*& outNAWcharBuf,
                                        Int32 inStrCharSet,
                                        Int32 & outErrorCode,
                                        NABoolean addNullAtEnd = TRUE,
                                        Int32 * outCharCount = NULL,
                                        Int32 * outErrorByteOff = NULL)
{
  // Scratch targets for the optional out-parameters.
  Int32 scratchCharCount = 0;
  Int32 scratchErrorByteOff = 0;
  if (!outCharCount)
    outCharCount = &scratchCharCount;
  if (!outErrorByteOff)
    outErrorByteOff = &scratchErrorByteOff;

  NAWcharBuf * converted =
    csetToUnicode(inCharBuf, heap, outNAWcharBuf, inStrCharSet,
                  outErrorCode, addNullAtEnd, outCharCount, outErrorByteOff);

  // Success: hand the converted buffer straight back.
  if (!outErrorCode)
    return converted;

  if (outErrorCode == CNV_ERR_INVALID_CHAR)
    {
      *CmpCommon::diags() << DgSqlCode(-2109)
                          << DgString0(CharInfo::getCharSetName((CharInfo::CharSet)inStrCharSet))
                          << DgString1("UCS2")
                          << DgInt0(*outCharCount) << DgInt1(*outErrorByteOff);
    }
  else if (outErrorCode == CNV_ERR_BUFFER_OVERRUN)
    {
      *CmpCommon::diags() << DgSqlCode(-2110)
                          << DgString0(CharInfo::getCharSetName((CharInfo::CharSet)inStrCharSet))
                          << DgString1("UCS2");
    }
  else
    {
      PARSERASSERT(false);
    }

  outNAWcharBuf = NULL;
  return NULL;
}
// Converts the wide buffer pr_UTF16StrBuf to character set inStrCharSet by
// calling unicodeTocset(), and reports conversion failures in
// CmpCommon::diags().
//
// outCharCount / outErrorByteOff may be NULL; local scratch variables are
// substituted so the callee always has somewhere to write.
//
// Returns whatever unicodeTocset() returned when outErrorCode is 0. On any
// error a diagnostic is added (-2109 for an invalid character, -2110 for a
// buffer overrun; any other code trips PARSERASSERT), pr_pOutCharSetStrBuf is
// set to NULL and NULL is returned.
charBuf* parserUTF16ToCharSet(const NAWcharBuf& pr_UTF16StrBuf,
                              CollHeap* heap,
                              charBuf*& pr_pOutCharSetStrBuf,
                              Int32 inStrCharSet,
                              Int32 & outErrorCode,
                              NABoolean addNullAtEnd, // default is TRUE
                              NABoolean allowInvalidCodePoint, // default is TRUE
                              Int32 * outCharCount, // default is NULL
                              Int32 * outErrorByteOff) // default is NULL
{
  // Scratch targets for the optional out-parameters.
  Int32 scratchCharCount = 0;
  Int32 scratchErrorByteOff = 0;
  if (!outCharCount)
    outCharCount = &scratchCharCount;
  if (!outErrorByteOff)
    outErrorByteOff = &scratchErrorByteOff;

  charBuf * converted = unicodeTocset(pr_UTF16StrBuf,
                                      heap,
                                      pr_pOutCharSetStrBuf,
                                      inStrCharSet,
                                      outErrorCode,
                                      addNullAtEnd,
                                      allowInvalidCodePoint,
                                      outCharCount,
                                      outErrorByteOff);

  // Success: hand the converted buffer straight back.
  if (!outErrorCode)
    return converted;

  if (outErrorCode == CNV_ERR_INVALID_CHAR)
    {
      *CmpCommon::diags() << DgSqlCode(-2109)
                          << DgString0(CharInfo::getCharSetName((CharInfo::CharSet)inStrCharSet))
                          << DgString1("UCS2")
                          << DgInt0(*outCharCount) << DgInt1(*outErrorByteOff);
    }
  else if (outErrorCode == CNV_ERR_BUFFER_OVERRUN)
    {
      *CmpCommon::diags() << DgSqlCode(-2110)
                          << DgString0(CharInfo::getCharSetName((CharInfo::CharSet)inStrCharSet))
                          << DgString1("UCS2");
    }
  else
    {
      PARSERASSERT(false);
    }

  pr_pOutCharSetStrBuf = NULL;
  return NULL;
}
// Pre-scan of the statement text, done before it is handed to the lexer, to
// detect input that would make SqlParser.y either loop forever or read past
// the end of the input buffer (and so access-violate).
//
// sqlci/InputStmt.C performs the same kind of check, but it cannot do so for
// quoted text as seen in a (hostvar) prototype value. This code should remain
// here, and the (much more complicated) code in InputStmt removed, IMHO.
//
// str          : statement text; need not be null terminated within sLen
// internalExpr : non-zero for internally generated expressions
// sLen         : number of NAWchars of str to examine
//
// Returns TRUE when the text is safe to parse. Returns FALSE, after adding
// SQLCI_INPUT_MISSING_QUOTE to SqlParser_Diags, when a single or double quote
// is still open where the scan stops.
static NABoolean stringScanWillTerminateInParser(const NAWchar *str,
                                                 Int32 internalExpr, Int32 sLen)
{
  // Encoded strings from GenRfork's buildEncodeTree
  // (calling via GenExpGenerator.h's createExprTree) are weird,
  // often having embedded squotes that DO terminate in Parser, so return OK.
  if (internalExpr)
    return TRUE;

  const NAWchar noQuote = NAWCHR('\0');
  NAWchar openQuote = noQuote;       // the quote character we are inside of, if any
  NABoolean semicolonSeen = FALSE;   // an unquoted ';' has been passed

  for (Int32 pos = 0; pos < sLen; pos++)
    {
      const NAWchar ch = str[pos];

      if (openQuote != noQuote)
        {
          // Inside quoted text: only the matching quote is significant,
          // every other character is simply consumed.
          if (ch == openQuote)
            openQuote = noQuote;
          continue;
        }

      if (ch == NAWCHR('\'') || ch == NAWCHR('"'))
        {
          openQuote = ch;
          continue;
        }

      // First non-blank, non-quote character after an unquoted semicolon:
      // the semicolon was embedded in the text. The scan stops here.
      if (semicolonSeen && !NAWisspace(ch)) // && ch != NAWCHR(';')
        break;

      if (ch == NAWCHR(';'))
        semicolonSeen = TRUE;
    }

  if (openQuote != noQuote)
    {
      // Unmatched quote
      *SqlParser_Diags << DgSqlCode(-SQLCI_INPUT_MISSING_QUOTE)
                       << DgWString0(NAWString(WIDE_("\n")) + str);
      return FALSE;
    }

  // compound statements use ';' as a statement separator, so
  // we cannot simply reject unquoted semicolons as an error.
  return TRUE;
}
// ------------------------------------------------------------------------
// processHiveDDL
//
// Called when a hive ddl statement has been seen during parsing.
// When one is detected, the relevant information is recorded in HiveDDLInfo
// and the parsing phase errors out. Doing it this way avoids having to teach
// the parser the hive ddl syntax.
//
// For example:
//   create table hive.hive.t (a int) stored as sequencefile;
// The Traf parser does not understand the 'stored as sequencefile' syntax.
// As soon as 'hive.hive.t' is detected, all relevant information is stored
// in the HiveDDLInfo class and the parsing phase is terminated.
// This method then creates the needed structures so that the create stmt
// can be passed on to the hive api layer.
//
// Return: 'node' contains the generated tree.
//         TRUE, if all ok.
//         FALSE, if error.
//
// The host variable roles collected during the parse are cleared in either
// case.
// -------------------------------------------------------------------------
NABoolean Parser::processHiveDDL(Parser::HiveDDLInfo * hiveDDLInfo,
                                 ExprNode** node)
{
  // Build the query tree from the original statement text, under the
  // current default catalog and schema.
  const NABoolean treeWasBuilt =
    CmpSeabaseDDL::setupQueryTreeForHiveDDL(hiveDDLInfo,
                                            inputStr(),
                                            (CharInfo::CharSet)inputStrCharSet(),
                                            CmpCommon::getDefaultString(CATALOG),
                                            CmpCommon::getDefaultString(SCHEMA),
                                            node);

  TheHostVarRoles->clear();

  return treeWasBuilt;
}
// Parser::parseSQL is a private helper function that encapsulates most of
// the work that used to be done in Parser::parseDML. It avoids duplicating
// code shared by parseDML and parse_w_DML.
// requires: Parser.inputStr() and Parser.wInputStr() are well-defined
// modifies: node,
// Parser.{lexer,internalExpr_}
// SqlParser_Diags, SqlParser_ParamItemList,
// SqlParser_Flags, TheParseTree, ParScannedTokens, AllHostVars,
// common/SqlParserGlobals*.h LEX/PARSE globals
// effects : parses the SQL statement whose text is given by
// Parser.inputStr() and Parser.wInputStr()
// uses wInputStr() for lexing and parsing
// uses inputStr() for error reporting
// returns 0 if all OK, 1 otherwise
//
// Order of processing, as implemented below:
// 1. special DDL (processSpecialDDL) short-circuits the normal parse;
// 2. the text is offered to parseUtilISPCommand, which may produce a
// utility node that is kept as a fallback;
// 3. the text is pre-scanned and, if acceptable, run through yyparse;
// 4. diagnostics are merged into CmpCommon::diags() and globals are reset;
// 5. the result node is chosen: yyparse tree, utility node, or hive DDL tree.
Int32 Parser::parseSQL
(ExprNode **node, // (OUT): parse tree if all OK
Int32 internalExpr, // (IN) : NORMAL_TOKEN, INTERNALEXPR_TOKEN, etc
ItemExprList *paramItemList)// (IN) : assigned to SqlParser_ParamItemList
{
// set the SQL text to the event logging area if the buffer there
// is empty
cmpCurrentContext->setLogmxEventSqlText( wInputStr() );
// Set parser globals here
// (both globals are re-created unconditionally on every call; the NULL
// checks below are commented out)
// if (ParScannedTokens == NULL)
ParScannedTokens = new(wHeap()) ParScannedTokenQueue();
// if (TheHostVarRoles == NULL)
TheHostVarRoles = new(wHeap()) HostVarRole_vec(wHeap());
// End of setting parser globals
// The parameter internal_expr indicates that this expression
// was created internally (e.g., by binder or generator) and is being
// parsed to get back the corresponding parse tree.
// The root of this tree will be the corresponding ExprNode,
// NOT necessarily a StmtNode (which tops the tree for all normal
// SQL statements).
internalExpr_ = internalExpr;
if (internalExpr == INTERNALEXPR_TOKEN) {
// Set flag to indicate that we are parsing an internal expression
// so that arbitrary precision exact numeric literals are accepted
// by the SqlParserAux.cpp literalOfNumericPassingScale function.
Set_SqlParser_Flags(ALLOW_ARB_PRECISION_LITERALS);
}
#ifndef NDEBUG
// Define this env var to the usual 1 to display all sqltext input except
// internal expressions (casts from the generator) and
// object-name parsing from check constraint binding.
const char *dbg = getenv("SQLCOMP_DEBUG");
if (!dbg) {
// Set this to ascii '1' (or other digit) if debugging in MSDEV when
// you don't have the env var defined (e.g. a static compile).
static const char overrideEnv = '\0';
dbg = &overrideEnv;
}
if (dbg && *dbg != '\0' && *dbg != '0')
if (!internalExpr || *dbg != '1') // internal-expr's
{
// Look at the first 6 characters only, to recognize text starting "TABLE ".
NAString tmp(inputStr());
tmp.remove(6);
if (tmp != "TABLE " || *dbg == '9') // check constraint binding
{
NAString pretty(inputStr());
PrettifySqlText(pretty);
LineBreakSqlText(pretty);
cout << pretty << endl;
}
}
#endif
// if using special DDL or requesting DDL for SQL/MP objects, generate
// DDLExpr node now.
if (!internalExpr)
{
if (processSpecialDDL(inputStr(),
inputStrLen(),
NULL,
(CharInfo::CharSet)inputStrCharSet(),
node))
{
// Either an error or special DDL found
TheHostVarRoles->clear();
if (*node == NULL)
return 1; // error
else
return 0; // special DDL found and node has been generated
}
}
// Rewrite the utility commands into internal stored procedure commands.
// utilISPNode stays NULL unless parseUtilISPCommand fills it in; it is only
// used after yyparse has run (see the end of this function).
ExprNode * utilISPNode = NULL;
if (!internalExpr)
{
parseUtilISPCommand(inputStr(),
inputStrLen(),
(CharInfo::CharSet)inputStrCharSet(),
&utilISPNode);
}
// Mark the compiler's common diags area:
// This is because compiler might call the other routines in compiler
// for ExprNode constructors that might put in the errors into
// CmpCommon::diags() area. So the CmpCommon::diags()
// is marked here and at the end merged into the SqlParser_Diags
// (the diags area maintained by parser).
//
Lng32 diagsMark = CmpCommon::diags()->mark();
Lng32 initialErrCnt = SqlParser_Diags->getNumber(DgSqlCode::ERROR_);
Int32 parseError = 1; // error
// This static flag will be TRUE on entry if a previous yyparse ComASSERTed
// (longjmp'd), which the try block below does *NOT* catch...
// The flag lives on the CmpContext when there is one, otherwise in the
// file-level resetIsNeeded. It is set to TRUE here and cleared again after a
// normal return from the parse below.
if ( cmpContext() )
{
if ( cmpContext()->getParserResetIsNeeded() ) reset( TRUE );
else cmpContext()->setParserResetIsNeeded( TRUE );
}
else
{
if ( resetIsNeeded ) reset( TRUE );
else resetIsNeeded = TRUE ;
}
// SqlParser_Diags is initialized elsewhere, not here.
SqlParser_NADefaults_Glob =
ActiveSchemaDB()->getDefaults().getSqlParser_NADefaults();
SqlParser_ParamItemList = paramItemList;
SqlParser_ParenDepth = 0;
SqlParser_WheneverClause = FALSE;
TheParseTree = NULL;
// SqlParser_Flags is *not* initialized prior to calling yyparse,
// it's only reset to zero *afterwards*.
// This allows Binder/Catman/DDL-Rfork to set flags before calling Parser.
// Only internal *module* is trusted, not internal mdf...
if (cmpContext() &&
((cmpContext()->internalCompile() == CmpContext::INTERNAL_MODULENAME)||
(cmpContext()->statement() && cmpContext()->statement()->isSMDRecompile())))
Set_SqlParser_Flags(ALLOW_SPECIALTABLETYPE);
if ( internalExpr == INTERNALEXPR_TOKEN )
Set_SqlParser_Flags(ALLOW_UNKNOWN_CHARSET);
try
{
if (wInputStr() &&
stringScanWillTerminateInParser(wInputStr(), internalExpr,
wInputStrLen()))
{
// lex and parse the wide (Unicode) text with a fresh lexer
delete lexer;
lexer = new yyULexer(wInputStr(), wInputStrLen());
parseError = yyparse(); // yyparse returns 0 if success
}
else
// yyparse was never entered, so no reset is pending; parseError keeps its
// initial value of 1.
if ( cmpContext() ) cmpContext()->setParserResetIsNeeded( FALSE );
else resetIsNeeded = FALSE;
// A "successful" parse that nevertheless added errors to the parser diags
// is treated as a failure.
if (!parseError &&
initialErrCnt < SqlParser_Diags->getNumber(DgSqlCode::ERROR_))
parseError = 1; // error
}
catch(EHBreakException&)
{
cerr << "Parser exception :" << endl;
cerr << *SqlParser_Diags;
NAExit(1);
}
catch(...)
{
parseError = 1; // error
}
// Should be impossible to satisfy this test, but just in case...
// (a failure with no error recorded: let yyerror add one)
if (parseError &&
initialErrCnt >= SqlParser_Diags->getNumber(DgSqlCode::ERROR_))
yyerror(""); // call before any reinit/reset
// This marking and moving seems hokey to me now:
// Here we're moving diags that were inserted into common after the mark
// (i.e., by compiler components during this parse) --
// moving those diags into the parser area,
// to follow diags put into there during this parse.
// Then we copy all the parser diags back to the common diags
// appending after the original mark (to which common diags were rewound).
// Seems like we could forgo the diags mark above and the rewindAndMerge here,
// doing just the mergeAfter, with no loss of information.
//
// It is the common diags that end up getting displayed.
//
CmpCommon::diags()->rewindAndMergeIfDifferent(diagsMark, SqlParser_Diags);
CmpCommon::diags()->mergeAfter(*SqlParser_Diags);
// Reinitialize our globals (failing to do this sometimes results in
// spurious error messages!)
if ( cmpContext() )
{
if ( cmpContext()->getParserResetIsNeeded() )
{
reset();
cmpContext()->setParserResetIsNeeded( FALSE );
}
}
else
{
if ( resetIsNeeded ) { reset() ; resetIsNeeded = FALSE ; }
}
// On failure any partially built tree is discarded.
if (parseError)
{
delete TheParseTree;
TheParseTree = NULL;
}
// if this query generated a utilISPNode but is also recognized by sql
// parser, then use the sql parser generated node.
// If sql parser doesn't recognize it, clear diags area and return the
// utilISP node.
if (utilISPNode)
{
if (parseError)
{
CmpCommon::diags()->clear();
*node = utilISPNode;
parseError = 0;
}
else
{
delete utilISPNode;
*node = TheParseTree;
}
}
else if (SqlParser_CurrentParser->hiveDDLInfo_->foundDDL_)
{
// if a hive ddl object was found during parsing, generate ddl expr tree.
// foundDDL_ could be set during successful parsing as well as for
// a query which gave a syntax error.
if (TheParseTree)
delete TheParseTree;
TheParseTree = NULL;
*node = NULL;
if ((processHiveDDL(SqlParser_CurrentParser->hiveDDLInfo_, node)) &&
(*node != NULL))
parseError = 0; // hive DDL found and node has been generated
else
parseError = 1; // error
}
else if (SqlParser_CurrentParser->hiveDDLInfo_->backquotedDelimFound_)
{
// backquote delim identifier only valid for hive objects.
// NOTE(review): *node is not assigned on this path.
if (TheParseTree)
delete TheParseTree;
TheParseTree = NULL;
parseError = 1;
}
else
{
// normal case: hand back the tree built by yyparse (NULL after a failure)
*node = TheParseTree;
}
return parseError;
}
// parseDML widens the locale-based str and scans & parses it
//
// instr : statement text, inlen bytes long, encoded in charset; it is
// copied, so the parser does not keep a pointer to it
// inlen : length of instr in bytes (must be even for UCS2)
// charset : character set of instr
// node : (OUT) parse tree, filled in by parseSQL
// internalExpr : passed through to parseSQL
// paramItemList : passed through to parseSQL
//
// Builds both the wide buffer (wInputBuf_, used for lexing) and the narrow
// buffer (inputBuf_), each terminated by ';' and a null, then calls parseSQL.
// Returns 1 if a character set conversion fails, otherwise the result of
// parseSQL.
Int32 Parser::parseDML(const char *instr, Int32 inlen,
CharInfo::CharSet charset,
ExprNode **node,
Int32 internalExpr,
ItemExprList *paramItemList)
{
initialInputCharSet_ = charset;
// Drop the buffer left over from a previous parse that is about to be
// refilled, first asserting that instr does not point into it.
if (charset == CharInfo::UCS2 && wInputBuf_ != NULL)
{
PARSERASSERT(wInputBuf_->data() != (NAWchar *)instr);
PARSERASSERT((inlen & 1) == 0); // inlen must be an even number
NADELETE(wInputBuf_, NAWcharBuf, wHeap());
wInputBuf_ = NULL;
}
else if (charset != CharInfo::UCS2 && inputBuf_ != NULL)
{
PARSERASSERT(inputBuf_->data() != (unsigned char *)instr);
NADELETE(inputBuf_, charBuf, wHeap());
inputBuf_ = NULL;
}
// NOTE(review): len is assigned below but never read in this function.
Int32 len = 0;
if (charset == CharInfo::UCS2)
{
// Wide input: copy it into wInputBuf_ (with 16 bytes of slack), terminate
// it and fix up its tail; the narrow buffer is discarded for now.
NADELETE(wInputBuf_, NAWcharBuf, wHeap());
size_t wInputBufSizeInNAWchars = (size_t)((inlen/*in_bytes*/ + 16) / BYTES_PER_NAWCHAR);
wInputBuf_ = new(wHeap()) NAWcharBuf(wInputBufSizeInNAWchars, wHeap());
wInputBuf_->setStrLen/*in_NAWchars*/(inlen/*in_bytes*/ / BYTES_PER_NAWCHAR);
NAWstrncpy(wInputBuf_->data(), (const NAWchar *)instr, wInputBuf_->getStrLen());
wInputBuf_->data()[wInputBuf_->getStrLen()] = 0;
fixupParserWInputBufAndAppendSemicolon();
len = wInputStrLen(); /* in NAWchars */
if (inputBuf_ != NULL)
{
NADELETE(inputBuf_, charBuf, wHeap());
inputBuf_ = NULL;
}
}
else
{
// Narrow input: copy it into inputBuf_ (with 16 bytes of slack), terminate
// it and fix up its tail; the wide buffer is discarded for now.
NADELETE(inputBuf_, charBuf, wHeap());
inputBuf_ = new(wHeap()) charBuf ( inlen + 16 // buffer size in bytes
, wHeap()
);
memcpy((void *)inputBuf_->data(), (void *)instr, inlen/*in_bytes*/);
inputBuf_->setStrLen(inlen);
inputBuf_->data()[inlen] = 0;
fixupParserInputBufAndAppendSemicolon();
len = inputStrLen(); /* in bytes */
if (wInputBuf_ != NULL)
{
NADELETE(wInputBuf_, NAWcharBuf, wHeap());
wInputBuf_ = NULL;
}
}
Int32 errorcode = 0;
Int32 charCount = 0;
Int32 errorByteOff = 0;
// Derive the wide buffer from the narrow one. inputBuf_ is non-NULL only on
// the non-UCS2 path above, so the UCS2 case below appears to be unreachable.
if (inputBuf_) {
switch (charset) {
case SQLCHARSETCODE_ISO88591:
// inputBuf_ was already allocated and fixed up at the beginning of the routine
wInputBuf_ = ISO88591ToUnicode(*inputBuf_, wHeap(), wInputBuf_);
break;
case SQLCHARSETCODE_UCS2:
// wInputBuf_ was already allocated and fixed up at the beginning of the routine
// inputBuf_ == NULL
inputBuf_ = unicodeToISO88591(*wInputBuf_, wHeap(), inputBuf_);
break;
case SQLCHARSETCODE_EUCJP:
case SQLCHARSETCODE_SJIS:
case SQLCHARSETCODE_GB18030:
case SQLCHARSETCODE_GB2312:
case SQLCHARSETCODE_GBK:
case SQLCHARSETCODE_MB_KSC5601:
case SQLCHARSETCODE_BIG5:
case SQLCHARSETCODE_UTF8:
// inputBuf_ was already allocated and fixed up at the beginning of the routine
// wInputBuf_ == NULL
wInputBuf_ = parserCharSetToUTF16(*inputBuf_, wHeap(), wInputBuf_, charset,
errorcode, TRUE, &charCount, &errorByteOff);
if (errorcode) return 1;
break;
default:
{ Int32 CharsetNotSupported=0; PARSERASSERT(CharsetNotSupported); }
break;
}
}
charset_ = charset; // needed by lexer
// Whatever the input character set was, the narrow buffer is rebuilt as
// UTF8 from the wide buffer, and charset_ is switched to UTF8 to match.
if (wInputStrLen() > 0 && charset_ != CharInfo::UTF8)
{
charset_ = CharInfo::UTF8;
NADELETE(inputBuf_, charBuf, wHeap());
inputBuf_ = NULL; // must be set to NULL for the following call to work correctly
inputBuf_ = parserUTF16ToCharSet ( *wInputBuf_, wHeap(), inputBuf_, charset_, errorcode
, TRUE // NABoolean addNullAtEnd
, FALSE // NABoolean allowInvalidCodePoint
);
if (errorcode) return 1;
}
ParScannedInputCharset = charset_;
// The conversions above may have produced new buffers, so fix up the tail
// of each non-empty buffer once more.
if (inputStr() != NULL && inputStrLen() > 0)
fixupParserInputBufAndAppendSemicolon();
if (wInputStr() != NULL && wInputStrLen() > 0)
fixupParserWInputBufAndAppendSemicolon();
// scan & parse it
return parseSQL(node, internalExpr, paramItemList);
}
// parseDML (QueryText overload): builds the parser's narrow (inputBuf_) and
// wide (wInputBuf_) input buffers from the text held in txt, then scans &
// parses it by calling parseSQL().
//
// txt            - query text; txt.charSet() selects the conversion path below
// node           - passed through unchanged to parseSQL()
// internalExpr   - passed through unchanged to parseSQL()
// paramItemList  - passed through unchanged to parseSQL()
//
// Returns 1 if one of the character set conversions reports an error (in
// that case parseSQL() is not called and *node is not assigned by this
// routine); otherwise returns the value returned by parseSQL().
Int32 Parser::parseDML(QueryText& txt,
ExprNode **node,
Int32 internalExpr,
ItemExprList *paramItemList)
{
// Remember the character set the text arrived in. It is tested again further
// down, after charset_ may have been changed, to decide whether a UTF-8
// narrow buffer has to be generated.
initialInputCharSet_ = (CharInfo::CharSet)txt.charSet();
// set up input string buffer. avoid SqlParser globals. (tcr)
NADELETE(inputBuf_, charBuf, wHeap());
NADELETE(wInputBuf_, NAWcharBuf, wHeap());
inputBuf_ = NULL; // Set both buffers to NULL. Otherwise the conversion
// routines below will assume they are valid and
// write on deleted memory.
wInputBuf_ = NULL;
Int32 len = txt.octetLength();
// NOTE(review): txt.text() is read here for every character set, including
// UCS2 - confirm that text() is valid for wide query text.
while (len > 0 && txt.text()[len - 1] == 0) // exclude trailing null characters from the count
len--;
charset_ = (CharInfo::CharSet)txt.charSet(); // needed by lexer
Int32 errorcode = 0;
Int32 charCount = 0;
Int32 errorByteOff = 0;
switch ((SQLCHARSET_CODE)charset_) {
case SQLCHARSETCODE_ISO88591:
// Wrap the caller's bytes (without the trailing nulls) and widen them.
inputBuf_ = new (wHeap()) charBuf((unsigned char*)txt.text(), len); // shallow copy
wInputBuf_ = ISO88591ToUnicode(*inputBuf_, wHeap(), wInputBuf_);
#ifndef NDEBUG
// Debug builds only: when the environment variable UCS2_SQL_TEXT_DEBUG is
// set, charset_ is switched to UNICODE for ISO88591 input.
if ( getenv("UCS2_SQL_TEXT_DEBUG") ) {
charset_ = CharInfo::UNICODE;
}
#endif
break;
case SQLCHARSETCODE_UCS2:
// The text is already wide: copy txt.length() characters into a new buffer
// that has 4 extra elements, then null-terminate it and record its length.
// inputBuf_ stays NULL on this path.
wInputBuf_ = new (wHeap()) NAWcharBuf(txt.length() + 4, wHeap());
NAWstrncpy(wInputBuf_->data(), txt.wText(), txt.length());
wInputBuf_->data()[txt.length()] = NAWCHR('\0');
wInputBuf_->setStrLen(txt.length());
break;
case SQLCHARSETCODE_EUCJP:
case SQLCHARSETCODE_SJIS:
case SQLCHARSETCODE_GB18030:
case SQLCHARSETCODE_GB2312:
case SQLCHARSETCODE_GBK:
case SQLCHARSETCODE_MB_KSC5601:
case SQLCHARSETCODE_BIG5:
case SQLCHARSETCODE_UTF8:
// Multi-byte character sets: wrap the caller's bytes and convert them to
// UTF-16; a non-zero errorcode abandons the parse.
inputBuf_ = new (wHeap()) charBuf((unsigned char*)txt.text(), len);
wInputBuf_ = parserCharSetToUTF16(*inputBuf_, wHeap(), wInputBuf_, charset_,
errorcode, TRUE, &charCount, &errorByteOff);
if (errorcode) return 1;
break;
default:
// Any other character set: assert on a value that is always zero.
{ Int32 CharsetNotSupported=0; PARSERASSERT(CharsetNotSupported); }
break;
}
//*****************************************************************
// Do NOT #ifdef or comment out this end-of-string (len ';' '\0') code,
// without an extremely valid reason!
// At least 3 submits to Redfish have been delayed due to bugs from missing
// nul-terminator on sqltext, in ODBC and DDOL regression tests!
//*****************************************************************
fixupParserWInputBufAndAppendSemicolon();
// When the text did not arrive as UTF-8, drop the narrow buffer and rebuild
// it as UTF-8 from the wide buffer; charset_ is switched to UTF8 to match.
if (wInputStrLen() > 0 && initialInputCharSet_ != CharInfo::UTF8)
{
charset_ = CharInfo::UTF8;
NADELETE(inputBuf_, charBuf, wHeap());
inputBuf_ = NULL; // must be set to NULL for the following call to work correctly
inputBuf_ = parserUTF16ToCharSet ( *wInputBuf_, wHeap(), inputBuf_, charset_, errorcode
, TRUE // NABoolean addNullAtEnd
, FALSE // NABoolean allowInvalidCodePoint
);
if (errorcode) return 1;
// NOTE(review): ParScannedInputCharset is assigned only inside this branch,
// whereas parse_w_DML assigns it unconditionally - confirm this is intended.
ParScannedInputCharset = charset_;
} // if (wInputStrLen() > 0 && initialInputCharSet_ != CharInfo::UTF8)
// Apply the end-of-string fixup to whichever buffers are non-empty.
if (inputStr() != NULL && inputStrLen() > 0)
fixupParserInputBufAndAppendSemicolon();
if (wInputStr() != NULL && wInputStrLen() > 0)
fixupParserWInputBufAndAppendSemicolon();
// scan & parse it
return parseSQL(node, internalExpr, paramItemList);
}
// instr is a unicode-encoded SQL statement (or stmt fragment) of inlen
// characters; scan and parse it. A UTF-8 narrow copy of the text is also
// built in inputBuf_ (see the original note: the narrow form is used for
// other text processing, such as error reporting).
//
// instr          - wide-character text; copied, so it need not outlive the call
// inlen          - number of NAWchar elements of instr to copy
// node, internalExpr, paramItemList - passed through unchanged to parseSQL()
//
// Returns 1 if the UTF-16 to UTF-8 conversion reports an error (parseSQL()
// is then not called); otherwise returns the value returned by parseSQL().
Int32 Parser::parse_w_DML(const NAWchar *instr, Int32 inlen,
ExprNode **node,
Int32 internalExpr,
ItemExprList *paramItemList
)
{
initialInputCharSet_ = CharInfo::UCS2;
if (wInputBuf_ != NULL)
{
// The old wide buffer is about to be deleted, so the caller must not be
// handing us a pointer into it.
PARSERASSERT(wInputBuf_->data() != instr);
NADELETE(wInputBuf_, NAWcharBuf, wHeap());
}
wInputBuf_ = new (wHeap()) NAWcharBuf ( inlen + 4 // extra space for semicolon and null characters
, wHeap()
);
NAWstrncpy(wInputBuf_->data(), instr, inlen);
// Fill the remaining with null characters
wInputBuf_->zeroOutBuf(inlen/*Int32 startPos*/);
wInputBuf_->setStrLen(inlen);
fixupParserWInputBufAndAppendSemicolon();
// set up input string buffer. avoid SqlParser globals. (tcr)
NADELETE(inputBuf_, charBuf, wHeap());
inputBuf_ = NULL;
charset_ = CharInfo::UCS2; // needed by lexer
if (wInputStrLen() > 0)
{
charset_ = CharInfo::UTF8; // needed by lexer
Int32 errorcode = 0;
// inputBuf_ was already deleted and set to NULL a few lines above, so this
// second NADELETE is handed a NULL pointer (presumably a no-op - verify).
NADELETE(inputBuf_, charBuf, wHeap());
inputBuf_ = NULL; // set to NULL to ask parserUTF16ToCharSet to allocate a new buffer
inputBuf_ = parserUTF16ToCharSet ( *wInputBuf_, wHeap(), inputBuf_, charset_, errorcode
, TRUE // NABoolean addNullAtEnd
, FALSE // NABoolean allowInvalidCodePoint
);
if (errorcode) return 1;
}
// UTF8 when a narrow copy was generated above, UCS2 when the input was empty.
ParScannedInputCharset = charset_;
// Apply the end-of-string fixup to whichever buffers are non-empty.
if (inputStr() != NULL && inputStrLen() > 0)
fixupParserInputBufAndAppendSemicolon();
if (wInputStr() != NULL && wInputStrLen() > 0)
fixupParserWInputBufAndAppendSemicolon();
// scan & parse it
return parseSQL(node, internalExpr, paramItemList);
}
// Convenience overload of parseDML: parses len bytes of str (encoded in
// charset) with internalExpr = 0 and no parameter list, and hands back the
// resulting tree. The integer status of the underlying parseDML call is
// discarded; the result stays NULL if that call never assigns a node.
ExprNode *Parser::parseDML(const char *str, Int32 len, CharInfo::CharSet charset)
{
  ExprNode *parseTree = NULL;

  parseDML(str, len, charset,
           &parseTree,
           0,      // internalExpr
           NULL);  // paramItemList

  return parseTree;
}
// Parses the expression (or statement) text in str and returns its tree.
//
// str          - text to parse; need not be null-terminated when strlength
//                is supplied
// strlength    - number of bytes of str to use, or 0 to use strlen(str)
// strCharSet   - character set of str
// num_params   - how many of p1..p6 are to be passed to the parser
// p1..p6       - individual parameters
// otherParams  - optional additional parameters, appended after p1..p6
// internal_expr- non-zero selects INTERNALEXPR_TOKEN as the start token
//                (getItemExprTree is such a caller), zero selects NORMAL_TOKEN
//
// Returns the node produced by parseDML, or NULL if none was produced.
// SqlParser_Flags are saved before and restored after the parse.
ExprNode *Parser::getExprTree(const char * str,
                              UInt32 strlength,
                              CharInfo::CharSet strCharSet,
                              Int32 num_params,
                              ItemExpr * p1,
                              ItemExpr * p2,
                              ItemExpr * p3,
                              ItemExpr * p4,
                              ItemExpr * p5,
                              ItemExpr * p6,
                              ItemExprList * otherParams,
                              Int32 internal_expr) // getItemExprTree is caller
{
  char *newstr;
  SQLParserStartToken token =
    (internal_expr ? INTERNALEXPR_TOKEN : NORMAL_TOKEN);

  // If strlength is passed in, use it so non-null-terminated strings
  // can be passed to parser.
  const size_t givenLen = ((strlength > 0) ? strlength : strlen(str));
  size_t newlen = givenLen;

  // Exclude trailing null characters from the count
  while (newlen > 0 && str[newlen - 1] == 0)
    newlen--;

  // str[newlen] is known to be a null character only when the length came
  // from strlen(), or when the loop above stripped at least one null. In any
  // other case str[newlen] lies beyond the length the caller vouched for and
  // must not be read.
  const NABoolean terminatorKnown = (strlength == 0 || newlen < givenLen);
  const NABoolean endsWithSemicolon = (newlen >= 2 && str[newlen-1] == ';');

  if (endsWithSemicolon && terminatorKnown)
    {
      newstr = (char *)str; // it really is const but C++ doesn't know it
    }
  else
    {
      // Build a private copy that ends with a semicolon and a null character
      // (required by the parser). A semicolon is added only if the text does
      // not already end with one.
      newstr = new(wHeap()) char[newlen + 1 + 1];
      str_cpy_all(newstr, str, newlen);
      if (!endsWithSemicolon)
        {
          newstr[newlen] = ';' ;
          newlen++;
        }
      newstr[newlen] = '\0';
    }

  ExprNode *node = NULL;
  ItemExprList *paramItemList = NULL;

  // num_params refers only to the 6 params passed as separate arguments, and
  // does not include the number of entries in otherParams. Either the individual
  // parameters, the list, or both may be used (but typically only one or the
  // other will).
  if (num_params > 0 || (otherParams && otherParams->entries() > 0))
    {
      paramItemList = new(wHeap()) ItemExprList(wHeap());
      if (num_params >= 1) paramItemList->insert(p1);
      if (num_params >= 2) paramItemList->insert(p2);
      if (num_params >= 3) paramItemList->insert(p3);
      if (num_params >= 4) paramItemList->insert(p4);
      if (num_params >= 5) paramItemList->insert(p5);
      if (num_params >= 6) paramItemList->insert(p6);
      if (otherParams && otherParams->entries() > 0)
        paramItemList->insert(*otherParams);
    }

  // parseDML method resets all SqlParser_Flags.
  // save the current SqlParser_Flags and restore them after parse step.
  ULng32 saved_SqlParser_Flags = SqlParser_Flags;
  parseDML(newstr, newlen, strCharSet, &node, token, paramItemList);
  delete paramItemList;

  // restore the saved SqlParser_Flags
  Set_SqlParser_Flags(saved_SqlParser_Flags);

  // Release the private copy, if one was made.
  if (newstr != str)
    NADELETEBASIC(newstr, wHeap());
  return node;
}
// Wide-character counterpart of getExprTree: parses the UCS2 text in str and
// returns its tree.
//
// str          - text to parse; need not be null-terminated when strlength
//                is supplied
// strlength    - number of NAWchar elements of str to use, or 0 to use
//                NAWstrlen(str)
// num_params   - how many of p1..p6 are to be passed to the parser
// p1..p6       - individual parameters
// (unnamed)    - ItemExprList argument, accepted but not used
// internal_expr- non-zero selects INTERNALEXPR_TOKEN as the start token,
//                zero selects NORMAL_TOKEN
//
// Returns the node produced by parse_w_DML, or NULL if none was produced.
ExprNode *Parser::get_w_ExprTree(const NAWchar * str, // strCharSet should be CharInfo::UCS2
                                 UInt32 strlength,
                                 Int32 num_params,
                                 ItemExpr * p1,
                                 ItemExpr * p2,
                                 ItemExpr * p3,
                                 ItemExpr * p4,
                                 ItemExpr * p5,
                                 ItemExpr * p6,
                                 ItemExprList * /*paramItemList not used*/,
                                 Int32 internal_expr) // getItemExprTree is caller
{
  NAWchar *newstr;
  SQLParserStartToken token =
    (internal_expr ? INTERNALEXPR_TOKEN : NORMAL_TOKEN);

  // If strlength is passed in, use it so non-null-terminated strings
  // can be passed to parser.
  const size_t givenLen = ((strlength > 0) ? strlength : NAWstrlen(str));
  size_t newlen = givenLen;

  // Exclude trailing null characters from the count
  while (newlen > 0 && str[newlen - 1] == 0)
    newlen--;

  // str[newlen] is known to be a null character only when the length came
  // from NAWstrlen(), or when the loop above stripped at least one null. In
  // any other case str[newlen] lies beyond the length the caller vouched for
  // and must not be read.
  const NABoolean terminatorKnown = (strlength == 0 || newlen < givenLen);
  const NABoolean endsWithSemicolon =
    (newlen >= 2 && str[newlen-1] == NAWCHR(';'));

  if (endsWithSemicolon && terminatorKnown)
    newstr = (NAWchar *)str; // it really is const but C++ doesn't know it
  else
    {
      // Build a private copy that ends with a semicolon and a null character
      // (required by the parser). A semicolon is added only if the text does
      // not already end with one.
      newstr = new(wHeap()) NAWchar[newlen + 1 + 1];
      NAWstrncpy(newstr, str, newlen);
      if (!endsWithSemicolon)
        {
          newstr[newlen] = NAWCHR(';') ;
          newlen++;
        }
      newstr[newlen] = NAWCHR('\0');
    }

  ExprNode *node = NULL;
  ItemExprList *paramItemList = NULL;
  if (num_params > 0)
    {
      paramItemList = new(wHeap()) ItemExprList(wHeap());
      /**(num_params >= 1)**/ paramItemList->insert(p1);
      if (num_params >= 2) paramItemList->insert(p2);
      if (num_params >= 3) paramItemList->insert(p3);
      if (num_params >= 4) paramItemList->insert(p4);
      if (num_params >= 5) paramItemList->insert(p5);
      if (num_params >= 6) paramItemList->insert(p6);
    }

  parse_w_DML(newstr, newlen, &node, token, paramItemList);
  delete paramItemList;

  // Release the private copy, if one was made.
  if (newstr != str)
    NADELETEBASIC(newstr, wHeap());
  return node;
}
// Parses str as an internal expression by delegating to getExprTree with
// INTERNALEXPR_TOKEN, asserts that any tree returned is an item expression
// (operator type within ITM_FIRST_ITEM_OP..ITM_LAST_ITEM_OP), and returns it
// as an ItemExpr. A NULL result from getExprTree is passed back as NULL.
ItemExpr *Parser::getItemExprTree(const char * str,
                                  UInt32 len,
                                  CharInfo::CharSet strCharSet,
                                  Int32 num_params,
                                  ItemExpr * p1,
                                  ItemExpr * p2,
                                  ItemExpr * p3,
                                  ItemExpr * p4,
                                  ItemExpr * p5,
                                  ItemExpr * p6,
                                  ItemExprList * paramItemList)
{
  ExprNode *et = getExprTree(str,
                             len,
                             strCharSet,
                             num_params,
                             p1, p2, p3,
                             p4, p5, p6,
                             paramItemList,
                             INTERNALEXPR_TOKEN);

  // The assertion text is left exactly as it was, since it may be reported
  // verbatim in the diagnostic.
  PARSERASSERT(et == NULL ||
               (et->getOperatorType() >= ITM_FIRST_ITEM_OP &&
                et->getOperatorType() <= ITM_LAST_ITEM_OP));

  return static_cast<ItemExpr *>(et);
}
// Wide-character counterpart of getItemExprTree: delegates to get_w_ExprTree
// with INTERNALEXPR_TOKEN, asserts that any tree returned is an item
// expression (operator type within ITM_FIRST_ITEM_OP..ITM_LAST_ITEM_OP), and
// returns it as an ItemExpr. A NULL result is passed back as NULL.
ItemExpr *Parser::get_w_ItemExprTree(const NAWchar * str,
                                     UInt32 len,
                                     Int32 num_params,
                                     ItemExpr * p1,
                                     ItemExpr * p2,
                                     ItemExpr * p3,
                                     ItemExpr * p4,
                                     ItemExpr * p5,
                                     ItemExpr * p6,
                                     ItemExprList * paramItemList)
{
  ExprNode *et = get_w_ExprTree(str,
                                len,
                                num_params,
                                p1, p2, p3,
                                p4, p5, p6,
                                paramItemList,
                                INTERNALEXPR_TOKEN);

  // The assertion text is left exactly as it was, since it may be reported
  // verbatim in the diagnostic.
  PARSERASSERT(et == NULL ||
               (et->getOperatorType() >= ITM_FIRST_ITEM_OP &&
                et->getOperatorType() <= ITM_LAST_ITEM_OP));

  return static_cast<ItemExpr *>(et);
}
// Parses a single column definition and returns the resulting node.
//
// str        - column definition text; need not be null-terminated when
//              strLen is supplied
// strLen     - number of bytes of str to use, or 0 to use strlen(str)
// strCharSet - character set of str
//
// The text is copied into a private buffer with a semicolon and a null
// character appended, and parsed with COLUMNDEF_TOKEN as the start token.
// Returns the node produced by parseDML cast to ElemDDLColDef*, or NULL if
// parseDML did not produce one.
ElemDDLColDef* Parser::parseColumnDefinition(const char* str, size_t strLen, CharInfo::CharSet strCharSet)
{
  // Start from NULL so that a parseDML call that never assigns the node
  // cannot make us return an indeterminate pointer.
  ExprNode* node = NULL;

  // If strLen is passed in, use it so non-null-terminated strings can be
  // passed to parser. The text ends at the first null character found within
  // strLen bytes, or at strLen if there is none; str is never read beyond
  // strLen.
  size_t textLen;
  if (strLen > 0)
    {
      const void* firstNul = memchr(str, 0, strLen);
      textLen = firstNul ? (size_t)((const char*)firstNul - str) : strLen;
    }
  else
    textLen = strlen(str);

  // Room for the text, the semicolon and the null terminator.
  Int32 len = (Int32)textLen + 2;
  char* newStr = new(wHeap()) char[len];

  // Copy exactly textLen bytes; unlike sprintf("%s;"), this can neither
  // overrun newStr nor leave part of it uninitialized.
  memcpy(newStr, str, textLen);
  newStr[textLen] = ';';
  newStr[textLen + 1] = '\0';

  parseDML(newStr, len, strCharSet, &node, COLUMNDEF_TOKEN, NULL);

  // parseDML is expected to always return, should not jump to other places.
  // so the following delete will always be performed.
  NADELETEBASIC(newStr, wHeap());
  return (ElemDDLColDef*)node;
}
NABoolean Parser::parseUtilISPCommand(const char* command, size_t cmdLen, CharInfo::CharSet cmdCharSet, ExprNode** node)
{
if (cmdLen == 0)
cmdLen = strlen(command);
Int32 inStrLen = cmdLen;
// Exclude trailing null characters from the count
while (inStrLen > 0 && command[inStrLen - 1] == 0)
inStrLen--;
static const char* UtilISPToken[] =
{ "PURGEDATA", "POPULATE", "RECOVER", "REFRESH", "UPGRADE", "DOWNGRADE", "VALIDATE", "TRANSFORM","" };
static const char* UtilISPName[] =
{ "sp_purgedata", "sp_populate", "sp_recover", "sp_refresh", "sp_SchLevel", "sp_SchLevel", "sp_validate", "sp_transform","" };
static const char* displayString = "DISPLAY";
static const char* tokenDelimiter=" \t\r\n\0";
NABoolean displayFound = FALSE;
char* tempStr = new (wHeap()) char[inStrLen + 1 ];
memcpy((void *)tempStr, (void *)command, (size_t)inStrLen);
tempStr[inStrLen] = 0;
char* p = strtok(tempStr, tokenDelimiter);
// Upshift the token before comparing
unsigned char *puc = (unsigned char *)p;
for ( ; *puc != '\0'; puc++)
*puc = (unsigned char)(TOUPPER(*puc));
if (p && _stricmp(p, displayString)== 0 )
{
displayFound = TRUE;
p = strtok(NULL, tokenDelimiter);
}
NABoolean utilISPFound = FALSE;
NABoolean isPurgedata = FALSE;
Int32 index = 0;
if (p)
{
if (displayFound)
{
// Upshift the token before comparing
puc = (unsigned char *)p;
for ( ; *puc != '\0'; puc++)
*puc = (unsigned char)(TOUPPER(*puc));
}
for ( index=0; !utilISPFound && strlen(UtilISPToken[index]) > 0 ; index++ )
if ( _stricmp(p, UtilISPToken[index]) == 0 )
{
utilISPFound = TRUE;
if (_stricmp(p, "PURGEDATA") == 0)
isPurgedata = TRUE;
}
}
if (node)
*node = NULL;
if ( utilISPFound )
{
*node = NULL;
} // utilISPFound
NADELETEBASIC(tempStr, wHeap());
return utilISPFound;
}
// ------------------------------------------------------------------------
// processSpecialDDL:
//
// If the request is a "special DDL request", go ahead and generate the
// DDLExpr node
//
// Special DDL requests consist of:
// UPDATE STATISTICS
// HIVE DDL request
//
// inputStr    - request text; may be NULL, and need not be null-terminated
//               when inputStrLen is supplied
// inputStrLen - number of bytes of inputStr to use, or 0 to use
//               strlen(inputStr)
// childNode   - if not NULL, a request that does not start with UPDATE is
//               treated as special DDL with this node as the DDLExpr child
// inputStrCS  - character set of inputStr; must not be UCS2
// node        - receives the StmtQuery built for a special DDL request
//
// return TRUE: if a special DDL request or error.
// : if error, node returned is NULL.
// return FALSE: if need to call SQL/MX parser after return from here
//               (always the case for an internal compile).
// -------------------------------------------------------------------------
NABoolean Parser::processSpecialDDL(const char* inputStr, size_t inputStrLen,
                                    ExprNode * childNode,
                                    CharInfo::CharSet inputStrCS,
                                    ExprNode** node)
{
  if (cmpContext() && cmpContext()->internalCompile())
    return FALSE;

  PARSERASSERT(inputStrCS != CharInfo::UCS2);

  NABoolean ustat = FALSE; // will be TRUE if the special DDL is for Update Statistics
  CharInfo::CharSet inputStrCharSet = inputStrCS;

  // Fix up input string:
  // Get rid of leading blanks
  // Strip off the leading "DISPLAY" if found
  NAString ns(wHeap());

  // inputStr is examined only after it is known to be non-NULL; a NULL
  // inputStr simply leaves ns empty.
  if (inputStr != NULL)
    {
      if (inputStrLen == 0)
        inputStrLen = strlen(inputStr);
      Int32 newStrLen = inputStrLen;

      // Exclude trailing null characters from the count
      while (newStrLen > 0 && inputStr[newStrLen-1] == 0)
        newStrLen--;

      ns.append(inputStr, (size_t)newStrLen);
    }

  // skip leading blanks
  ns = ns.strip(NAString::leading, ' ');

  // if first token is display, skip it. Remember that it was a display.
  NABoolean displayFound = FALSE;
  size_t position = ns.index("DISPLAY", 0, NAString::ignoreCase);
  if (position == 0)
    {
      // found DISPLAY. Remember it and skip it.
      displayFound = TRUE;
      ns = ns(7, ns.length()-7);
      ns = ns.strip(NAString::leading, ' ');
    }

  // Now go and see if request is a special DDL request
  NABoolean specialDDL = FALSE;
  NABoolean xnNeeded = FALSE;

  // Check for UPDATE STATISTICS
  if (ns.index("UPDATE", 0, NAString::ignoreCase) == 0)
    {
      NAString nstemp = ns;
      nstemp = nstemp(6, nstemp.length()-6); // skip over UPDATE
      nstemp = nstemp.strip(NAString::leading, ' ');
      if (nstemp.index("STATISTICS", 0, NAString::ignoreCase) == 0)
        {
          specialDDL = TRUE; // UPDATE STATISTICS
          ustat = TRUE;
          // do not start Xn at runtime.
          xnNeeded = FALSE;
        }
    }
  else if (childNode)
    {
      // Not an UPDATE request, but the caller supplied a child node.
      ustat = FALSE;
      specialDDL = TRUE;
      xnNeeded = FALSE;
    }

  // If a special DDL is found, go ahead and create a DDLExpr node
  if (specialDDL)
    {
      *node = NULL;
      DDLExpr * ddlExpr = new(CmpCommon::statementHeap())
        DDLExpr(childNode, (char *)ns.data(), inputStrCharSet,
                CmpCommon::statementHeap());
      RelExpr *queryExpr = new(CmpCommon::statementHeap())
        RelRoot(ddlExpr);
      ddlExpr->xnNeeded() = xnNeeded;
      ddlExpr->specialDDL() = TRUE;

      // Indicate whether the special DDL is an Update Stats
      ddlExpr->isUstat() = ustat;

      // indicate that this is the root for the entire query
      ((RelRoot *) queryExpr)->setRootFlag(TRUE);
      if (displayFound)
        ((RelRoot *)queryExpr)->setDisplayTree(TRUE);

      StmtQuery* query = new(wHeap())StmtQuery(queryExpr);
      *node = query;
      return TRUE;
    }

  return FALSE;
}
// Resets the lexer object, when one exists, and then reinitializes the
// parser's scan-state globals: token position and offset go back to zero,
// the scanned input character set goes back to UTF8, and the name location
// list pointer is cleared.
void Parser::ResetLexer(void)
{
  if (lexer)
    {
      lexer->reset();
    }

  ParScannedTokenPos     = 0;
  ParScannedTokenOffset  = 0;
  ParScannedInputCharset = SQLCHARSETCODE_UTF8;
  ParNameLocListPtr      = NULL;
}
// Appends the token most recently scanned by the current parser to the
// normalized key text.
//
//  - A literal (per the lexer's isLiteral4HQC) contributes the placeholder
//    "#np# " to the key, and its text is recorded in the NP-literal list.
//  - Any other token is lower-cased. A first token beginning with "select",
//    "update" or "delete" marks the key cacheable. A dynamic parameter is
//    replaced by its normalized name (reusing the name if the same parameter
//    text was seen before) and marks the key NOT cacheable. Every other
//    token is appended as is, followed by a blank.
//
// In all cases the token count is incremented and the normalized flag is
// cleared.
void HQCParseKey::addTokenToNormalizedString(Int32 & tokCod)
{
  if (SqlParser_CurrentParser->getLexer()->isLiteral4HQC(tokCod))
    {
      keyText_ += "#np# ";
      NAString* literal = unicodeToChar(SqlParser_CurrentParser->YYText(),
                                        SqlParser_CurrentParser->YYLeng(),
                                        (Lng32)ParScannedInputCharset,
                                        heap_);
      CMPASSERT(literal);
      getParams().getNPLiterals().insert(*literal);
    }
  else
    {
      NAString* tok = unicodeToChar(SqlParser_CurrentParser->YYText(),
                                    SqlParser_CurrentParser->YYLeng(),
                                    (Lng32)ParScannedInputCharset,
                                    heap_);
      if (tok != NULL)
        {
          // for first token which is select/insert/update/delete, it might be
          // HQC cacheable.
          tok->toLower(); // make case insensitive

          // HQC does not cache insert statement as SQC already did this before
          // bind. SQC does strict NAType checking on constants while HQC does
          // not for Insert, this will cause inconsistency.
          // Fix launchpad bug 1421374
          // (hence "insert" is deliberately absent from the test below)
          if (nOfTokens_ == 0)
            {
              if (strncmp(tok->data(), "select", 6) == 0 ||
                  strncmp(tok->data(), "update", 6) == 0 ||
                  strncmp(tok->data(), "delete", 6) == 0)
                setIsCacheable(TRUE);
            }

          if (!SqlParser_CurrentParser->getLexer()->isDynamicParameter(tokCod))
            {
              keyText_ += *tok + " ";
            }
          else
            {
              // Look for an earlier occurrence of the same parameter text.
              const CollIndex mapSize = HQCDynParamMap_.entries();
              CollIndex slot = 0;
              while (slot < mapSize && !(HQCDynParamMap_[slot].original_ == *tok))
                slot++;

              if (slot < mapSize)
                {
                  keyText_ += HQCDynParamMap_[slot].normalized_ + " ";
                }
              else
                {
                  // First occurrence: name it after its position in the map.
                  NAString param = "?";
                  param += "param" + UnsignedToNAString(HQCDynParamMap_.entries()+1);
                  keyText_ += param + " ";
                  HQCDynParamMap_.insert(HQCDParamPair(*tok, param));
                }

              // not support dynamic parameter
              setIsCacheable(FALSE);
            }
        }
    }

  nOfTokens_++;
  isStringNormalized_ = FALSE;
}
/* JWP
//KSKSKS
NAWchar *Parser::wInputStr()
{
Int32 i;
static NAWchar *temp2 = (NAWchar *) 111111111; // 0x069F68C7
static NAWchar *temp3 = (NAWchar *) 1412509744; // 0x54313030
if (wInputBuf_ != NULL)
{
if (wInputBuf_->data() == NULL)
i = 20;
else if ( wInputBuf_->data() <= (NAWchar *) temp2
|| wInputBuf_->data() >= (NAWchar *) temp3
)
i = 21;
return wInputBuf_->data();
}
else
return NULL;
}
//KSKSKS
*/
// Free-function scanner entry point: forwards to the yylex method of the
// current parser, or returns 0 when there is no current parser.
Int32 yylex(YYSTYPE *lvalp)
{
  if (!SqlParser_CurrentParser)
    return 0;

  return SqlParser_CurrentParser->yylex(lvalp);
}
// Reports a failed parser assertion: records SQL code -3000 (with the line
// number, the condition text and the file name) in the parser diags area
// when there is one, and then throws a CmpInternalException built from the
// same three values.
//
// condition - text of the assertion that failed
// file      - source file in which it failed
// num       - source line at which it failed
void ParserAssertInternal(const char* condition, const char* file, Int32 num)
{
  // Put the internal error into the diags area if there is one. The check
  // keeps a missing diags area from turning the assertion report into a
  // null dereference before the exception can be thrown.
  if (SqlParser_Diags != NULL)
    {
      *SqlParser_Diags << DgSqlCode(-3000) << DgInt0(num) <<
        DgString0(condition) << DgString1(file);
    }

  CmpInternalException(condition, file , num).throwException();
}
void ParserAbortInternal(const char* condition, const char* file, Int32 num)
{
cerr << "Internal error (" << condition << ") at "
<< file << ", line " << num << ", aborting."
<< endl;
throw EHBreakException(file, num);
}
// -----------------------------------------------------------------------
// The parsing routine which the preprocessor must call,
// as well as arkcmp/cmpmod.cpp routines.
//
// str               - SQL text to parse
// len               - length of str
// charset           - character set of str
// stmt_node_ptr_ptr - receives the parse tree, or NULL if the parse did not
//                     produce one
//
// Returns the value returned by Parser::parseDML. A temporary Parser is
// created for the call and is destroyed again whether parseDML returns or
// throws; an exception thrown by parseDML is rethrown to the caller.
// -----------------------------------------------------------------------
Int32 sql_parse(const char* str, Int32 len, CharInfo::CharSet charset,
                StmtNode **stmt_node_ptr_ptr
                /***, SqlParser_Flags_Enum flags ***/)
{
  // Start from NULL so that a parseDML call that fails before assigning the
  // node cannot leave an indeterminate pointer in *stmt_node_ptr_ptr.
  ExprNode *node = NULL;
  Int32 result = 0;

  Parser *parser = new Parser(cmpCurrentContext);
  try {
    result = parser->parseDML(str, len, charset, &node, 0, NULL);
  } catch (...) {
    delete parser;
    throw; // rethrow the exception
  }
  delete parser;

  *stmt_node_ptr_ptr = (StmtNode*)node;
  return result;
}