| /********************************************************************** |
| // @@@ START COPYRIGHT @@@ |
| // |
| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // |
| // @@@ END COPYRIGHT @@@ |
| **********************************************************************/ |
| #ifndef PARSER_H |
| #define PARSER_H |
| |
| #include "ItemExpr.h" |
| #include "ItemExprList.h" |
| #include "NLSConversion.h" |
| #include "ulexer.h" |
| #include "SQLCLIdev.h" |
| #include "Collections.h" |
| #include "stringBuf.h" |
| #include "charinfo.h" |
| #include "QCache.h" |
| |
| // Forward declarations: this header only names these classes through |
| // pointers and references, so their full definitions are not needed here. |
| class CmpContext; |
| class ElemDDLColDef; |
| class QueryText; |
| class StmtNode; |
| |
| // Parser |
| // |
| // Builds parse trees (ExprNode / ItemExpr) from SQL text supplied either |
| // as a narrow string plus character set or as a wide (NAWchar) string. |
| // Only the short accessors and forwarders are defined inline in this |
| // header. |
| class Parser |
| { |
| public: |
| |
|   Parser(const CmpContext *cmpContext); |
|   virtual ~Parser(); |
| |
|   CmpContext *cmpContext(); |
| |
|   // Heap pointer stored in wHeap_. |
|   NAHeap *wHeap() |
|   { |
|     return wHeap_; |
|   } |
| |
|   // Narrow-string entry point. str is widened to UNICODE for parsing and |
|   // the localized text is used for error handling; that locale-to-unicode |
|   // conversion is the reason the buffer length is required. (tcr) |
|   Int32 parseDML(const char *str, |
|                  Int32 len, |
|                  CharInfo::CharSet charset, |
|                  ExprNode **node, |
|                  Int32 token = 0, |
|                  ItemExprList *enl = NULL); |
| |
|   // QueryText flavour of parseDML (for odbc/mx unicode support). The text |
|   // is widened to UNICODE for parsing and the localized form is used for |
|   // error handling. |
|   Int32 parseDML(QueryText &txt, |
|                  ExprNode **node, |
|                  Int32 token = 0, |
|                  ItemExprList *enl = NULL); |
| |
|   // Wide-string entry point: the wide str is what gets parsed, and it is |
|   // narrowed to the locale only for error handling. |
|   Int32 parse_w_DML(const NAWchar *str, |
|                     Int32 len, |
|                     ExprNode **node, |
|                     Int32 token = 0, |
|                     ItemExprList *enl = NULL); |
| |
|   ExprNode *parseDML(const char *str, Int32 len, CharInfo::CharSet charset); |
| |
|   ///////////////////////////////////////////////////////////////////// |
|   // |
|   // The four tree-building procedures below take a string holding a SQL |
|   // expression and return the parse tree for it. |
|   // |
|   // ItemExpr parameters may be passed as well; each one is substituted at |
|   // the place in the string that names it. An arithmetic value is written |
|   // as @A<n> and a boolean value as @B<n>, where <n> is the 1-based |
|   // parameter number. |
|   // |
|   // For example, |
|   //   getExprTree("@A1 + 1", 0, CharInfo::UTF8, 1, item_expr) |
|   // would return a tree: |
|   // |
|   //            |-------------------| |
|   //            |  BiArith node(+)  | |
|   //            |-------------------| |
|   //               /             \ |
|   //              /               \ |
|   //   |------------|         |------------| |
|   //   | item_expr  |         | ConstValue | |
|   //   |            |         | (value=1)  | |
|   //   |------------|         |------------| |
|   //   the parameter          node generated |
|   //   tree                   by parser |
|   // |
|   // To pass in more than 6 parameters, insert them in a comma list |
|   // and pass it. |
|   // |
|   ///////////////////////////////////////////////////////////////////// |
| |
|   // Pass internal_expr as TRUE if you know the result is going to be an |
|   // ItemExpr. |
|   ExprNode *getExprTree(const char *str, |
|                         UInt32 strlength = 0, |
|                         CharInfo::CharSet strCharSet = CharInfo::UTF8, |
|                         Int32 num_params = 0, |
|                         ItemExpr *p1 = NULL, ItemExpr *p2 = NULL, |
|                         ItemExpr *p3 = NULL, ItemExpr *p4 = NULL, |
|                         ItemExpr *p5 = NULL, ItemExpr *p6 = NULL, |
|                         ItemExprList *paramItemList = NULL, |
|                         Int32 internal_expr = FALSE); |
| |
|   ItemExpr *getItemExprTree(const char *str, |
|                             UInt32 strlength = 0, |
|                             CharInfo::CharSet strCharSet = CharInfo::UTF8, |
|                             Int32 num_params = 0, |
|                             ItemExpr *p1 = NULL, ItemExpr *p2 = NULL, |
|                             ItemExpr *p3 = NULL, ItemExpr *p4 = NULL, |
|                             ItemExpr *p5 = NULL, ItemExpr *p6 = NULL, |
|                             ItemExprList *paramItemList = NULL); |
| |
|   // Wide counterparts of the two functions above; used by catman to |
|   // process unicode-encoded column default value strings. |
|   // Pass internal_expr as TRUE if you know the result is going to be an |
|   // ItemExpr. |
|   ExprNode *get_w_ExprTree(const NAWchar *str, |
|                            UInt32 strlength = 0, |
|                            Int32 num_params = 0, |
|                            ItemExpr *p1 = NULL, ItemExpr *p2 = NULL, |
|                            ItemExpr *p3 = NULL, ItemExpr *p4 = NULL, |
|                            ItemExpr *p5 = NULL, ItemExpr *p6 = NULL, |
|                            ItemExprList *paramItemList = NULL, |
|                            Int32 internal_expr = FALSE); |
| |
|   ItemExpr *get_w_ItemExprTree(const NAWchar *str, |
|                                UInt32 strlength = 0, |
|                                Int32 num_params = 0, |
|                                ItemExpr *p1 = NULL, ItemExpr *p2 = NULL, |
|                                ItemExpr *p3 = NULL, ItemExpr *p4 = NULL, |
|                                ItemExpr *p5 = NULL, ItemExpr *p6 = NULL, |
|                                ItemExprList *paramItemList = NULL); |
| |
|   // Parses a column definition; called from the internal stored procedure |
|   // component (i.e. CmpStoredProc.C). |
|   ElemDDLColDef *parseColumnDefinition(const char *str, |
|                                        size_t strLen, |
|                                        CharInfo::CharSet strCharSet); |
| |
|   // ---- interface to the Unicode lexer ---- |
|   // Each wrapper falls back to a neutral value when no lexer is attached. |
| |
|   yyULexer *getLexer() |
|   { |
|     return lexer; |
|   } |
| |
|   // Fetches the next token code (0 without a lexer) and hands it to |
|   // addTokenToNormalizedString() before returning it. The code is passed |
|   // by reference, so the value returned is whatever that call leaves in it. |
|   Int32 yylex(YYSTYPE *lvalp) |
|   { |
|     Int32 tokenCode = 0; |
|     if (lexer) |
|       tokenCode = lexer->yylex(lvalp); |
|     addTokenToNormalizedString(tokenCode); |
|     return tokenCode; |
|   } |
| |
|   // Text reported by the lexer, or an empty wide string without a lexer. |
|   const NAWchar *YYText() |
|   { |
|     if (lexer) |
|       return lexer->YYText(); |
|     return WIDE_(""); |
|   } |
| |
|   // Length reported by the lexer, or 0 without a lexer. |
|   Int32 YYLeng() |
|   { |
|     if (lexer) |
|       return lexer->YYLeng(); |
|     return 0; |
|   } |
| |
|   // ---- access to the narrow and wide input buffers ---- |
| |
|   // Data of inputBuf_ viewed as char*, or NULL when there is no buffer. |
|   char *inputStr() |
|   { |
|     if (inputBuf_ == NULL) |
|       return NULL; |
|     return (char*)(inputBuf_->data()); |
|   } |
| |
|   charBuf *getInputcharBuf() |
|   { |
|     return inputBuf_; |
|   } |
| |
|   // Size (in bytes) of inputBuf_, not counting trailing null characters. |
|   size_t inputStrLen(); |
| |
|   // charset_ when a narrow input string exists, UnknownCharSet otherwise. |
|   CharInfo::CharSet inputStrCharSet() |
|   { |
|     if (inputStr() == NULL) |
|       return CharInfo::UnknownCharSet; |
|     return charset_; |
|   } |
| |
|   // Data of wInputBuf_, or NULL when there is no wide buffer. |
|   NAWchar *wInputStr() |
|   { |
|     if (wInputBuf_ == NULL) |
|       return NULL; |
|     return wInputBuf_->data(); |
|   } |
| |
|   NAWcharBuf *getInputNAWcharBuf() |
|   { |
|     return wInputBuf_; |
|   } |
| |
|   // Size (in NAWchars) of wInputBuf_, not counting trailing null |
|   // characters. |
|   size_t wInputStrLen(); |
| |
|   // Returns TRUE if a new inputBuf_ is (re)allocated. |
|   NABoolean fixupParserInputBufAndAppendSemicolon(); |
| |
|   // Returns TRUE if a new wInputBuf_ is (re)allocated. |
|   NABoolean fixupParserWInputBufAndAppendSemicolon(); |
| |
|   // True when position p lies inside the wide input string and the |
|   // character stored there is a double quote. |
|   NABoolean CharHereIsDoubleQuote(StringPos p) |
|   { |
|     if (wInputStrLen() <= p) |
|       return FALSE; |
|     return wInputStr()[p] == NAWCHR('"'); |
|   } |
| |
|   // This replaces the global variable SqlParser_InputStr in arkcmp, which |
|   // is bad news for recursive parser calls. For example, when arkcmp |
|   // executes "create table t(a char not null)", it calls |
|   // CatalogManager::executeDDL(), which calls CatCommand(), which parses |
|   // the above statement and then calls CatCommand::execute(), which |
|   // eventually calls CatAddNotNullConstraint(), which calls the parser |
|   // again to process "alter table t add constraint blah check |
|   // (a is not null)". The partially unicode-enabled parser converts the |
|   // input string to unicode very late: just before parsing. That |
|   // conversion may require memory allocation and deallocation. The end |
|   // result can be a ComASSERT() failure and possibly an arkcmp crash. |
|   // (tcr) |
| |
|   void reset(NABoolean on_entry_reset_was_needed = FALSE); |
| |
|   // Set to one of: NORMAL_TOKEN=0, INTERNALEXPR_TOKEN=1, COLUMNDEF_TOKEN=2. |
|   // Used by the catalog manager for scanning/parsing odd stuff like: |
|   // "CAST('<minvalue>' AS CHAR(n))" (tcr) |
|   Int32 internalExpr_; |
| |
|   // Character set of the original client locale; ulexer uses it to convert |
|   // unicode string literals back to their original multibyte char form. |
|   CharInfo::CharSet charset_; |
|   CharInfo::CharSet initialInputCharSet_; |
| |
|   // Unless this is UnknownCharSet, it is the character set used during |
|   // column creation when none is explicitly specified. |
|   CharInfo::CharSet defaultColCharset_; |
| |
|   CharInfo::CharSet defaultColCharset() |
|   { |
|     return defaultColCharset_; |
|   } |
| |
|   // ---- special mode flags ---- |
| |
|   void setmodeSpecial1(NABoolean v) |
|   { |
|     modeSpecial1_ = v; |
|   } |
| |
|   NABoolean modeSpecial1() |
|   { |
|     return modeSpecial1_; |
|   } |
| |
|   void setmodeSpecial4(NABoolean v) |
|   { |
|     modeSpecial4_ = v; |
|   } |
| |
|   NABoolean modeSpecial4() |
|   { |
|     return modeSpecial4_; |
|   } |
| |
|   // ---- stack of "has OLAP functions" flags (hasOlapFunctions_) ---- |
|   // The entry at index entries()-1 is treated as the top of the stack. |
|   // NOTE(review): top/setTop/pop compute entries()-1 without checking for |
|   // an empty list -- presumably callers always push first; confirm. |
| |
|   void pushHasOlapFunctions(NABoolean v) |
|   { |
|     hasOlapFunctions_.insert(v); |
|   } |
| |
|   NABoolean topHasOlapFunctions() |
|   { |
|     return hasOlapFunctions_[hasOlapFunctions_.entries() - 1]; |
|   } |
| |
|   void setTopHasOlapFunctions(NABoolean v) |
|   { |
|     hasOlapFunctions_[hasOlapFunctions_.entries() - 1] = v; |
|   } |
| |
|   // Removes the top entry and returns whatever removeAt() returns. |
|   NABoolean popHasOlapFunctions() |
|   { |
|     return hasOlapFunctions_.removeAt(hasOlapFunctions_.entries() - 1); |
|   } |
| |
|   void clearHasOlapFunctions() |
|   { |
|     hasOlapFunctions_.clear(); |
|   } |
| |
|   Int32 hasOlapFunctionsEntries() |
|   { |
|     return hasOlapFunctions_.entries(); |
|   } |
| |
|   // ---- stack of "has TD functions" flags (hasTDFunctions_) ---- |
|   // Same shape as the OLAP stack above, including the unchecked |
|   // entries()-1 indexing. |
| |
|   void pushHasTDFunctions(NABoolean v) |
|   { |
|     hasTDFunctions_.insert(v); |
|   } |
| |
|   NABoolean topHasTDFunctions() |
|   { |
|     return hasTDFunctions_[hasTDFunctions_.entries() - 1]; |
|   } |
| |
|   void setTopHasTDFunctions(NABoolean v) |
|   { |
|     hasTDFunctions_[hasTDFunctions_.entries() - 1] = v; |
|   } |
| |
|   // Removes the top entry and returns whatever removeAt() returns. |
|   NABoolean popHasTDFunctions() |
|   { |
|     return hasTDFunctions_.removeAt(hasTDFunctions_.entries() - 1); |
|   } |
| |
|   void clearHasTDFunctions() |
|   { |
|     hasTDFunctions_.clear(); |
|   } |
| |
|   Int32 hasTDFunctionsEntries() |
|   { |
|     return hasTDFunctions_.entries(); |
|   } |
| |
|   // ---- HQC parse key ---- |
|   // Every forwarder below does nothing (or returns FALSE) while no key is |
|   // set. |
| |
|   HQCParseKey *getHQCKey() |
|   { |
|     return HQCKey_; |
|   } |
| |
|   void setHQCKey(HQCParseKey *k) |
|   { |
|     HQCKey_ = k; |
|   } |
| |
|   void addTokenToNormalizedString(Int32 &tokCod) |
|   { |
|     if (HQCKey_ != NULL) |
|       HQCKey_->addTokenToNormalizedString(tokCod); |
|   } |
| |
|   void FixupForUnaryNegate(BiArith *itm) |
|   { |
|     if (HQCKey_ != NULL) |
|       HQCKey_->FixupForUnaryNegate(itm); |
|   } |
| |
|   void collectItem4HQC(ItemExpr *itm) |
|   { |
|     if (HQCKey_ != NULL) |
|       HQCKey_->collectItem4HQC(itm); |
|   } |
| |
|   void setIsHQCCacheable(NABoolean b) |
|   { |
|     if (HQCKey_ != NULL) |
|       HQCKey_->setIsCacheable(b); |
|   } |
| |
|   NABoolean isHQCCacheable() |
|   { |
|     if (HQCKey_ == NULL) |
|       return FALSE; |
|     return HQCKey_->isCacheable(); |
|   } |
| |
|   // ---- WITH clause definitions (with_clauses_) ---- |
|   // NOTE(review): with_clauses_ is dereferenced without a null check; |
|   // presumably it is always allocated before these are called -- confirm. |
| |
|   NABoolean hasWithDefinition(NAString *key) |
|   { |
|     return with_clauses_->contains(key) ? TRUE : FALSE; |
|   } |
| |
|   void insertWithDefinition(NAString *key, RelExpr *val) |
|   { |
|     with_clauses_->insert(key, val); |
|   } |
| |
|   RelExpr *getWithDefinition(NAString *key) |
|   { |
|     return with_clauses_->getFirstValue(key); |
|   } |
| |
|   ////////////////////////////////////////////////////////////////////////// |
|   // class HiveDDLInfo |
|   // Holds the fields and info needed to process a hive ddl statement. |
|   // The fields are set during the parsing phase and are processed after |
|   // return from the parser; that is done in method processHiveDDL. |
|   ////////////////////////////////////////////////////////////////////////// |
|   class HiveDDLInfo |
|   { |
|   public: |
|     // Explain/Showplan/Showshape/Display |
|     enum ESSD |
|     { |
|       NONE_      = 0, |
|       EXPLAIN_   = 1, |
|       SHOWPLAN_  = 2, |
|       SHOWSHAPE_ = 3, |
|       DISPLAY_   = 4, |
|     }; |
| |
|     HiveDDLInfo() |
|     { |
|       init(); |
|     } |
| |
|     // Puts every flag, code and position member back to FALSE / 0 / NONE_. |
|     // NOTE(review): essdOptions_ and userSpecifiedStmt_ are not touched |
|     // here -- confirm that is intended when init() is used as a reset. |
|     void init() |
|     { |
|       disableDDLcheck_      = FALSE; |
|       checkForDDL_          = FALSE; |
|       foundDDL_             = FALSE; |
|       ddlOperation_         = 0; |
|       ddlObjectType_        = 0; |
|       ifExistsOrNotExists_  = FALSE; |
|       ddlNamePos_           = 0; |
|       ddlNameLen_           = 0; |
|       backquotedDelimFound_ = FALSE; |
|       essd_                 = NONE_; |
|       essdQueryStartPos_    = 0; |
|     } |
| |
|     // Stores the four arguments in the members of the same name. |
|     void setValues(NABoolean checkForDDL, |
|                    Int32 ddlOperation, |
|                    Int32 ddlObjectType, |
|                    NABoolean ifExistsOrNotExists = FALSE) |
|     { |
|       checkForDDL_         = checkForDDL; |
|       ddlOperation_        = ddlOperation; |
|       ddlObjectType_       = ddlObjectType; |
|       ifExistsOrNotExists_ = ifExistsOrNotExists; |
|     } |
| |
|     void setFoundDDL(NABoolean v) |
|     { |
|       foundDDL_ = v; |
|     } |
| |
|     // In some cases the parser should not do the hive ddl check. This may |
|     // happen for internal parsing, for example for view expansion or |
|     // internal MD ddl compiles. |
|     NABoolean disableDDLcheck_; |
| |
|     // Set when a create/drop/alter ddl keyword is seen. It is used later |
|     // to see if the specified name is a hive name (catalog is HIVE). |
|     NABoolean checkForDDL_; |
| |
|     // Set if the specified name is a valid hive name. |
|     NABoolean foundDDL_; |
| |
|     // StmtDDLonHiveObjects::Operation |
|     Int32 ddlOperation_; |
| |
|     // StmtDDLonHiveObjects::ObjectType |
|     Int32 ddlObjectType_; |
| |
|     // TRUE:  'if exists' was specified for drop or truncate, or |
|     //        'if not exists' was specified for create. |
|     // FALSE: otherwise. |
|     NABoolean ifExistsOrNotExists_; |
| |
|     // Position and length of the hive name within the input string. |
|     Int32 ddlNamePos_; |
|     Int32 ddlNameLen_; |
| |
|     // Set if a backquoted delimited name is seen ( `abc` ). |
|     // Valid for hive names only. |
|     NABoolean backquotedDelimFound_; |
| |
|     // An ESSD value: 1 (EXPLAIN_) for an explain query, 2 (SHOWPLAN_) for |
|     // showplan, 3 (SHOWSHAPE_) for showshape, 4 (DISPLAY_) for display; |
|     // init() stores NONE_ (0). |
|     Int32 essd_; |
|     NAString essdOptions_; |
|     Int32 essdQueryStartPos_; |
| |
|     // Hive ddl stmt passed in by the user. |
|     // For direct ddl, like "drop table t...", or "alter table t..", this |
|     // contains the whole statement. |
|     // For passthru ddl sent in via "process hive ddl 'drop table...'", |
|     // this contains the contents of the single quoted string |
|     // ('drop table...'). |
|     NAString userSpecifiedStmt_; |
|   }; |
|   HiveDDLInfo *hiveDDLInfo_; |
| |
| private: |
| |
|   HQCParseKey *HQCKey_; |
| |
|   // See notes in .C file. |
|   CmpContext *cmpContext_; |
|   Parser *prevParser_; |
| |
|   NAHeap *wHeap_;             // Pointer to the NAHeap |
|   NABoolean hasInternalHeap_; // Did Parser allocate this heap? |
| |
|   // ---- private methods for internal usage ---- |
| |
|   // parseUtilISPCommand looks for a utility keyword in the input query and |
|   // generates a StmtQuery ( RelRoot ( RelInternalSP ) ) tree. The tree is |
|   // built in this routine, bypassing the arkcmp parser, because the |
|   // utility stored procedure parses the parameters itself: the parameters |
|   // might contain quoted strings, which the arkcmp parser can't parse and |
|   // might destroy. Returns TRUE when a utility keyword was found and the |
|   // tree was generated, FALSE otherwise. |
|   NABoolean parseUtilISPCommand(const char *commandText, |
|                                 size_t commandTextLen, |
|                                 CharInfo::CharSet commandCharSet, |
|                                 ExprNode **node); |
| |
|   // Examines the input query for a Rel1 NSK DDL, UPDATE STATISTICS or |
|   // special CAT API request, and creates the DDL Expr here instead of |
|   // letting it go thru the MX parser. |
|   NABoolean processSpecialDDL(const char *commandText, |
|                               size_t commandTextLen, |
|                               ExprNode *childNode, |
|                               CharInfo::CharSet commandTextCharSet, |
|                               ExprNode **node); |
| |
|   // See comments in parser.cpp file. |
|   NABoolean processHiveDDL(HiveDDLInfo *hiveDDLInfo, ExprNode **node); |
| |
|   Int32 parseSQL(ExprNode **node, |
|                  Int32 token = 0, |
|                  ItemExprList *enl = NULL); |
| |
|   void ResetLexer(void); |
|   yyULexer *lexer; |
| |
|   charBuf *inputBuf_; |
|   NAWcharBuf *wInputBuf_; |
| |
|   NABoolean modeSpecial1_; |
|   NABoolean modeSpecial4_; |
| |
|   LIST(NABoolean) hasOlapFunctions_; |
|   LIST(NABoolean) hasTDFunctions_; |
| |
|   /* |
|    * Hashmap that saves WITH clause definitions: |
|    *   key   - the name of the with clause |
|    *   value - the RelExpr structure |
|    */ |
|   NAHashDictionary<NAString,RelExpr> *with_clauses_; |
| |
| }; |
| |
| // Parser check macros. When the condition evaluates to false, the |
| // stringified condition (with a blank on each side), __FILE__ and |
| // __LINE__ are passed to ParserAssertInternal / ParserAbortInternal. |
| // NOTE(review): each macro expands to a bare if-statement, so an `else` |
| // written directly after an invocation would pair with the macro's `if`; |
| // wrapping in do { } while (0) would need every call site to end in `;`. |
| #define PARSERASSERT(cond) \ |
| if (!(cond)) { ParserAssertInternal(" " #cond " ", __FILE__, __LINE__); } |
| #define PARSERABORT(cond) \ |
| if (!(cond)) { ParserAbortInternal(" " #cond " ", __FILE__, __LINE__); } |
| |
| // Handlers invoked by PARSERASSERT / PARSERABORT, which pass the |
| // stringified condition, __FILE__ and __LINE__ in that order. |
| void ParserAssertInternal(const char *condition, const char *file, Int32 line); |
| void ParserAbortInternal(const char *condition, const char *file, Int32 line); |
| |
| // Parsing entry point: this is the routine the preprocessor must call. |
| // Takes the statement text, its length and character set, plus the |
| // address of a StmtNode pointer. |
| Int32 sql_parse(const char *str, |
|                 Int32 len, |
|                 CharInfo::CharSet charset, |
|                 StmtNode **stmt_node_ptr_ptr); |
| |
| // Takes a UTF-16 buffer, a heap, a reference to an output charBuf |
| // pointer, a character-set code and an error-code reference, and returns |
| // a charBuf pointer. The flags default to TRUE and the two optional count |
| // and offset outputs default to NULL. |
| // NOTE(review): going by the name, this presumably converts the UTF-16 |
| // text to the character set given by pv_iCharSet -- confirm against the |
| // definition, which is not in this header. |
| charBuf* parserUTF16ToCharSet(const NAWcharBuf& pr_UTF16StrBuf, |
|                               CollHeap* pp_Heap, |
|                               charBuf*& pr_pOutCharSetStrBuf, |
|                               Int32 pv_iCharSet, Int32 & pr_iErrorcode, |
|                               NABoolean pv_bAddNullAtEnd = TRUE, |
|                               NABoolean pv_bAllowInvalidCodePoint = TRUE, |
|                               Int32 * pp_iCharCount = NULL, |
|                               Int32 * pp_iErrorByteOff = NULL); |
| |
| #endif // PARSER_H |