// core/sql/parser/ulexer.h - trafodion - Git at Google

 /**********************************************************************/
 // Copyright (c) 1993 The Regents of the University of California.
 // All rights reserved.
 //
 // This code is derived from software contributed to Berkeley by
 // Kent Williams and Tom Epperly.
 //
 // Redistribution and use in source and binary forms with or without
 // modification are permitted provided that: (1) source distributions retain
 // this entire copyright notice and comment, and (2) distributions including
 // binaries display the following acknowledgement:  ``This product includes
 // software developed by the University of California, Berkeley and its
 // contributors'' in the documentation or other materials provided with the
 // distribution and in all advertising materials mentioning features or use
 // of this software.  Neither the name of the University nor the names of
 // its contributors may be used to endorse or promote products derived from
 // this software without specific prior written permission.
 //
 //
 // Later modifications to enable Unicode parsing were granted to ASF.
 //
 // # Licensed to the Apache Software Foundation (ASF) under one
 // # or more contributor license agreements.  See the NOTICE file
 // # distributed with this work for additional information
 // # regarding copyright ownership.  The ASF licenses this file
 // # to you under the Apache License, Version 2.0 (the
 // # "License"); you may not use this file except in compliance
 // # with the License.  You may obtain a copy of the License at
 // #
 // #   http://www.apache.org/licenses/LICENSE-2.0
 // #
 // # Unless required by applicable law or agreed to in writing,
 // # software distributed under the License is distributed on an
 // # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // # KIND, either express or implied.  See the License for the
 // # specific language governing permissions and limitations
 // # under the License.
 //


 // ULexer.h -- define interfaces for Unicode lexical analyzer class (tcr)

 // Flex (version 2.5.4a and earlier) does not support Unicode. Our attempts
 // at extending flex to generate a Unicode scanner were unsuccessful. So, we
 // hand-code a Unicode scanner for SQL/MX but retain the flex C++ scanner
 // class interface (defined in flexlexer.h)

 // This file defines ULexer, an abstract class which specifies the
 // external interface provided to Unicode C++ lexer objects, and yyULexer,
 // which defines a particular lexer class.
 //
 // If you want to create multiple lexer classes, you use the -P flag (if flex
 // could generate a Unicode scanner), or hand-code lexers, say, xxULexer,
 // zzULexer, ...
 // You then include <ULexer.h> in your other sources once per lexer class:
 //
 //	#undef yyULexer
 //	#define yyULexer xxULexer
 //	#include <ULexer.h>
 //
 //	#undef yyULexer
 //	#define yyULexer zzULexer
 //	#include <ULexer.h>
 //	...

 // This interface is drastically simplified from the flex original
 // because the arkcmp lexer always scans a single in-memory buffer.
 // So, we jettisoned all excess buffer-handling baggage.

 #ifndef __U_LEXER_H
 // Never included before - need to define base class.
 #define __U_LEXER_H

 #include <ctype.h>		// for toupper()
 #include <stdio.h>
 #include "NAWinNT.h"		// for NAWchar, WIDE_(), etc.
 #include "NABoolean.h"
 #include "arkcmp_parser_defs.h"

 // Forward references.
 class ParKeyWord;

 // UR2-CNTNSK
 #define TXT(s) WIDE_(s)		// macro for Unicode string literals

 extern "C++" {

 struct yy_buffer_state;
 typedef Int32 yy_state_type;

 union YYSTYPE;

 class ULexer {
 public:
 	virtual ~ULexer()	{ }

 	const NAWchar* YYText()	{ return yytext_; }
 	Int32 YYLeng()		{ return yyleng_; }

 	virtual Int32 yylex(YYSTYPE *lvalp) = 0;

 	Int32 debug() const		{ return yy_U_debug_; }
 	void set_debug( Int32 flag )	{ yy_U_debug_ = flag; }

 protected:
 	NAWchar* yytext_;
 	Int32 yyleng_;
 	Int32 yy_U_debug_;// only has effect with -d or "%option debug"

 	void yyToUpper()

 	{ for (NAWchar* c=yytext_; *c; c++) *c = toupper(*c); }

 	  char yynarrow_[400];
 	  void yyToNarrow()
 	  {
 	    char *n = yynarrow_;
 	    char *eob = n + sizeof(yynarrow_) - 1;
 	    for (NAWchar* c=yytext_; *c; c++, n++) {
 	      assert(n < eob);
 	      *n = (char)*c;
 	      NAWchar w = *n;
 	      assert(w == *c);
 	    }
 	    *n = '\0';
 	  }
 };

 }
 #endif	// __U_LEXER_H

 #if defined(yyULexer) || ! defined(yyULexerOnce)
 // Either this is the first time through (yyULexerOnce not defined),
 // or this is a repeated include to define a different flavor of
 // yyULexer, as discussed in the flex man page.
 #define yyULexerOnce

 class yyULexer : public ULexer {
 public:
     // construct lexer to scan an in-memory string
     yyULexer(const NAWchar *str, Int32 charCount);
     yyULexer(const NAWchar *str, size_t charCount);

     virtual ~yyULexer();

     virtual Int32 yylex(YYSTYPE *lvalp);

     void reset();

     // these 2 replace the old SqlParser_InputPos global variable
     Int32 getInputPos();
     void setInputPos(Int32 i);

     void setReturnAllChars()   { returnAllChars_ = TRUE; }
     void resetReturnAllChars() { returnAllChars_ = FALSE; }

     NABoolean isDynamicParameter(Int32 tokCod);

     NABoolean isLiteral4HQC(Int32 tokCod);

 protected:
     void yyULexer_ctor(const NAWchar *str, Int32 charCount);
     Int32 input_pos_; // used only by {set|get}InputPos()

     void yy_load_buffer_state();

     struct yy_buffer_state* yy_current_buffer_;

     // yy_hold_char_ holds the character lost when yytext_ is formed.
     NAWchar yy_hold_char_;

     // Number of characters read into yy_ch_buf.
     Int32 yy_n_chars_;

     // Points to current character in buffer.
     NAWchar* yy_c_buf_p_;

     Int32 yy_init_;		// whether we need to initialize

     NAWchar *beginRun_; // points to start of a run
     NAWchar *currChar_; // points to current candidate end of run

     NABoolean returnAllChars_;

     // set up yytext_, etc for the start of a scan
     void startRun()
     { currChar_ = yy_c_buf_p_; *currChar_ = yy_hold_char_;
       yytext_ = beginRun_ = currChar_; }

     // Done after the current pattern has been matched and before the
     // corresponding action - sets up yytext_.
     void doBeforeAction()
     { yytext_ = beginRun_; yyleng_ = (Int32)(currChar_ - beginRun_);
       input_pos_ = 0;
       yy_hold_char_ = *currChar_; *currChar_ = '\0'; yy_c_buf_p_ = currChar_; }

     // un-null terminate yytext_. used in scanning compound tokens.
     void undoBeforeAction() { *yy_c_buf_p_ = yy_hold_char_; }

     // useful after an advance()
     Int32 YYLengNow()	{ return (Int32)(currChar_ - beginRun_); }

     // used to remember candidate end of a compound token.
     NAWchar *mark() { return currChar_; }

     // used to retract current char pointer in compound token scanning
     void retractToMark(NAWchar *m) { currChar_ = m; }

     // have we reached the end of buffer?
     Int32 endOfBuffer();

     // advance current character
     void advance() { currChar_++; }

     // read current character; if end of buffer then refill it first.
     // returns WEOF or current character.
     NAWchar peekChar();

     // return current character and then advance
     NAWchar peekAdvance() { NAWchar c=peekChar(); advance(); return c; }

     // set current character to c
     void setCurrChar(NAWchar c) { *currChar_ = c; }

     // does lexer actions associated with recognition of one of:
     // {Reserved IDENTIFIER, IDENTIFIER, SQL/MX keyword, compound
     // keyword, compound Cobol token, approx numeric, exact numeric
     // with scale, exact numeric no scale}
     Int32 anSQLMXReservedWord(YYSTYPE *lvalp);
     Int32 anIdentifier    (YYSTYPE *lvalp);
     Int32 anSQLMXKeyword  (Int32 tokCod, YYSTYPE *lvalp);
     Int32 aCompoundKeyword(Int32 tokCod, YYSTYPE *lvalp);
     Int32 aCobolToken     (Int32 tokCod, YYSTYPE *lvalp);
     Int32 anApproxNumber  (YYSTYPE *lvalp);
     Int32 exactWithScale  (YYSTYPE *lvalp);
     Int32 exactNoScale    (YYSTYPE *lvalp);
     Int32 eitherCompoundOrSimpleKeyword(
 			NABoolean isCompound,
 			Int32 tokcodCompound,
 			Int32 tokcodSimple,
 			NAWchar *end1,
 			NAWchar holdChar1,
                         YYSTYPE *lvalp);
     Int32 notCompoundKeyword(const ParKeyWord *key,
                              NAWchar &holdChar,
                              YYSTYPE *lvalp);

     Int32 aStringLiteralWithCharSet(CharInfo::CharSet,
                                   const NAWchar *s,
                                   Int32 len,
                                   NAWchar quote,
                                   YYSTYPE *lvalp);

     // qualified hexadecimal format string literals
     Int32 aHexStringLiteralWithCharSet(CharInfo::CharSet,
                                   const NAWchar *s,
                                   Int32 len,
                                   NAWchar quote,
                                   YYSTYPE *lvalp);
     Int32 constructStringLiteralWithCharSet(NABoolean hexFormat,
                                             CharInfo::CharSet cs,
                                             YYSTYPE *lvalp,
                                             NAWchar quote=L'\'');

     // helper functions to set yylval token value used by above functions
     Int32 setStringval(Int32 tokCod, const char *dbgstr, YYSTYPE *lvalp);
     Int32 setTokval   (Int32 tokCod, const char *dbgstr, YYSTYPE *lvalp);

     Int32 prematureEOF(YYSTYPE* lvalp); // hit EOF inside a string or comment
     Int32 invalidHexStrLit(YYSTYPE* lvalp); //invalid format of hexadecimal representation of a string literal
     Int32 invalidStrLitNonTranslatableChars(YYSTYPE *lvalp);  // invalid string literal/host var name
     Int32 invalidHostVarNonTranslatableChars(YYSTYPE *lvalp); // due to non-translatable characters.

   void addTokenToGlobalQueue(NABoolean isComment = FALSE);

 }; // class yyULexer

 #endif	// defined(yyULexer) || ! defined(yyULexerOnce)
	/**********************************************************************/
	// Copyright (c) 1993 The Regents of the University of California.
	// All rights reserved.
	//
	// This code is derived from software contributed to Berkeley by
	// Kent Williams and Tom Epperly.
	//
	// Redistribution and use in source and binary forms with or without
	// modification are permitted provided that: (1) source distributions retain
	// this entire copyright notice and comment, and (2) distributions including
	// binaries display the following acknowledgement: ``This product includes
	// software developed by the University of California, Berkeley and its
	// contributors'' in the documentation or other materials provided with the
	// distribution and in all advertising materials mentioning features or use
	// of this software. Neither the name of the University nor the names of
	// its contributors may be used to endorse or promote products derived from
	// this software without specific prior written permission.
	//
	//
	// Later modifications to enable Unicode parsing were granted to ASF.
	//
	// # Licensed to the Apache Software Foundation (ASF) under one
	// # or more contributor license agreements. See the NOTICE file
	// # distributed with this work for additional information
	// # regarding copyright ownership. The ASF licenses this file
	// # to you under the Apache License, Version 2.0 (the
	// # "License"); you may not use this file except in compliance
	// # with the License. You may obtain a copy of the License at
	// #
	// # http://www.apache.org/licenses/LICENSE-2.0
	// #
	// # Unless required by applicable law or agreed to in writing,
	// # software distributed under the License is distributed on an
	// # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// # KIND, either express or implied. See the License for the
	// # specific language governing permissions and limitations
	// # under the License.
	//


	// ULexer.h -- define interfaces for Unicode lexical analyzer class (tcr)

	// Flex (version 2.5.4a and earlier) does not support Unicode. Our attempts
	// at extending flex to generate a Unicode scanner were unsuccessful. So, we
	// hand-code a Unicode scanner for SQL/MX but retain the flex C++ scanner
	// class interface (defined in flexlexer.h)

	// This file defines ULexer, an abstract class which specifies the
	// external interface provided to Unicode C++ lexer objects, and yyULexer,
	// which defines a particular lexer class.
	//
	// If you want to create multiple lexer classes, you use the -P flag (if flex
	// could generate a Unicode scanner), or hand-code lexers, say, xxULexer,
	// zzULexer, ...
	// You then include <ULexer.h> in your other sources once per lexer class:
	//
	// #undef yyULexer
	// #define yyULexer xxULexer
	// #include <ULexer.h>
	//
	// #undef yyULexer
	// #define yyULexer zzULexer
	// #include <ULexer.h>
	// ...

	// This interface is drastically simplified from the flex original
	// because the arkcmp lexer always scans a single in-memory buffer.
	// So, we jettisoned all excess buffer-handling baggage.

	#ifndef __U_LEXER_H
	// Never included before - need to define base class.
	#define __U_LEXER_H

	#include <ctype.h> // for toupper()
	#include <stdio.h>
	#include "NAWinNT.h" // for NAWchar, WIDE_(), etc.
	#include "NABoolean.h"
	#include "arkcmp_parser_defs.h"

	// Forward references.
	class ParKeyWord;

	// UR2-CNTNSK
	#define TXT(s) WIDE_(s) // macro for Unicode string literals

	extern "C++" {

	struct yy_buffer_state;
	typedef Int32 yy_state_type;

	union YYSTYPE;

	class ULexer {
	public:
	virtual ~ULexer() { }

	const NAWchar* YYText() { return yytext_; }
	Int32 YYLeng() { return yyleng_; }

	virtual Int32 yylex(YYSTYPE *lvalp) = 0;

	Int32 debug() const { return yy_U_debug_; }
	void set_debug( Int32 flag ) { yy_U_debug_ = flag; }

	protected:
	NAWchar* yytext_;
	Int32 yyleng_;
	Int32 yy_U_debug_;// only has effect with -d or "%option debug"

	void yyToUpper()

	{ for (NAWchar* c=yytext_; c; c++) c = toupper(*c); }

	char yynarrow_[400];
	void yyToNarrow()
	{
	char *n = yynarrow_;
	char *eob = n + sizeof(yynarrow_) - 1;
	for (NAWchar* c=yytext_; *c; c++, n++) {
	assert(n < eob);
	n = (char)c;
	NAWchar w = *n;
	assert(w == *c);
	}
	*n = '\0';
	}
	};

	}
	#endif // __U_LEXER_H

	#if defined(yyULexer) \|\| ! defined(yyULexerOnce)
	// Either this is the first time through (yyULexerOnce not defined),
	// or this is a repeated include to define a different flavor of
	// yyULexer, as discussed in the flex man page.
	#define yyULexerOnce

	class yyULexer : public ULexer {
	public:
	// construct lexer to scan an in-memory string
	yyULexer(const NAWchar *str, Int32 charCount);
	yyULexer(const NAWchar *str, size_t charCount);

	virtual ~yyULexer();

	virtual Int32 yylex(YYSTYPE *lvalp);

	void reset();

	// these 2 replace the old SqlParser_InputPos global variable
	Int32 getInputPos();
	void setInputPos(Int32 i);

	void setReturnAllChars() { returnAllChars_ = TRUE; }
	void resetReturnAllChars() { returnAllChars_ = FALSE; }

	NABoolean isDynamicParameter(Int32 tokCod);

	NABoolean isLiteral4HQC(Int32 tokCod);

	protected:
	void yyULexer_ctor(const NAWchar *str, Int32 charCount);
	Int32 input_pos_; // used only by {set\|get}InputPos()

	void yy_load_buffer_state();

	struct yy_buffer_state* yy_current_buffer_;

	// yy_hold_char_ holds the character lost when yytext_ is formed.
	NAWchar yy_hold_char_;

	// Number of characters read into yy_ch_buf.
	Int32 yy_n_chars_;

	// Points to current character in buffer.
	NAWchar* yy_c_buf_p_;

	Int32 yy_init_; // whether we need to initialize

	NAWchar *beginRun_; // points to start of a run
	NAWchar *currChar_; // points to current candidate end of run

	NABoolean returnAllChars_;

	// set up yytext_, etc for the start of a scan
	void startRun()
	{ currChar_ = yy_c_buf_p_; *currChar_ = yy_hold_char_;
	yytext_ = beginRun_ = currChar_; }

	// Done after the current pattern has been matched and before the
	// corresponding action - sets up yytext_.
	void doBeforeAction()
	{ yytext_ = beginRun_; yyleng_ = (Int32)(currChar_ - beginRun_);
	input_pos_ = 0;
	yy_hold_char_ = currChar_; currChar_ = '\0'; yy_c_buf_p_ = currChar_; }

	// un-null terminate yytext_. used in scanning compound tokens.
	void undoBeforeAction() { *yy_c_buf_p_ = yy_hold_char_; }

	// useful after an advance()
	Int32 YYLengNow() { return (Int32)(currChar_ - beginRun_); }

	// used to remember candidate end of a compound token.
	NAWchar *mark() { return currChar_; }

	// used to retract current char pointer in compound token scanning
	void retractToMark(NAWchar *m) { currChar_ = m; }

	// have we reached the end of buffer?
	Int32 endOfBuffer();

	// advance current character
	void advance() { currChar_++; }

	// read current character; if end of buffer then refill it first.
	// returns WEOF or current character.
	NAWchar peekChar();

	// return current character and then advance
	NAWchar peekAdvance() { NAWchar c=peekChar(); advance(); return c; }

	// set current character to c
	void setCurrChar(NAWchar c) { *currChar_ = c; }

	// does lexer actions associated with recognition of one of:
	// {Reserved IDENTIFIER, IDENTIFIER, SQL/MX keyword, compound
	// keyword, compound Cobol token, approx numeric, exact numeric
	// with scale, exact numeric no scale}
	Int32 anSQLMXReservedWord(YYSTYPE *lvalp);
	Int32 anIdentifier (YYSTYPE *lvalp);
	Int32 anSQLMXKeyword (Int32 tokCod, YYSTYPE *lvalp);
	Int32 aCompoundKeyword(Int32 tokCod, YYSTYPE *lvalp);
	Int32 aCobolToken (Int32 tokCod, YYSTYPE *lvalp);
	Int32 anApproxNumber (YYSTYPE *lvalp);
	Int32 exactWithScale (YYSTYPE *lvalp);
	Int32 exactNoScale (YYSTYPE *lvalp);
	Int32 eitherCompoundOrSimpleKeyword(
	NABoolean isCompound,
	Int32 tokcodCompound,
	Int32 tokcodSimple,
	NAWchar *end1,
	NAWchar holdChar1,
	YYSTYPE *lvalp);
	Int32 notCompoundKeyword(const ParKeyWord *key,
	NAWchar &holdChar,
	YYSTYPE *lvalp);

	Int32 aStringLiteralWithCharSet(CharInfo::CharSet,
	const NAWchar *s,
	Int32 len,
	NAWchar quote,
	YYSTYPE *lvalp);

	// qualified hexadecimal format string literals
	Int32 aHexStringLiteralWithCharSet(CharInfo::CharSet,
	const NAWchar *s,
	Int32 len,
	NAWchar quote,
	YYSTYPE *lvalp);
	Int32 constructStringLiteralWithCharSet(NABoolean hexFormat,
	CharInfo::CharSet cs,
	YYSTYPE *lvalp,
	NAWchar quote=L'\'');

	// helper functions to set yylval token value used by above functions
	Int32 setStringval(Int32 tokCod, const char dbgstr, YYSTYPE lvalp);
	Int32 setTokval (Int32 tokCod, const char dbgstr, YYSTYPE lvalp);

	Int32 prematureEOF(YYSTYPE* lvalp); // hit EOF inside a string or comment
	Int32 invalidHexStrLit(YYSTYPE* lvalp); //invalid format of hexadecimal representation of a string literal
	Int32 invalidStrLitNonTranslatableChars(YYSTYPE *lvalp); // invalid string literal/host var name
	Int32 invalidHostVarNonTranslatableChars(YYSTYPE *lvalp); // due to non-translatable characters.

	void addTokenToGlobalQueue(NABoolean isComment = FALSE);

	}; // class yyULexer

	#endif // defined(yyULexer) \|\| ! defined(yyULexerOnce)