// blob: a61f3d0e60eb076a9c6f657e278f9c2aee58a15a (repository web-viewer residue, kept as a comment)
/**********************************************************************/
// Copyright (c) 1993 The Regents of the University of California.
// All rights reserved.
//
// This code is derived from software contributed to Berkeley by
// Kent Williams and Tom Epperly.
//
// Redistribution and use in source and binary forms with or without
// modification are permitted provided that: (1) source distributions retain
// this entire copyright notice and comment, and (2) distributions including
// binaries display the following acknowledgement: ``This product includes
// software developed by the University of California, Berkeley and its
// contributors'' in the documentation or other materials provided with the
// distribution and in all advertising materials mentioning features or use
// of this software. Neither the name of the University nor the names of
// its contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
//
// Later modifications to enable Unicode parsing were granted to ASF.
//
// # Licensed to the Apache Software Foundation (ASF) under one
// # or more contributor license agreements. See the NOTICE file
// # distributed with this work for additional information
// # regarding copyright ownership. The ASF licenses this file
// # to you under the Apache License, Version 2.0 (the
// # "License"); you may not use this file except in compliance
// # with the License. You may obtain a copy of the License at
// #
// # http://www.apache.org/licenses/LICENSE-2.0
// #
// # Unless required by applicable law or agreed to in writing,
// # software distributed under the License is distributed on an
// # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// # KIND, either express or implied. See the License for the
// # specific language governing permissions and limitations
// # under the License.
//
// ULexer.h -- define interfaces for Unicode lexical analyzer class (tcr)
// Flex (version 2.5.4a and earlier) does not support Unicode. Our attempts
// at extending flex to generate a Unicode scanner were unsuccessful. So, we
// hand-code a Unicode scanner for SQL/MX but retain the flex C++ scanner
// class interface (defined in flexlexer.h)
// This file defines ULexer, an abstract class which specifies the
// external interface provided to Unicode C++ lexer objects, and yyULexer,
// which defines a particular lexer class.
//
// If you want to create multiple lexer classes, you use the -P flag (if flex
// could generate a Unicode scanner), or hand-code lexers, say, xxULexer,
// zzULexer, ...
// You then include <ULexer.h> in your other sources once per lexer class:
//
// #undef yyULexer
// #define yyULexer xxULexer
// #include <ULexer.h>
//
// #undef yyULexer
// #define yyULexer zzULexer
// #include <ULexer.h>
// ...
// This interface is drastically simplified from the flex original
// because the arkcmp lexer always scans a single in-memory buffer.
// So, we jettisoned all excess buffer-handling baggage.
#ifndef __U_LEXER_H
// Never included before - need to define base class.
#define __U_LEXER_H
#include <ctype.h> // for toupper()
#include <stdio.h>
#include "NAWinNT.h" // for NAWchar, WIDE_(), etc.
#include "NABoolean.h"
#include "arkcmp_parser_defs.h"
// Forward references.
// ParKeyWord is only named through a pointer in this header (the `key`
// parameter of yyULexer::notCompoundKeyword), so a forward declaration
// is sufficient here.
class ParKeyWord;
// UR2-CNTNSK
// TXT(s) simply forwards its argument to WIDE_(), which the include list
// above attributes to NAWinNT.h.
#define TXT(s) WIDE_(s) // macro for Unicode string literals
extern "C++" {
// Scanner buffer descriptor. Only declared here; this header refers to it
// solely through a pointer (yyULexer::yy_current_buffer_).
struct yy_buffer_state;
// Integer type for scanner states. Not referenced elsewhere in this header;
// presumably retained from the flex-generated interface -- TODO confirm.
typedef Int32 yy_state_type;
// Parser semantic-value type. Only declared here; it is passed by pointer to
// yylex() and to the token helper routines of yyULexer.
union YYSTYPE;
class ULexer {
public:
virtual ~ULexer() { }
const NAWchar* YYText() { return yytext_; }
Int32 YYLeng() { return yyleng_; }
virtual Int32 yylex(YYSTYPE *lvalp) = 0;
Int32 debug() const { return yy_U_debug_; }
void set_debug( Int32 flag ) { yy_U_debug_ = flag; }
protected:
NAWchar* yytext_;
Int32 yyleng_;
Int32 yy_U_debug_;// only has effect with -d or "%option debug"
void yyToUpper()
{ for (NAWchar* c=yytext_; *c; c++) *c = toupper(*c); }
char yynarrow_[400];
void yyToNarrow()
{
char *n = yynarrow_;
char *eob = n + sizeof(yynarrow_) - 1;
for (NAWchar* c=yytext_; *c; c++, n++) {
assert(n < eob);
*n = (char)*c;
NAWchar w = *n;
assert(w == *c);
}
*n = '\0';
}
};
}
#endif // __U_LEXER_H
#if defined(yyULexer) || ! defined(yyULexerOnce)
// Either this is the first time through (yyULexerOnce not defined),
// or this is a repeated include to define a different flavor of
// yyULexer, as discussed in the flex man page.
#define yyULexerOnce
class yyULexer : public ULexer {
public:
// construct lexer to scan an in-memory string
yyULexer(const NAWchar *str, Int32 charCount);
yyULexer(const NAWchar *str, size_t charCount);
virtual ~yyULexer();
virtual Int32 yylex(YYSTYPE *lvalp);
void reset();
// these 2 replace the old SqlParser_InputPos global variable
Int32 getInputPos();
void setInputPos(Int32 i);
void setReturnAllChars() { returnAllChars_ = TRUE; }
void resetReturnAllChars() { returnAllChars_ = FALSE; }
NABoolean isDynamicParameter(Int32 tokCod);
NABoolean isLiteral4HQC(Int32 tokCod);
protected:
void yyULexer_ctor(const NAWchar *str, Int32 charCount);
Int32 input_pos_; // used only by {set|get}InputPos()
void yy_load_buffer_state();
struct yy_buffer_state* yy_current_buffer_;
// yy_hold_char_ holds the character lost when yytext_ is formed.
NAWchar yy_hold_char_;
// Number of characters read into yy_ch_buf.
Int32 yy_n_chars_;
// Points to current character in buffer.
NAWchar* yy_c_buf_p_;
Int32 yy_init_; // whether we need to initialize
NAWchar *beginRun_; // points to start of a run
NAWchar *currChar_; // points to current candidate end of run
NABoolean returnAllChars_;
// set up yytext_, etc for the start of a scan
void startRun()
{ currChar_ = yy_c_buf_p_; *currChar_ = yy_hold_char_;
yytext_ = beginRun_ = currChar_; }
// Done after the current pattern has been matched and before the
// corresponding action - sets up yytext_.
void doBeforeAction()
{ yytext_ = beginRun_; yyleng_ = (Int32)(currChar_ - beginRun_);
input_pos_ = 0;
yy_hold_char_ = *currChar_; *currChar_ = '\0'; yy_c_buf_p_ = currChar_; }
// un-null terminate yytext_. used in scanning compound tokens.
void undoBeforeAction() { *yy_c_buf_p_ = yy_hold_char_; }
// useful after an advance()
Int32 YYLengNow() { return (Int32)(currChar_ - beginRun_); }
// used to remember candidate end of a compound token.
NAWchar *mark() { return currChar_; }
// used to retract current char pointer in compound token scanning
void retractToMark(NAWchar *m) { currChar_ = m; }
// have we reached the end of buffer?
Int32 endOfBuffer();
// advance current character
void advance() { currChar_++; }
// read current character; if end of buffer then refill it first.
// returns WEOF or current character.
NAWchar peekChar();
// return current character and then advance
NAWchar peekAdvance() { NAWchar c=peekChar(); advance(); return c; }
// set current character to c
void setCurrChar(NAWchar c) { *currChar_ = c; }
// does lexer actions associated with recognition of one of:
// {Reserved IDENTIFIER, IDENTIFIER, SQL/MX keyword, compound
// keyword, compound Cobol token, approx numeric, exact numeric
// with scale, exact numeric no scale}
Int32 anSQLMXReservedWord(YYSTYPE *lvalp);
Int32 anIdentifier (YYSTYPE *lvalp);
Int32 anSQLMXKeyword (Int32 tokCod, YYSTYPE *lvalp);
Int32 aCompoundKeyword(Int32 tokCod, YYSTYPE *lvalp);
Int32 aCobolToken (Int32 tokCod, YYSTYPE *lvalp);
Int32 anApproxNumber (YYSTYPE *lvalp);
Int32 exactWithScale (YYSTYPE *lvalp);
Int32 exactNoScale (YYSTYPE *lvalp);
Int32 eitherCompoundOrSimpleKeyword(
NABoolean isCompound,
Int32 tokcodCompound,
Int32 tokcodSimple,
NAWchar *end1,
NAWchar holdChar1,
YYSTYPE *lvalp);
Int32 notCompoundKeyword(const ParKeyWord *key,
NAWchar &holdChar,
YYSTYPE *lvalp);
Int32 aStringLiteralWithCharSet(CharInfo::CharSet,
const NAWchar *s,
Int32 len,
NAWchar quote,
YYSTYPE *lvalp);
// qualified hexadecimal format string literals
Int32 aHexStringLiteralWithCharSet(CharInfo::CharSet,
const NAWchar *s,
Int32 len,
NAWchar quote,
YYSTYPE *lvalp);
Int32 constructStringLiteralWithCharSet(NABoolean hexFormat,
CharInfo::CharSet cs,
YYSTYPE *lvalp,
NAWchar quote=L'\'');
// helper functions to set yylval token value used by above functions
Int32 setStringval(Int32 tokCod, const char *dbgstr, YYSTYPE *lvalp);
Int32 setTokval (Int32 tokCod, const char *dbgstr, YYSTYPE *lvalp);
Int32 prematureEOF(YYSTYPE* lvalp); // hit EOF inside a string or comment
Int32 invalidHexStrLit(YYSTYPE* lvalp); //invalid format of hexadecimal representation of a string literal
Int32 invalidStrLitNonTranslatableChars(YYSTYPE *lvalp); // invalid string literal/host var name
Int32 invalidHostVarNonTranslatableChars(YYSTYPE *lvalp); // due to non-translatable characters.
void addTokenToGlobalQueue(NABoolean isComment = FALSE);
}; // class yyULexer
#endif // defined(yyULexer) || ! defined(yyULexerOnce)