| %{ |
| /* |
| ** A scanner that splits SQL text into tokens |
| */ |
| |
| #include "postgres.h" |
| #include "parser/keywords.h" |
| |
| #ifdef GP_VERSION_NUM |
| #define YY_NO_INPUT 1 |
| #endif |
| |
| #undef yyparse /* don't need it now and solve problems with 8.1 */ |
| |
| #include "parser/gramparse.h" |
| #include "parser/keywords.h" |
| /* Not needed now that this file is compiled as part of gram.y */ |
| /* #include "parser/parse.h" */ |
| #include "parser/scansup.h" |
| #include "mb/pg_wchar.h" |
| |
| /* |
| * Pack a three-part version number into a single integer so that two |
| * versions can be compared with ordinary relational operators in #if. |
| * Every argument is parenthesized so that expression arguments keep their |
| * own precedence when the macro is expanded. |
| */ |
| #define unify_version(a,b,c) (((a)<<16)+((b)<<8)+(c)) |
| |
| /* simplified current version */ |
| #define YY_FLEX_FULL_VERSION \ |
| unify_version(YY_FLEX_MAJOR_VERSION,YY_FLEX_MINOR_VERSION,YY_FLEX_SUBMINOR_VERSION) |
| |
| /* |
| * Need to define these prototypes to shut GCC up. Flex fixes this from |
| * > 2.5.35 |
| */ |
| #if YY_FLEX_FULL_VERSION < unify_version(2,5,35) |
| int orafce_sql_yyget_lineno (void); |
| FILE *orafce_sql_yyget_in (void); |
| FILE *orafce_sql_yyget_out (void); |
| char *orafce_sql_yyget_text (void); |
| void orafce_sql_yyset_lineno (int line_number ); |
| void orafce_sql_yyset_in (FILE * in_str ); |
| void orafce_sql_yyset_out (FILE * out_str ); |
| int orafce_sql_yyget_debug (void); |
| void orafce_sql_yyset_debug (int bdebug ); |
| int orafce_sql_yylex_destroy (void); |
| int orafce_sql_yyget_leng(void); |
| #endif |
| |
| #if PG_VERSION_NUM >= 80500 |
| extern PGDLLIMPORT const ScanKeyword ScanKeywords[]; |
| extern PGDLLIMPORT const int NumScanKeywords; |
| #define ScanKeywordLookupArgs , ScanKeywords, NumScanKeywords |
| #else |
| #define ScanKeywordLookupArgs |
| #endif |
| |
| /* |
| * Route the scanner's fprintf() calls to ereport(ERROR).  Only the third |
| * argument (the message) is used; the stream and format are discarded. |
| */ |
| #undef fprintf |
| #define fprintf(file, fmt, msg) ereport(ERROR, (errmsg_internal("%s", msg))) |
| |
| static int xcdepth = 0; /* depth of nesting in slash-star comments */ |
| static char *dolqstart; /* current $foo$ quote start string */ |
| /* set when a quoted string is opened: true for the E prefix form */ |
| static bool extended_string = false; |
| |
| |
| /* No reason to constrain amount of data slurped */ |
| #define YY_READ_BUF_SIZE 16777216 |
| |
| /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */ |
| /* (this repeats the fprintf definition made a few lines above) */ |
| #define fprintf(file, fmt, msg) ereport(ERROR, (errmsg_internal("%s", msg))) |
| |
| /* Handles to the buffer that the lexer uses internally */ |
| |
| |
| static YY_BUFFER_STATE scanbufhandle; |
| |
| /* |
| * Store the current token's byte offset from the start of scanbuf in both |
| * yylloc and the lloc field of the token value. |
| */ |
| #define SET_YYLLOC() (yylval.val.lloc = yylloc = yytext - scanbuf) |
| |
| /* Handles to the buffer that the lexer uses internally */ |
| static char *scanbuf; |
| |
| /* flex 2.5.4 doesn't bother with a decl for this */ |
| |
| int orafce_sql_yylex(void); |
| |
| extern YYSTYPE yylval; |
| |
| /* Set up and tear down the scan buffer; both are defined at the end of this file */ |
| void orafce_sql_scanner_init(const char *str); |
| void orafce_sql_scanner_finish(void); |
| |
| /* |
| * literalbuf is used to accumulate literal values when multiple rules |
| * are needed to parse a single literal. Call startlit to reset buffer |
| * to empty, addlit to add text. Note that the buffer is palloc'd and |
| * starts life afresh on every parse cycle. |
| */ |
| static char *literalbuf; /* expandable buffer */ |
| static int literallen; /* actual current length */ |
| static int literalalloc; /* current allocated buffer size */ |
| |
| /* Empty the literal buffer: NUL in the first byte, length back to zero */ |
| #define startlit() (literalbuf[0] = '\0', literallen = 0) |
| static void addlit(char *ytext, int yleng); |
| static void addlitchar(unsigned char ychar); |
| static char *litbufdup(void); |
| |
| static int lexer_errposition(void); |
| |
| /* |
| * Each call to yylex must set yylloc to the location of the found token |
| * (expressed as a byte offset from the start of the input text). |
| * When we parse a token that requires multiple lexer rules to process, |
| * this should be done in the first such rule, else yylloc will point |
| * into the middle of the token. |
| */ |
| |
| /* Handles to the buffer that the lexer uses internally */ |
| /* (repeats the declaration of scanbuf made earlier in this section) */ |
| static char *scanbuf; |
| |
| static unsigned char unescape_single_char(unsigned char c); |
| |
| /* Fall back to pg_mbstrlen_with_len unless a replacement macro is already defined */ |
| #ifndef _pg_mbstrlen_with_len |
| #define _pg_mbstrlen_with_len(buf,loc) pg_mbstrlen_with_len(buf,loc) |
| #endif |
| |
| %} |
| |
| %option 8bit |
| %option never-interactive |
| %option nodefault |
| %option nounput |
| %option noyywrap |
| %option prefix="orafce_sql_yy" |
| |
| /* |
| * OK, here is a short description of lex/flex rules behavior. |
| * The longest pattern which matches an input string is always chosen. |
| * For equal-length patterns, the first occurring in the rules list is chosen. |
| * INITIAL is the starting state, to which all non-conditional rules apply. |
| * Exclusive states change parsing rules while the state is active. When in |
| * an exclusive state, only those rules defined for that state apply. |
| * |
| * We use exclusive states for quoted strings, extended comments, |
| * and to eliminate parsing troubles for numeric strings. |
| * Exclusive states: |
| * <xb> bit string literal |
| * <xc> extended C-style comments |
| * <xd> delimited identifiers (double-quoted identifiers) |
| * <xh> hexadecimal numeric string |
| * <xq> standard quoted strings |
| * <xe> extended quoted strings (support backslash escape sequences) |
| * <xdolq> $foo$ quoted strings |
| */ |
| |
| %x xb |
| %x xc |
| %x xd |
| %x xh |
| %x xe |
| %x xq |
| %x xdolq |
| |
| |
| /* |
| * In order to make the world safe for Windows and Mac clients as well as |
| * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n |
| * sequence will be seen as two successive newlines, but that doesn't cause |
| * any problems. Comments that start with -- and extend to the next |
| * newline are treated as equivalent to a single whitespace character. |
| * |
| * NOTE a fine point: if there is no newline following --, we will absorb |
| * everything to the end of the input as a comment. This is correct. Older |
| * versions of Postgres failed to recognize -- as a comment if the input |
| * did not end with a newline. |
| * |
| * XXX perhaps \f (formfeed) should be treated as a newline as well? |
| * |
| * XXX if you change the set of whitespace characters, fix scanner_isspace() |
| * to agree, and see also the plpgsql lexer. |
| */ |
| |
| space [ \t\n\r\f] |
| horiz_space [ \t\f] |
| newline [\n\r] |
| non_newline [^\n\r] |
| |
| comment ("--"{non_newline}*) |
| |
| whitespace {space}+ |
| |
| /* |
| * SQL requires at least one newline in the whitespace separating |
| * string literals that are to be concatenated. Silly, but who are we |
| * to argue? Note that {whitespace_with_newline} should not have * after |
| * it, whereas {whitespace} should generally have a * after it... |
| */ |
| |
| special_whitespace ({space}+|{comment}{newline}) |
| horiz_whitespace ({horiz_space}|{comment}) |
| whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*) |
| |
| /* |
| * To ensure that {quotecontinue} can be scanned without having to back up |
| * if the full pattern isn't matched, we include trailing whitespace in |
| * {quotestop}. This matches all cases where {quotecontinue} fails to match, |
| * except for {quote} followed by whitespace and just one "-" (not two, |
| * which would start a {comment}). To cover that we have {quotefail}. |
| * The actions for {quotestop} and {quotefail} must throw back characters |
| * beyond the quote proper. |
| */ |
| quote ' |
| quotestop {quote}{whitespace}* |
| quotecontinue {quote}{whitespace_with_newline}{quote} |
| quotefail {quote}{whitespace}*"-" |
| |
| /* Bit string |
| * It is tempting to scan the string for only those characters |
| * which are allowed. However, this leads to silently swallowed |
| * characters if illegal characters are included in the string. |
| * For example, if xbinside is [01] then B'ABCD' is interpreted |
| * as a zero-length string, and the ABCD' is lost! |
| * Better to pass the string forward and let the input routines |
| * validate the contents. |
| */ |
| xbstart [bB]{quote} |
| xbinside [^']* |
| |
| /* Hexadecimal number */ |
| xhstart [xX]{quote} |
| xhinside [^']* |
| |
| /* National character */ |
| xnstart [nN]{quote} |
| |
| /* Quoted string that allows backslash escapes */ |
| xestart [eE]{quote} |
| xeinside [^\\']+ |
| xeescape [\\][^0-7] |
| xeoctesc [\\][0-7]{1,3} |
| xehexesc [\\]x[0-9A-Fa-f]{1,2} |
| |
| /* Extended quote |
| * xqdouble implements embedded quote, '''' |
| */ |
| xqstart {quote} |
| xqdouble {quote}{quote} |
| xqinside [^']+ |
| |
| /* $foo$ style quotes ("dollar quoting") |
| * The quoted string starts with $foo$ where "foo" is an optional string |
| * in the form of an identifier, except that it may not contain "$", |
| * and extends to the first occurrence of an identical string. |
| * There is *no* processing of the quoted text. |
| * |
| * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim} |
| * fails to match its trailing "$". |
| */ |
| dolq_start [A-Za-z\200-\377_] |
| dolq_cont [A-Za-z\200-\377_0-9] |
| dolqdelim \$({dolq_start}{dolq_cont}*)?\$ |
| dolqfailed \${dolq_start}{dolq_cont}* |
| dolqinside [^$]+ |
| |
| /* Double quote |
| * Allows embedded spaces and other special characters into identifiers. |
| */ |
| dquote \" |
| xdstart {dquote} |
| xdstop {dquote} |
| xddouble {dquote}{dquote} |
| xdinside [^"]+ |
| |
| /* C-style comments |
| * |
| * The "extended comment" syntax closely resembles allowable operator syntax. |
| * The tricky part here is to get lex to recognize a string starting with |
| * slash-star as a comment, when interpreting it as an operator would produce |
| * a longer match --- remember lex will prefer a longer match! Also, if we |
| * have something like plus-slash-star, lex will think this is a 3-character |
| * operator whereas we want to see it as a + operator and a comment start. |
| * The solution is two-fold: |
| * 1. append {op_chars}* to xcstart so that it matches as much text as |
| * {operator} would. Then the tie-breaker (first matching rule of same |
| * length) ensures xcstart wins. We put back the extra stuff with yyless() |
| * in case it contains a star-slash that should terminate the comment. |
| * 2. In the operator rule, check for slash-star within the operator, and |
| * if found throw it back with yyless(). This handles the plus-slash-star |
| * problem. |
| * Dash-dash comments have similar interactions with the operator rule. |
| */ |
| xcstart \/\*{op_chars}* |
| xcstop \*+\/ |
| xcinside [^*/]+ |
| |
| digit [0-9] |
| ident_start [A-Za-z\200-\377_] |
| ident_cont [A-Za-z\200-\377_0-9\$] |
| |
| identifier {ident_start}{ident_cont}* |
| |
| typecast "::" |
| |
| /* |
| * "self" is the set of chars that should be returned as single-character |
| * tokens. "op_chars" is the set of chars that can make up "Op" tokens, |
| * which can be one or more characters long (but if a single-char token |
| * appears in the "self" set, it is not to be returned as an Op). Note |
| * that the sets overlap, but each has some chars that are not in the other. |
| * |
| * If you change either set, adjust the character lists appearing in the |
| * rule for "operator"! |
| */ |
| self [,()\[\].;\:\+\-\*\/\%\^\<\>\=] |
| op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=] |
| operator {op_chars}+ |
| |
| /* we no longer allow unary minus in numbers. |
| * instead we pass it separately to parser. there it gets |
| * coerced via doNegate() -- Leon aug 20 1999 |
| * |
| * {realfail1} and {realfail2} are added to prevent the need for scanner |
| * backup when the {real} rule fails to match completely. |
| */ |
| |
| integer {digit}+ |
| decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*)) |
| real ({integer}|{decimal})[Ee][-+]?{digit}+ |
| realfail1 ({integer}|{decimal})[Ee] |
| realfail2 ({integer}|{decimal})[Ee][-+] |
| |
| param \${integer} |
| |
| other . |
| |
| /* |
| * Dollar quoted strings are totally opaque, and no escaping is done on them. |
| * Other quoted strings must allow some special characters such as single-quote |
| * and newline. |
| * Embedded single-quotes are implemented both in the SQL standard |
| * style of two adjacent single quotes "''" and in the Postgres/Java style |
| * of escaped-quote "\'". |
| * Other embedded escaped characters are matched explicitly and the leading |
| * backslash is dropped from the string. |
| * Note that xcstart must appear before operator, as explained above! |
| * Also whitespace (comment) must appear before operator. |
| */ |
| |
| %% |
| |
| {whitespace} { |
| /* Whitespace is not skipped: it is returned as a token of its own. */ |
| SET_YYLLOC(); |
| /* str points straight at yytext; no copy is made here */ |
| yylval.val.str = yytext; |
| yylval.val.modificator = NULL; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_WHITESPACE; |
| } |
| |
| {comment} { |
| /* |
| * A dash-dash comment; the pattern stops before the newline. |
| * It is returned as X_COMMENT with the "sc" modificator and, as |
| * above, str points at yytext rather than at a copy. |
| */ |
| SET_YYLLOC(); |
| yylval.val.str = yytext; |
| yylval.val.modificator = "sc"; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_COMMENT; |
| } |
| |
| |
| {xcstart} { |
| /* |
| * Opening of a slash-star comment.  The comment text is gathered |
| * in the literal buffer and handed back as one X_COMMENT token. |
| */ |
| /* Set location in case of syntax error in comment */ |
| SET_YYLLOC(); |
| xcdepth = 0; |
| BEGIN(xc); |
| startlit(); |
| addlitchar('/'); |
| addlitchar('*'); |
| |
| /* Put back any characters past slash-star; see above */ |
| yyless(2); |
| } |
| |
| <xc>{xcstart} { |
| /* A nested comment opens: count the level and keep its text */ |
| xcdepth++; |
| addlitchar('/'); |
| addlitchar('*'); |
| |
| /* Put back any characters past slash-star; see above */ |
| yyless(2); |
| } |
| |
| <xc>{xcstop} { |
| /* |
| * The terminator pattern accepts one or more stars before the |
| * slash, so append the matched text itself.  Appending a fixed |
| * star-slash pair would drop the extra stars from the comment. |
| */ |
| addlit(yytext, yyleng); |
| |
| if (xcdepth <= 0) |
| { |
| /* outermost comment closed: return the collected text */ |
| BEGIN(INITIAL); |
| yylval.val.str = litbufdup(); |
| yylval.val.modificator = "ec"; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_COMMENT; |
| } |
| |
| /* only a nested comment was closed; keep scanning the outer one */ |
| xcdepth--; |
| } |
| |
| <xc>{xcinside} { |
| addlit(yytext, yyleng); |
| } |
| |
| <xc>{op_chars} { |
| addlit(yytext, yyleng); |
| } |
| |
| <xc>\*+ { |
| addlit(yytext, yyleng); |
| } |
| |
| <xc><<EOF>> { |
| /* |
| * Input ended inside the comment: return what was collected, |
| * tagged "ecu".  Leave the xc state first so that the next call |
| * is served by the INITIAL end-of-input rule and the scan ends, |
| * rather than this rule being entered again. |
| */ |
| BEGIN(INITIAL); |
| yylval.val.str = litbufdup(); |
| yylval.val.modificator = "ecu"; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_COMMENT; |
| } |
| |
| {xbstart} { |
| /* Binary bit type. |
| * At some point we should simply pass the string |
| * forward to the parser and label it there. |
| * In the meantime, place a leading "b" on the string |
| * to mark it for the input routine as a binary string. |
| */ |
| SET_YYLLOC(); |
| BEGIN(xb); |
| startlit(); |
| addlitchar('b'); |
| } |
| <xb>{quotestop} | |
| <xb>{quotefail} { |
| /* keep only the closing quote; whatever followed it is rescanned */ |
| yyless(1); |
| BEGIN(INITIAL); |
| yylval.val.str = litbufdup(); |
| yylval.val.modificator = "b"; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_NCONST; |
| } |
| <xh>{xhinside} | |
| <xb>{xbinside} { |
| /* contents are collected unchecked; validation is left to the consumer */ |
| addlit(yytext, yyleng); |
| } |
| <xh>{quotecontinue} | |
| <xb>{quotecontinue} { |
| /* ignore */ |
| } |
| <xb><<EOF>> { |
| /* |
| * Input ended inside the bit string: return the collected text |
| * tagged "bu".  Leave the xb state first so that the next call |
| * is served by the INITIAL end-of-input rule and the scan ends, |
| * rather than this rule being entered again. |
| */ |
| BEGIN(INITIAL); |
| yylval.val.str = litbufdup(); |
| yylval.val.modificator = "bu"; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_NCONST; |
| } |
| |
| {xhstart} { |
| /* Hexadecimal bit type. |
| * At some point we should simply pass the string |
| * forward to the parser and label it there. |
| * In the meantime, place a leading "x" on the string |
| * to mark it for the input routine as a hex string. |
| */ |
| SET_YYLLOC(); |
| BEGIN(xh); |
| startlit(); |
| addlitchar('x'); |
| } |
| <xh>{quotestop} | |
| <xh>{quotefail} { |
| /* keep only the closing quote; whatever followed it is rescanned */ |
| yyless(1); |
| BEGIN(INITIAL); |
| yylval.val.str = litbufdup(); |
| yylval.val.modificator = "x"; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_NCONST; |
| } |
| <xh><<EOF>> { |
| /* |
| * Input ended inside the hex string: return the collected text |
| * tagged "xu", leaving the xh state first for the same reason as |
| * in the xb end-of-input rule. |
| */ |
| BEGIN(INITIAL); |
| yylval.val.str = litbufdup(); |
| yylval.val.modificator = "xu"; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_NCONST; |
| } |
| |
| {xnstart} { |
| /* National character. |
| * We will pass this along as a normal character string, |
| * but preceded with an internally-generated "NCHAR". |
| */ |
| const ScanKeyword *keyword; |
| |
| SET_YYLLOC(); |
| /* |
| * The quote is pushed back here, so the following call scans |
| * it as the start of an ordinary quoted string. |
| */ |
| yyless(1); /* eat only 'n' this time */ |
| /* nchar had better be a keyword! */ |
| keyword = ScanKeywordLookup("nchar" ScanKeywordLookupArgs); |
| Assert(keyword != NULL); |
| /* report the keyword table entry, not the matched text */ |
| yylval.val.str = (char*) keyword->name; |
| yylval.val.keycode = keyword->value; |
| yylval.val.modificator = NULL; |
| yylval.val.sep = NULL; |
| return X_KEYWORD; |
| } |
| |
| {xqstart} { |
| /* plain quoted string: collect its contents in the literal buffer */ |
| SET_YYLLOC(); |
| BEGIN(xq); |
| extended_string = false; |
| startlit(); |
| } |
| {xestart} { |
| /* E-prefixed string: same collection, with backslash escapes decoded */ |
| SET_YYLLOC(); |
| BEGIN(xe); |
| extended_string = true; |
| startlit(); |
| } |
| <xq,xe>{quotestop} | |
| <xq,xe>{quotefail} { |
| /* keep only the closing quote; whatever followed it is rescanned */ |
| yyless(1); |
| BEGIN(INITIAL); |
| yylval.val.str = litbufdup(); |
| yylval.val.modificator = extended_string ? "es" : "qs"; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_SCONST; |
| } |
| <xq,xe>{xqdouble} { |
| /* a doubled quote stands for one quote character */ |
| addlitchar('\''); |
| } |
| <xq>{xqinside} { |
| addlit(yytext, yyleng); |
| } |
| <xe>{xeinside} { |
| addlit(yytext, yyleng); |
| } |
| <xe>{xeescape} { |
| /* yytext[1] is the character following the backslash */ |
| addlitchar(unescape_single_char(yytext[1])); |
| } |
| <xe>{xeoctesc} { |
| /* up to three octal digits after the backslash, stored as one byte */ |
| unsigned char c = strtoul(yytext+1, NULL, 8); |
| |
| addlitchar(c); |
| } |
| <xe>{xehexesc} { |
| /* one or two hex digits after the backslash and x, stored as one byte */ |
| unsigned char c = strtoul(yytext+2, NULL, 16); |
| |
| addlitchar(c); |
| } |
| <xq,xe>{quotecontinue} { |
| /* ignore */ |
| } |
| <xe>. { |
| /* This is only needed for \ just before EOF */ |
| addlitchar(yytext[0]); |
| } |
| <xq,xe><<EOF>> { |
| /* |
| * Input ended inside the string: return the collected text |
| * tagged "esu" or "qsu".  Leave the string state first so that |
| * the next call is served by the INITIAL end-of-input rule and |
| * the scan ends, rather than this rule being entered again. |
| */ |
| BEGIN(INITIAL); |
| yylval.val.str = litbufdup(); |
| yylval.val.modificator = extended_string ? "esu" : "qsu"; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_SCONST; |
| } |
| |
| {dolqdelim} { |
| /* remember the opening delimiter; the string ends at an identical one */ |
| SET_YYLLOC(); |
| dolqstart = pstrdup(yytext); |
| BEGIN(xdolq); |
| startlit(); |
| } |
| {dolqfailed} { |
| /* |
| * Record the token position like every other rule that returns |
| * a token from the INITIAL state; without it the token would |
| * carry the location of the previous token. |
| */ |
| SET_YYLLOC(); |
| /* throw back all but the initial "$" */ |
| yyless(1); |
| /* and treat it as {other} */ |
| yylval.val.str = yytext; |
| yylval.val.modificator = "dolqf"; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_OTHERS; |
| } |
| <xdolq>{dolqdelim} { |
| if (strcmp(yytext, dolqstart) == 0) |
| { |
| /* matching delimiter: return the body, with the delimiter in sep */ |
| yylval.val.sep = dolqstart; |
| yylval.val.modificator = "dolq"; |
| BEGIN(INITIAL); |
| yylval.val.str = litbufdup(); |
| yylval.val.keycode = -1; |
| return X_SCONST; |
| } |
| else |
| { |
| /* |
| * When we fail to match $...$ to dolqstart, transfer |
| * the $... part to the output, but put back the final |
| * $ for rescanning. Consider $delim$...$junk$delim$ |
| */ |
| addlit(yytext, yyleng-1); |
| yyless(yyleng-1); |
| } |
| } |
| <xdolq>{dolqinside} { |
| addlit(yytext, yyleng); |
| } |
| <xdolq>{dolqfailed} { |
| addlit(yytext, yyleng); |
| } |
| <xdolq>. { |
| /* This is only needed for inside the quoted text */ |
| addlitchar(yytext[0]); |
| } |
| <xdolq><<EOF>> { |
| /* |
| * Input ended inside the dollar-quoted string: return the |
| * collected text tagged "dolqu".  The opening delimiter stays |
| * in sep, as it does for a terminated string; it is no longer |
| * overwritten with NULL after being assigned.  Leave the xdolq |
| * state first so that the next call is served by the INITIAL |
| * end-of-input rule and the scan ends, rather than this rule |
| * being entered again. |
| */ |
| BEGIN(INITIAL); |
| yylval.val.sep = dolqstart; |
| yylval.val.modificator = "dolqu"; |
| yylval.val.str = litbufdup(); |
| yylval.val.keycode = -1; |
| return X_SCONST; |
| } |
| |
| {xdstart} { |
| /* double-quoted identifier: collect its contents in the literal buffer */ |
| SET_YYLLOC(); |
| BEGIN(xd); |
| startlit(); |
| } |
| <xd>{xdstop} { |
| char *ident; |
| |
| BEGIN(INITIAL); |
| if (literallen == 0) |
| yyerror("zero-length delimited identifier"); |
| ident = litbufdup(); |
| /* the copy is truncated in place when it reaches NAMEDATALEN bytes */ |
| if (literallen >= NAMEDATALEN) |
| truncate_identifier(ident, literallen, true); |
| yylval.val.modificator = "dq"; |
| yylval.val.str = ident; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_IDENT; |
| } |
| <xd>{xddouble} { |
| /* a doubled double-quote stands for one double-quote character */ |
| addlitchar('"'); |
| } |
| <xd>{xdinside} { |
| addlit(yytext, yyleng); |
| } |
| <xd><<EOF>> { |
| /* |
| * Input ended inside the quoted identifier: return the collected |
| * text tagged "dqu", without the length checks applied to a |
| * terminated identifier.  Leave the xd state first so that the |
| * next call is served by the INITIAL end-of-input rule and the |
| * scan ends, rather than this rule being entered again. |
| */ |
| BEGIN(INITIAL); |
| yylval.val.modificator = "dqu"; |
| yylval.val.str = litbufdup(); |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_IDENT; |
| } |
| {typecast} { |
| /* |
| * The double-colon cast operator.  str is set to the matched |
| * text as in the other rules; yylval keeps its contents between |
| * calls, so leaving str unassigned would hand out the string of |
| * the previous token. |
| */ |
| SET_YYLLOC(); |
| yylval.val.str = yytext; |
| yylval.val.modificator = "typecast"; |
| yylval.val.keycode = X_TYPECAST; |
| yylval.val.sep = NULL; |
| return X_OTHERS; |
| } |
| |
| {self} { |
| /* single-character token: keycode is the character itself */ |
| SET_YYLLOC(); |
| yylval.val.str = yytext; |
| yylval.val.modificator = "self"; |
| yylval.val.keycode = yytext[0]; |
| yylval.val.sep = NULL; |
| return X_OTHERS; |
| } |
| |
| {operator} { |
| /* |
| * Check for embedded slash-star or dash-dash; those |
| * are comment starts, so operator must stop there. |
| * Note that slash-star or dash-dash at the first |
| * character will match a prior rule, not this one. |
| */ |
| int nchars = yyleng; |
| char *slashstar = strstr(yytext, "/*"); |
| char *dashdash = strstr(yytext, "--"); |
| |
| /* from here on slashstar holds the earliest comment start, if any */ |
| if (slashstar && dashdash) |
| { |
| /* if both appear, take the first one */ |
| if (slashstar > dashdash) |
| slashstar = dashdash; |
| } |
| else if (!slashstar) |
| slashstar = dashdash; |
| if (slashstar) |
| nchars = slashstar - yytext; |
| |
| /* |
| * For SQL compatibility, '+' and '-' cannot be the |
| * last char of a multi-char operator unless the operator |
| * contains chars that are not in SQL operators. |
| * The idea is to lex '=-' as two operators, but not |
| * to forbid operator names like '?-' that could not be |
| * sequences of SQL operators. |
| */ |
| while (nchars > 1 && |
| (yytext[nchars-1] == '+' || |
| yytext[nchars-1] == '-')) |
| { |
| int ic; |
| |
| /* look leftwards for a character that is not an SQL operator char */ |
| for (ic = nchars-2; ic >= 0; ic--) |
| { |
| if (strchr("~!@#^&|`?%", yytext[ic])) |
| break; |
| } |
| if (ic >= 0) |
| break; /* found a char that makes it OK */ |
| nchars--; /* else remove the +/-, and check again */ |
| } |
| |
| SET_YYLLOC(); |
| |
| if (nchars < yyleng) |
| { |
| /* Strip the unwanted chars from the token */ |
| yyless(nchars); |
| /* |
| * If what we have left is only one char, and it's |
| * one of the characters matching "self", then |
| * return it as a character token the same way |
| * that the "self" rule would have. |
| */ |
| /* NOTE(review): modificator is NULL here, but "self" in the self rule */ |
| if (nchars == 1 && |
| strchr(",()[].;:+-*/%^<>=", yytext[0])) |
| { |
| yylval.val.str = yytext; |
| yylval.val.modificator = NULL; |
| yylval.val.keycode = yytext[0]; |
| yylval.val.sep = NULL; |
| return X_OTHERS; |
| } |
| } |
| |
| /* |
| * Complain if operator is too long. Unlike the case |
| * for identifiers, we make this an error not a notice- |
| * and-truncate, because the odds are we are looking at |
| * a syntactic mistake anyway. |
| */ |
| if (nchars >= NAMEDATALEN) |
| yyerror("operator too long"); |
| |
| /* Convert "!=" operator to "<>" for compatibility */ |
| yylval.val.modificator = NULL; |
| /* either way str is a fresh copy, not a pointer into yytext */ |
| if (strcmp(yytext, "!=") == 0) |
| yylval.val.str = pstrdup("<>"); |
| else |
| yylval.val.str = pstrdup(yytext); |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_OP; |
| } |
| |
| {param} { |
| /* a dollar sign followed by digits; the text is passed on unparsed */ |
| SET_YYLLOC(); |
| yylval.val.modificator = NULL; |
| yylval.val.str = yytext; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_PARAM; |
| } |
| |
| {integer} { |
| /* |
| * A run of digits.  The converted value is used only to decide |
| * how to tag the token; the token always carries the text. |
| */ |
| long val; |
| char* endptr; |
| |
| SET_YYLLOC(); |
| /* cleared first so that ERANGE can only come from this strtol call */ |
| errno = 0; |
| val = strtol(yytext, &endptr, 10); |
| if (*endptr != '\0' || errno == ERANGE |
| #ifdef HAVE_LONG_INT_64 |
| /* if long > 32 bits, check for overflow of int4 */ |
| || val != (long) ((int32) val) |
| #endif |
| ) |
| { |
| /* integer too large, treat it as a float */ |
| yylval.val.str = pstrdup(yytext); |
| yylval.val.modificator = "f"; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_NCONST; |
| } |
| /* fits: tagged "i"; note that str points at yytext, not at a copy */ |
| yylval.val.str = yytext; |
| yylval.val.modificator = "i"; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_NCONST; |
| } |
| {decimal} { |
| /* digits with a decimal point: returned as copied text tagged "f" */ |
| SET_YYLLOC(); |
| yylval.val.str = pstrdup(yytext); |
| yylval.val.modificator = "f"; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_NCONST; |
| } |
| {real} { |
| /* number with an exponent part: returned as copied text tagged "f" */ |
| SET_YYLLOC(); |
| yylval.val.str = pstrdup(yytext); |
| yylval.val.modificator = "f"; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_NCONST; |
| } |
| {realfail1} { |
| /* |
| * throw back the [Ee], and treat as {decimal}. Note |
| * that it is possible the input is actually {integer}, |
| * but since this case will almost certainly lead to a |
| * syntax error anyway, we don't bother to distinguish. |
| */ |
| yyless(yyleng-1); |
| SET_YYLLOC(); |
| /* yytext now ends before the character that was thrown back */ |
| yylval.val.str = pstrdup(yytext); |
| yylval.val.modificator = "f"; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_NCONST; |
| } |
| {realfail2} { |
| /* throw back the [Ee][+-], and proceed as above */ |
| yyless(yyleng-2); |
| SET_YYLLOC(); |
| yylval.val.str = pstrdup(yytext); |
| yylval.val.modificator = "f"; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_NCONST; |
| } |
| |
| |
| {identifier} { |
| /* |
| * An unquoted word.  It is returned as X_KEYWORD when the |
| * keyword lookup finds it, otherwise as X_IDENT. |
| */ |
| const ScanKeyword *keyword; |
| char *ident; |
| |
| SET_YYLLOC(); |
| |
| /* Is it a keyword? */ |
| keyword = ScanKeywordLookup(yytext ScanKeywordLookupArgs); |
| if (keyword != NULL) |
| { |
| /* report the keyword table entry, not the matched text */ |
| yylval.val.str = (char*) keyword->name; |
| yylval.val.keycode = keyword->value; |
| yylval.val.modificator = NULL; |
| yylval.val.sep = NULL; |
| return X_KEYWORD; |
| } |
| |
| /* |
| * No. Convert the identifier to lower case, and truncate |
| * if necessary. |
| */ |
| ident = downcase_truncate_identifier(yytext, yyleng, true); |
| yylval.val.str = ident; |
| yylval.val.modificator = NULL; |
| yylval.val.keycode = -1; |
| yylval.val.sep = NULL; |
| return X_IDENT; |
| } |
| |
| {other} { |
| /* |
| * Any single character no earlier rule claimed.  It is returned |
| * as X_OTHERS with the character itself as keycode; str points |
| * at yytext rather than at a copy. |
| */ |
| SET_YYLLOC(); |
| yylval.val.str = yytext; |
| yylval.val.modificator = NULL; |
| yylval.val.keycode = yytext[0]; |
| yylval.val.sep = NULL; |
| return X_OTHERS; |
| } |
| |
| <<EOF>> { |
| /* end of input in the INITIAL state: record the position and stop */ |
| SET_YYLLOC(); |
| yyterminate(); |
| } |
| |
| %% |
| |
| /* |
| * lexer_errposition |
| * Attach the position of the current token to an error report. |
| * |
| * Intended to be called as one of the auxiliary items inside ereport(). |
| * The token's byte offset (yylloc) is turned into a 1-based character |
| * index by counting the multibyte characters of scanbuf that precede it; |
| * whatever errposition() returns for that index is passed back. |
| * |
| * This reads scanbuf, so it is only usable for messages raised while the |
| * scan buffer is still valid. |
| */ |
| static int |
| lexer_errposition(void) |
| { |
| /* characters in front of the token, plus one for 1-based numbering */ |
| return errposition(_pg_mbstrlen_with_len(scanbuf, yylloc) + 1); |
| } |
| |
| /* |
| * yyerror |
| * Raise a syntax error for the lexer or the grammar. |
| * |
| * The report points at the token scanned most recently (scanbuf + yylloc). |
| * When that position holds the end-of-buffer marker the message reads |
| * "... at end of input"; otherwise the input text starting at the token is |
| * quoted after "at or near".  Both forms add the cursor position through |
| * lexer_errposition(). |
| * |
| * Because the position is always that of the latest token, using this for |
| * anything but errors detected at that token can give a misleading cursor. |
| */ |
| void |
| yyerror(const char *message) |
| { |
| const char *token_start = scanbuf + yylloc; |
| |
| if (*token_start != YY_END_OF_BUFFER_CHAR) |
| { |
| ereport(ERROR, |
| (errcode(ERRCODE_SYNTAX_ERROR), |
| /* translator: first %s is typically "syntax error" */ |
| errmsg("%s at or near \"%s\"", _(message), token_start), |
| lexer_errposition())); |
| } |
| else |
| { |
| ereport(ERROR, |
| (errcode(ERRCODE_SYNTAX_ERROR), |
| /* translator: %s is typically "syntax error" */ |
| errmsg("%s at end of input", _(message)), |
| lexer_errposition())); |
| } |
| } |
| |
| |
| /* |
| * orafce_sql_scanner_init |
| * Prepare the scanner to read the NUL-terminated string str. |
| * |
| * Must be called before any tokens are requested.  The input is copied |
| * into a freshly palloc'd buffer, the literal buffer is allocated and |
| * emptied, and the scanner is put into the INITIAL state. |
| */ |
| void |
| orafce_sql_scanner_init(const char *str) |
| { |
| Size input_len = strlen(str); |
| |
| /* a scan that was aborted by ereport() may have left its buffer behind */ |
| if (YY_CURRENT_BUFFER) |
| yy_delete_buffer(YY_CURRENT_BUFFER); |
| |
| /* |
| * flex wants the scan buffer to end with two end-of-buffer characters, |
| * so allocate two extra bytes behind the copied input for them. |
| * NOTE(review): scanbuflen is not declared in this file; presumably it |
| * comes from the grammar file this scanner is compiled into -- confirm. |
| */ |
| scanbuflen = input_len; |
| scanbuf = palloc(input_len + 2); |
| memcpy(scanbuf, str, input_len); |
| scanbuf[input_len + 1] = YY_END_OF_BUFFER_CHAR; |
| scanbuf[input_len] = YY_END_OF_BUFFER_CHAR; |
| scanbufhandle = yy_scan_buffer(scanbuf, input_len + 2); |
| |
| /* the literal buffer starts small; addlit and addlitchar grow it on demand */ |
| literalalloc = 128; |
| literalbuf = (char *) palloc(literalalloc); |
| startlit(); |
| |
| BEGIN(INITIAL); |
| } |
| |
| |
| /* |
| * Called after parsing is done to clean up after orafce_sql_scanner_init(). |
| * |
| * Deletes the flex buffer handle and frees the scan buffer.  The literal |
| * buffer is not released here. |
| */ |
| void |
| orafce_sql_scanner_finish(void) |
| { |
| yy_delete_buffer(scanbufhandle); |
| pfree(scanbuf); |
| } |
| |
| /* |
| * addlit |
| * Append yleng bytes starting at ytext to the literal buffer. |
| * |
| * The allocation is doubled until the new contents plus a terminating NUL |
| * fit, and the buffer is left NUL-terminated. |
| */ |
| static void |
| addlit(char *ytext, int yleng) |
| { |
| int needed = literallen + yleng; |
| |
| /* ">=" rather than ">" keeps one byte free for the terminator */ |
| if (needed >= literalalloc) |
| { |
| while (needed >= literalalloc) |
| literalalloc *= 2; |
| literalbuf = (char *) repalloc(literalbuf, literalalloc); |
| } |
| |
| memcpy(&literalbuf[literallen], ytext, yleng); |
| literallen = needed; |
| literalbuf[needed] = '\0'; |
| } |
| |
| |
| /* |
| * addlitchar |
| * Append the single byte ychar to the literal buffer. |
| * |
| * The allocation is doubled when the byte and a terminating NUL would not |
| * fit, and the buffer is left NUL-terminated. |
| */ |
| static void |
| addlitchar(unsigned char ychar) |
| { |
| /* room is needed for the new byte and for the terminator behind it */ |
| if (literallen + 1 >= literalalloc) |
| { |
| literalalloc *= 2; |
| literalbuf = (char *) repalloc(literalbuf, literalalloc); |
| } |
| |
| literalbuf[literallen++] = ychar; |
| literalbuf[literallen] = '\0'; |
| } |
| |
| |
| /* |
| * litbufdup |
| * Return a palloc'd copy of the current literal buffer contents. |
| * |
| * The length is already tracked in literallen, so the bytes are copied |
| * with memcpy instead of having pstrdup measure the string again; this is |
| * much cheaper for long literals. |
| */ |
| static char * |
| litbufdup(void) |
| { |
| int nbytes = literallen + 1; /* contents plus the trailing NUL */ |
| char *copy = palloc(nbytes); |
| |
| memcpy(copy, literalbuf, nbytes); |
| return copy; |
| } |
| |
| |
| /* |
| * unescape_single_char |
| * Map the character that follows a backslash to the byte it denotes. |
| * |
| * The letters b, f, n, r and t give the matching C control characters; |
| * every other character is returned unchanged. |
| */ |
| static unsigned char |
| unescape_single_char(unsigned char c) |
| { |
| if (c == 'b') |
| return '\b'; |
| if (c == 'f') |
| return '\f'; |
| if (c == 'n') |
| return '\n'; |
| if (c == 'r') |
| return '\r'; |
| if (c == 't') |
| return '\t'; |
| |
| /* not a recognized escape letter: the character stands for itself */ |
| return c; |
| } |
| |
| |