| /*------------------------------------------------------------------------- |
| * |
| * parser.c |
| * Main entry point/driver for PostgreSQL grammar |
| * |
| * This should match src/backend/parser/parser.c, except that we do not |
| * need to bother with re-entrant interfaces. |
| * |
| * Note: ECPG doesn't report error location like the backend does. |
| * This file will need work if we ever want it to. |
| * |
| * |
| * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * IDENTIFICATION |
| * src/interfaces/ecpg/preproc/parser.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres_fe.h" |
| |
| #include "preproc_extern.h" |
| #include "preproc.h" |
| |
| |
| static bool have_lookahead; /* is lookahead info valid? */ |
| static int lookahead_token; /* one-token lookahead */ |
| static YYSTYPE lookahead_yylval; /* yylval for lookahead token */ |
| static YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */ |
| static char *lookahead_yytext; /* start current token */ |
| |
| static bool check_uescapechar(unsigned char escape); |
| static bool ecpg_isspace(char ch); |
| |
| |
| /* |
| * Intermediate filter between parser and base lexer (base_yylex in scan.l). |
| * |
| * This filter is needed because in some cases the standard SQL grammar |
| * requires more than one token lookahead. We reduce these cases to one-token |
| * lookahead by replacing tokens here, in order to keep the grammar LALR(1). |
| * |
| * Using a filter is simpler than trying to recognize multiword tokens |
| * directly in scan.l, because we'd have to allow for comments between the |
| * words. Furthermore it's not clear how to do that without re-introducing |
| * scanner backtrack, which would cost more performance than this filter |
| * layer does. |
| * |
| * We also use this filter to convert UIDENT and USCONST sequences into |
| * plain IDENT and SCONST tokens. While that could be handled by additional |
| * productions in the main grammar, it's more efficient to do it like this. |
| */ |
| int |
| filtered_base_yylex(void) |
| { |
| int cur_token; |
| int next_token; |
| YYSTYPE cur_yylval; |
| YYLTYPE cur_yylloc; |
| char *cur_yytext; |
| |
| /* Get next token --- we might already have it */ |
| if (have_lookahead) |
| { |
| cur_token = lookahead_token; |
| base_yylval = lookahead_yylval; |
| base_yylloc = lookahead_yylloc; |
| base_yytext = lookahead_yytext; |
| have_lookahead = false; |
| } |
| else |
| cur_token = base_yylex(); |
| |
| /* |
| * If this token isn't one that requires lookahead, just return it. |
| */ |
| switch (cur_token) |
| { |
| case NOT: |
| case NULLS_P: |
| case WITH: |
| case UIDENT: |
| case USCONST: |
| break; |
| default: |
| return cur_token; |
| } |
| |
| /* Save and restore lexer output variables around the call */ |
| cur_yylval = base_yylval; |
| cur_yylloc = base_yylloc; |
| cur_yytext = base_yytext; |
| |
| /* Get next token, saving outputs into lookahead variables */ |
| next_token = base_yylex(); |
| |
| lookahead_token = next_token; |
| lookahead_yylval = base_yylval; |
| lookahead_yylloc = base_yylloc; |
| lookahead_yytext = base_yytext; |
| |
| base_yylval = cur_yylval; |
| base_yylloc = cur_yylloc; |
| base_yytext = cur_yytext; |
| |
| have_lookahead = true; |
| |
| /* Replace cur_token if needed, based on lookahead */ |
| switch (cur_token) |
| { |
| case NOT: |
| /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */ |
| switch (next_token) |
| { |
| case BETWEEN: |
| case IN_P: |
| case LIKE: |
| case ILIKE: |
| case SIMILAR: |
| cur_token = NOT_LA; |
| break; |
| } |
| break; |
| |
| case NULLS_P: |
| /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */ |
| switch (next_token) |
| { |
| case FIRST_P: |
| case LAST_P: |
| cur_token = NULLS_LA; |
| break; |
| } |
| break; |
| |
| case WITH: |
| /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */ |
| switch (next_token) |
| { |
| case TIME: |
| case ORDINALITY: |
| cur_token = WITH_LA; |
| break; |
| } |
| break; |
| case UIDENT: |
| case USCONST: |
| /* Look ahead for UESCAPE */ |
| if (next_token == UESCAPE) |
| { |
| /* Yup, so get third token, which had better be SCONST */ |
| const char *escstr; |
| |
| /* |
| * Again save and restore lexer output variables around the |
| * call |
| */ |
| cur_yylval = base_yylval; |
| cur_yylloc = base_yylloc; |
| cur_yytext = base_yytext; |
| |
| /* Get third token */ |
| next_token = base_yylex(); |
| |
| if (next_token != SCONST) |
| mmerror(PARSE_ERROR, ET_ERROR, "UESCAPE must be followed by a simple string literal"); |
| |
| /* |
| * Save and check escape string, which the scanner returns |
| * with quotes |
| */ |
| escstr = base_yylval.str; |
| if (strlen(escstr) != 3 || !check_uescapechar(escstr[1])) |
| mmerror(PARSE_ERROR, ET_ERROR, "invalid Unicode escape character"); |
| |
| base_yylval = cur_yylval; |
| base_yylloc = cur_yylloc; |
| base_yytext = cur_yytext; |
| |
| /* Combine 3 tokens into 1 */ |
| base_yylval.str = psprintf("%s UESCAPE %s", base_yylval.str, escstr); |
| |
| /* Clear have_lookahead, thereby consuming all three tokens */ |
| have_lookahead = false; |
| } |
| |
| if (cur_token == UIDENT) |
| cur_token = IDENT; |
| else if (cur_token == USCONST) |
| cur_token = SCONST; |
| break; |
| } |
| |
| return cur_token; |
| } |
| |
| /* |
| * check_uescapechar() and ecpg_isspace() should match their equivalents |
| * in pgc.l. |
| */ |
| |
/*
 * check_uescapechar() --- may 'escape' be used as the escape character in
 * a UESCAPE clause?
 *
 * Hexadecimal digits, '+', the single and double quote characters, and
 * anything ecpg_isspace() accepts are rejected; every other character is
 * allowed.
 */
static bool
check_uescapechar(unsigned char escape)
{
	/* Punctuation that is never allowed as the escape character */
	switch (escape)
	{
		case '+':
		case '\'':
		case '"':
			return false;
		default:
			break;
	}

	if (isxdigit(escape))
		return false;

	return !ecpg_isspace(escape);
}
| |
/*
 * ecpg_isspace() --- is 'ch' one of the characters the flex scanner treats
 * as whitespace (space, tab, newline, carriage return, form feed)?
 */
static bool
ecpg_isspace(char ch)
{
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\f':
			return true;
		default:
			return false;
	}
}