| %{ |
| /*------------------------------------------------------------------------- |
| * |
| * exprscan.l |
| * lexical scanner for pgbench backslash commands |
| * |
| * This lexer supports two operating modes: |
| * |
| * In INITIAL state, just parse off whitespace-separated words (this mode |
| * is basically equivalent to strtok(), which is what we used to use). |
| * |
| * In EXPR state, lex for the simple expression syntax of exprparse.y. |
| * |
| * In either mode, stop upon hitting newline or end of string. |
| * |
| * Note that this lexer operates within the framework created by psqlscan.l: |
| * it shares that lexer's PsqlScanState and buffer stack. |
| * |
| * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * src/bin/pgbench/exprscan.l |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "fe_utils/psqlscan_int.h" |
| |
| /* |
| * Context information for reporting errors in expressions.  These are set |
| * by expr_scanner_init() and consumed by expr_yyerror_more() when it calls |
| * syntax_error().  NOTE: being file-level statics, they are shared by every |
| * scanner instance even though the lexer is declared reentrant. |
| */ |
| static const char *expr_source = NULL; |
| static int expr_lineno = 0; |
| static int expr_start_offset = 0; |
| static const char *expr_command = NULL; |
| |
| /* |
| * Indicates whether last yylex() call read a newline.  yylex() clears this |
| * on entry and the {newline} rules set it just before returning 0; |
| * expr_yyerror_more() checks it to decide whether it must lex to end of line. |
| */ |
| static bool last_was_newline = false; |
| |
| /* |
| * Work around a bug in flex 2.5.35: it emits a couple of functions that |
| * it forgets to emit declarations for. Since we use -Wmissing-prototypes, |
| * this would cause warnings. Providing our own declarations should be |
| * harmless even when the bug gets fixed. |
| */ |
| extern int expr_yyget_column(yyscan_t yyscanner); |
| extern void expr_yyset_column(int column_no, yyscan_t yyscanner); |
| |
| /* LCOV_EXCL_START */ |
| |
| %} |
| |
| /* |
| * Except for the prefix, these options should match psqlscan.l.  The prefix |
| * gives the generated functions their expr_yy names. |
| */ |
| %option reentrant |
| %option bison-bridge |
| %option 8bit |
| %option never-interactive |
| %option nodefault |
| %option noinput |
| %option nounput |
| %option noyywrap |
| %option warn |
| %option prefix="expr_yy" |
| |
| /* |
| * Character classes.  Bytes \200-\377 (high bit set) are accepted by {alpha} |
| * and {alnum}, so they may appear in function and variable names. |
| */ |
| alpha [a-zA-Z\200-\377_] |
| digit [0-9] |
| alnum [A-Za-z\200-\377_0-9] |
| /* {space} + {nonspace} + {newline} should cover all characters */ |
| space [ \t\r\f\v] |
| nonspace [^ \t\r\f\v\n] |
| newline [\n] |
| |
| /* Line continuation marker: a backslash, an optional carriage return, then newline */ |
| continuation \\\r?{newline} |
| |
| /* |
| * Case insensitive keywords.  Each is spelled out as a sequence of two-letter |
| * character classes; no case-insensitivity option is set above. |
| */ |
| and [Aa][Nn][Dd] |
| or [Oo][Rr] |
| not [Nn][Oo][Tt] |
| case [Cc][Aa][Ss][Ee] |
| when [Ww][Hh][Ee][Nn] |
| then [Tt][Hh][Ee][Nn] |
| else [Ee][Ll][Ss][Ee] |
| end [Ee][Nn][Dd] |
| true [Tt][Rr][Uu][Ee] |
| false [Ff][Aa][Ll][Ss][Ee] |
| null [Nn][Uu][Ll][Ll] |
| is [Ii][Ss] |
| isnull [Ii][Ss][Nn][Uu][Ll][Ll] |
| notnull [Nn][Oo][Tt][Nn][Uu][Ll][Ll] |
| |
| /* |
| * Exclusive states.  EXPR is selected by expr_scanner_init(); because it is |
| * exclusive, rules written without a start-condition prefix (the INITIAL |
| * word-splitting rules) are not active while lexing an expression. |
| */ |
| %x EXPR |
| |
| %% |
| |
| %{ |
| /* |
| * Declare some local variables inside yylex(), for convenience. |
| * cur_state is the PsqlScanState stored as the scanner's yyextra. |
| */ |
| PsqlScanState cur_state = yyextra; |
| |
| /* |
| * Force flex into the state indicated by start_state. This has a |
| * couple of purposes: it lets some of the functions below set a new |
| * starting state without ugly direct access to flex variables, and it |
| * allows us to transition from one flex lexer to another so that we |
| * can lex different parts of the source string using separate lexers. |
| */ |
| BEGIN(cur_state->start_state); |
| |
| /* |
| * Reset was-newline flag.  Only the {newline} rules set it again, just |
| * before they return 0. |
| */ |
| last_was_newline = false; |
| %} |
| |
| /* INITIAL state: split the input into whitespace-separated words */ |
| |
| {nonspace}+ { |
| /* Found a word, emit it via psqlscan_emit() and return 1 */ |
| psqlscan_emit(cur_state, yytext, yyleng); |
| return 1; |
| } |
| |
| /* |
| * We need this rule to avoid returning "word\" instead of recognizing |
| * a continuation marker just after a word.  It produces a longer match |
| * than the plain-word rule above, so flex prefers it when both apply. |
| */ |
| {nonspace}+{continuation} { |
| /* |
| * Found "word\\\r?\n", emit and return just "word". |
| * |
| * yyleng - 2 indexes the character before the newline: either the |
| * backslash itself, or a carriage return, in which case step back once |
| * more.  wordlen then indexes the backslash, which equals the length of |
| * the word preceding it. |
| */ |
| int wordlen = yyleng - 2; |
| if (yytext[wordlen] == '\r') |
| wordlen--; |
| Assert(yytext[wordlen] == '\\'); |
| psqlscan_emit(cur_state, yytext, wordlen); |
| return 1; |
| } |
| |
| {space}+ { /* ignore whitespace between words */ } |
| |
| {continuation} { /* ignore a continuation marker not attached to a word */ } |
| |
| {newline} { |
| /* report end of command: remember we consumed the newline, return 0 */ |
| last_was_newline = true; |
| return 0; |
| } |
| |
|     /* EXPR state: tokens of the expression syntax parsed by exprparse.y */ |
| |
| <EXPR>{ |
| |
| "+" { return '+'; } |
| "-" { return '-'; } |
| "*" { return '*'; } |
| "/" { return '/'; } |
| "%" { return '%'; } /* C version, also in Pg SQL */ |
| "=" { return '='; } |
| "<>" { return NE_OP; } |
| "!=" { return NE_OP; } /* C version, also in Pg SQL */ |
| "<=" { return LE_OP; } |
| ">=" { return GE_OP; } |
| "<<" { return LS_OP; } |
| ">>" { return RS_OP; } |
| "<" { return '<'; } |
| ">" { return '>'; } |
| "|" { return '|'; } |
| "&" { return '&'; } |
| "#" { return '#'; } |
| "~" { return '~'; } |
| |
| "(" { return '('; } |
| ")" { return ')'; } |
| "," { return ','; } |
| |
|     /* |
|      * Keywords.  These rules must stay ahead of the {alpha}{alnum}* rule |
|      * below: for matches of equal length flex picks the rule listed first. |
|      */ |
| {and} { return AND_OP; } |
| {or} { return OR_OP; } |
| {not} { return NOT_OP; } |
| {is} { return IS_OP; } |
| {isnull} { return ISNULL_OP; } |
| {notnull} { return NOTNULL_OP; } |
| |
| {case} { return CASE_KW; } |
| {when} { return WHEN_KW; } |
| {then} { return THEN_KW; } |
| {else} { return ELSE_KW; } |
| {end} { return END_KW; } |
| |
| :{alnum}+ { |
|     /* Variable reference: hand back the name without the leading colon */ |
|     yylval->str = pg_strdup(yytext + 1); |
|     return VARIABLE; |
| } |
| |
| {null} { return NULL_CONST; } |
| {true} { |
|     yylval->bval = true; |
|     return BOOLEAN_CONST; |
| } |
| {false} { |
|     yylval->bval = false; |
|     return BOOLEAN_CONST; |
| } |
| "9223372036854775808" { |
|     /* |
|      * Special handling for PG_INT64_MIN, which can't |
|      * accurately be represented here, as the minus sign is |
|      * lexed separately and INT64_MIN can't be represented as |
|      * a positive integer. |
|      * |
|      * This rule must precede {digit}+, which matches the same |
|      * text at the same length. |
|      */ |
|     return MAXINT_PLUS_ONE_CONST; |
| } |
| {digit}+ { |
|     /* |
|      * On overflow, hand expr_yyerror_more a private copy of |
|      * yytext (it lexes on to end of line, which changes yytext). |
|      * Use pg_strdup like the other rules in this block rather |
|      * than bare strdup, whose NULL-on-failure result was never |
|      * checked. |
|      */ |
|     if (!strtoint64(yytext, true, &yylval->ival)) |
|         expr_yyerror_more(yyscanner, "bigint constant overflow", |
|                           pg_strdup(yytext)); |
|     return INTEGER_CONST; |
| } |
| {digit}+(\.{digit}*)?([eE][-+]?{digit}+)? { |
|     /* digits, optional fraction, optional exponent; copy as above */ |
|     if (!strtodouble(yytext, true, &yylval->dval)) |
|         expr_yyerror_more(yyscanner, "double constant overflow", |
|                           pg_strdup(yytext)); |
|     return DOUBLE_CONST; |
| } |
| \.{digit}+([eE][-+]?{digit}+)? { |
|     /* leading-dot form such as ".5"; copy as above */ |
|     if (!strtodouble(yytext, true, &yylval->dval)) |
|         expr_yyerror_more(yyscanner, "double constant overflow", |
|                           pg_strdup(yytext)); |
|     return DOUBLE_CONST; |
| } |
| {alpha}{alnum}* { |
|     /* Any other identifier is returned as a function name */ |
|     yylval->str = pg_strdup(yytext); |
|     return FUNCTION; |
| } |
| |
| {space}+ { /* ignore */ } |
| |
| {continuation} { /* ignore */ } |
| |
| {newline} { |
|     /* report end of command */ |
|     last_was_newline = true; |
|     return 0; |
| } |
| |
| . { |
|     /* |
|      * must strdup yytext so that expr_yyerror_more doesn't |
|      * change it while finding end of line |
|      */ |
|     expr_yyerror_more(yyscanner, "unexpected character", |
|                       pg_strdup(yytext)); |
|     /* NOTREACHED, syntax_error calls exit() */ |
|     return 0; |
| } |
| |
| } |
| |
| <<EOF>> { |
| if (cur_state->buffer_stack == NULL) |
| return 0; /* end of input reached: no stacked buffer is left */ |
| |
| /* |
| * We were expanding a variable, so pop the inclusion |
| * stack and keep lexing.  This action deliberately does not |
| * return, so yylex() goes on scanning for the next token.  Being |
| * unqualified, this EOF rule applies in both INITIAL and EXPR states. |
| */ |
| psqlscan_pop_buffer_stack(cur_state); |
| psqlscan_select_top_buffer(cur_state); |
| } |
| |
| %% |
| |
| /* LCOV_EXCL_STOP */ |
| |
| /* |
|  * Report a syntax error found while lexing or parsing an expression. |
|  * |
|  * message is the error text; more is an optional detail string (expr_yyerror |
|  * passes NULL).  Both are handed to syntax_error() together with the saved |
|  * error context, the full text of the offending line, and the position of |
|  * the error relative to the start of the expression. |
|  */ |
| void |
| expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more) |
| { |
|     PsqlScanState state = yyget_extra(yyscanner); |
|     YYSTYPE     scratch; |
|     char       *line_text; |
|     int         error_pos; |
|     int         token; |
| |
|     /* |
|      * Capture the error position first, before any further lexing moves |
|      * the end-of-token offset: it is the character just before that offset. |
|      */ |
|     error_pos = expr_scanner_offset(state) - 1; |
| |
|     /* |
|      * The rest of the line may not have been consumed yet.  Unless the last |
|      * token was already the newline, keep lexing until yylex() reports end |
|      * of command, so that the whole line can be shown.  (At EOF an extra |
|      * yylex() call is harmless.) |
|      */ |
|     if (!last_was_newline) |
|     { |
|         do |
|         { |
|             token = yylex(&scratch, yyscanner); |
|         } while (token != 0); |
|     } |
| |
|     /* Copy out the line, with any trailing newline trimmed */ |
|     line_text = expr_scanner_get_substring(state, |
|                                            expr_start_offset, |
|                                            expr_scanner_offset(state), |
|                                            true); |
| |
|     syntax_error(expr_source, expr_lineno, line_text, expr_command, |
|                  message, more, error_pos - expr_start_offset); |
| } |
| |
| /* |
|  * Report a syntax error that carries no detail string. |
|  * Thin wrapper around expr_yyerror_more(). |
|  */ |
| void |
| expr_yyerror(yyscan_t yyscanner, const char *message) |
| { |
|     const char *no_detail = NULL; |
| |
|     expr_yyerror_more(yyscanner, message, no_detail); |
| } |
| |
| /* |
|  * Lex one space-separated word of a backslash command. |
|  * |
|  * The word's text is placed in word_buf (which is reset first) and the |
|  * offset at which it starts in the scan string is stored in *offset; |
|  * *offset is set to -1 when there is no word. |
|  * |
|  * Returns true if a word was collected, false if at end of command. |
|  */ |
| bool |
| expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset) |
| { |
|     YYSTYPE     unused_lval; |
|     int         got_word; |
| |
|     /* Scanning must have been started already */ |
|     Assert(state->scanbufhandle != NULL); |
| |
|     /* Emit into the caller's buffer, starting from empty */ |
|     resetPQExpBuffer(word_buf); |
|     state->output_buf = word_buf; |
| |
|     /* Read from the buffer at the head of buffer_stack, else the main one */ |
|     if (state->buffer_stack == NULL) |
|         yy_switch_to_buffer(state->scanbufhandle, state->scanner); |
|     else |
|         yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); |
| |
|     /* Lex one token using the word-splitting (INITIAL) rules */ |
|     state->start_state = INITIAL; |
|     got_word = yylex(&unused_lval, state->scanner); |
| |
|     /* |
|      * Work out where the word began from the current end-of-token offset |
|      * and the word's length.  This could be done more efficiently, but it |
|      * is fine for now. |
|      */ |
|     if (!got_word) |
|         *offset = -1; |
|     else |
|         *offset = expr_scanner_offset(state) - word_buf->len; |
| |
|     /* |
|      * The caller may go back to the regular SQL lexer next, so put its |
|      * initial state back in place. |
|      */ |
|     psql_scan_reselect_sql_lexer(state); |
| |
|     return got_word != 0; |
| } |
| |
| /* |
|  * Set things up for lexing an expression via expr_yyparse(). |
|  * |
|  * source, lineno, start_offset and command are remembered as the context |
|  * used when reporting a syntax error in the expression. |
|  * |
|  * Returns the yyscan_t to pass to expr_yyparse().  (It is simply |
|  * state->scanner, but callers need not know that.) |
|  */ |
| yyscan_t |
| expr_scanner_init(PsqlScanState state, |
|                   const char *source, int lineno, int start_offset, |
|                   const char *command) |
| { |
|     /* Scanning must have been started already */ |
|     Assert(state->scanbufhandle != NULL); |
| |
|     /* Remember the error-reporting context */ |
|     expr_command = command; |
|     expr_start_offset = start_offset; |
|     expr_lineno = lineno; |
|     expr_source = source; |
| |
|     /* Expression lexing emits no text, so there is no output target */ |
|     state->output_buf = NULL; |
| |
|     /* Read from the buffer at the head of buffer_stack, else the main one */ |
|     if (state->buffer_stack == NULL) |
|         yy_switch_to_buffer(state->scanbufhandle, state->scanner); |
|     else |
|         yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); |
| |
|     /* Make the next yylex() call begin in the expression state */ |
|     state->start_state = EXPR; |
| |
|     return state->scanner; |
| } |
| |
| /* |
|  * Finish lexing an expression: put the SQL lexer's initial state back |
|  * in place for the scan state attached to this scanner. |
|  */ |
| void |
| expr_scanner_finish(yyscan_t yyscanner) |
| { |
|     psql_scan_reselect_sql_lexer(yyget_extra(yyscanner)); |
| } |
| |
| /* |
|  * Return the offset from the start of the scan string to the end of the |
|  * current lexer token. |
|  * |
|  * This depends on flex having overwritten the character after the current |
|  * token (yytext) with a NUL in the scan buffer, so that strlen() stops |
|  * there.  That might not be quite right while parsing a sub-buffer, but |
|  * pgbench never invokes that functionality, so it doesn't matter. |
|  */ |
| int |
| expr_scanner_offset(PsqlScanState state) |
| { |
|     size_t      consumed = strlen(state->scanbuf); |
| |
|     return (int) consumed; |
| } |
| |
| /* |
|  * Return a malloc'd, NUL-terminated copy of the lexer input string covering |
|  * start_offset up to (but not including) end_offset. |
|  * |
|  * If chomp is true, trailing newline and carriage-return characters are |
|  * left out of the copy. |
|  */ |
| char * |
| expr_scanner_get_substring(PsqlScanState state, |
|                            int start_offset, int end_offset, |
|                            bool chomp) |
| { |
|     const char *start = state->scanbuf + start_offset; |
|     int         len = end_offset - start_offset; |
|     char       *copy; |
| |
|     Assert(len >= 0); |
|     Assert(end_offset <= strlen(state->scanbuf)); |
| |
|     if (chomp) |
|     { |
|         /* Shorten the span while its last character is a line terminator */ |
|         for (; len > 0; len--) |
|         { |
|             char        last = start[len - 1]; |
| |
|             if (last != '\n' && last != '\r') |
|                 break; |
|         } |
|     } |
| |
|     copy = (char *) pg_malloc(len + 1); |
|     memcpy(copy, start, len); |
|     copy[len] = '\0'; |
| |
|     return copy; |
| } |
| |
| /* |
|  * Return the 1-based line number of the given string offset, which must |
|  * not be past the end of where we've lexed to.  The result is one more |
|  * than the number of newlines found before that offset. |
|  */ |
| int |
| expr_scanner_get_lineno(PsqlScanState state, int offset) |
| { |
|     const char *cursor; |
|     int         line = 1; |
| |
|     /* Walk forward until the offset is used up or the buffer's NUL is hit */ |
|     for (cursor = state->scanbuf; *cursor != '\0' && offset > 0; cursor++, offset--) |
|     { |
|         if (*cursor == '\n') |
|             line++; |
|     } |
| |
|     return line; |
| } |