| %{ |
| /*------------------------------------------------------------------------- |
| * |
| * jsonpath_scan.l |
| * Lexical parser for jsonpath datatype |
| * |
| * Splits jsonpath string into tokens represented as JsonPathString structs. |
| * Decodes unicode and hex escaped strings. |
| * |
| * Copyright (c) 2019-2021, PostgreSQL Global Development Group |
| * |
| * IDENTIFICATION |
| * src/backend/utils/adt/jsonpath_scan.l |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| |
| #include "mb/pg_wchar.h" |
| #include "nodes/pg_list.h" |
| |
| static JsonPathString scanstring; |
| |
| /* Handles to the buffer that the lexer uses internally */ |
| static YY_BUFFER_STATE scanbufhandle; |
| static char *scanbuf; |
| static int scanbuflen; |
| |
| static void addstring(bool init, char *s, int l); |
| static void addchar(bool init, char s); |
| static enum yytokentype checkKeyword(void); |
| static void parseUnicode(char *s, int l); |
| static void parseHexChar(char *s); |
| |
| /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */ |
| #undef fprintf |
| #define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg) |
| |
| static void |
| fprintf_to_ereport(const char *fmt, const char *msg) |
| { |
| ereport(ERROR, (errmsg_internal("%s", msg))); |
| } |
| |
| /* LCOV_EXCL_START */ |
| |
| %} |
| |
| %option 8bit |
| %option never-interactive |
| %option nodefault |
| %option noinput |
| %option nounput |
| %option noyywrap |
| %option warn |
| %option prefix="jsonpath_yy" |
| %option bison-bridge |
| %option noyyalloc |
| %option noyyrealloc |
| %option noyyfree |
| |
| /* |
| * We use exclusive states for quoted and non-quoted strings, |
| * quoted variable names and C-style comments. |
| * Exclusive states: |
| * <xq> - quoted strings |
| * <xnq> - non-quoted strings |
| * <xvq> - quoted variable names |
| * <xc> - C-style comment |
| */ |
| |
| %x xq |
| %x xnq |
| %x xvq |
| %x xc |
| |
| special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/] |
| blank [ \t\n\r\f] |
| /* "other" means anything that's not special, blank, or '\' or '"' */ |
| other [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\" \t\n\r\f] |
| |
| digit [0-9] |
| integer (0|[1-9]{digit}*) |
| decimal {integer}\.{digit}+ |
| decimalfail {integer}\. |
| real ({integer}|{decimal})[Ee][-+]?{digit}+ |
| realfail1 ({integer}|{decimal})[Ee] |
| realfail2 ({integer}|{decimal})[Ee][-+] |
| |
| hex_dig [0-9A-Fa-f] |
| unicode \\u({hex_dig}{4}|\{{hex_dig}{1,6}\}) |
| unicodefail \\u({hex_dig}{0,3}|\{{hex_dig}{0,6}) |
| hex_char \\x{hex_dig}{2} |
| hex_fail \\x{hex_dig}{0,1} |
| |
| %% |
| |
	/*
	 * <xnq>: inside a non-quoted string (identifier or keyword candidate).
	 * Runs of "other" characters are appended to scanstring.
	 */
| <xnq>{other}+ { |
| addstring(false, yytext, yyleng); |
| } |
| |
	/* Blanks end the non-quoted string; checkKeyword() picks the token. */
| <xnq>{blank}+ { |
| yylval->str = scanstring; |
| BEGIN INITIAL; |
| return checkKeyword(); |
| } |
| |
	/*
	 * A comment opener also ends the non-quoted string and switches to <xc>.
	 * NOTE(review): the string is stored in yylval but no token is returned
	 * from this rule -- confirm that this is intended.
	 */
| <xnq>\/\* { |
| yylval->str = scanstring; |
| BEGIN xc; |
| } |
| |
	/*
	 * A special character or a double quote ends the non-quoted string.
	 * yyless(0) pushes the character back so that it is rescanned in the
	 * INITIAL state.
	 */
| <xnq>({special}|\") { |
| yylval->str = scanstring; |
| yyless(0); |
| BEGIN INITIAL; |
| return checkKeyword(); |
| } |
| |
	/* End of input ends the non-quoted string as well. */
| <xnq><<EOF>> { |
| yylval->str = scanstring; |
| BEGIN INITIAL; |
| return checkKeyword(); |
| } |
| |
	/*
	 * Backslash escapes, shared by non-quoted strings, quoted strings and
	 * quoted variable names.  Each one appends the decoded byte(s) to
	 * scanstring.
	 */
| <xnq,xq,xvq>\\b { addchar(false, '\b'); } |
| |
| <xnq,xq,xvq>\\f { addchar(false, '\f'); } |
| |
| <xnq,xq,xvq>\\n { addchar(false, '\n'); } |
| |
| <xnq,xq,xvq>\\r { addchar(false, '\r'); } |
| |
| <xnq,xq,xvq>\\t { addchar(false, '\t'); } |
| |
| <xnq,xq,xvq>\\v { addchar(false, '\v'); } |
| |
	/*
	 * One or more consecutive \u escapes are passed to parseUnicode() as a
	 * single chunk.
	 */
| <xnq,xq,xvq>{unicode}+ { parseUnicode(yytext, yyleng); } |
| |
	/* A single \xHH escape. */
| <xnq,xq,xvq>{hex_char} { parseHexChar(yytext); } |
| |
	/* Incomplete \u or \x escapes are reported as errors. */
| <xnq,xq,xvq>{unicode}*{unicodefail} { yyerror(NULL, "invalid unicode sequence"); } |
| |
| <xnq,xq,xvq>{hex_fail} { yyerror(NULL, "invalid hex character sequence"); } |
| |
| <xnq,xq,xvq>{unicode}+\\ { |
| /* throw back the \\, and treat as unicode */ |
| yyless(yyleng - 1); |
| parseUnicode(yytext, yyleng); |
| } |
| |
	/* Any other escaped character is appended as itself. */
| <xnq,xq,xvq>\\. { addchar(false, yytext[1]); } |
| |
	/* A backslash that matched none of the escape rules above. */
| <xnq,xq,xvq>\\ { yyerror(NULL, "unexpected end after backslash"); } |
| |
	/* Input ended before the closing double quote. */
| <xq,xvq><<EOF>> { yyerror(NULL, "unexpected end of quoted string"); } |
| |
	/* Closing quote of a quoted string. */
| <xq>\" { |
| yylval->str = scanstring; |
| BEGIN INITIAL; |
| return STRING_P; |
| } |
| |
	/* Closing quote of a quoted variable name. */
| <xvq>\" { |
| yylval->str = scanstring; |
| BEGIN INITIAL; |
| return VARIABLE_P; |
| } |
| |
	/* Ordinary characters inside quotes: anything but backslash and quote. */
| <xq,xvq>[^\\\"]+ { addstring(false, yytext, yyleng); } |
| |
	/*
	 * <xc>: inside a C-style comment.  Everything is discarded until the
	 * closing marker; running out of input first is an error.
	 */
| <xc>\*\/ { BEGIN INITIAL; } |
| |
| <xc>[^\*]+ { } |
| |
| <xc>\* { } |
| |
| <xc><<EOF>> { yyerror(NULL, "unexpected end of comment"); } |
| |
	/* INITIAL state: multi-character and comparison operators. */
| \&\& { return AND_P; } |
| |
| \|\| { return OR_P; } |
| |
| \! { return NOT_P; } |
| |
| \*\* { return ANY_P; } |
| |
| \< { return LESS_P; } |
| |
| \<\= { return LESSEQUAL_P; } |
| |
| \=\= { return EQUAL_P; } |
| |
| \<\> { return NOTEQUAL_P; } |
| |
| \!\= { return NOTEQUAL_P; } |
| |
| \>\= { return GREATEREQUAL_P; } |
| |
| \> { return GREATER_P; } |
| |
	/*
	 * Unquoted variable name: the leading '$' is skipped (yytext + 1) and the
	 * stored name is NUL-terminated.
	 */
| \${other}+ { |
| addstring(true, yytext + 1, yyleng - 1); |
| addchar(false, '\0'); |
| yylval->str = scanstring; |
| return VARIABLE_P; |
| } |
| |
	/*
	 * Start of a quoted variable name.  addchar(true, '\0') starts a fresh,
	 * empty scanstring.
	 */
| \$\" { |
| addchar(true, '\0'); |
| BEGIN xvq; |
| } |
| |
	/* Any other special character is returned as its own character code. */
| {special} { return *yytext; } |
| |
| {blank}+ { /* ignore */ } |
| |
	/* Start of a C-style comment. */
| \/\* { |
| addchar(true, '\0'); |
| BEGIN xc; |
| } |
| |
	/*
	 * Numeric literals.  The matched text is copied into a fresh,
	 * NUL-terminated scanstring and returned as NUMERIC_P or INT_P.
	 */
| {real} { |
| addstring(true, yytext, yyleng); |
| addchar(false, '\0'); |
| yylval->str = scanstring; |
| return NUMERIC_P; |
| } |
| |
| {decimal} { |
| addstring(true, yytext, yyleng); |
| addchar(false, '\0'); |
| yylval->str = scanstring; |
| return NUMERIC_P; |
| } |
| |
| {integer} { |
| addstring(true, yytext, yyleng); |
| addchar(false, '\0'); |
| yylval->str = scanstring; |
| return INT_P; |
| } |
| |
| {decimalfail} { |
| /* throw back the ., and treat as integer */ |
| yyless(yyleng - 1); |
| addstring(true, yytext, yyleng); |
| addchar(false, '\0'); |
| yylval->str = scanstring; |
| return INT_P; |
| } |
| |
	/* An exponent marker with no digits after it. */
| ({realfail1}|{realfail2}) { yyerror(NULL, "invalid floating point number"); } |
| |
	/* Start of a quoted string. */
| \" { |
| addchar(true, '\0'); |
| BEGIN xq; |
| } |
| |
	/*
	 * A backslash starts a non-quoted string.  It is pushed back with
	 * yyless(0) so that the <xnq> escape rules get to process it.
	 */
| \\ { |
| yyless(0); |
| addchar(true, '\0'); |
| BEGIN xnq; |
| } |
| |
	/* Any other run of ordinary characters starts a non-quoted string. */
| {other}+ { |
| addstring(true, yytext, yyleng); |
| BEGIN xnq; |
| } |
| |
| <<EOF>> { yyterminate(); } |
| |
| %% |
| |
| /* LCOV_EXCL_STOP */ |
| |
| void |
| jsonpath_yyerror(JsonPathParseResult **result, const char *message) |
| { |
| if (*yytext == YY_END_OF_BUFFER_CHAR) |
| { |
| ereport(ERROR, |
| (errcode(ERRCODE_SYNTAX_ERROR), |
| /* translator: %s is typically "syntax error" */ |
| errmsg("%s at end of jsonpath input", _(message)))); |
| } |
| else |
| { |
| ereport(ERROR, |
| (errcode(ERRCODE_SYNTAX_ERROR), |
| /* translator: first %s is typically "syntax error" */ |
| errmsg("%s at or near \"%s\" of jsonpath input", |
| _(message), yytext))); |
| } |
| } |
| |
| typedef struct JsonPathKeyword |
| { |
| int16 len; |
| bool lowercase; |
| int val; |
| const char *keyword; |
| } JsonPathKeyword; |
| |
| /* |
| * Array of key words should be sorted by length and then |
| * alphabetical order |
| */ |
| static const JsonPathKeyword keywords[] = { |
| { 2, false, IS_P, "is"}, |
| { 2, false, TO_P, "to"}, |
| { 3, false, ABS_P, "abs"}, |
| { 3, false, LAX_P, "lax"}, |
| { 4, false, FLAG_P, "flag"}, |
| { 4, false, LAST_P, "last"}, |
| { 4, true, NULL_P, "null"}, |
| { 4, false, SIZE_P, "size"}, |
| { 4, true, TRUE_P, "true"}, |
| { 4, false, TYPE_P, "type"}, |
| { 4, false, WITH_P, "with"}, |
| { 5, true, FALSE_P, "false"}, |
| { 5, false, FLOOR_P, "floor"}, |
| { 6, false, DOUBLE_P, "double"}, |
| { 6, false, EXISTS_P, "exists"}, |
| { 6, false, STARTS_P, "starts"}, |
| { 6, false, STRICT_P, "strict"}, |
| { 7, false, CEILING_P, "ceiling"}, |
| { 7, false, UNKNOWN_P, "unknown"}, |
| { 8, false, DATETIME_P, "datetime"}, |
| { 8, false, KEYVALUE_P, "keyvalue"}, |
| { 10,false, LIKE_REGEX_P, "like_regex"}, |
| }; |
| |
| /* Check if current scanstring value is a keyword */ |
| static enum yytokentype |
| checkKeyword() |
| { |
| int res = IDENT_P; |
| int diff; |
| const JsonPathKeyword *StopLow = keywords, |
| *StopHigh = keywords + lengthof(keywords), |
| *StopMiddle; |
| |
| if (scanstring.len > keywords[lengthof(keywords) - 1].len) |
| return res; |
| |
| while (StopLow < StopHigh) |
| { |
| StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); |
| |
| if (StopMiddle->len == scanstring.len) |
| diff = pg_strncasecmp(StopMiddle->keyword, scanstring.val, |
| scanstring.len); |
| else |
| diff = StopMiddle->len - scanstring.len; |
| |
| if (diff < 0) |
| StopLow = StopMiddle + 1; |
| else if (diff > 0) |
| StopHigh = StopMiddle; |
| else |
| { |
| if (StopMiddle->lowercase) |
| diff = strncmp(StopMiddle->keyword, scanstring.val, |
| scanstring.len); |
| |
| if (diff == 0) |
| res = StopMiddle->val; |
| |
| break; |
| } |
| } |
| |
| return res; |
| } |
| |
| /* |
| * Called before any actual parsing is done |
| */ |
| static void |
| jsonpath_scanner_init(const char *str, int slen) |
| { |
| if (slen <= 0) |
| slen = strlen(str); |
| |
| /* |
| * Might be left over after ereport() |
| */ |
| yy_init_globals(); |
| |
| /* |
| * Make a scan buffer with special termination needed by flex. |
| */ |
| |
| scanbuflen = slen; |
| scanbuf = palloc(slen + 2); |
| memcpy(scanbuf, str, slen); |
| scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR; |
| scanbufhandle = yy_scan_buffer(scanbuf, slen + 2); |
| |
| BEGIN(INITIAL); |
| } |
| |
| |
| /* |
| * Called after parsing is done to clean up after jsonpath_scanner_init() |
| */ |
| static void |
| jsonpath_scanner_finish(void) |
| { |
| yy_delete_buffer(scanbufhandle); |
| pfree(scanbuf); |
| } |
| |
| /* |
| * Resize scanstring so that it can append string of given length. |
| * Reinitialize if required. |
| */ |
| static void |
| resizeString(bool init, int appendLen) |
| { |
| if (init) |
| { |
| scanstring.total = Max(32, appendLen); |
| scanstring.val = (char *) palloc(scanstring.total); |
| scanstring.len = 0; |
| } |
| else |
| { |
| if (scanstring.len + appendLen >= scanstring.total) |
| { |
| while (scanstring.len + appendLen >= scanstring.total) |
| scanstring.total *= 2; |
| scanstring.val = repalloc(scanstring.val, scanstring.total); |
| } |
| } |
| } |
| |
| /* Add set of bytes at "s" of length "l" to scanstring */ |
| static void |
| addstring(bool init, char *s, int l) |
| { |
| resizeString(init, l + 1); |
| memcpy(scanstring.val + scanstring.len, s, l); |
| scanstring.len += l; |
| } |
| |
| /* Add single byte "c" to scanstring */ |
| static void |
| addchar(bool init, char c) |
| { |
| resizeString(init, 1); |
| scanstring.val[scanstring.len] = c; |
| if (c != '\0') |
| scanstring.len++; |
| } |
| |
| /* Interface to jsonpath parser */ |
| JsonPathParseResult * |
| parsejsonpath(const char *str, int len) |
| { |
| JsonPathParseResult *parseresult; |
| |
| jsonpath_scanner_init(str, len); |
| |
| if (jsonpath_yyparse((void *) &parseresult) != 0) |
| jsonpath_yyerror(NULL, "bogus input"); /* shouldn't happen */ |
| |
| jsonpath_scanner_finish(); |
| |
| return parseresult; |
| } |
| |
| /* Turn hex character into integer */ |
| static int |
| hexval(char c) |
| { |
| if (c >= '0' && c <= '9') |
| return c - '0'; |
| if (c >= 'a' && c <= 'f') |
| return c - 'a' + 0xA; |
| if (c >= 'A' && c <= 'F') |
| return c - 'A' + 0xA; |
| jsonpath_yyerror(NULL, "invalid hexadecimal digit"); |
| return 0; /* not reached */ |
| } |
| |
| /* Add given unicode character to scanstring */ |
| static void |
| addUnicodeChar(int ch) |
| { |
| if (ch == 0) |
| { |
| /* We can't allow this, since our TEXT type doesn't */ |
| ereport(ERROR, |
| (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), |
| errmsg("unsupported Unicode escape sequence"), |
| errdetail("\\u0000 cannot be converted to text."))); |
| } |
| else |
| { |
| char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1]; |
| |
| pg_unicode_to_server(ch, (unsigned char *) cbuf); |
| addstring(false, cbuf, strlen(cbuf)); |
| } |
| } |
| |
| /* Add unicode character, processing any surrogate pairs */ |
| static void |
| addUnicode(int ch, int *hi_surrogate) |
| { |
| if (is_utf16_surrogate_first(ch)) |
| { |
| if (*hi_surrogate != -1) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
| errmsg("invalid input syntax for type %s", "jsonpath"), |
| errdetail("Unicode high surrogate must not follow " |
| "a high surrogate."))); |
| *hi_surrogate = ch; |
| return; |
| } |
| else if (is_utf16_surrogate_second(ch)) |
| { |
| if (*hi_surrogate == -1) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
| errmsg("invalid input syntax for type %s", "jsonpath"), |
| errdetail("Unicode low surrogate must follow a high " |
| "surrogate."))); |
| ch = surrogate_pair_to_codepoint(*hi_surrogate, ch); |
| *hi_surrogate = -1; |
| } |
| else if (*hi_surrogate != -1) |
| { |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
| errmsg("invalid input syntax for type %s", "jsonpath"), |
| errdetail("Unicode low surrogate must follow a high " |
| "surrogate."))); |
| } |
| |
| addUnicodeChar(ch); |
| } |
| |
| /* |
| * parseUnicode was adopted from json_lex_string() in |
| * src/backend/utils/adt/json.c |
| */ |
| static void |
| parseUnicode(char *s, int l) |
| { |
| int i = 2; |
| int hi_surrogate = -1; |
| |
| for (i = 2; i < l; i += 2) /* skip '\u' */ |
| { |
| int ch = 0; |
| int j; |
| |
| if (s[i] == '{') /* parse '\u{XX...}' */ |
| { |
| while (s[++i] != '}' && i < l) |
| ch = (ch << 4) | hexval(s[i]); |
| i++; /* skip '}' */ |
| } |
| else /* parse '\uXXXX' */ |
| { |
| for (j = 0; j < 4 && i < l; j++) |
| ch = (ch << 4) | hexval(s[i++]); |
| } |
| |
| addUnicode(ch, &hi_surrogate); |
| } |
| |
| if (hi_surrogate != -1) |
| { |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
| errmsg("invalid input syntax for type %s", "jsonpath"), |
| errdetail("Unicode low surrogate must follow a high " |
| "surrogate."))); |
| } |
| } |
| |
/*
 * Decode a single two-digit hex escape and add it to scanstring.
 *
 * "s" points at the escape text; the two hex digits are read from s[2] and
 * s[3].  The resulting value is added via addUnicodeChar().
 */
static void
parseHexChar(char *s)
{
	int			high = hexval(s[2]);
	int			low = hexval(s[3]);

	addUnicodeChar((high << 4) | low);
}
| |
| /* |
| * Interface functions to make flex use palloc() instead of malloc(). |
| * It'd be better to make these static, but flex insists otherwise. |
| */ |
| |
| void * |
| jsonpath_yyalloc(yy_size_t bytes) |
| { |
| return palloc(bytes); |
| } |
| |
| void * |
| jsonpath_yyrealloc(void *ptr, yy_size_t bytes) |
| { |
| if (ptr) |
| return repalloc(ptr, bytes); |
| else |
| return palloc(bytes); |
| } |
| |
/* Release a flex allocation with pfree(); a NULL "ptr" is ignored. */
void
jsonpath_yyfree(void *ptr)
{
	if (!ptr)
		return;

	pfree(ptr);
}