| /* |
| * txtquery io |
| * Teodor Sigaev <teodor@stack.net> |
| * contrib/ltree/ltxtquery_io.c |
| */ |
| #include "postgres.h" |
| |
| #include <ctype.h> |
| |
| #include "crc32.h" |
| #include "libpq/pqformat.h" |
| #include "ltree.h" |
| #include "miscadmin.h" |
| |
| |
| /* parser's states */ |
| #define WAITOPERAND 1 |
| #define INOPERAND 2 |
| #define WAITOPERATOR 3 |
| |
| /* |
| * node of query tree, also used |
| * for storing polish notation in parser |
| */ |
| typedef struct NODE |
| { |
| int32 type; |
| int32 val; |
| int16 distance; |
| int16 length; |
| uint16 flag; |
| struct NODE *next; |
| } NODE; |
| |
| typedef struct |
| { |
| char *buf; |
| int32 state; |
| int32 count; |
| /* reverse polish notation in list (for temporary usage) */ |
| NODE *str; |
| /* number in str */ |
| int32 num; |
| |
| /* user-friendly operand */ |
| int32 lenop; |
| int32 sumlen; |
| char *op; |
| char *curop; |
| } QPRS_STATE; |
| |
| /* |
| * get token from query string |
| */ |
| static int32 |
| gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag) |
| { |
| int charlen; |
| |
| for (;;) |
| { |
| charlen = pg_mblen(state->buf); |
| |
| switch (state->state) |
| { |
| case WAITOPERAND: |
| if (charlen == 1 && t_iseq(state->buf, '!')) |
| { |
| (state->buf)++; |
| *val = (int32) '!'; |
| return OPR; |
| } |
| else if (charlen == 1 && t_iseq(state->buf, '(')) |
| { |
| state->count++; |
| (state->buf)++; |
| return OPEN; |
| } |
| else if (ISALNUM(state->buf)) |
| { |
| state->state = INOPERAND; |
| *strval = state->buf; |
| *lenval = charlen; |
| *flag = 0; |
| } |
| else if (!t_isspace(state->buf)) |
| ereport(ERROR, |
| (errcode(ERRCODE_SYNTAX_ERROR), |
| errmsg("operand syntax error"))); |
| break; |
| case INOPERAND: |
| if (ISALNUM(state->buf)) |
| { |
| if (*flag) |
| ereport(ERROR, |
| (errcode(ERRCODE_SYNTAX_ERROR), |
| errmsg("modifiers syntax error"))); |
| *lenval += charlen; |
| } |
| else if (charlen == 1 && t_iseq(state->buf, '%')) |
| *flag |= LVAR_SUBLEXEME; |
| else if (charlen == 1 && t_iseq(state->buf, '@')) |
| *flag |= LVAR_INCASE; |
| else if (charlen == 1 && t_iseq(state->buf, '*')) |
| *flag |= LVAR_ANYEND; |
| else |
| { |
| state->state = WAITOPERATOR; |
| return VAL; |
| } |
| break; |
| case WAITOPERATOR: |
| if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))) |
| { |
| state->state = WAITOPERAND; |
| *val = (int32) *(state->buf); |
| (state->buf)++; |
| return OPR; |
| } |
| else if (charlen == 1 && t_iseq(state->buf, ')')) |
| { |
| (state->buf)++; |
| state->count--; |
| return (state->count < 0) ? ERR : CLOSE; |
| } |
| else if (*(state->buf) == '\0') |
| return (state->count) ? ERR : END; |
| else if (charlen == 1 && !t_iseq(state->buf, ' ')) |
| return ERR; |
| break; |
| default: |
| return ERR; |
| break; |
| } |
| |
| state->buf += charlen; |
| } |
| } |
| |
| /* |
| * push new one in polish notation reverse view |
| */ |
| static void |
| pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval, uint16 flag) |
| { |
| NODE *tmp = (NODE *) palloc(sizeof(NODE)); |
| |
| tmp->type = type; |
| tmp->val = val; |
| tmp->flag = flag; |
| if (distance > 0xffff) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("value is too big"))); |
| if (lenval > 0xff) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("operand is too long"))); |
| tmp->distance = distance; |
| tmp->length = lenval; |
| tmp->next = state->str; |
| state->str = tmp; |
| state->num++; |
| } |
| |
| /* |
| * This function is used for query text parsing |
| */ |
| static void |
| pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag) |
| { |
| if (lenval > 0xffff) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("word is too long"))); |
| |
| pushquery(state, type, ltree_crc32_sz(strval, lenval), |
| state->curop - state->op, lenval, flag); |
| |
| while (state->curop - state->op + lenval + 1 >= state->lenop) |
| { |
| int32 tmp = state->curop - state->op; |
| |
| state->lenop *= 2; |
| state->op = (char *) repalloc((void *) state->op, state->lenop); |
| state->curop = state->op + tmp; |
| } |
| memcpy((void *) state->curop, (void *) strval, lenval); |
| state->curop += lenval; |
| *(state->curop) = '\0'; |
| state->curop++; |
| state->sumlen += lenval + 1; |
| } |
| |
| #define STACKDEPTH 32 |
| /* |
| * make polish notation of query |
| */ |
| static int32 |
| makepol(QPRS_STATE *state) |
| { |
| int32 val = 0, |
| type; |
| int32 lenval = 0; |
| char *strval = NULL; |
| int32 stack[STACKDEPTH]; |
| int32 lenstack = 0; |
| uint16 flag = 0; |
| |
| /* since this function recurses, it could be driven to stack overflow */ |
| check_stack_depth(); |
| |
| while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END) |
| { |
| switch (type) |
| { |
| case VAL: |
| pushval_asis(state, VAL, strval, lenval, flag); |
| while (lenstack && (stack[lenstack - 1] == (int32) '&' || |
| stack[lenstack - 1] == (int32) '!')) |
| { |
| lenstack--; |
| pushquery(state, OPR, stack[lenstack], 0, 0, 0); |
| } |
| break; |
| case OPR: |
| if (lenstack && val == (int32) '|') |
| pushquery(state, OPR, val, 0, 0, 0); |
| else |
| { |
| if (lenstack == STACKDEPTH) |
| /* internal error */ |
| elog(ERROR, "stack too short"); |
| stack[lenstack] = val; |
| lenstack++; |
| } |
| break; |
| case OPEN: |
| if (makepol(state) == ERR) |
| return ERR; |
| while (lenstack && (stack[lenstack - 1] == (int32) '&' || |
| stack[lenstack - 1] == (int32) '!')) |
| { |
| lenstack--; |
| pushquery(state, OPR, stack[lenstack], 0, 0, 0); |
| } |
| break; |
| case CLOSE: |
| while (lenstack) |
| { |
| lenstack--; |
| pushquery(state, OPR, stack[lenstack], 0, 0, 0); |
| }; |
| return END; |
| break; |
| case ERR: |
| default: |
| ereport(ERROR, |
| (errcode(ERRCODE_SYNTAX_ERROR), |
| errmsg("syntax error"))); |
| |
| return ERR; |
| |
| } |
| } |
| while (lenstack) |
| { |
| lenstack--; |
| pushquery(state, OPR, stack[lenstack], 0, 0, 0); |
| }; |
| return END; |
| } |
| |
| static void |
| findoprnd(ITEM *ptr, int32 *pos) |
| { |
| /* since this function recurses, it could be driven to stack overflow. */ |
| check_stack_depth(); |
| |
| if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE) |
| { |
| ptr[*pos].left = 0; |
| (*pos)++; |
| } |
| else if (ptr[*pos].val == (int32) '!') |
| { |
| ptr[*pos].left = 1; |
| (*pos)++; |
| findoprnd(ptr, pos); |
| } |
| else |
| { |
| ITEM *curitem = &ptr[*pos]; |
| int32 tmp = *pos; |
| |
| (*pos)++; |
| findoprnd(ptr, pos); |
| curitem->left = *pos - tmp; |
| findoprnd(ptr, pos); |
| } |
| } |
| |
| |
| /* |
| * input |
| */ |
| static ltxtquery * |
| queryin(char *buf) |
| { |
| QPRS_STATE state; |
| int32 i; |
| ltxtquery *query; |
| int32 commonlen; |
| ITEM *ptr; |
| NODE *tmp; |
| int32 pos = 0; |
| |
| #ifdef BS_DEBUG |
| char pbuf[16384], |
| *cur; |
| #endif |
| |
| /* init state */ |
| state.buf = buf; |
| state.state = WAITOPERAND; |
| state.count = 0; |
| state.num = 0; |
| state.str = NULL; |
| |
| /* init list of operand */ |
| state.sumlen = 0; |
| state.lenop = 64; |
| state.curop = state.op = (char *) palloc(state.lenop); |
| *(state.curop) = '\0'; |
| |
| /* parse query & make polish notation (postfix, but in reverse order) */ |
| makepol(&state); |
| if (!state.num) |
| ereport(ERROR, |
| (errcode(ERRCODE_SYNTAX_ERROR), |
| errmsg("syntax error"), |
| errdetail("Empty query."))); |
| |
| if (LTXTQUERY_TOO_BIG(state.num, state.sumlen)) |
| ereport(ERROR, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("ltxtquery is too large"))); |
| commonlen = COMPUTESIZE(state.num, state.sumlen); |
| |
| query = (ltxtquery *) palloc0(commonlen); |
| SET_VARSIZE(query, commonlen); |
| query->size = state.num; |
| ptr = GETQUERY(query); |
| |
| /* set item in polish notation */ |
| for (i = 0; i < state.num; i++) |
| { |
| ptr[i].type = state.str->type; |
| ptr[i].val = state.str->val; |
| ptr[i].distance = state.str->distance; |
| ptr[i].length = state.str->length; |
| ptr[i].flag = state.str->flag; |
| tmp = state.str->next; |
| pfree(state.str); |
| state.str = tmp; |
| } |
| |
| /* set user-friendly operand view */ |
| memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen); |
| pfree(state.op); |
| |
| /* set left operand's position for every operator */ |
| pos = 0; |
| findoprnd(ptr, &pos); |
| |
| return query; |
| } |
| |
| /* |
| * in without morphology |
| */ |
| PG_FUNCTION_INFO_V1(ltxtq_in); |
| Datum |
| ltxtq_in(PG_FUNCTION_ARGS) |
| { |
| PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0))); |
| } |
| |
| /* |
| * ltxtquery type recv function |
| * |
| * The type is sent as text in binary mode, so this is almost the same |
| * as the input function, but it's prefixed with a version number so we |
| * can change the binary format sent in future if necessary. For now, |
| * only version 1 is supported. |
| */ |
| PG_FUNCTION_INFO_V1(ltxtq_recv); |
| Datum |
| ltxtq_recv(PG_FUNCTION_ARGS) |
| { |
| StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); |
| int version = pq_getmsgint(buf, 1); |
| char *str; |
| int nbytes; |
| ltxtquery *res; |
| |
| if (version != 1) |
| elog(ERROR, "unsupported ltxtquery version number %d", version); |
| |
| str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); |
| res = queryin(str); |
| pfree(str); |
| |
| PG_RETURN_POINTER(res); |
| } |
| |
| /* |
| * out function |
| */ |
| typedef struct |
| { |
| ITEM *curpol; |
| char *buf; |
| char *cur; |
| char *op; |
| int32 buflen; |
| } INFIX; |
| |
| #define RESIZEBUF(inf,addsize) \ |
| while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \ |
| { \ |
| int32 len = (inf)->cur - (inf)->buf; \ |
| (inf)->buflen *= 2; \ |
| (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \ |
| (inf)->cur = (inf)->buf + len; \ |
| } |
| |
| /* |
| * recursive walk on tree and print it in |
| * infix (human-readable) view |
| */ |
| static void |
| infix(INFIX *in, bool first) |
| { |
| /* since this function recurses, it could be driven to stack overflow. */ |
| check_stack_depth(); |
| |
| if (in->curpol->type == VAL) |
| { |
| char *op = in->op + in->curpol->distance; |
| |
| RESIZEBUF(in, in->curpol->length * 2 + 5); |
| while (*op) |
| { |
| *(in->cur) = *op; |
| op++; |
| in->cur++; |
| } |
| if (in->curpol->flag & LVAR_SUBLEXEME) |
| { |
| *(in->cur) = '%'; |
| in->cur++; |
| } |
| if (in->curpol->flag & LVAR_INCASE) |
| { |
| *(in->cur) = '@'; |
| in->cur++; |
| } |
| if (in->curpol->flag & LVAR_ANYEND) |
| { |
| *(in->cur) = '*'; |
| in->cur++; |
| } |
| *(in->cur) = '\0'; |
| in->curpol++; |
| } |
| else if (in->curpol->val == (int32) '!') |
| { |
| bool isopr = false; |
| |
| RESIZEBUF(in, 1); |
| *(in->cur) = '!'; |
| in->cur++; |
| *(in->cur) = '\0'; |
| in->curpol++; |
| if (in->curpol->type == OPR) |
| { |
| isopr = true; |
| RESIZEBUF(in, 2); |
| sprintf(in->cur, "( "); |
| in->cur = strchr(in->cur, '\0'); |
| } |
| infix(in, isopr); |
| if (isopr) |
| { |
| RESIZEBUF(in, 2); |
| sprintf(in->cur, " )"); |
| in->cur = strchr(in->cur, '\0'); |
| } |
| } |
| else |
| { |
| int32 op = in->curpol->val; |
| INFIX nrm; |
| |
| in->curpol++; |
| if (op == (int32) '|' && !first) |
| { |
| RESIZEBUF(in, 2); |
| sprintf(in->cur, "( "); |
| in->cur = strchr(in->cur, '\0'); |
| } |
| |
| nrm.curpol = in->curpol; |
| nrm.op = in->op; |
| nrm.buflen = 16; |
| nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); |
| |
| /* get right operand */ |
| infix(&nrm, false); |
| |
| /* get & print left operand */ |
| in->curpol = nrm.curpol; |
| infix(in, false); |
| |
| /* print operator & right operand */ |
| RESIZEBUF(in, 3 + (nrm.cur - nrm.buf)); |
| sprintf(in->cur, " %c %s", op, nrm.buf); |
| in->cur = strchr(in->cur, '\0'); |
| pfree(nrm.buf); |
| |
| if (op == (int32) '|' && !first) |
| { |
| RESIZEBUF(in, 2); |
| sprintf(in->cur, " )"); |
| in->cur = strchr(in->cur, '\0'); |
| } |
| } |
| } |
| |
| PG_FUNCTION_INFO_V1(ltxtq_out); |
| Datum |
| ltxtq_out(PG_FUNCTION_ARGS) |
| { |
| ltxtquery *query = PG_GETARG_LTXTQUERY_P(0); |
| INFIX nrm; |
| |
| if (query->size == 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_SYNTAX_ERROR), |
| errmsg("syntax error"), |
| errdetail("Empty query."))); |
| |
| nrm.curpol = GETQUERY(query); |
| nrm.buflen = 32; |
| nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); |
| *(nrm.cur) = '\0'; |
| nrm.op = GETOPERAND(query); |
| infix(&nrm, true); |
| |
| PG_RETURN_POINTER(nrm.buf); |
| } |
| |
| /* |
| * ltxtquery type send function |
| * |
| * The type is sent as text in binary mode, so this is almost the same |
| * as the output function, but it's prefixed with a version number so we |
| * can change the binary format sent in future if necessary. For now, |
| * only version 1 is supported. |
| */ |
| PG_FUNCTION_INFO_V1(ltxtq_send); |
| Datum |
| ltxtq_send(PG_FUNCTION_ARGS) |
| { |
| ltxtquery *query = PG_GETARG_LTXTQUERY_P(0); |
| StringInfoData buf; |
| int version = 1; |
| INFIX nrm; |
| |
| if (query->size == 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_SYNTAX_ERROR), |
| errmsg("syntax error"), |
| errdetail("Empty query."))); |
| |
| nrm.curpol = GETQUERY(query); |
| nrm.buflen = 32; |
| nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); |
| *(nrm.cur) = '\0'; |
| nrm.op = GETOPERAND(query); |
| infix(&nrm, true); |
| |
| pq_begintypsend(&buf); |
| pq_sendint8(&buf, version); |
| pq_sendtext(&buf, nrm.buf, strlen(nrm.buf)); |
| pfree(nrm.buf); |
| |
| PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); |
| } |