| /*------------------------------------------------------------------------- |
| * |
| * regis.c |
| * Fast regex subset |
| * |
| * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/tsearch/regis.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| |
| #include "tsearch/dicts/regis.h" |
| #include "tsearch/ts_locale.h" |
| |
/*
 * Parser states shared by RS_isRegis() and RS_compile().
 */
enum
{
	RS_IN_ONEOF = 1,			/* just consumed '[' */
	RS_IN_ONEOF_IN = 2,			/* inside "[...]", at least one letter seen */
	RS_IN_NONEOF = 3,			/* inside "[^...]" */
	RS_IN_WAIT = 4				/* outside brackets */
};
| |
| |
| /* |
| * Test whether a regex is of the subset supported here. |
| * Keep this in sync with RS_compile! |
| */ |
| bool |
| RS_isRegis(const char *str) |
| { |
| int state = RS_IN_WAIT; |
| const char *c = str; |
| |
| while (*c) |
| { |
| if (state == RS_IN_WAIT) |
| { |
| if (t_isalpha(c)) |
| /* okay */ ; |
| else if (t_iseq(c, '[')) |
| state = RS_IN_ONEOF; |
| else |
| return false; |
| } |
| else if (state == RS_IN_ONEOF) |
| { |
| if (t_iseq(c, '^')) |
| state = RS_IN_NONEOF; |
| else if (t_isalpha(c)) |
| state = RS_IN_ONEOF_IN; |
| else |
| return false; |
| } |
| else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF) |
| { |
| if (t_isalpha(c)) |
| /* okay */ ; |
| else if (t_iseq(c, ']')) |
| state = RS_IN_WAIT; |
| else |
| return false; |
| } |
| else |
| elog(ERROR, "internal error in RS_isRegis: state %d", state); |
| c += pg_mblen(c); |
| } |
| |
| return (state == RS_IN_WAIT); |
| } |
| |
| static RegisNode * |
| newRegisNode(RegisNode *prev, int len) |
| { |
| RegisNode *ptr; |
| |
| ptr = (RegisNode *) palloc0(RNHDRSZ + len + 1); |
| if (prev) |
| prev->next = ptr; |
| return ptr; |
| } |
| |
| void |
| RS_compile(Regis *r, bool issuffix, const char *str) |
| { |
| int len = strlen(str); |
| int state = RS_IN_WAIT; |
| const char *c = str; |
| RegisNode *ptr = NULL; |
| |
| memset(r, 0, sizeof(Regis)); |
| r->issuffix = (issuffix) ? 1 : 0; |
| |
| while (*c) |
| { |
| if (state == RS_IN_WAIT) |
| { |
| if (t_isalpha(c)) |
| { |
| if (ptr) |
| ptr = newRegisNode(ptr, len); |
| else |
| ptr = r->node = newRegisNode(NULL, len); |
| COPYCHAR(ptr->data, c); |
| ptr->type = RSF_ONEOF; |
| ptr->len = pg_mblen(c); |
| } |
| else if (t_iseq(c, '[')) |
| { |
| if (ptr) |
| ptr = newRegisNode(ptr, len); |
| else |
| ptr = r->node = newRegisNode(NULL, len); |
| ptr->type = RSF_ONEOF; |
| state = RS_IN_ONEOF; |
| } |
| else /* shouldn't get here */ |
| elog(ERROR, "invalid regis pattern: \"%s\"", str); |
| } |
| else if (state == RS_IN_ONEOF) |
| { |
| if (t_iseq(c, '^')) |
| { |
| ptr->type = RSF_NONEOF; |
| state = RS_IN_NONEOF; |
| } |
| else if (t_isalpha(c)) |
| { |
| COPYCHAR(ptr->data, c); |
| ptr->len = pg_mblen(c); |
| state = RS_IN_ONEOF_IN; |
| } |
| else /* shouldn't get here */ |
| elog(ERROR, "invalid regis pattern: \"%s\"", str); |
| } |
| else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF) |
| { |
| if (t_isalpha(c)) |
| { |
| COPYCHAR(ptr->data + ptr->len, c); |
| ptr->len += pg_mblen(c); |
| } |
| else if (t_iseq(c, ']')) |
| state = RS_IN_WAIT; |
| else /* shouldn't get here */ |
| elog(ERROR, "invalid regis pattern: \"%s\"", str); |
| } |
| else |
| elog(ERROR, "internal error in RS_compile: state %d", state); |
| c += pg_mblen(c); |
| } |
| |
| if (state != RS_IN_WAIT) /* shouldn't get here */ |
| elog(ERROR, "invalid regis pattern: \"%s\"", str); |
| |
| ptr = r->node; |
| while (ptr) |
| { |
| r->nchar++; |
| ptr = ptr->next; |
| } |
| } |
| |
| void |
| RS_free(Regis *r) |
| { |
| RegisNode *ptr = r->node, |
| *tmp; |
| |
| while (ptr) |
| { |
| tmp = ptr->next; |
| pfree(ptr); |
| ptr = tmp; |
| } |
| |
| r->node = NULL; |
| } |
| |
/*
 * mb_strchr
 *		Multibyte-aware membership test: does the NUL-terminated string "str"
 *		contain the single character that "c" points at?
 *
 * Character lengths come from pg_mblen().  Two characters are considered
 * equal when they have the same byte length and identical bytes.
 */
static bool
mb_strchr(char *str, char *c)
{
	int			wanted_len = pg_mblen(c);
	char	   *cur = str;

	while (*cur != '\0')
	{
		int			cur_len = pg_mblen(cur);

		if (cur_len == wanted_len && memcmp(cur, c, cur_len) == 0)
			return true;

		cur += cur_len;
	}

	return false;
}
| |
| bool |
| RS_execute(Regis *r, char *str) |
| { |
| RegisNode *ptr = r->node; |
| char *c = str; |
| int len = 0; |
| |
| while (*c) |
| { |
| len++; |
| c += pg_mblen(c); |
| } |
| |
| if (len < r->nchar) |
| return 0; |
| |
| c = str; |
| if (r->issuffix) |
| { |
| len -= r->nchar; |
| while (len-- > 0) |
| c += pg_mblen(c); |
| } |
| |
| |
| while (ptr) |
| { |
| switch (ptr->type) |
| { |
| case RSF_ONEOF: |
| if (!mb_strchr((char *) ptr->data, c)) |
| return false; |
| break; |
| case RSF_NONEOF: |
| if (mb_strchr((char *) ptr->data, c)) |
| return false; |
| break; |
| default: |
| elog(ERROR, "unrecognized regis node type: %d", ptr->type); |
| } |
| ptr = ptr->next; |
| c += pg_mblen(c); |
| } |
| |
| return true; |
| } |