| /*------------------------------------------------------------------------- |
| * |
| * wparser.c |
| * Standard interface to word parser |
| * |
| * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/tsearch/wparser.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include "catalog/namespace.h" |
| #include "catalog/pg_type.h" |
| #include "commands/defrem.h" |
| #include "common/jsonapi.h" |
| #include "funcapi.h" |
| #include "tsearch/ts_cache.h" |
| #include "tsearch/ts_utils.h" |
| #include "utils/builtins.h" |
| #include "utils/jsonfuncs.h" |
| #include "utils/varlena.h" |
| |
| /******sql-level interface******/ |
| |
| typedef struct |
| { |
| int cur; |
| LexDescr *list; |
| } TSTokenTypeStorage; |
| |
| /* state for ts_headline_json_* */ |
| typedef struct HeadlineJsonState |
| { |
| HeadlineParsedText *prs; |
| TSConfigCacheEntry *cfg; |
| TSParserCacheEntry *prsobj; |
| TSQuery query; |
| List *prsoptions; |
| bool transformed; |
| } HeadlineJsonState; |
| |
| static text *headline_json_value(void *_state, char *elem_value, int elem_len); |
| |
| static void |
| tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid) |
| { |
| TupleDesc tupdesc; |
| MemoryContext oldcontext; |
| TSTokenTypeStorage *st; |
| TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid); |
| |
| if (!OidIsValid(prs->lextypeOid)) |
| elog(ERROR, "method lextype isn't defined for text search parser %u", |
| prsid); |
| |
| oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); |
| |
| st = (TSTokenTypeStorage *) palloc(sizeof(TSTokenTypeStorage)); |
| st->cur = 0; |
| /* lextype takes one dummy argument */ |
| st->list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid, |
| (Datum) 0)); |
| funcctx->user_fctx = (void *) st; |
| |
| tupdesc = CreateTemplateTupleDesc(3); |
| TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid", |
| INT4OID, -1, 0); |
| TupleDescInitEntry(tupdesc, (AttrNumber) 2, "alias", |
| TEXTOID, -1, 0); |
| TupleDescInitEntry(tupdesc, (AttrNumber) 3, "description", |
| TEXTOID, -1, 0); |
| |
| funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| static Datum |
| tt_process_call(FuncCallContext *funcctx) |
| { |
| TSTokenTypeStorage *st; |
| |
| st = (TSTokenTypeStorage *) funcctx->user_fctx; |
| if (st->list && st->list[st->cur].lexid) |
| { |
| Datum result; |
| char *values[3]; |
| char txtid[16]; |
| HeapTuple tuple; |
| |
| sprintf(txtid, "%d", st->list[st->cur].lexid); |
| values[0] = txtid; |
| values[1] = st->list[st->cur].alias; |
| values[2] = st->list[st->cur].descr; |
| |
| tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); |
| result = HeapTupleGetDatum(tuple); |
| |
| pfree(values[1]); |
| pfree(values[2]); |
| st->cur++; |
| return result; |
| } |
| return (Datum) 0; |
| } |
| |
| Datum |
| ts_token_type_byid(PG_FUNCTION_ARGS) |
| { |
| FuncCallContext *funcctx; |
| Datum result; |
| |
| if (SRF_IS_FIRSTCALL()) |
| { |
| funcctx = SRF_FIRSTCALL_INIT(); |
| tt_setup_firstcall(funcctx, PG_GETARG_OID(0)); |
| } |
| |
| funcctx = SRF_PERCALL_SETUP(); |
| |
| if ((result = tt_process_call(funcctx)) != (Datum) 0) |
| SRF_RETURN_NEXT(funcctx, result); |
| SRF_RETURN_DONE(funcctx); |
| } |
| |
| Datum |
| ts_token_type_byname(PG_FUNCTION_ARGS) |
| { |
| FuncCallContext *funcctx; |
| Datum result; |
| |
| if (SRF_IS_FIRSTCALL()) |
| { |
| text *prsname = PG_GETARG_TEXT_PP(0); |
| Oid prsId; |
| |
| funcctx = SRF_FIRSTCALL_INIT(); |
| prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false); |
| tt_setup_firstcall(funcctx, prsId); |
| } |
| |
| funcctx = SRF_PERCALL_SETUP(); |
| |
| if ((result = tt_process_call(funcctx)) != (Datum) 0) |
| SRF_RETURN_NEXT(funcctx, result); |
| SRF_RETURN_DONE(funcctx); |
| } |
| |
/* One token collected by prs_setup_firstcall */
typedef struct
{
	int			type;			/* token type reported by the parser */
	char	   *lexeme;			/* NUL-terminated copy of the token text */
} LexemeEntry;
| |
| typedef struct |
| { |
| int cur; |
| int len; |
| LexemeEntry *list; |
| } PrsStorage; |
| |
| |
| static void |
| prs_setup_firstcall(FuncCallContext *funcctx, Oid prsid, text *txt) |
| { |
| TupleDesc tupdesc; |
| MemoryContext oldcontext; |
| PrsStorage *st; |
| TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid); |
| char *lex = NULL; |
| int llen = 0, |
| type = 0; |
| void *prsdata; |
| |
| oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); |
| |
| st = (PrsStorage *) palloc(sizeof(PrsStorage)); |
| st->cur = 0; |
| st->len = 16; |
| st->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * st->len); |
| |
| prsdata = (void *) DatumGetPointer(FunctionCall2(&prs->prsstart, |
| PointerGetDatum(VARDATA_ANY(txt)), |
| Int32GetDatum(VARSIZE_ANY_EXHDR(txt)))); |
| |
| while ((type = DatumGetInt32(FunctionCall3(&prs->prstoken, |
| PointerGetDatum(prsdata), |
| PointerGetDatum(&lex), |
| PointerGetDatum(&llen)))) != 0) |
| { |
| if (st->cur >= st->len) |
| { |
| st->len = 2 * st->len; |
| st->list = (LexemeEntry *) repalloc(st->list, sizeof(LexemeEntry) * st->len); |
| } |
| st->list[st->cur].lexeme = palloc(llen + 1); |
| memcpy(st->list[st->cur].lexeme, lex, llen); |
| st->list[st->cur].lexeme[llen] = '\0'; |
| st->list[st->cur].type = type; |
| st->cur++; |
| } |
| |
| FunctionCall1(&prs->prsend, PointerGetDatum(prsdata)); |
| |
| st->len = st->cur; |
| st->cur = 0; |
| |
| funcctx->user_fctx = (void *) st; |
| tupdesc = CreateTemplateTupleDesc(2); |
| TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid", |
| INT4OID, -1, 0); |
| TupleDescInitEntry(tupdesc, (AttrNumber) 2, "token", |
| TEXTOID, -1, 0); |
| |
| funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| static Datum |
| prs_process_call(FuncCallContext *funcctx) |
| { |
| PrsStorage *st; |
| |
| st = (PrsStorage *) funcctx->user_fctx; |
| if (st->cur < st->len) |
| { |
| Datum result; |
| char *values[2]; |
| char tid[16]; |
| HeapTuple tuple; |
| |
| values[0] = tid; |
| sprintf(tid, "%d", st->list[st->cur].type); |
| values[1] = st->list[st->cur].lexeme; |
| tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); |
| result = HeapTupleGetDatum(tuple); |
| |
| pfree(values[1]); |
| st->cur++; |
| return result; |
| } |
| return (Datum) 0; |
| } |
| |
| Datum |
| ts_parse_byid(PG_FUNCTION_ARGS) |
| { |
| FuncCallContext *funcctx; |
| Datum result; |
| |
| if (SRF_IS_FIRSTCALL()) |
| { |
| text *txt = PG_GETARG_TEXT_PP(1); |
| |
| funcctx = SRF_FIRSTCALL_INIT(); |
| prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt); |
| PG_FREE_IF_COPY(txt, 1); |
| } |
| |
| funcctx = SRF_PERCALL_SETUP(); |
| |
| if ((result = prs_process_call(funcctx)) != (Datum) 0) |
| SRF_RETURN_NEXT(funcctx, result); |
| SRF_RETURN_DONE(funcctx); |
| } |
| |
| Datum |
| ts_parse_byname(PG_FUNCTION_ARGS) |
| { |
| FuncCallContext *funcctx; |
| Datum result; |
| |
| if (SRF_IS_FIRSTCALL()) |
| { |
| text *prsname = PG_GETARG_TEXT_PP(0); |
| text *txt = PG_GETARG_TEXT_PP(1); |
| Oid prsId; |
| |
| funcctx = SRF_FIRSTCALL_INIT(); |
| prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false); |
| prs_setup_firstcall(funcctx, prsId, txt); |
| } |
| |
| funcctx = SRF_PERCALL_SETUP(); |
| |
| if ((result = prs_process_call(funcctx)) != (Datum) 0) |
| SRF_RETURN_NEXT(funcctx, result); |
| SRF_RETURN_DONE(funcctx); |
| } |
| |
| Datum |
| ts_headline_byid_opt(PG_FUNCTION_ARGS) |
| { |
| Oid tsconfig = PG_GETARG_OID(0); |
| text *in = PG_GETARG_TEXT_PP(1); |
| TSQuery query = PG_GETARG_TSQUERY(2); |
| text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_PP(3) : NULL; |
| HeadlineParsedText prs; |
| List *prsoptions; |
| text *out; |
| TSConfigCacheEntry *cfg; |
| TSParserCacheEntry *prsobj; |
| |
| cfg = lookup_ts_config_cache(tsconfig); |
| prsobj = lookup_ts_parser_cache(cfg->prsId); |
| |
| if (!OidIsValid(prsobj->headlineOid)) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("text search parser does not support headline creation"))); |
| |
| memset(&prs, 0, sizeof(HeadlineParsedText)); |
| prs.lenwords = 32; |
| prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords); |
| |
| hlparsetext(cfg->cfgId, &prs, query, |
| VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in)); |
| |
| if (opt) |
| prsoptions = deserialize_deflist(PointerGetDatum(opt)); |
| else |
| prsoptions = NIL; |
| |
| FunctionCall3(&(prsobj->prsheadline), |
| PointerGetDatum(&prs), |
| PointerGetDatum(prsoptions), |
| PointerGetDatum(query)); |
| |
| out = generateHeadline(&prs); |
| |
| PG_FREE_IF_COPY(in, 1); |
| PG_FREE_IF_COPY(query, 2); |
| if (opt) |
| PG_FREE_IF_COPY(opt, 3); |
| pfree(prs.words); |
| pfree(prs.startsel); |
| pfree(prs.stopsel); |
| |
| PG_RETURN_POINTER(out); |
| } |
| |
| Datum |
| ts_headline_byid(PG_FUNCTION_ARGS) |
| { |
| PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt, |
| PG_GETARG_DATUM(0), |
| PG_GETARG_DATUM(1), |
| PG_GETARG_DATUM(2))); |
| } |
| |
| Datum |
| ts_headline(PG_FUNCTION_ARGS) |
| { |
| PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt, |
| ObjectIdGetDatum(getTSCurrentConfig(true)), |
| PG_GETARG_DATUM(0), |
| PG_GETARG_DATUM(1))); |
| } |
| |
| Datum |
| ts_headline_opt(PG_FUNCTION_ARGS) |
| { |
| PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt, |
| ObjectIdGetDatum(getTSCurrentConfig(true)), |
| PG_GETARG_DATUM(0), |
| PG_GETARG_DATUM(1), |
| PG_GETARG_DATUM(2))); |
| } |
| |
| Datum |
| ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS) |
| { |
| Oid tsconfig = PG_GETARG_OID(0); |
| Jsonb *jb = PG_GETARG_JSONB_P(1); |
| TSQuery query = PG_GETARG_TSQUERY(2); |
| text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL; |
| Jsonb *out; |
| JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value; |
| HeadlineParsedText prs; |
| HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState)); |
| |
| memset(&prs, 0, sizeof(HeadlineParsedText)); |
| prs.lenwords = 32; |
| prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords); |
| |
| state->prs = &prs; |
| state->cfg = lookup_ts_config_cache(tsconfig); |
| state->prsobj = lookup_ts_parser_cache(state->cfg->prsId); |
| state->query = query; |
| if (opt) |
| state->prsoptions = deserialize_deflist(PointerGetDatum(opt)); |
| else |
| state->prsoptions = NIL; |
| |
| if (!OidIsValid(state->prsobj->headlineOid)) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("text search parser does not support headline creation"))); |
| |
| out = transform_jsonb_string_values(jb, state, action); |
| |
| PG_FREE_IF_COPY(jb, 1); |
| PG_FREE_IF_COPY(query, 2); |
| if (opt) |
| PG_FREE_IF_COPY(opt, 3); |
| |
| pfree(prs.words); |
| |
| if (state->transformed) |
| { |
| pfree(prs.startsel); |
| pfree(prs.stopsel); |
| } |
| |
| PG_RETURN_JSONB_P(out); |
| } |
| |
| Datum |
| ts_headline_jsonb(PG_FUNCTION_ARGS) |
| { |
| PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt, |
| ObjectIdGetDatum(getTSCurrentConfig(true)), |
| PG_GETARG_DATUM(0), |
| PG_GETARG_DATUM(1))); |
| } |
| |
| Datum |
| ts_headline_jsonb_byid(PG_FUNCTION_ARGS) |
| { |
| PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt, |
| PG_GETARG_DATUM(0), |
| PG_GETARG_DATUM(1), |
| PG_GETARG_DATUM(2))); |
| } |
| |
| Datum |
| ts_headline_jsonb_opt(PG_FUNCTION_ARGS) |
| { |
| PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt, |
| ObjectIdGetDatum(getTSCurrentConfig(true)), |
| PG_GETARG_DATUM(0), |
| PG_GETARG_DATUM(1), |
| PG_GETARG_DATUM(2))); |
| } |
| |
| Datum |
| ts_headline_json_byid_opt(PG_FUNCTION_ARGS) |
| { |
| Oid tsconfig = PG_GETARG_OID(0); |
| text *json = PG_GETARG_TEXT_P(1); |
| TSQuery query = PG_GETARG_TSQUERY(2); |
| text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL; |
| text *out; |
| JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value; |
| |
| HeadlineParsedText prs; |
| HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState)); |
| |
| memset(&prs, 0, sizeof(HeadlineParsedText)); |
| prs.lenwords = 32; |
| prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords); |
| |
| state->prs = &prs; |
| state->cfg = lookup_ts_config_cache(tsconfig); |
| state->prsobj = lookup_ts_parser_cache(state->cfg->prsId); |
| state->query = query; |
| if (opt) |
| state->prsoptions = deserialize_deflist(PointerGetDatum(opt)); |
| else |
| state->prsoptions = NIL; |
| |
| if (!OidIsValid(state->prsobj->headlineOid)) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("text search parser does not support headline creation"))); |
| |
| out = transform_json_string_values(json, state, action); |
| |
| PG_FREE_IF_COPY(json, 1); |
| PG_FREE_IF_COPY(query, 2); |
| if (opt) |
| PG_FREE_IF_COPY(opt, 3); |
| pfree(prs.words); |
| |
| if (state->transformed) |
| { |
| pfree(prs.startsel); |
| pfree(prs.stopsel); |
| } |
| |
| PG_RETURN_TEXT_P(out); |
| } |
| |
| Datum |
| ts_headline_json(PG_FUNCTION_ARGS) |
| { |
| PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt, |
| ObjectIdGetDatum(getTSCurrentConfig(true)), |
| PG_GETARG_DATUM(0), |
| PG_GETARG_DATUM(1))); |
| } |
| |
| Datum |
| ts_headline_json_byid(PG_FUNCTION_ARGS) |
| { |
| PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt, |
| PG_GETARG_DATUM(0), |
| PG_GETARG_DATUM(1), |
| PG_GETARG_DATUM(2))); |
| } |
| |
| Datum |
| ts_headline_json_opt(PG_FUNCTION_ARGS) |
| { |
| PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt, |
| ObjectIdGetDatum(getTSCurrentConfig(true)), |
| PG_GETARG_DATUM(0), |
| PG_GETARG_DATUM(1), |
| PG_GETARG_DATUM(2))); |
| } |
| |
| |
| /* |
| * Return headline in text from, generated from a json(b) element |
| */ |
| static text * |
| headline_json_value(void *_state, char *elem_value, int elem_len) |
| { |
| HeadlineJsonState *state = (HeadlineJsonState *) _state; |
| |
| HeadlineParsedText *prs = state->prs; |
| TSConfigCacheEntry *cfg = state->cfg; |
| TSParserCacheEntry *prsobj = state->prsobj; |
| TSQuery query = state->query; |
| List *prsoptions = state->prsoptions; |
| |
| prs->curwords = 0; |
| hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len); |
| FunctionCall3(&(prsobj->prsheadline), |
| PointerGetDatum(prs), |
| PointerGetDatum(prsoptions), |
| PointerGetDatum(query)); |
| |
| state->transformed = true; |
| return generateHeadline(prs); |
| } |