src/backend/tsearch/dict_thesaurus.c - cloudberry - Git at Google

 /*-------------------------------------------------------------------------
  *
  * dict_thesaurus.c
  *		Thesaurus dictionary: phrase to phrase substitution
  *
  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
  *
  *
  * IDENTIFICATION
  *	  src/backend/tsearch/dict_thesaurus.c
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"

 #include "catalog/namespace.h"
 #include "commands/defrem.h"
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_locale.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
 #include "utils/regproc.h"


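 /*
  * Temporarily, TSLexeme.flags is borrowed for internal use: DT_USEASIS marks
  * a substitute word that must be kept as written instead of being lexized.
  */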
 #define DT_USEASIS		0x1000

 /*
  * Describes one place where a sample-phrase lexeme occurs: which rule it
  * belongs to and at which position inside that rule's sample phrase.
  */
 typedef struct LexemeInfo
 {
 	uint32		idsubst;		/* entry's number in DictThesaurus->subst */
 	uint16		posinsubst;		/* pos info in entry */
 	uint16		tnvariant;		/* total num lexemes in one variant */
 	/* next occurrence of the same lexeme; chained by compileTheLexeme */
 	struct LexemeInfo *nextentry;
 	/* next candidate rule in the list assembled by findVariant */
 	struct LexemeInfo *nextvariant;
 } LexemeInfo;

 /*
  * One searchable word of the thesaurus together with the list of places
  * where it occurs in sample phrases.
  */
 typedef struct
 {
 	/* word text; addCompiledLexeme stores NULL here for the "?" marker */
 	char	   *lexeme;
 	/* head of the nextentry-linked list of occurrences */
 	LexemeInfo *entries;
 } TheLexeme;

 /*
  * Right-hand side of one thesaurus rule.
  */
 typedef struct
 {
 	/* addWrd stores posinsubst - 1: index of the last sample-phrase lexeme */
 	uint16		lastlexeme;		/* number lexemes to substitute */
 	/* number of elements in res; filled in by compileTheSubstitute */
 	uint16		reslen;
 	TSLexeme   *res;			/* prepared substituted result */
 } TheSubstitute;

 /*
  * Whole state of one thesaurus dictionary instance, as built by
  * thesaurus_init and consulted by thesaurus_lexize.
  */
 typedef struct
 {
 	/* subdictionary to normalize lexemes */
 	Oid			subdictOid;
 	/* cache entry for subdictOid; re-fetched when its isvalid flag is clear */
 	TSDictionaryCacheEntry *subdict;

 	/* Array to search lexeme by exact match */
 	/* sorted by compileTheLexeme so that findTheLexeme can bsearch it */
 	TheLexeme  *wrds;
 	int			nwrds;			/* current number of words */
 	int			ntwrds;			/* allocated array length */

 	/*
 	 * Storage of substituted result, n-th element is for n-th expression
 	 */
 	TheSubstitute *subst;
 	/* allocated length while reading; thesaurusRead ends by storing the rule count */
 	int			nsubst;
 } DictThesaurus;


 /*
  * Append the raw sample-phrase word occupying bytes [b, e) to d->wrds.
  *
  * The word is copied into a freshly allocated NUL-terminated string and gets
  * a single LexemeInfo recording the rule number (idsubst) and the word's
  * position within the sample phrase (posinsubst).  The array grows by
  * doubling, starting from 16 slots.
  */
 static void
 newLexeme(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 posinsubst)
 {
 	size_t		len = e - b;
 	TheLexeme  *slot;
 	LexemeInfo *info;

 	/* make room for one more element if the array is full */
 	if (d->nwrds >= d->ntwrds)
 	{
 		if (d->ntwrds != 0)
 		{
 			d->ntwrds *= 2;
 			d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->ntwrds);
 		}
 		else
 		{
 			d->ntwrds = 16;
 			d->wrds = (TheLexeme *) palloc(sizeof(TheLexeme) * d->ntwrds);
 		}
 	}

 	slot = &d->wrds[d->nwrds];
 	d->nwrds++;

 	/* private, terminated copy of the word */
 	slot->lexeme = palloc(len + 1);
 	memcpy(slot->lexeme, b, len);
 	slot->lexeme[len] = '\0';

 	/* only these three fields are filled in at this stage */
 	info = (LexemeInfo *) palloc(sizeof(LexemeInfo));
 	info->nextentry = NULL;
 	info->idsubst = idsubst;
 	info->posinsubst = posinsubst;
 	slot->entries = info;
 }

 /*
  * Append the raw substitute word occupying bytes [b, e) to rule idsubst.
  *
  * nwrd is the word's ordinal within the substitute phrase; a value of zero
  * starts a new rule, which resets the function-static fill counters and, if
  * necessary, enlarges d->subst.  posinsubst is the number of sample-phrase
  * words read for this rule; posinsubst - 1 is remembered as lastlexeme.
  * When useasis is set the word is flagged DT_USEASIS.  The result array is
  * always kept terminated by an element whose lexeme is NULL.
  */
 static void
 addWrd(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
 {
 	/* fill state of the rule currently being read; survives between calls */
 	static int	curlen = 0;
 	static int	curalloc = 0;
 	size_t		len = e - b;
 	TheSubstitute *rule;
 	TSLexeme   *slot;

 	if (nwrd == 0)
 	{
 		/* first word of a rule: start over with an empty result array */
 		curlen = 0;
 		curalloc = 0;

 		if (idsubst >= d->nsubst)
 		{
 			if (d->nsubst != 0)
 			{
 				d->nsubst *= 2;
 				d->subst = (TheSubstitute *) repalloc(d->subst, sizeof(TheSubstitute) * d->nsubst);
 			}
 			else
 			{
 				d->nsubst = 16;
 				d->subst = (TheSubstitute *) palloc(sizeof(TheSubstitute) * d->nsubst);
 			}
 		}
 	}

 	rule = &d->subst[idsubst];
 	rule->lastlexeme = posinsubst - 1;

 	/* need space for the new word plus the terminating element */
 	if (curlen + 1 >= curalloc)
 	{
 		if (curalloc != 0)
 		{
 			curalloc *= 2;
 			rule->res = (TSLexeme *) repalloc(rule->res, sizeof(TSLexeme) * curalloc);
 		}
 		else
 		{
 			curalloc = 2;
 			rule->res = (TSLexeme *) palloc(sizeof(TSLexeme) * curalloc);
 		}
 	}

 	slot = &rule->res[curlen];

 	slot->lexeme = palloc(len + 1);
 	memcpy(slot->lexeme, b, len);
 	slot->lexeme[len] = '\0';

 	slot->nvariant = nwrd;
 	slot->flags = useasis ? DT_USEASIS : 0;

 	/* keep the array terminated */
 	curlen++;
 	rule->res[curlen].lexeme = NULL;
 }

 /*
  * States of the per-line scanner in thesaurusRead.
  */
 /* between sample-phrase words, before the ':' delimiter */
 #define TR_WAITLEX	1
 /* inside a sample-phrase word */
 #define TR_INLEX	2
 /* after ':' or between substitute words */
 #define TR_WAITSUBS 3
 /* inside a substitute word */
 #define TR_INSUBS	4

 /*
  * Read a thesaurus file and load its rules, still un-normalized, into d.
  *
  * Each line that is neither blank nor a '#' comment is one rule of the form
  * "sample words : substitute words", with words separated by whitespace.
  * Sample words are stored through newLexeme, substitute words through
  * addWrd.  A substitute word that starts with '*' is flagged use-as-is; a
  * leading backslash is skipped and the word is then treated normally.
  *
  * Raises ERRCODE_CONFIG_FILE_ERROR if the file cannot be opened or a line is
  * malformed.  On return d->nsubst holds the number of rules read.
  */
 static void
 thesaurusRead(const char *filename, DictThesaurus *d)
 {
 	tsearch_readline_state trst;
 	uint32		idsubst = 0;
 	bool		useasis = false;
 	char	   *line;

 	/* NOTE(review): presumably resolves the name to a ".ths" config path -- confirm */
 	filename = get_tsearch_config_filename(filename, "ths");
 	if (!tsearch_readline_begin(&trst, filename))
 		ereport(ERROR,
 				(errcode(ERRCODE_CONFIG_FILE_ERROR),
 				 errmsg("could not open thesaurus file \"%s\": %m",
 						filename)));

 	while ((line = tsearch_readline(&trst)) != NULL)
 	{
 		char	   *ptr;
 		int			state = TR_WAITLEX;
 		char	   *beginwrd = NULL;
 		uint32		posinsubst = 0;	/* sample words seen on this line */
 		uint32		nwrd = 0;		/* substitute words seen on this line */

 		ptr = line;

 		/* is it a comment? */
 		while (*ptr && t_isspace(ptr))
 			ptr += pg_mblen(ptr);

 		if (t_iseq(ptr, '#') || *ptr == '\0' ||
 			t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
 		{
 			pfree(line);
 			continue;
 		}

 		/* scan the line one (possibly multibyte) character at a time */
 		while (*ptr)
 		{
 			if (state == TR_WAITLEX)
 			{
 				if (t_iseq(ptr, ':'))
 				{
 					/* a delimiter before any sample word is an error */
 					if (posinsubst == 0)
 						ereport(ERROR,
 								(errcode(ERRCODE_CONFIG_FILE_ERROR),
 								 errmsg("unexpected delimiter")));
 					state = TR_WAITSUBS;
 				}
 				else if (!t_isspace(ptr))
 				{
 					beginwrd = ptr;
 					state = TR_INLEX;
 				}
 			}
 			else if (state == TR_INLEX)
 			{
 				/* a sample word ends at ':' or at whitespace */
 				if (t_iseq(ptr, ':'))
 				{
 					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
 					state = TR_WAITSUBS;
 				}
 				else if (t_isspace(ptr))
 				{
 					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
 					state = TR_WAITLEX;
 				}
 			}
 			else if (state == TR_WAITSUBS)
 			{
 				if (t_iseq(ptr, '*'))
 				{
 					/* '*' prefix: keep the word as written; word starts after it */
 					useasis = true;
 					state = TR_INSUBS;
 					beginwrd = ptr + pg_mblen(ptr);
 				}
 				else if (t_iseq(ptr, '\\'))
 				{
 					/* backslash prefix is dropped; word starts after it */
 					useasis = false;
 					state = TR_INSUBS;
 					beginwrd = ptr + pg_mblen(ptr);
 				}
 				else if (!t_isspace(ptr))
 				{
 					useasis = false;
 					beginwrd = ptr;
 					state = TR_INSUBS;
 				}
 			}
 			else if (state == TR_INSUBS)
 			{
 				if (t_isspace(ptr))
 				{
 					/* empty word, e.g. a lone '*' or backslash */
 					if (ptr == beginwrd)
 						ereport(ERROR,
 								(errcode(ERRCODE_CONFIG_FILE_ERROR),
 								 errmsg("unexpected end of line or lexeme")));
 					addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
 					state = TR_WAITSUBS;
 				}
 			}
 			else
 				elog(ERROR, "unrecognized thesaurus state: %d", state);

 			ptr += pg_mblen(ptr);
 		}

 		/* the line ended in the middle of a substitute word: flush it */
 		if (state == TR_INSUBS)
 		{
 			if (ptr == beginwrd)
 				ereport(ERROR,
 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
 						 errmsg("unexpected end of line or lexeme")));
 			addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
 		}

 		idsubst++;

 		/* a rule needs at least one sample word and one substitute word */
 		if (!(nwrd && posinsubst))
 			ereport(ERROR,
 					(errcode(ERRCODE_CONFIG_FILE_ERROR),
 					 errmsg("unexpected end of line")));

 		/* both counters are narrowed to uint16 when passed on; reject overflow */
 		if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
 			ereport(ERROR,
 					(errcode(ERRCODE_CONFIG_FILE_ERROR),
 					 errmsg("too many lexemes in thesaurus entry")));

 		pfree(line);
 	}

 	/* replace the allocated length by the number of rules actually read */
 	d->nsubst = idsubst;

 	tsearch_readline_end(&trst);
 }

 /*
  * Append one normalized word to the array being built by compileTheLexeme.
  *
  * newwrds is the array, *nnw its fill count and *tnm its allocated length;
  * the array is doubled when full, so the caller must continue with the
  * returned pointer.  If lexeme (or its text) is NULL the new element stands
  * for a stop word: its text is NULL and tnvariant is forced to 1.  The rule
  * number and position are taken from src.
  */
 static TheLexeme *
 addCompiledLexeme(TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
 {
 	TheLexeme  *slot;
 	LexemeInfo *info;

 	if (*nnw >= *tnm)
 	{
 		*tnm *= 2;
 		newwrds = (TheLexeme *) repalloc(newwrds, sizeof(TheLexeme) * *tnm);
 	}

 	slot = &newwrds[*nnw];
 	info = (LexemeInfo *) palloc(sizeof(LexemeInfo));
 	slot->entries = info;

 	if (lexeme == NULL || lexeme->lexeme == NULL)
 	{
 		/* stop-word placeholder */
 		slot->lexeme = NULL;
 		info->tnvariant = 1;
 	}
 	else
 	{
 		slot->lexeme = pstrdup(lexeme->lexeme);
 		info->tnvariant = tnvariant;
 	}

 	info->idsubst = src->idsubst;
 	info->posinsubst = src->posinsubst;
 	info->nextentry = NULL;

 	*nnw += 1;
 	return newwrds;
 }

 /*
  * Three-way comparison of two LexemeInfo records, ordering by idsubst, then
  * posinsubst, then tnvariant.  Returns -1, 0 or 1.  If either argument is
  * NULL the records are reported as equal.
  */
 static int
 cmpLexemeInfo(LexemeInfo *a, LexemeInfo *b)
 {
 	if (a == NULL || b == NULL)
 		return 0;

 	if (a->idsubst != b->idsubst)
 		return (a->idsubst > b->idsubst) ? 1 : -1;

 	if (a->posinsubst != b->posinsubst)
 		return (a->posinsubst > b->posinsubst) ? 1 : -1;

 	if (a->tnvariant != b->tnvariant)
 		return (a->tnvariant > b->tnvariant) ? 1 : -1;

 	return 0;
 }

 /*
  * Compare two TheLexeme elements by their text using strcmp.  An element
  * whose text is NULL sorts after every element with text; two NULL texts
  * compare equal.
  */
 static int
 cmpLexeme(const TheLexeme *a, const TheLexeme *b)
 {
 	if (a->lexeme != NULL && b->lexeme != NULL)
 		return strcmp(a->lexeme, b->lexeme);

 	/* at least one side has no text */
 	if (a->lexeme == NULL && b->lexeme == NULL)
 		return 0;

 	return (a->lexeme == NULL) ? 1 : -1;
 }

 /*
  * bsearch-style comparator over TheLexeme elements: compares by text only,
  * delegating to cmpLexeme.
  */
 static int
 cmpLexemeQ(const void *a, const void *b)
 {
 	const TheLexeme *left = (const TheLexeme *) a;
 	const TheLexeme *right = (const TheLexeme *) b;

 	return cmpLexeme(left, right);
 }

 /*
  * qsort comparator over TheLexeme elements: primary key is the text (see
  * cmpLexeme); elements with equal text are ordered by their first
  * LexemeInfo in descending order (the cmpLexemeInfo result is negated).
  */
 static int
 cmpTheLexeme(const void *a, const void *b)
 {
 	const TheLexeme *la = (const TheLexeme *) a;
 	const TheLexeme *lb = (const TheLexeme *) b;
 	int			bytext = cmpLexeme(la, lb);

 	if (bytext == 0)
 		return -cmpLexemeInfo(la->entries, lb->entries);

 	return bytext;
 }

 /*
  * Normalize every sample-phrase word through the subdictionary and replace
  * d->wrds by a sorted array with one element per distinct lexeme.
  *
  * The word "?" is the stop-word marker and is stored with a NULL text.  Any
  * other word must be recognized by the subdictionary and must not be a stop
  * word, otherwise ERRCODE_CONFIG_FILE_ERROR is raised.  Every lexeme the
  * subdictionary returns becomes an element carrying the number of lexemes
  * in its variant.  The array is then sorted and duplicates are folded:
  * occurrences of the same lexeme are chained through nextentry.
  */
 static void
 compileTheLexeme(DictThesaurus *d)
 {
 	int			i,
 				nnw = 0,
 				tnm = 16;
 	TheLexeme  *newwrds = (TheLexeme *) palloc(sizeof(TheLexeme) * tnm),
 			   *ptrwrds;

 	for (i = 0; i < d->nwrds; i++)
 	{
 		TSLexeme   *ptr;

 		if (strcmp(d->wrds[i].lexeme, "?") == 0)	/* Is stop word marker? */
 			newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
 		else
 		{
 			/* ask the subdictionary to lexize the raw word */
 			ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
 															 PointerGetDatum(d->subdict->dictData),
 															 PointerGetDatum(d->wrds[i].lexeme),
 															 Int32GetDatum(strlen(d->wrds[i].lexeme)),
 															 PointerGetDatum(NULL)));

 			if (!ptr)
 				ereport(ERROR,
 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
 						 errmsg("thesaurus sample word \"%s\" isn't recognized by subdictionary (rule %d)",
 								d->wrds[i].lexeme,
 								d->wrds[i].entries->idsubst + 1)));
 			else if (!(ptr->lexeme))
 				ereport(ERROR,
 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
 						 errmsg("thesaurus sample word \"%s\" is a stop word (rule %d)",
 								d->wrds[i].lexeme,
 								d->wrds[i].entries->idsubst + 1),
 						 errhint("Use \"?\" to represent a stop word within a sample phrase.")));
 			else
 			{
 				/* walk the result one variant (run of equal nvariant) at a time */
 				while (ptr->lexeme)
 				{
 					TSLexeme   *remptr = ptr + 1;
 					int			tnvar = 1;
 					int			curvar = ptr->nvariant;

 					/* compute n words in one variant */
 					while (remptr->lexeme)
 					{
 						if (remptr->nvariant != (remptr - 1)->nvariant)
 							break;
 						tnvar++;
 						remptr++;
 					}

 					/* second pass: store each lexeme of the variant with that count */
 					remptr = ptr;
 					while (remptr->lexeme && remptr->nvariant == curvar)
 					{
 						newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
 						remptr++;
 					}

 					ptr = remptr;
 				}
 			}
 		}

 		/* the raw word and its info were copied above and are no longer needed */
 		pfree(d->wrds[i].lexeme);
 		pfree(d->wrds[i].entries);
 	}

 	if (d->wrds)
 		pfree(d->wrds);
 	d->wrds = newwrds;
 	d->nwrds = nnw;
 	d->ntwrds = tnm;

 	if (d->nwrds > 1)
 	{
 		/* by text; equal texts in descending LexemeInfo order (see cmpTheLexeme) */
 		qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);

 		/* uniq */
 		/* newwrds is the last kept element, ptrwrds the element being examined */
 		newwrds = d->wrds;
 		ptrwrds = d->wrds + 1;
 		while (ptrwrds - d->wrds < d->nwrds)
 		{
 			if (cmpLexeme(ptrwrds, newwrds) == 0)
 			{
 				if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
 				{
 					/*
 					 * Same text, different occurrence: push it on the front
 					 * of the kept element's list.  Given the descending sort,
 					 * the finished list is in ascending order.
 					 */
 					ptrwrds->entries->nextentry = newwrds->entries;
 					newwrds->entries = ptrwrds->entries;
 				}
 				else
 					pfree(ptrwrds->entries);	/* exact duplicate */

 				if (ptrwrds->lexeme)
 					pfree(ptrwrds->lexeme);
 			}
 			else
 			{
 				/* new distinct text: keep it in the next free slot */
 				newwrds++;
 				*newwrds = *ptrwrds;
 			}

 			ptrwrds++;
 		}

 		/* shrink the array to the number of distinct lexemes */
 		d->nwrds = newwrds - d->wrds + 1;
 		d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds);
 	}
 }

 /*
  * Turn the raw substitute words of every rule into the final TSLexeme array
  * handed out on a match.
  *
  * Words flagged DT_USEASIS are copied verbatim with the flag cleared; every
  * other word is lexized by the subdictionary and replaced by the lexemes it
  * returns.  A word that is unknown to the subdictionary or is a stop word,
  * and a rule that ends up with no output, raise ERRCODE_CONFIG_FILE_ERROR.
  * The raw array built by addWrd is freed.
  */
 static void
 compileTheSubstitute(DictThesaurus *d)
 {
 	int			i;

 	for (i = 0; i < d->nsubst; i++)
 	{
 		TSLexeme   *rem = d->subst[i].res,	/* raw words, freed at the end */
 				   *outptr,
 				   *inptr;
 		int			n = 2;		/* allocated length of the new array */

 		outptr = d->subst[i].res = (TSLexeme *) palloc(sizeof(TSLexeme) * n);
 		outptr->lexeme = NULL;
 		inptr = rem;

 		while (inptr && inptr->lexeme)
 		{
 			TSLexeme   *lexized,
 						tmplex[2];

 			if (inptr->flags & DT_USEASIS)
 			{					/* do not lexize */
 				tmplex[0] = *inptr;
 				tmplex[0].flags = 0;
 				tmplex[1].lexeme = NULL;
 				lexized = tmplex;
 			}
 			else
 			{
 				lexized = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
 																	 PointerGetDatum(d->subdict->dictData),
 																	 PointerGetDatum(inptr->lexeme),
 																	 Int32GetDatum(strlen(inptr->lexeme)),
 																	 PointerGetDatum(NULL)));
 			}

 			if (lexized && lexized->lexeme)
 			{
 				/* output index where this word starts, or -1 for the first word */
 				int			toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1;

 				while (lexized->lexeme)
 				{
 					if (outptr - d->subst[i].res + 1 >= n)
 					{
 						/* repalloc may move the array: remember the offset and rebase */
 						int			diff = outptr - d->subst[i].res;

 						n *= 2;
 						d->subst[i].res = (TSLexeme *) repalloc(d->subst[i].res, sizeof(TSLexeme) * n);
 						outptr = d->subst[i].res + diff;
 					}

 					*outptr = *lexized;
 					outptr->lexeme = pstrdup(lexized->lexeme);

 					outptr++;
 					lexized++;
 				}

 				/*
 				 * Flag the first lexeme of every word except the first.
 				 * NOTE(review): TSL_ADDPOS presumably asks the consumer to
 				 * advance the word position -- confirm against ts_public.h.
 				 */
 				if (toset > 0)
 					d->subst[i].res[toset].flags |= TSL_ADDPOS;
 			}
 			else if (lexized)
 			{
 				ereport(ERROR,
 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
 						 errmsg("thesaurus substitute word \"%s\" is a stop word (rule %d)",
 								inptr->lexeme, i + 1)));
 			}
 			else
 			{
 				ereport(ERROR,
 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
 						 errmsg("thesaurus substitute word \"%s\" isn't recognized by subdictionary (rule %d)",
 								inptr->lexeme, i + 1)));
 			}

 			if (inptr->lexeme)
 				pfree(inptr->lexeme);
 			inptr++;
 		}

 		if (outptr == d->subst[i].res)
 			ereport(ERROR,
 					(errcode(ERRCODE_CONFIG_FILE_ERROR),
 					 errmsg("thesaurus substitute phrase is empty (rule %d)",
 							i + 1)));

 		d->subst[i].reslen = outptr - d->subst[i].res;

 		pfree(rem);
 	}
 }

 /*
  * Dictionary init function.
  *
  * Argument 0 is the list of DefElem options.  Exactly one "dictfile" (the
  * thesaurus file, read immediately) and exactly one "dictionary" (name of
  * the subdictionary) are required; duplicates, unknown options and missing
  * options raise ERRCODE_INVALID_PARAMETER_VALUE.  After the subdictionary
  * has been looked up, the sample and substitute phrases are compiled and
  * the resulting DictThesaurus is returned as a pointer Datum.
  */
 Datum
 thesaurus_init(PG_FUNCTION_ARGS)
 {
 	List	   *dictoptions = (List *) PG_GETARG_POINTER(0);
 	DictThesaurus *d = (DictThesaurus *) palloc0(sizeof(DictThesaurus));
 	char	   *subdictname = NULL;
 	bool		fileloaded = false;
 	ListCell   *cell;

 	foreach(cell, dictoptions)
 	{
 		DefElem    *opt = (DefElem *) lfirst(cell);

 		if (strcmp(opt->defname, "dictfile") == 0)
 		{
 			if (fileloaded)
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 						 errmsg("multiple DictFile parameters")));
 			thesaurusRead(defGetString(opt), d);
 			fileloaded = true;
 			continue;
 		}

 		if (strcmp(opt->defname, "dictionary") == 0)
 		{
 			if (subdictname)
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 						 errmsg("multiple Dictionary parameters")));
 			subdictname = pstrdup(defGetString(opt));
 			continue;
 		}

 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("unrecognized Thesaurus parameter: \"%s\"",
 						opt->defname)));
 	}

 	/* both options are mandatory */
 	if (!fileloaded)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("missing DictFile parameter")));
 	if (!subdictname)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("missing Dictionary parameter")));

 	/* resolve the subdictionary name and fetch its cache entry */
 	d->subdictOid = get_ts_dict_oid(stringToQualifiedNameList(subdictname, NULL),
 									false);
 	d->subdict = lookup_ts_dictionary_cache(d->subdictOid);

 	/* the subdictionary is needed for both compile steps */
 	compileTheLexeme(d);
 	compileTheSubstitute(d);

 	PG_RETURN_POINTER(d);
 }

 /*
  * Look up a lexeme in the sorted d->wrds array by exact text match and
  * return the head of its occurrence list, or NULL if the dictionary is
  * empty or the lexeme is absent.  lexeme may be NULL, which searches for
  * the element with NULL text.
  */
 static LexemeInfo *
 findTheLexeme(DictThesaurus *d, char *lexeme)
 {
 	TheLexeme	probe;
 	TheLexeme  *hit;

 	if (d->nwrds == 0)
 		return NULL;

 	probe.lexeme = lexeme;
 	probe.entries = NULL;

 	hit = bsearch(&probe, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);

 	return (hit != NULL) ? hit->entries : NULL;
 }

 /*
  * Report whether rule idsubst occurs in the nextvariant-linked list that
  * starts at stored.  An empty list (stored == NULL) matches every rule.
  */
 static bool
 matchIdSubst(LexemeInfo *stored, uint32 idsubst)
 {
 	LexemeInfo *cur;

 	if (stored == NULL)
 		return true;

 	for (cur = stored; cur != NULL; cur = cur->nextvariant)
 	{
 		if (cur->idsubst == idsubst)
 			return true;
 	}

 	return false;
 }

 /*
  * Collect the rules that the lexemes of one variant can belong to at
  * position curpos.
  *
  * newin[0..newn-1] are the occurrence lists (linked by nextentry) of the
  * newn lexemes of the variant; the array is advanced in place.  The loop
  * looks for a rule number at which every list has an entry with
  * posinsubst == curpos and tnvariant == newn.  Such a rule is pushed on the
  * front of the list "in" (linked by nextvariant) provided it passes
  * matchIdSubst against "stored" and is not already present in "in".  The
  * function returns the possibly extended list as soon as any of the
  * occurrence lists is exhausted.
  */
 static LexemeInfo *
 findVariant(LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn)
 {
 	for (;;)
 	{
 		int			i;
 		LexemeInfo *ptr = newin[0];	/* current candidate entry / rule */

 		for (i = 0; i < newn; i++)
 		{
 			/* skip entries of rules below the candidate rule */
 			while (newin[i] && newin[i]->idsubst < ptr->idsubst)
 				newin[i] = newin[i]->nextentry;

 			if (newin[i] == NULL)
 				return in;

 			if (newin[i]->idsubst > ptr->idsubst)
 			{
 				/* this list skipped past the candidate: restart with the higher rule */
 				ptr = newin[i];
 				i = -1;
 				continue;
 			}

 			/* within the candidate rule, look for the wanted position and variant size */
 			while (newin[i]->idsubst == ptr->idsubst)
 			{
 				if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn)
 				{
 					ptr = newin[i];
 					break;
 				}

 				newin[i] = newin[i]->nextentry;
 				if (newin[i] == NULL)
 					return in;
 			}

 			if (newin[i]->idsubst != ptr->idsubst)
 			{
 				/* ran off the candidate rule without a hit: restart with the next rule */
 				ptr = newin[i];
 				i = -1;
 				continue;
 			}
 		}

 		if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst)))
 		{						/* found */

 			ptr->nextvariant = in;
 			in = ptr;
 		}

 		/* step forward */
 		for (i = 0; i < newn; i++)
 			newin[i] = newin[i]->nextentry;
 	}
 }

 /*
  * Return a freshly allocated deep copy of a rule's result array: every
  * element is copied, its text is duplicated, and the copy is terminated by
  * an element whose lexeme is NULL.
  */
 static TSLexeme *
 copyTSLexeme(TheSubstitute *ts)
 {
 	TSLexeme   *copy = (TSLexeme *) palloc(sizeof(TSLexeme) * (ts->reslen + 1));
 	TSLexeme   *dst = copy;
 	uint16		k = 0;

 	while (k < ts->reslen)
 	{
 		*dst = ts->res[k];
 		dst->lexeme = pstrdup(ts->res[k].lexeme);
 		dst++;
 		k++;
 	}

 	/* dst now points at slot reslen */
 	dst->lexeme = NULL;

 	return copy;
 }

 /*
  * Walk the candidate list (linked by nextvariant) and return a copy of the
  * substitute of the first rule whose last sample lexeme is at curpos, or
  * NULL if no candidate is complete.  *moreres is set to true when any
  * candidate visited has a successor in the list, false otherwise.
  */
 static TSLexeme *
 checkMatch(DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
 {
 	LexemeInfo *cand;

 	*moreres = false;

 	for (cand = info; cand != NULL; cand = cand->nextvariant)
 	{
 		TheSubstitute *rule;

 		Assert(cand->idsubst < d->nsubst);

 		if (cand->nextvariant != NULL)
 			*moreres = true;

 		rule = d->subst + cand->idsubst;
 		if (rule->lastlexeme == curpos)
 			return copyTSLexeme(rule);
 	}

 	return NULL;
 }

 /*
  * Dictionary lexize function.
  *
  * Argument 0 is the DictThesaurus, arguments 1 and 2 are passed unchanged
  * to the subdictionary's lexize function, and argument 3 is the
  * DictSubState that carries the matching state between calls.  Its
  * private_state holds the candidate list left by the previous word (NULL
  * at the start of a phrase).  The word is normalized by the subdictionary,
  * the candidate list is narrowed with findVariant, and:
  *   - no candidate left: getnext = false, returns NULL;
  *   - a rule is complete at this position: returns a copy of its
  *     substitute, with getnext telling whether other candidates remain;
  *   - otherwise: getnext = true, returns NULL.
  */
 Datum
 thesaurus_lexize(PG_FUNCTION_ARGS)
 {
 	DictThesaurus *d = (DictThesaurus *) PG_GETARG_POINTER(0);
 	DictSubState *dstate = (DictSubState *) PG_GETARG_POINTER(3);
 	TSLexeme   *res = NULL;
 	LexemeInfo *stored,
 			   *info = NULL;
 	uint16		curpos = 0;
 	bool		moreres = false;

 	if (PG_NARGS() != 4 || dstate == NULL)
 		elog(ERROR, "forbidden call of thesaurus or nested call");

 	if (dstate->isend)
 		PG_RETURN_POINTER(NULL);
 	stored = (LexemeInfo *) dstate->private_state;

 	/* continuing a phrase: this word is one position after the stored one */
 	if (stored)
 		curpos = stored->posinsubst + 1;

 	/* refresh the subdictionary cache entry if it has been invalidated */
 	if (!d->subdict->isvalid)
 		d->subdict = lookup_ts_dictionary_cache(d->subdictOid);

 	res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
 													 PointerGetDatum(d->subdict->dictData),
 													 PG_GETARG_DATUM(1),
 													 PG_GETARG_DATUM(2),
 													 PointerGetDatum(NULL)));

 	if (res && res->lexeme)
 	{
 		TSLexeme   *ptr = res,
 				   *basevar;

 		/* process the subdictionary output one variant at a time */
 		while (ptr->lexeme)
 		{
 			uint16		nv = ptr->nvariant;
 			uint16		i,
 						nlex = 0;
 			LexemeInfo **infos;

 			/* count the lexemes of this variant; ptr ends up on the next one */
 			basevar = ptr;
 			while (ptr->lexeme && nv == ptr->nvariant)
 			{
 				nlex++;
 				ptr++;
 			}

 			/* every lexeme of the variant must be known to the thesaurus */
 			infos = (LexemeInfo **) palloc(sizeof(LexemeInfo *) * nlex);
 			for (i = 0; i < nlex; i++)
 				if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL)
 					break;

 			if (i < nlex)
 			{
 				/* no chance to find */
 				pfree(infos);
 				continue;
 			}

 			info = findVariant(info, stored, curpos, infos, nlex);
 		}
 	}
 	else if (res)
 	{							/* stop-word */
 		/* stop words are stored under a NULL lexeme */
 		LexemeInfo *infos = findTheLexeme(d, NULL);

 		info = findVariant(NULL, stored, curpos, &infos, 1);
 	}
 	else
 	{
 		info = NULL;			/* word isn't recognized */
 	}

 	/* remember the surviving candidates for the next call */
 	dstate->private_state = (void *) info;

 	if (!info)
 	{
 		dstate->getnext = false;
 		PG_RETURN_POINTER(NULL);
 	}

 	if ((res = checkMatch(d, info, curpos, &moreres)) != NULL)
 	{
 		dstate->getnext = moreres;
 		PG_RETURN_POINTER(res);
 	}

 	/* candidates remain but none is complete yet: ask for the next word */
 	dstate->getnext = true;

 	PG_RETURN_POINTER(NULL);
 }
	/*-------------------------------------------------------------------------
	*
	* dict_thesaurus.c
	* Thesaurus dictionary: phrase to phrase substitution
	*
	* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
	*
	*
	* IDENTIFICATION
	* src/backend/tsearch/dict_thesaurus.c
	*
	*-------------------------------------------------------------------------
	*/
	#include "postgres.h"

	#include "catalog/namespace.h"
	#include "commands/defrem.h"
	#include "tsearch/ts_cache.h"
	#include "tsearch/ts_locale.h"
	#include "tsearch/ts_utils.h"
	#include "utils/builtins.h"
	#include "utils/regproc.h"


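	/*
	 * Temporarily, TSLexeme.flags is borrowed for internal use: DT_USEASIS marks
	 * a substitute word that must be kept as written instead of being lexized.
	 */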
	#define DT_USEASIS 0x1000

	/*
	 * Describes one place where a sample-phrase lexeme occurs: which rule it
	 * belongs to and at which position inside that rule's sample phrase.
	 */
	typedef struct LexemeInfo
	{
	uint32 idsubst; /* entry's number in DictThesaurus->subst */
	uint16 posinsubst; /* pos info in entry */
	uint16 tnvariant; /* total num lexemes in one variant */
	/* next occurrence of the same lexeme */
	struct LexemeInfo *nextentry;
	/* next candidate rule in a list of candidates */
	struct LexemeInfo *nextvariant;
	} LexemeInfo;

	/*
	 * One searchable word of the thesaurus together with the list of places
	 * where it occurs in sample phrases.
	 */
	typedef struct
	{
	/* word text */
	char *lexeme;
	/* head of the nextentry-linked list of occurrences */
	LexemeInfo *entries;
	} TheLexeme;

	/*
	 * Right-hand side of one thesaurus rule.
	 */
	typedef struct
	{
		uint16		lastlexeme;		/* index of the last sample-phrase lexeme */
		uint16		reslen;			/* number of elements in res */
		TSLexeme   *res;			/* prepared substituted result */
	} TheSubstitute;

	/*
	 * Whole state of one thesaurus dictionary instance.
	 */
	typedef struct
	{
	/* subdictionary to normalize lexemes */
	Oid subdictOid;
	TSDictionaryCacheEntry *subdict;

	/* Array to search lexeme by exact match */
	TheLexeme *wrds;
	int nwrds; /* current number of words */
	int ntwrds; /* allocated array length */

	/*
	* Storage of substituted result, n-th element is for n-th expression
	*/
	TheSubstitute *subst;
	/* number of elements in subst */
	int nsubst;
	} DictThesaurus;


	/*
	 * Append the raw sample-phrase word occupying bytes [b, e) to d->wrds.
	 *
	 * The word is copied into a freshly allocated NUL-terminated string and
	 * gets a single LexemeInfo recording the rule number (idsubst) and the
	 * word's position within the sample phrase (posinsubst).  The array grows
	 * by doubling, starting from 16 slots.
	 */
	static void
	newLexeme(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 posinsubst)
	{
		TheLexeme  *ptr;

		/* make room for one more element if the array is full */
		if (d->nwrds >= d->ntwrds)
		{
			if (d->ntwrds == 0)
			{
				d->ntwrds = 16;
				d->wrds = (TheLexeme *) palloc(sizeof(TheLexeme) * d->ntwrds);
			}
			else
			{
				d->ntwrds *= 2;
				d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->ntwrds);
			}
		}

		ptr = d->wrds + d->nwrds;
		d->nwrds++;

		/* private, terminated copy of the word */
		ptr->lexeme = palloc(e - b + 1);

		memcpy(ptr->lexeme, b, e - b);
		ptr->lexeme[e - b] = '\0';

		ptr->entries = (LexemeInfo *) palloc(sizeof(LexemeInfo));

		ptr->entries->nextentry = NULL;
		ptr->entries->idsubst = idsubst;
		ptr->entries->posinsubst = posinsubst;
	}

	/*
	 * Append the raw substitute word occupying bytes [b, e) to rule idsubst.
	 *
	 * nwrd is the word's ordinal within the substitute phrase; zero starts a
	 * new rule, which resets the function-static fill counters and, if
	 * necessary, enlarges d->subst.  posinsubst - 1 is stored as lastlexeme.
	 * When useasis is set the word is flagged DT_USEASIS.  The result array
	 * is always kept terminated by an element whose lexeme is NULL.
	 */
	static void
	addWrd(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
	{
		/* fill state of the rule currently being read; survives between calls */
		static int	nres = 0;
		static int	ntres = 0;
		TheSubstitute *ptr;

		if (nwrd == 0)
		{
			nres = ntres = 0;

			if (idsubst >= d->nsubst)
			{
				if (d->nsubst == 0)
				{
					d->nsubst = 16;
					d->subst = (TheSubstitute *) palloc(sizeof(TheSubstitute) * d->nsubst);
				}
				else
				{
					d->nsubst *= 2;
					d->subst = (TheSubstitute *) repalloc(d->subst, sizeof(TheSubstitute) * d->nsubst);
				}
			}
		}

		ptr = d->subst + idsubst;

		ptr->lastlexeme = posinsubst - 1;

		/* need space for the new word plus the terminating element */
		if (nres + 1 >= ntres)
		{
			if (ntres == 0)
			{
				ntres = 2;
				ptr->res = (TSLexeme *) palloc(sizeof(TSLexeme) * ntres);
			}
			else
			{
				ntres *= 2;
				ptr->res = (TSLexeme *) repalloc(ptr->res, sizeof(TSLexeme) * ntres);
			}
		}

		ptr->res[nres].lexeme = palloc(e - b + 1);
		memcpy(ptr->res[nres].lexeme, b, e - b);
		ptr->res[nres].lexeme[e - b] = '\0';

		ptr->res[nres].nvariant = nwrd;
		if (useasis)
			ptr->res[nres].flags = DT_USEASIS;
		else
			ptr->res[nres].flags = 0;

		/* keep the array terminated */
		ptr->res[++nres].lexeme = NULL;
	}

	/*
	 * States of the per-line scanner in thesaurusRead.
	 */
	/* between sample-phrase words, before the ':' delimiter */
	#define TR_WAITLEX 1
	/* inside a sample-phrase word */
	#define TR_INLEX 2
	/* after ':' or between substitute words */
	#define TR_WAITSUBS 3
	/* inside a substitute word */
	#define TR_INSUBS 4

	/*
	 * Read a thesaurus file and load its rules, still un-normalized, into d.
	 *
	 * Each line that is neither blank nor a '#' comment is one rule of the
	 * form "sample words : substitute words", with words separated by
	 * whitespace.  Sample words are stored through newLexeme, substitute
	 * words through addWrd.  A substitute word that starts with '*' is
	 * flagged use-as-is; a leading backslash is skipped and the word is then
	 * treated normally.
	 *
	 * Raises ERRCODE_CONFIG_FILE_ERROR if the file cannot be opened or a line
	 * is malformed.  On return d->nsubst holds the number of rules read.
	 */
	static void
	thesaurusRead(const char *filename, DictThesaurus *d)
	{
		tsearch_readline_state trst;
		uint32		idsubst = 0;
		bool		useasis = false;
		char	   *line;

		filename = get_tsearch_config_filename(filename, "ths");
		if (!tsearch_readline_begin(&trst, filename))
			ereport(ERROR,
					(errcode(ERRCODE_CONFIG_FILE_ERROR),
					 errmsg("could not open thesaurus file \"%s\": %m",
							filename)));

		while ((line = tsearch_readline(&trst)) != NULL)
		{
			char	   *ptr;
			int			state = TR_WAITLEX;
			char	   *beginwrd = NULL;
			uint32		posinsubst = 0;	/* sample words seen on this line */
			uint32		nwrd = 0;		/* substitute words seen on this line */

			ptr = line;

			/* is it a comment? */
			while (*ptr && t_isspace(ptr))
				ptr += pg_mblen(ptr);

			if (t_iseq(ptr, '#') || *ptr == '\0' ||
				t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
			{
				pfree(line);
				continue;
			}

			/* scan the line one (possibly multibyte) character at a time */
			while (*ptr)
			{
				if (state == TR_WAITLEX)
				{
					if (t_iseq(ptr, ':'))
					{
						/* a delimiter before any sample word is an error */
						if (posinsubst == 0)
							ereport(ERROR,
									(errcode(ERRCODE_CONFIG_FILE_ERROR),
									 errmsg("unexpected delimiter")));
						state = TR_WAITSUBS;
					}
					else if (!t_isspace(ptr))
					{
						beginwrd = ptr;
						state = TR_INLEX;
					}
				}
				else if (state == TR_INLEX)
				{
					/* a sample word ends at ':' or at whitespace */
					if (t_iseq(ptr, ':'))
					{
						newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
						state = TR_WAITSUBS;
					}
					else if (t_isspace(ptr))
					{
						newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
						state = TR_WAITLEX;
					}
				}
				else if (state == TR_WAITSUBS)
				{
					if (t_iseq(ptr, '*'))
					{
						/* '*' prefix: keep the word as written */
						useasis = true;
						state = TR_INSUBS;
						beginwrd = ptr + pg_mblen(ptr);
					}
					else if (t_iseq(ptr, '\\'))
					{
						/* backslash prefix is dropped */
						useasis = false;
						state = TR_INSUBS;
						beginwrd = ptr + pg_mblen(ptr);
					}
					else if (!t_isspace(ptr))
					{
						useasis = false;
						beginwrd = ptr;
						state = TR_INSUBS;
					}
				}
				else if (state == TR_INSUBS)
				{
					if (t_isspace(ptr))
					{
						/* empty word, e.g. a lone '*' or backslash */
						if (ptr == beginwrd)
							ereport(ERROR,
									(errcode(ERRCODE_CONFIG_FILE_ERROR),
									 errmsg("unexpected end of line or lexeme")));
						addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
						state = TR_WAITSUBS;
					}
				}
				else
					elog(ERROR, "unrecognized thesaurus state: %d", state);

				ptr += pg_mblen(ptr);
			}

			/* the line ended in the middle of a substitute word: flush it */
			if (state == TR_INSUBS)
			{
				if (ptr == beginwrd)
					ereport(ERROR,
							(errcode(ERRCODE_CONFIG_FILE_ERROR),
							 errmsg("unexpected end of line or lexeme")));
				addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
			}

			idsubst++;

			/* a rule needs at least one sample word and one substitute word */
			if (!(nwrd && posinsubst))
				ereport(ERROR,
						(errcode(ERRCODE_CONFIG_FILE_ERROR),
						 errmsg("unexpected end of line")));

			/* both counters are narrowed to uint16 when passed on */
			if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
				ereport(ERROR,
						(errcode(ERRCODE_CONFIG_FILE_ERROR),
						 errmsg("too many lexemes in thesaurus entry")));

			pfree(line);
		}

		/* replace the allocated length by the number of rules actually read */
		d->nsubst = idsubst;

		tsearch_readline_end(&trst);
	}

	/*
	 * Append one normalized word to the array being built by
	 * compileTheLexeme.
	 *
	 * newwrds is the array, *nnw its fill count and *tnm its allocated
	 * length; the array is doubled when full, so the caller must continue
	 * with the returned pointer.  If lexeme (or its text) is NULL the new
	 * element stands for a stop word: its text is NULL and tnvariant is
	 * forced to 1.  The rule number and position are taken from src.
	 */
	static TheLexeme *
	addCompiledLexeme(TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
	{
		if (*nnw >= *tnm)
		{
			*tnm *= 2;
			newwrds = (TheLexeme *) repalloc(newwrds, sizeof(TheLexeme) * *tnm);
		}

		newwrds[*nnw].entries = (LexemeInfo *) palloc(sizeof(LexemeInfo));

		if (lexeme && lexeme->lexeme)
		{
			newwrds[*nnw].lexeme = pstrdup(lexeme->lexeme);
			newwrds[*nnw].entries->tnvariant = tnvariant;
		}
		else
		{
			/* stop-word placeholder */
			newwrds[*nnw].lexeme = NULL;
			newwrds[*nnw].entries->tnvariant = 1;
		}

		newwrds[*nnw].entries->idsubst = src->idsubst;
		newwrds[*nnw].entries->posinsubst = src->posinsubst;

		newwrds[*nnw].entries->nextentry = NULL;

		(*nnw)++;
		return newwrds;
	}

	/*
	 * Three-way comparison of two LexemeInfo records, ordering by idsubst,
	 * then posinsubst, then tnvariant.  Returns -1, 0 or 1.  If either
	 * argument is NULL the records are reported as equal.
	 */
	static int
	cmpLexemeInfo(LexemeInfo *a, LexemeInfo *b)
	{
		if (a == NULL || b == NULL)
			return 0;

		if (a->idsubst == b->idsubst)
		{
			if (a->posinsubst == b->posinsubst)
			{
				if (a->tnvariant == b->tnvariant)
					return 0;

				return (a->tnvariant > b->tnvariant) ? 1 : -1;
			}

			return (a->posinsubst > b->posinsubst) ? 1 : -1;
		}

		return (a->idsubst > b->idsubst) ? 1 : -1;
	}

	static int
	cmpLexeme(const TheLexeme a, const TheLexeme b)
	{
	if (a->lexeme == NULL)
	{
	if (b->lexeme == NULL)
	return 0;
	else
	return 1;
	}
	else if (b->lexeme == NULL)
	return -1;

	return strcmp(a->lexeme, b->lexeme);
	}

	/*
	 * bsearch-style comparator over TheLexeme elements: compares by text
	 * only, delegating to cmpLexeme.
	 */
	static int
	cmpLexemeQ(const void *a, const void *b)
	{
		return cmpLexeme((const TheLexeme *) a, (const TheLexeme *) b);
	}

	static int
	cmpTheLexeme(const void a, const void b)
	{
	const TheLexeme la = (const TheLexeme ) a;
	const TheLexeme lb = (const TheLexeme ) b;
	int res;

	if ((res = cmpLexeme(la, lb)) != 0)
	return res;

	return -cmpLexemeInfo(la->entries, lb->entries);
	}

	/*
	 * Normalize every sample-phrase word through the subdictionary and
	 * replace d->wrds by a sorted array with one element per distinct lexeme.
	 *
	 * The word "?" is the stop-word marker and is stored with a NULL text.
	 * Any other word must be recognized by the subdictionary and must not be
	 * a stop word, otherwise ERRCODE_CONFIG_FILE_ERROR is raised.  Every
	 * lexeme the subdictionary returns becomes an element carrying the number
	 * of lexemes in its variant.  The array is then sorted and duplicates are
	 * folded: occurrences of the same lexeme are chained through nextentry.
	 */
	static void
	compileTheLexeme(DictThesaurus *d)
	{
		int			i,
					nnw = 0,
					tnm = 16;
		TheLexeme  *newwrds = (TheLexeme *) palloc(sizeof(TheLexeme) * tnm),
				   *ptrwrds;

		for (i = 0; i < d->nwrds; i++)
		{
			TSLexeme   *ptr;

			if (strcmp(d->wrds[i].lexeme, "?") == 0)	/* Is stop word marker? */
				newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
			else
			{
				/* ask the subdictionary to lexize the raw word */
				ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
																 PointerGetDatum(d->subdict->dictData),
																 PointerGetDatum(d->wrds[i].lexeme),
																 Int32GetDatum(strlen(d->wrds[i].lexeme)),
																 PointerGetDatum(NULL)));

				if (!ptr)
					ereport(ERROR,
							(errcode(ERRCODE_CONFIG_FILE_ERROR),
							 errmsg("thesaurus sample word \"%s\" isn't recognized by subdictionary (rule %d)",
									d->wrds[i].lexeme,
									d->wrds[i].entries->idsubst + 1)));
				else if (!(ptr->lexeme))
					ereport(ERROR,
							(errcode(ERRCODE_CONFIG_FILE_ERROR),
							 errmsg("thesaurus sample word \"%s\" is a stop word (rule %d)",
									d->wrds[i].lexeme,
									d->wrds[i].entries->idsubst + 1),
							 errhint("Use \"?\" to represent a stop word within a sample phrase.")));
				else
				{
					/* walk the result one variant at a time */
					while (ptr->lexeme)
					{
						TSLexeme   *remptr = ptr + 1;
						int			tnvar = 1;
						int			curvar = ptr->nvariant;

						/* compute n words in one variant */
						while (remptr->lexeme)
						{
							if (remptr->nvariant != (remptr - 1)->nvariant)
								break;
							tnvar++;
							remptr++;
						}

						/* second pass: store each lexeme with that count */
						remptr = ptr;
						while (remptr->lexeme && remptr->nvariant == curvar)
						{
							newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
							remptr++;
						}

						ptr = remptr;
					}
				}
			}

			/* the raw word and its info were copied above */
			pfree(d->wrds[i].lexeme);
			pfree(d->wrds[i].entries);
		}

		if (d->wrds)
			pfree(d->wrds);
		d->wrds = newwrds;
		d->nwrds = nnw;
		d->ntwrds = tnm;

		if (d->nwrds > 1)
		{
			qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);

			/* uniq */
			/* newwrds is the last kept element, ptrwrds the one being examined */
			newwrds = d->wrds;
			ptrwrds = d->wrds + 1;
			while (ptrwrds - d->wrds < d->nwrds)
			{
				if (cmpLexeme(ptrwrds, newwrds) == 0)
				{
					if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
					{
						/* same text, different occurrence: push on the list */
						ptrwrds->entries->nextentry = newwrds->entries;
						newwrds->entries = ptrwrds->entries;
					}
					else
						pfree(ptrwrds->entries);	/* exact duplicate */

					if (ptrwrds->lexeme)
						pfree(ptrwrds->lexeme);
				}
				else
				{
					/* new distinct text: copy the element into the next slot */
					newwrds++;
					*newwrds = *ptrwrds;
				}

				ptrwrds++;
			}

			/* shrink the array to the number of distinct lexemes */
			d->nwrds = newwrds - d->wrds + 1;
			d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds);
		}
	}

	/*
	 * Turn the raw substitute words of every rule into the final TSLexeme
	 * array handed out on a match.
	 *
	 * Words flagged DT_USEASIS are copied verbatim with the flag cleared;
	 * every other word is lexized by the subdictionary and replaced by the
	 * lexemes it returns.  A word that is unknown to the subdictionary or is
	 * a stop word, and a rule that ends up with no output, raise
	 * ERRCODE_CONFIG_FILE_ERROR.  The raw array is freed.
	 */
	static void
	compileTheSubstitute(DictThesaurus *d)
	{
		int			i;

		for (i = 0; i < d->nsubst; i++)
		{
			TSLexeme   *rem = d->subst[i].res,	/* raw words, freed at the end */
					   *outptr,
					   *inptr;
			int			n = 2;		/* allocated length of the new array */

			outptr = d->subst[i].res = (TSLexeme *) palloc(sizeof(TSLexeme) * n);
			outptr->lexeme = NULL;
			inptr = rem;

			while (inptr && inptr->lexeme)
			{
				TSLexeme   *lexized,
							tmplex[2];

				if (inptr->flags & DT_USEASIS)
				{					/* do not lexize */
					tmplex[0] = *inptr;
					tmplex[0].flags = 0;
					tmplex[1].lexeme = NULL;
					lexized = tmplex;
				}
				else
				{
					lexized = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
																		 PointerGetDatum(d->subdict->dictData),
																		 PointerGetDatum(inptr->lexeme),
																		 Int32GetDatum(strlen(inptr->lexeme)),
																		 PointerGetDatum(NULL)));
				}

				if (lexized && lexized->lexeme)
				{
					/* output index where this word starts, or -1 for the first */
					int			toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1;

					while (lexized->lexeme)
					{
						if (outptr - d->subst[i].res + 1 >= n)
						{
							/* repalloc may move the array: rebase outptr */
							int			diff = outptr - d->subst[i].res;

							n *= 2;
							d->subst[i].res = (TSLexeme *) repalloc(d->subst[i].res, sizeof(TSLexeme) * n);
							outptr = d->subst[i].res + diff;
						}

						/* copy the element, then give it its own text */
						*outptr = *lexized;
						outptr->lexeme = pstrdup(lexized->lexeme);

						outptr++;
						lexized++;
					}

					/* flag the first lexeme of every word except the first */
					if (toset > 0)
						d->subst[i].res[toset].flags |= TSL_ADDPOS;
				}
				else if (lexized)
				{
					ereport(ERROR,
							(errcode(ERRCODE_CONFIG_FILE_ERROR),
							 errmsg("thesaurus substitute word \"%s\" is a stop word (rule %d)",
									inptr->lexeme, i + 1)));
				}
				else
				{
					ereport(ERROR,
							(errcode(ERRCODE_CONFIG_FILE_ERROR),
							 errmsg("thesaurus substitute word \"%s\" isn't recognized by subdictionary (rule %d)",
									inptr->lexeme, i + 1)));
				}

				if (inptr->lexeme)
					pfree(inptr->lexeme);
				inptr++;
			}

			if (outptr == d->subst[i].res)
				ereport(ERROR,
						(errcode(ERRCODE_CONFIG_FILE_ERROR),
						 errmsg("thesaurus substitute phrase is empty (rule %d)",
								i + 1)));

			d->subst[i].reslen = outptr - d->subst[i].res;

			pfree(rem);
		}
	}

	/*
	 * Dictionary init function.
	 *
	 * Argument 0 is the list of DefElem options.  Exactly one "dictfile" (the
	 * thesaurus file, read immediately) and exactly one "dictionary" (name of
	 * the subdictionary) are required; duplicates, unknown options and
	 * missing options raise ERRCODE_INVALID_PARAMETER_VALUE.  After the
	 * subdictionary has been looked up, the sample and substitute phrases are
	 * compiled and the resulting DictThesaurus is returned.
	 */
	Datum
	thesaurus_init(PG_FUNCTION_ARGS)
	{
		List	   *dictoptions = (List *) PG_GETARG_POINTER(0);
		DictThesaurus *d;
		char	   *subdictname = NULL;
		bool		fileloaded = false;
		List	   *namelist;
		ListCell   *l;

		d = (DictThesaurus *) palloc0(sizeof(DictThesaurus));

		foreach(l, dictoptions)
		{
			DefElem    *defel = (DefElem *) lfirst(l);

			if (strcmp(defel->defname, "dictfile") == 0)
			{
				if (fileloaded)
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
							 errmsg("multiple DictFile parameters")));
				thesaurusRead(defGetString(defel), d);
				fileloaded = true;
			}
			else if (strcmp(defel->defname, "dictionary") == 0)
			{
				if (subdictname)
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
							 errmsg("multiple Dictionary parameters")));
				subdictname = pstrdup(defGetString(defel));
			}
			else
			{
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("unrecognized Thesaurus parameter: \"%s\"",
								defel->defname)));
			}
		}

		/* both options are mandatory */
		if (!fileloaded)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("missing DictFile parameter")));
		if (!subdictname)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("missing Dictionary parameter")));

		/* resolve the subdictionary name and fetch its cache entry */
		namelist = stringToQualifiedNameList(subdictname, NULL);
		d->subdictOid = get_ts_dict_oid(namelist, false);
		d->subdict = lookup_ts_dictionary_cache(d->subdictOid);

		/* the subdictionary is needed for both compile steps */
		compileTheLexeme(d);
		compileTheSubstitute(d);

		PG_RETURN_POINTER(d);
	}

	/*
	 * findTheLexeme
	 *		Look up a lexeme in the thesaurus word array.
	 *
	 * Searches d->wrds (d->nwrds elements) with bsearch() using cmpLexemeQ as
	 * the comparator, so the array must already be ordered consistently with
	 * that comparator.
	 *
	 * "lexeme" may be NULL: thesaurus_lexize() passes NULL to look up the
	 * entry used for stop words.  NOTE(review): this relies on cmpLexemeQ
	 * accepting a NULL lexeme in the key -- confirm against its definition.
	 *
	 * Returns the head of the matching word's LexemeInfo list, or NULL when
	 * the array is empty or the lexeme is not present.
	 */
	static LexemeInfo *
	findTheLexeme(DictThesaurus *d, char *lexeme)
	{
		TheLexeme	key,
				   *res;

		if (d->nwrds == 0)
			return NULL;

		/* only the lexeme field of the search key is meaningful */
		key.lexeme = lexeme;
		key.entries = NULL;

		res = bsearch(&key, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);

		if (res == NULL)
			return NULL;
		return res->entries;
	}

	/*
	 * matchIdSubst
	 *		Does the nextvariant chain starting at "stored" contain a node
	 *		whose idsubst equals the given one?
	 *
	 * A NULL chain is treated as "no restriction" and yields true.
	 */
	static bool
	matchIdSubst(LexemeInfo *stored, uint32 idsubst)
	{
		LexemeInfo *cur;

		/* nothing stored: every substitution id is acceptable */
		if (stored == NULL)
			return true;

		for (cur = stored; cur != NULL; cur = cur->nextvariant)
		{
			if (cur->idsubst == idsubst)
				return true;
		}

		/* walked the whole chain without finding the id */
		return false;
	}

	/*
	 * findVariant
	 *		Collect thesaurus rules that every lexeme of one variant can
	 *		continue at position "curpos".
	 *
	 * newin[0..newn-1] are the LexemeInfo chains (linked through nextentry) of
	 * the newn lexemes forming one variant.  The chains are advanced in
	 * lock-step looking for an idsubst present in all of them where each node
	 * also has posinsubst == curpos and tnvariant == newn.
	 * NOTE(review): the "skip while smaller" logic presumes each chain is in
	 * ascending idsubst order -- confirm against the code that builds them.
	 *
	 * A hit is accepted only if it is compatible with "stored" (see
	 * matchIdSubst) and its idsubst is not already in the result list.
	 * Accepted nodes are pushed onto the front of "in" via nextvariant.
	 *
	 * Side effects: entries of newin[] are advanced, and nextvariant of every
	 * accepted node is overwritten.
	 *
	 * Returns the (possibly extended) "in" list as soon as any chain runs out.
	 */
	static LexemeInfo *
	findVariant(LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn)
	{
		for (;;)
		{
			int			i;
			LexemeInfo *ptr = newin[0];	/* current candidate rule */

			for (i = 0; i < newn; i++)
			{
				/* catch this chain up with the candidate's rule id */
				while (newin[i] && newin[i]->idsubst < ptr->idsubst)
					newin[i] = newin[i]->nextentry;

				if (newin[i] == NULL)
					return in;

				if (newin[i]->idsubst > ptr->idsubst)
				{
					/* overshoot: adopt the larger id and rescan from chain 0 */
					ptr = newin[i];
					i = -1;
					continue;
				}

				/* same rule id: look for a node at the wanted position */
				while (newin[i]->idsubst == ptr->idsubst)
				{
					if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn)
					{
						ptr = newin[i];
						break;
					}

					newin[i] = newin[i]->nextentry;
					if (newin[i] == NULL)
						return in;
				}

				if (newin[i]->idsubst != ptr->idsubst)
				{
					/* ran past this rule without a fit: restart with new id */
					ptr = newin[i];
					i = -1;
					continue;
				}
			}

			/*
			 * matchIdSubst() returns true for a NULL list, hence the explicit
			 * in == NULL test before asking "is it already collected?".
			 */
			if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst)))
			{						/* found */

				ptr->nextvariant = in;
				in = ptr;
			}

			/* step forward */
			for (i = 0; i < newn; i++)
				newin[i] = newin[i]->nextentry;
		}
	}

	/*
	 * copyTSLexeme
	 *		Make a palloc'd copy of a prepared substitution result.
	 *
	 * The copy holds ts->reslen elements plus one terminator.  Each element
	 * is copied by struct assignment and then given its own pstrdup'd lexeme
	 * string, so the result shares no string storage with ts->res.  Only the
	 * lexeme field of the terminator is set (to NULL).
	 */
	static TSLexeme *
	copyTSLexeme(TheSubstitute *ts)
	{
		TSLexeme   *res;
		uint16		i;

		res = (TSLexeme *) palloc(sizeof(TSLexeme) * (ts->reslen + 1));
		for (i = 0; i < ts->reslen; i++)
		{
			res[i] = ts->res[i];
			res[i].lexeme = pstrdup(ts->res[i].lexeme);
		}

		/* NULL lexeme marks the end of the array */
		res[ts->reslen].lexeme = NULL;

		return res;
	}

	/*
	 * checkMatch
	 *		See whether any candidate rule is completed at position "curpos".
	 *
	 * Walks the nextvariant chain starting at "info".  The first node whose
	 * rule has lastlexeme == curpos wins, and a fresh copy of that rule's
	 * substitution is returned.  Returns NULL if no node qualifies.
	 *
	 * *moreres is reset to false on entry and set to true when any node
	 * visited before returning has a successor in the chain.
	 */
	static TSLexeme *
	checkMatch(DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
	{
		*moreres = false;
		while (info)
		{
			Assert(info->idsubst < d->nsubst);
			if (info->nextvariant)
				*moreres = true;
			if (d->subst[info->idsubst].lastlexeme == curpos)
				return copyTSLexeme(d->subst + info->idsubst);
			info = info->nextvariant;
		}

		return NULL;
	}

	Datum
	thesaurus_lexize(PG_FUNCTION_ARGS)
	{
	DictThesaurus d = (DictThesaurus ) PG_GETARG_POINTER(0);
	DictSubState dstate = (DictSubState ) PG_GETARG_POINTER(3);
	TSLexeme *res = NULL;
	LexemeInfo *stored,
	*info = NULL;
	uint16 curpos = 0;
	bool moreres = false;

	if (PG_NARGS() != 4 \|\| dstate == NULL)
	elog(ERROR, "forbidden call of thesaurus or nested call");

	if (dstate->isend)
	PG_RETURN_POINTER(NULL);
	stored = (LexemeInfo *) dstate->private_state;

	if (stored)
	curpos = stored->posinsubst + 1;

	if (!d->subdict->isvalid)
	d->subdict = lookup_ts_dictionary_cache(d->subdictOid);

	res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
	PointerGetDatum(d->subdict->dictData),
	PG_GETARG_DATUM(1),
	PG_GETARG_DATUM(2),
	PointerGetDatum(NULL)));

	if (res && res->lexeme)
	{
	TSLexeme *ptr = res,
	*basevar;

	while (ptr->lexeme)
	{
	uint16 nv = ptr->nvariant;
	uint16 i,
	nlex = 0;
	LexemeInfo **infos;

	basevar = ptr;
	while (ptr->lexeme && nv == ptr->nvariant)
	{
	nlex++;
	ptr++;
	}

	infos = (LexemeInfo *) palloc(sizeof(LexemeInfo ) * nlex);
	for (i = 0; i < nlex; i++)
	if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL)
	break;

	if (i < nlex)
	{
	/* no chance to find */
	pfree(infos);
	continue;
	}

	info = findVariant(info, stored, curpos, infos, nlex);
	}
	}
	else if (res)
	{ /* stop-word */
	LexemeInfo *infos = findTheLexeme(d, NULL);

	info = findVariant(NULL, stored, curpos, &infos, 1);
	}
	else
	{
	info = NULL; /* word isn't recognized */
	}

	dstate->private_state = (void *) info;

	if (!info)
	{
	dstate->getnext = false;
	PG_RETURN_POINTER(NULL);
	}

	if ((res = checkMatch(d, info, curpos, &moreres)) != NULL)
	{
	dstate->getnext = moreres;
	PG_RETURN_POINTER(res);
	}

	dstate->getnext = true;

	PG_RETURN_POINTER(NULL);
	}