src/backend/tsearch/ts_parse.c - cloudberry - Git at Google

 /*-------------------------------------------------------------------------
  *
  * ts_parse.c
  *		main parse functions for tsearch
  *
  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
  *
  *
  * IDENTIFICATION
  *	  src/backend/tsearch/ts_parse.c
  *
  *-------------------------------------------------------------------------
  */

 #include "postgres.h"

 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"

 #define IGNORE_LONGLEXEME	1

 /*
  * Lexize subsystem
  */

 /*
  * One token handed over by the parser, kept as a node of a singly linked
  * list (see ListParsedLex).
  */
 typedef struct ParsedLex
 {
 	int			type;			/* token type passed to LexizeAddLemm() */
 	char	   *lemm;			/* token text; the pointer is stored, not copied */
 	int			lenlemm;		/* length that accompanies lemm */
 	struct ParsedLex *next;		/* following node, or NULL at the end */
 } ParsedLex;

 /*
  * Singly linked list of ParsedLex nodes.  Both pointers are NULL when the
  * list is empty.
  */
 typedef struct ListParsedLex
 {
 	ParsedLex  *head;			/* first node, removed by LPLRemoveHead() */
 	ParsedLex  *tail;			/* last node, appended to by LPLAddTail() */
 } ListParsedLex;

 /*
  * Working state of the lexize machinery, threaded through LexizeExec().
  */
 typedef struct
 {
 	TSConfigCacheEntry *cfg;	/* configuration whose dictionary map is used */
 	Oid			curDictId;		/* InvalidOid in one-word mode, otherwise the
 								 * dictionary that asked for more words */
 	int			posDict;		/* index in the dictionary list at which the
 								 * one-word loop (re)starts */
 	DictSubState dictState;		/* state block passed to the dictionary */
 	ParsedLex  *curSub;			/* next lexeme to feed in multiword mode */
 	ListParsedLex towork;		/* current list to work */
 	ListParsedLex waste;		/* list of lexemes that are already lexized */

 	/*
 	 * Fields to store the last variant to lexize (basically for a thesaurus
 	 * or a similar dictionary, which wants several lexemes).
 	 */

 	ParsedLex  *lastRes;		/* lexeme at which tmpRes was produced */
 	TSLexeme   *tmpRes;			/* saved result, see setNewTmpRes() */
 } LexizeData;

 /*
  * Put *ld into its starting state: bound to cfg, one-word mode, both lexeme
  * lists empty and no saved intermediate result.  dictState is left untouched.
  */
 static void
 LexizeInit(LexizeData *ld, TSConfigCacheEntry *cfg)
 {
 	/* no saved multiword result yet */
 	ld->tmpRes = NULL;
 	ld->lastRes = NULL;

 	/* both lists start out empty */
 	ld->waste.tail = NULL;
 	ld->waste.head = NULL;
 	ld->curSub = NULL;
 	ld->towork.tail = NULL;
 	ld->towork.head = NULL;

 	/* one-word mode, beginning with the first mapped dictionary */
 	ld->posDict = 0;
 	ld->curDictId = InvalidOid;
 	ld->cfg = cfg;
 }

 /*
  * Append newpl to the end of list; newpl becomes the new tail and its next
  * pointer is cleared.
  */
 static void
 LPLAddTail(ListParsedLex *list, ParsedLex *newpl)
 {
 	if (list->tail == NULL)
 		list->head = newpl;		/* list was empty */
 	else
 		list->tail->next = newpl;

 	list->tail = newpl;
 	newpl->next = NULL;
 }

 /*
  * Detach and return the first node of list, or NULL if the list is empty.
  * The tail pointer is cleared whenever the list ends up empty.
  */
 static ParsedLex *
 LPLRemoveHead(ListParsedLex *list)
 {
 	ParsedLex  *first = list->head;

 	if (first == NULL)
 	{
 		list->tail = NULL;
 		return NULL;
 	}

 	list->head = first->next;
 	if (list->head == NULL)
 		list->tail = NULL;

 	return first;
 }

 /*
  * Allocate a node for the given token, queue it at the end of ld->towork
  * and make it the current multiword position (ld->curSub).
  */
 static void
 LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm)
 {
 	ParsedLex  *node = (ParsedLex *) palloc(sizeof(ParsedLex));

 	node->lenlemm = lenlemm;
 	node->lemm = lemm;
 	node->type = type;

 	LPLAddTail(&ld->towork, node);

 	/* after the append the tail of towork is the node just added */
 	ld->curSub = ld->towork.tail;
 }

 /*
  * Move the first lexeme of ld->towork to the end of ld->waste and restart
  * the dictionary index for whatever lexeme comes next.
  */
 static void
 RemoveHead(LexizeData *ld)
 {
 	ParsedLex  *done = LPLRemoveHead(&ld->towork);

 	LPLAddTail(&ld->waste, done);
 	ld->posDict = 0;
 }

 /*
  * Dispose of the waste list: if correspondLexem is given, hand the list to
  * the caller through it; otherwise free every node.  Either way the waste
  * list is empty afterwards.
  */
 static void
 setCorrLex(LexizeData *ld, ParsedLex **correspondLexem)
 {
 	if (correspondLexem != NULL)
 		*correspondLexem = ld->waste.head;
 	else
 	{
 		ParsedLex  *node = ld->waste.head;

 		while (node != NULL)
 		{
 			ParsedLex  *following = node->next;

 			pfree(node);
 			node = following;
 		}
 	}

 	ld->waste.tail = NULL;
 	ld->waste.head = NULL;
 }

 /*
  * Move lexemes from the front of ld->towork to ld->waste, up to and
  * including stop.  When stop is reached, ld->curSub is set to the lexeme
  * that followed it.  If stop is never met, the whole list is moved.
  */
 static void
 moveToWaste(LexizeData *ld, ParsedLex *stop)
 {
 	while (ld->towork.head != NULL)
 	{
 		bool		reached = (ld->towork.head == stop);

 		/* read stop->next before RemoveHead() relinks the node */
 		if (reached)
 			ld->curSub = stop->next;

 		RemoveHead(ld);

 		if (reached)
 			break;
 	}
 }

 /*
  * Remember res as the saved intermediate result and lex as the lexeme it
  * was produced at.  A previously saved result, if any, is freed together
  * with its lexeme strings.
  */
 static void
 setNewTmpRes(LexizeData *ld, ParsedLex *lex, TSLexeme *res)
 {
 	TSLexeme   *old = ld->tmpRes;

 	if (old != NULL)
 	{
 		TSLexeme   *item = old;

 		/* the array ends at the entry whose lexeme is NULL */
 		while (item->lexeme != NULL)
 		{
 			pfree(item->lexeme);
 			item++;
 		}
 		pfree(old);
 	}

 	ld->lastRes = lex;
 	ld->tmpRes = res;
 }

 /*
  * Run dictionaries over the lexemes queued in ld->towork and return the
  * next non-NULL dictionary result, or NULL once the queued lexemes are
  * exhausted (this includes the case where a multiword dictionary is still
  * waiting for more input).
  *
  * Two modes, selected by ld->curDictId:
  *	- InvalidOid: each queued lexeme is offered, one at a time, to the
  *	  dictionaries mapped to its token type, starting at index ld->posDict.
  *	- otherwise: dictionary ld->curDictId previously set getnext and is fed
  *	  the following lexemes (from ld->curSub on) until it stops asking.
  *
  * On every terminal return setCorrLex() is called, so the consumed lexemes
  * are either handed to the caller through *correspondLexem or, when
  * correspondLexem is NULL, freed.
  */
 static TSLexeme *
 LexizeExec(LexizeData *ld, ParsedLex **correspondLexem)
 {
 	int			i;
 	ListDictionary *map;
 	TSDictionaryCacheEntry *dict;
 	TSLexeme   *res;

 	if (ld->curDictId == InvalidOid)
 	{
 		/*
 		 * usual mode: dictionary wants only one word, but we should keep in
 		 * mind that we should go through all stack
 		 */

 		while (ld->towork.head)
 		{
 			ParsedLex  *curVal = ld->towork.head;
 			char	   *curValLemm = curVal->lemm;
 			int			curValLenLemm = curVal->lenlemm;

 			/*
 			 * NOTE(review): map is computed before the range check below; it
 			 * is only dereferenced after the check thanks to short-circuit
 			 * evaluation.  A negative type would not be caught here --
 			 * presumably parsers never queue one; confirm.
 			 */
 			map = ld->cfg->map + curVal->type;

 			if (curVal->type == 0 || curVal->type >= ld->cfg->lenmap || map->len == 0)
 			{
 				/* skip this type of lexeme */
 				RemoveHead(ld);
 				continue;
 			}

 			for (i = ld->posDict; i < map->len; i++)
 			{
 				dict = lookup_ts_dictionary_cache(map->dictIds[i]);

 				ld->dictState.isend = ld->dictState.getnext = false;
 				ld->dictState.private_state = NULL;
 				res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(dict->lexize),
 																 PointerGetDatum(dict->dictData),
 																 PointerGetDatum(curValLemm),
 																 Int32GetDatum(curValLenLemm),
 																 PointerGetDatum(&ld->dictState)));

 				if (ld->dictState.getnext)
 				{
 					/*
 					 * dictionary wants next word, so setup and store current
 					 * position and go to multiword mode
 					 */

 					ld->curDictId = DatumGetObjectId(map->dictIds[i]);
 					/* if multiword mode fails, resume with the next dictionary */
 					ld->posDict = i + 1;
 					ld->curSub = curVal->next;
 					if (res)
 						setNewTmpRes(ld, curVal, res);
 					return LexizeExec(ld, correspondLexem);
 				}

 				if (!res)		/* dictionary doesn't know this lexeme */
 					continue;

 				if (res->flags & TSL_FILTER)
 				{
 					/* feed the filtered text to the remaining dictionaries */
 					curValLemm = res->lexeme;
 					curValLenLemm = strlen(res->lexeme);
 					continue;
 				}

 				RemoveHead(ld);
 				setCorrLex(ld, correspondLexem);
 				return res;
 			}

 			/* no dictionary produced a result for this lexeme */
 			RemoveHead(ld);
 		}
 	}
 	else
 	{							/* curDictId is valid */
 		dict = lookup_ts_dictionary_cache(ld->curDictId);

 		/*
 		 * Dictionary ld->curDictId asks us about the following words
 		 */

 		while (ld->curSub)
 		{
 			ParsedLex  *curVal = ld->curSub;

 			map = ld->cfg->map + curVal->type;

 			if (curVal->type != 0)
 			{
 				bool		dictExists = false;

 				if (curVal->type >= ld->cfg->lenmap || map->len == 0)
 				{
 					/* skip this type of lexeme */
 					ld->curSub = curVal->next;
 					continue;
 				}

 				/*
 				 * We should be sure that the current type of lexeme is
 				 * recognized by our dictionary: we just check whether it
 				 * exists in the list of dictionaries.
 				 */
 				for (i = 0; i < map->len && !dictExists; i++)
 					if (ld->curDictId == DatumGetObjectId(map->dictIds[i]))
 						dictExists = true;

 				if (!dictExists)
 				{
 					/*
 					 * Dictionary can't work with current type of lexeme,
 					 * return to basic mode and redo all stored lexemes
 					 */
 					ld->curDictId = InvalidOid;
 					return LexizeExec(ld, correspondLexem);
 				}
 			}

 			/* a lexeme of type 0 is passed to the dictionary with isend set */
 			ld->dictState.isend = (curVal->type == 0) ? true : false;
 			ld->dictState.getnext = false;

 			res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(dict->lexize),
 															 PointerGetDatum(dict->dictData),
 															 PointerGetDatum(curVal->lemm),
 															 Int32GetDatum(curVal->lenlemm),
 															 PointerGetDatum(&ld->dictState)));

 			if (ld->dictState.getnext)
 			{
 				/* Dictionary wants one more */
 				ld->curSub = curVal->next;
 				if (res)
 					setNewTmpRes(ld, curVal, res);
 				continue;
 			}

 			if (res || ld->tmpRes)
 			{
 				/*
 				 * Dictionary normalizes lexemes, so we remove from stack all
 				 * used lexemes, return to basic mode and redo end of stack
 				 * (if it exists)
 				 */
 				if (res)
 				{
 					moveToWaste(ld, ld->curSub);
 				}
 				else
 				{
 					/* fall back to the result saved earlier */
 					res = ld->tmpRes;
 					moveToWaste(ld, ld->lastRes);
 				}

 				/* reset to initial state */
 				ld->curDictId = InvalidOid;
 				ld->posDict = 0;
 				ld->lastRes = NULL;
 				ld->tmpRes = NULL;
 				setCorrLex(ld, correspondLexem);
 				return res;
 			}

 			/*
 			 * Dict doesn't want the next lexeme and didn't recognize
 			 * anything, redo from ld->towork.head
 			 */
 			ld->curDictId = InvalidOid;
 			return LexizeExec(ld, correspondLexem);
 		}
 	}

 	setCorrLex(ld, correspondLexem);
 	return NULL;
 }

 /*
  * Tokenize buf with the parser of configuration cfgId, run every token
  * through the lexize machinery and append the resulting lexemes to prs.
  *
  * prs->words is grown (doubled) as needed; prs->pos is advanced once per
  * dictionary result and once more for each lexeme flagged TSL_ADDPOS.
  * The lexeme strings are kept by prs->words; only the result array itself
  * is freed here.
  */
 void
 parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
 {
 	TSConfigCacheEntry *cfg = lookup_ts_config_cache(cfgId);
 	TSParserCacheEntry *prsobj = lookup_ts_parser_cache(cfg->prsId);
 	void	   *prsdata;
 	LexizeData	ldata;
 	TSLexeme   *norms;
 	char	   *lemm = NULL;
 	int			lenlemm;
 	int			type;

 	prsdata = (void *) DatumGetPointer(FunctionCall2(&prsobj->prsstart,
 													 PointerGetDatum(buf),
 													 Int32GetDatum(buflen)));

 	LexizeInit(&ldata, cfg);

 	do
 	{
 		type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
 										   PointerGetDatum(prsdata),
 										   PointerGetDatum(&lemm),
 										   PointerGetDatum(&lenlemm)));

 		if (type > 0 && lenlemm >= MAXSTRLEN)
 		{
 #ifdef IGNORE_LONGLEXEME
 			ereport(NOTICE,
 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 					 errmsg("word is too long to be indexed"),
 					 errdetail("Words longer than %d characters are ignored.",
 							   MAXSTRLEN)));
 			continue;
 #else
 			ereport(ERROR,
 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 					 errmsg("word is too long to be indexed"),
 					 errdetail("Words longer than %d characters are ignored.",
 							   MAXSTRLEN)));
 #endif
 		}

 		LexizeAddLemm(&ldata, type, lemm, lenlemm);

 		/* drain every result the queued tokens can produce right now */
 		for (;;)
 		{
 			TSLexeme   *cur;

 			norms = LexizeExec(&ldata, NULL);
 			if (norms == NULL)
 				break;

 			prs->pos++;			/* set pos */

 			for (cur = norms; cur->lexeme; cur++)
 			{
 				ParsedWord *slot;

 				if (prs->curwords == prs->lenwords)
 				{
 					prs->lenwords *= 2;
 					prs->words = (ParsedWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(ParsedWord));
 				}

 				if (cur->flags & TSL_ADDPOS)
 					prs->pos++;

 				/* take the slot address only after a possible repalloc */
 				slot = &prs->words[prs->curwords];
 				slot->len = strlen(cur->lexeme);
 				slot->word = cur->lexeme;
 				slot->nvariant = cur->nvariant;
 				slot->flags = cur->flags & TSL_PREFIX;
 				slot->alen = 0;
 				slot->pos.pos = LIMITPOS(prs->pos);

 				prs->curwords++;
 			}
 			pfree(norms);
 		}
 	} while (type > 0);

 	FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
 }

 /*
  * Headline framework
  */
 /*
  * Append a zero-initialized headline word of the given token type to prs,
  * holding a private copy of the buflen bytes at buf.  The words array is
  * doubled until there is room.
  */
 static void
 hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
 {
 	HeadlineWordEntry *entry;

 	while (prs->curwords >= prs->lenwords)
 	{
 		prs->lenwords *= 2;
 		prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
 	}

 	/* take the address only after a possible repalloc */
 	entry = &(prs->words[prs->curwords]);
 	memset(entry, 0, sizeof(HeadlineWordEntry));
 	entry->type = (uint8) type;
 	entry->len = buflen;
 	entry->word = palloc(buflen);
 	memcpy(entry->word, buf, buflen);

 	prs->curwords++;
 }

 /*
  * Match the normalized lexeme buf/buflen against every value item of query
  * and record the matches on the most recently added headline word.
  *
  * The first match is stored in that word's item field.  Each further match
  * appends a copy of the word, marked repeated, pointing at the additional
  * query operand.  Room for query->size extra entries is reserved up front,
  * so the word pointer stays valid throughout the loop.
  */
 static void
 hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
 {
 	QueryItem  *items = GETQUERY(query);
 	HeadlineWordEntry *word;
 	int			idx;

 	while (prs->curwords + query->size >= prs->lenwords)
 	{
 		prs->lenwords *= 2;
 		prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
 	}

 	word = &(prs->words[prs->curwords - 1]);
 	word->pos = LIMITPOS(pos);

 	for (idx = 0; idx < query->size; idx++)
 	{
 		QueryItem  *item = &items[idx];

 		if (item->type != QI_VAL)
 			continue;

 		if (tsCompareString(GETOPERAND(query) + item->qoperand.distance, item->qoperand.length,
 							buf, buflen, item->qoperand.prefix) != 0)
 			continue;

 		if (!word->item)
 		{
 			/* first query operand matching this word */
 			word->item = &item->qoperand;
 			continue;
 		}

 		/* additional match: duplicate the word for this operand */
 		memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry));
 		prs->words[prs->curwords].item = &item->qoperand;
 		prs->words[prs->curwords].repeated = 1;
 		prs->curwords++;
 	}
 }

 /*
  * Add the source tokens in lexs to the headline and associate the
  * normalized lexemes in norms (may be NULL) with them.
  *
  * Every node of lexs is freed.  If norms is given, prs->vectorpos is
  * advanced once per TSL_ADDPOS lexeme, and the lexeme strings and the
  * array itself are freed.
  */
 static void
 addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme *norms)
 {
 	TSLexeme   *cur;

 	while (lexs != NULL)
 	{
 		ParsedLex  *following;
 		int32		wordpos;

 		if (lexs->type > 0)
 			hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);

 		/* positions are counted locally; vectorpos is only advanced below */
 		wordpos = prs->vectorpos;
 		if (norms != NULL)
 		{
 			for (cur = norms; cur->lexeme; cur++)
 			{
 				if (cur->flags & TSL_ADDPOS)
 					wordpos++;
 				hlfinditem(prs, query, wordpos, cur->lexeme, strlen(cur->lexeme));
 			}
 		}

 		following = lexs->next;
 		pfree(lexs);
 		lexs = following;
 	}

 	if (norms == NULL)
 		return;

 	for (cur = norms; cur->lexeme; cur++)
 	{
 		if (cur->flags & TSL_ADDPOS)
 			prs->vectorpos++;
 		pfree(cur->lexeme);
 	}
 	pfree(norms);
 }

 /*
  * Headline counterpart of parsetext(): tokenize buf with the parser of
  * configuration cfgId, lexize the tokens and record both the source tokens
  * and their matches against query in prs.
  *
  * prs->vectorpos is advanced once for every non-NULL dictionary result;
  * addHLParsedLex() advances it further for TSL_ADDPOS lexemes.
  */
 void
 hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
 {
 	TSConfigCacheEntry *cfg = lookup_ts_config_cache(cfgId);
 	TSParserCacheEntry *prsobj = lookup_ts_parser_cache(cfg->prsId);
 	void	   *prsdata;
 	LexizeData	ldata;
 	ParsedLex  *lexs;
 	TSLexeme   *norms;
 	char	   *lemm = NULL;
 	int			lenlemm;
 	int			type;

 	prsdata = (void *) DatumGetPointer(FunctionCall2(&(prsobj->prsstart),
 													 PointerGetDatum(buf),
 													 Int32GetDatum(buflen)));

 	LexizeInit(&ldata, cfg);

 	do
 	{
 		bool		more;

 		type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
 										   PointerGetDatum(prsdata),
 										   PointerGetDatum(&lemm),
 										   PointerGetDatum(&lenlemm)));

 		if (type > 0 && lenlemm >= MAXSTRLEN)
 		{
 #ifdef IGNORE_LONGLEXEME
 			ereport(NOTICE,
 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 					 errmsg("word is too long to be indexed"),
 					 errdetail("Words longer than %d characters are ignored.",
 							   MAXSTRLEN)));
 			continue;
 #else
 			ereport(ERROR,
 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 					 errmsg("word is too long to be indexed"),
 					 errdetail("Words longer than %d characters are ignored.",
 							   MAXSTRLEN)));
 #endif
 		}

 		LexizeAddLemm(&ldata, type, lemm, lenlemm);

 		/*
 		 * Keep pulling results; the final NULL result is still passed on so
 		 * that the consumed source tokens reach the headline.
 		 */
 		do
 		{
 			norms = LexizeExec(&ldata, &lexs);
 			more = (norms != NULL);

 			if (more)
 				prs->vectorpos++;
 			addHLParsedLex(prs, query, lexs, norms);
 		} while (more);

 	} while (type > 0);

 	FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
 }

 /*
  * Assemble the headline text from the words collected in prs.
  *
  * Words flagged "in" are emitted: replaced words as a single space,
  * skipped words not at all, selected words wrapped in startsel/stopsel.
  * A fragment delimiter is inserted before every fragment but the first.
  * Words not flagged "in" end the current fragment and have their text
  * freed.  Entries flagged repeated are ignored.
  *
  * Returns a palloc'd text value whose size is set with SET_VARSIZE.
  */
 text *
 generateHeadline(HeadlineParsedText *prs)
 {
 	int			len = 128;
 	text	   *out = (text *) palloc(len);
 	char	   *ptr = ((char *) out) + VARHDRSZ;
 	int			numfragments = 0;
 	int16		infrag = 0;
 	HeadlineWordEntry *wrd;

 	for (wrd = prs->words; wrd - prs->words < prs->curwords; wrd++)
 	{
 		/* make room for the worst case: word plus both markers and delimiter */
 		while (wrd->len + prs->stopsellen + prs->startsellen + prs->fragdelimlen + (ptr - ((char *) out)) >= len)
 		{
 			int			dist = ptr - ((char *) out);

 			len *= 2;
 			out = (text *) repalloc(out, len);
 			ptr = ((char *) out) + dist;
 		}

 		if (wrd->repeated)
 			continue;

 		if (!wrd->in)
 		{
 			/* word is outside the headline: close the fragment, drop text */
 			infrag = 0;
 			pfree(wrd->word);
 			continue;
 		}

 		if (!infrag)
 		{
 			/* start of a new fragment */
 			infrag = 1;
 			numfragments++;

 			/* add a fragment delimiter if this is after the first one */
 			if (numfragments > 1)
 			{
 				memcpy(ptr, prs->fragdelim, prs->fragdelimlen);
 				ptr += prs->fragdelimlen;
 			}
 		}

 		if (wrd->replace)
 		{
 			*ptr++ = ' ';
 			continue;
 		}

 		if (wrd->skip)
 			continue;

 		if (wrd->selected)
 		{
 			memcpy(ptr, prs->startsel, prs->startsellen);
 			ptr += prs->startsellen;
 		}

 		memcpy(ptr, wrd->word, wrd->len);
 		ptr += wrd->len;

 		if (wrd->selected)
 		{
 			memcpy(ptr, prs->stopsel, prs->stopsellen);
 			ptr += prs->stopsellen;
 		}
 	}

 	SET_VARSIZE(out, ptr - ((char *) out));
 	return out;
 }
	/*-------------------------------------------------------------------------
	*
	* ts_parse.c
	* main parse functions for tsearch
	*
	* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
	*
	*
	* IDENTIFICATION
	* src/backend/tsearch/ts_parse.c
	*
	*-------------------------------------------------------------------------
	*/

	#include "postgres.h"

	#include "tsearch/ts_cache.h"
	#include "tsearch/ts_utils.h"

	#define IGNORE_LONGLEXEME 1

	/*
	* Lexize subsystem
	*/

	/*
	* One token handed over by the parser, kept as a node of a singly linked
	* list (see ListParsedLex).
	*/
	typedef struct ParsedLex
	{
	/* token type passed to LexizeAddLemm() */
	int type;
	/* token text; the pointer is stored, not copied */
	char *lemm;
	/* length that accompanies lemm */
	int lenlemm;
	/* following node, or NULL at the end */
	struct ParsedLex *next;
	} ParsedLex;

	/*
	* Singly linked list of ParsedLex nodes. Both pointers are NULL when the
	* list is empty.
	*/
	typedef struct ListParsedLex
	{
	/* first node, removed by LPLRemoveHead() */
	ParsedLex *head;
	/* last node, appended to by LPLAddTail() */
	ParsedLex *tail;
	} ListParsedLex;

	/*
	* Working state of the lexize machinery, threaded through LexizeExec().
	*/
	typedef struct
	{
	/* configuration whose dictionary map is used */
	TSConfigCacheEntry *cfg;
	/* InvalidOid in one-word mode, otherwise the dictionary asking for more */
	Oid curDictId;
	/* index in the dictionary list at which the one-word loop (re)starts */
	int posDict;
	/* state block passed to the dictionary */
	DictSubState dictState;
	/* next lexeme to feed in multiword mode */
	ParsedLex *curSub;
	ListParsedLex towork; /* current list to work */
	ListParsedLex waste; /* list of lexemes that are already lexized */

	/*
	* Fields to store the last variant to lexize (basically for a thesaurus
	* or a similar dictionary, which wants several lexemes).
	*/

	/* lexeme at which tmpRes was produced */
	ParsedLex *lastRes;
	/* saved result, see setNewTmpRes() */
	TSLexeme *tmpRes;
	} LexizeData;

	/*
	 * Put *ld into its starting state: bound to cfg, one-word mode, both
	 * lexeme lists empty and no saved intermediate result.
	 *
	 * Both parameters are pointers; the declarators had lost their '*'.
	 */
	static void
	LexizeInit(LexizeData *ld, TSConfigCacheEntry *cfg)
	{
		ld->cfg = cfg;
		ld->curDictId = InvalidOid;
		ld->posDict = 0;
		ld->towork.head = ld->towork.tail = ld->curSub = NULL;
		ld->waste.head = ld->waste.tail = NULL;
		ld->lastRes = NULL;
		ld->tmpRes = NULL;
	}

	/*
	 * Append newpl to the end of list; newpl becomes the new tail and its
	 * next pointer is cleared.
	 *
	 * Both parameters are pointers; the declarators had lost their '*'.
	 */
	static void
	LPLAddTail(ListParsedLex *list, ParsedLex *newpl)
	{
		if (list->tail)
		{
			list->tail->next = newpl;
			list->tail = newpl;
		}
		else
			list->head = list->tail = newpl;
		newpl->next = NULL;
	}

	/*
	 * Detach and return the first node of list, or NULL if the list is
	 * empty. The tail pointer is cleared whenever the list ends up empty.
	 */
	static ParsedLex *
	LPLRemoveHead(ListParsedLex *list)
	{
		ParsedLex *first = list->head;

		if (first == NULL)
		{
			list->tail = NULL;
			return NULL;
		}

		list->head = first->next;
		if (list->head == NULL)
			list->tail = NULL;

		return first;
	}

	/*
	 * Allocate a node for the given token, queue it at the end of ld->towork
	 * and make it the current multiword position (ld->curSub).
	 *
	 * ld, lemm and the local node are pointers; the declarators and the
	 * palloc cast had lost their '*'.
	 */
	static void
	LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm)
	{
		ParsedLex *newpl = (ParsedLex *) palloc(sizeof(ParsedLex));

		newpl->type = type;
		newpl->lemm = lemm;
		newpl->lenlemm = lenlemm;
		LPLAddTail(&ld->towork, newpl);
		ld->curSub = ld->towork.tail;
	}

	/*
	 * Move the first lexeme of ld->towork to the end of ld->waste and
	 * restart the dictionary index for whatever lexeme comes next.
	 */
	static void
	RemoveHead(LexizeData *ld)
	{
		ParsedLex *done = LPLRemoveHead(&ld->towork);

		LPLAddTail(&ld->waste, done);
		ld->posDict = 0;
	}

	/*
	 * Dispose of the waste list: if correspondLexem is given, hand the list
	 * to the caller through it; otherwise free every node. Either way the
	 * waste list is empty afterwards.
	 *
	 * ld is a pointer and correspondLexem an out-parameter (pointer to
	 * pointer); the declarators had each lost one '*'.
	 */
	static void
	setCorrLex(LexizeData *ld, ParsedLex **correspondLexem)
	{
		if (correspondLexem)
		{
			*correspondLexem = ld->waste.head;
		}
		else
		{
			ParsedLex *tmp,
					  *ptr = ld->waste.head;

			while (ptr)
			{
				tmp = ptr->next;
				pfree(ptr);
				ptr = tmp;
			}
		}
		ld->waste.head = ld->waste.tail = NULL;
	}

	/*
	 * Move lexemes from the front of ld->towork to ld->waste, up to and
	 * including stop. When stop is reached, ld->curSub is set to the lexeme
	 * that followed it. If stop is never met, the whole list is moved.
	 *
	 * Both parameters are pointers; the declarators had lost their '*'.
	 */
	static void
	moveToWaste(LexizeData *ld, ParsedLex *stop)
	{
		bool go = true;

		while (ld->towork.head && go)
		{
			if (ld->towork.head == stop)
			{
				/* read stop->next before RemoveHead() relinks the node */
				ld->curSub = stop->next;
				go = false;
			}
			RemoveHead(ld);
		}
	}

	/*
	 * Remember res as the saved intermediate result and lex as the lexeme it
	 * was produced at. A previously saved result, if any, is freed together
	 * with its lexeme strings.
	 *
	 * ld and lex are pointers; the declarators had lost their '*'.
	 */
	static void
	setNewTmpRes(LexizeData *ld, ParsedLex *lex, TSLexeme *res)
	{
		if (ld->tmpRes)
		{
			TSLexeme *ptr;

			/* the array ends at the entry whose lexeme is NULL */
			for (ptr = ld->tmpRes; ptr->lexeme; ptr++)
				pfree(ptr->lexeme);
			pfree(ld->tmpRes);
		}
		ld->tmpRes = res;
		ld->lastRes = lex;
	}

	/*
	 * Run dictionaries over the lexemes queued in ld->towork and return the
	 * next non-NULL dictionary result, or NULL once the queued lexemes are
	 * exhausted (this includes the case where a multiword dictionary is
	 * still waiting for more input).
	 *
	 * Two modes, selected by ld->curDictId:
	 *	- InvalidOid: each queued lexeme is offered, one at a time, to the
	 *	  dictionaries mapped to its token type, starting at ld->posDict.
	 *	- otherwise: dictionary ld->curDictId previously set getnext and is
	 *	  fed the following lexemes (from ld->curSub on) until it stops asking.
	 *
	 * On every terminal return setCorrLex() is called, so the consumed
	 * lexemes are either handed to the caller through *correspondLexem or,
	 * when correspondLexem is NULL, freed.
	 *
	 * The signature had lost its pointer declarators and the logical-or
	 * operators were escaped; both are restored here.
	 */
	static TSLexeme *
	LexizeExec(LexizeData *ld, ParsedLex **correspondLexem)
	{
		int i;
		ListDictionary *map;
		TSDictionaryCacheEntry *dict;
		TSLexeme *res;

		if (ld->curDictId == InvalidOid)
		{
			/*
			 * usual mode: dictionary wants only one word, but we should keep
			 * in mind that we should go through all stack
			 */

			while (ld->towork.head)
			{
				ParsedLex *curVal = ld->towork.head;
				char *curValLemm = curVal->lemm;
				int curValLenLemm = curVal->lenlemm;

				map = ld->cfg->map + curVal->type;

				if (curVal->type == 0 || curVal->type >= ld->cfg->lenmap || map->len == 0)
				{
					/* skip this type of lexeme */
					RemoveHead(ld);
					continue;
				}

				for (i = ld->posDict; i < map->len; i++)
				{
					dict = lookup_ts_dictionary_cache(map->dictIds[i]);

					ld->dictState.isend = ld->dictState.getnext = false;
					ld->dictState.private_state = NULL;
					res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(dict->lexize),
																	 PointerGetDatum(dict->dictData),
																	 PointerGetDatum(curValLemm),
																	 Int32GetDatum(curValLenLemm),
																	 PointerGetDatum(&ld->dictState)));

					if (ld->dictState.getnext)
					{
						/*
						 * dictionary wants next word, so setup and store
						 * current position and go to multiword mode
						 */

						ld->curDictId = DatumGetObjectId(map->dictIds[i]);
						/* if multiword mode fails, resume with the next dictionary */
						ld->posDict = i + 1;
						ld->curSub = curVal->next;
						if (res)
							setNewTmpRes(ld, curVal, res);
						return LexizeExec(ld, correspondLexem);
					}

					if (!res) /* dictionary doesn't know this lexeme */
						continue;

					if (res->flags & TSL_FILTER)
					{
						/* feed the filtered text to the remaining dictionaries */
						curValLemm = res->lexeme;
						curValLenLemm = strlen(res->lexeme);
						continue;
					}

					RemoveHead(ld);
					setCorrLex(ld, correspondLexem);
					return res;
				}

				/* no dictionary produced a result for this lexeme */
				RemoveHead(ld);
			}
		}
		else
		{ /* curDictId is valid */
			dict = lookup_ts_dictionary_cache(ld->curDictId);

			/*
			 * Dictionary ld->curDictId asks us about the following words
			 */

			while (ld->curSub)
			{
				ParsedLex *curVal = ld->curSub;

				map = ld->cfg->map + curVal->type;

				if (curVal->type != 0)
				{
					bool dictExists = false;

					if (curVal->type >= ld->cfg->lenmap || map->len == 0)
					{
						/* skip this type of lexeme */
						ld->curSub = curVal->next;
						continue;
					}

					/*
					 * We should be sure that the current type of lexeme is
					 * recognized by our dictionary: we just check whether it
					 * exists in the list of dictionaries.
					 */
					for (i = 0; i < map->len && !dictExists; i++)
						if (ld->curDictId == DatumGetObjectId(map->dictIds[i]))
							dictExists = true;

					if (!dictExists)
					{
						/*
						 * Dictionary can't work with current type of lexeme,
						 * return to basic mode and redo all stored lexemes
						 */
						ld->curDictId = InvalidOid;
						return LexizeExec(ld, correspondLexem);
					}
				}

				/* a lexeme of type 0 is passed to the dictionary with isend set */
				ld->dictState.isend = (curVal->type == 0) ? true : false;
				ld->dictState.getnext = false;

				res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(dict->lexize),
																 PointerGetDatum(dict->dictData),
																 PointerGetDatum(curVal->lemm),
																 Int32GetDatum(curVal->lenlemm),
																 PointerGetDatum(&ld->dictState)));

				if (ld->dictState.getnext)
				{
					/* Dictionary wants one more */
					ld->curSub = curVal->next;
					if (res)
						setNewTmpRes(ld, curVal, res);
					continue;
				}

				if (res || ld->tmpRes)
				{
					/*
					 * Dictionary normalizes lexemes, so we remove from stack
					 * all used lexemes, return to basic mode and redo end of
					 * stack (if it exists)
					 */
					if (res)
					{
						moveToWaste(ld, ld->curSub);
					}
					else
					{
						/* fall back to the result saved earlier */
						res = ld->tmpRes;
						moveToWaste(ld, ld->lastRes);
					}

					/* reset to initial state */
					ld->curDictId = InvalidOid;
					ld->posDict = 0;
					ld->lastRes = NULL;
					ld->tmpRes = NULL;
					setCorrLex(ld, correspondLexem);
					return res;
				}

				/*
				 * Dict doesn't want the next lexeme and didn't recognize
				 * anything, redo from ld->towork.head
				 */
				ld->curDictId = InvalidOid;
				return LexizeExec(ld, correspondLexem);
			}
		}

		setCorrLex(ld, correspondLexem);
		return NULL;
	}

	/*
	 * Parse string and lexize words.
	 *
	 * prs will be filled in: prs->words is grown (doubled) as needed and
	 * prs->pos is advanced once per dictionary result and once more for each
	 * lexeme flagged TSL_ADDPOS. The lexeme strings are kept by prs->words;
	 * only the result array itself is freed here.
	 *
	 * prs and buf are pointers; their declarators and the repalloc casts had
	 * lost their '*'.
	 */
	void
	parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
	{
		int type,
			lenlemm;
		char *lemm = NULL;
		LexizeData ldata;
		TSLexeme *norms;
		TSConfigCacheEntry *cfg;
		TSParserCacheEntry *prsobj;
		void *prsdata;

		cfg = lookup_ts_config_cache(cfgId);
		prsobj = lookup_ts_parser_cache(cfg->prsId);

		prsdata = (void *) DatumGetPointer(FunctionCall2(&prsobj->prsstart,
														 PointerGetDatum(buf),
														 Int32GetDatum(buflen)));

		LexizeInit(&ldata, cfg);

		do
		{
			type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
											   PointerGetDatum(prsdata),
											   PointerGetDatum(&lemm),
											   PointerGetDatum(&lenlemm)));

			if (type > 0 && lenlemm >= MAXSTRLEN)
			{
	#ifdef IGNORE_LONGLEXEME
				ereport(NOTICE,
						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
						 errmsg("word is too long to be indexed"),
						 errdetail("Words longer than %d characters are ignored.",
								   MAXSTRLEN)));
				continue;
	#else
				ereport(ERROR,
						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
						 errmsg("word is too long to be indexed"),
						 errdetail("Words longer than %d characters are ignored.",
								   MAXSTRLEN)));
	#endif
			}

			LexizeAddLemm(&ldata, type, lemm, lenlemm);

			/* drain every result the queued tokens can produce right now */
			while ((norms = LexizeExec(&ldata, NULL)) != NULL)
			{
				TSLexeme *ptr = norms;

				prs->pos++; /* set pos */

				while (ptr->lexeme)
				{
					if (prs->curwords == prs->lenwords)
					{
						prs->lenwords *= 2;
						prs->words = (ParsedWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(ParsedWord));
					}

					if (ptr->flags & TSL_ADDPOS)
						prs->pos++;
					prs->words[prs->curwords].len = strlen(ptr->lexeme);
					prs->words[prs->curwords].word = ptr->lexeme;
					prs->words[prs->curwords].nvariant = ptr->nvariant;
					prs->words[prs->curwords].flags = ptr->flags & TSL_PREFIX;
					prs->words[prs->curwords].alen = 0;
					prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
					ptr++;
					prs->curwords++;
				}
				pfree(norms);
			}
		} while (type > 0);

		FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
	}

	/*
	* Headline framework
	*/
	/*
	 * Append a zero-initialized headline word of the given token type to
	 * prs, holding a private copy of the buflen bytes at buf. The words
	 * array is doubled until there is room.
	 *
	 * prs and buf are pointers; their declarators and the repalloc casts had
	 * lost their '*'.
	 */
	static void
	hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
	{
		while (prs->curwords >= prs->lenwords)
		{
			prs->lenwords *= 2;
			prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
		}
		memset(&(prs->words[prs->curwords]), 0, sizeof(HeadlineWordEntry));
		prs->words[prs->curwords].type = (uint8) type;
		prs->words[prs->curwords].len = buflen;
		prs->words[prs->curwords].word = palloc(buflen);
		memcpy(prs->words[prs->curwords].word, buf, buflen);
		prs->curwords++;
	}

	/*
	 * Match the normalized lexeme buf/buflen against every value item of
	 * query and record the matches on the most recently added headline word.
	 *
	 * The first match is stored in that word's item field. Each further
	 * match appends a copy of the word, marked repeated, pointing at the
	 * additional query operand. Room for query->size extra entries is
	 * reserved up front, so the word pointer stays valid in the loop.
	 *
	 * prs and buf are pointers; their declarators and the repalloc casts had
	 * lost their '*'.
	 */
	static void
	hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
	{
		int i;
		QueryItem *item = GETQUERY(query);
		HeadlineWordEntry *word;

		while (prs->curwords + query->size >= prs->lenwords)
		{
			prs->lenwords *= 2;
			prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
		}

		word = &(prs->words[prs->curwords - 1]);
		word->pos = LIMITPOS(pos);
		for (i = 0; i < query->size; i++)
		{
			if (item->type == QI_VAL &&
				tsCompareString(GETOPERAND(query) + item->qoperand.distance, item->qoperand.length,
								buf, buflen, item->qoperand.prefix) == 0)
			{
				if (word->item)
				{
					/* additional match: duplicate the word for this operand */
					memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry));
					prs->words[prs->curwords].item = &item->qoperand;
					prs->words[prs->curwords].repeated = 1;
					prs->curwords++;
				}
				else
					word->item = &item->qoperand;
			}
			item++;
		}
	}

	/*
	 * Add the source tokens in lexs to the headline and associate the
	 * normalized lexemes in norms (may be NULL) with them.
	 *
	 * Every node of lexs is freed. If norms is given, prs->vectorpos is
	 * advanced once per TSL_ADDPOS lexeme, and the lexeme strings and the
	 * array itself are freed.
	 *
	 * prs and lexs are pointers; the declarators had lost their '*'.
	 */
	static void
	addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme *norms)
	{
		ParsedLex *tmplexs;
		TSLexeme *ptr;
		int32 savedpos;

		while (lexs)
		{
			if (lexs->type > 0)
				hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);

			/* positions are counted locally; vectorpos is advanced below */
			ptr = norms;
			savedpos = prs->vectorpos;
			while (ptr && ptr->lexeme)
			{
				if (ptr->flags & TSL_ADDPOS)
					savedpos++;
				hlfinditem(prs, query, savedpos, ptr->lexeme, strlen(ptr->lexeme));
				ptr++;
			}

			tmplexs = lexs->next;
			pfree(lexs);
			lexs = tmplexs;
		}

		if (norms)
		{
			ptr = norms;
			while (ptr->lexeme)
			{
				if (ptr->flags & TSL_ADDPOS)
					prs->vectorpos++;
				pfree(ptr->lexeme);
				ptr++;
			}
			pfree(norms);
		}
	}

	/*
	 * Headline counterpart of parsetext(): tokenize buf with the parser of
	 * configuration cfgId, lexize the tokens and record both the source
	 * tokens and their matches against query in prs.
	 *
	 * prs->vectorpos is advanced once for every non-NULL dictionary result;
	 * addHLParsedLex() advances it further for TSL_ADDPOS lexemes.
	 *
	 * prs and buf are pointers; the declarators had lost their '*'.
	 */
	void
	hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
	{
		int type,
			lenlemm;
		char *lemm = NULL;
		LexizeData ldata;
		TSLexeme *norms;
		ParsedLex *lexs;
		TSConfigCacheEntry *cfg;
		TSParserCacheEntry *prsobj;
		void *prsdata;

		cfg = lookup_ts_config_cache(cfgId);
		prsobj = lookup_ts_parser_cache(cfg->prsId);

		prsdata = (void *) DatumGetPointer(FunctionCall2(&(prsobj->prsstart),
														 PointerGetDatum(buf),
														 Int32GetDatum(buflen)));

		LexizeInit(&ldata, cfg);

		do
		{
			type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
											   PointerGetDatum(prsdata),
											   PointerGetDatum(&lemm),
											   PointerGetDatum(&lenlemm)));

			if (type > 0 && lenlemm >= MAXSTRLEN)
			{
	#ifdef IGNORE_LONGLEXEME
				ereport(NOTICE,
						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
						 errmsg("word is too long to be indexed"),
						 errdetail("Words longer than %d characters are ignored.",
								   MAXSTRLEN)));
				continue;
	#else
				ereport(ERROR,
						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
						 errmsg("word is too long to be indexed"),
						 errdetail("Words longer than %d characters are ignored.",
								   MAXSTRLEN)));
	#endif
			}

			LexizeAddLemm(&ldata, type, lemm, lenlemm);

			/*
			 * Keep pulling results; the final NULL result is still passed on
			 * so that the consumed source tokens reach the headline.
			 */
			do
			{
				if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
				{
					prs->vectorpos++;
					addHLParsedLex(prs, query, lexs, norms);
				}
				else
					addHLParsedLex(prs, query, lexs, NULL);
			} while (norms);

		} while (type > 0);

		FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
	}

	/*
	 * Assemble the headline text from the words collected in prs.
	 *
	 * Words flagged "in" are emitted: replaced words as a single space,
	 * skipped words not at all, selected words wrapped in startsel/stopsel.
	 * A fragment delimiter is inserted before every fragment but the first.
	 * Words not flagged "in" end the current fragment and have their text
	 * freed. Entries flagged repeated are ignored.
	 *
	 * Returns a palloc'd text value whose size is set with SET_VARSIZE.
	 */
	text *
	generateHeadline(HeadlineParsedText *prs)
	{
		int len = 128;
		text *out = (text *) palloc(len);
		char *ptr = ((char *) out) + VARHDRSZ;
		int numfragments = 0;
		int16 infrag = 0;
		HeadlineWordEntry *wrd;

		for (wrd = prs->words; wrd - prs->words < prs->curwords; wrd++)
		{
			/* make room for the worst case: word, both markers, delimiter */
			while (wrd->len + prs->stopsellen + prs->startsellen + prs->fragdelimlen + (ptr - ((char *) out)) >= len)
			{
				int dist = ptr - ((char *) out);

				len *= 2;
				out = (text *) repalloc(out, len);
				ptr = ((char *) out) + dist;
			}

			if (wrd->repeated)
				continue;

			if (!wrd->in)
			{
				/* word is outside the headline: close fragment, drop text */
				infrag = 0;
				pfree(wrd->word);
				continue;
			}

			if (!infrag)
			{
				/* start of a new fragment */
				infrag = 1;
				numfragments++;

				/* add a fragment delimiter if this is after the first one */
				if (numfragments > 1)
				{
					memcpy(ptr, prs->fragdelim, prs->fragdelimlen);
					ptr += prs->fragdelimlen;
				}
			}

			if (wrd->replace)
			{
				*ptr++ = ' ';
				continue;
			}

			if (wrd->skip)
				continue;

			if (wrd->selected)
			{
				memcpy(ptr, prs->startsel, prs->startsellen);
				ptr += prs->startsellen;
			}

			memcpy(ptr, wrd->word, wrd->len);
			ptr += wrd->len;

			if (wrd->selected)
			{
				memcpy(ptr, prs->stopsel, prs->stopsellen);
				ptr += prs->stopsellen;
			}
		}

		SET_VARSIZE(out, ptr - ((char *) out));
		return out;
	}