src/backend/utils/adt/tsvector_op.c - cloudberry - Git at Google

 /*-------------------------------------------------------------------------
  *
  * tsvector_op.c
  *	  operations over tsvector
  *
  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
  *
  *
  * IDENTIFICATION
  *	  src/backend/utils/adt/tsvector_op.c
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"

 #include <limits.h>

 #include "access/htup_details.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_type.h"
 #include "commands/trigger.h"
 #include "executor/spi.h"
 #include "funcapi.h"
 #include "lib/qunique.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "parser/parse_coerce.h"
 #include "tsearch/ts_utils.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/regproc.h"
 #include "utils/rel.h"


 typedef struct
 {
 	WordEntry  *arrb;
 	WordEntry  *arre;
 	char	   *values;
 	char	   *operand;
 } CHKVAL;


 typedef struct StatEntry
 {
 	uint32		ndoc;			/* zero indicates that we were already here
 								 * while walking through the tree */
 	uint32		nentry;
 	struct StatEntry *left;
 	struct StatEntry *right;
 	uint32		lenlexeme;
 	char		lexeme[FLEXIBLE_ARRAY_MEMBER];
 } StatEntry;

 #define STATENTRYHDRSZ	(offsetof(StatEntry, lexeme))

 typedef struct
 {
 	int32		weight;

 	uint32		maxdepth;

 	StatEntry **stack;
 	uint32		stackpos;

 	StatEntry  *root;
 } TSVectorStat;


 static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
 										 uint32 flags,
 										 TSExecuteCallback chkcond);
 static bool TS_execute_locations_recurse(QueryItem *curitem,
 										 void *arg,
 										 TSExecuteCallback chkcond,
 										 List **locations);
 static int	tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
 static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);


 /*
  * Order: haspos, len, word, for all positions (pos, weight)
  */
 static int
 silly_cmp_tsvector(const TSVector a, const TSVector b)
 {
 	if (VARSIZE(a) < VARSIZE(b))
 		return -1;
 	else if (VARSIZE(a) > VARSIZE(b))
 		return 1;
 	else if (a->size < b->size)
 		return -1;
 	else if (a->size > b->size)
 		return 1;
 	else
 	{
 		WordEntry  *aptr = ARRPTR(a);
 		WordEntry  *bptr = ARRPTR(b);
 		int			i = 0;
 		int			res;


 		for (i = 0; i < a->size; i++)
 		{
 			if (aptr->haspos != bptr->haspos)
 			{
 				return (aptr->haspos > bptr->haspos) ? -1 : 1;
 			}
 			else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
 			{
 				return res;
 			}
 			else if (aptr->haspos)
 			{
 				WordEntryPos *ap = POSDATAPTR(a, aptr);
 				WordEntryPos *bp = POSDATAPTR(b, bptr);
 				int			j;

 				if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
 					return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;

 				for (j = 0; j < POSDATALEN(a, aptr); j++)
 				{
 					if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
 					{
 						return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
 					}
 					else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
 					{
 						return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
 					}
 					ap++, bp++;
 				}
 			}

 			aptr++;
 			bptr++;
 		}
 	}

 	return 0;
 }

 #define TSVECTORCMPFUNC( type, action, ret )			\
 Datum													\
 tsvector_##type(PG_FUNCTION_ARGS)						\
 {														\
 	TSVector	a = PG_GETARG_TSVECTOR(0);				\
 	TSVector	b = PG_GETARG_TSVECTOR(1);				\
 	int			res = silly_cmp_tsvector(a, b);			\
 	PG_FREE_IF_COPY(a,0);								\
 	PG_FREE_IF_COPY(b,1);								\
 	PG_RETURN_##ret( res action 0 );					\
 }	\
 /* keep compiler quiet - no extra ; */					\
 extern int no_such_variable

 TSVECTORCMPFUNC(lt, <, BOOL);
 TSVECTORCMPFUNC(le, <=, BOOL);
 TSVECTORCMPFUNC(eq, ==, BOOL);
 TSVECTORCMPFUNC(ge, >=, BOOL);
 TSVECTORCMPFUNC(gt, >, BOOL);
 TSVECTORCMPFUNC(ne, !=, BOOL);
 TSVECTORCMPFUNC(cmp, +, INT32);

 Datum
 tsvector_strip(PG_FUNCTION_ARGS)
 {
 	TSVector	in = PG_GETARG_TSVECTOR(0);
 	TSVector	out;
 	int			i,
 				len = 0;
 	WordEntry  *arrin = ARRPTR(in),
 			   *arrout;
 	char	   *cur;

 	for (i = 0; i < in->size; i++)
 		len += arrin[i].len;

 	len = CALCDATASIZE(in->size, len);
 	out = (TSVector) palloc0(len);
 	SET_VARSIZE(out, len);
 	out->size = in->size;
 	arrout = ARRPTR(out);
 	cur = STRPTR(out);
 	for (i = 0; i < in->size; i++)
 	{
 		memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
 		arrout[i].haspos = 0;
 		arrout[i].len = arrin[i].len;
 		arrout[i].pos = cur - STRPTR(out);
 		cur += arrout[i].len;
 	}

 	PG_FREE_IF_COPY(in, 0);
 	PG_RETURN_POINTER(out);
 }

 Datum
 tsvector_length(PG_FUNCTION_ARGS)
 {
 	TSVector	in = PG_GETARG_TSVECTOR(0);
 	int32		ret = in->size;

 	PG_FREE_IF_COPY(in, 0);
 	PG_RETURN_INT32(ret);
 }

 Datum
 tsvector_setweight(PG_FUNCTION_ARGS)
 {
 	TSVector	in = PG_GETARG_TSVECTOR(0);
 	char		cw = PG_GETARG_CHAR(1);
 	TSVector	out;
 	int			i,
 				j;
 	WordEntry  *entry;
 	WordEntryPos *p;
 	int			w = 0;

 	switch (cw)
 	{
 		case 'A':
 		case 'a':
 			w = 3;
 			break;
 		case 'B':
 		case 'b':
 			w = 2;
 			break;
 		case 'C':
 		case 'c':
 			w = 1;
 			break;
 		case 'D':
 		case 'd':
 			w = 0;
 			break;
 		default:
 			/* internal error */
 			elog(ERROR, "unrecognized weight: %d", cw);
 	}

 	out = (TSVector) palloc(VARSIZE(in));
 	memcpy(out, in, VARSIZE(in));
 	entry = ARRPTR(out);
 	i = out->size;
 	while (i--)
 	{
 		if ((j = POSDATALEN(out, entry)) != 0)
 		{
 			p = POSDATAPTR(out, entry);
 			while (j--)
 			{
 				WEP_SETWEIGHT(*p, w);
 				p++;
 			}
 		}
 		entry++;
 	}

 	PG_FREE_IF_COPY(in, 0);
 	PG_RETURN_POINTER(out);
 }

 /*
  * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
  *
  * Assign weight w to elements of tsin that are listed in lexemes.
  */
 Datum
 tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
 {
 	TSVector	tsin = PG_GETARG_TSVECTOR(0);
 	char		char_weight = PG_GETARG_CHAR(1);
 	ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(2);

 	TSVector	tsout;
 	int			i,
 				j,
 				nlexemes,
 				weight;
 	WordEntry  *entry;
 	Datum	   *dlexemes;
 	bool	   *nulls;

 	switch (char_weight)
 	{
 		case 'A':
 		case 'a':
 			weight = 3;
 			break;
 		case 'B':
 		case 'b':
 			weight = 2;
 			break;
 		case 'C':
 		case 'c':
 			weight = 1;
 			break;
 		case 'D':
 		case 'd':
 			weight = 0;
 			break;
 		default:
 			/* internal error */
 			elog(ERROR, "unrecognized weight: %c", char_weight);
 	}

 	tsout = (TSVector) palloc(VARSIZE(tsin));
 	memcpy(tsout, tsin, VARSIZE(tsin));
 	entry = ARRPTR(tsout);

 	deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlexemes);

 	/*
 	 * Assuming that lexemes array is significantly shorter than tsvector we
 	 * can iterate through lexemes performing binary search of each lexeme
 	 * from lexemes in tsvector.
 	 */
 	for (i = 0; i < nlexemes; i++)
 	{
 		char	   *lex;
 		int			lex_len,
 					lex_pos;

 		/* Ignore null array elements, they surely don't match */
 		if (nulls[i])
 			continue;

 		lex = VARDATA(dlexemes[i]);
 		lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
 		lex_pos = tsvector_bsearch(tsout, lex, lex_len);

 		if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
 		{
 			WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);

 			while (j--)
 			{
 				WEP_SETWEIGHT(*p, weight);
 				p++;
 			}
 		}
 	}

 	PG_FREE_IF_COPY(tsin, 0);
 	PG_FREE_IF_COPY(lexemes, 2);

 	PG_RETURN_POINTER(tsout);
 }

 #define compareEntry(pa, a, pb, b) \
 	tsCompareString((pa) + (a)->pos, (a)->len,	\
 					(pb) + (b)->pos, (b)->len,	\
 					false)

 /*
  * Add positions from src to dest after offsetting them by maxpos.
  * Return the number added (might be less than expected due to overflow)
  */
 static int32
 add_pos(TSVector src, WordEntry *srcptr,
 		TSVector dest, WordEntry *destptr,
 		int32 maxpos)
 {
 	uint16	   *clen = &_POSVECPTR(dest, destptr)->npos;
 	int			i;
 	uint16		slen = POSDATALEN(src, srcptr),
 				startlen;
 	WordEntryPos *spos = POSDATAPTR(src, srcptr),
 			   *dpos = POSDATAPTR(dest, destptr);

 	if (!destptr->haspos)
 		*clen = 0;

 	startlen = *clen;
 	for (i = 0;
 		 i < slen && *clen < MAXNUMPOS &&
 		 (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
 		 i++)
 	{
 		WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
 		WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
 		(*clen)++;
 	}

 	if (*clen != startlen)
 		destptr->haspos = 1;
 	return *clen - startlen;
 }

 /*
  * Perform binary search of given lexeme in TSVector.
  * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
  * found.
  */
 static int
 tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
 {
 	WordEntry  *arrin = ARRPTR(tsv);
 	int			StopLow = 0,
 				StopHigh = tsv->size,
 				StopMiddle,
 				cmp;

 	while (StopLow < StopHigh)
 	{
 		StopMiddle = (StopLow + StopHigh) / 2;

 		cmp = tsCompareString(lexeme, lexeme_len,
 							  STRPTR(tsv) + arrin[StopMiddle].pos,
 							  arrin[StopMiddle].len,
 							  false);

 		if (cmp < 0)
 			StopHigh = StopMiddle;
 		else if (cmp > 0)
 			StopLow = StopMiddle + 1;
 		else					/* found it */
 			return StopMiddle;
 	}

 	return -1;
 }

 /*
  * qsort comparator functions
  */

 static int
 compare_int(const void *va, const void *vb)
 {
 	int			a = *((const int *) va);
 	int			b = *((const int *) vb);

 	if (a == b)
 		return 0;
 	return (a > b) ? 1 : -1;
 }

 static int
 compare_text_lexemes(const void *va, const void *vb)
 {
 	Datum		a = *((const Datum *) va);
 	Datum		b = *((const Datum *) vb);
 	char	   *alex = VARDATA_ANY(a);
 	int			alex_len = VARSIZE_ANY_EXHDR(a);
 	char	   *blex = VARDATA_ANY(b);
 	int			blex_len = VARSIZE_ANY_EXHDR(b);

 	return tsCompareString(alex, alex_len, blex, blex_len, false);
 }

 /*
  * Internal routine to delete lexemes from TSVector by array of offsets.
  *
  * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
  * int indices_count -- size of that array
  *
  * Returns new TSVector without given lexemes along with their positions
  * and weights.
  */
 static TSVector
 tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
 						   int indices_count)
 {
 	TSVector	tsout;
 	WordEntry  *arrin = ARRPTR(tsv),
 			   *arrout;
 	char	   *data = STRPTR(tsv),
 			   *dataout;
 	int			i,				/* index in arrin */
 				j,				/* index in arrout */
 				k,				/* index in indices_to_delete */
 				curoff;			/* index in dataout area */

 	/*
 	 * Sort the filter array to simplify membership checks below.  Also, get
 	 * rid of any duplicate entries, so that we can assume that indices_count
 	 * is exactly equal to the number of lexemes that will be removed.
 	 */
 	if (indices_count > 1)
 	{
 		qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
 		indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
 								compare_int);
 	}

 	/*
 	 * Here we overestimate tsout size, since we don't know how much space is
 	 * used by the deleted lexeme(s).  We will set exact size below.
 	 */
 	tsout = (TSVector) palloc0(VARSIZE(tsv));

 	/* This count must be correct because STRPTR(tsout) relies on it. */
 	tsout->size = tsv->size - indices_count;

 	/*
 	 * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
 	 */
 	arrout = ARRPTR(tsout);
 	dataout = STRPTR(tsout);
 	curoff = 0;
 	for (i = j = k = 0; i < tsv->size; i++)
 	{
 		/*
 		 * If current i is present in indices_to_delete, skip this lexeme.
 		 * Since indices_to_delete is already sorted, we only need to check
 		 * the current (k'th) entry.
 		 */
 		if (k < indices_count && i == indices_to_delete[k])
 		{
 			k++;
 			continue;
 		}

 		/* Copy lexeme and its positions and weights */
 		memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
 		arrout[j].haspos = arrin[i].haspos;
 		arrout[j].len = arrin[i].len;
 		arrout[j].pos = curoff;
 		curoff += arrin[i].len;
 		if (arrin[i].haspos)
 		{
 			int			len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
 				+ sizeof(uint16);

 			curoff = SHORTALIGN(curoff);
 			memcpy(dataout + curoff,
 				   STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
 				   len);
 			curoff += len;
 		}

 		j++;
 	}

 	/*
 	 * k should now be exactly equal to indices_count. If it isn't then the
 	 * caller provided us with indices outside of [0, tsv->size) range and
 	 * estimation of tsout's size is wrong.
 	 */
 	Assert(k == indices_count);

 	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
 	return tsout;
 }

 /*
  * Delete given lexeme from tsvector.
  * Implementation of user-level ts_delete(tsvector, text).
  */
 Datum
 tsvector_delete_str(PG_FUNCTION_ARGS)
 {
 	TSVector	tsin = PG_GETARG_TSVECTOR(0),
 				tsout;
 	text	   *tlexeme = PG_GETARG_TEXT_PP(1);
 	char	   *lexeme = VARDATA_ANY(tlexeme);
 	int			lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
 				skip_index;

 	if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
 		PG_RETURN_POINTER(tsin);

 	tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);

 	PG_FREE_IF_COPY(tsin, 0);
 	PG_FREE_IF_COPY(tlexeme, 1);
 	PG_RETURN_POINTER(tsout);
 }

 /*
  * Delete given array of lexemes from tsvector.
  * Implementation of user-level ts_delete(tsvector, text[]).
  */
 Datum
 tsvector_delete_arr(PG_FUNCTION_ARGS)
 {
 	TSVector	tsin = PG_GETARG_TSVECTOR(0),
 				tsout;
 	ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(1);
 	int			i,
 				nlex,
 				skip_count,
 			   *skip_indices;
 	Datum	   *dlexemes;
 	bool	   *nulls;

 	deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlex);

 	/*
 	 * In typical use case array of lexemes to delete is relatively small. So
 	 * here we optimize things for that scenario: iterate through lexarr
 	 * performing binary search of each lexeme from lexarr in tsvector.
 	 */
 	skip_indices = palloc0(nlex * sizeof(int));
 	for (i = skip_count = 0; i < nlex; i++)
 	{
 		char	   *lex;
 		int			lex_len,
 					lex_pos;

 		/* Ignore null array elements, they surely don't match */
 		if (nulls[i])
 			continue;

 		lex = VARDATA(dlexemes[i]);
 		lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
 		lex_pos = tsvector_bsearch(tsin, lex, lex_len);

 		if (lex_pos >= 0)
 			skip_indices[skip_count++] = lex_pos;
 	}

 	tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);

 	pfree(skip_indices);
 	PG_FREE_IF_COPY(tsin, 0);
 	PG_FREE_IF_COPY(lexemes, 1);

 	PG_RETURN_POINTER(tsout);
 }

 /*
  * Expand tsvector as table with following columns:
  *	   lexeme: lexeme text
  *	   positions: integer array of lexeme positions
  *	   weights: char array of weights corresponding to positions
  */
 Datum
 tsvector_unnest(PG_FUNCTION_ARGS)
 {
 	FuncCallContext *funcctx;
 	TSVector	tsin;

 	if (SRF_IS_FIRSTCALL())
 	{
 		MemoryContext oldcontext;
 		TupleDesc	tupdesc;

 		funcctx = SRF_FIRSTCALL_INIT();
 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

 		tupdesc = CreateTemplateTupleDesc(3);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
 						   TEXTOID, -1, 0);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
 						   INT2ARRAYOID, -1, 0);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
 						   TEXTARRAYOID, -1, 0);
 		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
 			elog(ERROR, "return type must be a row type");
 		funcctx->tuple_desc = tupdesc;

 		funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);

 		MemoryContextSwitchTo(oldcontext);
 	}

 	funcctx = SRF_PERCALL_SETUP();
 	tsin = (TSVector) funcctx->user_fctx;

 	if (funcctx->call_cntr < tsin->size)
 	{
 		WordEntry  *arrin = ARRPTR(tsin);
 		char	   *data = STRPTR(tsin);
 		HeapTuple	tuple;
 		int			j,
 					i = funcctx->call_cntr;
 		bool		nulls[] = {false, false, false};
 		Datum		values[3];

 		values[0] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));

 		if (arrin[i].haspos)
 		{
 			WordEntryPosVector *posv;
 			Datum	   *positions;
 			Datum	   *weights;
 			char		weight;

 			/*
 			 * Internally tsvector stores position and weight in the same
 			 * uint16 (2 bits for weight, 14 for position). Here we extract
 			 * that in two separate arrays.
 			 */
 			posv = _POSVECPTR(tsin, arrin + i);
 			positions = palloc(posv->npos * sizeof(Datum));
 			weights = palloc(posv->npos * sizeof(Datum));
 			for (j = 0; j < posv->npos; j++)
 			{
 				positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
 				weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
 				weights[j] = PointerGetDatum(cstring_to_text_with_len(&weight,
 																	  1));
 			}

 			values[1] = PointerGetDatum(construct_array_builtin(positions, posv->npos, INT2OID));
 			values[2] = PointerGetDatum(construct_array_builtin(weights, posv->npos, TEXTOID));
 		}
 		else
 		{
 			nulls[1] = nulls[2] = true;
 		}

 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
 		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
 	}
 	else
 	{
 		SRF_RETURN_DONE(funcctx);
 	}
 }

 /*
  * Convert tsvector to array of lexemes.
  */
 Datum
 tsvector_to_array(PG_FUNCTION_ARGS)
 {
 	TSVector	tsin = PG_GETARG_TSVECTOR(0);
 	WordEntry  *arrin = ARRPTR(tsin);
 	Datum	   *elements;
 	int			i;
 	ArrayType  *array;

 	elements = palloc(tsin->size * sizeof(Datum));

 	for (i = 0; i < tsin->size; i++)
 	{
 		elements[i] = PointerGetDatum(cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos,
 															   arrin[i].len));
 	}

 	array = construct_array_builtin(elements, tsin->size, TEXTOID);

 	pfree(elements);
 	PG_FREE_IF_COPY(tsin, 0);
 	PG_RETURN_POINTER(array);
 }

 /*
  * Build tsvector from array of lexemes.
  */
 Datum
 array_to_tsvector(PG_FUNCTION_ARGS)
 {
 	ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
 	TSVector	tsout;
 	Datum	   *dlexemes;
 	WordEntry  *arrout;
 	bool	   *nulls;
 	int			nitems,
 				i,
 				tslen,
 				datalen = 0;
 	char	   *cur;

 	deconstruct_array_builtin(v, TEXTOID, &dlexemes, &nulls, &nitems);

 	/*
 	 * Reject nulls and zero length strings (maybe we should just ignore them,
 	 * instead?)
 	 */
 	for (i = 0; i < nitems; i++)
 	{
 		if (nulls[i])
 			ereport(ERROR,
 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
 					 errmsg("lexeme array may not contain nulls")));

 		if (VARSIZE(dlexemes[i]) - VARHDRSZ == 0)
 			ereport(ERROR,
 					(errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
 					 errmsg("lexeme array may not contain empty strings")));
 	}

 	/* Sort and de-dup, because this is required for a valid tsvector. */
 	if (nitems > 1)
 	{
 		qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
 		nitems = qunique(dlexemes, nitems, sizeof(Datum),
 						 compare_text_lexemes);
 	}

 	/* Calculate space needed for surviving lexemes. */
 	for (i = 0; i < nitems; i++)
 		datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
 	tslen = CALCDATASIZE(nitems, datalen);

 	/* Allocate and fill tsvector. */
 	tsout = (TSVector) palloc0(tslen);
 	SET_VARSIZE(tsout, tslen);
 	tsout->size = nitems;

 	arrout = ARRPTR(tsout);
 	cur = STRPTR(tsout);
 	for (i = 0; i < nitems; i++)
 	{
 		char	   *lex = VARDATA(dlexemes[i]);
 		int			lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;

 		memcpy(cur, lex, lex_len);
 		arrout[i].haspos = 0;
 		arrout[i].len = lex_len;
 		arrout[i].pos = cur - STRPTR(tsout);
 		cur += lex_len;
 	}

 	PG_FREE_IF_COPY(v, 0);
 	PG_RETURN_POINTER(tsout);
 }

 /*
  * ts_filter(): keep only lexemes with given weights in tsvector.
  */
 Datum
 tsvector_filter(PG_FUNCTION_ARGS)
 {
 	TSVector	tsin = PG_GETARG_TSVECTOR(0),
 				tsout;
 	ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
 	WordEntry  *arrin = ARRPTR(tsin),
 			   *arrout;
 	char	   *datain = STRPTR(tsin),
 			   *dataout;
 	Datum	   *dweights;
 	bool	   *nulls;
 	int			nweights;
 	int			i,
 				j;
 	int			cur_pos = 0;
 	char		mask = 0;

 	deconstruct_array_builtin(weights, CHAROID, &dweights, &nulls, &nweights);

 	for (i = 0; i < nweights; i++)
 	{
 		char		char_weight;

 		if (nulls[i])
 			ereport(ERROR,
 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
 					 errmsg("weight array may not contain nulls")));

 		char_weight = DatumGetChar(dweights[i]);
 		switch (char_weight)
 		{
 			case 'A':
 			case 'a':
 				mask = mask | 8;
 				break;
 			case 'B':
 			case 'b':
 				mask = mask | 4;
 				break;
 			case 'C':
 			case 'c':
 				mask = mask | 2;
 				break;
 			case 'D':
 			case 'd':
 				mask = mask | 1;
 				break;
 			default:
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 						 errmsg("unrecognized weight: \"%c\"", char_weight)));
 		}
 	}

 	tsout = (TSVector) palloc0(VARSIZE(tsin));
 	tsout->size = tsin->size;
 	arrout = ARRPTR(tsout);
 	dataout = STRPTR(tsout);

 	for (i = j = 0; i < tsin->size; i++)
 	{
 		WordEntryPosVector *posvin,
 				   *posvout;
 		int			npos = 0;
 		int			k;

 		if (!arrin[i].haspos)
 			continue;

 		posvin = _POSVECPTR(tsin, arrin + i);
 		posvout = (WordEntryPosVector *)
 			(dataout + SHORTALIGN(cur_pos + arrin[i].len));

 		for (k = 0; k < posvin->npos; k++)
 		{
 			if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
 				posvout->pos[npos++] = posvin->pos[k];
 		}

 		/* if no satisfactory positions found, skip lexeme */
 		if (!npos)
 			continue;

 		arrout[j].haspos = true;
 		arrout[j].len = arrin[i].len;
 		arrout[j].pos = cur_pos;

 		memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
 		posvout->npos = npos;
 		cur_pos += SHORTALIGN(arrin[i].len);
 		cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
 			sizeof(uint16);
 		j++;
 	}

 	tsout->size = j;
 	if (dataout != STRPTR(tsout))
 		memmove(STRPTR(tsout), dataout, cur_pos);

 	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));

 	PG_FREE_IF_COPY(tsin, 0);
 	PG_RETURN_POINTER(tsout);
 }

 Datum
 tsvector_concat(PG_FUNCTION_ARGS)
 {
 	TSVector	in1 = PG_GETARG_TSVECTOR(0);
 	TSVector	in2 = PG_GETARG_TSVECTOR(1);
 	TSVector	out;
 	WordEntry  *ptr;
 	WordEntry  *ptr1,
 			   *ptr2;
 	WordEntryPos *p;
 	int			maxpos = 0,
 				i,
 				j,
 				i1,
 				i2,
 				dataoff,
 				output_bytes,
 				output_size;
 	char	   *data,
 			   *data1,
 			   *data2;

 	/* Get max position in in1; we'll need this to offset in2's positions */
 	ptr = ARRPTR(in1);
 	i = in1->size;
 	while (i--)
 	{
 		if ((j = POSDATALEN(in1, ptr)) != 0)
 		{
 			p = POSDATAPTR(in1, ptr);
 			while (j--)
 			{
 				if (WEP_GETPOS(*p) > maxpos)
 					maxpos = WEP_GETPOS(*p);
 				p++;
 			}
 		}
 		ptr++;
 	}

 	ptr1 = ARRPTR(in1);
 	ptr2 = ARRPTR(in2);
 	data1 = STRPTR(in1);
 	data2 = STRPTR(in2);
 	i1 = in1->size;
 	i2 = in2->size;

 	/*
 	 * Conservative estimate of space needed.  We might need all the data in
 	 * both inputs, and conceivably add a pad byte before position data for
 	 * each item where there was none before.
 	 */
 	output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;

 	out = (TSVector) palloc0(output_bytes);
 	SET_VARSIZE(out, output_bytes);

 	/*
 	 * We must make out->size valid so that STRPTR(out) is sensible.  We'll
 	 * collapse out any unused space at the end.
 	 */
 	out->size = in1->size + in2->size;

 	ptr = ARRPTR(out);
 	data = STRPTR(out);
 	dataoff = 0;
 	while (i1 && i2)
 	{
 		int			cmp = compareEntry(data1, ptr1, data2, ptr2);

 		if (cmp < 0)
 		{						/* in1 first */
 			ptr->haspos = ptr1->haspos;
 			ptr->len = ptr1->len;
 			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
 			ptr->pos = dataoff;
 			dataoff += ptr1->len;
 			if (ptr->haspos)
 			{
 				dataoff = SHORTALIGN(dataoff);
 				memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
 				dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
 			}

 			ptr++;
 			ptr1++;
 			i1--;
 		}
 		else if (cmp > 0)
 		{						/* in2 first */
 			ptr->haspos = ptr2->haspos;
 			ptr->len = ptr2->len;
 			memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
 			ptr->pos = dataoff;
 			dataoff += ptr2->len;
 			if (ptr->haspos)
 			{
 				int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

 				if (addlen == 0)
 					ptr->haspos = 0;
 				else
 				{
 					dataoff = SHORTALIGN(dataoff);
 					dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
 				}
 			}

 			ptr++;
 			ptr2++;
 			i2--;
 		}
 		else
 		{
 			ptr->haspos = ptr1->haspos | ptr2->haspos;
 			ptr->len = ptr1->len;
 			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
 			ptr->pos = dataoff;
 			dataoff += ptr1->len;
 			if (ptr->haspos)
 			{
 				if (ptr1->haspos)
 				{
 					dataoff = SHORTALIGN(dataoff);
 					memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
 					dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
 					if (ptr2->haspos)
 						dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
 				}
 				else			/* must have ptr2->haspos */
 				{
 					int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

 					if (addlen == 0)
 						ptr->haspos = 0;
 					else
 					{
 						dataoff = SHORTALIGN(dataoff);
 						dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
 					}
 				}
 			}

 			ptr++;
 			ptr1++;
 			ptr2++;
 			i1--;
 			i2--;
 		}
 	}

 	while (i1)
 	{
 		ptr->haspos = ptr1->haspos;
 		ptr->len = ptr1->len;
 		memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
 		ptr->pos = dataoff;
 		dataoff += ptr1->len;
 		if (ptr->haspos)
 		{
 			dataoff = SHORTALIGN(dataoff);
 			memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
 			dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
 		}

 		ptr++;
 		ptr1++;
 		i1--;
 	}

 	while (i2)
 	{
 		ptr->haspos = ptr2->haspos;
 		ptr->len = ptr2->len;
 		memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
 		ptr->pos = dataoff;
 		dataoff += ptr2->len;
 		if (ptr->haspos)
 		{
 			int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

 			if (addlen == 0)
 				ptr->haspos = 0;
 			else
 			{
 				dataoff = SHORTALIGN(dataoff);
 				dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
 			}
 		}

 		ptr++;
 		ptr2++;
 		i2--;
 	}

 	/*
 	 * Instead of checking each offset individually, we check for overflow of
 	 * pos fields once at the end.
 	 */
 	if (dataoff > MAXSTRPOS)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 				 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));

 	/*
 	 * Adjust sizes (asserting that we didn't overrun the original estimates)
 	 * and collapse out any unused array entries.
 	 */
 	output_size = ptr - ARRPTR(out);
 	Assert(output_size <= out->size);
 	out->size = output_size;
 	if (data != STRPTR(out))
 		memmove(STRPTR(out), data, dataoff);
 	output_bytes = CALCDATASIZE(out->size, dataoff);
 	Assert(output_bytes <= VARSIZE(out));
 	SET_VARSIZE(out, output_bytes);

 	PG_FREE_IF_COPY(in1, 0);
 	PG_FREE_IF_COPY(in2, 1);
 	PG_RETURN_POINTER(out);
 }

 /*
  * Compare two strings by tsvector rules.
  *
  * if prefix = true then it returns zero value iff b has prefix a
  */
 int32
 tsCompareString(char *a, int _lena, char *b, int _lenb, bool prefix)
 {
 	int			cmp;

 	Assert(_lena >= 0 && _lenb >= 0);
 	size_t lena = (size_t)_lena;
 	size_t lenb = (size_t)_lenb;
 	if (lena == 0)
 	{
 		if (prefix)
 			cmp = 0;			/* empty string is prefix of anything */
 		else
 			cmp = (lenb > 0) ? -1 : 0;
 	}
 	else if (lenb == 0)
 	{
 		cmp = (lena > 0) ? 1 : 0;
 	}
 	else
 	{

 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wstringop-overflow"

 		cmp = memcmp(a, b, Min((unsigned int) lena, (unsigned int) lenb));

 #pragma GCC diagnostic pop

 		if (prefix)
 		{
 			if (cmp == 0 && lena > lenb)
 				cmp = 1;		/* a is longer, so not a prefix of b */
 		}
 		else if (cmp == 0 && lena != lenb)
 		{
 			cmp = (lena < lenb) ? -1 : 1;
 		}
 	}

 	return cmp;
 }

 /*
  * Check weight info or/and fill 'data' with the required positions
  */
 static TSTernaryValue
 checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
 			   ExecPhraseData *data)
 {
 	TSTernaryValue result = TS_NO;

 	Assert(data == NULL || data->npos == 0);

 	if (entry->haspos)
 	{
 		WordEntryPosVector *posvec;

 		/*
 		 * We can't use the _POSVECPTR macro here because the pointer to the
 		 * tsvector's lexeme storage is already contained in chkval->values.
 		 */
 		posvec = (WordEntryPosVector *)
 			(chkval->values + SHORTALIGN(entry->pos + entry->len));

 		if (val->weight && data)
 		{
 			WordEntryPos *posvec_iter = posvec->pos;
 			WordEntryPos *dptr;

 			/*
 			 * Filter position information by weights
 			 */
 			dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
 			data->allocated = true;

 			/* Is there a position with a matching weight? */
 			while (posvec_iter < posvec->pos + posvec->npos)
 			{
 				/* If true, append this position to the data->pos */
 				if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
 				{
 					*dptr = WEP_GETPOS(*posvec_iter);
 					dptr++;
 				}

 				posvec_iter++;
 			}

 			data->npos = dptr - data->pos;

 			if (data->npos > 0)
 				result = TS_YES;
 			else
 			{
 				pfree(data->pos);
 				data->pos = NULL;
 				data->allocated = false;
 			}
 		}
 		else if (val->weight)
 		{
 			WordEntryPos *posvec_iter = posvec->pos;

 			/* Is there a position with a matching weight? */
 			while (posvec_iter < posvec->pos + posvec->npos)
 			{
 				if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
 				{
 					result = TS_YES;
 					break;		/* no need to go further */
 				}

 				posvec_iter++;
 			}
 		}
 		else if (data)
 		{
 			data->npos = posvec->npos;
 			data->pos = posvec->pos;
 			data->allocated = false;
 			result = TS_YES;
 		}
 		else
 		{
 			/* simplest case: no weight check, positions not needed */
 			result = TS_YES;
 		}
 	}
 	else
 	{
 		/*
 		 * Position info is lacking, so if the caller requires it, we can only
 		 * say that maybe there is a match.
 		 *
 		 * Notice, however, that we *don't* check val->weight here.
 		 * Historically, stripped tsvectors are considered to match queries
 		 * whether or not the query has a weight restriction; that's a little
 		 * dubious but we'll preserve the behavior.
 		 */
 		if (data)
 			result = TS_MAYBE;
 		else
 			result = TS_YES;
 	}

 	return result;
 }

 /*
  * TS_execute callback for matching a tsquery operand to plain tsvector data
  */
 static TSTernaryValue
 checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
 {
 	CHKVAL	   *chkval = (CHKVAL *) checkval;
 	WordEntry  *StopLow = chkval->arrb;
 	WordEntry  *StopHigh = chkval->arre;
 	WordEntry  *StopMiddle = StopHigh;
 	TSTernaryValue res = TS_NO;

 	/* Loop invariant: StopLow <= val < StopHigh */
 	while (StopLow < StopHigh)
 	{
 		int			difference;

 		StopMiddle = StopLow + (StopHigh - StopLow) / 2;
 		difference = tsCompareString(chkval->operand + val->distance,
 									 val->length,
 									 chkval->values + StopMiddle->pos,
 									 StopMiddle->len,
 									 false);

 		if (difference == 0)
 		{
 			/* Check weight info & fill 'data' with positions */
 			res = checkclass_str(chkval, StopMiddle, val, data);
 			break;
 		}
 		else if (difference > 0)
 			StopLow = StopMiddle + 1;
 		else
 			StopHigh = StopMiddle;
 	}

 	/*
 	 * If it's a prefix search, we should also consider lexemes that the
 	 * search term is a prefix of (which will necessarily immediately follow
 	 * the place we found in the above loop).  But we can skip them if there
 	 * was a definite match on the exact term AND the caller doesn't need
 	 * position info.
 	 */
 	if (val->prefix && (res != TS_YES || data))
 	{
 		WordEntryPos *allpos = NULL;
 		int			npos = 0,
 					totalpos = 0;

 		/* adjust start position for corner case */
 		if (StopLow >= StopHigh)
 			StopMiddle = StopHigh;

 		/* we don't try to re-use any data from the initial match */
 		if (data)
 		{
 			if (data->allocated)
 				pfree(data->pos);
 			data->pos = NULL;
 			data->allocated = false;
 			data->npos = 0;
 		}
 		res = TS_NO;

 		while ((res != TS_YES || data) &&
 			   StopMiddle < chkval->arre &&
 			   tsCompareString(chkval->operand + val->distance,
 							   val->length,
 							   chkval->values + StopMiddle->pos,
 							   StopMiddle->len,
 							   true) == 0)
 		{
 			TSTernaryValue subres;

 			subres = checkclass_str(chkval, StopMiddle, val, data);

 			if (subres != TS_NO)
 			{
 				if (data)
 				{
 					/*
 					 * We need to join position information
 					 */
 					if (subres == TS_MAYBE)
 					{
 						/*
 						 * No position info for this match, so we must report
 						 * MAYBE overall.
 						 */
 						res = TS_MAYBE;
 						/* forget any previous positions */
 						npos = 0;
 						/* don't leak storage */
 						if (allpos)
 							pfree(allpos);
 						break;
 					}

 					while (npos + data->npos > totalpos)
 					{
 						if (totalpos == 0)
 						{
 							totalpos = 256;
 							allpos = palloc(sizeof(WordEntryPos) * totalpos);
 						}
 						else
 						{
 							totalpos *= 2;
 							allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
 						}
 					}

 					memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
 					npos += data->npos;

 					/* don't leak storage from individual matches */
 					if (data->allocated)
 						pfree(data->pos);
 					data->pos = NULL;
 					data->allocated = false;
 					/* it's important to reset data->npos before next loop */
 					data->npos = 0;
 				}
 				else
 				{
 					/* Don't need positions, just handle YES/MAYBE */
 					if (subres == TS_YES || res == TS_NO)
 						res = subres;
 				}
 			}

 			StopMiddle++;
 		}

 		if (data && npos > 0)
 		{
 			/* Sort and make unique array of found positions */
 			data->pos = allpos;
 			qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
 			data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
 								 compareWordEntryPos);
 			data->allocated = true;
 			res = TS_YES;
 		}
 	}

 	return res;
 }

 /*
  * Compute output position list for a tsquery operator in phrase mode.
  *
  * Merge the position lists in Ldata and Rdata as specified by "emit",
  * returning the result list into *data.  The input position lists must be
  * sorted and unique, and the output will be as well.
  *
  * data: pointer to initially-all-zeroes output struct, or NULL
  * Ldata, Rdata: input position lists
  * emit: bitmask of TSPO_XXX flags
  * Loffset: offset to be added to Ldata positions before comparing/outputting
  * Roffset: offset to be added to Rdata positions before comparing/outputting
  * max_npos: maximum possible required size of output position array
  *
  * Loffset and Roffset should not be negative, else we risk trying to output
  * negative positions, which won't fit into WordEntryPos.
  *
  * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
  * we return it as TSTernaryValue.
  *
  * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
  * returns TS_YES if any positions would have been emitted.
  */
 #define TSPO_L_ONLY		0x01	/* emit positions appearing only in L */
 #define TSPO_R_ONLY		0x02	/* emit positions appearing only in R */
 #define TSPO_BOTH		0x04	/* emit positions appearing in both L&R */

 static TSTernaryValue
 TS_phrase_output(ExecPhraseData *data,
 				 ExecPhraseData *Ldata,
 				 ExecPhraseData *Rdata,
 				 int emit,
 				 int Loffset,
 				 int Roffset,
 				 int max_npos)
 {
 	int			Lindex,
 				Rindex;

 	/* Loop until both inputs are exhausted */
 	Lindex = Rindex = 0;
 	while (Lindex < Ldata->npos || Rindex < Rdata->npos)
 	{
 		int			Lpos,
 					Rpos;
 		int			output_pos = 0;

 		/*
 		 * Fetch current values to compare.  WEP_GETPOS() is needed because
 		 * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
 		 */
 		if (Lindex < Ldata->npos)
 			Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
 		else
 		{
 			/* L array exhausted, so we're done if R_ONLY isn't set */
 			if (!(emit & TSPO_R_ONLY))
 				break;
 			Lpos = INT_MAX;
 		}
 		if (Rindex < Rdata->npos)
 			Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
 		else
 		{
 			/* R array exhausted, so we're done if L_ONLY isn't set */
 			if (!(emit & TSPO_L_ONLY))
 				break;
 			Rpos = INT_MAX;
 		}

 		/* Merge-join the two input lists */
 		if (Lpos < Rpos)
 		{
 			/* Lpos is not matched in Rdata, should we output it? */
 			if (emit & TSPO_L_ONLY)
 				output_pos = Lpos;
 			Lindex++;
 		}
 		else if (Lpos == Rpos)
 		{
 			/* Lpos and Rpos match ... should we output it? */
 			if (emit & TSPO_BOTH)
 				output_pos = Rpos;
 			Lindex++;
 			Rindex++;
 		}
 		else					/* Lpos > Rpos */
 		{
 			/* Rpos is not matched in Ldata, should we output it? */
 			if (emit & TSPO_R_ONLY)
 				output_pos = Rpos;
 			Rindex++;
 		}

 		if (output_pos > 0)
 		{
 			if (data)
 			{
 				/* Store position, first allocating output array if needed */
 				if (data->pos == NULL)
 				{
 					data->pos = (WordEntryPos *)
 						palloc(max_npos * sizeof(WordEntryPos));
 					data->allocated = true;
 				}
 				data->pos[data->npos++] = output_pos;
 			}
 			else
 			{
 				/*
 				 * Exact positions not needed, so return TS_YES as soon as we
 				 * know there is at least one.
 				 */
 				return TS_YES;
 			}
 		}
 	}

 	if (data && data->npos > 0)
 	{
 		/* Let's assert we didn't overrun the array */
 		Assert(data->npos <= max_npos);
 		return TS_YES;
 	}
 	return TS_NO;
 }

 /*
  * Execute tsquery at or below an OP_PHRASE operator.
  *
  * This handles tsquery execution at recursion levels where we need to care
  * about match locations.
  *
  * In addition to the same arguments used for TS_execute, the caller may pass
  * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
  * match position info on success.  data == NULL if no position data need be
  * returned.
  * Note: the function assumes data != NULL for operators other than OP_PHRASE.
  * This is OK because an outside call always starts from an OP_PHRASE node,
  * and all internal recursion cases pass data != NULL.
  *
  * The detailed semantics of the match data, given that the function returned
  * TS_YES (successful match), are:
  *
  * npos > 0, negate = false:
  *	 query is matched at specified position(s) (and only those positions)
  * npos > 0, negate = true:
  *	 query is matched at all positions *except* specified position(s)
  * npos = 0, negate = true:
  *	 query is matched at all positions
  * npos = 0, negate = false:
  *	 disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
  *
  * Successful matches also return a "width" value which is the match width in
  * lexemes, less one.  Hence, "width" is zero for simple one-lexeme matches,
  * and is the sum of the phrase operator distances for phrase matches.  Note
  * that when width > 0, the listed positions represent the ends of matches not
  * the starts.  (This unintuitive rule is needed to avoid possibly generating
  * negative positions, which wouldn't fit into the WordEntryPos arrays.)
  *
  * If the TSExecuteCallback function reports that an operand is present
  * but fails to provide position(s) for it, we will return TS_MAYBE when
  * it is possible but not certain that the query is matched.
  *
  * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
  * negate = false (which is the state initialized by the caller); but the
  * "width" output in such cases is undefined.
  */
 static TSTernaryValue
 TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
 				  TSExecuteCallback chkcond,
 				  ExecPhraseData *data)
 {
 	ExecPhraseData Ldata,
 				Rdata;
 	TSTernaryValue lmatch,
 				rmatch;
 	int			Loffset,
 				Roffset,
 				maxwidth;

 	/* since this function recurses, it could be driven to stack overflow */
 	check_stack_depth();

 	/* ... and let's check for query cancel while we're at it */
 	CHECK_FOR_INTERRUPTS();

 	if (curitem->type == QI_VAL)
 		return chkcond(arg, (QueryOperand *) curitem, data);

 	switch (curitem->qoperator.oper)
 	{
 		case OP_NOT:

 			/*
 			 * We need not touch data->width, since a NOT operation does not
 			 * change the match width.
 			 */
 			if (flags & TS_EXEC_SKIP_NOT)
 			{
 				/* with SKIP_NOT, report NOT as "match everywhere" */
 				Assert(data->npos == 0 && !data->negate);
 				data->negate = true;
 				return TS_YES;
 			}
 			switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
 			{
 				case TS_NO:
 					/* change "match nowhere" to "match everywhere" */
 					Assert(data->npos == 0 && !data->negate);
 					data->negate = true;
 					return TS_YES;
 				case TS_YES:
 					if (data->npos > 0)
 					{
 						/* we have some positions, invert negate flag */
 						data->negate = !data->negate;
 						return TS_YES;
 					}
 					else if (data->negate)
 					{
 						/* change "match everywhere" to "match nowhere" */
 						data->negate = false;
 						return TS_NO;
 					}
 					/* Should not get here if result was TS_YES */
 					Assert(false);
 					break;
 				case TS_MAYBE:
 					/* match positions are, and remain, uncertain */
 					return TS_MAYBE;
 			}
 			break;

 		case OP_PHRASE:
 		case OP_AND:
 			memset(&Ldata, 0, sizeof(Ldata));
 			memset(&Rdata, 0, sizeof(Rdata));

 			lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
 									   arg, flags, chkcond, &Ldata);
 			if (lmatch == TS_NO)
 				return TS_NO;

 			rmatch = TS_phrase_execute(curitem + 1,
 									   arg, flags, chkcond, &Rdata);
 			if (rmatch == TS_NO)
 				return TS_NO;

 			/*
 			 * If either operand has no position information, then we can't
 			 * return reliable position data, only a MAYBE result.
 			 */
 			if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
 				return TS_MAYBE;

 			if (curitem->qoperator.oper == OP_PHRASE)
 			{
 				/*
 				 * Compute Loffset and Roffset suitable for phrase match, and
 				 * compute overall width of whole phrase match.
 				 */
 				Loffset = curitem->qoperator.distance + Rdata.width;
 				Roffset = 0;
 				if (data)
 					data->width = curitem->qoperator.distance +
 						Ldata.width + Rdata.width;
 			}
 			else
 			{
 				/*
 				 * For OP_AND, set output width and alignment like OP_OR (see
 				 * comment below)
 				 */
 				maxwidth = Max(Ldata.width, Rdata.width);
 				Loffset = maxwidth - Ldata.width;
 				Roffset = maxwidth - Rdata.width;
 				if (data)
 					data->width = maxwidth;
 			}

 			if (Ldata.negate && Rdata.negate)
 			{
 				/* !L & !R: treat as !(L | R) */
 				(void) TS_phrase_output(data, &Ldata, &Rdata,
 										TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
 										Loffset, Roffset,
 										Ldata.npos + Rdata.npos);
 				if (data)
 					data->negate = true;
 				return TS_YES;
 			}
 			else if (Ldata.negate)
 			{
 				/* !L & R */
 				return TS_phrase_output(data, &Ldata, &Rdata,
 										TSPO_R_ONLY,
 										Loffset, Roffset,
 										Rdata.npos);
 			}
 			else if (Rdata.negate)
 			{
 				/* L & !R */
 				return TS_phrase_output(data, &Ldata, &Rdata,
 										TSPO_L_ONLY,
 										Loffset, Roffset,
 										Ldata.npos);
 			}
 			else
 			{
 				/* straight AND */
 				return TS_phrase_output(data, &Ldata, &Rdata,
 										TSPO_BOTH,
 										Loffset, Roffset,
 										Min(Ldata.npos, Rdata.npos));
 			}

 		case OP_OR:
 			memset(&Ldata, 0, sizeof(Ldata));
 			memset(&Rdata, 0, sizeof(Rdata));

 			lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
 									   arg, flags, chkcond, &Ldata);
 			rmatch = TS_phrase_execute(curitem + 1,
 									   arg, flags, chkcond, &Rdata);

 			if (lmatch == TS_NO && rmatch == TS_NO)
 				return TS_NO;

 			/*
 			 * If either operand has no position information, then we can't
 			 * return reliable position data, only a MAYBE result.
 			 */
 			if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
 				return TS_MAYBE;

 			/*
 			 * Cope with undefined output width from failed submatch.  (This
 			 * takes less code than trying to ensure that all failure returns
 			 * set data->width to zero.)
 			 */
 			if (lmatch == TS_NO)
 				Ldata.width = 0;
 			if (rmatch == TS_NO)
 				Rdata.width = 0;

 			/*
 			 * For OP_AND and OP_OR, report the width of the wider of the two
 			 * inputs, and align the narrower input's positions to the right
 			 * end of that width.  This rule deals at least somewhat
 			 * reasonably with cases like "x <-> (y | z <-> q)".
 			 */
 			maxwidth = Max(Ldata.width, Rdata.width);
 			Loffset = maxwidth - Ldata.width;
 			Roffset = maxwidth - Rdata.width;
 			data->width = maxwidth;

 			if (Ldata.negate && Rdata.negate)
 			{
 				/* !L | !R: treat as !(L & R) */
 				(void) TS_phrase_output(data, &Ldata, &Rdata,
 										TSPO_BOTH,
 										Loffset, Roffset,
 										Min(Ldata.npos, Rdata.npos));
 				data->negate = true;
 				return TS_YES;
 			}
 			else if (Ldata.negate)
 			{
 				/* !L | R: treat as !(L & !R) */
 				(void) TS_phrase_output(data, &Ldata, &Rdata,
 										TSPO_L_ONLY,
 										Loffset, Roffset,
 										Ldata.npos);
 				data->negate = true;
 				return TS_YES;
 			}
 			else if (Rdata.negate)
 			{
 				/* L | !R: treat as !(!L & R) */
 				(void) TS_phrase_output(data, &Ldata, &Rdata,
 										TSPO_R_ONLY,
 										Loffset, Roffset,
 										Rdata.npos);
 				data->negate = true;
 				return TS_YES;
 			}
 			else
 			{
 				/* straight OR */
 				return TS_phrase_output(data, &Ldata, &Rdata,
 										TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
 										Loffset, Roffset,
 										Ldata.npos + Rdata.npos);
 			}

 		default:
 			elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
 	}

 	/* not reachable, but keep compiler quiet */
 	return TS_NO;
 }


 /*
  * Evaluate tsquery boolean expression.
  *
  * curitem: current tsquery item (initially, the first one)
  * arg: opaque value to pass through to callback function
  * flags: bitmask of flag bits shown in ts_utils.h
  * chkcond: callback function to check whether a primitive value is present
  */
 bool
 TS_execute(QueryItem *curitem, void *arg, uint32 flags,
 		   TSExecuteCallback chkcond)
 {
 	/*
 	 * If we get TS_MAYBE from the recursion, return true.  We could only see
 	 * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
 	 * need to check again.
 	 */
 	return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
 }

 /*
  * Evaluate tsquery boolean expression.
  *
  * This is the same as TS_execute except that TS_MAYBE is returned as-is.
  */
 TSTernaryValue
 TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags,
 				   TSExecuteCallback chkcond)
 {
 	return TS_execute_recurse(curitem, arg, flags, chkcond);
 }

 /*
  * TS_execute recursion for operators above any phrase operator.  Here we do
  * not need to worry about lexeme positions.  As soon as we hit an OP_PHRASE
  * operator, we pass it off to TS_phrase_execute which does worry.
  */
 static TSTernaryValue
 TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
 				   TSExecuteCallback chkcond)
 {
 	TSTernaryValue lmatch;

 	/* since this function recurses, it could be driven to stack overflow */
 	check_stack_depth();

 	/* ... and let's check for query cancel while we're at it */
 	CHECK_FOR_INTERRUPTS();

 	if (curitem->type == QI_VAL)
 		return chkcond(arg, (QueryOperand *) curitem,
 					   NULL /* don't need position info */ );

 	switch (curitem->qoperator.oper)
 	{
 		case OP_NOT:
 			if (flags & TS_EXEC_SKIP_NOT)
 				return TS_YES;
 			switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
 			{
 				case TS_NO:
 					return TS_YES;
 				case TS_YES:
 					return TS_NO;
 				case TS_MAYBE:
 					return TS_MAYBE;
 			}
 			break;

 		case OP_AND:
 			lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
 										flags, chkcond);
 			if (lmatch == TS_NO)
 				return TS_NO;
 			switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
 			{
 				case TS_NO:
 					return TS_NO;
 				case TS_YES:
 					return lmatch;
 				case TS_MAYBE:
 					return TS_MAYBE;
 			}
 			break;

 		case OP_OR:
 			lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
 										flags, chkcond);
 			if (lmatch == TS_YES)
 				return TS_YES;
 			switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
 			{
 				case TS_NO:
 					return lmatch;
 				case TS_YES:
 					return TS_YES;
 				case TS_MAYBE:
 					return TS_MAYBE;
 			}
 			break;

 		case OP_PHRASE:

 			/*
 			 * If we get a MAYBE result, and the caller doesn't want that,
 			 * convert it to NO.  It would be more consistent, perhaps, to
 			 * return the result of TS_phrase_execute() verbatim and then
 			 * convert MAYBE results at the top of the recursion.  But
 			 * converting at the topmost phrase operator gives results that
 			 * are bug-compatible with the old implementation, so do it like
 			 * this for now.
 			 */
 			switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
 			{
 				case TS_NO:
 					return TS_NO;
 				case TS_YES:
 					return TS_YES;
 				case TS_MAYBE:
 					return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
 			}
 			break;

 		default:
 			elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
 	}

 	/* not reachable, but keep compiler quiet */
 	return TS_NO;
 }

 /*
  * Evaluate tsquery and report locations of matching terms.
  *
  * This is like TS_execute except that it returns match locations not just
  * success/failure status.  The callback function is required to provide
  * position data (we report failure if it doesn't).
  *
  * On successful match, the result is a List of ExecPhraseData structs, one
  * for each AND'ed term or phrase operator in the query.  Each struct includes
  * a sorted array of lexeme positions matching that term.  (Recall that for
  * phrase operators, the match includes width+1 lexemes, and the recorded
  * position is that of the rightmost lexeme.)
  *
  * OR subexpressions are handled by union'ing their match locations into a
  * single List element, which is valid since any of those locations contains
  * a match.  However, when some of the OR'ed terms are phrase operators, we
  * report the maximum width of any of the OR'ed terms, making such cases
  * slightly imprecise in the conservative direction.  (For example, if the
  * tsquery is "(A <-> B) | C", an occurrence of C in the data would be
  * reported as though it includes the lexeme to the left of C.)
  *
  * Locations of NOT subexpressions are not reported.  (Obviously, there can
  * be no successful NOT matches at top level, or the match would have failed.
  * So this amounts to ignoring NOTs underneath ORs.)
  *
  * The result is NIL if no match, or if position data was not returned.
  *
  * Arguments are the same as for TS_execute, although flags is currently
  * vestigial since none of the defined bits are sensible here.
  */
 List *
 TS_execute_locations(QueryItem *curitem, void *arg,
 					 uint32 flags,
 					 TSExecuteCallback chkcond)
 {
 	List	   *result;

 	/* No flags supported, as yet */
 	Assert(flags == TS_EXEC_EMPTY);
 	if (TS_execute_locations_recurse(curitem, arg, chkcond, &result))
 		return result;
 	return NIL;
 }

 /*
  * TS_execute_locations recursion for operators above any phrase operator.
  * OP_PHRASE subexpressions can be passed off to TS_phrase_execute.
  */
 static bool
 TS_execute_locations_recurse(QueryItem *curitem, void *arg,
 							 TSExecuteCallback chkcond,
 							 List **locations)
 {
 	bool		lmatch,
 				rmatch;
 	List	   *llocations,
 			   *rlocations;
 	ExecPhraseData *data;

 	/* since this function recurses, it could be driven to stack overflow */
 	check_stack_depth();

 	/* ... and let's check for query cancel while we're at it */
 	CHECK_FOR_INTERRUPTS();

 	/* Default locations result is empty */
 	*locations = NIL;

 	if (curitem->type == QI_VAL)
 	{
 		data = palloc0_object(ExecPhraseData);
 		if (chkcond(arg, (QueryOperand *) curitem, data) == TS_YES)
 		{
 			*locations = list_make1(data);
 			return true;
 		}
 		pfree(data);
 		return false;
 	}

 	switch (curitem->qoperator.oper)
 	{
 		case OP_NOT:
 			if (!TS_execute_locations_recurse(curitem + 1, arg, chkcond,
 											  &llocations))
 				return true;	/* we don't pass back any locations */
 			return false;

 		case OP_AND:
 			if (!TS_execute_locations_recurse(curitem + curitem->qoperator.left,
 											  arg, chkcond,
 											  &llocations))
 				return false;
 			if (!TS_execute_locations_recurse(curitem + 1,
 											  arg, chkcond,
 											  &rlocations))
 				return false;
 			*locations = list_concat(llocations, rlocations);
 			return true;

 		case OP_OR:
 			lmatch = TS_execute_locations_recurse(curitem + curitem->qoperator.left,
 												  arg, chkcond,
 												  &llocations);
 			rmatch = TS_execute_locations_recurse(curitem + 1,
 												  arg, chkcond,
 												  &rlocations);
 			if (lmatch || rmatch)
 			{
 				/*
 				 * We generate an AND'able location struct from each
 				 * combination of sub-matches, following the disjunctive law
 				 * (A & B) | (C & D) = (A | C) & (A | D) & (B | C) & (B | D).
 				 *
 				 * However, if either input didn't produce locations (i.e., it
 				 * failed or was a NOT), we must just return the other list.
 				 */
 				if (llocations == NIL)
 					*locations = rlocations;
 				else if (rlocations == NIL)
 					*locations = llocations;
 				else
 				{
 					ListCell   *ll;

 					foreach(ll, llocations)
 					{
 						ExecPhraseData *ldata = (ExecPhraseData *) lfirst(ll);
 						ListCell   *lr;

 						foreach(lr, rlocations)
 						{
 							ExecPhraseData *rdata = (ExecPhraseData *) lfirst(lr);

 							data = palloc0_object(ExecPhraseData);
 							(void) TS_phrase_output(data, ldata, rdata,
 													TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
 													0, 0,
 													ldata->npos + rdata->npos);
 							/* Report the larger width, as explained above. */
 							data->width = Max(ldata->width, rdata->width);
 							*locations = lappend(*locations, data);
 						}
 					}
 				}

 				return true;
 			}
 			return false;

 		case OP_PHRASE:
 			/* We can hand this off to TS_phrase_execute */
 			data = palloc0_object(ExecPhraseData);
 			if (TS_phrase_execute(curitem, arg, TS_EXEC_EMPTY, chkcond,
 								  data) == TS_YES)
 			{
 				if (!data->negate)
 					*locations = list_make1(data);
 				return true;
 			}
 			pfree(data);
 			return false;

 		default:
 			elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
 	}

 	/* not reachable, but keep compiler quiet */
 	return false;
 }

 /*
  * Detect whether a tsquery boolean expression requires any positive matches
  * to values shown in the tsquery.
  *
  * This is needed to know whether a GIN index search requires full index scan.
  * For example, 'x & !y' requires a match of x, so it's sufficient to scan
  * entries for x; but 'x | !y' could match rows containing neither x nor y.
  */
 bool
 tsquery_requires_match(QueryItem *curitem)
 {
 	/* since this function recurses, it could be driven to stack overflow */
 	check_stack_depth();

 	if (curitem->type == QI_VAL)
 		return true;

 	switch (curitem->qoperator.oper)
 	{
 		case OP_NOT:

 			/*
 			 * Assume there are no required matches underneath a NOT.  For
 			 * some cases with nested NOTs, we could prove there's a required
 			 * match, but it seems unlikely to be worth the trouble.
 			 */
 			return false;

 		case OP_PHRASE:

 			/*
 			 * Treat OP_PHRASE as OP_AND here
 			 */
 		case OP_AND:
 			/* If either side requires a match, we're good */
 			if (tsquery_requires_match(curitem + curitem->qoperator.left))
 				return true;
 			else
 				return tsquery_requires_match(curitem + 1);

 		case OP_OR:
 			/* Both sides must require a match */
 			if (tsquery_requires_match(curitem + curitem->qoperator.left))
 				return tsquery_requires_match(curitem + 1);
 			else
 				return false;

 		default:
 			elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
 	}

 	/* not reachable, but keep compiler quiet */
 	return false;
 }

 /*
  * boolean operations
  */
 Datum
 ts_match_qv(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
 										PG_GETARG_DATUM(1),
 										PG_GETARG_DATUM(0)));
 }

 Datum
 ts_match_vq(PG_FUNCTION_ARGS)
 {
 	TSVector	val = PG_GETARG_TSVECTOR(0);
 	TSQuery		query = PG_GETARG_TSQUERY(1);
 	CHKVAL		chkval;
 	bool		result;

 	/* empty query matches nothing */
 	if (!query->size)
 	{
 		PG_FREE_IF_COPY(val, 0);
 		PG_FREE_IF_COPY(query, 1);
 		PG_RETURN_BOOL(false);
 	}

 	chkval.arrb = ARRPTR(val);
 	chkval.arre = chkval.arrb + val->size;
 	chkval.values = STRPTR(val);
 	chkval.operand = GETOPERAND(query);
 	result = TS_execute(GETQUERY(query),
 						&chkval,
 						TS_EXEC_EMPTY,
 						checkcondition_str);

 	PG_FREE_IF_COPY(val, 0);
 	PG_FREE_IF_COPY(query, 1);
 	PG_RETURN_BOOL(result);
 }

 Datum
 ts_match_tt(PG_FUNCTION_ARGS)
 {
 	TSVector	vector;
 	TSQuery		query;
 	bool		res;

 	vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
 												  PG_GETARG_DATUM(0)));
 	query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
 												PG_GETARG_DATUM(1)));

 	res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
 										   TSVectorGetDatum(vector),
 										   TSQueryGetDatum(query)));

 	pfree(vector);
 	pfree(query);

 	PG_RETURN_BOOL(res);
 }

 Datum
 ts_match_tq(PG_FUNCTION_ARGS)
 {
 	TSVector	vector;
 	TSQuery		query = PG_GETARG_TSQUERY(1);
 	bool		res;

 	vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
 												  PG_GETARG_DATUM(0)));

 	res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
 										   TSVectorGetDatum(vector),
 										   TSQueryGetDatum(query)));

 	pfree(vector);
 	PG_FREE_IF_COPY(query, 1);

 	PG_RETURN_BOOL(res);
 }

 /*
  * ts_stat statistic function support
  */


 /*
  * Returns the number of positions in value 'wptr' within tsvector 'txt',
  * that have a weight equal to one of the weights in 'weight' bitmask.
  */
 static int
 check_weight(TSVector txt, WordEntry *wptr, int8 weight)
 {
 	int			len = POSDATALEN(txt, wptr);
 	int			num = 0;
 	WordEntryPos *ptr = POSDATAPTR(txt, wptr);

 	while (len--)
 	{
 		if (weight & (1 << WEP_GETWEIGHT(*ptr)))
 			num++;
 		ptr++;
 	}
 	return num;
 }

 #define compareStatWord(a,e,t)							\
 	tsCompareString((a)->lexeme, (a)->lenlexeme,		\
 					STRPTR(t) + (e)->pos, (e)->len,		\
 					false)

 static void
 insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
 {
 	WordEntry  *we = ARRPTR(txt) + off;
 	StatEntry  *node = stat->root,
 			   *pnode = NULL;
 	int			n,
 				res = 0;
 	uint32		depth = 1;

 	if (stat->weight == 0)
 		n = (we->haspos) ? POSDATALEN(txt, we) : 1;
 	else
 		n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;

 	if (n == 0)
 		return;					/* nothing to insert */

 	while (node)
 	{
 		res = compareStatWord(node, we, txt);

 		if (res == 0)
 		{
 			break;
 		}
 		else
 		{
 			pnode = node;
 			node = (res < 0) ? node->left : node->right;
 		}
 		depth++;
 	}

 	if (depth > stat->maxdepth)
 		stat->maxdepth = depth;

 	if (node == NULL)
 	{
 		node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
 		node->left = node->right = NULL;
 		node->ndoc = 1;
 		node->nentry = n;
 		node->lenlexeme = we->len;
 		memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);

 		if (pnode == NULL)
 		{
 			stat->root = node;
 		}
 		else
 		{
 			if (res < 0)
 				pnode->left = node;
 			else
 				pnode->right = node;
 		}
 	}
 	else
 	{
 		node->ndoc++;
 		node->nentry += n;
 	}
 }

 static void
 chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
 					uint32 low, uint32 high, uint32 offset)
 {
 	uint32		pos;
 	uint32		middle = (low + high) >> 1;

 	pos = (low + middle) >> 1;
 	if (low != middle && pos >= offset && pos - offset < txt->size)
 		insertStatEntry(persistentContext, stat, txt, pos - offset);
 	pos = (high + middle + 1) >> 1;
 	if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
 		insertStatEntry(persistentContext, stat, txt, pos - offset);

 	if (low != middle)
 		chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
 	if (high != middle + 1)
 		chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
 }

 /*
  * This is written like a custom aggregate function, because the
  * original plan was to do just that. Unfortunately, an aggregate function
  * can't return a set, so that plan was abandoned. If that limitation is
  * lifted in the future, ts_stat could be a real aggregate function so that
  * you could use it like this:
  *
  *	 SELECT ts_stat(vector_column) FROM vector_table;
  *
  *	where vector_column is a tsvector-type column in vector_table.
  */

 static TSVectorStat *
 ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
 {
 	TSVector	txt = DatumGetTSVector(data);
 	uint32		i,
 				nbit = 0,
 				offset;

 	if (stat == NULL)
 	{							/* Init in first */
 		stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
 		stat->maxdepth = 1;
 	}

 	/* simple check of correctness */
 	if (txt == NULL || txt->size == 0)
 	{
 		if (txt && txt != (TSVector) DatumGetPointer(data))
 			pfree(txt);
 		return stat;
 	}

 	i = txt->size - 1;
 	for (; i > 0; i >>= 1)
 		nbit++;

 	nbit = 1 << nbit;
 	offset = (nbit - txt->size) / 2;

 	insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
 	chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);

 	return stat;
 }

 static void
 ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
 				   TSVectorStat *stat)
 {
 	TupleDesc	tupdesc;
 	MemoryContext oldcontext;
 	StatEntry  *node;

 	funcctx->user_fctx = (void *) stat;

 	oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

 	stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
 	stat->stackpos = 0;

 	node = stat->root;
 	/* find leftmost value */
 	if (node == NULL)
 		stat->stack[stat->stackpos] = NULL;
 	else
 		for (;;)
 		{
 			stat->stack[stat->stackpos] = node;
 			if (node->left)
 			{
 				stat->stackpos++;
 				node = node->left;
 			}
 			else
 				break;
 		}
 	Assert(stat->stackpos <= stat->maxdepth);

 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
 		elog(ERROR, "return type must be a row type");
 	funcctx->tuple_desc = tupdesc;
 	funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);

 	MemoryContextSwitchTo(oldcontext);
 }

 static StatEntry *
 walkStatEntryTree(TSVectorStat *stat)
 {
 	StatEntry  *node = stat->stack[stat->stackpos];

 	if (node == NULL)
 		return NULL;

 	if (node->ndoc != 0)
 	{
 		/* return entry itself: we already was at left sublink */
 		return node;
 	}
 	else if (node->right && node->right != stat->stack[stat->stackpos + 1])
 	{
 		/* go on right sublink */
 		stat->stackpos++;
 		node = node->right;

 		/* find most-left value */
 		for (;;)
 		{
 			stat->stack[stat->stackpos] = node;
 			if (node->left)
 			{
 				stat->stackpos++;
 				node = node->left;
 			}
 			else
 				break;
 		}
 		Assert(stat->stackpos <= stat->maxdepth);
 	}
 	else
 	{
 		/* we already return all left subtree, itself and  right subtree */
 		if (stat->stackpos == 0)
 			return NULL;

 		stat->stackpos--;
 		return walkStatEntryTree(stat);
 	}

 	return node;
 }

 static Datum
 ts_process_call(FuncCallContext *funcctx)
 {
 	TSVectorStat *st;
 	StatEntry  *entry;

 	st = (TSVectorStat *) funcctx->user_fctx;

 	entry = walkStatEntryTree(st);

 	if (entry != NULL)
 	{
 		Datum		result;
 		char	   *values[3];
 		char		ndoc[16];
 		char		nentry[16];
 		HeapTuple	tuple;

 		values[0] = palloc(entry->lenlexeme + 1);
 		memcpy(values[0], entry->lexeme, entry->lenlexeme);
 		(values[0])[entry->lenlexeme] = '\0';
 		sprintf(ndoc, "%d", entry->ndoc);
 		values[1] = ndoc;
 		sprintf(nentry, "%d", entry->nentry);
 		values[2] = nentry;

 		tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
 		result = HeapTupleGetDatum(tuple);

 		pfree(values[0]);

 		/* mark entry as already visited */
 		entry->ndoc = 0;

 		return result;
 	}

 	return (Datum) 0;
 }

 static TSVectorStat *
 ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
 {
 	char	   *query = text_to_cstring(txt);
 	TSVectorStat *stat;
 	bool		isnull;
 	Portal		portal;
 	SPIPlanPtr	plan;

 	if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
 		/* internal error */
 		elog(ERROR, "SPI_prepare(\"%s\") failed", query);

 	if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
 		/* internal error */
 		elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);

 	SPI_cursor_fetch(portal, true, 100);

 	if (SPI_tuptable == NULL ||
 		SPI_tuptable->tupdesc->natts != 1 ||
 		!IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
 						   TSVECTOROID))
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("ts_stat query must return one tsvector column")));

 	stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
 	stat->maxdepth = 1;

 	if (ws)
 	{
 		char	   *buf;

 		buf = VARDATA_ANY(ws);
 		while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
 		{
 			if (pg_mblen(buf) == 1)
 			{
 				switch (*buf)
 				{
 					case 'A':
 					case 'a':
 						stat->weight |= 1 << 3;
 						break;
 					case 'B':
 					case 'b':
 						stat->weight |= 1 << 2;
 						break;
 					case 'C':
 					case 'c':
 						stat->weight |= 1 << 1;
 						break;
 					case 'D':
 					case 'd':
 						stat->weight |= 1;
 						break;
 					default:
 						stat->weight |= 0;
 				}
 			}
 			buf += pg_mblen(buf);
 		}
 	}

 	while (SPI_processed > 0)
 	{
 		uint64		i;

 		for (i = 0; i < SPI_processed; i++)
 		{
 			Datum		data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);

 			if (!isnull)
 				stat = ts_accum(persistentContext, stat, data);
 		}

 		SPI_freetuptable(SPI_tuptable);
 		SPI_cursor_fetch(portal, true, 100);
 	}

 	SPI_freetuptable(SPI_tuptable);
 	SPI_cursor_close(portal);
 	SPI_freeplan(plan);
 	pfree(query);

 	return stat;
 }

 Datum
 ts_stat1(PG_FUNCTION_ARGS)
 {
 	FuncCallContext *funcctx;
 	Datum		result;

 	if (SRF_IS_FIRSTCALL())
 	{
 		TSVectorStat *stat;
 		text	   *txt = PG_GETARG_TEXT_PP(0);

 		funcctx = SRF_FIRSTCALL_INIT();
 		SPI_connect();
 		stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
 		PG_FREE_IF_COPY(txt, 0);
 		ts_setup_firstcall(fcinfo, funcctx, stat);
 		SPI_finish();
 	}

 	funcctx = SRF_PERCALL_SETUP();
 	if ((result = ts_process_call(funcctx)) != (Datum) 0)
 		SRF_RETURN_NEXT(funcctx, result);
 	SRF_RETURN_DONE(funcctx);
 }

 Datum
 ts_stat2(PG_FUNCTION_ARGS)
 {
 	FuncCallContext *funcctx;
 	Datum		result;

 	if (SRF_IS_FIRSTCALL())
 	{
 		TSVectorStat *stat;
 		text	   *txt = PG_GETARG_TEXT_PP(0);
 		text	   *ws = PG_GETARG_TEXT_PP(1);

 		funcctx = SRF_FIRSTCALL_INIT();
 		SPI_connect();
 		stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
 		PG_FREE_IF_COPY(txt, 0);
 		PG_FREE_IF_COPY(ws, 1);
 		ts_setup_firstcall(fcinfo, funcctx, stat);
 		SPI_finish();
 	}

 	funcctx = SRF_PERCALL_SETUP();
 	if ((result = ts_process_call(funcctx)) != (Datum) 0)
 		SRF_RETURN_NEXT(funcctx, result);
 	SRF_RETURN_DONE(funcctx);
 }


 /*
  * Triggers for automatic update of a tsvector column from text column(s)
  *
  * Trigger arguments are either
  *		name of tsvector col, name of tsconfig to use, name(s) of text col(s)
  *		name of tsvector col, name of regconfig col, name(s) of text col(s)
  * ie, tsconfig can either be specified by name, or indirectly as the
  * contents of a regconfig field in the row.  If the name is used, it must
  * be explicitly schema-qualified.
  */
 Datum
 tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
 {
 	return tsvector_update_trigger(fcinfo, false);
 }

 Datum
 tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
 {
 	return tsvector_update_trigger(fcinfo, true);
 }

 static Datum
 tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
 {
 	TriggerData *trigdata;
 	Trigger    *trigger;
 	Relation	rel;
 	HeapTuple	rettuple = NULL;
 	int			tsvector_attr_num,
 				i;
 	ParsedText	prs;
 	Datum		datum;
 	bool		isnull;
 	text	   *txt;
 	Oid			cfgId;
 	bool		update_needed;

 	/* Check call context */
 	if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
 		elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");

 	trigdata = (TriggerData *) fcinfo->context;
 	if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
 		elog(ERROR, "tsvector_update_trigger: must be fired for row");
 	if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
 		elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");

 	if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
 	{
 		rettuple = trigdata->tg_trigtuple;
 		update_needed = true;
 	}
 	else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
 	{
 		rettuple = trigdata->tg_newtuple;
 		update_needed = false;	/* computed below */
 	}
 	else
 		elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");

 	trigger = trigdata->tg_trigger;
 	rel = trigdata->tg_relation;

 	if (trigger->tgnargs < 3)
 		elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");

 	/* Find the target tsvector column */
 	tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
 	if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
 		ereport(ERROR,
 				(errcode(ERRCODE_UNDEFINED_COLUMN),
 				 errmsg("tsvector column \"%s\" does not exist",
 						trigger->tgargs[0])));
 	/* This will effectively reject system columns, so no separate test: */
 	if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
 						   TSVECTOROID))
 		ereport(ERROR,
 				(errcode(ERRCODE_DATATYPE_MISMATCH),
 				 errmsg("column \"%s\" is not of tsvector type",
 						trigger->tgargs[0])));

 	/* Find the configuration to use */
 	if (config_column)
 	{
 		int			config_attr_num;

 		config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
 		if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
 			ereport(ERROR,
 					(errcode(ERRCODE_UNDEFINED_COLUMN),
 					 errmsg("configuration column \"%s\" does not exist",
 							trigger->tgargs[1])));
 		if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
 							   REGCONFIGOID))
 			ereport(ERROR,
 					(errcode(ERRCODE_DATATYPE_MISMATCH),
 					 errmsg("column \"%s\" is not of regconfig type",
 							trigger->tgargs[1])));

 		datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
 		if (isnull)
 			ereport(ERROR,
 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
 					 errmsg("configuration column \"%s\" must not be null",
 							trigger->tgargs[1])));
 		cfgId = DatumGetObjectId(datum);
 	}
 	else
 	{
 		List	   *names;

 		names = stringToQualifiedNameList(trigger->tgargs[1], NULL);
 		/* require a schema so that results are not search path dependent */
 		if (list_length(names) < 2)
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 					 errmsg("text search configuration name \"%s\" must be schema-qualified",
 							trigger->tgargs[1])));
 		cfgId = get_ts_config_oid(names, false);
 	}

 	/* initialize parse state */
 	prs.lenwords = 32;
 	prs.curwords = 0;
 	prs.pos = 0;
 	prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);

 	/* find all words in indexable column(s) */
 	for (i = 2; i < trigger->tgnargs; i++)
 	{
 		int			numattr;

 		numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
 		if (numattr == SPI_ERROR_NOATTRIBUTE)
 			ereport(ERROR,
 					(errcode(ERRCODE_UNDEFINED_COLUMN),
 					 errmsg("column \"%s\" does not exist",
 							trigger->tgargs[i])));
 		if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
 			ereport(ERROR,
 					(errcode(ERRCODE_DATATYPE_MISMATCH),
 					 errmsg("column \"%s\" is not of a character type",
 							trigger->tgargs[i])));

 		if (bms_is_member(numattr - FirstLowInvalidHeapAttributeNumber, trigdata->tg_updatedcols))
 			update_needed = true;

 		datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
 		if (isnull)
 			continue;

 		txt = DatumGetTextPP(datum);

 		parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));

 		if (txt != (text *) DatumGetPointer(datum))
 			pfree(txt);
 	}

 	if (update_needed)
 	{
 		/* make tsvector value */
 		datum = TSVectorGetDatum(make_tsvector(&prs));
 		isnull = false;

 		/* and insert it into tuple */
 		rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
 											 1, &tsvector_attr_num,
 											 &datum, &isnull);

 		pfree(DatumGetPointer(datum));
 	}

 	return PointerGetDatum(rettuple);
 }