src/backend/tsearch/dict_synonym.c - age - Git at Google

 /*-------------------------------------------------------------------------
  *
  * dict_synonym.c
  *		Synonym dictionary: replace word by its synonym
  *
  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
  *
  *
  * IDENTIFICATION
  *	  src/backend/tsearch/dict_synonym.c
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"

 #include "commands/defrem.h"
 #include "tsearch/ts_locale.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"

 /*
  * One entry of the synonym table: a word to match and its replacement.
  */
 typedef struct
 {
 	char	   *in;				/* word to match; sort/search key used by
 								 * compareSyn */
 	char	   *out;			/* replacement word returned by
 								 * dsynonym_lexize */
 	int			outlen;			/* byte count used when copying 'out' in
 								 * dsynonym_lexize */
 	uint16		flags;			/* copied into the resulting TSLexeme's flags;
 								 * set to TSL_PREFIX or 0 by findwrd */
 } Syn;

 /*
  * Synonym dictionary state: built by dsynonym_init and consulted by
  * dsynonym_lexize.
  */
 typedef struct
 {
 	int			len;			/* number of entries in syn (while the file is
 								 * being loaded: allocated capacity) */
 	Syn		   *syn;			/* entries, sorted with compareSyn once
 								 * loading is complete */
 	bool		case_sensitive; /* if false, stored words and lookup keys are
 								 * lowercased */
 } DictSyn;

 /*
  * Locate the next whitespace-delimited word in the string 'in'.
  *
  * Returns a pointer to the first byte of the word and stores in *end a
  * pointer to the byte just past the word.  If only whitespace (or nothing)
  * remains, returns NULL and sets *end to NULL; *flags is left untouched in
  * that case.
  *
  * When 'flags' is not NULL, a single-byte '*' as the last character of the
  * word is not counted as part of the word: *end then points at the '*' and
  * *flags is set to TSL_PREFIX.  Otherwise *flags is set to 0.  When 'flags'
  * is NULL a trailing '*' is treated like any other word character.
  */
 static char *
 findwrd(char *in, char **end, uint16 *flags)
 {
 	char	   *cur = in;
 	char	   *word;
 	char	   *last;

 	/* Step over leading whitespace, one (possibly multibyte) char at a time */
 	for (; *cur != '\0' && t_isspace(cur); cur += pg_mblen(cur))
 		;

 	/* Nothing left on the line */
 	if (*cur == '\0')
 	{
 		*end = NULL;
 		return NULL;
 	}

 	word = cur;
 	last = cur;

 	/* Advance to the end of the word, remembering its final character */
 	while (*cur != '\0' && !t_isspace(cur))
 	{
 		last = cur;
 		cur += pg_mblen(cur);
 	}

 	/* Caller is not interested in the prefix marker */
 	if (flags == NULL)
 	{
 		*end = cur;
 		return word;
 	}

 	if (cur - last == 1 && t_iseq(last, '*'))
 	{
 		/* Word ends in '*': report it as a flag and cut it off the word */
 		*flags = TSL_PREFIX;
 		*end = last;
 	}
 	else
 	{
 		*flags = 0;
 		*end = cur;
 	}

 	return word;
 }

 static int
 compareSyn(const void *a, const void *b)
 {
 	return strcmp(((const Syn *) a)->in, ((const Syn *) b)->in);
 }


 /*
  * dsynonym_init
  *		Build a synonym dictionary from its option list.
  *
  * Argument 0 is a List of DefElem options:
  *	  "synonyms"	   name of the synonym file (required); it is resolved
  *					   through get_tsearch_config_filename() with the
  *					   extension "syn"
  *	  "casesensitive"  boolean; defaults to false
  * An unrecognized option, a missing "synonyms" option, or a file that cannot
  * be opened is reported with ereport(ERROR).
  *
  * Every line of the file supplies the first two whitespace-separated words
  * as a (word, synonym) pair; anything after the second word is not examined.
  * Empty lines and lines with a single word are skipped silently.  A trailing
  * '*' on the synonym is stripped and recorded as TSL_PREFIX in the entry's
  * flags (see findwrd).  Unless the dictionary is case sensitive, both words
  * are stored lowercased.
  *
  * Returns a pointer to a DictSyn whose entries are sorted with compareSyn,
  * which is the order dsynonym_lexize's bsearch() relies on.
  */
 Datum
 dsynonym_init(PG_FUNCTION_ARGS)
 {
 	List	   *dictoptions = (List *) PG_GETARG_POINTER(0);
 	DictSyn    *d;
 	ListCell   *l;
 	char	   *filename = NULL;
 	bool		case_sensitive = false;
 	tsearch_readline_state trst;
 	char	   *starti,
 			   *starto,
 			   *end = NULL;
 	int			cur = 0;
 	char	   *line = NULL;
 	uint16		flags = 0;

 	/* Parse dictionary options */
 	foreach(l, dictoptions)
 	{
 		DefElem    *defel = (DefElem *) lfirst(l);

 		if (strcmp(defel->defname, "synonyms") == 0)
 			filename = defGetString(defel);
 		else if (strcmp(defel->defname, "casesensitive") == 0)
 			case_sensitive = defGetBoolean(defel);
 		else
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 					 errmsg("unrecognized synonym parameter: \"%s\"",
 							defel->defname)));
 	}

 	if (!filename)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("missing Synonyms parameter")));

 	filename = get_tsearch_config_filename(filename, "syn");

 	if (!tsearch_readline_begin(&trst, filename))
 		ereport(ERROR,
 				(errcode(ERRCODE_CONFIG_FILE_ERROR),
 				 errmsg("could not open synonym file \"%s\": %m",
 						filename)));

 	/* Zero-filled, so d->len starts at 0 and d->syn at NULL */
 	d = (DictSyn *) palloc0(sizeof(DictSyn));

 	while ((line = tsearch_readline(&trst)) != NULL)
 	{
 		starti = findwrd(line, &end, NULL);
 		if (!starti)
 		{
 			/* Empty line */
 			goto skipline;
 		}
 		if (*end == '\0')
 		{
 			/* A line with only one word. Ignore silently. */
 			goto skipline;
 		}
 		*end = '\0';

 		starto = findwrd(end + 1, &end, &flags);
 		if (!starto)
 		{
 			/* A line with only one word (+whitespace). Ignore silently. */
 			goto skipline;
 		}
 		*end = '\0';

 		/*
 		 * starti now points to the first word, and starto to the second word
 		 * on the line, with a \0 terminator at the end of both words.
 		 */

 		/* Grow the array: 64 entries to start with, doubling afterwards */
 		if (cur >= d->len)
 		{
 			if (d->len == 0)
 			{
 				d->len = 64;
 				d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
 			}
 			else
 			{
 				d->len *= 2;
 				d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
 			}
 		}

 		if (case_sensitive)
 		{
 			d->syn[cur].in = pstrdup(starti);
 			d->syn[cur].out = pstrdup(starto);
 		}
 		else
 		{
 			d->syn[cur].in = lowerstr(starti);
 			d->syn[cur].out = lowerstr(starto);
 		}

 		/*
 		 * Measure the string actually stored in .out rather than the raw
 		 * text from the file: dsynonym_lexize copies outlen bytes of .out,
 		 * so the two must agree even if lowerstr() hands back a string whose
 		 * byte length differs from its input.
 		 */
 		d->syn[cur].outlen = strlen(d->syn[cur].out);
 		d->syn[cur].flags = flags;

 		cur++;

 skipline:
 		pfree(line);
 	}

 	tsearch_readline_end(&trst);

 	/* From here on len is the number of entries, not the allocated size */
 	d->len = cur;

 	/*
 	 * Zero or one entry needs no sorting; skipping the call also avoids
 	 * passing the still-NULL d->syn to qsort when the file had no entries.
 	 */
 	if (d->len > 1)
 		qsort(d->syn, d->len, sizeof(Syn), compareSyn);

 	d->case_sensitive = case_sensitive;

 	PG_RETURN_POINTER(d);
 }

 /*
  * dsynonym_lexize
  *		Look up one word in the synonym dictionary.
  *
  * Arguments: 0 = the DictSyn built by dsynonym_init, 1 = pointer to the
  * word's bytes, 2 = its length in bytes.
  *
  * Returns NULL when the word is empty, the dictionary has no entries, or no
  * entry matches.  Otherwise returns a palloc0'd array of two TSLexeme whose
  * first element carries a copy of the synonym and the entry's flags; the
  * second element is left zero-filled (presumably the array terminator
  * expected by the caller -- confirm against the TSLexeme contract).
  */
 Datum
 dsynonym_lexize(PG_FUNCTION_ARGS)
 {
 	DictSyn    *dict = (DictSyn *) PG_GETARG_POINTER(0);
 	char	   *word = (char *) PG_GETARG_POINTER(1);
 	int32		wordlen = PG_GETARG_INT32(2);
 	Syn			probe;
 	Syn		   *match;
 	TSLexeme   *result;

 	/* note: dict->len test protects against Solaris bsearch-of-no-items bug */
 	if (wordlen <= 0 || dict->len <= 0)
 		PG_RETURN_POINTER(NULL);

 	/* Only 'in' is read by compareSyn; build it the way init stored keys */
 	probe.out = NULL;
 	probe.in = dict->case_sensitive
 		? pnstrdup(word, wordlen)
 		: lowerstr_with_len(word, wordlen);

 	match = (Syn *) bsearch(&probe, dict->syn, dict->len, sizeof(Syn),
 							compareSyn);
 	pfree(probe.in);

 	if (match == NULL)
 		PG_RETURN_POINTER(NULL);

 	result = palloc0(sizeof(TSLexeme) * 2);
 	result[0].lexeme = pnstrdup(match->out, match->outlen);
 	result[0].flags = match->flags;

 	PG_RETURN_POINTER(result);
 }
	/*-------------------------------------------------------------------------
	*
	* dict_synonym.c
	* Synonym dictionary: replace word by its synonym
	*
	* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
	*
	*
	* IDENTIFICATION
	* src/backend/tsearch/dict_synonym.c
	*
	*-------------------------------------------------------------------------
	*/
	#include "postgres.h"

	#include "commands/defrem.h"
	#include "tsearch/ts_locale.h"
	#include "tsearch/ts_utils.h"
	#include "utils/builtins.h"

	/*
	 * One entry of the synonym table: a word to match and its replacement.
	 */
	typedef struct
	{
	/* word to match; sort/search key used by compareSyn */
	char *in;
	/* replacement word returned by dsynonym_lexize */
	char *out;
	/* byte count used when copying 'out' in dsynonym_lexize */
	int outlen;
	/* copied into the resulting TSLexeme's flags */
	uint16 flags;
	} Syn;

	/*
	 * Synonym dictionary state: built by dsynonym_init and consulted by
	 * dsynonym_lexize.
	 */
	typedef struct
	{
	int len; /* number of entries in syn (allocated capacity while loading) */
	/* entries, sorted with compareSyn once loading is complete */
	Syn *syn;
	/* if false, stored words and lookup keys are lowercased */
	bool case_sensitive;
	} DictSyn;

	/*
	 * Finds the next whitespace-delimited word within the 'in' string.
	 *
	 * Returns a pointer to the first character of the word, and stores in
	 * *end a pointer to the next byte after the last character in the word.
	 * If nothing but whitespace remains, returns NULL and sets *end to NULL.
	 *
	 * When 'flags' is not NULL, a single-byte '*' at the end of the word is
	 * not treated as a word character: *end points at the '*' and *flags is
	 * set to TSL_PREFIX; otherwise *flags is set to 0.
	 */
	static char *
	findwrd(char *in, char **end, uint16 *flags)
	{
		char	   *start;
		char	   *lastchar;

		/* Skip leading spaces */
		while (*in && t_isspace(in))
			in += pg_mblen(in);

		/* Return NULL on empty lines */
		if (*in == '\0')
		{
			*end = NULL;
			return NULL;
		}

		lastchar = start = in;

		/* Find end of word, remembering where its last character starts */
		while (*in && !t_isspace(in))
		{
			lastchar = in;
			in += pg_mblen(in);
		}

		if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags)
		{
			/* Trailing '*': report as prefix flag and exclude it from the word */
			*flags = TSL_PREFIX;
			*end = lastchar;
		}
		else
		{
			if (flags)
				*flags = 0;
			*end = in;
		}

		return start;
	}

	/*
	 * qsort/bsearch comparator for Syn entries: orders them by strcmp() of
	 * their 'in' strings.
	 */
	static int
	compareSyn(const void *a, const void *b)
	{
		return strcmp(((const Syn *) a)->in, ((const Syn *) b)->in);
	}


	/*
	 * dsynonym_init
	 *		Build a synonym dictionary from its option list.
	 *
	 * Argument 0 is a List of DefElem options: "synonyms" (required, name of
	 * the synonym file, resolved through get_tsearch_config_filename() with
	 * extension "syn") and "casesensitive" (boolean, default false).  An
	 * unrecognized option, a missing "synonyms" option, or a file that cannot
	 * be opened is reported with ereport(ERROR).
	 *
	 * Every line of the file supplies its first two whitespace-separated
	 * words as a (word, synonym) pair; empty and single-word lines are
	 * skipped silently.  A trailing '*' on the synonym is stripped and
	 * recorded as TSL_PREFIX in the entry's flags (see findwrd).  Unless the
	 * dictionary is case sensitive, both words are stored lowercased.
	 *
	 * Returns a pointer to a DictSyn whose entries are sorted with
	 * compareSyn, the order dsynonym_lexize's bsearch() relies on.
	 */
	Datum
	dsynonym_init(PG_FUNCTION_ARGS)
	{
		List	   *dictoptions = (List *) PG_GETARG_POINTER(0);
		DictSyn    *d;
		ListCell   *l;
		char	   *filename = NULL;
		bool		case_sensitive = false;
		tsearch_readline_state trst;
		char	   *starti,
				   *starto,
				   *end = NULL;
		int			cur = 0;
		char	   *line = NULL;
		uint16		flags = 0;

		/* Parse dictionary options */
		foreach(l, dictoptions)
		{
			DefElem    *defel = (DefElem *) lfirst(l);

			if (strcmp(defel->defname, "synonyms") == 0)
				filename = defGetString(defel);
			else if (strcmp(defel->defname, "casesensitive") == 0)
				case_sensitive = defGetBoolean(defel);
			else
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("unrecognized synonym parameter: \"%s\"",
								defel->defname)));
		}

		if (!filename)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("missing Synonyms parameter")));

		filename = get_tsearch_config_filename(filename, "syn");

		if (!tsearch_readline_begin(&trst, filename))
			ereport(ERROR,
					(errcode(ERRCODE_CONFIG_FILE_ERROR),
					 errmsg("could not open synonym file \"%s\": %m",
							filename)));

		/* Zero-filled, so d->len starts at 0 and d->syn at NULL */
		d = (DictSyn *) palloc0(sizeof(DictSyn));

		while ((line = tsearch_readline(&trst)) != NULL)
		{
			starti = findwrd(line, &end, NULL);
			if (!starti)
			{
				/* Empty line */
				goto skipline;
			}
			if (*end == '\0')
			{
				/* A line with only one word. Ignore silently. */
				goto skipline;
			}
			*end = '\0';

			starto = findwrd(end + 1, &end, &flags);
			if (!starto)
			{
				/* A line with only one word (+whitespace). Ignore silently. */
				goto skipline;
			}
			*end = '\0';

			/*
			 * starti now points to the first word, and starto to the second
			 * word on the line, with a \0 terminator at the end of both words.
			 */

			/* Grow the array: 64 entries to start with, doubling afterwards */
			if (cur >= d->len)
			{
				if (d->len == 0)
				{
					d->len = 64;
					d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
				}
				else
				{
					d->len *= 2;
					d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
				}
			}

			if (case_sensitive)
			{
				d->syn[cur].in = pstrdup(starti);
				d->syn[cur].out = pstrdup(starto);
			}
			else
			{
				d->syn[cur].in = lowerstr(starti);
				d->syn[cur].out = lowerstr(starto);
			}

			/*
			 * Measure the string actually stored in .out rather than the raw
			 * text from the file: dsynonym_lexize copies outlen bytes of
			 * .out, so the two must agree even if lowerstr() hands back a
			 * string whose byte length differs from its input.
			 */
			d->syn[cur].outlen = strlen(d->syn[cur].out);
			d->syn[cur].flags = flags;

			cur++;

	skipline:
			pfree(line);
		}

		tsearch_readline_end(&trst);

		/* From here on len is the number of entries, not the allocated size */
		d->len = cur;

		/*
		 * Zero or one entry needs no sorting; skipping the call also avoids
		 * passing the still-NULL d->syn to qsort when the file had no entries.
		 */
		if (d->len > 1)
			qsort(d->syn, d->len, sizeof(Syn), compareSyn);

		d->case_sensitive = case_sensitive;

		PG_RETURN_POINTER(d);
	}

	Datum
	dsynonym_lexize(PG_FUNCTION_ARGS)
	{
	DictSyn d = (DictSyn ) PG_GETARG_POINTER(0);
	char in = (char ) PG_GETARG_POINTER(1);
	int32 len = PG_GETARG_INT32(2);
	Syn key,
	*found;
	TSLexeme *res;

	/* note: d->len test protects against Solaris bsearch-of-no-items bug */
	if (len <= 0 \|\| d->len <= 0)
	PG_RETURN_POINTER(NULL);

	if (d->case_sensitive)
	key.in = pnstrdup(in, len);
	else
	key.in = lowerstr_with_len(in, len);

	key.out = NULL;

	found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
	pfree(key.in);

	if (!found)
	PG_RETURN_POINTER(NULL);

	res = palloc0(sizeof(TSLexeme) * 2);
	res[0].lexeme = pnstrdup(found->out, found->outlen);
	res[0].flags = found->flags;

	PG_RETURN_POINTER(res);
	}