| /*------------------------------------------------------------------------- |
| * |
| * ts_utils.c |
| * various support functions |
| * |
| * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/tsearch/ts_utils.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| |
| #include <ctype.h> |
| |
| #include "miscadmin.h" |
| #include "tsearch/ts_locale.h" |
| #include "tsearch/ts_utils.h" |
| |
| |
| /* |
| * Given the base name and extension of a tsearch config file, return |
| * its full path name. The base name is assumed to be user-supplied, |
| * and is checked to prevent pathname attacks. The extension is assumed |
| * to be safe. |
| * |
| * The result is a palloc'd string. |
| */ |
| char * |
| get_tsearch_config_filename(const char *basename, |
| const char *extension) |
| { |
| char sharepath[MAXPGPATH]; |
| char *result; |
| |
| /* |
| * We limit the basename to contain a-z, 0-9, and underscores. This may |
| * be overly restrictive, but we don't want to allow access to anything |
| * outside the tsearch_data directory, so for instance '/' *must* be |
| * rejected, and on some platforms '\' and ':' are risky as well. Allowing |
| * uppercase might result in incompatible behavior between case-sensitive |
| * and case-insensitive filesystems, and non-ASCII characters create other |
| * interesting risks, so on the whole a tight policy seems best. |
| */ |
| if (strspn(basename, "abcdefghijklmnopqrstuvwxyz0123456789_") != strlen(basename)) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("invalid text search configuration file name \"%s\"", |
| basename))); |
| |
| get_share_path(my_exec_path, sharepath); |
| result = palloc(MAXPGPATH); |
| snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s", |
| sharepath, basename, extension); |
| |
| return result; |
| } |
| |
| /* |
| * Reads a stop-word file. Each word is run through 'wordop' |
| * function, if given. wordop may either modify the input in-place, |
| * or palloc a new version. |
| */ |
| void |
| readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *)) |
| { |
| char **stop = NULL; |
| |
| s->len = 0; |
| if (fname && *fname) |
| { |
| char *filename = get_tsearch_config_filename(fname, "stop"); |
| tsearch_readline_state trst; |
| char *line; |
| int reallen = 0; |
| |
| if (!tsearch_readline_begin(&trst, filename)) |
| ereport(ERROR, |
| (errcode(ERRCODE_CONFIG_FILE_ERROR), |
| errmsg("could not open stop-word file \"%s\": %m", |
| filename))); |
| |
| while ((line = tsearch_readline(&trst)) != NULL) |
| { |
| char *pbuf = line; |
| |
| /* Trim trailing space */ |
| while (*pbuf && !t_isspace(pbuf)) |
| pbuf += pg_mblen(pbuf); |
| *pbuf = '\0'; |
| |
| /* Skip empty lines */ |
| if (*line == '\0') |
| { |
| pfree(line); |
| continue; |
| } |
| |
| if (s->len >= reallen) |
| { |
| if (reallen == 0) |
| { |
| reallen = 64; |
| stop = (char **) palloc(sizeof(char *) * reallen); |
| } |
| else |
| { |
| reallen *= 2; |
| stop = (char **) repalloc(stop, sizeof(char *) * reallen); |
| } |
| } |
| |
| if (wordop) |
| { |
| stop[s->len] = wordop(line); |
| if (stop[s->len] != line) |
| pfree(line); |
| } |
| else |
| stop[s->len] = line; |
| |
| (s->len)++; |
| } |
| |
| tsearch_readline_end(&trst); |
| pfree(filename); |
| } |
| |
| s->stop = stop; |
| |
| /* Sort to allow binary searching */ |
| if (s->stop && s->len > 0) |
| qsort(s->stop, s->len, sizeof(char *), pg_qsort_strcmp); |
| } |
| |
| bool |
| searchstoplist(StopList *s, char *key) |
| { |
| return (s->stop && s->len > 0 && |
| bsearch(&key, s->stop, s->len, |
| sizeof(char *), pg_qsort_strcmp)); |
| } |