| /*-------------------------------------------------------------------------- |
| * |
| * test_regex.c |
| * Test harness for the regular expression package. |
| * |
| * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * IDENTIFICATION |
| * src/test/modules/test_regex/test_regex.c |
| * |
| * ------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| |
| #include "funcapi.h" |
| #include "miscadmin.h" |
| #include "regex/regex.h" |
| #include "utils/array.h" |
| #include "utils/builtins.h" |
| |
| PG_MODULE_MAGIC; |
| |
| |
/*
 * Options controlling one test_regex() call, as decoded from its flags
 * argument by parse_test_flags().
 */
typedef struct test_re_flags
{
    /* compile flags for Spencer's regex code */
    int         cflags;

    /* execute flags for Spencer's regex code */
    int         eflags;

    /* expected re_info bits */
    long        info;

    /* do it globally (for each occurrence) */
    bool        glob;

    /* report indices not actual strings */
    bool        indices;

    /* expect partial match */
    bool        partial;
} test_re_flags;
| |
| /* cross-call state for test_regex() */ |
| typedef struct test_regex_ctx |
| { |
| test_re_flags re_flags; /* flags */ |
| rm_detail_t details; /* "details" from execution */ |
| text *orig_str; /* data string in original TEXT form */ |
| int nmatches; /* number of places where pattern matched */ |
| int npatterns; /* number of capturing subpatterns */ |
| /* We store start char index and end+1 char index for each match */ |
| /* so the number of entries in match_locs is nmatches * npatterns * 2 */ |
| int *match_locs; /* 0-based character indexes */ |
| int next_match; /* 0-based index of next match to process */ |
| /* workspace for build_test_match_result() */ |
| Datum *elems; /* has npatterns+1 elements */ |
| bool *nulls; /* has npatterns+1 elements */ |
| pg_wchar *wide_str; /* wide-char version of original string */ |
| char *conv_buf; /* conversion buffer, if needed */ |
| int conv_bufsiz; /* size thereof */ |
| } test_regex_ctx; |
| |
| /* Local functions */ |
| static void test_re_compile(text *text_re, int cflags, Oid collation, |
| regex_t *result_re); |
| static void parse_test_flags(test_re_flags *flags, text *opts); |
| static test_regex_ctx *setup_test_matches(text *orig_str, |
| regex_t *cpattern, |
| test_re_flags *flags, |
| Oid collation, |
| bool use_subpatterns); |
| static ArrayType *build_test_info_result(regex_t *cpattern, |
| test_re_flags *flags); |
| static ArrayType *build_test_match_result(test_regex_ctx *matchctx); |
| |
| |
| /* |
| * test_regex(pattern text, string text, flags text) returns setof text[] |
| * |
| * This is largely based on regexp.c's regexp_matches, with additions |
| * for debugging purposes. |
| */ |
| PG_FUNCTION_INFO_V1(test_regex); |
| |
| Datum |
| test_regex(PG_FUNCTION_ARGS) |
| { |
| FuncCallContext *funcctx; |
| test_regex_ctx *matchctx; |
| ArrayType *result_ary; |
| |
| if (SRF_IS_FIRSTCALL()) |
| { |
| text *pattern = PG_GETARG_TEXT_PP(0); |
| text *flags = PG_GETARG_TEXT_PP(2); |
| Oid collation = PG_GET_COLLATION(); |
| test_re_flags re_flags; |
| regex_t cpattern; |
| MemoryContext oldcontext; |
| |
| funcctx = SRF_FIRSTCALL_INIT(); |
| oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); |
| |
| /* Determine options */ |
| parse_test_flags(&re_flags, flags); |
| |
| /* set up the compiled pattern */ |
| test_re_compile(pattern, re_flags.cflags, collation, &cpattern); |
| |
| /* be sure to copy the input string into the multi-call ctx */ |
| matchctx = setup_test_matches(PG_GETARG_TEXT_P_COPY(1), &cpattern, |
| &re_flags, |
| collation, |
| true); |
| |
| /* Pre-create workspace that build_test_match_result needs */ |
| matchctx->elems = (Datum *) palloc(sizeof(Datum) * |
| (matchctx->npatterns + 1)); |
| matchctx->nulls = (bool *) palloc(sizeof(bool) * |
| (matchctx->npatterns + 1)); |
| |
| MemoryContextSwitchTo(oldcontext); |
| funcctx->user_fctx = (void *) matchctx; |
| |
| /* |
| * Return the first result row, which is info equivalent to Tcl's |
| * "regexp -about" output |
| */ |
| result_ary = build_test_info_result(&cpattern, &re_flags); |
| |
| pg_regfree(&cpattern); |
| |
| SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary)); |
| } |
| else |
| { |
| /* Each subsequent row describes one match */ |
| funcctx = SRF_PERCALL_SETUP(); |
| matchctx = (test_regex_ctx *) funcctx->user_fctx; |
| |
| if (matchctx->next_match < matchctx->nmatches) |
| { |
| result_ary = build_test_match_result(matchctx); |
| matchctx->next_match++; |
| SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary)); |
| } |
| } |
| |
| SRF_RETURN_DONE(funcctx); |
| } |
| |
| |
| /* |
| * test_re_compile - compile a RE |
| * |
| * text_re --- the pattern, expressed as a TEXT object |
| * cflags --- compile options for the pattern |
| * collation --- collation to use for LC_CTYPE-dependent behavior |
| * result_re --- output, compiled RE is stored here |
| * |
| * Pattern is given in the database encoding. We internally convert to |
| * an array of pg_wchar, which is what Spencer's regex package wants. |
| * |
| * Caller must eventually pg_regfree the resulting RE to avoid memory leaks. |
| */ |
| static void |
| test_re_compile(text *text_re, int cflags, Oid collation, |
| regex_t *result_re) |
| { |
| int text_re_len = VARSIZE_ANY_EXHDR(text_re); |
| char *text_re_val = VARDATA_ANY(text_re); |
| pg_wchar *pattern; |
| int pattern_len; |
| int regcomp_result; |
| char errMsg[100]; |
| |
| /* Convert pattern string to wide characters */ |
| pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar)); |
| pattern_len = pg_mb2wchar_with_len(text_re_val, |
| pattern, |
| text_re_len); |
| |
| regcomp_result = pg_regcomp(result_re, |
| pattern, |
| pattern_len, |
| cflags, |
| collation); |
| |
| pfree(pattern); |
| |
| if (regcomp_result != REG_OKAY) |
| { |
| /* re didn't compile (no need for pg_regfree, if so) */ |
| |
| /* |
| * Here and in other places in this file, do CHECK_FOR_INTERRUPTS |
| * before reporting a regex error. This is so that if the regex |
| * library aborts and returns REG_CANCEL, we don't print an error |
| * message that implies the regex was invalid. |
| */ |
| CHECK_FOR_INTERRUPTS(); |
| |
| pg_regerror(regcomp_result, result_re, errMsg, sizeof(errMsg)); |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), |
| errmsg("invalid regular expression: %s", errMsg))); |
| } |
| } |
| |
| /* |
| * test_re_execute - execute a RE on pg_wchar data |
| * |
| * Returns true on match, false on no match |
| * Arguments are as for pg_regexec |
| */ |
| static bool |
| test_re_execute(regex_t *re, pg_wchar *data, int data_len, |
| int start_search, |
| rm_detail_t *details, |
| int nmatch, regmatch_t *pmatch, |
| int eflags) |
| { |
| int regexec_result; |
| char errMsg[100]; |
| |
| /* Initialize match locations in case engine doesn't */ |
| details->rm_extend.rm_so = -1; |
| details->rm_extend.rm_eo = -1; |
| for (int i = 0; i < nmatch; i++) |
| { |
| pmatch[i].rm_so = -1; |
| pmatch[i].rm_eo = -1; |
| } |
| |
| /* Perform RE match and return result */ |
| regexec_result = pg_regexec(re, |
| data, |
| data_len, |
| start_search, |
| details, |
| nmatch, |
| pmatch, |
| eflags); |
| |
| if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH) |
| { |
| /* re failed??? */ |
| CHECK_FOR_INTERRUPTS(); |
| pg_regerror(regexec_result, re, errMsg, sizeof(errMsg)); |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), |
| errmsg("regular expression failed: %s", errMsg))); |
| } |
| |
| return (regexec_result == REG_OKAY); |
| } |
| |
| |
| /* |
| * parse_test_flags - parse the flags argument |
| * |
| * flags --- output argument, filled with desired options |
| * opts --- TEXT object, or NULL for defaults |
| */ |
| static void |
| parse_test_flags(test_re_flags *flags, text *opts) |
| { |
| /* these defaults must match Tcl's */ |
| int cflags = REG_ADVANCED; |
| int eflags = 0; |
| long info = 0; |
| |
| flags->glob = false; |
| flags->indices = false; |
| flags->partial = false; |
| |
| if (opts) |
| { |
| char *opt_p = VARDATA_ANY(opts); |
| int opt_len = VARSIZE_ANY_EXHDR(opts); |
| int i; |
| |
| for (i = 0; i < opt_len; i++) |
| { |
| switch (opt_p[i]) |
| { |
| case '-': |
| /* allowed, no-op */ |
| break; |
| case '!': |
| flags->partial = true; |
| break; |
| case '*': |
| /* test requires Unicode --- ignored here */ |
| break; |
| case '0': |
| flags->indices = true; |
| break; |
| |
| /* These flags correspond to user-exposed RE options: */ |
| case 'g': /* global match */ |
| flags->glob = true; |
| break; |
| case 'i': /* case insensitive */ |
| cflags |= REG_ICASE; |
| break; |
| case 'n': /* \n affects ^ $ . [^ */ |
| cflags |= REG_NEWLINE; |
| break; |
| case 'p': /* ~Perl, \n affects . [^ */ |
| cflags |= REG_NLSTOP; |
| cflags &= ~REG_NLANCH; |
| break; |
| case 'w': /* weird, \n affects ^ $ only */ |
| cflags &= ~REG_NLSTOP; |
| cflags |= REG_NLANCH; |
| break; |
| case 'x': /* expanded syntax */ |
| cflags |= REG_EXPANDED; |
| break; |
| |
| /* These flags correspond to Tcl's -xflags options: */ |
| case 'a': |
| cflags |= REG_ADVF; |
| break; |
| case 'b': |
| cflags &= ~REG_ADVANCED; |
| break; |
| case 'c': |
| |
| /* |
| * Tcl calls this TCL_REG_CANMATCH, but it's really |
| * REG_EXPECT. In this implementation we must also set |
| * the partial and indices flags, so that |
| * setup_test_matches and build_test_match_result will |
| * emit the desired data. (They'll emit more fields than |
| * Tcl would, but that's fine.) |
| */ |
| cflags |= REG_EXPECT; |
| flags->partial = true; |
| flags->indices = true; |
| break; |
| case 'e': |
| cflags &= ~REG_ADVANCED; |
| cflags |= REG_EXTENDED; |
| break; |
| case 'q': |
| cflags &= ~REG_ADVANCED; |
| cflags |= REG_QUOTE; |
| break; |
| case 'o': /* o for opaque */ |
| cflags |= REG_NOSUB; |
| break; |
| case 's': /* s for start */ |
| cflags |= REG_BOSONLY; |
| break; |
| case '+': |
| cflags |= REG_FAKE; |
| break; |
| case ',': |
| cflags |= REG_PROGRESS; |
| break; |
| case '.': |
| cflags |= REG_DUMP; |
| break; |
| case ':': |
| eflags |= REG_MTRACE; |
| break; |
| case ';': |
| eflags |= REG_FTRACE; |
| break; |
| case '^': |
| eflags |= REG_NOTBOL; |
| break; |
| case '$': |
| eflags |= REG_NOTEOL; |
| break; |
| case 't': |
| cflags |= REG_EXPECT; |
| break; |
| case '%': |
| eflags |= REG_SMALL; |
| break; |
| |
| /* These flags define expected info bits: */ |
| case 'A': |
| info |= REG_UBSALNUM; |
| break; |
| case 'B': |
| info |= REG_UBRACES; |
| break; |
| case 'E': |
| info |= REG_UBBS; |
| break; |
| case 'H': |
| info |= REG_ULOOKAROUND; |
| break; |
| case 'I': |
| info |= REG_UIMPOSSIBLE; |
| break; |
| case 'L': |
| info |= REG_ULOCALE; |
| break; |
| case 'M': |
| info |= REG_UUNPORT; |
| break; |
| case 'N': |
| info |= REG_UEMPTYMATCH; |
| break; |
| case 'P': |
| info |= REG_UNONPOSIX; |
| break; |
| case 'Q': |
| info |= REG_UBOUNDS; |
| break; |
| case 'R': |
| info |= REG_UBACKREF; |
| break; |
| case 'S': |
| info |= REG_UUNSPEC; |
| break; |
| case 'T': |
| info |= REG_USHORTEST; |
| break; |
| case 'U': |
| info |= REG_UPBOTCH; |
| break; |
| |
| default: |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("invalid regular expression test option: \"%.*s\"", |
| pg_mblen(opt_p + i), opt_p + i))); |
| break; |
| } |
| } |
| } |
| flags->cflags = cflags; |
| flags->eflags = eflags; |
| flags->info = info; |
| } |
| |
| /* |
| * setup_test_matches --- do the initial matching |
| * |
| * To simplify memory management, we do all the matching in one swoop. |
| * The returned test_regex_ctx contains the locations of all the substrings |
| * matching the pattern. |
| */ |
| static test_regex_ctx * |
| setup_test_matches(text *orig_str, |
| regex_t *cpattern, test_re_flags *re_flags, |
| Oid collation, |
| bool use_subpatterns) |
| { |
| test_regex_ctx *matchctx = palloc0(sizeof(test_regex_ctx)); |
| int eml = pg_database_encoding_max_length(); |
| int orig_len; |
| pg_wchar *wide_str; |
| int wide_len; |
| regmatch_t *pmatch; |
| int pmatch_len; |
| int array_len; |
| int array_idx; |
| int prev_match_end; |
| int start_search; |
| int maxlen = 0; /* largest fetch length in characters */ |
| |
| /* save flags */ |
| matchctx->re_flags = *re_flags; |
| |
| /* save original string --- we'll extract result substrings from it */ |
| matchctx->orig_str = orig_str; |
| |
| /* convert string to pg_wchar form for matching */ |
| orig_len = VARSIZE_ANY_EXHDR(orig_str); |
| wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1)); |
| wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len); |
| |
| /* do we want to remember subpatterns? */ |
| if (use_subpatterns && cpattern->re_nsub > 0) |
| { |
| matchctx->npatterns = cpattern->re_nsub + 1; |
| pmatch_len = cpattern->re_nsub + 1; |
| } |
| else |
| { |
| use_subpatterns = false; |
| matchctx->npatterns = 1; |
| pmatch_len = 1; |
| } |
| |
| /* temporary output space for RE package */ |
| pmatch = palloc(sizeof(regmatch_t) * pmatch_len); |
| |
| /* |
| * the real output space (grown dynamically if needed) |
| * |
| * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather |
| * than at 2^27 |
| */ |
| array_len = re_flags->glob ? 255 : 31; |
| matchctx->match_locs = (int *) palloc(sizeof(int) * array_len); |
| array_idx = 0; |
| |
| /* search for the pattern, perhaps repeatedly */ |
| prev_match_end = 0; |
| start_search = 0; |
| while (test_re_execute(cpattern, wide_str, wide_len, |
| start_search, |
| &matchctx->details, |
| pmatch_len, pmatch, |
| re_flags->eflags)) |
| { |
| /* enlarge output space if needed */ |
| while (array_idx + matchctx->npatterns * 2 + 1 > array_len) |
| { |
| array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */ |
| if (array_len > MaxAllocSize / sizeof(int)) |
| ereport(ERROR, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("too many regular expression matches"))); |
| matchctx->match_locs = (int *) repalloc(matchctx->match_locs, |
| sizeof(int) * array_len); |
| } |
| |
| /* save this match's locations */ |
| for (int i = 0; i < matchctx->npatterns; i++) |
| { |
| int so = pmatch[i].rm_so; |
| int eo = pmatch[i].rm_eo; |
| |
| matchctx->match_locs[array_idx++] = so; |
| matchctx->match_locs[array_idx++] = eo; |
| if (so >= 0 && eo >= 0 && (eo - so) > maxlen) |
| maxlen = (eo - so); |
| } |
| matchctx->nmatches++; |
| prev_match_end = pmatch[0].rm_eo; |
| |
| /* if not glob, stop after one match */ |
| if (!re_flags->glob) |
| break; |
| |
| /* |
| * Advance search position. Normally we start the next search at the |
| * end of the previous match; but if the match was of zero length, we |
| * have to advance by one character, or we'd just find the same match |
| * again. |
| */ |
| start_search = prev_match_end; |
| if (pmatch[0].rm_so == pmatch[0].rm_eo) |
| start_search++; |
| if (start_search > wide_len) |
| break; |
| } |
| |
| /* |
| * If we had no match, but "partial" and "indices" are set, emit the |
| * details. |
| */ |
| if (matchctx->nmatches == 0 && re_flags->partial && re_flags->indices) |
| { |
| /* enlarge output space if needed */ |
| while (array_idx + matchctx->npatterns * 2 + 1 > array_len) |
| { |
| array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */ |
| if (array_len > MaxAllocSize / sizeof(int)) |
| ereport(ERROR, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("too many regular expression matches"))); |
| matchctx->match_locs = (int *) repalloc(matchctx->match_locs, |
| sizeof(int) * array_len); |
| } |
| |
| matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_so; |
| matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_eo; |
| /* we don't have pmatch data, so emit -1 */ |
| for (int i = 1; i < matchctx->npatterns; i++) |
| { |
| matchctx->match_locs[array_idx++] = -1; |
| matchctx->match_locs[array_idx++] = -1; |
| } |
| matchctx->nmatches++; |
| } |
| |
| Assert(array_idx <= array_len); |
| |
| if (eml > 1) |
| { |
| int64 maxsiz = eml * (int64) maxlen; |
| int conv_bufsiz; |
| |
| /* |
| * Make the conversion buffer large enough for any substring of |
| * interest. |
| * |
| * Worst case: assume we need the maximum size (maxlen*eml), but take |
| * advantage of the fact that the original string length in bytes is |
| * an upper bound on the byte length of any fetched substring (and we |
| * know that len+1 is safe to allocate because the varlena header is |
| * longer than 1 byte). |
| */ |
| if (maxsiz > orig_len) |
| conv_bufsiz = orig_len + 1; |
| else |
| conv_bufsiz = maxsiz + 1; /* safe since maxsiz < 2^30 */ |
| |
| matchctx->conv_buf = palloc(conv_bufsiz); |
| matchctx->conv_bufsiz = conv_bufsiz; |
| matchctx->wide_str = wide_str; |
| } |
| else |
| { |
| /* No need to keep the wide string if we're in a single-byte charset. */ |
| pfree(wide_str); |
| matchctx->wide_str = NULL; |
| matchctx->conv_buf = NULL; |
| matchctx->conv_bufsiz = 0; |
| } |
| |
| /* Clean up temp storage */ |
| pfree(pmatch); |
| |
| return matchctx; |
| } |
| |
| /* |
| * build_test_info_result - build output array describing compiled regexp |
| * |
| * This borrows some code from Tcl's TclRegAbout(). |
| */ |
| static ArrayType * |
| build_test_info_result(regex_t *cpattern, test_re_flags *flags) |
| { |
| /* Translation data for flag bits in regex_t.re_info */ |
| struct infoname |
| { |
| int bit; |
| const char *text; |
| }; |
| static const struct infoname infonames[] = { |
| {REG_UBACKREF, "REG_UBACKREF"}, |
| {REG_ULOOKAROUND, "REG_ULOOKAROUND"}, |
| {REG_UBOUNDS, "REG_UBOUNDS"}, |
| {REG_UBRACES, "REG_UBRACES"}, |
| {REG_UBSALNUM, "REG_UBSALNUM"}, |
| {REG_UPBOTCH, "REG_UPBOTCH"}, |
| {REG_UBBS, "REG_UBBS"}, |
| {REG_UNONPOSIX, "REG_UNONPOSIX"}, |
| {REG_UUNSPEC, "REG_UUNSPEC"}, |
| {REG_UUNPORT, "REG_UUNPORT"}, |
| {REG_ULOCALE, "REG_ULOCALE"}, |
| {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"}, |
| {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"}, |
| {REG_USHORTEST, "REG_USHORTEST"}, |
| {0, NULL} |
| }; |
| const struct infoname *inf; |
| Datum elems[lengthof(infonames) + 1]; |
| int nresults = 0; |
| char buf[80]; |
| int dims[1]; |
| int lbs[1]; |
| |
| /* Set up results: first, the number of subexpressions */ |
| snprintf(buf, sizeof(buf), "%d", (int) cpattern->re_nsub); |
| elems[nresults++] = PointerGetDatum(cstring_to_text(buf)); |
| |
| /* Report individual info bit states */ |
| for (inf = infonames; inf->bit != 0; inf++) |
| { |
| if (cpattern->re_info & inf->bit) |
| { |
| if (flags->info & inf->bit) |
| elems[nresults++] = PointerGetDatum(cstring_to_text(inf->text)); |
| else |
| { |
| snprintf(buf, sizeof(buf), "unexpected %s!", inf->text); |
| elems[nresults++] = PointerGetDatum(cstring_to_text(buf)); |
| } |
| } |
| else |
| { |
| if (flags->info & inf->bit) |
| { |
| snprintf(buf, sizeof(buf), "missing %s!", inf->text); |
| elems[nresults++] = PointerGetDatum(cstring_to_text(buf)); |
| } |
| } |
| } |
| |
| /* And form an array */ |
| dims[0] = nresults; |
| lbs[0] = 1; |
| /* XXX: this hardcodes assumptions about the text type */ |
| return construct_md_array(elems, NULL, 1, dims, lbs, |
| TEXTOID, -1, false, TYPALIGN_INT); |
| } |
| |
| /* |
| * build_test_match_result - build output array for current match |
| * |
| * Note that if the indices flag is set, we don't need any strings, |
| * just the location data. |
| */ |
| static ArrayType * |
| build_test_match_result(test_regex_ctx *matchctx) |
| { |
| char *buf = matchctx->conv_buf; |
| Datum *elems = matchctx->elems; |
| bool *nulls = matchctx->nulls; |
| bool indices = matchctx->re_flags.indices; |
| char bufstr[80]; |
| int dims[1]; |
| int lbs[1]; |
| int loc; |
| int i; |
| |
| /* Extract matching substrings from the original string */ |
| loc = matchctx->next_match * matchctx->npatterns * 2; |
| for (i = 0; i < matchctx->npatterns; i++) |
| { |
| int so = matchctx->match_locs[loc++]; |
| int eo = matchctx->match_locs[loc++]; |
| |
| if (indices) |
| { |
| /* Report eo this way for consistency with Tcl */ |
| snprintf(bufstr, sizeof(bufstr), "%d %d", |
| so, so < 0 ? eo : eo - 1); |
| elems[i] = PointerGetDatum(cstring_to_text(bufstr)); |
| nulls[i] = false; |
| } |
| else if (so < 0 || eo < 0) |
| { |
| elems[i] = (Datum) 0; |
| nulls[i] = true; |
| } |
| else if (buf) |
| { |
| int len = pg_wchar2mb_with_len(matchctx->wide_str + so, |
| buf, |
| eo - so); |
| |
| Assert(len < matchctx->conv_bufsiz); |
| elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len)); |
| nulls[i] = false; |
| } |
| else |
| { |
| elems[i] = DirectFunctionCall3(text_substr, |
| PointerGetDatum(matchctx->orig_str), |
| Int32GetDatum(so + 1), |
| Int32GetDatum(eo - so)); |
| nulls[i] = false; |
| } |
| } |
| |
| /* In EXPECT indices mode, also report the "details" */ |
| if (indices && (matchctx->re_flags.cflags & REG_EXPECT)) |
| { |
| int so = matchctx->details.rm_extend.rm_so; |
| int eo = matchctx->details.rm_extend.rm_eo; |
| |
| snprintf(bufstr, sizeof(bufstr), "%d %d", |
| so, so < 0 ? eo : eo - 1); |
| elems[i] = PointerGetDatum(cstring_to_text(bufstr)); |
| nulls[i] = false; |
| i++; |
| } |
| |
| /* And form an array */ |
| dims[0] = i; |
| lbs[0] = 1; |
| /* XXX: this hardcodes assumptions about the text type */ |
| return construct_md_array(elems, nulls, 1, dims, lbs, |
| TEXTOID, -1, false, TYPALIGN_INT); |
| } |