| /* |
| * re_*exec and friends - match REs |
| * |
| * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. |
| * |
| * Development of this software was funded, in part, by Cray Research Inc., |
| * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics |
| * Corporation, none of whom are responsible for the results. The author |
| * thanks all of them. |
| * |
| * Redistribution and use in source and binary forms -- with or without |
| * modification -- are permitted for any purpose, provided that |
| * redistributions in source form retain this entire copyright notice and |
| * indicate the origin and nature of any modifications. |
| * |
| * I'd appreciate being given credit for this package in the documentation |
| * of software which uses it, but that is not a requirement. |
| * |
| * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, |
| * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY |
| * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL |
| * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; |
| * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
| * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
| * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
| * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| * |
| * src/backend/regex/regexec.c |
| * |
| */ |
| |
| #include "regex/regguts.h" |
| |
| |
| |
| /* lazy-DFA representation */ |
| struct arcp |
| { /* "pointer" to an outarc */ |
| struct sset *ss; |
| color co; |
| }; |
| |
| struct sset |
| { /* state set */ |
| unsigned *states; /* pointer to bitvector */ |
| unsigned hash; /* hash of bitvector */ |
| #define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw)) |
| #define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \ |
| memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0)) |
| int flags; |
| #define STARTER 01 /* the initial state set */ |
| #define POSTSTATE 02 /* includes the goal state */ |
| #define LOCKED 04 /* locked in cache */ |
| #define NOPROGRESS 010 /* zero-progress state set */ |
| struct arcp ins; /* chain of inarcs pointing here */ |
| chr *lastseen; /* last entered on arrival here */ |
| struct sset **outs; /* outarc vector indexed by color */ |
| struct arcp *inchain; /* chain-pointer vector for outarcs */ |
| }; |
| |
| struct dfa |
| { |
| int nssets; /* size of cache */ |
| int nssused; /* how many entries occupied yet */ |
| int nstates; /* number of states */ |
| int ncolors; /* length of outarc and inchain vectors */ |
| int wordsper; /* length of state-set bitvectors */ |
| struct sset *ssets; /* state-set cache */ |
| unsigned *statesarea; /* bitvector storage */ |
| unsigned *work; /* pointer to work area within statesarea */ |
| struct sset **outsarea; /* outarc-vector storage */ |
| struct arcp *incarea; /* inchain storage */ |
| struct cnfa *cnfa; |
| struct colormap *cm; |
| chr *lastpost; /* location of last cache-flushed success */ |
| chr *lastnopr; /* location of last cache-flushed NOPROGRESS */ |
| struct sset *search; /* replacement-search-pointer memory */ |
| int backno; /* if DFA for a backref, subno it refers to */ |
| short backmin; /* min repetitions for backref */ |
| short backmax; /* max repetitions for backref */ |
| bool ismalloced; /* should this struct dfa be freed? */ |
| bool arraysmalloced; /* should its subsidiary arrays be freed? */ |
| }; |
| |
| #define WORK 1 /* number of work bitvectors needed */ |
| |
| /* setup for non-malloc allocation for small cases */ |
| #define FEWSTATES 20 /* must be less than UBITS */ |
| #define FEWCOLORS 15 |
| struct smalldfa |
| { |
| struct dfa dfa; /* must be first */ |
| struct sset ssets[FEWSTATES * 2]; |
| unsigned statesarea[FEWSTATES * 2 + WORK]; |
| struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS]; |
| struct arcp incarea[FEWSTATES * 2 * FEWCOLORS]; |
| }; |
| |
| #define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */ |
| |
| |
| |
| /* internal variables, bundled for easy passing around */ |
| struct vars |
| { |
| regex_t *re; |
| struct guts *g; |
| int eflags; /* copies of arguments */ |
| size_t nmatch; |
| regmatch_t *pmatch; |
| rm_detail_t *details; |
| chr *start; /* start of string */ |
| chr *search_start; /* search start of string */ |
| chr *stop; /* just past end of string */ |
| int err; /* error code if any (0 none) */ |
| struct dfa **subdfas; /* per-tree-subre DFAs */ |
| struct dfa **ladfas; /* per-lacon-subre DFAs */ |
| struct sset **lblastcss; /* per-lacon-subre lookbehind restart data */ |
| chr **lblastcp; /* per-lacon-subre lookbehind restart data */ |
| struct smalldfa dfa1; |
| struct smalldfa dfa2; |
| }; |
| |
| #define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ |
| #define ISERR() VISERR(v) |
| #define VERR(vv,e) ((vv)->err = ((vv)->err ? (vv)->err : (e))) |
| #define ERR(e) VERR(v, e) /* record an error */ |
| #define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */ |
| #define OFF(p) ((p) - v->start) |
| #define LOFF(p) ((long)OFF(p)) |
| |
| |
| |
| /* |
| * forward declarations |
| */ |
| /* === regexec.c === */ |
| static struct dfa *getsubdfa(struct vars *v, struct subre *t); |
| static struct dfa *getladfa(struct vars *v, int n); |
| static int find(struct vars *v, struct cnfa *cnfa, struct colormap *cm); |
| static int cfind(struct vars *v, struct cnfa *cnfa, struct colormap *cm); |
| static int cfindloop(struct vars *v, struct cnfa *cnfa, struct colormap *cm, |
| struct dfa *d, struct dfa *s, chr **coldp); |
| static void zapallsubs(regmatch_t *p, size_t n); |
| static void zaptreesubs(struct vars *v, struct subre *t); |
| static void subset(struct vars *v, struct subre *sub, chr *begin, chr *end); |
| static int cdissect(struct vars *v, struct subre *t, chr *begin, chr *end); |
| static int ccondissect(struct vars *v, struct subre *t, chr *begin, chr *end); |
| static int crevcondissect(struct vars *v, struct subre *t, chr *begin, chr *end); |
| static int cbrdissect(struct vars *v, struct subre *t, chr *begin, chr *end); |
| static int caltdissect(struct vars *v, struct subre *t, chr *begin, chr *end); |
| static int citerdissect(struct vars *v, struct subre *t, chr *begin, chr *end); |
| static int creviterdissect(struct vars *v, struct subre *t, chr *begin, chr *end); |
| |
| /* === rege_dfa.c === */ |
| static chr *longest(struct vars *v, struct dfa *d, |
| chr *start, chr *stop, int *hitstopp); |
| static chr *shortest(struct vars *v, struct dfa *d, chr *start, chr *min, |
| chr *max, chr **coldp, int *hitstopp); |
| static int matchuntil(struct vars *v, struct dfa *d, chr *probe, |
| struct sset **lastcss, chr **lastcp); |
| static chr *dfa_backref(struct vars *v, struct dfa *d, chr *start, |
| chr *min, chr *max, bool shortest); |
| static chr *lastcold(struct vars *v, struct dfa *d); |
| static struct dfa *newdfa(struct vars *v, struct cnfa *cnfa, |
| struct colormap *cm, struct smalldfa *sml); |
| static void freedfa(struct dfa *d); |
| static unsigned hash(unsigned *uv, int n); |
| static struct sset *initialize(struct vars *v, struct dfa *d, chr *start); |
| static struct sset *miss(struct vars *v, struct dfa *d, struct sset *css, |
| color co, chr *cp, chr *start); |
| static int lacon(struct vars *v, struct cnfa *pcnfa, chr *cp, color co); |
| static struct sset *getvacant(struct vars *v, struct dfa *d, chr *cp, |
| chr *start); |
| static struct sset *pickss(struct vars *v, struct dfa *d, chr *cp, |
| chr *start); |
| |
| |
| /* |
| * pg_regexec - match regular expression |
| */ |
| int |
| pg_regexec(regex_t *re, |
| const chr *string, |
| size_t len, |
| size_t search_start, |
| rm_detail_t *details, |
| size_t nmatch, |
| regmatch_t pmatch[], |
| int flags) |
| { |
| struct vars var; |
| struct vars *v = &var; |
| int st; |
| size_t n; |
| size_t i; |
| int backref; |
| |
| #define LOCALMAT 20 |
| regmatch_t mat[LOCALMAT]; |
| |
| #define LOCALDFAS 40 |
| struct dfa *subdfas[LOCALDFAS]; |
| |
| /* sanity checks */ |
| if (re == NULL || string == NULL || re->re_magic != REMAGIC) |
| return REG_INVARG; |
| if (re->re_csize != sizeof(chr)) |
| return REG_MIXED; |
| if (search_start > len) |
| return REG_NOMATCH; |
| |
| /* Initialize locale-dependent support */ |
| pg_set_regex_collation(re->re_collation); |
| |
| /* setup */ |
| v->re = re; |
| v->g = (struct guts *) re->re_guts; |
| if ((v->g->cflags & REG_EXPECT) && details == NULL) |
| return REG_INVARG; |
| if (v->g->info & REG_UIMPOSSIBLE) |
| return REG_NOMATCH; |
| backref = (v->g->info & REG_UBACKREF) ? 1 : 0; |
| v->eflags = flags; |
| if (backref && nmatch <= v->g->nsub) |
| { |
| /* need larger work area */ |
| v->nmatch = v->g->nsub + 1; |
| if (v->nmatch <= LOCALMAT) |
| v->pmatch = mat; |
| else |
| v->pmatch = (regmatch_t *) MALLOC(v->nmatch * sizeof(regmatch_t)); |
| if (v->pmatch == NULL) |
| return REG_ESPACE; |
| zapallsubs(v->pmatch, v->nmatch); |
| } |
| else |
| { |
| /* we can store results directly in caller's array */ |
| v->pmatch = pmatch; |
| /* ensure any extra entries in caller's array are filled with -1 */ |
| if (nmatch > 0) |
| zapallsubs(pmatch, nmatch); |
| /* then forget about extra entries, to avoid useless work in find() */ |
| if (nmatch > v->g->nsub + 1) |
| nmatch = v->g->nsub + 1; |
| v->nmatch = nmatch; |
| } |
| v->details = details; |
| v->start = (chr *) string; |
| v->search_start = (chr *) string + search_start; |
| v->stop = (chr *) string + len; |
| v->err = 0; |
| v->subdfas = NULL; |
| v->ladfas = NULL; |
| v->lblastcss = NULL; |
| v->lblastcp = NULL; |
| /* below this point, "goto cleanup" will behave sanely */ |
| |
| assert(v->g->ntree >= 0); |
| n = (size_t) v->g->ntree; |
| if (n <= LOCALDFAS) |
| v->subdfas = subdfas; |
| else |
| { |
| v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *)); |
| if (v->subdfas == NULL) |
| { |
| st = REG_ESPACE; |
| goto cleanup; |
| } |
| } |
| for (i = 0; i < n; i++) |
| v->subdfas[i] = NULL; |
| |
| assert(v->g->nlacons >= 0); |
| n = (size_t) v->g->nlacons; |
| if (n > 0) |
| { |
| v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *)); |
| if (v->ladfas == NULL) |
| { |
| st = REG_ESPACE; |
| goto cleanup; |
| } |
| for (i = 0; i < n; i++) |
| v->ladfas[i] = NULL; |
| v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *)); |
| v->lblastcp = (chr **) MALLOC(n * sizeof(chr *)); |
| if (v->lblastcss == NULL || v->lblastcp == NULL) |
| { |
| st = REG_ESPACE; |
| goto cleanup; |
| } |
| for (i = 0; i < n; i++) |
| { |
| v->lblastcss[i] = NULL; |
| v->lblastcp[i] = NULL; |
| } |
| } |
| |
| /* do it */ |
| assert(v->g->tree != NULL); |
| if (backref) |
| st = cfind(v, &v->g->tree->cnfa, &v->g->cmap); |
| else |
| st = find(v, &v->g->tree->cnfa, &v->g->cmap); |
| |
| /* on success, ensure caller's match vector is filled correctly */ |
| if (st == REG_OKAY && nmatch > 0) |
| { |
| if (v->pmatch != pmatch) |
| { |
| /* copy portion of match vector over from (larger) work area */ |
| assert(nmatch <= v->nmatch); |
| memcpy(VS(pmatch), VS(v->pmatch), nmatch * sizeof(regmatch_t)); |
| } |
| if (v->g->cflags & REG_NOSUB) |
| { |
| /* don't expose possibly-partial sub-match results to caller */ |
| zapallsubs(pmatch, nmatch); |
| } |
| } |
| |
| /* clean up */ |
| cleanup: |
| if (v->pmatch != pmatch && v->pmatch != mat) |
| FREE(v->pmatch); |
| if (v->subdfas != NULL) |
| { |
| n = (size_t) v->g->ntree; |
| for (i = 0; i < n; i++) |
| { |
| if (v->subdfas[i] != NULL) |
| freedfa(v->subdfas[i]); |
| } |
| if (v->subdfas != subdfas) |
| FREE(v->subdfas); |
| } |
| if (v->ladfas != NULL) |
| { |
| n = (size_t) v->g->nlacons; |
| for (i = 0; i < n; i++) |
| { |
| if (v->ladfas[i] != NULL) |
| freedfa(v->ladfas[i]); |
| } |
| FREE(v->ladfas); |
| } |
| if (v->lblastcss != NULL) |
| FREE(v->lblastcss); |
| if (v->lblastcp != NULL) |
| FREE(v->lblastcp); |
| |
| #ifdef REG_DEBUG |
| if (v->eflags & (REG_FTRACE | REG_MTRACE)) |
| fflush(stdout); |
| #endif |
| |
| return st; |
| } |
| |
| /* |
| * getsubdfa - create or re-fetch the DFA for a tree subre node |
| * |
| * We only need to create the DFA once per overall regex execution. |
| * The DFA will be freed by the cleanup step in pg_regexec(). |
| */ |
| static struct dfa * |
| getsubdfa(struct vars *v, |
| struct subre *t) |
| { |
| struct dfa *d = v->subdfas[t->id]; |
| |
| if (d == NULL) |
| { |
| d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC); |
| if (d == NULL) |
| return NULL; |
| /* set up additional info if this is a backref node */ |
| if (t->op == 'b') |
| { |
| d->backno = t->backno; |
| d->backmin = t->min; |
| d->backmax = t->max; |
| } |
| v->subdfas[t->id] = d; |
| } |
| return d; |
| } |
| |
| /* |
| * getladfa - create or re-fetch the DFA for a LACON subre node |
| * |
| * Same as above, but for LACONs. |
| */ |
| static struct dfa * |
| getladfa(struct vars *v, |
| int n) |
| { |
| assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL); |
| |
| if (v->ladfas[n] == NULL) |
| { |
| struct subre *sub = &v->g->lacons[n]; |
| |
| v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC); |
| /* a LACON can't contain a backref, so nothing else to do */ |
| } |
| return v->ladfas[n]; |
| } |
| |
| /* |
| * find - find a match for the main NFA (no-complications case) |
| */ |
| static int |
| find(struct vars *v, |
| struct cnfa *cnfa, |
| struct colormap *cm) |
| { |
| struct dfa *s; |
| struct dfa *d; |
| chr *begin; |
| chr *end = NULL; |
| chr *cold; |
| chr *open; /* open and close of range of possible starts */ |
| chr *close; |
| int hitend; |
| int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0; |
| |
| /* first, a shot with the search RE */ |
| s = newdfa(v, &v->g->search, cm, &v->dfa1); |
| if (s == NULL) |
| return v->err; |
| MDEBUG(("\nsearch at %ld\n", LOFF(v->start))); |
| cold = NULL; |
| close = shortest(v, s, v->search_start, v->search_start, v->stop, |
| &cold, (int *) NULL); |
| freedfa(s); |
| NOERR(); |
| if (v->g->cflags & REG_EXPECT) |
| { |
| assert(v->details != NULL); |
| if (cold != NULL) |
| v->details->rm_extend.rm_so = OFF(cold); |
| else |
| v->details->rm_extend.rm_so = OFF(v->stop); |
| v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ |
| } |
| if (close == NULL) /* not found */ |
| return REG_NOMATCH; |
| if (v->nmatch == 0) /* found, don't need exact location */ |
| return REG_OKAY; |
| |
| /* find starting point and match */ |
| assert(cold != NULL); |
| open = cold; |
| cold = NULL; |
| MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close))); |
| d = newdfa(v, cnfa, cm, &v->dfa1); |
| if (d == NULL) |
| return v->err; |
| for (begin = open; begin <= close; begin++) |
| { |
| MDEBUG(("\nfind trying at %ld\n", LOFF(begin))); |
| if (shorter) |
| end = shortest(v, d, begin, begin, v->stop, |
| (chr **) NULL, &hitend); |
| else |
| end = longest(v, d, begin, v->stop, &hitend); |
| if (ISERR()) |
| { |
| freedfa(d); |
| return v->err; |
| } |
| if (hitend && cold == NULL) |
| cold = begin; |
| if (end != NULL) |
| break; /* NOTE BREAK OUT */ |
| } |
| assert(end != NULL); /* search RE succeeded so loop should */ |
| freedfa(d); |
| |
| /* and pin down details */ |
| assert(v->nmatch > 0); |
| v->pmatch[0].rm_so = OFF(begin); |
| v->pmatch[0].rm_eo = OFF(end); |
| if (v->g->cflags & REG_EXPECT) |
| { |
| if (cold != NULL) |
| v->details->rm_extend.rm_so = OFF(cold); |
| else |
| v->details->rm_extend.rm_so = OFF(v->stop); |
| v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ |
| } |
| if (v->nmatch == 1) /* no need for submatches */ |
| return REG_OKAY; |
| |
| /* find submatches */ |
| return cdissect(v, v->g->tree, begin, end); |
| } |
| |
| /* |
| * cfind - find a match for the main NFA (with complications) |
| */ |
| static int |
| cfind(struct vars *v, |
| struct cnfa *cnfa, |
| struct colormap *cm) |
| { |
| struct dfa *s; |
| struct dfa *d; |
| chr *cold; |
| int ret; |
| |
| s = newdfa(v, &v->g->search, cm, &v->dfa1); |
| if (s == NULL) |
| return v->err; |
| d = newdfa(v, cnfa, cm, &v->dfa2); |
| if (d == NULL) |
| { |
| freedfa(s); |
| return v->err; |
| } |
| |
| ret = cfindloop(v, cnfa, cm, d, s, &cold); |
| |
| freedfa(d); |
| freedfa(s); |
| NOERR(); |
| if (v->g->cflags & REG_EXPECT) |
| { |
| assert(v->details != NULL); |
| if (cold != NULL) |
| v->details->rm_extend.rm_so = OFF(cold); |
| else |
| v->details->rm_extend.rm_so = OFF(v->stop); |
| v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ |
| } |
| return ret; |
| } |
| |
| /* |
| * cfindloop - the heart of cfind |
| */ |
| static int |
| cfindloop(struct vars *v, |
| struct cnfa *cnfa, |
| struct colormap *cm, |
| struct dfa *d, |
| struct dfa *s, |
| chr **coldp) /* where to put coldstart pointer */ |
| { |
| chr *begin; |
| chr *end; |
| chr *cold; |
| chr *open; /* open and close of range of possible starts */ |
| chr *close; |
| chr *estart; |
| chr *estop; |
| int er; |
| int shorter = v->g->tree->flags & SHORTER; |
| int hitend; |
| |
| assert(d != NULL && s != NULL); |
| cold = NULL; |
| close = v->search_start; |
| do |
| { |
| /* Search with the search RE for match range at/beyond "close" */ |
| MDEBUG(("\ncsearch at %ld\n", LOFF(close))); |
| close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL); |
| if (ISERR()) |
| { |
| *coldp = cold; |
| return v->err; |
| } |
| if (close == NULL) |
| break; /* no more possible match anywhere */ |
| assert(cold != NULL); |
| open = cold; |
| cold = NULL; |
| /* Search for matches starting between "open" and "close" inclusive */ |
| MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close))); |
| for (begin = open; begin <= close; begin++) |
| { |
| MDEBUG(("\ncfind trying at %ld\n", LOFF(begin))); |
| estart = begin; |
| estop = v->stop; |
| for (;;) |
| { |
| /* Here we use the top node's detailed RE */ |
| if (shorter) |
| end = shortest(v, d, begin, estart, |
| estop, (chr **) NULL, &hitend); |
| else |
| end = longest(v, d, begin, estop, |
| &hitend); |
| if (ISERR()) |
| { |
| *coldp = cold; |
| return v->err; |
| } |
| if (hitend && cold == NULL) |
| cold = begin; |
| if (end == NULL) |
| break; /* no match with this begin point, try next */ |
| MDEBUG(("tentative end %ld\n", LOFF(end))); |
| /* Dissect the potential match to see if it really matches */ |
| er = cdissect(v, v->g->tree, begin, end); |
| if (er == REG_OKAY) |
| { |
| if (v->nmatch > 0) |
| { |
| v->pmatch[0].rm_so = OFF(begin); |
| v->pmatch[0].rm_eo = OFF(end); |
| } |
| *coldp = cold; |
| return REG_OKAY; |
| } |
| if (er != REG_NOMATCH) |
| { |
| ERR(er); |
| *coldp = cold; |
| return er; |
| } |
| /* Try next longer/shorter match with same begin point */ |
| if (shorter) |
| { |
| if (end == estop) |
| break; /* no more, so try next begin point */ |
| estart = end + 1; |
| } |
| else |
| { |
| if (end == begin) |
| break; /* no more, so try next begin point */ |
| estop = end - 1; |
| } |
| } /* end loop over endpoint positions */ |
| } /* end loop over beginning positions */ |
| |
| /* |
| * If we get here, there is no possible match starting at or before |
| * "close", so consider matches beyond that. We'll do a fresh search |
| * with the search RE to find a new promising match range. |
| */ |
| close++; |
| } while (close < v->stop); |
| |
| *coldp = cold; |
| return REG_NOMATCH; |
| } |
| |
| /* |
| * zapallsubs - initialize all subexpression matches to "no match" |
| * |
| * Note that p[0], the overall-match location, is not touched. |
| */ |
| static void |
| zapallsubs(regmatch_t *p, |
| size_t n) |
| { |
| size_t i; |
| |
| for (i = n - 1; i > 0; i--) |
| { |
| p[i].rm_so = -1; |
| p[i].rm_eo = -1; |
| } |
| } |
| |
| /* |
| * zaptreesubs - initialize subexpressions within subtree to "no match" |
| */ |
| static void |
| zaptreesubs(struct vars *v, |
| struct subre *t) |
| { |
| int n = t->capno; |
| struct subre *t2; |
| |
| if (n > 0) |
| { |
| if ((size_t) n < v->nmatch) |
| { |
| v->pmatch[n].rm_so = -1; |
| v->pmatch[n].rm_eo = -1; |
| } |
| } |
| |
| for (t2 = t->child; t2 != NULL; t2 = t2->sibling) |
| zaptreesubs(v, t2); |
| } |
| |
| /* |
| * subset - set subexpression match data for a successful subre |
| */ |
| static void |
| subset(struct vars *v, |
| struct subre *sub, |
| chr *begin, |
| chr *end) |
| { |
| int n = sub->capno; |
| |
| assert(n > 0); |
| if ((size_t) n >= v->nmatch) |
| return; |
| |
| MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end))); |
| v->pmatch[n].rm_so = OFF(begin); |
| v->pmatch[n].rm_eo = OFF(end); |
| } |
| |
| /* |
| * cdissect - check backrefs and determine subexpression matches |
| * |
| * cdissect recursively processes a subre tree to check matching of backrefs |
| * and/or identify submatch boundaries for capture nodes. The proposed match |
| * runs from "begin" to "end" (not including "end"), and we are basically |
| * "dissecting" it to see where the submatches are. |
| * |
| * Before calling any level of cdissect, the caller must have run the node's |
| * DFA and found that the proposed substring satisfies the DFA. (We make |
| * the caller do that because in concatenation and iteration nodes, it's |
| * much faster to check all the substrings against the child DFAs before we |
| * recurse.) |
| * |
| * A side-effect of a successful match is to save match locations for |
| * capturing subexpressions in v->pmatch[]. This is a little bit tricky, |
| * so we make the following rules: |
| * 1. Before initial entry to cdissect, all match data must have been |
| * cleared (this is seen to by zapallsubs). |
| * 2. Before any recursive entry to cdissect, the match data for that |
| * subexpression tree must be guaranteed clear (see zaptreesubs). |
| * 3. When returning REG_OKAY, each level of cdissect will have saved |
| * any relevant match locations. |
| * 4. When returning REG_NOMATCH, each level of cdissect will guarantee |
| * that its subexpression match locations are again clear. |
| * 5. No guarantees are made for error cases (i.e., other result codes). |
| * 6. When a level of cdissect abandons a successful sub-match, it will |
| * clear that subtree's match locations with zaptreesubs before trying |
| * any new DFA match or cdissect call for that subtree or any subtree |
| * to its right (that is, any subtree that could have a backref into the |
| * abandoned match). |
| * This may seem overly complicated, but it's difficult to simplify it |
| * because of the provision that match locations must be reset before |
| * any fresh DFA match (a rule that is needed to make dfa_backref safe). |
| * That means it won't work to just reset relevant match locations at the |
| * start of each cdissect level. |
| */ |
| static int /* regexec return code */ |
| cdissect(struct vars *v, |
| struct subre *t, |
| chr *begin, /* beginning of relevant substring */ |
| chr *end) /* end of same */ |
| { |
| int er; |
| |
| assert(t != NULL); |
| MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end))); |
| |
| /* handy place to check for operation cancel */ |
| INTERRUPT(v->re); |
| /* ... and stack overrun */ |
| if (STACK_TOO_DEEP(v->re)) |
| return REG_ETOOBIG; |
| |
| switch (t->op) |
| { |
| case '=': /* terminal node */ |
| assert(t->child == NULL); |
| er = REG_OKAY; /* no action, parent did the work */ |
| break; |
| case 'b': /* back reference */ |
| assert(t->child == NULL); |
| er = cbrdissect(v, t, begin, end); |
| break; |
| case '.': /* concatenation */ |
| assert(t->child != NULL); |
| if (t->child->flags & SHORTER) /* reverse scan */ |
| er = crevcondissect(v, t, begin, end); |
| else |
| er = ccondissect(v, t, begin, end); |
| break; |
| case '|': /* alternation */ |
| assert(t->child != NULL); |
| er = caltdissect(v, t, begin, end); |
| break; |
| case '*': /* iteration */ |
| assert(t->child != NULL); |
| if (t->child->flags & SHORTER) /* reverse scan */ |
| er = creviterdissect(v, t, begin, end); |
| else |
| er = citerdissect(v, t, begin, end); |
| break; |
| case '(': /* no-op capture node */ |
| assert(t->child != NULL); |
| er = cdissect(v, t->child, begin, end); |
| break; |
| default: |
| er = REG_ASSERT; |
| break; |
| } |
| |
| /* |
| * We should never have a match failure unless backrefs lurk below; |
| * otherwise, either caller failed to check the DFA, or there's some |
| * inconsistency between the DFA and the node's innards. |
| */ |
| assert(er != REG_NOMATCH || (t->flags & BACKR)); |
| |
| /* |
| * If this node is marked as capturing, save successful match's location. |
| */ |
| if (t->capno > 0 && er == REG_OKAY) |
| subset(v, t, begin, end); |
| |
| return er; |
| } |
| |
| /* |
| * ccondissect - dissect match for concatenation node |
| */ |
| static int /* regexec return code */ |
| ccondissect(struct vars *v, |
| struct subre *t, |
| chr *begin, /* beginning of relevant substring */ |
| chr *end) /* end of same */ |
| { |
| struct subre *left = t->child; |
| struct subre *right = left->sibling; |
| struct dfa *d; |
| struct dfa *d2; |
| chr *mid; |
| int er; |
| |
| assert(t->op == '.'); |
| assert(left != NULL && left->cnfa.nstates > 0); |
| assert(right != NULL && right->cnfa.nstates > 0); |
| assert(right->sibling == NULL); |
| assert(!(left->flags & SHORTER)); |
| |
| d = getsubdfa(v, left); |
| NOERR(); |
| d2 = getsubdfa(v, right); |
| NOERR(); |
| MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end))); |
| |
| /* pick a tentative midpoint */ |
| mid = longest(v, d, begin, end, (int *) NULL); |
| NOERR(); |
| if (mid == NULL) |
| return REG_NOMATCH; |
| MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid))); |
| |
| /* iterate until satisfaction or failure */ |
| for (;;) |
| { |
| /* try this midpoint on for size */ |
| if (longest(v, d2, mid, end, (int *) NULL) == end) |
| { |
| er = cdissect(v, left, begin, mid); |
| if (er == REG_OKAY) |
| { |
| er = cdissect(v, right, mid, end); |
| if (er == REG_OKAY) |
| { |
| /* satisfaction */ |
| MDEBUG(("%d: successful\n", t->id)); |
| return REG_OKAY; |
| } |
| /* Reset left's matches (right should have done so itself) */ |
| zaptreesubs(v, left); |
| } |
| if (er != REG_NOMATCH) |
| return er; |
| } |
| NOERR(); |
| |
| /* that midpoint didn't work, find a new one */ |
| if (mid == begin) |
| { |
| /* all possibilities exhausted */ |
| MDEBUG(("%d: no midpoint\n", t->id)); |
| return REG_NOMATCH; |
| } |
| mid = longest(v, d, begin, mid - 1, (int *) NULL); |
| NOERR(); |
| if (mid == NULL) |
| { |
| /* failed to find a new one */ |
| MDEBUG(("%d: failed midpoint\n", t->id)); |
| return REG_NOMATCH; |
| } |
| MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); |
| } |
| |
| /* can't get here */ |
| return REG_ASSERT; |
| } |
| |
| /* |
| * crevcondissect - dissect match for concatenation node, shortest-first |
| */ |
| static int /* regexec return code */ |
| crevcondissect(struct vars *v, |
| struct subre *t, |
| chr *begin, /* beginning of relevant substring */ |
| chr *end) /* end of same */ |
| { |
| struct subre *left = t->child; |
| struct subre *right = left->sibling; |
| struct dfa *d; |
| struct dfa *d2; |
| chr *mid; |
| int er; |
| |
| assert(t->op == '.'); |
| assert(left != NULL && left->cnfa.nstates > 0); |
| assert(right != NULL && right->cnfa.nstates > 0); |
| assert(right->sibling == NULL); |
| assert(left->flags & SHORTER); |
| |
| d = getsubdfa(v, left); |
| NOERR(); |
| d2 = getsubdfa(v, right); |
| NOERR(); |
| MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end))); |
| |
| /* pick a tentative midpoint */ |
| mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL); |
| NOERR(); |
| if (mid == NULL) |
| return REG_NOMATCH; |
| MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid))); |
| |
| /* iterate until satisfaction or failure */ |
| for (;;) |
| { |
| /* try this midpoint on for size */ |
| if (longest(v, d2, mid, end, (int *) NULL) == end) |
| { |
| er = cdissect(v, left, begin, mid); |
| if (er == REG_OKAY) |
| { |
| er = cdissect(v, right, mid, end); |
| if (er == REG_OKAY) |
| { |
| /* satisfaction */ |
| MDEBUG(("%d: successful\n", t->id)); |
| return REG_OKAY; |
| } |
| /* Reset left's matches (right should have done so itself) */ |
| zaptreesubs(v, left); |
| } |
| if (er != REG_NOMATCH) |
| return er; |
| } |
| NOERR(); |
| |
| /* that midpoint didn't work, find a new one */ |
| if (mid == end) |
| { |
| /* all possibilities exhausted */ |
| MDEBUG(("%d: no midpoint\n", t->id)); |
| return REG_NOMATCH; |
| } |
| mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL); |
| NOERR(); |
| if (mid == NULL) |
| { |
| /* failed to find a new one */ |
| MDEBUG(("%d: failed midpoint\n", t->id)); |
| return REG_NOMATCH; |
| } |
| MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); |
| } |
| |
| /* can't get here */ |
| return REG_ASSERT; |
| } |
| |
| /* |
| * cbrdissect - dissect match for backref node |
| * |
| * The backref match might already have been verified by dfa_backref(), |
| * but we don't know that for sure so must check it here. |
| */ |
| static int /* regexec return code */ |
| cbrdissect(struct vars *v, |
| struct subre *t, |
| chr *begin, /* beginning of relevant substring */ |
| chr *end) /* end of same */ |
| { |
| int n = t->backno; |
| size_t numreps; |
| size_t tlen; |
| size_t brlen; |
| chr *brstring; |
| chr *p; |
| int min = t->min; |
| int max = t->max; |
| |
| assert(t != NULL); |
| assert(t->op == 'b'); |
| assert(n >= 0); |
| assert((size_t) n < v->nmatch); |
| |
| MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max, |
| LOFF(begin), LOFF(end))); |
| |
| /* get the backreferenced string */ |
| if (v->pmatch[n].rm_so == -1) |
| return REG_NOMATCH; |
| brstring = v->start + v->pmatch[n].rm_so; |
| brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so; |
| |
| /* special cases for zero-length strings */ |
| if (brlen == 0) |
| { |
| /* |
| * matches only if target is zero length, but any number of |
| * repetitions can be considered to be present |
| */ |
| if (begin == end && min <= max) |
| { |
| MDEBUG(("%d: backref matched trivially\n", t->id)); |
| return REG_OKAY; |
| } |
| return REG_NOMATCH; |
| } |
| if (begin == end) |
| { |
| /* matches only if zero repetitions are okay */ |
| if (min == 0) |
| { |
| MDEBUG(("%d: backref matched trivially\n", t->id)); |
| return REG_OKAY; |
| } |
| return REG_NOMATCH; |
| } |
| |
| /* |
| * check target length to see if it could possibly be an allowed number of |
| * repetitions of brstring |
| */ |
| assert(end > begin); |
| tlen = end - begin; |
| if (tlen % brlen != 0) |
| return REG_NOMATCH; |
| numreps = tlen / brlen; |
| if (numreps < min || (numreps > max && max != DUPINF)) |
| return REG_NOMATCH; |
| |
| /* okay, compare the actual string contents */ |
| p = begin; |
| while (numreps-- > 0) |
| { |
| if ((*v->g->compare) (brstring, p, brlen) != 0) |
| return REG_NOMATCH; |
| p += brlen; |
| } |
| |
| MDEBUG(("%d: backref matched\n", t->id)); |
| return REG_OKAY; |
| } |
| |
| /* |
| * caltdissect - dissect match for alternation node |
| */ |
| static int /* regexec return code */ |
| caltdissect(struct vars *v, |
| struct subre *t, |
| chr *begin, /* beginning of relevant substring */ |
| chr *end) /* end of same */ |
| { |
| struct dfa *d; |
| int er; |
| |
| assert(t->op == '|'); |
| |
| t = t->child; |
| /* there should be at least 2 alternatives */ |
| assert(t != NULL && t->sibling != NULL); |
| |
| while (t != NULL) |
| { |
| assert(t->cnfa.nstates > 0); |
| |
| MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end))); |
| |
| d = getsubdfa(v, t); |
| NOERR(); |
| if (longest(v, d, begin, end, (int *) NULL) == end) |
| { |
| MDEBUG(("%d: caltdissect matched\n", t->id)); |
| er = cdissect(v, t, begin, end); |
| if (er != REG_NOMATCH) |
| return er; |
| } |
| NOERR(); |
| |
| t = t->sibling; |
| } |
| |
| return REG_NOMATCH; |
| } |
| |
| /* |
| * citerdissect - dissect match for iteration node |
| */ |
| static int /* regexec return code */ |
| citerdissect(struct vars *v, |
| struct subre *t, |
| chr *begin, /* beginning of relevant substring */ |
| chr *end) /* end of same */ |
| { |
| struct dfa *d; |
| chr **endpts; |
| chr *limit; |
| int min_matches; |
| size_t max_matches; |
| int nverified; |
| int k; |
| int i; |
| int er; |
| |
| assert(t->op == '*'); |
| assert(t->child != NULL && t->child->cnfa.nstates > 0); |
| assert(!(t->child->flags & SHORTER)); |
| assert(begin <= end); |
| |
| MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end))); |
| |
| /* |
| * For the moment, assume the minimum number of matches is 1. If zero |
| * matches are allowed, and the target string is empty, we are allowed to |
| * match regardless of the contents of the iter node --- but we would |
| * prefer to match once, so that capturing parens get set. (An example of |
| * the concern here is a pattern like "()*\1", which historically this |
| * code has allowed to succeed.) Therefore, we deal with the zero-matches |
| * case at the bottom, after failing to find any other way to match. |
| */ |
| min_matches = t->min; |
| if (min_matches <= 0) |
| min_matches = 1; |
| |
| /* |
| * We need workspace to track the endpoints of each sub-match. Normally |
| * we consider only nonzero-length sub-matches, so there can be at most |
| * end-begin of them. However, if min is larger than that, we will also |
| * consider zero-length sub-matches in order to find enough matches. |
| * |
| * For convenience, endpts[0] contains the "begin" pointer and we store |
| * sub-match endpoints in endpts[1..max_matches]. |
| */ |
| max_matches = end - begin; |
| if (max_matches > t->max && t->max != DUPINF) |
| max_matches = t->max; |
| if (max_matches < min_matches) |
| max_matches = min_matches; |
| endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *)); |
| if (endpts == NULL) |
| return REG_ESPACE; |
| endpts[0] = begin; |
| |
| d = getsubdfa(v, t->child); |
| if (ISERR()) |
| { |
| FREE(endpts); |
| return v->err; |
| } |
| |
| /* |
| * Our strategy is to first find a set of sub-match endpoints that are |
| * valid according to the child node's DFA, and then recursively dissect |
| * each sub-match to confirm validity. If any validity check fails, |
| * backtrack that sub-match and try again. And, when we next try for a |
| * validity check, we need not recheck any successfully verified |
| * sub-matches that we didn't move the endpoints of. nverified remembers |
| * how many sub-matches are currently known okay. |
| */ |
| |
| /* initialize to consider first sub-match */ |
| nverified = 0; |
| k = 1; |
| limit = end; |
| |
| /* iterate until satisfaction or failure */ |
| while (k > 0) |
| { |
| /* try to find an endpoint for the k'th sub-match */ |
| endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL); |
| if (ISERR()) |
| { |
| FREE(endpts); |
| return v->err; |
| } |
| if (endpts[k] == NULL) |
| { |
| /* no match possible, so see if we can shorten previous one */ |
| k--; |
| goto backtrack; |
| } |
| MDEBUG(("%d: working endpoint %d: %ld\n", |
| t->id, k, LOFF(endpts[k]))); |
| |
| /* k'th sub-match can no longer be considered verified */ |
| if (nverified >= k) |
| nverified = k - 1; |
| |
| if (endpts[k] != end) |
| { |
| /* haven't reached end yet, try another iteration if allowed */ |
| if (k >= max_matches) |
| { |
| /* must try to shorten some previous match */ |
| k--; |
| goto backtrack; |
| } |
| |
| /* reject zero-length match unless necessary to achieve min */ |
| if (endpts[k] == endpts[k - 1] && |
| (k >= min_matches || min_matches - k < end - endpts[k])) |
| goto backtrack; |
| |
| k++; |
| limit = end; |
| continue; |
| } |
| |
| /* |
| * We've identified a way to divide the string into k sub-matches that |
| * works so far as the child DFA can tell. If k is an allowed number |
| * of matches, start the slow part: recurse to verify each sub-match. |
| * We always have k <= max_matches, needn't check that. |
| */ |
| if (k < min_matches) |
| goto backtrack; |
| |
| MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k)); |
| |
| for (i = nverified + 1; i <= k; i++) |
| { |
| /* zap any match data from a non-last iteration */ |
| zaptreesubs(v, t->child); |
| er = cdissect(v, t->child, endpts[i - 1], endpts[i]); |
| if (er == REG_OKAY) |
| { |
| nverified = i; |
| continue; |
| } |
| if (er == REG_NOMATCH) |
| break; |
| /* oops, something failed */ |
| FREE(endpts); |
| return er; |
| } |
| |
| if (i > k) |
| { |
| /* satisfaction */ |
| MDEBUG(("%d: successful\n", t->id)); |
| FREE(endpts); |
| return REG_OKAY; |
| } |
| |
| /* i'th match failed to verify, so backtrack it */ |
| k = i; |
| |
| backtrack: |
| |
| /* |
| * Must consider shorter versions of the k'th sub-match. However, |
| * we'll only ask for a zero-length match if necessary. |
| */ |
| while (k > 0) |
| { |
| chr *prev_end = endpts[k - 1]; |
| |
| if (endpts[k] > prev_end) |
| { |
| limit = endpts[k] - 1; |
| if (limit > prev_end || |
| (k < min_matches && min_matches - k >= end - prev_end)) |
| { |
| /* break out of backtrack loop, continue the outer one */ |
| break; |
| } |
| } |
| /* can't shorten k'th sub-match any more, consider previous one */ |
| k--; |
| } |
| } |
| |
| /* all possibilities exhausted */ |
| FREE(endpts); |
| |
| /* |
| * Now consider the possibility that we can match to a zero-length string |
| * by using zero repetitions. |
| */ |
| if (t->min == 0 && begin == end) |
| { |
| MDEBUG(("%d: allowing zero matches\n", t->id)); |
| return REG_OKAY; |
| } |
| |
| MDEBUG(("%d: failed\n", t->id)); |
| return REG_NOMATCH; |
| } |
| |
| /* |
| * creviterdissect - dissect match for iteration node, shortest-first |
| */ |
| static int /* regexec return code */ |
| creviterdissect(struct vars *v, |
| struct subre *t, |
| chr *begin, /* beginning of relevant substring */ |
| chr *end) /* end of same */ |
| { |
| struct dfa *d; |
| chr **endpts; |
| chr *limit; |
| int min_matches; |
| size_t max_matches; |
| int nverified; |
| int k; |
| int i; |
| int er; |
| |
| assert(t->op == '*'); |
| assert(t->child != NULL && t->child->cnfa.nstates > 0); |
| assert(t->child->flags & SHORTER); |
| assert(begin <= end); |
| |
| MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end))); |
| |
| /* |
| * If zero matches are allowed, and target string is empty, just declare |
| * victory. OTOH, if target string isn't empty, zero matches can't work |
| * so we pretend the min is 1. |
| */ |
| min_matches = t->min; |
| if (min_matches <= 0) |
| { |
| if (begin == end) |
| { |
| MDEBUG(("%d: allowing zero matches\n", t->id)); |
| return REG_OKAY; |
| } |
| min_matches = 1; |
| } |
| |
| /* |
| * We need workspace to track the endpoints of each sub-match. Normally |
| * we consider only nonzero-length sub-matches, so there can be at most |
| * end-begin of them. However, if min is larger than that, we will also |
| * consider zero-length sub-matches in order to find enough matches. |
| * |
| * For convenience, endpts[0] contains the "begin" pointer and we store |
| * sub-match endpoints in endpts[1..max_matches]. |
| */ |
| max_matches = end - begin; |
| if (max_matches > t->max && t->max != DUPINF) |
| max_matches = t->max; |
| if (max_matches < min_matches) |
| max_matches = min_matches; |
| endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *)); |
| if (endpts == NULL) |
| return REG_ESPACE; |
| endpts[0] = begin; |
| |
| d = getsubdfa(v, t->child); |
| if (ISERR()) |
| { |
| FREE(endpts); |
| return v->err; |
| } |
| |
| /* |
| * Our strategy is to first find a set of sub-match endpoints that are |
| * valid according to the child node's DFA, and then recursively dissect |
| * each sub-match to confirm validity. If any validity check fails, |
| * backtrack that sub-match and try again. And, when we next try for a |
| * validity check, we need not recheck any successfully verified |
| * sub-matches that we didn't move the endpoints of. nverified remembers |
| * how many sub-matches are currently known okay. |
| */ |
| |
| /* initialize to consider first sub-match */ |
| nverified = 0; |
| k = 1; |
| limit = begin; |
| |
| /* iterate until satisfaction or failure */ |
| while (k > 0) |
| { |
| /* disallow zero-length match unless necessary to achieve min */ |
| if (limit == endpts[k - 1] && |
| limit != end && |
| (k >= min_matches || min_matches - k < end - limit)) |
| limit++; |
| |
| /* if this is the last allowed sub-match, it must reach to the end */ |
| if (k >= max_matches) |
| limit = end; |
| |
| /* try to find an endpoint for the k'th sub-match */ |
| endpts[k] = shortest(v, d, endpts[k - 1], limit, end, |
| (chr **) NULL, (int *) NULL); |
| if (ISERR()) |
| { |
| FREE(endpts); |
| return v->err; |
| } |
| if (endpts[k] == NULL) |
| { |
| /* no match possible, so see if we can lengthen previous one */ |
| k--; |
| goto backtrack; |
| } |
| MDEBUG(("%d: working endpoint %d: %ld\n", |
| t->id, k, LOFF(endpts[k]))); |
| |
| /* k'th sub-match can no longer be considered verified */ |
| if (nverified >= k) |
| nverified = k - 1; |
| |
| if (endpts[k] != end) |
| { |
| /* haven't reached end yet, try another iteration if allowed */ |
| if (k >= max_matches) |
| { |
| /* must try to lengthen some previous match */ |
| k--; |
| goto backtrack; |
| } |
| |
| k++; |
| limit = endpts[k - 1]; |
| continue; |
| } |
| |
| /* |
| * We've identified a way to divide the string into k sub-matches that |
| * works so far as the child DFA can tell. If k is an allowed number |
| * of matches, start the slow part: recurse to verify each sub-match. |
| * We always have k <= max_matches, needn't check that. |
| */ |
| if (k < min_matches) |
| goto backtrack; |
| |
| MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k)); |
| |
| for (i = nverified + 1; i <= k; i++) |
| { |
| /* zap any match data from a non-last iteration */ |
| zaptreesubs(v, t->child); |
| er = cdissect(v, t->child, endpts[i - 1], endpts[i]); |
| if (er == REG_OKAY) |
| { |
| nverified = i; |
| continue; |
| } |
| if (er == REG_NOMATCH) |
| break; |
| /* oops, something failed */ |
| FREE(endpts); |
| return er; |
| } |
| |
| if (i > k) |
| { |
| /* satisfaction */ |
| MDEBUG(("%d: successful\n", t->id)); |
| FREE(endpts); |
| return REG_OKAY; |
| } |
| |
| /* i'th match failed to verify, so backtrack it */ |
| k = i; |
| |
| backtrack: |
| |
| /* |
| * Must consider longer versions of the k'th sub-match. |
| */ |
| while (k > 0) |
| { |
| if (endpts[k] < end) |
| { |
| limit = endpts[k] + 1; |
| /* break out of backtrack loop, continue the outer one */ |
| break; |
| } |
| /* can't lengthen k'th sub-match any more, consider previous one */ |
| k--; |
| } |
| } |
| |
| /* all possibilities exhausted */ |
| MDEBUG(("%d: failed\n", t->id)); |
| FREE(endpts); |
| return REG_NOMATCH; |
| } |
| |
| |
| |
| #include "rege_dfa.c" |