/* src/backend/regex/regc_locale.c - hawq - Git at Google */

 /*
  * regc_locale.c --
  *
  *	This file contains locale-specific regexp routines.
  *	This file is #included by regcomp.c.
  *
  * Copyright (c) 1998 by Scriptics Corporation.
  *
  * This software is copyrighted by the Regents of the University of
  * California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState
  * Corporation and other parties.  The following terms apply to all files
  * associated with the software unless explicitly disclaimed in
  * individual files.
  *
  * The authors hereby grant permission to use, copy, modify, distribute,
  * and license this software and its documentation for any purpose, provided
  * that existing copyright notices are retained in all copies and that this
  * notice is included verbatim in any distributions. No written agreement,
  * license, or royalty fee is required for any of the authorized uses.
  * Modifications to this software may be copyrighted by their authors
  * and need not follow the licensing terms described here, provided that
  * the new terms are clearly indicated on the first page of each file where
  * they apply.
  *
  * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
  * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  * ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
  * DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.	THIS SOFTWARE
  * IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
  * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
  * MODIFICATIONS.
  *
  * GOVERNMENT USE: If you are acquiring this software on behalf of the
  * U.S. government, the Government shall have only "Restricted Rights"
  * in the software and related documentation as defined in the Federal
  * Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).	If you
  * are acquiring the software on behalf of the Department of Defense, the
  * software shall be classified as "Commercial Computer Software" and the
  * Government shall have only "Restricted Rights" as defined in Clause
  * 252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
  * authors grant the U.S. Government and others acting in its behalf
  * permission to use and distribute the software in accordance with the
  * terms specified in this license.
  *
  * $PostgreSQL: pgsql/src/backend/regex/regc_locale.c,v 1.9 2008/02/14 17:33:37 tgl Exp $
  */

 /*
  * ASCII character-name table.
  *
  * Maps symbolic names to the character each one denotes; some characters
  * appear under more than one name.  The list is terminated by an entry
  * whose name is NULL, which is what element() tests for when it scans the
  * table.
  */
 static const struct cname
 {
 	const char *name;			/* symbolic name, NULL in the terminator */
 	const char	code;			/* character the name stands for */
 }	cnames[] =
 {
 	/* control characters */
 	{"NUL", '\0'},
 	{"SOH", '\001'},
 	{"STX", '\002'},
 	{"ETX", '\003'},
 	{"EOT", '\004'},
 	{"ENQ", '\005'},
 	{"ACK", '\006'},
 	{"BEL", '\007'},
 	{"alert", '\007'},
 	{"BS", '\010'},
 	{"backspace", '\b'},
 	{"HT", '\011'},
 	{"tab", '\t'},
 	{"LF", '\012'},
 	{"newline", '\n'},
 	{"VT", '\013'},
 	{"vertical-tab", '\v'},
 	{"FF", '\014'},
 	{"form-feed", '\f'},
 	{"CR", '\015'},
 	{"carriage-return", '\r'},
 	{"SO", '\016'},
 	{"SI", '\017'},
 	{"DLE", '\020'},
 	{"DC1", '\021'},
 	{"DC2", '\022'},
 	{"DC3", '\023'},
 	{"DC4", '\024'},
 	{"NAK", '\025'},
 	{"SYN", '\026'},
 	{"ETB", '\027'},
 	{"CAN", '\030'},
 	{"EM", '\031'},
 	{"SUB", '\032'},
 	{"ESC", '\033'},
 	{"IS4", '\034'},
 	{"FS", '\034'},
 	{"IS3", '\035'},
 	{"GS", '\035'},
 	{"IS2", '\036'},
 	{"RS", '\036'},
 	{"IS1", '\037'},
 	{"US", '\037'},

 	/* space and punctuation below the digits */
 	{"space", ' '},
 	{"exclamation-mark", '!'},
 	{"quotation-mark", '"'},
 	{"number-sign", '#'},
 	{"dollar-sign", '$'},
 	{"percent-sign", '%'},
 	{"ampersand", '&'},
 	{"apostrophe", '\''},
 	{"left-parenthesis", '('},
 	{"right-parenthesis", ')'},
 	{"asterisk", '*'},
 	{"plus-sign", '+'},
 	{"comma", ','},
 	{"hyphen", '-'},
 	{"hyphen-minus", '-'},
 	{"period", '.'},
 	{"full-stop", '.'},
 	{"slash", '/'},
 	{"solidus", '/'},

 	/* digits */
 	{"zero", '0'},
 	{"one", '1'},
 	{"two", '2'},
 	{"three", '3'},
 	{"four", '4'},
 	{"five", '5'},
 	{"six", '6'},
 	{"seven", '7'},
 	{"eight", '8'},
 	{"nine", '9'},

 	/* remaining punctuation */
 	{"colon", ':'},
 	{"semicolon", ';'},
 	{"less-than-sign", '<'},
 	{"equals-sign", '='},
 	{"greater-than-sign", '>'},
 	{"question-mark", '?'},
 	{"commercial-at", '@'},
 	{"left-square-bracket", '['},
 	{"backslash", '\\'},
 	{"reverse-solidus", '\\'},
 	{"right-square-bracket", ']'},
 	{"circumflex", '^'},
 	{"circumflex-accent", '^'},
 	{"underscore", '_'},
 	{"low-line", '_'},
 	{"grave-accent", '`'},
 	{"left-brace", '{'},
 	{"left-curly-bracket", '{'},
 	{"vertical-line", '|'},
 	{"right-brace", '}'},
 	{"right-curly-bracket", '}'},
 	{"tilde", '~'},
 	{"DEL", '\177'},

 	/* terminator */
 	{NULL, 0}
 };

 /*
  * some ctype functions with non-ascii-char guard
  */
 /* isdigit() for a pg_wchar: 1 if c <= UCHAR_MAX and isdigit() accepts it, else 0 */
 static int
 pg_wc_isdigit(pg_wchar c)
 {
 	if (c > UCHAR_MAX)
 		return 0;
 	return isdigit((unsigned char) c) != 0;
 }

 /* isalpha() for a pg_wchar: 1 if c <= UCHAR_MAX and isalpha() accepts it, else 0 */
 static int
 pg_wc_isalpha(pg_wchar c)
 {
 	if (c > UCHAR_MAX)
 		return 0;
 	return isalpha((unsigned char) c) != 0;
 }

 /* isalnum() for a pg_wchar: 1 if c <= UCHAR_MAX and isalnum() accepts it, else 0 */
 static int
 pg_wc_isalnum(pg_wchar c)
 {
 	if (c > UCHAR_MAX)
 		return 0;
 	return isalnum((unsigned char) c) != 0;
 }

 /* isupper() for a pg_wchar: 1 if c <= UCHAR_MAX and isupper() accepts it, else 0 */
 static int
 pg_wc_isupper(pg_wchar c)
 {
 	if (c > UCHAR_MAX)
 		return 0;
 	return isupper((unsigned char) c) != 0;
 }

 /* islower() for a pg_wchar: 1 if c <= UCHAR_MAX and islower() accepts it, else 0 */
 static int
 pg_wc_islower(pg_wchar c)
 {
 	if (c > UCHAR_MAX)
 		return 0;
 	return islower((unsigned char) c) != 0;
 }

 /* isgraph() for a pg_wchar: 1 if c <= UCHAR_MAX and isgraph() accepts it, else 0 */
 static int
 pg_wc_isgraph(pg_wchar c)
 {
 	if (c > UCHAR_MAX)
 		return 0;
 	return isgraph((unsigned char) c) != 0;
 }

 /* isprint() for a pg_wchar: 1 if c <= UCHAR_MAX and isprint() accepts it, else 0 */
 static int
 pg_wc_isprint(pg_wchar c)
 {
 	if (c > UCHAR_MAX)
 		return 0;
 	return isprint((unsigned char) c) != 0;
 }

 /* ispunct() for a pg_wchar: 1 if c <= UCHAR_MAX and ispunct() accepts it, else 0 */
 static int
 pg_wc_ispunct(pg_wchar c)
 {
 	if (c > UCHAR_MAX)
 		return 0;
 	return ispunct((unsigned char) c) != 0;
 }

 /* isspace() for a pg_wchar: 1 if c <= UCHAR_MAX and isspace() accepts it, else 0 */
 static int
 pg_wc_isspace(pg_wchar c)
 {
 	if (c > UCHAR_MAX)
 		return 0;
 	return isspace((unsigned char) c) != 0;
 }

 /*
  * toupper() for a pg_wchar: codes above UCHAR_MAX are returned unchanged,
  * anything else is mapped through toupper().
  */
 static pg_wchar
 pg_wc_toupper(pg_wchar c)
 {
 	return (c > UCHAR_MAX) ? c : (pg_wchar) toupper((unsigned char) c);
 }

 /*
  * tolower() for a pg_wchar: codes above UCHAR_MAX are returned unchanged,
  * anything else is mapped through tolower().
  */
 static pg_wchar
 pg_wc_tolower(pg_wchar c)
 {
 	return (c > UCHAR_MAX) ? c : (pg_wchar) tolower((unsigned char) c);
 }


 /*
  * element - map collating-element name to celt
  *
  * The name is the chr sequence [startp, endp) and must not be empty.  A name
  * that is one chr long denotes that chr.  Longer names are looked up in the
  * cnames table.  A name that is not found is reported as REG_ECOLLATE and 0
  * is returned.
  */
 static celt
 element(struct vars * v,		/* context */
 		const chr *startp,		/* points to start of name */
 		const chr *endp)		/* points just past end of name */
 {
 	const struct cname *entry;
 	size_t		namelen;

 	assert(startp < endp);
 	namelen = endp - startp;

 	/* a name consisting of a single chr stands for that chr */
 	if (namelen == 1)
 		return *startp;

 	NOTE(REG_ULOCALE);

 	/* walk the table until the NULL-named terminator */
 	for (entry = cnames; entry->name != NULL; entry++)
 	{
 		if (strlen(entry->name) != namelen)
 			continue;			/* cheap length test first */
 		if (pg_char_and_wchar_strncmp(entry->name, startp, namelen) == 0)
 			return CHR(entry->code);
 	}

 	/* no such name */
 	ERR(REG_ECOLLATE);
 	return 0;
 }

 /*
  * range - supply cvec for a range, including legality check
  *
  * Reports REG_ERANGE and returns NULL when a differs from b and a is not
  * before b.  Without "cases" the result is just the range a..b.
  *
  * With "cases" the result holds the range a..b plus, as individual chrs, any
  * lower- or upper-case counterpart of a member that falls outside a..b.  The
  * chr space requested is the size of the range, but 100000 (an arbitrary
  * limit) when that size is not positive or exceeds 100000; running out of
  * that space is reported as REG_ETOOBIG.
  */
 static struct cvec *
 range(struct vars * v,			/* context */
 	  celt a,					/* range start */
 	  celt b,					/* range end, might equal a */
 	  int cases)				/* case-independent? */
 {
 	struct cvec *cv;
 	celt		cur;
 	celt		other;
 	int			want;
 	int			pass;

 	/* a == b is acceptable, anything else must be in ascending order */
 	if (a != b && !before(a, b))
 	{
 		ERR(REG_ERANGE);
 		return NULL;
 	}

 	/* case-sensitive: a single range entry does the job */
 	if (!cases)
 	{
 		cv = getcvec(v, 0, 1);
 		NOERRN();
 		addrange(cv, a, b);
 		return cv;
 	}

 	/* clamp the request so a huge range cannot demand a huge allocation */
 	want = b - a + 1;
 	if (want <= 0 || want > 100000)
 		want = 100000;

 	cv = getcvec(v, want, 1);
 	NOERRN();
 	addrange(cv, a, b);

 	for (cur = a; cur <= b; cur++)
 	{
 		/* pass 0 looks at the lower-case form, pass 1 at the upper-case form */
 		for (pass = 0; pass < 2; pass++)
 		{
 			if (pass == 0)
 				other = pg_wc_tolower((chr) cur);
 			else
 				other = pg_wc_toupper((chr) cur);

 			/* skip counterparts that are cur itself or already in a..b */
 			if (other == cur)
 				continue;
 			if (!before(other, a) && !before(b, other))
 				continue;

 			/* the clamp above means the cvec can fill up */
 			if (cv->nchrs >= cv->chrspace)
 			{
 				ERR(REG_ETOOBIG);
 				return NULL;
 			}
 			addchr(cv, other);
 		}
 	}

 	return cv;
 }

 /*
  * before - is celt x before celt y, for purposes of range legality?
  *
  * Returns 1 when x is less than y, 0 otherwise.
  */
 static int						/* predicate */
 before(celt x, celt y)
 {
 	return (x < y) ? 1 : 0;
 }

 /*
  * eclass - supply cvec for an equivalence class
  * Must include case counterparts on request.
  *
  * Apart from the fake class used for testing, the class contains only c
  * itself, or all case counterparts of c when "cases" is set.
  *
  * Returns NULL if getcvec() could not supply a cvec.
  */
 static struct cvec *
 eclass(struct vars * v,			/* context */
 	   celt c,					/* Collating element representing the
 								 * equivalence class. */
 	   int cases)				/* all cases? */
 {
 	struct cvec *cv;

 	/* crude fake equivalence class for testing */
 	if ((v->cflags & REG_FAKE) && c == 'x')
 	{
 		cv = getcvec(v, 4, 0);

 		/*
 		 * Never hand a NULL cvec to addchr().  NOTE(review): range() follows
 		 * getcvec() with NOERRN(), so getcvec() presumably records the
 		 * failure itself -- confirm.
 		 */
 		if (cv == NULL)
 			return NULL;
 		addchr(cv, (chr) 'x');
 		addchr(cv, (chr) 'y');
 		if (cases)
 		{
 			addchr(cv, (chr) 'X');
 			addchr(cv, (chr) 'Y');
 		}
 		return cv;
 	}

 	/* otherwise, none */
 	if (cases)
 		return allcases(v, c);

 	cv = getcvec(v, 1, 0);
 	/* a run-time check, so the guard is still there when asserts are off */
 	if (cv == NULL)
 		return NULL;
 	addchr(cv, (chr) c);
 	return cv;
 }

 /*
  * cclass - supply cvec for a character class
  *
  * Must include case counterparts on request.
  *
  * The name is the chr sequence [startp, endp); it is looked up in a fixed
  * list of class names.  An unknown name is reported as REG_ECTYPE and NULL
  * is returned.  When "cases" is set, "lower" and "upper" are both treated as
  * "alpha".
  *
  * Classes decided by a pg_wc_is*() test are built by trying every code from
  * 0 through UCHAR_MAX, so only those codes can be members.  The remaining
  * classes are built from fixed chrs and ranges.
  *
  * Returns the cvec; if getcvec() returned NULL, REG_ESPACE is reported and
  * NULL is returned.
  */
 static struct cvec *
 cclass(struct vars * v,			/* context */
 	   const chr *startp,		/* where the name starts */
 	   const chr *endp,			/* just past the end of the name */
 	   int cases)				/* case-independent? */
 {
 	size_t		len;
 	struct cvec *cv = NULL;
 	int			i;
 	int			which = -1;		/* position in classNames, -1 if unknown */

 	/*
 	 * Valid class names.  The enum below must list its members in the same
 	 * order, because a name's position is used as its enum value.
 	 */
 	static const char *classNames[] = {
 		"alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
 		"lower", "print", "punct", "space", "upper", "xdigit", NULL
 	};

 	enum classes
 	{
 		CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
 		CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT
 	};

 	/* Map the name to the corresponding enumerated value. */
 	len = endp - startp;
 	for (i = 0; classNames[i] != NULL; i++)
 	{
 		if (strlen(classNames[i]) == len &&
 			pg_char_and_wchar_strncmp(classNames[i], startp, len) == 0)
 		{
 			which = i;
 			break;
 		}
 	}
 	if (which < 0)
 	{
 		ERR(REG_ECTYPE);
 		return NULL;
 	}

 	/* Remap lower and upper to alpha if the match is case insensitive. */
 	if (cases &&
 		((enum classes) which == CC_LOWER ||
 		 (enum classes) which == CC_UPPER))
 		which = (int) CC_ALPHA;

 	/*
 	 * Now compute the character class contents.  Every use of cv is guarded,
 	 * because getcvec() may return NULL.
 	 */
 	switch ((enum classes) which)
 	{
 		case CC_ASCII:
 			cv = getcvec(v, 0, 1);
 			if (cv)
 				addrange(cv, 0, 0x7f);
 			break;
 		case CC_BLANK:
 			cv = getcvec(v, 2, 0);
 			if (cv)
 			{
 				addchr(cv, '\t');
 				addchr(cv, ' ');
 			}
 			break;
 		case CC_CNTRL:
 			cv = getcvec(v, 0, 2);
 			if (cv)
 			{
 				addrange(cv, 0x0, 0x1f);
 				addrange(cv, 0x7f, 0x9f);
 			}
 			break;
 		case CC_DIGIT:
 			cv = getcvec(v, 0, 1);
 			if (cv)
 				addrange(cv, (chr) '0', (chr) '9');
 			break;
 		case CC_XDIGIT:
 			cv = getcvec(v, 0, 3);
 			if (cv)
 			{
 				addrange(cv, '0', '9');
 				addrange(cv, 'a', 'f');
 				addrange(cv, 'A', 'F');
 			}
 			break;
 		case CC_ALNUM:
 		case CC_ALPHA:
 		case CC_GRAPH:
 		case CC_LOWER:
 		case CC_PRINT:
 		case CC_PUNCT:
 		case CC_SPACE:
 		case CC_UPPER:

 			/*
 			 * The scan below tries UCHAR_MAX + 1 codes, so ask for that many
 			 * slots rather than UCHAR_MAX.
 			 */
 			cv = getcvec(v, UCHAR_MAX + 1, 0);
 			if (cv)
 			{
 				for (i = 0; i <= UCHAR_MAX; i++)
 				{
 					int			member = 0;

 					switch ((enum classes) which)
 					{
 						case CC_ALNUM:
 							member = pg_wc_isalnum((chr) i);
 							break;
 						case CC_ALPHA:
 							member = pg_wc_isalpha((chr) i);
 							break;
 						case CC_GRAPH:
 							member = pg_wc_isgraph((chr) i);
 							break;
 						case CC_LOWER:
 							member = pg_wc_islower((chr) i);
 							break;
 						case CC_PRINT:
 							member = pg_wc_isprint((chr) i);
 							break;
 						case CC_PUNCT:
 							member = pg_wc_ispunct((chr) i);
 							break;
 						case CC_SPACE:
 							member = pg_wc_isspace((chr) i);
 							break;
 						case CC_UPPER:
 							member = pg_wc_isupper((chr) i);
 							break;
 						default:
 							break;
 					}
 					if (member)
 						addchr(cv, (chr) i);
 				}
 			}
 			break;
 	}

 	if (cv == NULL)
 		ERR(REG_ESPACE);
 	return cv;
 }

 /*
  * allcases - supply cvec for all case counterparts of a chr (including itself)
  *
  * The cvec receives the lower-case form of pc, and also the upper-case form
  * when the two differ.
  *
  * Returns NULL if getcvec() could not supply a cvec.
  */
 static struct cvec *
 allcases(struct vars * v,		/* context */
 		 chr pc)				/* character to get case equivs of */
 {
 	struct cvec *cv;
 	chr			lc,
 				uc;

 	lc = pg_wc_tolower(pc);
 	uc = pg_wc_toupper(pc);

 	cv = getcvec(v, 2, 0);

 	/*
 	 * Never hand a NULL cvec to addchr().  NOTE(review): range() follows
 	 * getcvec() with NOERRN(), so getcvec() presumably records the failure
 	 * itself -- confirm.
 	 */
 	if (cv == NULL)
 		return NULL;

 	addchr(cv, lc);
 	if (lc != uc)
 		addchr(cv, uc);
 	return cv;
 }

 /*
  * cmp - chr-substring compare
  *
  * Compares exactly len chrs of x and y as raw memory, so embedded NULs do
  * not end the comparison.  Callers should rely only on zero versus nonzero.
  */
 static int						/* 0 for equal, nonzero for unequal */
 cmp(const chr *x, const chr *y, /* strings to compare */
 	size_t len)					/* exact length of comparison */
 {
 	const size_t nbytes = len * sizeof(chr);

 	return memcmp(VS(x), VS(y), nbytes);
 }

 /*
  * casecmp - case-independent chr-substring compare
  *
  * Compares exactly len chrs of x and y; embedded NULs do not end the
  * comparison.  Two chrs match when they are equal or when pg_wc_tolower()
  * maps them to the same value.  Returns 0 if every position matches, 1
  * otherwise.
  */
 static int						/* 0 for equal, nonzero for unequal */
 casecmp(const chr *x, const chr *y,		/* strings to compare */
 		size_t len)				/* exact length of comparison */
 {
 	size_t		i;

 	for (i = 0; i < len; i++)
 	{
 		if (x[i] == y[i])
 			continue;			/* identical, no case folding needed */
 		if (pg_wc_tolower(x[i]) != pg_wc_tolower(y[i]))
 			return 1;
 	}
 	return 0;
 }
	/*
	* regc_locale.c --
	*
	* This file contains locale-specific regexp routines.
	* This file is #included by regcomp.c.
	*
	* Copyright (c) 1998 by Scriptics Corporation.
	*
	* This software is copyrighted by the Regents of the University of
	* California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState
	* Corporation and other parties. The following terms apply to all files
	* associated with the software unless explicitly disclaimed in
	* individual files.
	*
	* The authors hereby grant permission to use, copy, modify, distribute,
	* and license this software and its documentation for any purpose, provided
	* that existing copyright notices are retained in all copies and that this
	* notice is included verbatim in any distributions. No written agreement,
	* license, or royalty fee is required for any of the authorized uses.
	* Modifications to this software may be copyrighted by their authors
	* and need not follow the licensing terms described here, provided that
	* the new terms are clearly indicated on the first page of each file where
	* they apply.
	*
	* IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
	* FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
	* ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
	* DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
	* POSSIBILITY OF SUCH DAMAGE.
	*
	* THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
	* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
	* IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
	* NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
	* MODIFICATIONS.
	*
	* GOVERNMENT USE: If you are acquiring this software on behalf of the
	* U.S. government, the Government shall have only "Restricted Rights"
	* in the software and related documentation as defined in the Federal
	* Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
	* are acquiring the software on behalf of the Department of Defense, the
	* software shall be classified as "Commercial Computer Software" and the
	* Government shall have only "Restricted Rights" as defined in Clause
	* 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
	* authors grant the U.S. Government and others acting in its behalf
	* permission to use and distribute the software in accordance with the
	* terms specified in this license.
	*
	* $PostgreSQL: pgsql/src/backend/regex/regc_locale.c,v 1.9 2008/02/14 17:33:37 tgl Exp $
	*/

	/*
	 * ASCII character-name table.
	 *
	 * Maps symbolic names to the character each one denotes; some characters
	 * appear under more than one name.  The list is terminated by an entry
	 * whose name is NULL, which is what element() tests for when it scans the
	 * table.
	 */
	static const struct cname
	{
		const char *name;			/* symbolic name, NULL in the terminator */
		const char	code;			/* character the name stands for */
	}	cnames[] =
	{
		/* control characters */
		{"NUL", '\0'},
		{"SOH", '\001'},
		{"STX", '\002'},
		{"ETX", '\003'},
		{"EOT", '\004'},
		{"ENQ", '\005'},
		{"ACK", '\006'},
		{"BEL", '\007'},
		{"alert", '\007'},
		{"BS", '\010'},
		{"backspace", '\b'},
		{"HT", '\011'},
		{"tab", '\t'},
		{"LF", '\012'},
		{"newline", '\n'},
		{"VT", '\013'},
		{"vertical-tab", '\v'},
		{"FF", '\014'},
		{"form-feed", '\f'},
		{"CR", '\015'},
		{"carriage-return", '\r'},
		{"SO", '\016'},
		{"SI", '\017'},
		{"DLE", '\020'},
		{"DC1", '\021'},
		{"DC2", '\022'},
		{"DC3", '\023'},
		{"DC4", '\024'},
		{"NAK", '\025'},
		{"SYN", '\026'},
		{"ETB", '\027'},
		{"CAN", '\030'},
		{"EM", '\031'},
		{"SUB", '\032'},
		{"ESC", '\033'},
		{"IS4", '\034'},
		{"FS", '\034'},
		{"IS3", '\035'},
		{"GS", '\035'},
		{"IS2", '\036'},
		{"RS", '\036'},
		{"IS1", '\037'},
		{"US", '\037'},

		/* space and punctuation below the digits */
		{"space", ' '},
		{"exclamation-mark", '!'},
		{"quotation-mark", '"'},
		{"number-sign", '#'},
		{"dollar-sign", '$'},
		{"percent-sign", '%'},
		{"ampersand", '&'},
		{"apostrophe", '\''},
		{"left-parenthesis", '('},
		{"right-parenthesis", ')'},
		{"asterisk", '*'},
		{"plus-sign", '+'},
		{"comma", ','},
		{"hyphen", '-'},
		{"hyphen-minus", '-'},
		{"period", '.'},
		{"full-stop", '.'},
		{"slash", '/'},
		{"solidus", '/'},

		/* digits */
		{"zero", '0'},
		{"one", '1'},
		{"two", '2'},
		{"three", '3'},
		{"four", '4'},
		{"five", '5'},
		{"six", '6'},
		{"seven", '7'},
		{"eight", '8'},
		{"nine", '9'},

		/* remaining punctuation */
		{"colon", ':'},
		{"semicolon", ';'},
		{"less-than-sign", '<'},
		{"equals-sign", '='},
		{"greater-than-sign", '>'},
		{"question-mark", '?'},
		{"commercial-at", '@'},
		{"left-square-bracket", '['},
		{"backslash", '\\'},
		{"reverse-solidus", '\\'},
		{"right-square-bracket", ']'},
		{"circumflex", '^'},
		{"circumflex-accent", '^'},
		{"underscore", '_'},
		{"low-line", '_'},
		{"grave-accent", '`'},
		{"left-brace", '{'},
		{"left-curly-bracket", '{'},
		{"vertical-line", '|'},
		{"right-brace", '}'},
		{"right-curly-bracket", '}'},
		{"tilde", '~'},
		{"DEL", '\177'},

		/* terminator */
		{NULL, 0}
	};

	/*
	* some ctype functions with non-ascii-char guard
	*/
	/* isdigit() for a pg_wchar: 1 if c <= UCHAR_MAX and isdigit() accepts it, else 0 */
	static int
	pg_wc_isdigit(pg_wchar c)
	{
		if (c > UCHAR_MAX)
			return 0;
		return isdigit((unsigned char) c) != 0;
	}

	/* isalpha() for a pg_wchar: 1 if c <= UCHAR_MAX and isalpha() accepts it, else 0 */
	static int
	pg_wc_isalpha(pg_wchar c)
	{
		if (c > UCHAR_MAX)
			return 0;
		return isalpha((unsigned char) c) != 0;
	}

	/* isalnum() for a pg_wchar: 1 if c <= UCHAR_MAX and isalnum() accepts it, else 0 */
	static int
	pg_wc_isalnum(pg_wchar c)
	{
		if (c > UCHAR_MAX)
			return 0;
		return isalnum((unsigned char) c) != 0;
	}

	/* isupper() for a pg_wchar: 1 if c <= UCHAR_MAX and isupper() accepts it, else 0 */
	static int
	pg_wc_isupper(pg_wchar c)
	{
		if (c > UCHAR_MAX)
			return 0;
		return isupper((unsigned char) c) != 0;
	}

	/* islower() for a pg_wchar: 1 if c <= UCHAR_MAX and islower() accepts it, else 0 */
	static int
	pg_wc_islower(pg_wchar c)
	{
		if (c > UCHAR_MAX)
			return 0;
		return islower((unsigned char) c) != 0;
	}

	/* isgraph() for a pg_wchar: 1 if c <= UCHAR_MAX and isgraph() accepts it, else 0 */
	static int
	pg_wc_isgraph(pg_wchar c)
	{
		if (c > UCHAR_MAX)
			return 0;
		return isgraph((unsigned char) c) != 0;
	}

	/* isprint() for a pg_wchar: 1 if c <= UCHAR_MAX and isprint() accepts it, else 0 */
	static int
	pg_wc_isprint(pg_wchar c)
	{
		if (c > UCHAR_MAX)
			return 0;
		return isprint((unsigned char) c) != 0;
	}

	/* ispunct() for a pg_wchar: 1 if c <= UCHAR_MAX and ispunct() accepts it, else 0 */
	static int
	pg_wc_ispunct(pg_wchar c)
	{
		if (c > UCHAR_MAX)
			return 0;
		return ispunct((unsigned char) c) != 0;
	}

	/* isspace() for a pg_wchar: 1 if c <= UCHAR_MAX and isspace() accepts it, else 0 */
	static int
	pg_wc_isspace(pg_wchar c)
	{
		if (c > UCHAR_MAX)
			return 0;
		return isspace((unsigned char) c) != 0;
	}

	/*
	 * toupper() for a pg_wchar: codes above UCHAR_MAX are returned unchanged,
	 * anything else is mapped through toupper().
	 */
	static pg_wchar
	pg_wc_toupper(pg_wchar c)
	{
		return (c > UCHAR_MAX) ? c : (pg_wchar) toupper((unsigned char) c);
	}

	/*
	 * tolower() for a pg_wchar: codes above UCHAR_MAX are returned unchanged,
	 * anything else is mapped through tolower().
	 */
	static pg_wchar
	pg_wc_tolower(pg_wchar c)
	{
		return (c > UCHAR_MAX) ? c : (pg_wchar) tolower((unsigned char) c);
	}


	/*
	* element - map collating-element name to celt
	*/
	static celt
	element(struct vars * v, /* context */
	const chr startp, / points to start of name */
	const chr endp) / points just past end of name */
	{
	const struct cname *cn;
	size_t len;

	/* generic: one-chr names stand for themselves */
	assert(startp < endp);
	len = endp - startp;
	if (len == 1)
	return *startp;

	NOTE(REG_ULOCALE);

	/* search table */
	for (cn = cnames; cn->name != NULL; cn++)
	{
	if (strlen(cn->name) == len &&
	pg_char_and_wchar_strncmp(cn->name, startp, len) == 0)
	{
	break; /* NOTE BREAK OUT */
	}
	}
	if (cn->name != NULL)
	return CHR(cn->code);

	/* couldn't find it */
	ERR(REG_ECOLLATE);
	return 0;
	}

	/*
	 * range - supply cvec for a range, including legality check
	 *
	 * Reports REG_ERANGE and returns NULL when a differs from b and a is not
	 * before b.  Without "cases" the result is just the range a..b.
	 *
	 * With "cases" the result holds the range a..b plus, as individual chrs, any
	 * lower- or upper-case counterpart of a member that falls outside a..b.  The
	 * chr space requested is the size of the range, but 100000 (an arbitrary
	 * limit) when that size is not positive or exceeds 100000; running out of
	 * that space is reported as REG_ETOOBIG.
	 */
	static struct cvec *
	range(struct vars * v,			/* context */
		  celt a,					/* range start */
		  celt b,					/* range end, might equal a */
		  int cases)				/* case-independent? */
	{
		struct cvec *cv;
		celt		cur;
		celt		other;
		int			want;
		int			pass;

		/* a == b is acceptable, anything else must be in ascending order */
		if (a != b && !before(a, b))
		{
			ERR(REG_ERANGE);
			return NULL;
		}

		/* case-sensitive: a single range entry does the job */
		if (!cases)
		{
			cv = getcvec(v, 0, 1);
			NOERRN();
			addrange(cv, a, b);
			return cv;
		}

		/* clamp the request so a huge range cannot demand a huge allocation */
		want = b - a + 1;
		if (want <= 0 || want > 100000)
			want = 100000;

		cv = getcvec(v, want, 1);
		NOERRN();
		addrange(cv, a, b);

		for (cur = a; cur <= b; cur++)
		{
			/* pass 0 looks at the lower-case form, pass 1 at the upper-case form */
			for (pass = 0; pass < 2; pass++)
			{
				if (pass == 0)
					other = pg_wc_tolower((chr) cur);
				else
					other = pg_wc_toupper((chr) cur);

				/* skip counterparts that are cur itself or already in a..b */
				if (other == cur)
					continue;
				if (!before(other, a) && !before(b, other))
					continue;

				/* the clamp above means the cvec can fill up */
				if (cv->nchrs >= cv->chrspace)
				{
					ERR(REG_ETOOBIG);
					return NULL;
				}
				addchr(cv, other);
			}
		}

		return cv;
	}

	/*
	 * before - is celt x before celt y, for purposes of range legality?
	 *
	 * Returns 1 when x is less than y, 0 otherwise.
	 */
	static int						/* predicate */
	before(celt x, celt y)
	{
		return (x < y) ? 1 : 0;
	}

	/*
	 * eclass - supply cvec for an equivalence class
	 * Must include case counterparts on request.
	 *
	 * Apart from the fake class used for testing, the class contains only c
	 * itself, or all case counterparts of c when "cases" is set.
	 *
	 * Returns NULL if getcvec() could not supply a cvec.
	 */
	static struct cvec *
	eclass(struct vars * v,			/* context */
		   celt c,					/* Collating element representing the
									 * equivalence class. */
		   int cases)				/* all cases? */
	{
		struct cvec *cv;

		/* crude fake equivalence class for testing */
		if ((v->cflags & REG_FAKE) && c == 'x')
		{
			cv = getcvec(v, 4, 0);

			/*
			 * Never hand a NULL cvec to addchr().  NOTE(review): range() follows
			 * getcvec() with NOERRN(), so getcvec() presumably records the
			 * failure itself -- confirm.
			 */
			if (cv == NULL)
				return NULL;
			addchr(cv, (chr) 'x');
			addchr(cv, (chr) 'y');
			if (cases)
			{
				addchr(cv, (chr) 'X');
				addchr(cv, (chr) 'Y');
			}
			return cv;
		}

		/* otherwise, none */
		if (cases)
			return allcases(v, c);

		cv = getcvec(v, 1, 0);
		/* a run-time check, so the guard is still there when asserts are off */
		if (cv == NULL)
			return NULL;
		addchr(cv, (chr) c);
		return cv;
	}

	/*
	* cclass - supply cvec for a character class
	*
	* Must include case counterparts on request.
	*/
	static struct cvec *
	cclass(struct vars * v, /* context */
	const chr startp, / where the name starts */
	const chr endp, / just past the end of the name */
	int cases) /* case-independent? */
	{
	size_t len;
	struct cvec *cv = NULL;
	const char **namePtr;
	int i,
	index;

	/*
	* The following arrays define the valid character class names.
	*/

	static const char *classNames[] = {
	"alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
	"lower", "print", "punct", "space", "upper", "xdigit", NULL
	};

	enum classes
	{
	CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
	CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT
	};

	/*
	* Map the name to the corresponding enumerated value.
	*/
	len = endp - startp;
	index = -1;
	for (namePtr = classNames, i = 0; *namePtr != NULL; namePtr++, i++)
	{
	if (strlen(*namePtr) == len &&
	pg_char_and_wchar_strncmp(*namePtr, startp, len) == 0)
	{
	index = i;
	break;
	}
	}
	if (index == -1)
	{
	ERR(REG_ECTYPE);
	return NULL;
	}

	/*
	* Remap lower and upper to alpha if the match is case insensitive.
	*/

	if (cases &&
	((enum classes) index == CC_LOWER \|\|
	(enum classes) index == CC_UPPER))
	index = (int) CC_ALPHA;

	/*
	* Now compute the character class contents.
	*
	* For the moment, assume that only char codes < 256 can be in these
	* classes.
	*/

	switch ((enum classes) index)
	{
	case CC_PRINT:
	cv = getcvec(v, UCHAR_MAX, 0);
	if (cv)
	{
	for (i = 0; i <= UCHAR_MAX; i++)
	{
	if (pg_wc_isprint((chr) i))
	addchr(cv, (chr) i);
	}
	}
	break;
	case CC_ALNUM:
	cv = getcvec(v, UCHAR_MAX, 0);
	if (cv)
	{
	for (i = 0; i <= UCHAR_MAX; i++)
	{
	if (pg_wc_isalnum((chr) i))
	addchr(cv, (chr) i);
	}
	}
	break;
	case CC_ALPHA:
	cv = getcvec(v, UCHAR_MAX, 0);
	if (cv)
	{
	for (i = 0; i <= UCHAR_MAX; i++)
	{
	if (pg_wc_isalpha((chr) i))
	addchr(cv, (chr) i);
	}
	}
	break;
	case CC_ASCII:
	cv = getcvec(v, 0, 1);
	if (cv)
	addrange(cv, 0, 0x7f);
	break;
	case CC_BLANK:
	cv = getcvec(v, 2, 0);
	addchr(cv, '\t');
	addchr(cv, ' ');
	break;
	case CC_CNTRL:
	cv = getcvec(v, 0, 2);
	addrange(cv, 0x0, 0x1f);
	addrange(cv, 0x7f, 0x9f);
	break;
	case CC_DIGIT:
	cv = getcvec(v, 0, 1);
	if (cv)
	addrange(cv, (chr) '0', (chr) '9');
	break;
	case CC_PUNCT:
	cv = getcvec(v, UCHAR_MAX, 0);
	if (cv)
	{
	for (i = 0; i <= UCHAR_MAX; i++)
	{
	if (pg_wc_ispunct((chr) i))
	addchr(cv, (chr) i);
	}
	}
	break;
	case CC_XDIGIT:
	cv = getcvec(v, 0, 3);
	if (cv)
	{
	addrange(cv, '0', '9');
	addrange(cv, 'a', 'f');
	addrange(cv, 'A', 'F');
	}
	break;
	case CC_SPACE:
	cv = getcvec(v, UCHAR_MAX, 0);
	if (cv)
	{
	for (i = 0; i <= UCHAR_MAX; i++)
	{
	if (pg_wc_isspace((chr) i))
	addchr(cv, (chr) i);
	}
	}
	break;
	case CC_LOWER:
	cv = getcvec(v, UCHAR_MAX, 0);
	if (cv)
	{
	for (i = 0; i <= UCHAR_MAX; i++)
	{
	if (pg_wc_islower((chr) i))
	addchr(cv, (chr) i);
	}
	}
	break;
	case CC_UPPER:
	cv = getcvec(v, UCHAR_MAX, 0);
	if (cv)
	{
	for (i = 0; i <= UCHAR_MAX; i++)
	{
	if (pg_wc_isupper((chr) i))
	addchr(cv, (chr) i);
	}
	}
	break;
	case CC_GRAPH:
	cv = getcvec(v, UCHAR_MAX, 0);
	if (cv)
	{
	for (i = 0; i <= UCHAR_MAX; i++)
	{
	if (pg_wc_isgraph((chr) i))
	addchr(cv, (chr) i);
	}
	}
	break;
	}
	if (cv == NULL)
	ERR(REG_ESPACE);
	return cv;
	}

	/*
	 * allcases - supply cvec for all case counterparts of a chr (including itself)
	 *
	 * The cvec receives the lower-case form of pc, and also the upper-case form
	 * when the two differ.
	 *
	 * Returns NULL if getcvec() could not supply a cvec.
	 */
	static struct cvec *
	allcases(struct vars * v,		/* context */
			 chr pc)				/* character to get case equivs of */
	{
		struct cvec *cv;
		chr			lc,
					uc;

		lc = pg_wc_tolower(pc);
		uc = pg_wc_toupper(pc);

		cv = getcvec(v, 2, 0);

		/*
		 * Never hand a NULL cvec to addchr().  NOTE(review): range() follows
		 * getcvec() with NOERRN(), so getcvec() presumably records the failure
		 * itself -- confirm.
		 */
		if (cv == NULL)
			return NULL;

		addchr(cv, lc);
		if (lc != uc)
			addchr(cv, uc);
		return cv;
	}

	/*
	 * cmp - chr-substring compare
	 *
	 * Compares exactly len chrs of x and y as raw memory, so embedded NULs do
	 * not end the comparison.  Callers should rely only on zero versus nonzero.
	 */
	static int						/* 0 for equal, nonzero for unequal */
	cmp(const chr *x, const chr *y, /* strings to compare */
		size_t len)					/* exact length of comparison */
	{
		const size_t nbytes = len * sizeof(chr);

		return memcmp(VS(x), VS(y), nbytes);
	}

	/*
	 * casecmp - case-independent chr-substring compare
	 *
	 * Compares exactly len chrs of x and y; embedded NULs do not end the
	 * comparison.  Two chrs match when they are equal or when pg_wc_tolower()
	 * maps them to the same value.  Returns 0 if every position matches, 1
	 * otherwise.
	 */
	static int						/* 0 for equal, nonzero for unequal */
	casecmp(const chr *x, const chr *y,		/* strings to compare */
			size_t len)				/* exact length of comparison */
	{
		size_t		i;

		for (i = 0; i < len; i++)
		{
			if (x[i] == y[i])
				continue;			/* identical, no case folding needed */
			if (pg_wc_tolower(x[i]) != pg_wc_tolower(y[i]))
				return 1;
		}
		return 0;
	}