thirdparty/civetweb-1.9.1/src/third_party/duktape-1.5.2/src-separate/duk_unicode_support.c - nifi-minifi-cpp - Git at Google

 /*
  *  Various Unicode helper functions for character classification predicates,
  *  case conversion, decoding, etc.
  */

 #include "duk_internal.h"

 /*
  *  Fast path tables
  */

 #if defined(DUK_USE_IDCHAR_FASTPATH)
 /* Classification table for the ASCII range (indices 0x00...0x7f).  It is
  * consulted by the DUK_USE_IDCHAR_FASTPATH branches of
  * duk_unicode_is_identifier_start() (entry > 0 means match) and
  * duk_unicode_is_identifier_part() (entry != 0 means match).
  */
 DUK_INTERNAL const duk_int8_t duk_is_idchar_tab[128] = {
 	/* 0: not IdentifierStart or IdentifierPart
 	 * 1: IdentifierStart and IdentifierPart
 	 * -1: IdentifierPart only
 	 */
 	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x00...0x0f */
 	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x10...0x1f */
 	0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x20...0x2f: '$' at 0x24 */
 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0,  0,  0,  0,  0,  0,   /* 0x30...0x3f: '0'...'9' */
 	0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,   /* 0x40...0x4f: 'A'...'O' */
 	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,  1,   /* 0x50...0x5f: 'P'...'Z', '_' at 0x5f */
 	0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,   /* 0x60...0x6f: 'a'...'o' */
 	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,  0    /* 0x70...0x7f: 'p'...'z' */
 };
 #endif

 /*
  *  XUTF-8 and CESU-8 encoding/decoding
  */

 /* Return the number of bytes (1...7) needed to encode 'cp' in extended
  * UTF-8.  Each extra byte widens the representable payload: 7, 11, 16,
  * 21, 26, 31 and finally 36 bits.
  */
 DUK_INTERNAL duk_small_int_t duk_unicode_get_xutf8_length(duk_ucodepoint_t cp) {
 	duk_uint_fast32_t v = (duk_uint_fast32_t) cp;

 	if (v < 0x80UL) {
 		return 1;  /* fits in 7 bits */
 	}
 	if (v < 0x800UL) {
 		return 2;  /* fits in 11 bits */
 	}
 	if (v < 0x10000UL) {
 		return 3;  /* fits in 16 bits */
 	}
 	if (v < 0x200000UL) {
 		return 4;  /* fits in 21 bits */
 	}
 	if (v < 0x4000000UL) {
 		return 5;  /* fits in 26 bits */
 	}
 	if (v < (duk_ucodepoint_t) 0x80000000UL) {
 		return 6;  /* fits in 31 bits */
 	}

 	/* Anything larger uses the 7-byte (36 bit) form. */
 	return 7;
 }

 #if defined(DUK_USE_ASSERTIONS)
 /* Return the number of bytes needed to encode 'cp' in CESU-8: 1, 2 or 3
  * bytes for codepoints below U+10000, and 6 bytes for everything else.
  */
 DUK_INTERNAL duk_small_int_t duk_unicode_get_cesu8_length(duk_ucodepoint_t cp) {
 	duk_uint_fast32_t v = (duk_uint_fast32_t) cp;

 	if (v < 0x80UL) {
 		return 1;  /* 7 bits */
 	}
 	if (v < 0x800UL) {
 		return 2;  /* 11 bits */
 	}
 	if (v < 0x10000UL) {
 		return 3;  /* 16 bits */
 	}

 	/* Encoded as a surrogate pair, each half taking 3 bytes for a
 	 * total of 6 bytes.  Codepoints above U+10FFFF also encode as
 	 * 6 bytes, see duk_unicode_encode_cesu8().
 	 */
 	return 3 + 3;
 }
 #endif  /* DUK_USE_ASSERTIONS */

 /* Marker bits for the initial byte of an extended UTF-8 sequence, indexed
  * by (encoded length - 1); used by duk_unicode_encode_xutf8().  Entry 0 is
  * zero because a single byte encoding carries no marker.
  */
 DUK_INTERNAL const duk_uint8_t duk_unicode_xutf8_markers[7] = {
 	0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe
 };

 /* Encode 'cp' into 'out' as extended UTF-8 and return the number of bytes
  * written.  'out' must have space for at least
  * DUK_UNICODE_MAX_XUTF8_LENGTH bytes.  Any 32-bit (unsigned) codepoint
  * can be encoded.
  */
 DUK_INTERNAL duk_small_int_t duk_unicode_encode_xutf8(duk_ucodepoint_t cp, duk_uint8_t *out) {
 	duk_uint_fast32_t bits = (duk_uint_fast32_t) cp;
 	duk_small_int_t len;
 	duk_small_int_t idx;

 	len = duk_unicode_get_xutf8_length(cp);
 	DUK_ASSERT(len > 0);

 	/* Continuation bytes are filled from the last byte towards the
 	 * first, each one consuming the lowest six remaining bits.
 	 */
 	for (idx = len - 1; idx > 0; idx--) {
 		out[idx] = (duk_uint8_t) (0x80 + (bits & 0x3f));
 		bits >>= 6;
 	}

 	/* Initial byte: length marker plus whatever bits remain.  Masking
 	 * of 'bits' is not necessary because of the range check and the
 	 * shifting above: no bits overlapping the marker should be set.
 	 * The table index is always >= 0 because len > 0.
 	 */
 	out[0] = (duk_uint8_t) (duk_unicode_xutf8_markers[len - 1] + bits);

 	return len;
 }

 /* Encode 'cp' into 'out' as CESU-8 and return the number of bytes written
  * (1, 2, 3 or 6).  'out' must have space for at least
  * DUK_UNICODE_MAX_CESU8_LENGTH bytes.  Codepoints above U+10FFFF encode
  * to garbage but never write more than 6 bytes.
  */
 DUK_INTERNAL duk_small_int_t duk_unicode_encode_cesu8(duk_ucodepoint_t cp, duk_uint8_t *out) {
 	duk_uint_fast32_t v = (duk_uint_fast32_t) cp;

 	if (v < 0x80UL) {
 		out[0] = (duk_uint8_t) v;
 		return 1;
 	}

 	if (v < 0x800UL) {
 		out[0] = (duk_uint8_t) (0xc0 + ((v >> 6) & 0x1f));
 		out[1] = (duk_uint8_t) (0x80 + (v & 0x3f));
 		return 2;
 	}

 	if (v < 0x10000UL) {
 		/* Surrogate codepoints given as input are encoded here
 		 * like any other 16-bit value.
 		 */
 		out[0] = (duk_uint8_t) (0xe0 + ((v >> 12) & 0x0f));
 		out[1] = (duk_uint8_t) (0x80 + ((v >> 6) & 0x3f));
 		out[2] = (duk_uint8_t) (0x80 + (v & 0x3f));
 		return 3;
 	}

 	/*
 	 *  Codepoints above U+FFFF are written as a surrogate pair so
 	 *  that every CESU-8 codepoint is a 16-bit value, as expected in
 	 *  Ecmascript.  Each surrogate always takes 3 bytes in CESU-8.
 	 *  See: http://en.wikipedia.org/wiki/Surrogate_pair
 	 *
 	 *  After subtracting 0x10000 the value has 20 bits, 10 bits
 	 *  (A and B) for each half of the pair:
 	 *
 	 *    v = 0b00000000 0000AAAA AAAAAABB BBBBBBBB
 	 *  sp1 = 0b110110AA AAAAAAAA  (0xd800 + ((v >> 10) & 0x3ff))
 	 *  sp2 = 0b110111BB BBBBBBBB  (0xdc00 + (v & 0x3ff))
 	 *
 	 *  Encoded into CESU-8:
 	 *
 	 *  sp1 -> 0b11101101  (0xe0 + ((sp1 >> 12) & 0x0f))
 	 *      -> 0b1010AAAA  (0x80 + ((sp1 >> 6) & 0x3f))
 	 *      -> 0b10AAAAAA  (0x80 + (sp1 & 0x3f))
 	 *  sp2 -> 0b11101101  (0xe0 + ((sp2 >> 12) & 0x0f))
 	 *      -> 0b1011BBBB  (0x80 + ((sp2 >> 6) & 0x3f))
 	 *      -> 0b10BBBBBB  (0x80 + (sp2 & 0x3f))
 	 *
 	 *  The bytes are computed directly from 'v' without the sp1/sp2
 	 *  temporaries, which saves around 20 bytes of code.
 	 */
 	v -= 0x10000UL;

 	out[0] = (duk_uint8_t) (0xed);
 	out[1] = (duk_uint8_t) (0xa0 + ((v >> 16) & 0x0f));
 	out[2] = (duk_uint8_t) (0x80 + ((v >> 10) & 0x3f));
 	out[3] = (duk_uint8_t) (0xed);
 	out[4] = (duk_uint8_t) (0xb0 + ((v >> 6) & 0x0f));
 	out[5] = (duk_uint8_t) (0x80 + (v & 0x3f));
 	return 6;
 }

 /* Decode helper: decode one extended UTF-8 codepoint starting at '*ptr'.
  *
  * The read position must lie within [ptr_start, ptr_end).  On success the
  * decoded value is stored to '*out_cp', '*ptr' is advanced past the
  * decoded sequence, and 1 is returned.  On failure 0 is returned and
  * neither '*ptr' nor '*out_cp' is modified.  Failure cases are: read
  * position out of range, an initial byte of form 10xxxxxx or 0xff, or a
  * sequence that would extend past 'ptr_end'.
  *
  * Continuation bytes are not validated; only their low six bits are used.
  * The result is accumulated in a 32-bit value, so bits shifted beyond
  * 32 bits (possible with the 7-byte form) are discarded.
  */
 DUK_INTERNAL duk_small_int_t duk_unicode_decode_xutf8(duk_hthread *thr, const duk_uint8_t **ptr, const duk_uint8_t *ptr_start, const duk_uint8_t *ptr_end, duk_ucodepoint_t *out_cp) {
 	const duk_uint8_t *p;
 	duk_uint32_t res;
 	duk_uint_fast8_t ch;
 	duk_small_int_t n;  /* number of continuation bytes still to read */

 	DUK_UNREF(thr);

 	p = *ptr;
 	if (p < ptr_start || p >= ptr_end) {
 		goto fail;
 	}

 	/*
 	 *  UTF-8 decoder which accepts longer than standard byte sequences.
 	 *  This allows full 32-bit code points to be used.
 	 */

 	ch = (duk_uint_fast8_t) (*p++);
 	if (ch < 0x80) {
 		/* 0xxx xxxx   [7 bits] */
 		res = (duk_uint32_t) (ch & 0x7f);
 		n = 0;
 	} else if (ch < 0xc0) {
 		/* 10xx xxxx -> invalid */
 		goto fail;
 	} else if (ch < 0xe0) {
 		/* 110x xxxx   10xx xxxx   [11 bits] */
 		res = (duk_uint32_t) (ch & 0x1f);
 		n = 1;
 	} else if (ch < 0xf0) {
 		/* 1110 xxxx   10xx xxxx   10xx xxxx   [16 bits] */
 		res = (duk_uint32_t) (ch & 0x0f);
 		n = 2;
 	} else if (ch < 0xf8) {
 		/* 1111 0xxx   10xx xxxx   10xx xxxx   10xx xxxx   [21 bits] */
 		res = (duk_uint32_t) (ch & 0x07);
 		n = 3;
 	} else if (ch < 0xfc) {
 		/* 1111 10xx   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   [26 bits] */
 		res = (duk_uint32_t) (ch & 0x03);
 		n = 4;
 	} else if (ch < 0xfe) {
 		/* 1111 110x   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   [31 bits] */
 		res = (duk_uint32_t) (ch & 0x01);
 		n = 5;
 	} else if (ch < 0xff) {
 		/* 1111 1110   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   [36 bits] */
 		res = (duk_uint32_t) (0);
 		n = 6;
 	} else {
 		/* 8-byte format could be:
 		 * 1111 1111   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   [41 bits]
 		 *
 		 * However, this format would not have a zero bit following the
 		 * leading one bits and would not allow 0xFF to be used as an
 		 * "invalid xutf-8" marker for internal keys.  Further, 8-byte
 		 * encodings (up to 41 bit code points) are not currently needed.
 		 */
 		goto fail;
 	}

 	DUK_ASSERT(p >= ptr_start);  /* verified at beginning */
 	DUK_ASSERT(p <= ptr_end);    /* p was < ptr_end before the single increment */

 	/* Check that all continuation bytes are inside the input.  The
 	 * check compares the remaining length against 'n' rather than
 	 * computing 'p + n', because forming a pointer more than one
 	 * element past the end of the buffer is undefined behavior in C.
 	 */
 	if ((duk_size_t) (ptr_end - p) < (duk_size_t) n) {
 		goto fail;
 	}

 	while (n > 0) {
 		DUK_ASSERT(p >= ptr_start && p < ptr_end);
 		res = res << 6;
 		res += (duk_uint32_t) ((*p++) & 0x3f);
 		n--;
 	}

 	*ptr = p;
 	*out_cp = res;
 	return 1;

  fail:
 	return 0;
 }

 /* Checked variant of duk_unicode_decode_xutf8(): returns the decoded
  * codepoint directly and raises an error through DUK_ERROR_INTERNAL() when
  * decoding fails.  Used by e.g. duk_regexp_executor.c and string built-ins.
  */
 DUK_INTERNAL duk_ucodepoint_t duk_unicode_decode_xutf8_checked(duk_hthread *thr, const duk_uint8_t **ptr, const duk_uint8_t *ptr_start, const duk_uint8_t *ptr_end) {
 	duk_ucodepoint_t decoded;

 	if (!duk_unicode_decode_xutf8(thr, ptr, ptr_start, ptr_end, &decoded)) {
 		DUK_ERROR_INTERNAL(thr, "utf-8 decode failed");  /* XXX: 'internal error' is a bit of a misnomer */
 		DUK_UNREACHABLE();
 		return 0;
 	}

 	return decoded;
 }

 /* Compute (extended) utf-8 length without codepoint encoding validation,
  * used for string interning.
  *
  * NOTE: This algorithm is performance critical, more so than string hashing
  * in some cases.  It is needed when interning a string and needs to scan
  * every byte of the string with no skipping.  Having an ASCII fast path
  * is useful if possible in the algorithm.  The current algorithms were
  * chosen from several variants, based on x64 gcc -O2 testing.  See:
  * https://github.com/svaarala/duktape/pull/422
  *
  * NOTE: must match src/dukutil.py:duk_unicode_unvalidated_utf8_length().
  */

 #if defined(DUK_USE_PREFER_SIZE)
 /* Small variant; roughly 150 bytes smaller than the fast variant.
  *
  * Counts the bytes of [data, data + blen) that are not continuation bytes
  * (continuation bytes being those in [0x80,0xbf]) and returns that count.
  */
 DUK_INTERNAL duk_size_t duk_unicode_unvalidated_utf8_length(const duk_uint8_t *data, duk_size_t blen) {
 	const duk_uint8_t *cursor;
 	const duk_uint8_t *limit = data + blen;
 	duk_size_t n_cont = 0;

 	for (cursor = data; cursor != limit; cursor++) {
 		duk_uint8_t b = *cursor;
 		if (DUK_UNLIKELY(b >= 0x80 && b <= 0xbf)) {
 			n_cont++;
 		}
 	}

 	DUK_ASSERT(n_cont <= blen);
 	DUK_ASSERT(blen - n_cont <= blen);
 	return blen - n_cont;
 }
 #else  /* DUK_USE_PREFER_SIZE */
 /* This seems like a good overall approach.  Fast path for ASCII in 4 byte
  * blocks.
  *
  * Returns the number of bytes in [data, data + blen) which are not in the
  * continuation byte range [0x80,0xbf], i.e. 'blen' minus the continuation
  * byte count.  The encoding itself is not validated.
  *
  * The scan has three phases: a bytewise prefix until 'p' is 4-byte
  * aligned, a loop over aligned 32-bit words, and a bytewise tail.  Inputs
  * shorter than 16 bytes use only the bytewise tail loop.
  */
 DUK_INTERNAL duk_size_t duk_unicode_unvalidated_utf8_length(const duk_uint8_t *data, duk_size_t blen) {
 	const duk_uint8_t *p;
 	const duk_uint8_t *p_end;
 	const duk_uint32_t *p32_end;
 	const duk_uint32_t *p32;
 	duk_size_t ncont;
 	duk_size_t clen;

 	ncont = 0;  /* number of continuation (non-initial) bytes in [0x80,0xbf] */
 	p = data;
 	p_end = data + blen;
 	if (blen < 16) {
 		goto skip_fastpath;
 	}

 	/* Align 'p' to 4; the input data may have arbitrary alignment.
 	 * End of string check not needed because blen >= 16.
 	 */
 	while (((duk_size_t) (const void *) p) & 0x03U) {
 		duk_uint8_t x;
 		x = *p++;
 		if (DUK_UNLIKELY(x >= 0x80 && x <= 0xbf)) {
 			ncont++;
 		}
 	}

 	/* Full, aligned 4-byte reads.  'p32_end' is 'p' plus the remaining
 	 * byte count rounded down to a multiple of four, so the word loop
 	 * never reads past 'p_end'.
 	 *
 	 * NOTE(review): the byte data is read through a duk_uint32_t
 	 * pointer; this presumably relies on the build tolerating such
 	 * type-punned access -- confirm for strict aliasing builds.
 	 */
 	p32_end = (const duk_uint32_t *) (const void *) (p + ((duk_size_t) (p_end - p) & (duk_size_t) (~0x03)));
 	p32 = (const duk_uint32_t *) (const void *) p;
 	while (p32 != (const duk_uint32_t *) p32_end) {
 		duk_uint32_t x;
 		x = *p32++;
 		if (DUK_LIKELY((x & 0x80808080UL) == 0)) {
 			;  /* ASCII fast path */
 		} else {
 			/* Flip highest bit of each byte which changes
 			 * the bit pattern 10xxxxxx into 00xxxxxx which
 			 * allows an easy bit mask test.
 			 *
 			 * Each of the four tests below covers one byte
 			 * of the word.  Because all four bytes are
 			 * tested the same way, the count does not depend
 			 * on byte order.
 			 */
 			x ^= 0x80808080UL;
 			if (DUK_UNLIKELY(!(x & 0xc0000000UL))) {
 				ncont++;
 			}
 			if (DUK_UNLIKELY(!(x & 0x00c00000UL))) {
 				ncont++;
 			}
 			if (DUK_UNLIKELY(!(x & 0x0000c000UL))) {
 				ncont++;
 			}
 			if (DUK_UNLIKELY(!(x & 0x000000c0UL))) {
 				ncont++;
 			}
 		}
 	}
 	p = (const duk_uint8_t *) p32;
 	/* Fall through to handle the rest. */

  skip_fastpath:
 	/* Bytewise loop for the 0-3 trailing bytes, or for the whole input
 	 * when the fast path was skipped.
 	 */
 	while (p != p_end) {
 		duk_uint8_t x;
 		x = *p++;
 		if (DUK_UNLIKELY(x >= 0x80 && x <= 0xbf)) {
 			ncont++;
 		}
 	}

 	DUK_ASSERT(ncont <= blen);
 	clen = blen - ncont;
 	DUK_ASSERT(clen <= blen);
 	return clen;
 }
 #endif  /* DUK_USE_PREFER_SIZE */

 /*
  *  Unicode range matcher
  *
  *  Matches a codepoint against a packed bitstream of character ranges.
  *  Used for slow path Unicode matching.
  */

 /* Decode one variable length value from the range bitstream.  Must match
  * src/extract_chars.py, generate_match_table3().
  *
  * Layout: a 4-bit field holds values 0x0...0xe directly.  Otherwise an
  * 8-bit field follows: 0x00...0xfd encodes (field + 0x0f), 0xfe is
  * followed by a 12-bit field, and 0xff by a 24-bit field, each biased by
  * the sum of the smaller ranges.
  */
 DUK_LOCAL duk_uint32_t duk__uni_decode_value(duk_bitdecoder_ctx *bd_ctx) {
 	duk_uint32_t v;

 	v = (duk_uint32_t) duk_bd_decode(bd_ctx, 4);
 	if (v > 0x0eU) {
 		v = (duk_uint32_t) duk_bd_decode(bd_ctx, 8);
 		if (v <= 0xfdU) {
 			v = v + 0x0f;
 		} else if (v == 0xfeU) {
 			v = (duk_uint32_t) duk_bd_decode(bd_ctx, 12);
 			v = v + 0x0fU + 0xfeU;
 		} else {
 			v = (duk_uint32_t) duk_bd_decode(bd_ctx, 24);
 			v = v + 0x0fU + 0xfeU + 0x1000UL;
 		}
 	}

 	return v;
 }

 /* Check whether 'cp' falls inside any range of the packed range table
  * 'unitab' ('unilen' bytes).  Returns 1 on a match, 0 otherwise.
  *
  * The table is a sequence of (start delta, length) pairs decoded with
  * duk__uni_decode_value(): the start delta is relative to the end of the
  * previous range and a zero start delta terminates the table.
  */
 DUK_LOCAL duk_small_int_t duk__uni_range_match(const duk_uint8_t *unitab, duk_size_t unilen, duk_codepoint_t cp) {
 	duk_bitdecoder_ctx bd_ctx;
 	duk_codepoint_t delta;
 	duk_codepoint_t range_start;
 	duk_codepoint_t range_end = 0;

 	DUK_MEMZERO(&bd_ctx, sizeof(bd_ctx));
 	bd_ctx.data = (const duk_uint8_t *) unitab;
 	bd_ctx.length = (duk_size_t) unilen;

 	while ((delta = (duk_codepoint_t) duk__uni_decode_value(&bd_ctx)) != 0) {
 		/* The range is [range_start,range_end], both inclusive. */
 		range_start = range_end + delta;
 		range_end = range_start + (duk_codepoint_t) duk__uni_decode_value(&bd_ctx);

 		DUK_DDD(DUK_DDDPRINT("duk__uni_range_match: cp=%06lx range=[0x%06lx,0x%06lx]",
 		                     (unsigned long) cp, (unsigned long) range_start, (unsigned long) range_end));
 		if (cp >= range_start && cp <= range_end) {
 			return 1;
 		}
 	}

 	return 0;
 }

 /*
  *  "WhiteSpace" production check.
  */

 DUK_INTERNAL duk_small_int_t duk_unicode_is_whitespace(duk_codepoint_t cp) {
 	/*
 	 *  E5 Section 7.2 specifies six characters specifically as
 	 *  white space:
 	 *
 	 *    0009;<control>;Cc;0;S;;;;;N;CHARACTER TABULATION;;;;
 	 *    000B;<control>;Cc;0;S;;;;;N;LINE TABULATION;;;;
 	 *    000C;<control>;Cc;0;WS;;;;;N;FORM FEED (FF);;;;
 	 *    0020;SPACE;Zs;0;WS;;;;;N;;;;;
 	 *    00A0;NO-BREAK SPACE;Zs;0;CS;<noBreak> 0020;;;;N;NON-BREAKING SPACE;;;;
 	 *    FEFF;ZERO WIDTH NO-BREAK SPACE;Cf;0;BN;;;;;N;BYTE ORDER MARK;;;;
 	 *
 	 *  It also specifies any Unicode category 'Zs' characters as white
 	 *  space.  These can be extracted with the "src/extract_chars.py" script.
 	 *  Current result:
 	 *
 	 *    RAW OUTPUT:
 	 *    ===========
 	 *    0020;SPACE;Zs;0;WS;;;;;N;;;;;
 	 *    00A0;NO-BREAK SPACE;Zs;0;CS;<noBreak> 0020;;;;N;NON-BREAKING SPACE;;;;
 	 *    1680;OGHAM SPACE MARK;Zs;0;WS;;;;;N;;;;;
 	 *    180E;MONGOLIAN VOWEL SEPARATOR;Zs;0;WS;;;;;N;;;;;
 	 *    2000;EN QUAD;Zs;0;WS;2002;;;;N;;;;;
 	 *    2001;EM QUAD;Zs;0;WS;2003;;;;N;;;;;
 	 *    2002;EN SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
 	 *    2003;EM SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
 	 *    2004;THREE-PER-EM SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
 	 *    2005;FOUR-PER-EM SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
 	 *    2006;SIX-PER-EM SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
 	 *    2007;FIGURE SPACE;Zs;0;WS;<noBreak> 0020;;;;N;;;;;
 	 *    2008;PUNCTUATION SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
 	 *    2009;THIN SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
 	 *    200A;HAIR SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
 	 *    202F;NARROW NO-BREAK SPACE;Zs;0;CS;<noBreak> 0020;;;;N;;;;;
 	 *    205F;MEDIUM MATHEMATICAL SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
 	 *    3000;IDEOGRAPHIC SPACE;Zs;0;WS;<wide> 0020;;;;N;;;;;
 	 *
 	 *    RANGES:
 	 *    =======
 	 *    0x0020
 	 *    0x00a0
 	 *    0x1680
 	 *    0x180e
 	 *    0x2000 ... 0x200a
 	 *    0x202f
 	 *    0x205f
 	 *    0x3000
 	 *
 	 *  A manual decoder (below) is probably most compact for this.
 	 */

 	/* Split the codepoint into a low byte and the remaining upper
 	 * part; the upper part does not fit into an uchar.  cp == -1 (EOF)
 	 * matches none of the cases below and yields 0.
 	 */
 	duk_uint_fast8_t low_byte = (duk_uint_fast8_t) (cp & 0xff);
 	duk_uint_fast32_t upper = (duk_uint_fast32_t) (cp >> 8);

 	if (upper == 0x0000UL) {
 		/* U+0000 ... U+00FF */
 		return (low_byte == 0x09U || low_byte == 0x0bU || low_byte == 0x0cU ||
 		        low_byte == 0x20U || low_byte == 0xa0U) ? 1 : 0;
 	}

 	if (upper == 0x0020UL) {
 		/* U+2000 ... U+20FF */
 		return (low_byte <= 0x0aU || low_byte == 0x2fU || low_byte == 0x5fU) ? 1 : 0;
 	}

 	/* Remaining individual codepoints. */
 	return (cp == 0x1680L || cp == 0x180eL || cp == 0x3000L ||
 	        cp == 0xfeffL) ? 1 : 0;
 }

 /*
  *  "LineTerminator" production check.
  */

 DUK_INTERNAL duk_small_int_t duk_unicode_is_line_terminator(duk_codepoint_t cp) {
 	/*
 	 *  E5 Section 7.3
 	 *
 	 *  Matches U+000A, U+000D, U+2028 and U+2029 individually.  A
 	 *  LineTerminatorSequence essentially merges <CR> <LF> sequences
 	 *  into a single line terminator; that merging is not done here
 	 *  and must be handled by the caller.
 	 */

 	return (cp == 0x000aL || cp == 0x000dL ||
 	        cp == 0x2028L || cp == 0x2029L) ? 1 : 0;
 }

 /*
  *  "IdentifierStart" production check.
  */

 DUK_INTERNAL duk_small_int_t duk_unicode_is_identifier_start(duk_codepoint_t cp) {
 	/*
 	 *  E5 Section 7.6:
 	 *
 	 *    IdentifierStart:
 	 *      UnicodeLetter
 	 *      $
 	 *      _
 	 *      \ UnicodeEscapeSequence
 	 *
 	 *  The only multi-character alternative is:
 	 *
 	 *    \ UnicodeEscapeSequence
 	 *
 	 *  The '\' character is -not- matched by this function.  The caller
 	 *  should decode the escape and then call this function to check
 	 *  whether the decoded character is acceptable (see discussion in
 	 *  E5 Section 7.6).
 	 *
 	 *  The "UnicodeLetter" alternative allows letters from various
 	 *  Unicode categories.  These can be extracted with the
 	 *  "src/extract_chars.py" script.
 	 *
 	 *  Because the result has hundreds of Unicode codepoint ranges,
 	 *  values >= 0x80 are matched using a very slow range-by-range scan
 	 *  over a packed range format.
 	 *
 	 *  The ASCII portion (codepoints 0x00 ... 0x7f) has a fast path
 	 *  because it matters the most.  The ASCII related ranges of
 	 *  IdentifierStart are:
 	 *
 	 *    0x0041 ... 0x005a     ['A' ... 'Z']
 	 *    0x0061 ... 0x007a     ['a' ... 'z']
 	 *    0x0024                ['$']
 	 *    0x005f                ['_']
 	 */

 	/* ASCII (and EOF) fast path -- quick accept and reject */
 	if (cp <= 0x7fL) {
 #if defined(DUK_USE_IDCHAR_FASTPATH)
 		if (cp < 0) {
 			return 0;
 		}
 		return duk_is_idchar_tab[cp] > 0;
 #else
 		return ((cp >= 'a' && cp <= 'z') ||
 		        (cp >= 'A' && cp <= 'Z') ||
 		        cp == '_' || cp == '$') ? 1 : 0;
 #endif
 	}

 	/* Non-ASCII slow path (range-by-range linear comparison), very slow.
 	 * duk__uni_range_match() returns 1 or 0, which is passed through.
 	 */

 #ifdef DUK_USE_SOURCE_NONBMP
 	return duk__uni_range_match(duk_unicode_ids_noa,
 	                            (duk_size_t) sizeof(duk_unicode_ids_noa),
 	                            (duk_codepoint_t) cp);
 #else
 	if (cp >= 0x10000L) {
 		/* without explicit non-BMP support, assume non-BMP characters
 		 * are always accepted as identifier characters.
 		 */
 		return 1;
 	}
 	return duk__uni_range_match(duk_unicode_ids_noabmp,
 	                            sizeof(duk_unicode_ids_noabmp),
 	                            (duk_codepoint_t) cp);
 #endif
 }

 /*
  *  "IdentifierPart" production check.
  */

 DUK_INTERNAL duk_small_int_t duk_unicode_is_identifier_part(duk_codepoint_t cp) {
 	/*
 	 *  E5 Section 7.6:
 	 *
 	 *    IdentifierPart:
 	 *      IdentifierStart
 	 *      UnicodeCombiningMark
 	 *      UnicodeDigit
 	 *      UnicodeConnectorPunctuation
 	 *      <ZWNJ>  [U+200C]
 	 *      <ZWJ>   [U+200D]
 	 *
 	 *  IdentifierPart has one multi-character production as part of
 	 *  its IdentifierStart alternative.  The '\' character of an escape
 	 *  sequence is not matched here, see discussion in
 	 *  duk_unicode_is_identifier_start().
 	 *
 	 *  Non-ASCII characters (codepoints >= 0x80) are matched with a very
 	 *  slow linear range-by-range scan.  The codepoint is first compared
 	 *  to the IdentifierStart ranges, and if it doesn't match, then to a
 	 *  set consisting of code points in IdentifierPart but not in
 	 *  IdentifierStart.  This keeps the unicode range data small at the
 	 *  expense of speed.  Both range sets can be extracted with the
 	 *  "src/extract_chars.py" script.
 	 *
 	 *  The ASCII fast path consists of:
 	 *
 	 *    0x0030 ... 0x0039     ['0' ... '9', UnicodeDigit]
 	 *    0x0041 ... 0x005a     ['A' ... 'Z', IdentifierStart]
 	 *    0x0061 ... 0x007a     ['a' ... 'z', IdentifierStart]
 	 *    0x0024                ['$', IdentifierStart]
 	 *    0x005f                ['_', IdentifierStart and
 	 *                                UnicodeConnectorPunctuation]
 	 *
 	 *  UnicodeCombiningMark has no code points <= 0x7f.
 	 *
 	 *  UnicodeCombiningMark -> categories Mn, Mc
 	 *  UnicodeDigit -> categories Nd
 	 *  UnicodeConnectorPunctuation -> categories Pc
 	 */

 	/* ASCII (and EOF) fast path -- quick accept and reject */
 	if (cp <= 0x7fL) {
 #if defined(DUK_USE_IDCHAR_FASTPATH)
 		if (cp < 0) {
 			return 0;
 		}
 		return duk_is_idchar_tab[cp] != 0;
 #else
 		return ((cp >= 'a' && cp <= 'z') ||
 		        (cp >= 'A' && cp <= 'Z') ||
 		        (cp >= '0' && cp <= '9') ||
 		        cp == '_' || cp == '$') ? 1 : 0;
 #endif
 	}

 	/* Non-ASCII slow path (range-by-range linear comparison), very slow */

 #ifdef DUK_USE_SOURCE_NONBMP
 	return duk__uni_range_match(duk_unicode_ids_noa,
 	                            sizeof(duk_unicode_ids_noa),
 	                            (duk_codepoint_t) cp) ||
 	       duk__uni_range_match(duk_unicode_idp_m_ids_noa,
 	                            sizeof(duk_unicode_idp_m_ids_noa),
 	                            (duk_codepoint_t) cp);
 #else
 	if (cp >= 0x10000L) {
 		/* without explicit non-BMP support, assume non-BMP characters
 		 * are always accepted as identifier characters.
 		 */
 		return 1;
 	}
 	return duk__uni_range_match(duk_unicode_ids_noabmp,
 	                            sizeof(duk_unicode_ids_noabmp),
 	                            (duk_codepoint_t) cp) ||
 	       duk__uni_range_match(duk_unicode_idp_m_ids_noabmp,
 	                            sizeof(duk_unicode_idp_m_ids_noabmp),
 	                            (duk_codepoint_t) cp);
 #endif
 }

 /*
  *  Unicode letter check.
  */

 DUK_INTERNAL duk_small_int_t duk_unicode_is_letter(duk_codepoint_t cp) {
 	/*
 	 *  Unicode letter is now taken to be the categories:
 	 *
 	 *    Lu, Ll, Lt, Lm, Lo
 	 *
 	 *  (Not sure if this is exactly correct.)
 	 *
 	 *  The ASCII fast path consists of:
 	 *
 	 *    0x0041 ... 0x005a     ['A' ... 'Z']
 	 *    0x0061 ... 0x007a     ['a' ... 'z']
 	 *
 	 *  For non-ASCII input a codepoint is a letter when it matches the
 	 *  "identifier start" ranges but not the separate exclusion ranges
 	 *  (the "ids_m_let" tables).
 	 */

 	/* ASCII (and EOF) fast path -- quick accept and reject */
 	if (cp <= 0x7fL) {
 		return ((cp >= 'a' && cp <= 'z') ||
 		        (cp >= 'A' && cp <= 'Z')) ? 1 : 0;
 	}

 	/* Non-ASCII slow path (range-by-range linear comparison), very slow */

 #ifdef DUK_USE_SOURCE_NONBMP
 	return duk__uni_range_match(duk_unicode_ids_noa,
 	                            sizeof(duk_unicode_ids_noa),
 	                            (duk_codepoint_t) cp) &&
 	       !duk__uni_range_match(duk_unicode_ids_m_let_noa,
 	                             sizeof(duk_unicode_ids_m_let_noa),
 	                             (duk_codepoint_t) cp);
 #else
 	if (cp >= 0x10000L) {
 		/* without explicit non-BMP support, assume non-BMP characters
 		 * are always accepted as letters.
 		 */
 		return 1;
 	}
 	return duk__uni_range_match(duk_unicode_ids_noabmp,
 	                            sizeof(duk_unicode_ids_noabmp),
 	                            (duk_codepoint_t) cp) &&
 	       !duk__uni_range_match(duk_unicode_ids_m_let_noabmp,
 	                             sizeof(duk_unicode_ids_m_let_noabmp),
 	                             (duk_codepoint_t) cp);
 #endif
 }

 /*
  *  Complex case conversion helper which decodes a bit-packed conversion
  *  control stream generated by unicode/extract_caseconv.py.  The conversion
  *  is very slow because it runs through the conversion data in a linear
  *  fashion to save space (which is why ASCII characters have a special
  *  fast path before arriving here).
  *
  *  The particular bit counts etc have been determined experimentally to
  *  be small but still sufficient, and must match the Python script
  *  (src/extract_caseconv.py).
  *
  *  The return value is the case converted codepoint or -1 if the conversion
  *  results in multiple characters (this is useful for regexp Canonicalization
  *  operation).  If 'bw' is not NULL, the result codepoint(s) are also
  *  written (as extended UTF-8) through the bufwriter.
  *
  *  The bitstream read from 'bd_ctx' has three consecutive sections which
  *  are tried in order: range rules, 1:1 rules, and 1:n rules.  If no rule
  *  matches, the input codepoint is the result.
  *
  *  Context and locale specific rules must be checked before consulting
  *  this function.
  */

 DUK_LOCAL
 duk_codepoint_t duk__slow_case_conversion(duk_hthread *thr,
                                           duk_bufwriter_ctx *bw,
                                           duk_codepoint_t cp,
                                           duk_bitdecoder_ctx *bd_ctx) {
 	duk_small_int_t skip = 0;
 	duk_small_int_t n;
 	duk_small_int_t t;
 	duk_small_int_t count;
 	duk_codepoint_t tmp_cp;
 	duk_codepoint_t start_i;
 	duk_codepoint_t start_o;

 	DUK_UNREF(thr);
 	DUK_ASSERT(bd_ctx != NULL);

 	DUK_DDD(DUK_DDDPRINT("slow case conversion for codepoint: %ld", (long) cp));

 	/* range conversion with a "skip": one group of ranges per skip
 	 * value (1, 2, 3, ...).  Each group starts with a 6-bit range
 	 * count; the value 0x3f ends the section.
 	 */
 	DUK_DDD(DUK_DDDPRINT("checking ranges"));
 	for (;;) {
 		skip++;
 		n = (duk_small_int_t) duk_bd_decode(bd_ctx, 6);
 		if (n == 0x3f) {
 			/* end marker */
 			break;
 		}
 		DUK_DDD(DUK_DDDPRINT("skip=%ld, n=%ld", (long) skip, (long) n));

 		while (n--) {
 			/* Range entry: 16-bit input start, 16-bit output
 			 * start, 7-bit count of mapped codepoints.
 			 */
 			start_i = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
 			start_o = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
 			count = (duk_small_int_t) duk_bd_decode(bd_ctx, 7);
 			DUK_DDD(DUK_DDDPRINT("range: start_i=%ld, start_o=%ld, count=%ld, skip=%ld",
 			                     (long) start_i, (long) start_o, (long) count, (long) skip));

 			if (cp >= start_i) {
 				tmp_cp = cp - start_i;  /* always >= 0 */
 				/* Match when the offset is inside the range
 				 * and lands on a multiple of 'skip'; the
 				 * same offset is applied to the output.
 				 */
 				if (tmp_cp < (duk_codepoint_t) count * (duk_codepoint_t) skip &&
 				    (tmp_cp % (duk_codepoint_t) skip) == 0) {
 					DUK_DDD(DUK_DDDPRINT("range matches input codepoint"));
 					cp = start_o + tmp_cp;
 					goto single;
 				}
 			}
 		}
 	}

 	/* 1:1 conversion: 6-bit entry count, then (16-bit input, 16-bit
 	 * output) pairs.
 	 */
 	n = (duk_small_int_t) duk_bd_decode(bd_ctx, 6);
 	DUK_DDD(DUK_DDDPRINT("checking 1:1 conversions (count %ld)", (long) n));
 	while (n--) {
 		start_i = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
 		start_o = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
 		DUK_DDD(DUK_DDDPRINT("1:1 conversion %ld -> %ld", (long) start_i, (long) start_o));
 		if (cp == start_i) {
 			DUK_DDD(DUK_DDDPRINT("1:1 matches input codepoint"));
 			cp = start_o;
 			goto single;
 		}
 	}

 	/* complex, multicharacter conversion: 7-bit entry count, then
 	 * entries of a 16-bit input codepoint, a 2-bit output length 't',
 	 * and 't' 16-bit output codepoints.
 	 */
 	n = (duk_small_int_t) duk_bd_decode(bd_ctx, 7);
 	DUK_DDD(DUK_DDDPRINT("checking 1:n conversions (count %ld)", (long) n));
 	while (n--) {
 		start_i = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
 		t = (duk_small_int_t) duk_bd_decode(bd_ctx, 2);
 		DUK_DDD(DUK_DDDPRINT("1:n conversion %ld -> %ld chars", (long) start_i, (long) t));
 		if (cp == start_i) {
 			DUK_DDD(DUK_DDDPRINT("1:n matches input codepoint"));
 			if (bw != NULL) {
 				while (t--) {
 					tmp_cp = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
 					DUK_BW_WRITE_RAW_XUTF8(thr, bw, (duk_ucodepoint_t) tmp_cp);
 				}
 			}
 			/* A matching 1:n rule always returns -1. */
 			return -1;
 		} else {
 			/* No match: skip over this entry's output
 			 * codepoints to reach the next entry.
 			 */
 			while (t--) {
 				(void) duk_bd_decode(bd_ctx, 16);
 			}
 		}
 	}

 	/* default: no change */
 	DUK_DDD(DUK_DDDPRINT("no rule matches, output is same as input"));
 	/* fall through */

  single:
 	/* Single codepoint result: write it out if requested, return it. */
 	if (bw != NULL) {
 		DUK_BW_WRITE_RAW_XUTF8(thr, bw, (duk_ucodepoint_t) cp);
 	}
 	return cp;
 }

 /*
  *  Case conversion helper, with context/locale sensitivity.
  *  For proper case conversion, one needs to know the character
  *  and the preceding and following characters, as well as
  *  locale/language.
  *
  *  Converts 'cp' to upper case when 'uppercase' is true and to lower
  *  case otherwise.  'prev' and 'next' are the neighboring codepoints
  *  (callers pass -1 when there is none).  If 'bw' is not NULL the result
  *  is also written through the bufwriter.  Returns the converted
  *  codepoint, or whatever duk__slow_case_conversion() returns for
  *  non-ASCII input (-1 for a multi-character result).
  */

 /* XXX: add 'language' argument when locale/language sensitive rule
  * support added.
  */
 DUK_LOCAL
 duk_codepoint_t duk__case_transform_helper(duk_hthread *thr,
                                            duk_bufwriter_ctx *bw,
                                            duk_codepoint_t cp,
                                            duk_codepoint_t prev,
                                            duk_codepoint_t next,
                                            duk_bool_t uppercase) {
 	duk_bitdecoder_ctx bd_ctx;

 	/* fast path for ASCII */
 	if (cp < 0x80L) {
 		/* XXX: there are language sensitive rules for the ASCII range.
 		 * If/when language/locale support is implemented, they need to
 		 * be implemented here for the fast path.  There are no context
 		 * sensitive rules for ASCII range.
 		 */

 		if (uppercase && cp >= 'a' && cp <= 'z') {
 			cp = cp - 'a' + 'A';
 		} else if (!uppercase && cp >= 'A' && cp <= 'Z') {
 			cp = cp - 'A' + 'a';
 		}

 		if (bw != NULL) {
 			DUK_BW_WRITE_RAW_U8(thr, bw, (duk_uint8_t) cp);
 		}
 		return cp;
 	}

 	/* context and locale specific rules which cannot currently be represented
 	 * in the caseconv bitstream: hardcoded rules in C
 	 */

 	/* XXX: turkish / azeri (uppercase rules) */

 	/*
 	 *  Final sigma context specific rule (lowercasing only).  This is
 	 *  a rather tricky rule and this handling is probably not 100%
 	 *  correct now.  The rule is not locale/language specific so it is
 	 *  supported.
 	 *
 	 *  U+03A3 = GREEK CAPITAL LETTER SIGMA occurring at "end of word",
 	 *  i.e. prev exists and is a letter, and next does not exist or is
 	 *  not a letter, lowercases to U+03C2 = GREEK SMALL LETTER FINAL
 	 *  SIGMA.  Otherwise the normal rules below lowercase it to
 	 *  U+03C3 = GREEK SMALL LETTER SIGMA.
 	 */
 	if (!uppercase &&
 	    cp == 0x03a3L &&
 	    duk_unicode_is_letter(prev) &&
 	    !duk_unicode_is_letter(next)) {
 		cp = 0x03c2L;
 		if (bw != NULL) {
 			DUK_BW_WRITE_RAW_XUTF8(thr, bw, (duk_ucodepoint_t) cp);
 		}
 		return cp;
 	}

 	/* XXX: lithuanian not implemented */
 	/* XXX: lithuanian, explicit dot rules */
 	/* XXX: turkish / azeri, lowercase rules */

 	/* 1:1 or special conversions, but not locale/context specific: script generated rules */
 	DUK_MEMZERO(&bd_ctx, sizeof(bd_ctx));
 	if (uppercase) {
 		bd_ctx.data = (const duk_uint8_t *) duk_unicode_caseconv_uc;
 		bd_ctx.length = (duk_size_t) sizeof(duk_unicode_caseconv_uc);
 	} else {
 		bd_ctx.data = (const duk_uint8_t *) duk_unicode_caseconv_lc;
 		bd_ctx.length = (duk_size_t) sizeof(duk_unicode_caseconv_lc);
 	}
 	return duk__slow_case_conversion(thr, bw, cp, &bd_ctx);

  /* unused now, not needed until Turkish/Azeri */
 #if 0
  nochar:
 	return -1;
 #endif
 }

 /*
  *  Replace valstack top with case converted version.
  *
  *  The value at the stack top must be a string.  It is converted to upper
  *  case when 'uppercase' is non-zero and to lower case otherwise.  The
  *  conversion is done one codepoint at a time into a pushed buffer; each
  *  call to the conversion helper also gets the previous and next
  *  codepoints for context sensitive rules.
  */

 DUK_INTERNAL void duk_unicode_case_convert_string(duk_hthread *thr, duk_small_int_t uppercase) {
 	duk_context *ctx = (duk_context *) thr;
 	duk_hstring *h_input;
 	duk_bufwriter_ctx bw_alloc;
 	duk_bufwriter_ctx *bw;
 	const duk_uint8_t *p, *p_start, *p_end;
 	duk_codepoint_t prev, curr, next;

 	h_input = duk_require_hstring(ctx, -1);
 	DUK_ASSERT(h_input != NULL);

 	/* Output buffer, initially sized to the input byte length. */
 	bw = &bw_alloc;
 	DUK_BW_INIT_PUSHBUF(thr, bw, DUK_HSTRING_GET_BYTELEN(h_input));

 	/* [ ... input buffer ] */

 	p_start = (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h_input);
 	p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input);
 	p = p_start;

 	/* Sliding window of three codepoints; -1 means "no character".
 	 * Decoding runs one codepoint ahead of conversion so that 'next'
 	 * is known when 'curr' is converted.
 	 */
 	prev = -1; DUK_UNREF(prev);
 	curr = -1;
 	next = -1;
 	for (;;) {
 		prev = curr;
 		curr = next;
 		next = -1;
 		if (p < p_end) {
 			/* NOTE(review): a decoded codepoint that becomes
 			 * negative after this cast would look like the
 			 * "no character" value -- confirm such inputs
 			 * cannot occur here.
 			 */
 			next = (int) duk_unicode_decode_xutf8_checked(thr, &p, p_start, p_end);
 		} else {
 			/* end of input and last char has been processed */
 			if (curr < 0) {
 				break;
 			}
 		}

 		/* on first round, skip */
 		if (curr >= 0) {
 			/* XXX: could add a fast path to process chunks of input codepoints,
 			 * but relative benefit would be quite small.
 			 */

 			/* Ensure space for maximum multi-character result; estimate is overkill. */
 			DUK_BW_ENSURE(thr, bw, 8 * DUK_UNICODE_MAX_XUTF8_LENGTH);

 			/* The return value is not needed here; the
 			 * result is written through 'bw'.
 			 */
 			duk__case_transform_helper(thr,
 			                           bw,
 			                           (duk_codepoint_t) curr,
 			                           prev,
 			                           next,
 			                           uppercase);
 		}
 	}

 	DUK_BW_COMPACT(thr, bw);
 	duk_to_string(ctx, -1);  /* coerce the output buffer at the stack top to a string; pointers into the buffer are invalid after this */
 	duk_remove(ctx, -2);     /* remove the value below the result (the input string per the stack layout noted above) */
 }

 #ifdef DUK_USE_REGEXP_SUPPORT

 /*
  *  Canonicalize() abstract operation, E5 Section 15.10.2.8, used to
  *  canonicalize individual codepoints during regexp compilation and
  *  execution.  Codepoints are handled one at a time so context specific
  *  rules cannot apply (locale specific rules could, though).
  *
  *  Returns the canonicalized codepoint, or 'cp' itself when no single
  *  codepoint canonical form is used.
  */

 DUK_INTERNAL duk_codepoint_t duk_unicode_re_canonicalize_char(duk_hthread *thr, duk_codepoint_t cp) {
 #if defined(DUK_USE_REGEXP_CANON_WORKAROUND)
 	/* Table lookup variant: fast, but costs 128kB of footprint.  Only
 	 * codepoints below 0x10000 are looked up, anything above is returned
 	 * unchanged.
 	 */
 	DUK_ASSERT(cp >= 0);
 	DUK_UNREF(thr);
 	return DUK_LIKELY(cp < 0x10000L) ? (duk_codepoint_t) duk_unicode_re_canon_lookup[cp] : cp;
 #else  /* DUK_USE_REGEXP_CANON_WORKAROUND */
 	duk_codepoint_t cp_upper;

 	/* Uppercase a lone codepoint: no output buffer (NULL is allowed),
 	 * no previous or next codepoint (-1), uppercase flag set (1).
 	 */
 	cp_upper = duk__case_transform_helper(thr, NULL, cp, -1, -1, 1);

 	if (cp_upper < 0) {
 		/* Conversion yields multiple codepoints --> leave as is. */
 		return cp;
 	}
 	if (cp >= 0x80 && cp_upper < 0x80) {
 		/* Non-ASCII would map to ASCII --> leave as is. */
 		return cp;
 	}

 	return cp_upper;
 #endif  /* DUK_USE_REGEXP_CANON_WORKAROUND */
 }

 /*
  *  "IsWordChar" abstract operation, E5 Section 15.10.2.6.  Returns 1 when
  *  'x' is in [0-9a-zA-Z_] and 0 otherwise.  Characters read outside the
  *  string are assumed to be given as x < 0, which yields 0.
  *
  *  Note: the E5 Section 15.10.2.6 text has a typo (it lists 'A' twice and
  *  omits 'a'); the intended set is [0-9a-zA-Z_].
  */

 DUK_INTERNAL duk_small_int_t duk_unicode_re_is_wordchar(duk_codepoint_t x) {
 	if (x == '_') {
 		return 1;
 	}
 	if (x >= '0' && x <= '9') {
 		return 1;
 	}
 	if (x >= 'A' && x <= 'Z') {
 		return 1;
 	}
 	if (x >= 'a' && x <= 'z') {
 		return 1;
 	}
 	return 0;
 }

 /*
  *  Regexp range tables
  */

 /* Regexp range tables are exposed because the lexer needs these too.
  * Each table is a flat list of inclusive [start, end] codepoint pairs.
  */
 /* Digits: the single range '0'...'9'. */
 DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_digit[2] = {
 	(duk_uint16_t) 0x0030UL, (duk_uint16_t) 0x0039UL,
 };
 /* White space ranges as inclusive [start, end] codepoint pairs (11 pairs).
  * The complement of this table is duk_unicode_re_ranges_not_white.
  */
 DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_white[22] = {
 	(duk_uint16_t) 0x0009UL, (duk_uint16_t) 0x000DUL,  /* TAB, LF, VT, FF, CR */
 	(duk_uint16_t) 0x0020UL, (duk_uint16_t) 0x0020UL,  /* SPACE */
 	(duk_uint16_t) 0x00A0UL, (duk_uint16_t) 0x00A0UL,  /* NO-BREAK SPACE */
 	(duk_uint16_t) 0x1680UL, (duk_uint16_t) 0x1680UL,  /* OGHAM SPACE MARK */
 	(duk_uint16_t) 0x180EUL, (duk_uint16_t) 0x180EUL,  /* MONGOLIAN VOWEL SEPARATOR */
 	(duk_uint16_t) 0x2000UL, (duk_uint16_t) 0x200AUL,  /* EN QUAD ... HAIR SPACE */
 	(duk_uint16_t) 0x2028UL, (duk_uint16_t) 0x2029UL,  /* LINE SEPARATOR, PARAGRAPH SEPARATOR */
 	(duk_uint16_t) 0x202FUL, (duk_uint16_t) 0x202FUL,  /* NARROW NO-BREAK SPACE */
 	(duk_uint16_t) 0x205FUL, (duk_uint16_t) 0x205FUL,  /* MEDIUM MATHEMATICAL SPACE */
 	(duk_uint16_t) 0x3000UL, (duk_uint16_t) 0x3000UL,  /* IDEOGRAPHIC SPACE */
 	(duk_uint16_t) 0xFEFFUL, (duk_uint16_t) 0xFEFFUL,  /* ZERO WIDTH NO-BREAK SPACE (BOM) */
 };
 /* Word character ranges as inclusive [start, end] codepoint pairs; the
  * same set, [0-9A-Z_a-z], that duk_unicode_re_is_wordchar() tests for.
  */
 DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_wordchar[8] = {
 	(duk_uint16_t) 0x0030UL, (duk_uint16_t) 0x0039UL,  /* '0'...'9' */
 	(duk_uint16_t) 0x0041UL, (duk_uint16_t) 0x005AUL,  /* 'A'...'Z' */
 	(duk_uint16_t) 0x005FUL, (duk_uint16_t) 0x005FUL,  /* '_' */
 	(duk_uint16_t) 0x0061UL, (duk_uint16_t) 0x007AUL,  /* 'a'...'z' */
 };
 /* Complement of duk_unicode_re_ranges_digit within 0x0000...0xFFFF, as
  * inclusive [start, end] codepoint pairs.
  */
 DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_not_digit[4] = {
 	(duk_uint16_t) 0x0000UL, (duk_uint16_t) 0x002FUL,  /* everything below '0' */
 	(duk_uint16_t) 0x003AUL, (duk_uint16_t) 0xFFFFUL,  /* everything above '9' */
 };
 /* Complement of duk_unicode_re_ranges_white within 0x0000...0xFFFF, as
  * inclusive [start, end] codepoint pairs.  Each pair covers the gap between
  * two consecutive white space ranges (or up to the 0x0000 / 0xFFFF bound).
  */
 DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_not_white[24] = {
 	(duk_uint16_t) 0x0000UL, (duk_uint16_t) 0x0008UL,
 	(duk_uint16_t) 0x000EUL, (duk_uint16_t) 0x001FUL,
 	(duk_uint16_t) 0x0021UL, (duk_uint16_t) 0x009FUL,
 	(duk_uint16_t) 0x00A1UL, (duk_uint16_t) 0x167FUL,
 	(duk_uint16_t) 0x1681UL, (duk_uint16_t) 0x180DUL,
 	(duk_uint16_t) 0x180FUL, (duk_uint16_t) 0x1FFFUL,
 	(duk_uint16_t) 0x200BUL, (duk_uint16_t) 0x2027UL,
 	(duk_uint16_t) 0x202AUL, (duk_uint16_t) 0x202EUL,
 	(duk_uint16_t) 0x2030UL, (duk_uint16_t) 0x205EUL,
 	(duk_uint16_t) 0x2060UL, (duk_uint16_t) 0x2FFFUL,
 	(duk_uint16_t) 0x3001UL, (duk_uint16_t) 0xFEFEUL,
 	(duk_uint16_t) 0xFF00UL, (duk_uint16_t) 0xFFFFUL,
 };
 /* Complement of duk_unicode_re_ranges_wordchar within 0x0000...0xFFFF, as
  * inclusive [start, end] codepoint pairs.
  */
 DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_not_wordchar[10] = {
 	(duk_uint16_t) 0x0000UL, (duk_uint16_t) 0x002FUL,  /* below '0' */
 	(duk_uint16_t) 0x003AUL, (duk_uint16_t) 0x0040UL,  /* between '9' and 'A' */
 	(duk_uint16_t) 0x005BUL, (duk_uint16_t) 0x005EUL,  /* between 'Z' and '_' */
 	(duk_uint16_t) 0x0060UL, (duk_uint16_t) 0x0060UL,  /* between '_' and 'a' */
 	(duk_uint16_t) 0x007BUL, (duk_uint16_t) 0xFFFFUL,  /* above 'z' */
 };

 #endif  /* DUK_USE_REGEXP_SUPPORT */