thirdparty/civetweb-1.9.1/src/third_party/duktape-1.5.2/examples/codepage-conv/duk_codepage_conv.c - nifi-minifi-cpp - Git at Google

 /*
  *  Convert an 8-bit input string (e.g. ISO-8859-1) into CESU-8.
  *  Calling code supplies the "code page" as a 256-entry array of
  *  codepoints for the conversion.
  *
  *  This is useful when input data is in non-UTF-8 format and must
  *  be converted at runtime, e.g. when compiling non-UTF-8 source
  *  code.  Another alternative is to use e.g. iconv.
  */

 #include "duktape.h"

 /* Decode an 8-bit string using 'codepage' into Unicode codepoints and
  * re-encode into CESU-8.  Codepage argument must point to a 256-entry
  * table.  Only supports BMP (codepoints U+0000 to U+FFFF).
  */
 void duk_decode_string_codepage(duk_context *ctx, const char *str, size_t len, unsigned int *codepage) {
 	unsigned char *tmp;
 	size_t tmplen, i;
 	unsigned char *p;
 	unsigned int cp;

 	tmplen = 3 * len;  /* max expansion is 1 input byte -> 3 output bytes */
 	if (tmplen / 3 != len) {
 		/* Temporary buffer length wraps. */
 		duk_error(ctx, DUK_ERR_RANGE_ERROR, "input string too long");
 		return;
 	}

 	tmp = (unsigned char *) duk_push_fixed_buffer(ctx, tmplen);

 	for (i = 0, p = tmp; i < len; i++) {
 		cp = codepage[((unsigned char *) str)[i]] & 0xffffUL;
 		if (cp < 0x80UL) {
 			*p++ = (unsigned char) cp;
 		} else if (cp < 0x800UL) {
 			*p++ = (unsigned char) (0xc0 + ((cp >> 6) & 0x1f));
 			*p++ = (unsigned char) (0x80 + (cp & 0x3f));
 		} else {
 			/* In CESU-8 all codepoints in [0x0000,0xFFFF] are
 			 * allowed, including surrogates.
 			 */
 			*p++ = (unsigned char) (0xe0 + ((cp >> 12) & 0x0f));
 			*p++ = (unsigned char) (0x80 + ((cp >> 6) & 0x3f));
 			*p++ = (unsigned char) (0x80 + (cp & 0x3f));
 		}
 	}

 	duk_push_lstring(ctx, (const char *) tmp, (duk_size_t) (p - tmp));

 	/* [ ... tmp res ] */

 	duk_remove(ctx, -2);
 }
	/*
	* Convert an 8-bit input string (e.g. ISO-8859-1) into CESU-8.
	* Calling code supplies the "code page" as a 256-entry array of
	* codepoints for the conversion.
	*
	* This is useful when input data is in non-UTF-8 format and must
	* be converted at runtime, e.g. when compiling non-UTF-8 source
	* code. Another alternative is to use e.g. iconv.
	*/

	#include "duktape.h"

	/* Decode an 8-bit string using 'codepage' into Unicode codepoints and
	* re-encode into CESU-8. Codepage argument must point to a 256-entry
	* table. Only supports BMP (codepoints U+0000 to U+FFFF).
	*/
	void duk_decode_string_codepage(duk_context ctx, const char str, size_t len, unsigned int *codepage) {
	unsigned char *tmp;
	size_t tmplen, i;
	unsigned char *p;
	unsigned int cp;

	tmplen = 3 * len; /* max expansion is 1 input byte -> 3 output bytes */
	if (tmplen / 3 != len) {
	/* Temporary buffer length wraps. */
	duk_error(ctx, DUK_ERR_RANGE_ERROR, "input string too long");
	return;
	}

	tmp = (unsigned char *) duk_push_fixed_buffer(ctx, tmplen);

	for (i = 0, p = tmp; i < len; i++) {
	cp = codepage[((unsigned char *) str)[i]] & 0xffffUL;
	if (cp < 0x80UL) {
	*p++ = (unsigned char) cp;
	} else if (cp < 0x800UL) {
	*p++ = (unsigned char) (0xc0 + ((cp >> 6) & 0x1f));
	*p++ = (unsigned char) (0x80 + (cp & 0x3f));
	} else {
	/* In CESU-8 all codepoints in [0x0000,0xFFFF] are
	* allowed, including surrogates.
	*/
	*p++ = (unsigned char) (0xe0 + ((cp >> 12) & 0x0f));
	*p++ = (unsigned char) (0x80 + ((cp >> 6) & 0x3f));
	*p++ = (unsigned char) (0x80 + (cp & 0x3f));
	}
	}

	duk_push_lstring(ctx, (const char *) tmp, (duk_size_t) (p - tmp));

	/* [ ... tmp res ] */

	duk_remove(ctx, -2);
	}