src/backend/utils/mb/conv.c - hawq - Git at Google

 /*-------------------------------------------------------------------------
  *
  *	  Utility functions for conversion procs.
  *
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
  *	  $PostgreSQL: pgsql/src/backend/utils/mb/conv.c,v 1.68 2010/01/02 16:57:56 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 #include "mb/pg_wchar.h"


 /*
  * latin2mic: LATINn ---> MIC for charsets whose local codes are carried
  * over to MIC unchanged.
  *
  * l: source string, len bytes long (an embedded zero byte is an error)
  * p: output area, NUL-terminated on return (must be large enough!)
  * lc: mule character set id, emitted in front of every high-bit-set byte
  * encoding: PG identifier of the local encoding, used for error reports
  */
 void
 latin2mic(const unsigned char *l, unsigned char *p, int len,
 		  int lc, int encoding)
 {
 	for (; len > 0; l++, len--)
 	{
 		int			ch = *l;

 		/* a zero byte inside the counted string is invalid input */
 		if (ch == 0)
 			report_invalid_encoding(encoding, (const char *) l, len);

 		/* non-ASCII bytes become a two-byte MIC character: lc, then byte */
 		if (IS_HIGHBIT_SET(ch))
 			*p++ = lc;
 		*p++ = ch;
 	}
 	*p = '\0';
 }

 /*
  * mic2latin: MIC ---> LATINn for charsets whose local codes are carried
  * over to MIC unchanged.
  *
  * mic: source string, len bytes long (an embedded zero byte is an error)
  * p: output area, NUL-terminated on return (must be large enough!)
  * lc: mule character set id expected as leading byte of non-ASCII chars
  * encoding: PG identifier of the local encoding, used for error reports
  */
 void
 mic2latin(const unsigned char *mic, unsigned char *p, int len,
 		  int lc, int encoding)
 {
 	while (len > 0)
 	{
 		int			lead = *mic;
 		int			mblen;

 		if (lead == 0)
 			report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);

 		if (!IS_HIGHBIT_SET(lead))
 		{
 			/* ASCII is copied through unchanged */
 			*p++ = lead;
 			mic++;
 			len--;
 			continue;
 		}

 		/* multibyte character: it must fit into the remaining input */
 		mblen = pg_mic_mblen(mic);
 		if (mblen > len)
 			report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
 									len);

 		/*
 		 * Only two-byte characters of charset lc with a high-bit-set second
 		 * byte can be represented in the local encoding.
 		 */
 		if (!(mblen == 2 && lead == lc && IS_HIGHBIT_SET(mic[1])))
 			report_untranslatable_char(PG_MULE_INTERNAL, encoding,
 									   (const char *) mic, len);

 		*p++ = mic[1];
 		mic += 2;
 		len -= 2;
 	}
 	*p = '\0';
 }


 /*
  * pg_ascii2mic: SQL_ASCII ---> MIC
  *
  * SQL_ASCII ordinarily tolerates high-bit-set bytes, but there is no way
  * to know which MIC character such a byte stands for, so they are
  * rejected here together with embedded zero bytes.
  *
  * l: source string, len bytes long
  * p: output area, NUL-terminated on return (must be large enough!)
  */
 void
 pg_ascii2mic(const unsigned char *l, unsigned char *p, int len)
 {
 	for (; len > 0; l++, len--)
 	{
 		int			ch = *l;

 		if (IS_HIGHBIT_SET(ch) || ch == 0)
 			report_invalid_encoding(PG_SQL_ASCII, (const char *) l, len);
 		*p++ = ch;
 	}
 	*p = '\0';
 }

 /*
  * pg_mic2ascii: MIC ---> SQL_ASCII
  *
  * Only plain 7-bit bytes can be carried over; a zero byte or any
  * high-bit-set byte is reported as untranslatable.
  *
  * mic: source string, len bytes long
  * p: output area, NUL-terminated on return (must be large enough!)
  */
 void
 pg_mic2ascii(const unsigned char *mic, unsigned char *p, int len)
 {
 	for (; len > 0; mic++, len--)
 	{
 		int			ch = *mic;

 		if (IS_HIGHBIT_SET(ch) || ch == 0)
 			report_untranslatable_char(PG_MULE_INTERNAL, PG_SQL_ASCII,
 									   (const char *) mic, len);
 		*p++ = ch;
 	}
 	*p = '\0';
 }

 /*
  * latin2mic_with_table: table-driven conversion of a single-byte local
  * charset to the mule internal code.
  *
  * l: source string, len bytes long (an embedded zero byte is an error)
  * p: output area, NUL-terminated on return (must be large enough!)
  * lc: mule character set id for the local encoding
  * encoding: PG identifier of the local encoding, used for error reports
  * tab: conversion entries for local codes 128 (0x80) and up, indexed by
  *		(code - HIGHBIT); each entry is the second MIC byte for that code,
  *		and a zero entry marks a code with no MIC equivalent.
  */
 void
 latin2mic_with_table(const unsigned char *l,
 					 unsigned char *p,
 					 int len,
 					 int lc,
 					 int encoding,
 					 const unsigned char *tab)
 {
 	for (; len > 0; l++, len--)
 	{
 		unsigned char src = *l;
 		unsigned char mapped;

 		if (src == 0)
 			report_invalid_encoding(encoding, (const char *) l, len);

 		if (!IS_HIGHBIT_SET(src))
 		{
 			/* ASCII is copied through unchanged */
 			*p++ = src;
 			continue;
 		}

 		mapped = tab[src - HIGHBIT];
 		if (mapped == 0)
 			report_untranslatable_char(encoding, PG_MULE_INTERNAL,
 									   (const char *) l, len);
 		else
 		{
 			*p++ = lc;
 			*p++ = mapped;
 		}
 	}
 	*p = '\0';
 }

 /*
  * mic2latin_with_table: table-driven conversion from the mule internal
  * code to a single-byte local charset.
  *
  * mic: source string, len bytes long (an embedded zero byte is an error)
  * p: output area, NUL-terminated on return (must be large enough!)
  * lc: mule character set id for the local encoding
  * encoding: PG identifier of the local encoding, used for error reports
  * tab: conversion entries for the second MIC byte, 128 (0x80) and up,
  *		indexed by (byte - HIGHBIT); each entry is the local code point,
  *		and a zero entry marks a character with no local equivalent.
  */
 void
 mic2latin_with_table(const unsigned char *mic,
 					 unsigned char *p,
 					 int len,
 					 int lc,
 					 int encoding,
 					 const unsigned char *tab)
 {
 	while (len > 0)
 	{
 		unsigned char lead = *mic;
 		unsigned char out;
 		int			mblen;

 		if (lead == 0)
 			report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);

 		if (!IS_HIGHBIT_SET(lead))
 		{
 			/* ASCII is copied through unchanged */
 			*p++ = lead;
 			mic++;
 			len--;
 			continue;
 		}

 		/* multibyte character: it must fit into the remaining input */
 		mblen = pg_mic_mblen(mic);
 		if (mblen > len)
 			report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
 									len);

 		/*
 		 * Consult the table only for a well-formed two-byte character of
 		 * charset lc; anything else is treated like a missing table entry.
 		 */
 		if (mblen == 2 && lead == lc && IS_HIGHBIT_SET(mic[1]))
 			out = tab[mic[1] - HIGHBIT];
 		else
 			out = 0;

 		if (out == 0)
 		{
 			report_untranslatable_char(PG_MULE_INTERNAL, encoding,
 									   (const char *) mic, len);
 			break;				/* keep compiler quiet */
 		}

 		*p++ = out;
 		mic += 2;
 		len -= 2;
 	}
 	*p = '\0';
 }

 /*
  * comparison routine for bsearch()
  * this routine is intended for UTF8 -> local code
  */
 static int
 compare1(const void *p1, const void *p2)
 {
 	uint32		v1,
 				v2;

 	v1 = *(uint32 *) p1;
 	v2 = ((pg_utf_to_local *) p2)->utf;
 	return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
 }

 /*
  * comparison routine for bsearch()
  * this routine is intended for local code -> UTF8
  */
 static int
 compare2(const void *p1, const void *p2)
 {
 	uint32		v1,
 				v2;

 	v1 = *(uint32 *) p1;
 	v2 = ((pg_local_to_utf *) p2)->code;
 	return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
 }

 /*
  * comparison routine for bsearch()
  * this routine is intended for combined UTF8 -> local code
  */
 static int
 compare3(const void *p1, const void *p2)
 {
 	uint32		s1,
 				s2,
 				d1,
 				d2;

 	s1 = *(uint32 *) p1;
 	s2 = *((uint32 *) p1 + 1);
 	d1 = ((pg_utf_to_local_combined *) p2)->utf1;
 	d2 = ((pg_utf_to_local_combined *) p2)->utf2;
 	return (s1 > d1 || (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1);
 }

 /*
  * comparison routine for bsearch()
  * this routine is intended for local code -> combined UTF8
  */
 static int
 compare4(const void *p1, const void *p2)
 {
 	uint32		v1,
 				v2;

 	v1 = *(uint32 *) p1;
 	v2 = ((pg_local_to_utf_combined *) p2)->code;
 	return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
 }

 /*
  * convert 32bit wide character to multibyte stream pointed to by iso
  *
  * The four bytes of code are visited from most to least significant and
  * every non-zero byte is appended to the output; zero bytes are skipped.
  * Returns the advanced output pointer.
  */
 static unsigned char *
 set_iso_code(unsigned char *iso, uint32 code)
 {
 	int			shift;

 	for (shift = 24; shift >= 0; shift -= 8)
 	{
 		unsigned char b = (code >> shift) & 0xff;

 		if (b != 0)
 			*iso++ = b;
 	}
 	return iso;
 }

 /*
  * UTF8 ---> local code
  *
  * utf: input UTF8 string (need not be null-terminated).
  * iso: pointer to the output area (must be large enough!); it is
  *		  NUL-terminated on return.
  * map: the conversion map; it is searched with bsearch() on the packed
  *		  UTF8 value, so it has to be sorted on that value.
  * cmap: the conversion map for combined characters.
  *		  (optional)
  * size1: the size of the conversion map.
  * size2: the size of the conversion map for combined characters
  *		  (optional)
  * encoding: the PG identifier for the local encoding.
  * len: length of input string.
  *
  * Malformed input (an embedded zero byte, a truncated or illegal
  * sequence) is reported with report_invalid_encoding(); characters that
  * are in neither map are reported with report_untranslatable_char().
  */
 void
 UtfToLocal(const unsigned char *utf, unsigned char *iso,
 		   const pg_utf_to_local *map, const pg_utf_to_local_combined *cmap,
 		   int size1, int size2, int encoding, int len)
 {
 	uint32		iutf;
 	uint32		cutf[2];
 	uint32		code;
 	const pg_utf_to_local *p;
 	const pg_utf_to_local_combined *cp;
 	int			l;
 	int			i;

 	for (; len > 0; len -= l)
 	{
 		/* "break" cases all represent errors */
 		if (*utf == '\0')
 			break;

 		l = pg_utf_mblen(utf);

 		if (len < l)
 			break;

 		if (!pg_utf8_islegal(utf, l))
 			break;

 		if (l == 1)
 		{
 			/* ASCII case is easy */
 			*iso++ = *utf++;
 			continue;
 		}

 		/*
 		 * Pack the bytes of the character into one word, first byte most
 		 * significant.  Accumulating in the unsigned iutf avoids shifting
 		 * a promoted int into its sign bit for 4-byte characters.
 		 */
 		iutf = 0;
 		for (i = 0; i < l; i++)
 			iutf = (iutf << 8) | *utf++;

 		/*
 		 * first, try with combined map if possible
 		 */
 		if (cmap && len > l)
 		{
 			const unsigned char *utf_save = utf;
 			int			len_save = len;
 			int			l_save = l;

 			len -= l;

 			/*
 			 * The second character gets the same validation as the first;
 			 * in particular an embedded zero byte must not be copied
 			 * through as if it were ASCII.
 			 */
 			if (*utf == '\0')
 				break;

 			l = pg_utf_mblen(utf);
 			if (len < l)
 				break;

 			if (!pg_utf8_islegal(utf, l))
 				break;

 			cutf[0] = iutf;

 			if (l == 1)
 			{
 				/*
 				 * An ASCII follower is never looked up in the combined
 				 * map: translate the first character on its own ...
 				 */
 				p = bsearch(&cutf[0], map, size1,
 							sizeof(pg_utf_to_local), compare1);
 				if (p == NULL)
 					report_untranslatable_char(PG_UTF8, encoding,
 							   (const char *) (utf_save - l_save), len_save);
 				iso = set_iso_code(iso, p->code);

 				/* ... then the ASCII case is easy */
 				*iso++ = *utf++;
 				continue;
 			}

 			/* pack the second character the same way as the first */
 			iutf = 0;
 			for (i = 0; i < l; i++)
 				iutf = (iutf << 8) | *utf++;

 			cutf[1] = iutf;
 			cp = bsearch(cutf, cmap, size2,
 						 sizeof(pg_utf_to_local_combined), compare3);
 			if (cp)
 				code = cp->code;
 			else
 			{
 				/* not found in combined map. try with ordinary map */
 				p = bsearch(&cutf[0], map, size1,
 							sizeof(pg_utf_to_local), compare1);
 				if (p == NULL)
 					report_untranslatable_char(PG_UTF8, encoding,
 							   (const char *) (utf_save - l_save), len_save);
 				iso = set_iso_code(iso, p->code);

 				p = bsearch(&cutf[1], map, size1,
 							sizeof(pg_utf_to_local), compare1);
 				if (p == NULL)
 					report_untranslatable_char(PG_UTF8, encoding,
 											   (const char *) (utf - l), len);
 				code = p->code;
 			}
 		}
 		else	/* no cmap or no remaining data */
 		{
 			p = bsearch(&iutf, map, size1,
 						sizeof(pg_utf_to_local), compare1);
 			if (p == NULL)
 				report_untranslatable_char(PG_UTF8, encoding,
 										   (const char *) (utf - l), len);
 			code = p->code;
 		}
 		iso = set_iso_code(iso, code);
 	}

 	/* leaving the loop early means the input at utf is malformed */
 	if (len > 0)
 		report_invalid_encoding(PG_UTF8, (const char *) utf, len);

 	*iso = '\0';
 }

 /*
  * local code ---> UTF8
  *
  * iso: input local string (need not be null-terminated).
  * utf: pointer to the output area (must be large enough!); it is
  *		  NUL-terminated on return.
  * map: the conversion map; it is searched with bsearch() on the packed
  *		  local code, so it has to be sorted on that value.
  * cmap: the conversion map for combined characters.
  *		  (optional)
  * size1: the size of the conversion map.
  * size2: the size of the conversion map for combined characters
  *		  (optional)
  * encoding: the PG identifier for the local encoding.
  * len: length of input string.
  *
  * Malformed input (an embedded zero byte or a sequence rejected by
  * pg_encoding_verifymb()) is reported with report_invalid_encoding();
  * characters that are in neither map are reported with
  * report_untranslatable_char().
  */
 void
 LocalToUtf(const unsigned char *iso, unsigned char *utf,
 		   const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap,
 		   int size1, int size2, int encoding, int len)
 {
 	unsigned int iiso;
 	int			l;
 	int			i;
 	const pg_local_to_utf *p;
 	const pg_local_to_utf_combined *cp;

 	if (!PG_VALID_ENCODING(encoding))
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("invalid encoding number: %d", encoding)));

 	for (; len > 0; len -= l)
 	{
 		/* "break" cases all represent errors */
 		if (*iso == '\0')
 			break;

 		if (!IS_HIGHBIT_SET(*iso))
 		{
 			/* ASCII case is easy */
 			*utf++ = *iso++;
 			l = 1;
 			continue;
 		}

 		l = pg_encoding_verifymb(encoding, (const char *) iso, len);
 		if (l < 0)
 			break;

 		/*
 		 * Pack the bytes of the character into one word, first byte most
 		 * significant.  Accumulating in the unsigned iiso avoids shifting
 		 * a promoted int into its sign bit for 4-byte characters.
 		 */
 		iiso = 0;
 		for (i = 0; i < l; i++)
 			iiso = (iiso << 8) | *iso++;

 		p = bsearch(&iiso, map, size1,
 					sizeof(pg_local_to_utf), compare2);
 		if (p != NULL)
 		{
 			/* emit the non-zero bytes of the packed UTF8 value */
 			utf = set_iso_code(utf, p->utf);
 			continue;
 		}

 		/*
 		 * not found in the ordinary map. if there's a combined character
 		 * map, try with it
 		 */
 		if (cmap)
 		{
 			cp = bsearch(&iiso, cmap, size2,
 						 sizeof(pg_local_to_utf_combined), compare4);
 			if (cp)
 			{
 				/* one local code expands to two UTF8 characters */
 				utf = set_iso_code(utf, cp->utf1);
 				utf = set_iso_code(utf, cp->utf2);
 				continue;
 			}
 		}

 		report_untranslatable_char(encoding, PG_UTF8,
 								   (const char *) (iso - l), len);
 	}

 	/* leaving the loop early means the input at iso is malformed */
 	if (len > 0)
 		report_invalid_encoding(encoding, (const char *) iso, len);

 	*utf = '\0';
 }
	/*-------------------------------------------------------------------------
	*
	* Utility functions for conversion procs.
	*
	* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
	* Portions Copyright (c) 1994, Regents of the University of California
	*
	* IDENTIFICATION
	* $PostgreSQL: pgsql/src/backend/utils/mb/conv.c,v 1.68 2010/01/02 16:57:56 momjian Exp $
	*
	*-------------------------------------------------------------------------
	*/
	#include "postgres.h"
	#include "mb/pg_wchar.h"


	/*
	 * latin2mic: LATINn ---> MIC for charsets whose local codes are carried
	 * over to MIC unchanged.
	 *
	 * l: source string, len bytes long (an embedded zero byte is an error)
	 * p: output area, NUL-terminated on return (must be large enough!)
	 * lc: mule character set id, emitted in front of every high-bit-set byte
	 * encoding: PG identifier of the local encoding, used for error reports
	 */
	void
	latin2mic(const unsigned char *l, unsigned char *p, int len,
			  int lc, int encoding)
	{
		int			c1;

		while (len > 0)
		{
			c1 = *l;
			/* a zero byte inside the counted string is invalid input */
			if (c1 == 0)
				report_invalid_encoding(encoding, (const char *) l, len);
			/* non-ASCII bytes become a two-byte MIC character: lc, byte */
			if (IS_HIGHBIT_SET(c1))
				*p++ = lc;
			*p++ = c1;
			l++;
			len--;
		}
		*p = '\0';
	}

	/*
	 * mic2latin: MIC ---> LATINn for charsets whose local codes are carried
	 * over to MIC unchanged.
	 *
	 * mic: source string, len bytes long (an embedded zero byte is an error)
	 * p: output area, NUL-terminated on return (must be large enough!)
	 * lc: mule character set id expected as leading byte of non-ASCII chars
	 * encoding: PG identifier of the local encoding, used for error reports
	 */
	void
	mic2latin(const unsigned char *mic, unsigned char *p, int len,
			  int lc, int encoding)
	{
		int			c1;

		while (len > 0)
		{
			c1 = *mic;
			if (c1 == 0)
				report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
			if (!IS_HIGHBIT_SET(c1))
			{
				/* easy for ASCII */
				*p++ = c1;
				mic++;
				len--;
			}
			else
			{
				int			l = pg_mic_mblen(mic);

				/* the multibyte character must fit into the input */
				if (len < l)
					report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
											len);
				/* only two-byte characters of charset lc are convertible */
				if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
					report_untranslatable_char(PG_MULE_INTERNAL, encoding,
											   (const char *) mic, len);
				*p++ = mic[1];
				mic += 2;
				len -= 2;
			}
		}
		*p = '\0';
	}


	/*
	 * pg_ascii2mic: SQL_ASCII ---> MIC
	 *
	 * While ordinarily SQL_ASCII encoding is forgiving of high-bit-set
	 * characters, here we must take a hard line because we don't know
	 * the appropriate MIC equivalent.  Embedded zero bytes are rejected too.
	 *
	 * l: source string, len bytes long
	 * p: output area, NUL-terminated on return (must be large enough!)
	 */
	void
	pg_ascii2mic(const unsigned char *l, unsigned char *p, int len)
	{
		int			c1;

		while (len > 0)
		{
			c1 = *l;
			if (c1 == 0 || IS_HIGHBIT_SET(c1))
				report_invalid_encoding(PG_SQL_ASCII, (const char *) l, len);
			*p++ = c1;
			l++;
			len--;
		}
		*p = '\0';
	}

	/*
	 * pg_mic2ascii: MIC ---> SQL_ASCII
	 *
	 * Only plain 7-bit bytes can be carried over; a zero byte or any
	 * high-bit-set byte is reported as untranslatable.
	 *
	 * mic: source string, len bytes long
	 * p: output area, NUL-terminated on return (must be large enough!)
	 */
	void
	pg_mic2ascii(const unsigned char *mic, unsigned char *p, int len)
	{
		int			c1;

		while (len > 0)
		{
			c1 = *mic;
			if (c1 == 0 || IS_HIGHBIT_SET(c1))
				report_untranslatable_char(PG_MULE_INTERNAL, PG_SQL_ASCII,
										   (const char *) mic, len);
			*p++ = c1;
			mic++;
			len--;
		}
		*p = '\0';
	}

	/*
	 * latin2mic_with_table: table-driven conversion of a single-byte local
	 * charset to the mule internal code.
	 *
	 * l: source string, len bytes long (an embedded zero byte is an error)
	 * p: output area, NUL-terminated on return (must be large enough!)
	 * lc: mule character set id for the local encoding
	 * encoding: PG identifier of the local encoding, used for error reports
	 * tab: conversion entries for local codes 128 (0x80) and up, indexed by
	 *		(code - HIGHBIT); each entry is the second MIC byte for that
	 *		code, and a zero entry marks a code with no MIC equivalent.
	 */
	void
	latin2mic_with_table(const unsigned char *l,
						 unsigned char *p,
						 int len,
						 int lc,
						 int encoding,
						 const unsigned char *tab)
	{
		for (; len > 0; l++, len--)
		{
			unsigned char src = *l;
			unsigned char mapped;

			if (src == 0)
				report_invalid_encoding(encoding, (const char *) l, len);

			if (!IS_HIGHBIT_SET(src))
			{
				/* ASCII is copied through unchanged */
				*p++ = src;
				continue;
			}

			mapped = tab[src - HIGHBIT];
			if (mapped == 0)
				report_untranslatable_char(encoding, PG_MULE_INTERNAL,
										   (const char *) l, len);
			else
			{
				*p++ = lc;
				*p++ = mapped;
			}
		}
		*p = '\0';
	}

	/*
	* mic2latin_with_table: a generic single byte charset encoding
	* conversion from the mule internal code to a local charset.
	*
	* mic points to the source string of length len
	* p is the output area (must be large enough!)
	* lc is the mule character set id for the local encoding
	* encoding is the PG identifier for the local encoding
	* tab holds conversion entries for the mule internal code's
	* second byte, starting from 128 (0x80). each entry in the table
	* holds the corresponding code point for the local charset.
	*/
	void
	mic2latin_with_table(const unsigned char *mic,
	unsigned char *p,
	int len,
	int lc,
	int encoding,
	const unsigned char *tab)
	{
	unsigned char c1,
	c2;

	while (len > 0)
	{
	c1 = *mic;
	if (c1 == 0)
	report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
	if (!IS_HIGHBIT_SET(c1))
	{
	/* easy for ASCII */
	*p++ = c1;
	mic++;
	len--;
	}
	else
	{
	int l = pg_mic_mblen(mic);

	if (len < l)
	report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
	len);
	if (l != 2 \|\| c1 != lc \|\| !IS_HIGHBIT_SET(mic[1]) \|\|
	(c2 = tab[mic[1] - HIGHBIT]) == 0)
	{
	report_untranslatable_char(PG_MULE_INTERNAL, encoding,
	(const char *) mic, len);
	break; /* keep compiler quiet */
	}
	*p++ = c2;
	mic += 2;
	len -= 2;
	}
	}
	*p = '\0';
	}

	/*
	* comparison routine for bsearch()
	* this routine is intended for UTF8 -> local code
	*/
	static int
	compare1(const void p1, const void p2)
	{
	uint32 v1,
	v2;

	v1 = (uint32 ) p1;
	v2 = ((pg_utf_to_local *) p2)->utf;
	return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
	}

	/*
	* comparison routine for bsearch()
	* this routine is intended for local code -> UTF8
	*/
	static int
	compare2(const void p1, const void p2)
	{
	uint32 v1,
	v2;

	v1 = (uint32 ) p1;
	v2 = ((pg_local_to_utf *) p2)->code;
	return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
	}

	/*
	* comparison routine for bsearch()
	* this routine is intended for combined UTF8 -> local code
	*/
	static int
	compare3(const void p1, const void p2)
	{
	uint32 s1,
	s2,
	d1,
	d2;

	s1 = (uint32 ) p1;
	s2 = ((uint32 ) p1 + 1);
	d1 = ((pg_utf_to_local_combined *) p2)->utf1;
	d2 = ((pg_utf_to_local_combined *) p2)->utf2;
	return (s1 > d1 \|\| (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1);
	}

	/*
	* comparison routine for bsearch()
	* this routine is intended for local code -> combined UTF8
	*/
	static int
	compare4(const void p1, const void p2)
	{
	uint32 v1,
	v2;

	v1 = (uint32 ) p1;
	v2 = ((pg_local_to_utf_combined *) p2)->code;
	return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
	}

	/*
	 * convert 32bit wide character to multibyte stream pointed to by iso
	 *
	 * The four bytes of code are visited from most to least significant and
	 * every non-zero byte is appended to the output; zero bytes are skipped.
	 * Returns the advanced output pointer.
	 */
	static unsigned char *
	set_iso_code(unsigned char *iso, uint32 code)
	{
		int			shift;

		for (shift = 24; shift >= 0; shift -= 8)
		{
			unsigned char b = (code >> shift) & 0xff;

			if (b != 0)
				*iso++ = b;
		}
		return iso;
	}

	/*
	 * UTF8 ---> local code
	 *
	 * utf: input UTF8 string (need not be null-terminated).
	 * iso: pointer to the output area (must be large enough!); it is
	 *		  NUL-terminated on return.
	 * map: the conversion map; it is searched with bsearch() on the packed
	 *		  UTF8 value, so it has to be sorted on that value.
	 * cmap: the conversion map for combined characters.
	 *		  (optional)
	 * size1: the size of the conversion map.
	 * size2: the size of the conversion map for combined characters
	 *		  (optional)
	 * encoding: the PG identifier for the local encoding.
	 * len: length of input string.
	 *
	 * Malformed input (an embedded zero byte, a truncated or illegal
	 * sequence) is reported with report_invalid_encoding(); characters that
	 * are in neither map are reported with report_untranslatable_char().
	 */
	void
	UtfToLocal(const unsigned char *utf, unsigned char *iso,
			   const pg_utf_to_local *map, const pg_utf_to_local_combined *cmap,
			   int size1, int size2, int encoding, int len)
	{
		uint32		iutf;
		uint32		cutf[2];
		uint32		code;
		const pg_utf_to_local *p;
		const pg_utf_to_local_combined *cp;
		int			l;
		int			i;

		for (; len > 0; len -= l)
		{
			/* "break" cases all represent errors */
			if (*utf == '\0')
				break;

			l = pg_utf_mblen(utf);

			if (len < l)
				break;

			if (!pg_utf8_islegal(utf, l))
				break;

			if (l == 1)
			{
				/* ASCII case is easy */
				*iso++ = *utf++;
				continue;
			}

			/*
			 * Pack the bytes of the character into one word, first byte
			 * most significant.  Accumulating in the unsigned iutf avoids
			 * shifting a promoted int into its sign bit.
			 */
			iutf = 0;
			for (i = 0; i < l; i++)
				iutf = (iutf << 8) | *utf++;

			/*
			 * first, try with combined map if possible
			 */
			if (cmap && len > l)
			{
				const unsigned char *utf_save = utf;
				int			len_save = len;
				int			l_save = l;

				len -= l;

				/*
				 * The second character gets the same validation as the
				 * first; an embedded zero byte must not be copied through
				 * as if it were ASCII.
				 */
				if (*utf == '\0')
					break;

				l = pg_utf_mblen(utf);
				if (len < l)
					break;

				if (!pg_utf8_islegal(utf, l))
					break;

				cutf[0] = iutf;

				if (l == 1)
				{
					/*
					 * An ASCII follower is never looked up in the combined
					 * map: translate the first character on its own ...
					 */
					p = bsearch(&cutf[0], map, size1,
								sizeof(pg_utf_to_local), compare1);
					if (p == NULL)
						report_untranslatable_char(PG_UTF8, encoding,
								   (const char *) (utf_save - l_save), len_save);
					iso = set_iso_code(iso, p->code);

					/* ... then the ASCII case is easy */
					*iso++ = *utf++;
					continue;
				}

				/* pack the second character the same way as the first */
				iutf = 0;
				for (i = 0; i < l; i++)
					iutf = (iutf << 8) | *utf++;

				cutf[1] = iutf;
				cp = bsearch(cutf, cmap, size2,
							 sizeof(pg_utf_to_local_combined), compare3);
				if (cp)
					code = cp->code;
				else
				{
					/* not found in combined map. try with ordinary map */
					p = bsearch(&cutf[0], map, size1,
								sizeof(pg_utf_to_local), compare1);
					if (p == NULL)
						report_untranslatable_char(PG_UTF8, encoding,
								   (const char *) (utf_save - l_save), len_save);
					iso = set_iso_code(iso, p->code);

					p = bsearch(&cutf[1], map, size1,
								sizeof(pg_utf_to_local), compare1);
					if (p == NULL)
						report_untranslatable_char(PG_UTF8, encoding,
												   (const char *) (utf - l), len);
					code = p->code;
				}
			}
			else	/* no cmap or no remaining data */
			{
				p = bsearch(&iutf, map, size1,
							sizeof(pg_utf_to_local), compare1);
				if (p == NULL)
					report_untranslatable_char(PG_UTF8, encoding,
											   (const char *) (utf - l), len);
				code = p->code;
			}
			iso = set_iso_code(iso, code);
		}

		/* leaving the loop early means the input at utf is malformed */
		if (len > 0)
			report_invalid_encoding(PG_UTF8, (const char *) utf, len);

		*iso = '\0';
	}

	/*
	* local code ---> UTF8
	*
	* iso: input local string (need not be null-terminated).
	* utf: pointer to the output area (must be large enough!)
	* map: the conversion map.
	* cmap: the conversion map for combined characters.
	* (optional)
	* size1: the size of the conversion map.
	* size2: the size of the conversion map for combined characters
	* (optional)
	* encoding: the PG identifier for the local encoding.
	* len: length of input string.
	*/
	void
	LocalToUtf(const unsigned char iso, unsigned char utf,
	const pg_local_to_utf map, const pg_local_to_utf_combined cmap,
	int size1, int size2, int encoding, int len)
	{
	unsigned int iiso;
	int l;
	pg_local_to_utf *p;
	pg_local_to_utf_combined *cp;

	if (!PG_VALID_ENCODING(encoding))
	ereport(ERROR,
	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
	errmsg("invalid encoding number: %d", encoding)));

	for (; len > 0; len -= l)
	{
	/* "break" cases all represent errors */
	if (*iso == '\0')
	break;

	if (!IS_HIGHBIT_SET(*iso))
	{
	/* ASCII case is easy */
	utf++ = iso++;
	l = 1;
	continue;
	}

	l = pg_encoding_verifymb(encoding, (const char *) iso, len);
	if (l < 0)
	break;

	if (l == 1)
	iiso = *iso++;
	else if (l == 2)
	{
	iiso = *iso++ << 8;
	iiso \|= *iso++;
	}
	else if (l == 3)
	{
	iiso = *iso++ << 16;
	iiso \|= *iso++ << 8;
	iiso \|= *iso++;
	}
	else if (l == 4)
	{
	iiso = *iso++ << 24;
	iiso \|= *iso++ << 16;
	iiso \|= *iso++ << 8;
	iiso \|= *iso++;
	}

	p = bsearch(&iiso, map, size1,
	sizeof(pg_local_to_utf), compare2);

	if (p == NULL)
	{
	/*
	* not found in the ordinary map. if there's a combined character
	* map, try with it
	*/
	if (cmap)
	{
	cp = bsearch(&iiso, cmap, size2,
	sizeof(pg_local_to_utf_combined), compare4);

	if (cp)
	{
	if (cp->utf1 & 0xff000000)
	*utf++ = cp->utf1 >> 24;
	if (cp->utf1 & 0x00ff0000)
	*utf++ = (cp->utf1 & 0x00ff0000) >> 16;
	if (cp->utf1 & 0x0000ff00)
	*utf++ = (cp->utf1 & 0x0000ff00) >> 8;
	if (cp->utf1 & 0x000000ff)
	*utf++ = cp->utf1 & 0x000000ff;

	if (cp->utf2 & 0xff000000)
	*utf++ = cp->utf2 >> 24;
	if (cp->utf2 & 0x00ff0000)
	*utf++ = (cp->utf2 & 0x00ff0000) >> 16;
	if (cp->utf2 & 0x0000ff00)
	*utf++ = (cp->utf2 & 0x0000ff00) >> 8;
	if (cp->utf2 & 0x000000ff)
	*utf++ = cp->utf2 & 0x000000ff;

	continue;
	}
	}

	report_untranslatable_char(encoding, PG_UTF8,
	(const char *) (iso - l), len);

	}
	else
	{
	if (p->utf & 0xff000000)
	*utf++ = p->utf >> 24;
	if (p->utf & 0x00ff0000)
	*utf++ = (p->utf & 0x00ff0000) >> 16;
	if (p->utf & 0x0000ff00)
	*utf++ = (p->utf & 0x0000ff00) >> 8;
	if (p->utf & 0x000000ff)
	*utf++ = p->utf & 0x000000ff;
	}
	}

	if (len > 0)
	report_invalid_encoding(encoding, (const char *) iso, len);

	*utf = '\0';
	}