src/backend/utils/mb/mbutils.c - cloudberry - Git at Google

 /*-------------------------------------------------------------------------
  *
  * mbutils.c
  *	  This file contains functions for encoding conversion.
  *
  * The string-conversion functions in this file share some API quirks.
  * Note the following:
  *
  * The functions return a palloc'd, null-terminated string if conversion
  * is required.  However, if no conversion is performed, the given source
  * string pointer is returned as-is.
  *
  * Although the presence of a length argument means that callers can pass
  * non-null-terminated strings, care is required because the same string
  * will be passed back if no conversion occurs.  Such callers *must* check
  * whether result == src and handle that case differently.
  *
  * If the source and destination encodings are the same, the source string
  * is returned without any verification; it's assumed to be valid data.
  * If that might not be the case, the caller is responsible for validating
  * the string using a separate call to pg_verify_mbstr().  Whenever the
  * source and destination encodings are different, the functions ensure that
  * the result is validly encoded according to the destination encoding.
  *
  *
  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
  *	  src/backend/utils/mb/mbutils.c
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"

 #include "access/xact.h"
 #include "catalog/namespace.h"
 #include "mb/pg_wchar.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"
 #include "utils/syscache.h"
 #include "common/mdb_locale.h"

 /*
  * We maintain a simple linked list caching the fmgr lookup info for the
  * currently selected conversion functions, as well as any that have been
  * selected previously in the current session.  (We remember previous
  * settings because we must be able to restore a previous setting during
  * transaction rollback, without doing any fresh catalog accesses.)
  *
  * Since we'll never release this data, we just keep it in TopMemoryContext.
  */
 typedef struct ConvProcInfo
 {
 	/*
 	 * The (server, client) encoding pair this cache entry was built for.
 	 * Lookups in PrepareClientEncoding/SetClientEncoding match on both.
 	 */
 	int			s_encoding;		/* server and client encoding IDs */
 	int			c_encoding;
 	/*
 	 * fmgr lookup data for the two conversion directions, filled in by
 	 * fmgr_info_cxt() with TopMemoryContext as the owning context.
 	 */
 	FmgrInfo	to_server_info; /* lookup info for conversion procs */
 	FmgrInfo	to_client_info;
 } ConvProcInfo;

 /*
  * Cache of conversion-function lookups, newest entry first:
  * PrepareClientEncoding pushes new entries onto the head, and
  * SetClientEncoding frees older duplicates of the pair it activates.
  */
 static List *ConvProcList = NIL;	/* List of ConvProcInfo */

 /*
  * These variables point to the currently active conversion functions,
  * or are NULL when no conversion is needed.  When non-NULL they point into
  * a ConvProcInfo entry that lives in ConvProcList.
  */
 static FmgrInfo *ToServerConvProc = NULL;
 static FmgrInfo *ToClientConvProc = NULL;

 /*
  * This variable stores the conversion function to convert from UTF-8
  * to the server encoding.  It's NULL if the server encoding *is* UTF-8,
  * or if we lack a conversion function for this.  It is set, at most once,
  * by InitializeClientEncoding.
  */
 static FmgrInfo *Utf8ToServerConvProc = NULL;

 /*
  * These variables track the currently-selected encodings.  Each points at
  * an entry of pg_enc2name_tbl and starts out as SQL_ASCII.
  */
 static const pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
 static const pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
 static const pg_enc2name *MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];

 /*
  * During backend startup we can't set client encoding because we (a)
  * can't look up the conversion functions, and (b) may not know the database
  * encoding yet either.  So SetClientEncoding() just accepts anything and
  * remembers it for InitializeClientEncoding() to apply later.
  *
  * backend_startup_complete is flipped to true by InitializeClientEncoding();
  * pending_client_encoding holds the most recent request made before that.
  */
 static bool backend_startup_complete = false;
 static int	pending_client_encoding = PG_SQL_ASCII;


 /* Internal functions (defined later in this file) */
 static char *
 perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server,
 									int custom_client_encoding,
 									FmgrInfo *custom_encoding_proc);
 static int	cliplen(const char *str, int len, int limit);


 /*
  * Prepare for a future call to SetClientEncoding.  Success should mean
  * that SetClientEncoding is guaranteed to succeed for this encoding request.
  *
  * (But note that success before backend_startup_complete does not guarantee
  * success after ...)
  *
  * Returns 0 if okay, -1 if not (bad encoding or can't support conversion)
  */
 int
 PrepareClientEncoding(int encoding)
 {
 	int			server_enc;
 	Oid			toserver_oid;
 	Oid			toclient_oid;
 	ConvProcInfo *entry;
 	MemoryContext saved_cxt;
 	ListCell   *cell;

 	/* Only valid frontend encodings can ever be accepted. */
 	if (!PG_VALID_FE_ENCODING(encoding))
 		return -1;

 	/*
 	 * Until backend startup has finished we cannot check anything further,
 	 * so report success; InitializeClientEncoding() repeats the call later.
 	 */
 	if (!backend_startup_complete)
 		return 0;

 	server_enc = GetDatabaseEncoding();

 	/*
 	 * Same encoding on both sides, or SQL_ASCII on either side: no
 	 * conversion function is involved, so there is nothing to prepare.
 	 */
 	if (encoding == server_enc ||
 		encoding == PG_SQL_ASCII ||
 		server_enc == PG_SQL_ASCII)
 		return 0;

 	if (!IsTransactionState())
 	{
 		/*
 		 * Outside a live transaction the catalogs are off limits; the best
 		 * we can do is confirm that a matching entry is already cached.
 		 * That is sufficient for every transaction-rollback case.  It is not
 		 * sufficient for changing client_encoding via postgresql.conf and
 		 * SIGHUP, which is an unwise thing to attempt anyway.
 		 */
 		foreach(cell, ConvProcList)
 		{
 			ConvProcInfo *cached = (ConvProcInfo *) lfirst(cell);

 			if (cached->c_encoding == encoding &&
 				cached->s_encoding == server_enc)
 				return 0;
 		}

 		return -1;				/* nothing cached for this pair */
 	}

 	/*
 	 * Live transaction: catalog access is safe.  Always redo the lookup, even
 	 * when a cached entry exists, so that changes in pg_conversion are
 	 * noticed.  Both directions must be available.
 	 */
 	toserver_oid = FindDefaultConversionProc(encoding, server_enc);
 	if (!OidIsValid(toserver_oid))
 		return -1;

 	toclient_oid = FindDefaultConversionProc(server_enc, encoding);
 	if (!OidIsValid(toclient_oid))
 		return -1;

 	/* Build the cache entry in TopMemoryContext (this can still fail). */
 	entry = (ConvProcInfo *) MemoryContextAlloc(TopMemoryContext,
 												sizeof(ConvProcInfo));
 	entry->s_encoding = server_enc;
 	entry->c_encoding = encoding;
 	fmgr_info_cxt(toserver_oid, &entry->to_server_info, TopMemoryContext);
 	fmgr_info_cxt(toclient_oid, &entry->to_client_info, TopMemoryContext);

 	/*
 	 * Push the entry onto the front of the list, allocating the list cell in
 	 * TopMemoryContext as well.  An older entry for the same pair may still
 	 * be in use, so it is left alone here; SetClientEncoding removes it.
 	 */
 	saved_cxt = MemoryContextSwitchTo(TopMemoryContext);
 	ConvProcList = lcons(entry, ConvProcList);
 	MemoryContextSwitchTo(saved_cxt);

 	return 0;
 }

 /*
  * Set the active client encoding and set up the conversion-function pointers.
  * PrepareClientEncoding should have been called previously for this encoding.
  *
  * Returns 0 if okay, -1 if not (bad encoding or can't support conversion)
  */
 int
 SetClientEncoding(int encoding)
 {
 	int			server_enc;
 	bool		activated = false;
 	ListCell   *cell;

 	if (!PG_VALID_FE_ENCODING(encoding))
 		return -1;

 	/*
 	 * Before startup completes, merely remember the request;
 	 * InitializeClientEncoding() applies it later.
 	 */
 	if (!backend_startup_complete)
 	{
 		pending_client_encoding = encoding;
 		return 0;
 	}

 	server_enc = GetDatabaseEncoding();

 	/* Pairs that need no conversion function: just clear the proc pointers. */
 	if (encoding == server_enc ||
 		encoding == PG_SQL_ASCII ||
 		server_enc == PG_SQL_ASCII)
 	{
 		ToClientConvProc = NULL;
 		ToServerConvProc = NULL;
 		ClientEncoding = &pg_enc2name_tbl[encoding];
 		return 0;
 	}

 	/*
 	 * Walk the cache looking for entries built by PrepareClientEncoding for
 	 * this encoding pair.  The first match (the newest, since entries are
 	 * pushed at the head) becomes active; any later match is a stale
 	 * duplicate and is freed so repeated Prepare/Set cycles don't leak.
 	 */
 	foreach(cell, ConvProcList)
 	{
 		ConvProcInfo *entry = (ConvProcInfo *) lfirst(cell);

 		if (entry->c_encoding != encoding ||
 			entry->s_encoding != server_enc)
 			continue;			/* different pair, not our business */

 		if (activated)
 		{
 			/* Older duplicate of the entry we already activated. */
 			ConvProcList = foreach_delete_current(ConvProcList, cell);
 			pfree(entry);
 			continue;
 		}

 		/* Newest entry for this pair: make it the active one. */
 		ClientEncoding = &pg_enc2name_tbl[encoding];
 		ToServerConvProc = &entry->to_server_info;
 		ToClientConvProc = &entry->to_client_info;
 		activated = true;
 	}

 	/* With no cached entry there is nothing we can activate. */
 	return activated ? 0 : -1;
 }

 /*
  * Initialize client encoding conversions.
  *		Called from InitPostgres() once during backend startup.
  */
 void
 InitializeClientEncoding(void)
 {
 	int			server_enc;
 	Oid			conv_oid;
 	FmgrInfo   *conv_info;

 	Assert(!backend_startup_complete);
 	backend_startup_complete = true;

 	/*
 	 * Now apply the encoding that was requested during startup.  Earlier
 	 * calls had to accept it blindly; if the conversion turns out not to
 	 * exist, this is the point where we can (and must) fail.
 	 */
 	if (PrepareClientEncoding(pending_client_encoding) < 0 ||
 		SetClientEncoding(pending_client_encoding) < 0)
 	{
 		ereport(FATAL,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("conversion between %s and %s is not supported",
 						pg_enc2name_tbl[pending_client_encoding].name,
 						GetDatabaseEncodingName())));
 	}

 	/*
 	 * The UTF8-to-server conversion function is looked up here, once: the
 	 * server encoding never changes within a backend.  UTF8 and SQL_ASCII
 	 * servers need no such function.
 	 */
 	server_enc = GetDatabaseEncoding();
 	if (server_enc == PG_UTF8 || server_enc == PG_SQL_ASCII)
 		return;

 	Assert(IsTransactionState());
 	conv_oid = FindDefaultConversionProc(PG_UTF8, server_enc);

 	/* No such conversion: Utf8ToServerConvProc simply stays NULL. */
 	if (!OidIsValid(conv_oid))
 		return;

 	conv_info = (FmgrInfo *) MemoryContextAlloc(TopMemoryContext,
 												sizeof(FmgrInfo));
 	fmgr_info_cxt(conv_oid, conv_info, TopMemoryContext);

 	/* Publish the pointer only once the FmgrInfo is completely filled in. */
 	Utf8ToServerConvProc = conv_info;
 }

 /*
  * returns the current client encoding
  */
 int
 pg_get_client_encoding(void)
 {
 	/* Numeric ID of the encoding ClientEncoding currently points at. */
 	int			enc_id = ClientEncoding->encoding;

 	return enc_id;
 }

 /*
  * returns the current client encoding name
  */
 const char *
 pg_get_client_encoding_name(void)
 {
 	/* Name string stored in the table entry ClientEncoding points at. */
 	const char *enc_name = ClientEncoding->name;

 	return enc_name;
 }

 /*
  * Convert src string to another encoding (general case).
  *
  * See the notes about string conversion functions at the top of this file.
  */
 unsigned char *
 pg_do_encoding_conversion(unsigned char *src, int len,
 						  int src_encoding, int dest_encoding)
 {
 	unsigned char *result;
 	Oid			proc;

 	if (len <= 0)
 		return src;				/* empty string is always valid */

 	if (src_encoding == dest_encoding)
 		return src;				/* no conversion required, assume valid */

 	if (dest_encoding == PG_SQL_ASCII)
 		return src;				/* any string is valid in SQL_ASCII */

 	if (src_encoding == PG_SQL_ASCII)
 	{
 		/* No conversion is possible, but we must validate the result */
 		(void) pg_verify_mbstr(dest_encoding, (const char *) src, len, false);
 		return src;
 	}

 	if (!IsTransactionState())	/* shouldn't happen */
 		elog(ERROR, "cannot perform encoding conversion outside a transaction");

 	proc = FindDefaultConversionProc(src_encoding, dest_encoding);
 	if (!OidIsValid(proc))
 		ereport(ERROR,
 				(errcode(ERRCODE_UNDEFINED_FUNCTION),
 				 errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
 						pg_encoding_to_char(src_encoding),
 						pg_encoding_to_char(dest_encoding))));

 	/*
 	 * Allocate space for conversion result, being wary of integer overflow.
 	 *
 	 * len * MAX_CONVERSION_GROWTH is typically a vast overestimate of the
 	 * required space, so it might exceed MaxAllocSize even though the result
 	 * would actually fit.  We do not want to hand back a result string that
 	 * exceeds MaxAllocSize, because callers might not cope gracefully --- but
 	 * if we just allocate more than that, and don't use it, that's fine.
 	 */
 	if ((Size) len >= (MaxAllocHugeSize / (Size) MAX_CONVERSION_GROWTH))
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 				 errmsg("out of memory"),
 				 errdetail("String of %d bytes is too long for encoding conversion.",
 						   len)));

 	result = (unsigned char *)
 		MemoryContextAllocHuge(CurrentMemoryContext,
 							   (Size) len * MAX_CONVERSION_GROWTH + 1);

 	(void) OidFunctionCall6(proc,
 							Int32GetDatum(src_encoding),
 							Int32GetDatum(dest_encoding),
 							CStringGetDatum((char *)src),
 							CStringGetDatum((char *)result),
 							Int32GetDatum(len),
 							BoolGetDatum(false));

 	/*
 	 * If the result is large, it's worth repalloc'ing to release any extra
 	 * space we asked for.  The cutoff here is somewhat arbitrary, but we
 	 * *must* check when len * MAX_CONVERSION_GROWTH exceeds MaxAllocSize.
 	 */
 	if (len > 1000000)
 	{
 		Size		resultlen = strlen((char *) result);

 		if (resultlen >= MaxAllocSize)
 			ereport(ERROR,
 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 					 errmsg("out of memory"),
 					 errdetail("String of %d bytes is too long for encoding conversion.",
 							   len)));

 		result = (unsigned char *) repalloc(result, resultlen + 1);
 	}

 	return result;
 }

 /*
  * Convert src string to another encoding.
  *
  * This function has a different API than the other conversion functions.
  * The caller should've looked up the conversion function using
  * FindDefaultConversionProc().  Unlike the other functions, the converted
  * result is not palloc'd.  It is written to the caller-supplied buffer
  * instead.
  *
  * src_encoding   - encoding to convert from
  * dest_encoding  - encoding to convert to
  * src, srclen    - input buffer and its length in bytes
  * dest, destlen  - destination buffer and its size in bytes
  *
  * The output is null-terminated.
  *
  * If destlen < srclen * MAX_CONVERSION_GROWTH + 1, the converted output
  * wouldn't necessarily fit in the output buffer, so the function limits the
  * amount of input it passes on and does not convert the whole input.
  *
  * TODO: The conversion function interface is not great.  Firstly, it
  * would be nice to pass through the destination buffer size to the
  * conversion function, so that if you pass a shorter destination buffer, it
  * could still continue to fill up the whole buffer.  Currently, we have to
  * assume worst case expansion and stop the conversion short, even if there
  * is in fact space left in the destination buffer.  Secondly, it would be
  * nice to return the number of bytes written to the caller, to avoid a call
  * to strlen().
  */
 int
 pg_do_encoding_conversion_buf(Oid proc,
 							  int src_encoding,
 							  int dest_encoding,
 							  unsigned char *src, int srclen,
 							  unsigned char *dest, int destlen,
 							  bool noError)
 {
 	Datum		result;
 	Size		max_srclen;

 	/*
 	 * A non-positive destlen leaves no room even for the terminating NUL, so
 	 * nothing can be converted.  This must be checked before the arithmetic
 	 * below: "destlen - 1" takes part in an unsigned (Size) division, so a
 	 * value of -1 would wrap around to a huge limit, the clamp would never
 	 * trigger, and the conversion function would write past "dest".
 	 *
 	 * Returning 0 matches what happens for destlen == 1, where the input is
 	 * clamped to zero bytes.
 	 */
 	if (destlen <= 0)
 		return 0;

 	/*
 	 * If the destination buffer is not large enough to hold the result in the
 	 * worst case, limit the input size passed to the conversion function.
 	 * max_srclen is the largest input whose worst-case expansion, plus the
 	 * terminating NUL, still fits in destlen bytes.
 	 */
 	max_srclen = (Size) (destlen - 1) / (Size) MAX_CONVERSION_GROWTH;
 	if ((Size) srclen >= max_srclen)
 		srclen = (int) max_srclen;

 	/*
 	 * Invoke the caller-supplied conversion function.  Its int32 result is
 	 * passed back unchanged.
 	 * NOTE(review): presumably the number of source bytes consumed -- confirm
 	 * against the conversion-function contract.
 	 */
 	result = OidFunctionCall6(proc,
 							  Int32GetDatum(src_encoding),
 							  Int32GetDatum(dest_encoding),
 							  CStringGetDatum((char *) src),
 							  CStringGetDatum((char *) dest),
 							  Int32GetDatum(srclen),
 							  BoolGetDatum(noError));
 	return DatumGetInt32(result);
 }

 /*
  * Convert string to encoding encoding_name. The source
  * encoding is the DB encoding.
  *
  * BYTEA convert_to(TEXT string, NAME encoding_name) */
 Datum
 pg_convert_to(PG_FUNCTION_ARGS)
 {
 	Datum		text_arg;
 	Datum		target_name;
 	Datum		db_name;

 	text_arg = PG_GETARG_DATUM(0);
 	target_name = PG_GETARG_DATUM(1);

 	/* The source encoding is always the database encoding, as a NAME. */
 	db_name = DirectFunctionCall1(namein,
 								  CStringGetDatum(DatabaseEncoding->name));

 	/*
 	 * Delegate to pg_convert.  It wants a bytea as first argument while we
 	 * hold a text; that works because both are varlena types and therefore
 	 * structurally identical.
 	 */
 	PG_RETURN_DATUM(DirectFunctionCall3(pg_convert, text_arg,
 										db_name, target_name));
 }

 /*
  * Convert string from encoding encoding_name. The destination
  * encoding is the DB encoding.
  *
  * TEXT convert_from(BYTEA string, NAME encoding_name) */
 Datum
 pg_convert_from(PG_FUNCTION_ARGS)
 {
 	Datum		bytes_arg;
 	Datum		origin_name;
 	Datum		db_name;

 	bytes_arg = PG_GETARG_DATUM(0);
 	origin_name = PG_GETARG_DATUM(1);

 	/* The destination encoding is always the database encoding, as a NAME. */
 	db_name = DirectFunctionCall1(namein,
 								  CStringGetDatum(DatabaseEncoding->name));

 	/*
 	 * Delegate to pg_convert and hand its bytea result back as text.  The
 	 * two are both varlena types and so structurally identical.  Not every
 	 * bytea is valid text, but this one is: pg_convert was asked to produce
 	 * data in the current database encoding.
 	 */
 	PG_RETURN_DATUM(DirectFunctionCall3(pg_convert, bytes_arg,
 										origin_name, db_name));
 }

 /*
  * Convert string between two arbitrary encodings.
  *
  * BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
  */
 Datum
 pg_convert(PG_FUNCTION_ARGS)
 {
 	bytea	   *string = PG_GETARG_BYTEA_PP(0);
 	char	   *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
 	int			src_encoding = pg_char_to_encoding(src_encoding_name);
 	char	   *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
 	int			dest_encoding = pg_char_to_encoding(dest_encoding_name);
 	const char *src_str;
 	char	   *dest_str;
 	bytea	   *retval;
 	int			len;

 	if (src_encoding < 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("invalid source encoding name \"%s\"",
 						src_encoding_name)));
 	if (dest_encoding < 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("invalid destination encoding name \"%s\"",
 						dest_encoding_name)));

 	/* make sure that source string is valid */
 	len = VARSIZE_ANY_EXHDR(string);
 	src_str = VARDATA_ANY(string);
 	(void) pg_verify_mbstr(src_encoding, src_str, len, false);

 	/* perform conversion */
 	dest_str = (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, src_str),
 												  len,
 												  src_encoding,
 												  dest_encoding);

 	/* update len if conversion actually happened */
 	if (dest_str != src_str)
 		len = strlen(dest_str);

 	/*
 	 * build bytea data type structure.
 	 */
 	retval = (bytea *) palloc(len + VARHDRSZ);
 	SET_VARSIZE(retval, len + VARHDRSZ);
 	memcpy(VARDATA(retval), dest_str, len);

 	if (dest_str != src_str)
 		pfree(dest_str);

 	/* free memory if allocated by the toaster */
 	PG_FREE_IF_COPY(string, 0);

 	PG_RETURN_BYTEA_P(retval);
 }

 /*
  * get the length of the string considered as text in the specified
  * encoding. Raises an error if the data is not valid in that
  * encoding.
  *
  * INT4 length (BYTEA string, NAME src_encoding_name)
  */
 Datum
 length_in_encoding(PG_FUNCTION_ARGS)
 {
 	bytea	   *input = PG_GETARG_BYTEA_PP(0);
 	char	   *enc_name = NameStr(*PG_GETARG_NAME(1));
 	int			enc_id = pg_char_to_encoding(enc_name);
 	int			nchars;

 	if (enc_id < 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("invalid encoding name \"%s\"",
 						enc_name)));

 	/*
 	 * Validate the bytes in the named encoding and take the value the
 	 * verifier returns as the length.  noError is false, so invalid data
 	 * raises an error instead of returning.
 	 */
 	nchars = pg_verify_mbstr_len(enc_id,
 								 VARDATA_ANY(input),
 								 VARSIZE_ANY_EXHDR(input),
 								 false);

 	PG_RETURN_INT32(nchars);
 }

 /*
  * Get maximum multibyte character length in the specified encoding.
  *
  * Note encoding is specified numerically, not by name as above.
  */
 Datum
 pg_encoding_max_length_sql(PG_FUNCTION_ARGS)
 {
 	int			enc_id = PG_GETARG_INT32(0);

 	/* An unknown encoding number yields SQL NULL rather than an error. */
 	if (!PG_VALID_ENCODING(enc_id))
 		PG_RETURN_NULL();

 	PG_RETURN_INT32(pg_wchar_table[enc_id].maxmblen);
 }

 /*
  * Convert client encoding to server encoding.
  *
  * See the notes about string conversion functions at the top of this file.
  */
 char *
 pg_client_to_server(const char *s, int len)
 {
 	/* Treat the current client encoding as the source encoding. */
 	int			client_enc = ClientEncoding->encoding;

 	return pg_any_to_server(s, len, client_enc);
 }

 /*
  * Convert any encoding to server encoding.
  *
  * See the notes about string conversion functions at the top of this file.
  *
  * Unlike the other string conversion functions, this will apply validation
  * even if encoding == DatabaseEncoding->encoding.  This is because this is
  * used to process data coming in from outside the database, and we never
  * want to just assume validity.
  */
 char *
 pg_any_to_server(const char *s, int len, int encoding)
 {
 	int			server_enc;
 	const char *p;
 	const char *end;

 	if (len <= 0)
 		return unconstify(char *, s);	/* empty string is always valid */

 	server_enc = DatabaseEncoding->encoding;

 	/*
 	 * Source already in the server encoding, or declared SQL_ASCII: nothing
 	 * to convert, but the bytes are still verified against the server
 	 * encoding.
 	 */
 	if (encoding == PG_SQL_ASCII || encoding == server_enc)
 	{
 		(void) pg_verify_mbstr(server_enc, s, len, false);
 		return unconstify(char *, s);
 	}

 	if (server_enc != PG_SQL_ASCII)
 	{
 		/* Use the cached conversion function when the source is the client. */
 		if (encoding == ClientEncoding->encoding)
 			return perform_default_encoding_conversion(s, len, true, -1, NULL);

 		/* Otherwise the general path, which only works inside a transaction. */
 		return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
 												  len,
 												  encoding,
 												  server_enc);
 	}

 	/*
 	 * SQL_ASCII server: conversion is impossible, yet validation is still
 	 * required because the client may have escaped strings according to its
 	 * own encoding.  An ASCII-safe (backend-valid) encoding is verified as
 	 * itself.
 	 */
 	if (PG_VALID_BE_ENCODING(encoding))
 	{
 		(void) pg_verify_mbstr(encoding, s, len, false);
 		return unconstify(char *, s);
 	}

 	/*
 	 * ASCII-unsafe encoding: such data can be neither converted nor safely
 	 * shown to the parser.  As a compromise, accept it only if it is pure
 	 * ASCII with no embedded NUL bytes.
 	 */
 	end = s + len;
 	for (p = s; p < end; p++)
 	{
 		if (*p == '\0' || IS_HIGHBIT_SET(*p))
 			ereport(ERROR,
 					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
 					 errmsg("invalid byte value for encoding \"%s\": 0x%02x",
 							pg_enc2name_tbl[PG_SQL_ASCII].name,
 							(unsigned char) *p)));
 	}

 	return unconstify(char *, s);
 }

 /*
  * Convert server encoding to client encoding.
  *
  * See the notes about string conversion functions at the top of this file.
  */
 char *
 pg_server_to_client(const char *s, int len)
 {
 	/* Treat the current client encoding as the destination encoding. */
 	int			client_enc = ClientEncoding->encoding;

 	return pg_server_to_any(s, len, client_enc);
 }

 /*
  * Convert server encoding to any encoding.
  *
  * See the notes about string conversion functions at the top of this file.
  */
 char *
 pg_server_to_any(const char *s, int len, int encoding)
 {
 	int			server_enc;

 	if (len <= 0)
 		return unconstify(char *, s);	/* empty string is always valid */

 	server_enc = DatabaseEncoding->encoding;

 	/* Same encoding, or SQL_ASCII target: pass through, assumed valid. */
 	if (encoding == PG_SQL_ASCII || encoding == server_enc)
 		return unconstify(char *, s);

 	/*
 	 * SQL_ASCII server: no conversion is possible, but the bytes must be
 	 * valid in the encoding we are about to label them with.
 	 */
 	if (server_enc == PG_SQL_ASCII)
 	{
 		(void) pg_verify_mbstr(encoding, s, len, false);
 		return unconstify(char *, s);
 	}

 	/* General path for any non-client target; needs a live transaction. */
 	if (encoding != ClientEncoding->encoding)
 		return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
 												  len,
 												  server_enc,
 												  encoding);

 	/* Target is the client encoding: use the cached conversion function. */
 	return perform_default_encoding_conversion(s, len, false, -1, NULL);
 }

 /*
  *	Perform default encoding conversion using cached FmgrInfo. Since
  *	this function does not access the database at all, it is safe to call
  *	outside transactions. The client encoding must have been set explicitly
  *	before calling this function; otherwise no conversion is performed.
  *
  *  NOTE: this function was slightly updated to allow passing in an encoding
  *  that is not necessarily ClientEncoding->encoding, together with the
  *  conversion function to use for it. The default value is -1, which means:
  *  use ClientEncoding and its cached conversion function.
  *  See pg_custom_to_server and pg_server_to_custom for information.
  */
 static char *
 perform_default_encoding_conversion(const char *src, int len,
 									bool is_client_to_server,
 									int custom_client_encoding,
 									FmgrInfo *custom_encoding_proc)
 {
 	bool		use_custom = (custom_client_encoding != -1);
 	int			server_enc = DatabaseEncoding->encoding;
 	int			peer_enc;
 	int			from_enc;
 	int			to_enc;
 	FmgrInfo   *conv;
 	char	   *out;
 	Size		out_len;

 	/*
 	 * The non-server side of the conversion is the session's client encoding
 	 * in the normal case (-1), or the caller-supplied encoding in the custom
 	 * case (used for external table encodings).
 	 */
 	peer_enc = use_custom ? custom_client_encoding : ClientEncoding->encoding;

 	/*
 	 * Orient the conversion and choose the function: the caller's proc in the
 	 * custom case, otherwise the cached proc for the requested direction.
 	 */
 	if (is_client_to_server)
 	{
 		from_enc = peer_enc;
 		to_enc = server_enc;
 		conv = use_custom ? custom_encoding_proc : ToServerConvProc;
 	}
 	else
 	{
 		from_enc = server_enc;
 		to_enc = peer_enc;
 		conv = use_custom ? custom_encoding_proc : ToClientConvProc;
 	}

 	/* With no conversion function, hand the input back unconverted. */
 	if (conv == NULL)
 		return unconstify(char *, src);

 	/*
 	 * Allocate a worst-case output buffer, guarding the size computation
 	 * against overflow (same reasoning as in pg_do_encoding_conversion).
 	 */
 	if ((Size) len >= (MaxAllocHugeSize / (Size) MAX_CONVERSION_GROWTH))
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 				 errmsg("out of memory"),
 				 errdetail("String of %d bytes is too long for encoding conversion.",
 						   len)));

 	out = (char *)
 		MemoryContextAllocHuge(CurrentMemoryContext,
 							   (Size) len * MAX_CONVERSION_GROWTH + 1);

 	FunctionCall6(conv,
 				  Int32GetDatum(from_enc),
 				  Int32GetDatum(to_enc),
 				  CStringGetDatum((char *) src),
 				  CStringGetDatum(out),
 				  Int32GetDatum(len),
 				  BoolGetDatum(false));

 	/* Short inputs cannot have wasted much space; return them directly. */
 	if (len <= 1000000)
 		return out;

 	/*
 	 * Long input: make sure the real result stays below MaxAllocSize, then
 	 * shrink the buffer to fit.
 	 */
 	out_len = strlen(out);
 	if (out_len >= MaxAllocSize)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 				 errmsg("out of memory"),
 				 errdetail("String of %d bytes is too long for encoding conversion.",
 						   len)));

 	return (char *) repalloc(out, out_len + 1);
 }

 /*
  * Convert a single Unicode code point into a string in the server encoding.
  *
  * The code point given by "c" is converted and stored at *s, which must
  * have at least MAX_UNICODE_EQUIVALENT_STRING+1 bytes available.
  * The output will have a trailing '\0'.  Throws error if the conversion
  * cannot be performed.
  *
  * Note that this relies on having previously looked up any required
  * conversion function.  That's partly for speed but mostly because the parser
  * may call this outside any transaction, or in an aborted transaction.
  */
 void
 pg_unicode_to_server(pg_wchar c, unsigned char *s)
 {
 	unsigned char utf8_buf[MAX_MULTIBYTE_CHAR_LEN + 1];
 	int			utf8_len;
 	int			server_enc;

 	/*
 	 * Reject invalid code points.  The errcode is arguable, but callers are
 	 * expected to have checked this already.
 	 */
 	if (!is_valid_unicode_codepoint(c))
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("invalid Unicode code point")));

 	/* ASCII code points are copied through as a single byte. */
 	if (c < 0x80)
 	{
 		s[0] = (unsigned char) c;
 		s[1] = '\0';
 		return;
 	}

 	server_enc = GetDatabaseEncoding();

 	/* UTF-8 server: encode straight into the output and terminate it. */
 	if (server_enc == PG_UTF8)
 	{
 		unicode_to_utf8(c, s);
 		s[pg_utf_mblen(s)] = '\0';
 		return;
 	}

 	/*
 	 * Any other server encoding needs the UTF8-to-server conversion function
 	 * that InitializeClientEncoding looks up; without it we cannot proceed.
 	 */
 	if (Utf8ToServerConvProc == NULL)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("conversion between %s and %s is not supported",
 						pg_enc2name_tbl[PG_UTF8].name,
 						GetDatabaseEncodingName())));

 	/* Spell the code point as a NUL-terminated UTF-8 string ... */
 	unicode_to_utf8(c, utf8_buf);
 	utf8_len = pg_utf_mblen(utf8_buf);
 	utf8_buf[utf8_len] = '\0';

 	/* ... and convert it into *s; noError is false, so failure throws. */
 	FunctionCall6(Utf8ToServerConvProc,
 				  Int32GetDatum(PG_UTF8),
 				  Int32GetDatum(server_enc),
 				  CStringGetDatum((const char *) utf8_buf),
 				  CStringGetDatum((const char *) s),
 				  Int32GetDatum(utf8_len),
 				  BoolGetDatum(false));
 }


 /*
  * pg_custom_to_server
  *
  * convert client encoding to server encoding, but use the passed in encodings
  * instead of the global client and server encoding variables.
  *
  * This routine is basically a slightly modified version of pg_client_to_server.
  * Instead of creating this routine a better way may have been to just call
  * pg_do_encoding_conversion(), which takes in the necessary arguments, however
  * it does not do several necessary checks that pg_client_to_server() does, and
  * altering it to have those checks may break other parts of the system.
  * Therefore, until there's a better idea, we resort to duplicating some code.
  *
  * The reason for creating this routine is to let external tables do data
  * conversion reliably. Since each external table has an encoding attached to
  * it we'd like to just convert from that encoding to the server encoding without
  * altering the global client_encoding variable for this local database.
  */
 char *
 pg_custom_to_server(const char *s, int len, int src_encoding, void *cep)
 {
 	FmgrInfo   *conv = (FmgrInfo *) cep;
 	int			server_enc;
 	const char *p;
 	const char *end;

 	Assert(DatabaseEncoding);
 	Assert(ClientEncoding);

 	/* An empty string needs neither conversion nor validation. */
 	if (len <= 0)
 		return (char *) s;

 	server_enc = DatabaseEncoding->encoding;

 	/*
 	 * Source already in the server encoding, or declared SQL_ASCII: nothing
 	 * to convert, but the bytes are still verified against the server
 	 * encoding.
 	 */
 	if (src_encoding == PG_SQL_ASCII || src_encoding == server_enc)
 	{
 		(void) pg_verify_mbstr(server_enc, s, len, false);
 		return (char *) s;
 	}

 	/* Ordinary case: convert with the conversion function the caller gave. */
 	if (server_enc != PG_SQL_ASCII)
 		return perform_default_encoding_conversion(s, len, true, src_encoding, conv);

 	/*
 	 * SQL_ASCII server: conversion is impossible, yet validation is still
 	 * required because the data's producer may have escaped strings
 	 * according to the source encoding.  An ASCII-safe (backend-valid)
 	 * encoding is verified as itself.
 	 */
 	if (PG_VALID_BE_ENCODING(src_encoding))
 	{
 		(void) pg_verify_mbstr(src_encoding, s, len, false);
 		return (char *) s;
 	}

 	/*
 	 * ASCII-unsafe encoding: such data can be neither converted nor safely
 	 * shown to the parser.  As a compromise, accept it only if it is pure
 	 * ASCII with no embedded NUL bytes.
 	 */
 	end = s + len;
 	for (p = s; p < end; p++)
 	{
 		if (*p == '\0' || IS_HIGHBIT_SET(*p))
 			ereport(ERROR,
 					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
 					 errmsg("invalid byte value for encoding \"%s\": 0x%02x",
 							pg_enc2name_tbl[PG_SQL_ASCII].name,
 							(unsigned char) *p)));
 	}

 	return (char *) s;
 }

 /*
  * pg_server_to_custom
  *
  * Convert from the server encoding to a custom encoding; the reverse of
  * pg_custom_to_server.  See the header comments of pg_custom_to_server and
  * perform_default_encoding_conversion for more information.
  */
 char *
 pg_server_to_custom(const char *s, int len, int dest_encoding, void *cep)
 {
 	FmgrInfo   *conv_proc = (FmgrInfo *) cep;
 	int			server_enc;

 	Assert(DatabaseEncoding);

 	/* Nothing to do for an empty (or negative-length) string. */
 	if (len <= 0)
 		return (char *) s;

 	server_enc = DatabaseEncoding->encoding;

 	/*
 	 * Convert only when the two encodings differ and neither side is
 	 * SQL_ASCII.  In every other case the input pointer is returned as-is,
 	 * without validation (the data is assumed to be valid).
 	 */
 	if (dest_encoding != server_enc &&
 		dest_encoding != PG_SQL_ASCII &&
 		server_enc != PG_SQL_ASCII)
 		return perform_default_encoding_conversion(s, len, false, dest_encoding, conv_proc);

 	return (char *) s;
 }

 /*
  * Convert a null-terminated multibyte string in the database encoding to
  * pg_wchar form.  The input length is taken with strlen(); the return value
  * is whatever the encoding's mb2wchar_with_len routine returns.
  */
 int
 pg_mb2wchar(const char *from, pg_wchar *to)
 {
 	int			server_enc = DatabaseEncoding->encoding;
 	int			nbytes = strlen(from);

 	return pg_wchar_table[server_enc].mb2wchar_with_len((const unsigned char *) from, to, nbytes);
 }

 /*
  * Convert the first len bytes of a multibyte string in the database
  * encoding to pg_wchar form, using that encoding's mb2wchar_with_len
  * routine.
  */
 int
 pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
 {
 	int			server_enc = DatabaseEncoding->encoding;
 	const unsigned char *src = (const unsigned char *) from;

 	return pg_wchar_table[server_enc].mb2wchar_with_len(src, to, len);
 }

 /*
  * Like pg_mb2wchar_with_len, but the caller names the encoding instead of
  * implicitly using the database encoding.  The encoding is used directly as
  * an index into pg_wchar_table and is not checked here.
  */
 int
 pg_encoding_mb2wchar_with_len(int encoding,
 							  const char *from, pg_wchar *to, int len)
 {
 	const unsigned char *src = (const unsigned char *) from;

 	return pg_wchar_table[encoding].mb2wchar_with_len(src, to, len);
 }

 /*
  * Convert a pg_wchar string to a multibyte string in the database encoding.
  * The input length is obtained with pg_wchar_strlen().
  */
 int
 pg_wchar2mb(const pg_wchar *from, char *to)
 {
 	int			server_enc = DatabaseEncoding->encoding;
 	int			nwchars = pg_wchar_strlen(from);

 	return pg_wchar_table[server_enc].wchar2mb_with_len(from, (unsigned char *) to, nwchars);
 }

 /*
  * Convert the first len elements of a pg_wchar string to a multibyte string
  * in the database encoding.
  */
 int
 pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
 {
 	int			server_enc = DatabaseEncoding->encoding;
 	unsigned char *dst = (unsigned char *) to;

 	return pg_wchar_table[server_enc].wchar2mb_with_len(from, dst, len);
 }

 /*
  * Like pg_wchar2mb_with_len, but for a caller-specified encoding.  The
  * encoding is used directly as an index into pg_wchar_table and is not
  * checked here.
  */
 int
 pg_encoding_wchar2mb_with_len(int encoding,
 							  const pg_wchar *from, char *to, int len)
 {
 	unsigned char *dst = (unsigned char *) to;

 	return pg_wchar_table[encoding].wchar2mb_with_len(from, dst, len);
 }

 /*
  * Return the byte length of the multibyte character that starts at mbstr,
  * as reported by the database encoding's mblen routine.
  */
 int
 pg_mblen(const char *mbstr)
 {
 	int			server_enc = DatabaseEncoding->encoding;

 	return pg_wchar_table[server_enc].mblen((const unsigned char *) mbstr);
 }

 /*
  * Return the display length of the multibyte character that starts at
  * mbstr, as reported by the database encoding's dsplen routine.
  */
 int
 pg_dsplen(const char *mbstr)
 {
 	int			server_enc = DatabaseEncoding->encoding;

 	return pg_wchar_table[server_enc].dsplen((const unsigned char *) mbstr);
 }

 /*
  * Return the number of characters (not bytes) in a null-terminated
  * multibyte string in the database encoding.
  */
 int
 pg_mbstrlen(const char *mbstr)
 {
 	const char *cursor;
 	int			nchars;

 	/* With a single-byte encoding, characters and bytes coincide. */
 	if (pg_database_encoding_max_length() == 1)
 		return strlen(mbstr);

 	/* Otherwise hop from character to character until the terminator. */
 	nchars = 0;
 	for (cursor = mbstr; *cursor != '\0'; cursor += pg_mblen(cursor))
 		nchars++;

 	return nchars;
 }

 /*
  * Return the number of characters in a multibyte string that need not be
  * NUL-terminated.  At most "limit" bytes are considered, and counting also
  * stops at a NUL byte.
  */
 int
 pg_mbstrlen_with_len(const char *mbstr, int limit)
 {
 	int			nchars;

 	/* Single-byte encoding: the byte limit is returned as the count. */
 	if (pg_database_encoding_max_length() == 1)
 		return limit;

 	for (nchars = 0; limit > 0 && *mbstr != '\0'; nchars++)
 	{
 		int			charlen = pg_mblen(mbstr);

 		mbstr += charlen;
 		limit -= charlen;
 	}

 	return nchars;
 }

 /*
  * returns the byte length of a multibyte string
  * (not necessarily NULL terminated)
  * that is no longer than limit.
  * this function does not break multibyte character boundary.
  */
 int
 pg_mbcliplen(const char *mbstr, int len, int limit)
 {
 	/* Same as pg_encoding_mbcliplen(), fixed to the database encoding. */
 	int			server_enc = DatabaseEncoding->encoding;

 	return pg_encoding_mbcliplen(server_enc, mbstr, len, limit);
 }

 /*
  * pg_mbcliplen with specified encoding
  */
 int
 pg_encoding_mbcliplen(int encoding, const char *mbstr,
 					  int len, int limit)
 {
 	mblen_converter charlen_fn;
 	int			kept = 0;		/* bytes accepted so far */
 	int			step = 0;		/* byte length of the current character */

 	/* Single-byte encodings need no character-boundary logic. */
 	if (pg_encoding_max_length(encoding) == 1)
 		return cliplen(mbstr, len, limit);

 	charlen_fn = pg_wchar_table[encoding].mblen;

 	/*
 	 * Accept whole characters while input remains.  Stop before a character
 	 * that would push the total past "limit", and right after one that
 	 * lands exactly on it.
 	 */
 	for (; len > 0 && *mbstr != '\0'; len -= step, mbstr += step)
 	{
 		step = (*charlen_fn) ((const unsigned char *) mbstr);
 		if ((kept + step) > limit)
 			break;
 		kept += step;
 		if (kept == limit)
 			break;
 	}

 	return kept;
 }

 /*
  * Similar to pg_mbcliplen except the limit parameter specifies the
  * character length, not the byte length.
  */
 int
 pg_mbcharcliplen(const char *mbstr, int len, int limit)
 {
 	int			nbytes = 0;		/* byte length accumulated so far */
 	int			nchars;			/* characters accepted so far */

 	/* Single-byte encoding: a character limit is the same as a byte limit. */
 	if (pg_database_encoding_max_length() == 1)
 		return cliplen(mbstr, len, limit);

 	/*
 	 * Take characters until the input runs out, a NUL byte is reached, or
 	 * "limit" characters have been accepted.
 	 */
 	for (nchars = 0; len > 0 && *mbstr != '\0' && nchars < limit; nchars++)
 	{
 		int			charlen = pg_mblen(mbstr);

 		nbytes += charlen;
 		len -= charlen;
 		mbstr += charlen;
 	}

 	return nbytes;
 }

 /*
  * mbcliplen for any single-byte encoding: the number of bytes before the
  * first NUL, capped at the smaller of len and limit.
  */
 static int
 cliplen(const char *str, int len, int limit)
 {
 	int			maxbytes = Min(len, limit);
 	int			n;

 	for (n = 0; n < maxbytes && str[n] != '\0'; n++)
 		continue;

 	return n;
 }

 /*
  * Point DatabaseEncoding at the pg_enc2name_tbl entry for the given
  * encoding ID.  An ID that is not a valid backend encoding is rejected
  * with an error.
  */
 void
 SetDatabaseEncoding(int encoding)
 {
 	if (PG_VALID_BE_ENCODING(encoding))
 	{
 		DatabaseEncoding = &pg_enc2name_tbl[encoding];
 		/* the table is expected to be indexed by encoding ID */
 		Assert(DatabaseEncoding->encoding == encoding);
 		return;
 	}

 	elog(ERROR, "invalid database encoding: %d", encoding);
 }

 /*
  * Point MessageEncoding at the pg_enc2name_tbl entry for the given encoding
  * ID.  The ID is checked only by assertion, because some calls happen
  * before elog() can be used.
  */
 void
 SetMessageEncoding(int encoding)
 {
 	Assert(PG_VALID_ENCODING(encoding));

 	MessageEncoding = pg_enc2name_tbl + encoding;

 	/* the table is expected to be indexed by encoding ID */
 	Assert(MessageEncoding->encoding == encoding);
 }

 #ifdef ENABLE_NLS
 /*
  * Make one bind_textdomain_codeset() call, translating a pg_enc to a gettext
  * codeset.  Fails for MULE_INTERNAL, an encoding unknown to gettext; can also
  * fail for gettext-internal causes like out-of-memory.
  */
 static bool
 raw_pg_bind_textdomain_codeset(const char *domainname, int encoding)
 {
 	/* elog() is used only once a memory context exists; else plain stderr */
 	bool		can_elog = (CurrentMemoryContext != NULL);
 	int			idx = 0;

 	/* Search the NULL-name-terminated table for this encoding's entry. */
 	while (pg_enc2gettext_tbl[idx].name != NULL &&
 		   pg_enc2gettext_tbl[idx].encoding != encoding)
 		idx++;

 	/* No entry for this encoding: nothing to bind. */
 	if (pg_enc2gettext_tbl[idx].name == NULL)
 		return false;

 	if (bind_textdomain_codeset(domainname,
 								pg_enc2gettext_tbl[idx].name) != NULL)
 		return true;

 	/* The bind call itself failed; say so through whichever channel works. */
 	if (can_elog)
 		elog(LOG, "bind_textdomain_codeset failed");
 	else
 		write_stderr("bind_textdomain_codeset failed");

 	return false;
 }

 /*
  * Bind a gettext message domain to the codeset corresponding to the database
  * encoding.  For SQL_ASCII, instead bind to the codeset implied by LC_CTYPE.
  * Return the MessageEncoding implied by the new settings.
  *
  * On most platforms, gettext defaults to the codeset implied by LC_CTYPE.
  * When that matches the database encoding, we don't need to do anything.  In
  * CREATE DATABASE, we enforce or trust that the locale's codeset matches the
  * database encoding, except for the C locale.  (On Windows, we also permit a
  * discrepancy under the UTF8 encoding.)  For the C locale, explicitly bind
  * gettext to the right codeset.
  *
  * On Windows, gettext defaults to the Windows ANSI code page.  This is a
  * convenient departure for software that passes the strings to Windows ANSI
  * APIs, but we don't do that.  Compel gettext to use database encoding or,
  * failing that, the LC_CTYPE encoding as it would on other platforms.
  *
  * This function is called before elog() and palloc() are usable.
  */
 int
 pg_bind_textdomain_codeset(const char *domainname)
 {
 	/* true once a memory context exists; handed to pg_get_encoding_from_locale() */
 	bool		elog_ok = (CurrentMemoryContext != NULL);
 	int			encoding = GetDatabaseEncoding();
 	int			new_msgenc;

 	/*
 	 * Mind the preprocessor structure here.  On non-Windows builds the "if"
 	 * inside the #ifndef guards the nested "if" that follows the #endif, so
 	 * the explicit bind to the database encoding is attempted only when
 	 * LC_CTYPE is "C" or "POSIX" (compared case-insensitively).  With WIN32
 	 * defined that guard is compiled out and the bind is always attempted.
 	 *
 	 * NOTE(review): SETLOCALE is a project macro; with a NULL second
 	 * argument it presumably only queries the current setting -- confirm.
 	 */
 #ifndef WIN32
 	const char *ctype = SETLOCALE(LC_CTYPE, NULL);

 	if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0)
 #endif
 		/* SQL_ASCII is never bound explicitly; success ends the function here */
 		if (encoding != PG_SQL_ASCII &&
 			raw_pg_bind_textdomain_codeset(domainname, encoding))
 			return encoding;

 	/*
 	 * Otherwise derive the message encoding from the locale; a negative
 	 * result is replaced by SQL_ASCII.
 	 */
 	new_msgenc = pg_get_encoding_from_locale(NULL, elog_ok);
 	if (new_msgenc < 0)
 		new_msgenc = PG_SQL_ASCII;

 	/* Only on Windows is the locale-derived encoding bound explicitly. */
 #ifdef WIN32
 	if (!raw_pg_bind_textdomain_codeset(domainname, new_msgenc))
 		/* On failure, the old message encoding remains valid. */
 		return GetMessageEncoding();
 #endif

 	return new_msgenc;
 }
 #endif

 /*
  * The database encoding, also called the server encoding, represents the
  * encoding of data stored in text-like data types.  Affected types include
  * cstring, text, varchar, name, xml, and json.
  */
 int
 GetDatabaseEncoding(void)
 {
 	/* encoding ID stored in the entry DatabaseEncoding points at */
 	int			server_enc = DatabaseEncoding->encoding;

 	return server_enc;
 }

 /*
  * Return the name string of the current database encoding.  The pointer
  * refers to the name stored in the encoding table entry, not to a copy.
  */
 const char *
 GetDatabaseEncodingName(void)
 {
 	const char *server_enc_name = DatabaseEncoding->name;

 	return server_enc_name;
 }

 /*
  * SQL-callable function: return the database encoding's name, produced by
  * passing the name string through namein().
  */
 Datum
 getdatabaseencoding(PG_FUNCTION_ARGS)
 {
 	Datum		encname = CStringGetDatum(DatabaseEncoding->name);

 	return DirectFunctionCall1(namein, encname);
 }

 /*
  * SQL-callable function: return the client encoding's name, produced by
  * passing the name string through namein().
  */
 Datum
 pg_client_encoding(PG_FUNCTION_ARGS)
 {
 	Datum		encname = CStringGetDatum(ClientEncoding->name);

 	return DirectFunctionCall1(namein, encname);
 }

 /*
  * SQL-callable wrapper around pg_char_to_encoding(): takes an encoding name
  * (argument 0, type name) and returns that function's result as an int32.
  */
 Datum
 PG_char_to_encoding(PG_FUNCTION_ARGS)
 {
 	Name		encname = PG_GETARG_NAME(0);
 	int			encid = pg_char_to_encoding(NameStr(*encname));

 	PG_RETURN_INT32(encid);
 }

 /*
  * SQL-callable wrapper around pg_encoding_to_char(): takes an encoding ID
  * (argument 0, int32) and returns the resulting name string passed through
  * namein().
  */
 Datum
 PG_encoding_to_char(PG_FUNCTION_ARGS)
 {
 	int32		encid = PG_GETARG_INT32(0);
 	Datum		encname = CStringGetDatum(pg_encoding_to_char(encid));

 	return DirectFunctionCall1(namein, encname);
 }

 /*
  * gettext() returns messages in this encoding.  This often matches the
  * database encoding, but it differs for SQL_ASCII databases, for processes
  * not attached to a database, and under a database encoding lacking iconv
  * support (MULE_INTERNAL).
  */
 int
 GetMessageEncoding(void)
 {
 	/* encoding ID stored in the entry MessageEncoding points at */
 	int			msg_enc = MessageEncoding->encoding;

 	return msg_enc;
 }


 /*
  * Generic character incrementer function.
  *
  * Not knowing anything about the properties of the encoding in use, we just
  * keep incrementing the last byte until we get a validly-encoded result,
  * or we run out of values to try.  We don't bother to try incrementing
  * higher-order bytes, so there's no growth in runtime for wider characters.
  * (If we did try to do that, we'd need to consider the likelihood that 255
  * is not a valid final byte in the encoding.)
  */
 static bool
 pg_generic_charinc(unsigned char *charptr, int len)
 {
 	/* The database encoding's single-character verifier does the checking. */
 	mbchar_verifier verify_char = pg_wchar_table[GetDatabaseEncoding()].mbverifychar;
 	unsigned char *tail = &charptr[len - 1];

 	/*
 	 * Bump only the final byte, one step at a time, until the whole
 	 * character verifies at exactly "len" bytes or the byte reaches 255.
 	 * The byte is modified in place, so on failure it is left at 255.
 	 */
 	while (*tail != (unsigned char) 255)
 	{
 		*tail += 1;
 		if ((*verify_char) (charptr, len) == len)
 			return true;
 	}

 	return false;
 }

 /*
  * UTF-8 character incrementer function.
  *
  * For a one-byte character less than 0x7F, we just increment the byte.
  *
  * For a multibyte character, every byte but the first must fall between 0x80
  * and 0xBF; and the first byte must be between 0xC0 and 0xF4.  We increment
  * the last byte that's not already at its maximum value.  If we can't find a
  * byte that's less than the maximum allowable value, we simply fail.  We also
  * need some special-case logic to skip regions used for surrogate pair
  * handling, as those should not occur in valid UTF-8.
  *
  * Note that we don't reset lower-order bytes back to their minimums, since
  * we can't afford to make an exhaustive search (see make_greater_string).
  */
 static bool
 pg_utf8_increment(unsigned char *charptr, int length)
 {
 	unsigned char ceiling;
 	int			idx;

 	/* Only 1- to 4-byte sequences are handled; anything else is rejected. */
 	if (length < 1 || length > 4)
 		return false;

 	/*
 	 * Third and fourth bytes: working from the end, bump the first one that
 	 * is still below the continuation-byte maximum of 0xBF.
 	 */
 	for (idx = length - 1; idx >= 2; idx--)
 	{
 		if (charptr[idx] < 0xBF)
 		{
 			charptr[idx]++;
 			return true;
 		}
 	}

 	/*
 	 * Second byte: its maximum depends on the lead byte.  After 0xED it is
 	 * capped at 0x9F, and after 0xF4 at 0x8F; otherwise the cap is 0xBF.
 	 */
 	if (length >= 2)
 	{
 		if (charptr[0] == 0xED)
 			ceiling = 0x9F;
 		else if (charptr[0] == 0xF4)
 			ceiling = 0x8F;
 		else
 			ceiling = 0xBF;

 		if (charptr[1] < ceiling)
 		{
 			charptr[1]++;
 			return true;
 		}
 	}

 	/*
 	 * Lead byte: refuse to step past the last lead byte of each length class
 	 * (0x7F, 0xDF, 0xEF, 0xF4).  Lower-order bytes are deliberately not
 	 * reset to their minimums.
 	 */
 	switch (charptr[0])
 	{
 		case 0x7F:
 		case 0xDF:
 		case 0xEF:
 		case 0xF4:
 			return false;
 		default:
 			break;
 	}

 	charptr[0]++;
 	return true;
 }

 /*
  * EUC-JP character incrementer function.
  *
  * If the sequence starts with SS2 (0x8e), it must be a two-byte sequence
  * representing JIS X 0201 characters with the second byte ranging between
  * 0xa1 and 0xdf.  We just increment the last byte if it's less than 0xdf,
  * and otherwise rewrite the whole sequence to 0xa1 0xa1.
  *
  * If the sequence starts with SS3 (0x8f), it must be a three-byte sequence
  * in which the last two bytes range between 0xa1 and 0xfe.  The last byte
  * is incremented if possible, otherwise the second-to-last byte.
  *
  * If the sequence starts with a value other than the above and its MSB
  * is set, it must be a two-byte sequence representing JIS X 0208 characters
  * with both bytes ranging between 0xa1 and 0xfe.  The last byte is
  * incremented if possible, otherwise the second-to-last byte.
  *
  * Otherwise, the sequence is a single-byte ASCII character. It is
  * incremented up to 0x7f.
  */
 static bool
 pg_eucjp_increment(unsigned char *charptr, int length)
 {
 	unsigned char lead = charptr[0];
 	unsigned char cur;
 	int			pos;			/* byte currently being examined */
 	int			lowest;			/* lowest byte index that may be changed */

 	if (lead == SS2)
 	{
 		/* JIS X 0201: exactly two bytes, second byte in 0xa1..0xdf */
 		if (length != 2)
 			return false;

 		cur = charptr[1];
 		if (cur < 0xa1)
 			charptr[1] = 0xa1;	/* below the range: move up to its start */
 		else if (cur < 0xdf)
 			charptr[1]++;
 		else
 			charptr[0] = charptr[1] = 0xa1; /* range exhausted: rewrite both */

 		return true;
 	}

 	if (lead == SS3)
 	{
 		/* JIS X 0212: three bytes; only the last two may be changed */
 		if (length != 3)
 			return false;
 		pos = 2;
 		lowest = 1;
 	}
 	else if (IS_HIGHBIT_SET(lead))
 	{
 		/* JIS X 0208?  Two bytes, and either of them may be changed */
 		if (length != 2)
 			return false;
 		pos = 1;
 		lowest = 0;
 	}
 	else
 	{
 		/* ASCII, single byte: can be incremented up to 0x7f */
 		if (lead > 0x7e)
 			return false;
 		(*charptr)++;
 		return true;
 	}

 	/*
 	 * Shared scan for the two multibyte cases.  Working backwards, raise the
 	 * first byte that is not already at 0xfe: a byte below 0xa1 is set to
 	 * 0xa1, anything else is bumped by one.
 	 */
 	for (; pos >= lowest; pos--)
 	{
 		cur = charptr[pos];
 		if (cur < 0xa1)
 		{
 			charptr[pos] = 0xa1;
 			return true;
 		}
 		if (cur < 0xfe)
 		{
 			charptr[pos]++;
 			return true;
 		}
 	}

 	/* Every changeable byte is already at its maximum. */
 	return false;
 }

 /*
  * get the character incrementer for the encoding for the current database
  */
 mbcharacter_incrementer
 pg_database_encoding_character_incrementer(void)
 {
 	int			server_enc = GetDatabaseEncoding();

 	/*
 	 * Only UTF8 and EUC_JP have dedicated incrementers; every other encoding
 	 * gets the generic one.  (A field in pg_wchar_table[] might eventually
 	 * be a better home for this mapping.)
 	 */
 	if (server_enc == PG_UTF8)
 		return pg_utf8_increment;

 	if (server_enc == PG_EUC_JP)
 		return pg_eucjp_increment;

 	return pg_generic_charinc;
 }

 /*
  * fetch maximum length of the encoding for the current database
  */
 int
 pg_database_encoding_max_length(void)
 {
 	/* maxmblen is read from the database encoding's pg_wchar_table entry */
 	int			server_enc = GetDatabaseEncoding();

 	return pg_wchar_table[server_enc].maxmblen;
 }

 /*
  * Verify mbstr to make sure that it is validly encoded in the current
  * database encoding.  Otherwise same as pg_verify_mbstr().
  */
 bool
 pg_verifymbstr(const char *mbstr, int len, bool noError)
 {
 	/* Delegate to pg_verify_mbstr(), supplying the database encoding. */
 	int			server_enc = GetDatabaseEncoding();

 	return pg_verify_mbstr(server_enc, mbstr, len, noError);
 }

 /*
  * Verify mbstr to make sure that it is validly encoded in the specified
  * encoding.
  */
 bool
 pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
 {
 	int			valid_bytes;

 	Assert(PG_VALID_ENCODING(encoding));

 	/*
 	 * The encoding's string verifier returns a byte count, which is compared
 	 * with len; any other value means the string did not verify in full.
 	 */
 	valid_bytes = pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len);
 	if (valid_bytes == len)
 		return true;

 	if (noError)
 		return false;

 	/* Complain about the data starting at the first unverified byte. */
 	report_invalid_encoding(encoding, mbstr + valid_bytes, len - valid_bytes);

 	return true;
 }

 /*
  * Verify mbstr to make sure that it is validly encoded in the specified
  * encoding.
  *
  * mbstr is not necessarily zero terminated; length of mbstr is
  * specified by len.
  *
  * If OK, return length of string in the encoding.
  * If a problem is found, return -1 when noError is
  * true; when noError is false, ereport() a descriptive message.
  *
  * Note: We cannot use the faster encoding-specific mbverifystr() function
  * here, because we need to count the number of characters in the string.
  */
 int
 pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
 {
 	mbchar_verifier verify_char;
 	int			nchars;

 	Assert(PG_VALID_ENCODING(encoding));

 	/*
 	 * Single-byte encodings: the only invalid byte is NUL, and the character
 	 * count equals the byte count.
 	 */
 	if (pg_encoding_max_length(encoding) <= 1)
 	{
 		const char *nul = memchr(mbstr, 0, len);

 		if (nul == NULL)
 			return len;
 		if (noError)
 			return -1;
 		report_invalid_encoding(encoding, nul, 1);
 	}

 	/* look the verifier up once rather than on every character */
 	verify_char = pg_wchar_table[encoding].mbverifychar;

 	for (nchars = 0; len > 0; nchars++)
 	{
 		int			charlen;

 		/*
 		 * Bytes without the high bit are handled here without calling the
 		 * verifier: a non-NUL byte counts as one character, and a NUL byte
 		 * is invalid.  Everything else goes to the verifier, which signals
 		 * a bad character with a negative length.
 		 */
 		if (IS_HIGHBIT_SET(*mbstr))
 			charlen = (*verify_char) ((const unsigned char *) mbstr, len);
 		else if (*mbstr != '\0')
 			charlen = 1;
 		else
 			charlen = -1;

 		if (charlen < 0)
 		{
 			if (noError)
 				return -1;
 			report_invalid_encoding(encoding, mbstr, len);
 		}

 		mbstr += charlen;
 		len -= charlen;
 	}

 	return nchars;
 }

 /*
  * check_encoding_conversion_args: check arguments of a conversion function
  *
  * "expected" arguments can be either an encoding ID or -1 to indicate that
  * the caller will check whether it accepts the ID.
  *
  * Note: the errors here are not really user-facing, so elog instead of
  * ereport seems sufficient.  Also, we trust that the "expected" encoding
  * arguments are valid encoding IDs, but we don't trust the actuals.
  */
 void
 check_encoding_conversion_args(int src_encoding,
 							   int dest_encoding,
 							   int len,
 							   int expected_src_encoding,
 							   int expected_dest_encoding)
 {
 	/*
 	 * Source side.  The actual ID is range-checked first, because it is used
 	 * to index pg_enc2name_tbl in the mismatch message.  A negative
 	 * "expected" value disables the comparison.
 	 */
 	if (!PG_VALID_ENCODING(src_encoding))
 		elog(ERROR, "invalid source encoding ID: %d", src_encoding);
 	if (expected_src_encoding >= 0 && expected_src_encoding != src_encoding)
 		elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
 			 pg_enc2name_tbl[expected_src_encoding].name,
 			 pg_enc2name_tbl[src_encoding].name);

 	/* Destination side, checked the same way. */
 	if (!PG_VALID_ENCODING(dest_encoding))
 		elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
 	if (expected_dest_encoding >= 0 && expected_dest_encoding != dest_encoding)
 		elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
 			 pg_enc2name_tbl[expected_dest_encoding].name,
 			 pg_enc2name_tbl[dest_encoding].name);

 	/* Finally, the byte count must not be negative. */
 	if (len < 0)
 		elog(ERROR, "encoding conversion length must not be negative");
 }

 /*
  * report_invalid_encoding: complain about invalid multibyte character
  *
  * note: len is remaining length of string, not length of character;
  * len must be greater than zero, as we always examine the first byte.
  */
 void
 report_invalid_encoding(int encoding, const char *mbstr, int len)
 {
 	char		hexbuf[8 * 5 + 1];	/* up to 8 bytes, at most 5 chars each */
 	char	   *out = hexbuf;
 	int			nshow = pg_encoding_mblen(encoding, mbstr);
 	int			k;

 	/*
 	 * Show the bytes of the offending character, but never more than remain
 	 * in the input and never more than 8, so hexbuf cannot overflow.
 	 */
 	if (nshow > len)
 		nshow = len;
 	if (nshow > 8)
 		nshow = 8;

 	/* Format as "0xNN 0xNN ...", with single spaces between the bytes. */
 	for (k = 0; k < nshow; k++)
 	{
 		if (k > 0)
 			out += sprintf(out, " ");
 		out += sprintf(out, "0x%02x", (unsigned char) mbstr[k]);
 	}

 	ereport(ERROR,
 			(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
 			 errmsg("invalid byte sequence for encoding \"%s\": %s",
 					pg_enc2name_tbl[encoding].name,
 					hexbuf)));
 }

 /*
  * report_untranslatable_char: complain about untranslatable character
  *
  * note: len is remaining length of string, not length of character;
  * len must be greater than zero, as we always examine the first byte.
  */
 void
 report_untranslatable_char(int src_encoding, int dest_encoding,
 						   const char *mbstr, int len)
 {
 	char		hexbuf[8 * 5 + 1];	/* up to 8 bytes, at most 5 chars each */
 	char	   *out = hexbuf;
 	int			nshow = pg_encoding_mblen(src_encoding, mbstr);
 	int			k;

 	/*
 	 * Show the bytes of the offending character, but never more than remain
 	 * in the input and never more than 8, so hexbuf cannot overflow.
 	 */
 	if (nshow > len)
 		nshow = len;
 	if (nshow > 8)
 		nshow = 8;

 	/* Format as "0xNN 0xNN ...", with single spaces between the bytes. */
 	for (k = 0; k < nshow; k++)
 	{
 		if (k > 0)
 			out += sprintf(out, " ");
 		out += sprintf(out, "0x%02x", (unsigned char) mbstr[k]);
 	}

 	ereport(ERROR,
 			(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
 			 errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
 					hexbuf,
 					pg_enc2name_tbl[src_encoding].name,
 					pg_enc2name_tbl[dest_encoding].name)));
 }


 #ifdef WIN32
 /*
  * Convert from MessageEncoding to a palloc'ed, null-terminated utf16
  * string. The character length is also passed to utf16len if not
  * null. Returns NULL iff failed. Before MessageEncoding initialization, "str"
  * should be ASCII-only; this will function as though MessageEncoding is UTF8.
  */
 WCHAR *
 pgwin32_message_to_UTF16(const char *str, int len, int *utf16len)
 {
 	int			msgenc = GetMessageEncoding();
 	char	   *input = (char *) str;	/* bytes actually handed to Windows */
 	UINT		codepage;
 	WCHAR	   *utf16;
 	int			dstlen;

 	/* No conversion is possible, and SQL_ASCII is never utf16. */
 	if (msgenc == PG_SQL_ASCII)
 		return NULL;

 	codepage = pg_enc2name_tbl[msgenc].codepage;

 	/*
 	 * An encoding with a corresponding codepage goes straight through
 	 * MultiByteToWideChar.  Without one (for example ENCODING=LATIN8 with
 	 * LC_CTYPE=C) the text is first converted to UTF8 and then widened with
 	 * CP_UTF8.
 	 */
 	if (codepage == 0)
 	{
 		codepage = CP_UTF8;

 		/*
 		 * XXX pg_do_encoding_conversion() requires a transaction.  In the
 		 * absence of one, hope for the input to be valid UTF8.
 		 */
 		if (IsTransactionState())
 		{
 			input = (char *) pg_do_encoding_conversion((unsigned char *) str,
 													   len,
 													   msgenc,
 													   PG_UTF8);
 			/* a fresh buffer came back: measure it */
 			if (input != str)
 				len = strlen(input);
 		}
 	}

 	/* Room for len wide characters plus the terminator. */
 	utf16 = (WCHAR *) palloc(sizeof(WCHAR) * (len + 1));
 	dstlen = MultiByteToWideChar(codepage, 0, input, len, utf16, len);
 	utf16[dstlen] = (WCHAR) 0;

 	/* Release the intermediate UTF8 copy, if one was made. */
 	if (input != str)
 		pfree(input);

 	/* Zero output for non-empty input is treated as a conversion failure. */
 	if (dstlen == 0 && len > 0)
 	{
 		pfree(utf16);
 		return NULL;
 	}

 	if (utf16len)
 		*utf16len = dstlen;

 	return utf16;
 }

 #endif							/* WIN32 */