/* src/backend/utils/adt/oracle_compat.c - hawq - Git at Google */

 /*-------------------------------------------------------------------------
  * oracle_compat.c
  *	Oracle compatible functions.
  *
  * Copyright (c) 1996-2008, PostgreSQL Global Development Group
  *
  *	Author: Edmund Mergl <E.Mergl@bawue.de>
  *	Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
  *
  *
  * IDENTIFICATION
  *	$PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.67.2.1 2007/02/08 20:33:54 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"

 #include <ctype.h>
 #include <limits.h>
 /*
  * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
  * declare them in <wchar.h>.
  */
 #ifdef HAVE_WCHAR_H
 #include <wchar.h>
 #endif
 #ifdef HAVE_WCTYPE_H
 #include <wctype.h>
 #endif

 #include "utils/builtins.h"
 #include "utils/pg_locale.h"
 #include "mb/pg_wchar.h"


 /*
  * If the system provides the needed functions for wide-character manipulation
  * (which are all standardized by C99), then we implement upper/lower/initcap
  * using wide-character functions.	Otherwise we use the traditional <ctype.h>
  * functions, which of course will not work as desired in multibyte character
  * sets.  Note that in either case we are effectively assuming that the
  * database character encoding matches the encoding implied by LC_CTYPE.
  *
  * We assume if we have these two functions, we have their friends too, and
  * can use the wide-character method.
  */
 #if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)
 /* Enables the wide-character code paths guarded by this macro below. */
 #define USE_WIDE_UPPER_LOWER
 /*
  * Case-conversion helpers for NUL-terminated strings.  Both are defined
  * later in this file and return a freshly palloc'd, NUL-terminated result.
  */
 char *wstring_lower (char *str);
 char *wstring_upper(char *str);
 #endif

 /*
  * Shared worker behind the btrim/ltrim/rtrim family: removes characters
  * found in "set" from the left and/or right end of "string", as selected
  * by doltrim/dortrim.  Lengths are byte counts.
  */
 static text *dotrim(const char *string, int stringlen,
 	   const char *set, int setlen,
 	   bool doltrim, bool dortrim);


 #ifdef USE_WIDE_UPPER_LOWER

 /*
  * texttowcs
  *		Convert a TEXT value into a palloc'd, zero-terminated wchar_t string.
  *
  * The payload bytes of txt are interpreted by mbstowcs(), i.e. according to
  * the current LC_CTYPE locale.  The returned array has room for one wide
  * character per input byte plus the terminator; the caller owns it and may
  * pfree it.
  *
  * Raises ERROR (out of memory) if the input length is negative or so large
  * that the workspace size could overflow, and ERROR (character not in
  * repertoire) if mbstowcs() rejects the input.
  */
 static wchar_t *
 texttowcs(const text *txt)
 {
 	int			nbytes = VARSIZE(txt) - VARHDRSZ;
 	char	   *workstr;
 	wchar_t    *result;
 	size_t		ncodes;

 	/* Overflow paranoia */
 	if (nbytes < 0 ||
 		nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
 		ereport(ERROR,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of memory")));

 	/* mbstowcs() needs a null-terminated version of the input */
 	workstr = (char *) palloc(nbytes + 1);
 	memcpy(workstr, VARDATA(txt), nbytes);
 	workstr[nbytes] = '\0';

 	/* Output workspace cannot have more codes than input bytes */
 	result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));

 	/* Do the conversion */
 	ncodes = mbstowcs(result, workstr, nbytes + 1);

 	if (ncodes == (size_t) -1)
 	{
 		/*
 		 * Invalid multibyte character encountered.  We try to give a useful
 		 * error message by letting pg_verifymbstr check the string.  But it's
 		 * possible that the string is OK to us, and not OK to mbstowcs ---
 		 * this suggests that the LC_CTYPE locale is different from the
 		 * database encoding.  Give a generic error message if verifymbstr
 		 * can't find anything wrong.
 		 */
 		pg_verifymbstr(workstr, nbytes, false);
 		ereport(ERROR,
 				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
 				 errmsg("invalid multibyte character for locale"),
 				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
 	}

 	Assert(ncodes <= (size_t) nbytes);

 	/*
 	 * The null-terminated copy was only needed for mbstowcs(); release it
 	 * rather than leaving it behind in the caller's memory context.
 	 */
 	pfree(workstr);

 	return result;
 }


 /*
  * wcstotext
  *		Build a palloc'd TEXT value from a zero-terminated wchar_t string.
  *
  * ncodes is the number of wide characters in str (not counting the
  * terminator); callers already know it, so we do not recompute it.
  * Raises ERROR if the size computation could overflow or if wcstombs()
  * cannot convert the string.
  */
 static text *
 wcstotext(const wchar_t *str, int ncodes)
 {
 	text	   *out;
 	size_t		bufsize;
 	size_t		written;

 	/* Overflow paranoia */
 	if (ncodes < 0 ||
 		ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1)
 		ereport(ERROR,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of memory")));

 	/* Worst case: every code (and the terminator) needs MB_CUR_MAX bytes */
 	bufsize = (ncodes + 1) * MB_CUR_MAX;
 	out = (text *) palloc(bufsize + VARHDRSZ);

 	written = wcstombs((char *) VARDATA(out), str, bufsize);

 	/* Invalid multibyte character encountered ... shouldn't happen */
 	if (written == (size_t) -1)
 		ereport(ERROR,
 				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
 				 errmsg("invalid multibyte character for locale")));

 	Assert(written <= (size_t) (ncodes * MB_CUR_MAX));

 	/* Record the number of payload bytes actually produced */
 	SET_VARSIZE(out, written + VARHDRSZ);

 	return out;
 }
 #endif   /* USE_WIDE_UPPER_LOWER */


 /*
  * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding.
  * To make use of the upper/lower functionality, we need to map UTF8 to
  * UTF16, which for some reason mbstowcs and wcstombs won't do for us.
  * This conversion layer takes care of it.
  */

 #ifdef WIN32

 /*
  * texttowcs for the case of UTF8 to UTF16.
  *
  * Returns a palloc'd, zero-terminated wide string.  Raises ERROR if the
  * size computation could overflow or if the conversion fails.
  */
 static wchar_t *
 win32_utf8_texttowcs(const text *txt)
 {
 	int			inlen = VARSIZE(txt) - VARHDRSZ;
 	int			nwide = 0;
 	wchar_t    *wbuf;

 	/* Overflow paranoia */
 	if (inlen < 0 ||
 		inlen > (int) (INT_MAX / sizeof(wchar_t)) - 1)
 		ereport(ERROR,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of memory")));

 	/* Output workspace cannot have more codes than input bytes */
 	wbuf = (wchar_t *) palloc((inlen + 1) * sizeof(wchar_t));

 	/*
 	 * The API does not work for zero-length input, so only call it when
 	 * there is something to convert; nwide stays 0 otherwise.
 	 */
 	if (inlen > 0)
 	{
 		nwide = MultiByteToWideChar(CP_UTF8, 0, VARDATA(txt), inlen,
 									wbuf, inlen);

 		if (nwide == 0)			/* assume it's NO_UNICODE_TRANSLATION */
 		{
 			/*
 			 * Let pg_verifymbstr produce a specific complaint if it can;
 			 * otherwise fall through to the generic message.
 			 */
 			pg_verifymbstr(VARDATA(txt), inlen, false);
 			ereport(ERROR,
 					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
 					 errmsg("invalid multibyte character for locale"),
 					 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
 		}
 	}

 	Assert(nwide <= inlen);
 	wbuf[nwide] = 0;

 	return wbuf;
 }

 /*
  * wcstotext for the case of UTF16 to UTF8.
  *
  * str must be zero-terminated.  Returns a palloc'd TEXT value whose length
  * excludes the terminating null written by the conversion.
  */
 static text *
 win32_utf8_wcstotext(const wchar_t *str)
 {
 	text	   *out;
 	int			needed;
 	int			written;

 	/* First call only measures: no output buffer is supplied */
 	needed = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
 	if (needed == 0)			/* shouldn't happen */
 		ereport(ERROR,
 				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
 				 errmsg("UTF-16 to UTF-8 translation failed: %lu",
 						GetLastError())));

 	out = palloc(needed + VARHDRSZ);

 	/* Second call performs the conversion into the TEXT payload */
 	written = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(out), needed,
 								  NULL, NULL);
 	if (written == 0)			/* shouldn't happen */
 		ereport(ERROR,
 				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
 				 errmsg("UTF-16 to UTF-8 translation failed: %lu",
 						GetLastError())));

 	/* -1 to ignore null */
 	SET_VARSIZE(out, needed + VARHDRSZ - 1);

 	return out;
 }

 /* interface layer to check which encoding is in use */

 static wchar_t *
 win32_texttowcs(const text *txt)
 {
 	if (GetDatabaseEncoding() == PG_UTF8)
 		return win32_utf8_texttowcs(txt);
 	else
 		return texttowcs(txt);
 }

 static text *
 win32_wcstotext(const wchar_t *str, int ncodes)
 {
 	if (GetDatabaseEncoding() == PG_UTF8)
 		return win32_utf8_wcstotext(str);
 	else
 		return wcstotext(str, ncodes);
 }

 /* use macros to cause routines below to call interface layer */

 #define texttowcs	win32_texttowcs
 #define wcstotext	win32_wcstotext
 #endif   /* WIN32 */

 #ifdef USE_WIDE_UPPER_LOWER
 /*
  * wstring_upper and wstring_lower perform multibyte-aware upper/lower case
  * transformations of NUL-terminated strings by going through wide
  * characters.  Each returns a pointer to a newly palloc'd, NUL-terminated
  * transformed string.
  */
 char *
 wstring_upper(char *str)
 {
 	int			inlen = strlen(str);
 	int			outlen;
 	int			pos;
 	text	   *src;
 	text	   *dst;
 	wchar_t    *wide;
 	char	   *converted;

 	/* Wrap the C string in a TEXT value so texttowcs can consume it */
 	src = palloc(inlen + VARHDRSZ);
 	SET_VARSIZE(src, inlen + VARHDRSZ);
 	memcpy(VARDATA(src), str, inlen);

 	wide = texttowcs(src);

 	/* Upper-case every wide character; pos ends up as the code count */
 	pos = 0;
 	while (wide[pos] != 0)
 	{
 		wide[pos] = towupper(wide[pos]);
 		pos++;
 	}

 	dst = wcstotext(wide, pos);

 	/* Copy the TEXT payload out into a NUL-terminated C string */
 	outlen = VARSIZE(dst) - VARHDRSZ;
 	converted = palloc(outlen + 1);
 	memcpy(converted, VARDATA(dst), outlen);
 	converted[outlen] = '\0';

 	/* Release all intermediate buffers */
 	pfree(wide);
 	pfree(src);
 	pfree(dst);

 	return converted;
 }

 char *
 wstring_lower(char *str)
 {
 	int			inlen = strlen(str);
 	int			outlen;
 	int			pos;
 	text	   *src;
 	text	   *dst;
 	wchar_t    *wide;
 	char	   *converted;

 	/* Wrap the C string in a TEXT value so texttowcs can consume it */
 	src = palloc(inlen + VARHDRSZ);
 	SET_VARSIZE(src, inlen + VARHDRSZ);
 	memcpy(VARDATA(src), str, inlen);

 	wide = texttowcs(src);

 	/* Lower-case every wide character; pos ends up as the code count */
 	pos = 0;
 	while (wide[pos] != 0)
 	{
 		wide[pos] = towlower(wide[pos]);
 		pos++;
 	}

 	dst = wcstotext(wide, pos);

 	/* Copy the TEXT payload out into a NUL-terminated C string */
 	outlen = VARSIZE(dst) - VARHDRSZ;
 	converted = palloc(outlen + 1);
 	memcpy(converted, VARDATA(dst), outlen);
 	converted[outlen] = '\0';

 	/* Release all intermediate buffers */
 	pfree(wide);
 	pfree(src);
 	pfree(dst);

 	return converted;
 }
 #endif	/* USE_WIDE_UPPER_LOWER */

 /********************************************************************
  *
  * lower
  *
  * Syntax:
  *
  *	 text lower(text string)
  *
  * Purpose:
  *
  *	 Returns string, with all letters forced to lowercase.
  *
  ********************************************************************/

 Datum
 lower(PG_FUNCTION_ARGS)
 {
 #ifdef USE_WIDE_UPPER_LOWER

 	/*
 	 * The wide-character route is taken only for a multibyte encoding with
 	 * a non-C ctype: some operating systems fail with multi-byte encodings
 	 * and a C locale, and a C locale needs no multibyte processing anyway.
 	 */
 	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
 	{
 		text	   *arg = PG_GETARG_TEXT_P(0);
 		wchar_t    *wide = texttowcs(arg);
 		text	   *lowered;
 		int			n = 0;

 		/* n doubles as loop index and final code count */
 		while (wide[n] != 0)
 		{
 			wide[n] = towlower(wide[n]);
 			n++;
 		}

 		lowered = wcstotext(wide, n);

 		pfree(wide);

 		PG_RETURN_TEXT_P(lowered);
 	}
 	else
 #endif   /* USE_WIDE_UPPER_LOWER */
 	{
 		/* We own this copy, so it can be modified in place */
 		text	   *copy = PG_GETARG_TEXT_P_COPY(0);
 		char	   *bytes = VARDATA(copy);
 		int			nbytes = VARSIZE(copy) - VARHDRSZ;
 		int			k;

 		for (k = 0; k < nbytes; k++)
 			bytes[k] = tolower((unsigned char) bytes[k]);

 		PG_RETURN_TEXT_P(copy);
 	}
 }


 /********************************************************************
  *
  * upper
  *
  * Syntax:
  *
  *	 text upper(text string)
  *
  * Purpose:
  *
  *	 Returns string, with all letters forced to uppercase.
  *
  ********************************************************************/

 Datum
 upper(PG_FUNCTION_ARGS)
 {
 #ifdef USE_WIDE_UPPER_LOWER

 	/*
 	 * The wide-character route is taken only for a multibyte encoding with
 	 * a non-C ctype: some operating systems fail with multi-byte encodings
 	 * and a C locale, and a C locale needs no multibyte processing anyway.
 	 */
 	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
 	{
 		text	   *arg = PG_GETARG_TEXT_P(0);
 		wchar_t    *wide = texttowcs(arg);
 		text	   *raised;
 		int			n = 0;

 		/* n doubles as loop index and final code count */
 		while (wide[n] != 0)
 		{
 			wide[n] = towupper(wide[n]);
 			n++;
 		}

 		raised = wcstotext(wide, n);

 		pfree(wide);

 		PG_RETURN_TEXT_P(raised);
 	}
 	else
 #endif   /* USE_WIDE_UPPER_LOWER */
 	{
 		/* We own this copy, so it can be modified in place */
 		text	   *copy = PG_GETARG_TEXT_P_COPY(0);
 		char	   *bytes = VARDATA(copy);
 		int			nbytes = VARSIZE(copy) - VARHDRSZ;
 		int			k;

 		for (k = 0; k < nbytes; k++)
 			bytes[k] = toupper((unsigned char) bytes[k]);

 		PG_RETURN_TEXT_P(copy);
 	}
 }


 /********************************************************************
  *
  * initcap
  *
  * Syntax:
  *
  *	 text initcap(text string)
  *
  * Purpose:
  *
  *	 Returns string, with first letter of each word in uppercase, all
  *	 other letters in lowercase. A word is defined as a sequence of
  *	 alphanumeric characters, delimited by non-alphanumeric
  *	 characters.
  *
  ********************************************************************/

 Datum
 initcap(PG_FUNCTION_ARGS)
 {
 #ifdef USE_WIDE_UPPER_LOWER

 	/*
 	 * The wide-character route is taken only for a multibyte encoding with
 	 * a non-C ctype: some operating systems fail with multi-byte encodings
 	 * and a C locale, and a C locale needs no multibyte processing anyway.
 	 */
 	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
 	{
 		text	   *arg = PG_GETARG_TEXT_P(0);
 		wchar_t    *wide = texttowcs(arg);
 		text	   *capped;
 		int			in_word = 0;	/* was the previous char alphanumeric? */
 		int			n;

 		for (n = 0; wide[n] != 0; n++)
 		{
 			/* first character of a word goes up, the rest go down */
 			wide[n] = in_word ? towlower(wide[n]) : towupper(wide[n]);
 			in_word = iswalnum(wide[n]);
 		}

 		capped = wcstotext(wide, n);

 		pfree(wide);

 		PG_RETURN_TEXT_P(capped);
 	}
 	else
 #endif   /* USE_WIDE_UPPER_LOWER */
 	{
 		/* We own this copy, so it can be modified in place */
 		text	   *copy = PG_GETARG_TEXT_P_COPY(0);
 		char	   *bytes = VARDATA(copy);
 		int			nbytes = VARSIZE(copy) - VARHDRSZ;
 		int			in_word = 0;	/* was the previous byte alphanumeric? */
 		int			k;

 		for (k = 0; k < nbytes; k++)
 		{
 			unsigned char c = (unsigned char) bytes[k];

 			bytes[k] = in_word ? tolower(c) : toupper(c);
 			in_word = isalnum((unsigned char) bytes[k]);
 		}

 		PG_RETURN_TEXT_P(copy);
 	}
 }


 /********************************************************************
  *
  * lpad
  *
  * Syntax:
  *
  *	 text lpad(text string1, int4 len, text string2)
  *
  * Purpose:
  *
  *	 Returns string1, left-padded to length len with the sequence of
  *	 characters in string2.  If len is less than the length of string1,
  *	 instead truncate (on the right) to len.
  *
  ********************************************************************/

 Datum
 lpad(PG_FUNCTION_ARGS)
 {
 	text	   *string1 = PG_GETARG_TEXT_P(0);
 	int32		len = PG_GETARG_INT32(1);
 	text	   *string2 = PG_GETARG_TEXT_P(2);
 	text	   *ret;
 	char	   *ptr1,
 			   *ptr2,
 			   *ptr2end,
 			   *ptr_ret;
 	int			m,
 				s1len,
 				s2len;
 	int			maxmblen;
 	int			bytelen;

 	/* Negative len is silently taken as zero */
 	if (len < 0)
 		len = 0;

 	/* Byte lengths of both inputs */
 	s1len = VARSIZE(string1) - VARHDRSZ;
 	if (s1len < 0)
 		s1len = 0;				/* shouldn't happen */

 	s2len = VARSIZE(string2) - VARHDRSZ;
 	if (s2len < 0)
 		s2len = 0;				/* shouldn't happen */

 	/* From here on s1len is compared against len, a character count */
 	s1len = pg_mbstrlen_with_len(VARDATA(string1), s1len);

 	if (s1len > len)
 		s1len = len;			/* truncate string1 to len chars */

 	if (s2len <= 0)
 		len = s1len;			/* nothing to pad with, so don't pad */

 	maxmblen = pg_database_encoding_max_length();

 	/*
 	 * The result buffer is sized for len characters of maximum width plus
 	 * the varlena header.  Test with a division *before* multiplying: signed
 	 * overflow is undefined behavior in C, so inspecting the product after
 	 * the fact is not a reliable check.  This also covers the header
 	 * addition below.
 	 */
 	if (len != 0 && len > (INT_MAX - VARHDRSZ) / maxmblen)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 				 errmsg("requested length too large")));

 	bytelen = maxmblen * len;

 	ret = (text *) palloc(VARHDRSZ + bytelen);

 	/* Number of pad characters to emit in front of string1 */
 	m = len - s1len;

 	ptr2 = VARDATA(string2);
 	ptr2end = ptr2 + s2len;
 	ptr_ret = VARDATA(ret);

 	/* Emit the padding, cycling through string2 one character at a time */
 	while (m--)
 	{
 		int			mlen = pg_mblen(ptr2);

 		memcpy(ptr_ret, ptr2, mlen);
 		ptr_ret += mlen;
 		ptr2 += mlen;
 		if (ptr2 == ptr2end)	/* wrap around at end of s2 */
 			ptr2 = VARDATA(string2);
 	}

 	ptr1 = VARDATA(string1);

 	/* Then copy the (possibly truncated) string1, character by character */
 	while (s1len--)
 	{
 		int			mlen = pg_mblen(ptr1);

 		memcpy(ptr_ret, ptr1, mlen);
 		ptr_ret += mlen;
 		ptr1 += mlen;
 	}

 	/* Actual size is however far the output pointer advanced */
 	SET_VARSIZE(ret, ptr_ret - (char *) ret);

 	PG_RETURN_TEXT_P(ret);
 }


 /********************************************************************
  *
  * rpad
  *
  * Syntax:
  *
  *	 text rpad(text string1, int4 len, text string2)
  *
  * Purpose:
  *
  *	 Returns string1, right-padded to length len with the sequence of
  *	 characters in string2.  If len is less than the length of string1,
  *	 instead truncate (on the right) to len.
  *
  ********************************************************************/

 Datum
 rpad(PG_FUNCTION_ARGS)
 {
 	text	   *string1 = PG_GETARG_TEXT_P(0);
 	int32		len = PG_GETARG_INT32(1);
 	text	   *string2 = PG_GETARG_TEXT_P(2);
 	text	   *ret;
 	char	   *ptr1,
 			   *ptr2,
 			   *ptr2end,
 			   *ptr_ret;
 	int			m,
 				s1len,
 				s2len;
 	int			maxmblen;
 	int			bytelen;

 	/* Negative len is silently taken as zero */
 	if (len < 0)
 		len = 0;

 	/* Byte lengths of both inputs */
 	s1len = VARSIZE(string1) - VARHDRSZ;
 	if (s1len < 0)
 		s1len = 0;				/* shouldn't happen */

 	s2len = VARSIZE(string2) - VARHDRSZ;
 	if (s2len < 0)
 		s2len = 0;				/* shouldn't happen */

 	/* From here on s1len is compared against len, a character count */
 	s1len = pg_mbstrlen_with_len(VARDATA(string1), s1len);

 	if (s1len > len)
 		s1len = len;			/* truncate string1 to len chars */

 	if (s2len <= 0)
 		len = s1len;			/* nothing to pad with, so don't pad */

 	maxmblen = pg_database_encoding_max_length();

 	/*
 	 * The result buffer is sized for len characters of maximum width plus
 	 * the varlena header.  Test with a division *before* multiplying: signed
 	 * overflow is undefined behavior in C, so inspecting the product after
 	 * the fact is not a reliable check.  This also covers the header
 	 * addition below.
 	 */
 	if (len != 0 && len > (INT_MAX - VARHDRSZ) / maxmblen)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 				 errmsg("requested length too large")));

 	bytelen = maxmblen * len;

 	ret = (text *) palloc(VARHDRSZ + bytelen);

 	/* Number of pad characters to emit after string1 */
 	m = len - s1len;

 	ptr1 = VARDATA(string1);
 	ptr_ret = VARDATA(ret);

 	/* Copy the (possibly truncated) string1 first, character by character */
 	while (s1len--)
 	{
 		int			mlen = pg_mblen(ptr1);

 		memcpy(ptr_ret, ptr1, mlen);
 		ptr_ret += mlen;
 		ptr1 += mlen;
 	}

 	ptr2 = VARDATA(string2);
 	ptr2end = ptr2 + s2len;

 	/* Then emit the padding, cycling through string2 */
 	while (m--)
 	{
 		int			mlen = pg_mblen(ptr2);

 		memcpy(ptr_ret, ptr2, mlen);
 		ptr_ret += mlen;
 		ptr2 += mlen;
 		if (ptr2 == ptr2end)	/* wrap around at end of s2 */
 			ptr2 = VARDATA(string2);
 	}

 	/* Actual size is however far the output pointer advanced */
 	SET_VARSIZE(ret, ptr_ret - (char *) ret);

 	PG_RETURN_TEXT_P(ret);
 }


 /********************************************************************
  *
  * btrim
  *
  * Syntax:
  *
  *	 text btrim(text string, text set)
  *
  * Purpose:
  *
  *	 Returns string with characters removed from the front and back
  *	 up to the first character not in set.
  *
  ********************************************************************/

 Datum
 btrim(PG_FUNCTION_ARGS)
 {
 	text	   *src = PG_GETARG_TEXT_P(0);
 	text	   *chars = PG_GETARG_TEXT_P(1);
 	int			srclen = VARSIZE(src) - VARHDRSZ;
 	int			charslen = VARSIZE(chars) - VARHDRSZ;
 	text	   *trimmed;

 	/* Both flags set: trim on the left and on the right */
 	trimmed = dotrim(VARDATA(src), srclen,
 					 VARDATA(chars), charslen,
 					 true, true);

 	PG_RETURN_TEXT_P(trimmed);
 }

 /********************************************************************
  *
  * btrim1 --- btrim with set fixed as ' '
  *
  ********************************************************************/

 Datum
 btrim1(PG_FUNCTION_ARGS)
 {
 	text	   *src = PG_GETARG_TEXT_P(0);
 	int			srclen = VARSIZE(src) - VARHDRSZ;
 	text	   *trimmed;

 	/* The set is a single space; trim on both ends */
 	trimmed = dotrim(VARDATA(src), srclen,
 					 " ", 1,
 					 true, true);

 	PG_RETURN_TEXT_P(trimmed);
 }

 /*
  * Common implementation for btrim, ltrim, rtrim
  */
 static text *
 dotrim(const char *string, int stringlen,
 	   const char *set, int setlen,
 	   bool doltrim, bool dortrim)
 {
 	text	   *result;
 	int			i;

 	/* Nothing to do if either string or set is empty */
 	if (stringlen > 0 && setlen > 0)
 	{
 		if (pg_database_encoding_max_length() > 1)
 		{
 			/*
 			 * In the multibyte-encoding case, build arrays of pointers to
 			 * character starts, so that we can avoid inefficient checks in
 			 * the inner loops.
 			 */
 			const char **stringchars;
 			const char **setchars;
 			int		   *stringmblen;
 			int		   *setmblen;
 			int			stringnchars;
 			int			setnchars;
 			int			resultndx;
 			int			resultnchars;
 			const char *p;
 			int			len;
 			int			mblen;
 			const char *str_pos;
 			int			str_len;

 			stringchars = (const char **) palloc(stringlen * sizeof(char *));
 			stringmblen = (int *) palloc(stringlen * sizeof(int));
 			stringnchars = 0;
 			p = string;
 			len = stringlen;
 			while (len > 0)
 			{
 				stringchars[stringnchars] = p;
 				stringmblen[stringnchars] = mblen = pg_mblen(p);
 				stringnchars++;
 				p += mblen;
 				len -= mblen;
 			}

 			setchars = (const char **) palloc(setlen * sizeof(char *));
 			setmblen = (int *) palloc(setlen * sizeof(int));
 			setnchars = 0;
 			p = set;
 			len = setlen;
 			while (len > 0)
 			{
 				setchars[setnchars] = p;
 				setmblen[setnchars] = mblen = pg_mblen(p);
 				setnchars++;
 				p += mblen;
 				len -= mblen;
 			}

 			resultndx = 0;		/* index in stringchars[] */
 			resultnchars = stringnchars;

 			if (doltrim)
 			{
 				while (resultnchars > 0)
 				{
 					str_pos = stringchars[resultndx];
 					str_len = stringmblen[resultndx];
 					for (i = 0; i < setnchars; i++)
 					{
 						if (str_len == setmblen[i] &&
 							memcmp(str_pos, setchars[i], str_len) == 0)
 							break;
 					}
 					if (i >= setnchars)
 						break;	/* no match here */
 					string += str_len;
 					stringlen -= str_len;
 					resultndx++;
 					resultnchars--;
 				}
 			}

 			if (dortrim)
 			{
 				while (resultnchars > 0)
 				{
 					str_pos = stringchars[resultndx + resultnchars - 1];
 					str_len = stringmblen[resultndx + resultnchars - 1];
 					for (i = 0; i < setnchars; i++)
 					{
 						if (str_len == setmblen[i] &&
 							memcmp(str_pos, setchars[i], str_len) == 0)
 							break;
 					}
 					if (i >= setnchars)
 						break;	/* no match here */
 					stringlen -= str_len;
 					resultnchars--;
 				}
 			}

 			pfree((void *)stringchars);
 			pfree(stringmblen);
 			pfree((void *)setchars);
 			pfree(setmblen);
 		}
 		else
 		{
 			/*
 			 * In the single-byte-encoding case, we don't need such overhead.
 			 */
 			if (doltrim)
 			{
 				while (stringlen > 0)
 				{
 					char		str_ch = *string;

 					for (i = 0; i < setlen; i++)
 					{
 						if (str_ch == set[i])
 							break;
 					}
 					if (i >= setlen)
 						break;	/* no match here */
 					string++;
 					stringlen--;
 				}
 			}

 			if (dortrim)
 			{
 				while (stringlen > 0)
 				{
 					char		str_ch = string[stringlen - 1];

 					for (i = 0; i < setlen; i++)
 					{
 						if (str_ch == set[i])
 							break;
 					}
 					if (i >= setlen)
 						break;	/* no match here */
 					stringlen--;
 				}
 			}
 		}
 	}

 	/* Return selected portion of string */
 	result = (text *) palloc(VARHDRSZ + stringlen);
 	SET_VARSIZE(result, VARHDRSZ + stringlen);
 	memcpy(VARDATA(result), string, stringlen);

 	return result;
 }

 /********************************************************************
  *
  * byteatrim
  *
  * Syntax:
  *
  *	 bytea byteatrim(bytea string, bytea set)
  *
  * Purpose:
  *
  *	 Returns string with characters removed from the front and back
  *	 up to the first character not in set.
  *
  * Cloned from btrim and modified as required.
  ********************************************************************/

 Datum
 byteatrim(PG_FUNCTION_ARGS)
 {
 	bytea	   *string = PG_GETARG_BYTEA_P(0);
 	bytea	   *set = PG_GETARG_BYTEA_P(1);
 	bytea	   *ret;
 	char	   *first;
 	char	   *last;
 	char	   *setdata;
 	int			setlen;
 	int			m;

 	/* With an empty string or an empty set, the input is returned as is */
 	m = VARSIZE(string) - VARHDRSZ;
 	if (m <= 0 ||
 		(VARSIZE(set) - VARHDRSZ) <= 0)
 		PG_RETURN_BYTEA_P(string);

 	setdata = VARDATA(set);
 	setlen = VARSIZE(set) - VARHDRSZ;

 	/* first/last bracket the surviving bytes; m is how many remain */
 	first = VARDATA(string);
 	last = first + m - 1;

 	/* Strip leading bytes that occur in the set */
 	while (m > 0 && memchr(setdata, *first, setlen) != NULL)
 	{
 		first++;
 		m--;
 	}

 	/* Strip trailing bytes that occur in the set */
 	while (m > 0 && memchr(setdata, *last, setlen) != NULL)
 	{
 		last--;
 		m--;
 	}

 	ret = (bytea *) palloc(VARHDRSZ + m);
 	SET_VARSIZE(ret, VARHDRSZ + m);
 	memcpy(VARDATA(ret), first, m);

 	PG_RETURN_BYTEA_P(ret);
 }

 /********************************************************************
  *
  * ltrim
  *
  * Syntax:
  *
  *	 text ltrim(text string, text set)
  *
  * Purpose:
  *
  *	 Returns string with initial characters removed up to the first
  *	 character not in set.
  *
  ********************************************************************/

 Datum
 ltrim(PG_FUNCTION_ARGS)
 {
 	text	   *src = PG_GETARG_TEXT_P(0);
 	text	   *chars = PG_GETARG_TEXT_P(1);
 	int			srclen = VARSIZE(src) - VARHDRSZ;
 	int			charslen = VARSIZE(chars) - VARHDRSZ;
 	text	   *trimmed;

 	/* Trim on the left only */
 	trimmed = dotrim(VARDATA(src), srclen,
 					 VARDATA(chars), charslen,
 					 true, false);

 	PG_RETURN_TEXT_P(trimmed);
 }

 /********************************************************************
  *
  * ltrim1 --- ltrim with set fixed as ' '
  *
  ********************************************************************/

 Datum
 ltrim1(PG_FUNCTION_ARGS)
 {
 	text	   *src = PG_GETARG_TEXT_P(0);
 	int			srclen = VARSIZE(src) - VARHDRSZ;
 	text	   *trimmed;

 	/* The set is a single space; trim on the left only */
 	trimmed = dotrim(VARDATA(src), srclen,
 					 " ", 1,
 					 true, false);

 	PG_RETURN_TEXT_P(trimmed);
 }

 /********************************************************************
  *
  * rtrim
  *
  * Syntax:
  *
  *	 text rtrim(text string, text set)
  *
  * Purpose:
  *
  *	 Returns string with final characters removed after the last
  *	 character not in set.
  *
  ********************************************************************/

 Datum
 rtrim(PG_FUNCTION_ARGS)
 {
 	text	   *src = PG_GETARG_TEXT_P(0);
 	text	   *chars = PG_GETARG_TEXT_P(1);
 	int			srclen = VARSIZE(src) - VARHDRSZ;
 	int			charslen = VARSIZE(chars) - VARHDRSZ;
 	text	   *trimmed;

 	/* Trim on the right only */
 	trimmed = dotrim(VARDATA(src), srclen,
 					 VARDATA(chars), charslen,
 					 false, true);

 	PG_RETURN_TEXT_P(trimmed);
 }

 /********************************************************************
  *
  * rtrim1 --- rtrim with set fixed as ' '
  *
  ********************************************************************/

 Datum
 rtrim1(PG_FUNCTION_ARGS)
 {
 	text	   *src = PG_GETARG_TEXT_P(0);
 	int			srclen = VARSIZE(src) - VARHDRSZ;
 	text	   *trimmed;

 	/* The set is a single space; trim on the right only */
 	trimmed = dotrim(VARDATA(src), srclen,
 					 " ", 1,
 					 false, true);

 	PG_RETURN_TEXT_P(trimmed);
 }


 /********************************************************************
  *
  * translate
  *
  * Syntax:
  *
  *	 text translate(text string, text from, text to)
  *
  * Purpose:
  *
  *	 Returns string after replacing all occurrences of characters in from
  *	 with the corresponding character in to.  If from is longer than to,
  *	 occurrences of the extra characters in from are deleted.
  *	 Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
  *
  ********************************************************************/

 Datum
 translate(PG_FUNCTION_ARGS)
 {
 	text	   *string = PG_GETARG_TEXT_P(0);
 	text	   *from = PG_GETARG_TEXT_P(1);
 	text	   *to = PG_GETARG_TEXT_P(2);
 	text	   *result;
 	char	   *from_ptr,
 			   *to_ptr;
 	char	   *source,
 			   *target;
 	int			m,
 				fromlen,
 				tolen,
 				retlen,
 				i;
 	int			maxmblen;
 	int			worst_len;
 	int			len;
 	int			source_len;
 	int			from_index;

 	/* An empty input is returned unchanged */
 	m = VARSIZE(string) - VARHDRSZ;
 	if (m <= 0)
 		PG_RETURN_TEXT_P(string);
 	source = VARDATA(string);

 	fromlen = VARSIZE(from) - VARHDRSZ;
 	from_ptr = VARDATA(from);
 	tolen = VARSIZE(to) - VARHDRSZ;
 	to_ptr = VARDATA(to);

 	/*
 	 * The worst-case expansion is to substitute a max-length character for
 	 * a single-byte character at each position of the string.
 	 *
 	 * Test with a division *before* multiplying: signed overflow is
 	 * undefined behavior in C, so inspecting the product after the fact is
 	 * not a reliable check.  The test also leaves room for the varlena
 	 * header added below.
 	 */
 	maxmblen = pg_database_encoding_max_length();

 	if (m > (INT_MAX - VARHDRSZ) / maxmblen)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 				 errmsg("requested length too large")));

 	worst_len = maxmblen * m;

 	result = (text *) palloc(worst_len + VARHDRSZ);
 	target = VARDATA(result);
 	retlen = 0;

 	/* Process the source one character at a time; m counts bytes left */
 	while (m > 0)
 	{
 		source_len = pg_mblen(source);
 		from_index = 0;

 		/* Find the character's position (in characters) within "from" */
 		for (i = 0; i < fromlen; i += len)
 		{
 			len = pg_mblen(&from_ptr[i]);
 			if (len == source_len &&
 				memcmp(source, &from_ptr[i], len) == 0)
 				break;

 			from_index++;
 		}
 		if (i < fromlen)
 		{
 			/* substitute: step to the from_index'th character of "to" */
 			char	   *p = to_ptr;

 			for (i = 0; i < from_index; i++)
 			{
 				p += pg_mblen(p);
 				if (p >= (to_ptr + tolen))
 					break;
 			}

 			/* If "to" is too short, the character is simply deleted */
 			if (p < (to_ptr + tolen))
 			{
 				len = pg_mblen(p);
 				memcpy(target, p, len);
 				target += len;
 				retlen += len;
 			}

 		}
 		else
 		{
 			/* no match, so copy */
 			memcpy(target, source, source_len);
 			target += source_len;
 			retlen += source_len;
 		}

 		source += source_len;
 		m -= source_len;
 	}

 	SET_VARSIZE(result, retlen + VARHDRSZ);

 	/*
 	 * The function result is probably much bigger than needed, if we're
 	 * using a multibyte encoding, but it's not worth reallocating it;
 	 * the result probably won't live long anyway.
 	 */

 	PG_RETURN_TEXT_P(result);
 }

 /********************************************************************
  *
  * ascii
  *
  * Syntax:
  *
  *	 int ascii(text string)
  *
  * Purpose:
  *
  *	 Returns the decimal representation of the first character from
  *	 string.
  *	 If the string is empty we return 0.
  *	 If the database encoding is UTF8, we return the Unicode codepoint.
  *	 If the database encoding is any other multi-byte encoding, we
  *	 return the value of the first byte if it is an ASCII character
  *	 (range 1 .. 127), or raise an error.
  *	 For all other encodings we return the value of the first byte,
  *	 (range 1..255).
  *
  ********************************************************************/

 Datum
 ascii(PG_FUNCTION_ARGS)
 {
 	text	   *string = PG_GETARG_TEXT_PP(0);
 	int			encoding = GetDatabaseEncoding();
 	unsigned char *data;
 	int			codepoint = 0;
 	int			ntrail = 0;		/* continuation bytes after the lead byte */
 	int			k;

 	/* Empty string yields 0 */
 	if (VARSIZE_ANY_EXHDR(string) <= 0)
 		PG_RETURN_INT32(0);

 	data = (unsigned char *) VARDATA_ANY(string);

 	if (encoding != PG_UTF8 || *data <= 127)
 	{
 		/*
 		 * Not a UTF8 multibyte character: hand back the first byte, except
 		 * that a high-bit byte in a multibyte encoding is rejected.
 		 */
 		if (pg_encoding_max_length(encoding) > 1 && *data > 127)
 			ereport(ERROR,
 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 					 errmsg("requested character too large")));

 		PG_RETURN_INT32((int32) *data);
 	}

 	/*
 	 * UTF8 multibyte character: return the code point for Unicode.  The
 	 * lead byte determines how many payload bits it carries and how many
 	 * continuation bytes follow.
 	 */
 	if (*data >= 0xF0)
 	{
 		codepoint = *data & 0x07;
 		ntrail = 3;
 	}
 	else if (*data >= 0xE0)
 	{
 		codepoint = *data & 0x0F;
 		ntrail = 2;
 	}
 	else
 	{
 		Assert(*data > 0xC0);
 		codepoint = *data & 0x1f;
 		ntrail = 1;
 	}

 	Assert(ntrail > 0);

 	/* Each continuation byte contributes its low six bits */
 	for (k = 1; k <= ntrail; k++)
 	{
 		Assert((data[k] & 0xC0) == 0x80);
 		codepoint = (codepoint << 6) + (data[k] & 0x3f);
 	}

 	PG_RETURN_INT32(codepoint);
 }

 /********************************************************************
  *
  * chr
  *
  * Syntax:
  *
  *	 text chr(int val)
  *
  * Purpose:
  *
  *	Returns the character having the binary equivalent to val.
  *
  * For UTF8 we treat the argument as a Unicode code point.
  * For other multi-byte encodings we raise an error for arguments
  * outside the strict ASCII range (1..127).
  *
  * It's important that we don't ever return a value that is not valid
  * in the database encoding, so that this doesn't become a way for
  * invalid data to enter the database.
  *
  ********************************************************************/

 Datum
 chr(PG_FUNCTION_ARGS)
 {
 	uint32		cvalue = PG_GETARG_UINT32(0);
 	text	   *result;
 	int			encoding = GetDatabaseEncoding();

 	if (encoding == PG_UTF8 && cvalue > 127)
 	{
 		/* for Unicode we treat the argument as a code point */
 		int			bytes;
 		char	   *wch;

 		/* We only allow valid Unicode code points */
 		if (cvalue > 0x001fffff)
 			ereport(ERROR,
 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 					 errmsg("requested character too large for encoding: %d",
 							cvalue)));

 		if (cvalue > 0xffff)
 			bytes = 4;
 		else if (cvalue > 0x07ff)
 			bytes = 3;
 		else
 			bytes = 2;

 		result = (text *) palloc(VARHDRSZ + bytes);
 		SET_VARSIZE(result, VARHDRSZ + bytes);
 		wch = VARDATA(result);

 		if (bytes == 2)
 		{
 			wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
 			wch[1] = 0x80 | (cvalue & 0x3F);;
 		}
 		else if (bytes == 3)
 		{
 			wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
 			wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
 			wch[2] = 0x80 | (cvalue & 0x3F);
 		}
 		else
 		{
 			wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
 			wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
 			wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
 			wch[3] = 0x80 | (cvalue & 0x3F);
 		}

 	}

 	else
 	{
 		bool		is_mb;

 		/*
 		 * Error out on arguments that make no sense or that we can't validly
 		 * represent in the encoding.
 		 */

 		if (cvalue == 0)
 			ereport(ERROR,
 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 					 errmsg("null character not permitted")));

 		is_mb = pg_encoding_max_length(encoding) > 1;

 		if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255)))
 			ereport(ERROR,
 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 					 errmsg("requested character too large for encoding: %d",
 							cvalue)));


 	result = (text *) palloc(VARHDRSZ + 1);
 	SET_VARSIZE(result, VARHDRSZ + 1);
 	*VARDATA(result) = (char) cvalue;
 	}

 	PG_RETURN_TEXT_P(result);
 }

 /********************************************************************
  *
  * repeat
  *
  * Syntax:
  *
  *	 text repeat(text string, int val)
  *
  * Purpose:
  *
  *	Repeat string by val.
  *
  ********************************************************************/

 Datum
 repeat(PG_FUNCTION_ARGS)
 {
 	text	   *string = PG_GETARG_TEXT_P(0);
 	int32		count = PG_GETARG_INT32(1);
 	text	   *result;
 	int			slen,
 				tlen;
 	int			i;
 	char	   *cp;

 	/* A negative repeat count is treated as zero */
 	if (count < 0)
 		count = 0;

 	slen = (VARSIZE(string) - VARHDRSZ);

 	/*
 	 * Make sure count * slen + VARHDRSZ fits in an int.  Test with a
 	 * division *before* multiplying: signed overflow is undefined behavior
 	 * in C, so inspecting the product after the fact is not a reliable
 	 * check.
 	 */
 	if (slen > 0 && count > (INT_MAX - VARHDRSZ) / slen)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 				 errmsg("requested length too large")));

 	tlen = VARHDRSZ + (count * slen);

 	result = (text *) palloc(tlen);

 	SET_VARSIZE(result, tlen);

 	/* Lay down count back-to-back copies of the input payload */
 	cp = VARDATA(result);
 	for (i = 0; i < count; i++)
 	{
 		memcpy(cp, VARDATA(string), slen);
 		cp += slen;
 	}

 	PG_RETURN_TEXT_P(result);
 }