src/backend/utils/adt/varlena.c - hawq - Git at Google

 /*-------------------------------------------------------------------------
  *
  * varlena.c
  *	  Functions for the variable-length built-in types.
  *
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
  *	  $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.154 2007/01/05 22:19:42 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"

 #include <ctype.h>

 #include "access/tupmacs.h"
 #include "access/tuptoaster.h"
 #include "catalog/pg_type.h"
 #include "libpq/md5.h"
 #include "libpq/pqformat.h"
 #include "miscadmin.h"
 #include "parser/scansup.h"
 #include "regex/regex.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/pg_locale.h"
 #include "utils/string_wrapper.h"
 #include "utils/memutils.h"

 typedef struct varlena unknown;

 /*
  * Working state for a substring search.  It is filled in by
  * text_position_setup_ptr_len(), consulted by text_position_next(), and
  * released by text_position_cleanup().  The setup routine populates either
  * the (str1, str2) pair or the (wstr1, wstr2) pair, as flagged by use_wchar.
  */
 typedef struct
 {
 	bool		use_wchar;		/* T if multibyte encoding */
 	char	   *str1;			/* use these if not use_wchar */
 	char	   *str2;			/* note: these point to original texts */
 	pg_wchar   *wstr1;			/* use these if use_wchar */
 	pg_wchar   *wstr2;			/* note: these are palloc'd */
 	int			len1;			/* string lengths in logical characters */
 	int			len2;
 } TextPositionState;

 #define DatumGetUnknownP(X)			((unknown *) PG_DETOAST_DATUM(X))
 #define DatumGetUnknownPCopy(X)		((unknown *) PG_DETOAST_DATUM_COPY(X))
 #define PG_GETARG_UNKNOWN_P(n)		DatumGetUnknownP(PG_GETARG_DATUM(n))
 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
 #define PG_RETURN_UNKNOWN_P(x)		PG_RETURN_POINTER(x)

 #define PG_TEXTARG_GET_STR(arg_) \
 	DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
 #define PG_TEXT_GET_STR(textp_) \
 	DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
 #define PG_STR_GET_TEXT(str_) \
 	DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))

 /*
  * Max considered sub-string size is set to (MaxAllocSize - 4MB).
  * The 4MB is saved aside for memory allocation overhead such
  * as allocation set headers.
  */
 #define MAX_STRING_BYTES	((Size) (MaxAllocSize - 0x400000))

 static int	text_position_ptr_len(char* p1, int len1, char *p2, int len2);
 static void text_position_setup_ptr_len(char* p1, int len1, char* p2, int len2, TextPositionState *state);

 static int	text_position_next(int start_pos, TextPositionState *state);
 static void text_position_cleanup(TextPositionState *state);
 static text *text_substring(Datum str,
 			   int32 start,
 			   int32 length,
 			   bool length_not_specified);
 static void appendStringInfoText(StringInfo str, const text *t);


 /*****************************************************************************
  *	 CONVERSION ROUTINES EXPORTED FOR USE BY C CODE							 *
  *****************************************************************************/

 /*
  * cstring_to_text
  *
  * Build a text value from a NUL-terminated C string.
  *
  * The byte count is measured with strlen() and the construction itself is
  * delegated to cstring_to_text_with_len(), so the result is a freshly
  * palloc'd value carrying a full-size VARHDR.
  */
 text *
 cstring_to_text(const char *s)
 {
 	int			nbytes = strlen(s);

 	return cstring_to_text_with_len(s, nbytes);
 }

 /*
  * cstring_to_text_with_len
  *
  * Build a text value from the first "len" bytes at "s".  Unlike
  * cstring_to_text(), the length is supplied by the caller, so the input
  * does not have to be NUL-terminated.
  *
  * Returns a palloc'd text whose size word covers len + VARHDRSZ bytes.
  */
 text *
 cstring_to_text_with_len(const char *s, int len)
 {
 	int			total = len + VARHDRSZ;
 	text	   *t;

 	t = (text *) palloc(total);
 	SET_VARSIZE(t, total);
 	memcpy(VARDATA(t), s, len);

 	return t;
 }

 /*
  * text_to_cstring
  *
  * Produce a palloc'd, NUL-terminated C string holding the payload of a
  * text value.
  *
  * The argument is run through pg_detoast_datum_packed() first, so a
  * compressed or toasted value is accepted as well.  Strictly speaking such
  * a value should not be typed "text *", but tolerating it here saves
  * having a second routine for that case.
  */
 char *
 text_to_cstring(const text *t)
 {
 	/* the detoast API wants a non-const pointer, hence the cast */
 	text	   *detoasted = pg_detoast_datum_packed((struct varlena *) t);
 	int			nbytes = VARSIZE_ANY_EXHDR(detoasted);
 	char	   *cstr = (char *) palloc(nbytes + 1);

 	memcpy(cstr, VARDATA_ANY(detoasted), nbytes);
 	cstr[nbytes] = '\0';

 	/* free the working copy only when detoasting actually produced one */
 	if (detoasted != t)
 		pfree(detoasted);

 	return cstr;
 }

 /*
  * text_to_cstring_buffer
  *
  * Copy the payload of a text value into the caller's buffer "dst", whose
  * capacity is dst_len bytes.
  *
  * If the payload does not fit, it is cut short; the cut point is chosen
  * with pg_mbcliplen() so that truncation is encoding-safe.  The output is
  * always NUL-terminated, except when dst_len is 0, in which case nothing
  * is written.
  *
  * As in text_to_cstring(), the input is detoasted with
  * pg_detoast_datum_packed(), so compressed or toasted values are accepted.
  */
 void
 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
 {
 	/* the detoast API wants a non-const pointer, hence the cast */
 	text	   *unpacked = pg_detoast_datum_packed((struct varlena *) src);
 	size_t		payload_len = VARSIZE_ANY_EXHDR(unpacked);

 	if (dst_len > 0)
 	{
 		/* reserve one byte for the terminator */
 		size_t		ncopy = dst_len - 1;

 		if (ncopy >= payload_len)
 			ncopy = payload_len;
 		else
 			ncopy = pg_mbcliplen(VARDATA_ANY(unpacked), payload_len, ncopy);

 		memcpy(dst, VARDATA_ANY(unpacked), ncopy);
 		dst[ncopy] = '\0';
 	}

 	/* free the working copy only when detoasting actually produced one */
 	if (unpacked != src)
 		pfree(unpacked);
 }


 /*****************************************************************************
  *	 USER I/O ROUTINES														 *
  *****************************************************************************/


 /* VAL: ASCII digit character -> numeric value; DIG: numeric value -> ASCII digit */
 #define VAL(CH)			((CH) - '0')
 #define DIG(VAL)		((VAL) + '0')

 /*
  *		byteain			- converts from printable representation of byte array
  *
  *		Non-printable characters must be passed as '\nnn' (octal) and are
  *		converted to internal form.  '\' must be passed as '\\'.
  *		ereport(ERROR, ...) if bad form.
  *
  *		BUGS:
  *				The input is scanned twice.
  *				The error checking of input is minimal.
  */
 Datum
 byteain(PG_FUNCTION_ARGS)
 {
 	char	   *inputText = PG_GETARG_CSTRING(0);
 	char	   *tp;
 	char	   *rp;
 	int			byte;
 	bytea	   *result;

 	for (byte = 0, tp = inputText; *tp != '\0'; byte++)
 	{
 		if (tp[0] != '\\')
 			tp++;
 		else if ((tp[0] == '\\') &&
 				 (tp[1] >= '0' && tp[1] <= '3') &&
 				 (tp[2] >= '0' && tp[2] <= '7') &&
 				 (tp[3] >= '0' && tp[3] <= '7'))
 			tp += 4;
 		else if ((tp[0] == '\\') &&
 				 (tp[1] == '\\'))
 			tp += 2;
 		else
 		{
 			/*
 			 * one backslash, not followed by 0 or ### valid octal
 			 */
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 					 errmsg("invalid input syntax for type bytea"),
 							 errOmitLocation(true)));
 		}
 	}

 	byte += VARHDRSZ;
 	result = (bytea *) palloc(byte);
 	SET_VARSIZE(result, byte);

 	tp = inputText;
 	rp = VARDATA(result);
 	while (*tp != '\0')
 	{
 		if (tp[0] != '\\')
 			*rp++ = *tp++;
 		else if ((tp[0] == '\\') &&
 				 (tp[1] >= '0' && tp[1] <= '3') &&
 				 (tp[2] >= '0' && tp[2] <= '7') &&
 				 (tp[3] >= '0' && tp[3] <= '7'))
 		{
 			byte = VAL(tp[1]);
 			byte <<= 3;
 			byte += VAL(tp[2]);
 			byte <<= 3;
 			*rp++ = byte + VAL(tp[3]);
 			tp += 4;
 		}
 		else if ((tp[0] == '\\') &&
 				 (tp[1] == '\\'))
 		{
 			*rp++ = '\\';
 			tp += 2;
 		}
 		else
 		{
 			/*
 			 * We should never get here. The first pass should not allow it.
 			 */
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 					 errmsg("invalid input syntax for type bytea"),
 							 errOmitLocation(true)));
 		}
 	}

 	PG_RETURN_BYTEA_P(result);
 }

 /*
  *		byteaout		- converts to printable representation of byte array
  *
  *		A backslash is emitted as '\\'.  Bytes below 0x20 or above 0x7e are
  *		emitted as '\nnn' (three octal digits).  All other bytes are copied
  *		as-is.  The result is a palloc'd, NUL-terminated C string.
  *
  *		The data is scanned twice: once to size the output buffer and once
  *		to fill it.
  */
 Datum
 byteaout(PG_FUNCTION_ARGS)
 {
 	bytea	   *vlena = PG_GETARG_BYTEA_P(0);
 	char	   *result;
 	char	   *vp;
 	char	   *rp;
 	int			val;			/* holds unprintable chars */
 	int			i;
 	int			len;

 	/* First pass: compute the output size, starting at 1 for the NUL. */
 	len = 1;					/* empty string has 1 char */
 	vp = VARDATA_ANY(vlena);
 	for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
 	{
 		if (*vp == '\\')
 			len += 2;
 		else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
 			len += 4;
 		else
 			len++;
 	}

 	/* Second pass: emit the escaped representation. */
 	rp = result = (char *) palloc(len);
 	vp = VARDATA_ANY(vlena);
 	for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
 	{
 		if (*vp == '\\')
 		{
 			*rp++ = '\\';
 			*rp++ = '\\';
 		}
 		else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
 		{
 			/*
 			 * Load through unsigned char so that val is never negative:
 			 * where plain char is signed, bytes above 0x7f would otherwise
 			 * sign-extend, and right-shifting a negative int is
 			 * implementation-defined.
 			 */
 			val = (unsigned char) *vp;
 			rp[0] = '\\';
 			rp[3] = DIG(val & 07);
 			val >>= 3;
 			rp[2] = DIG(val & 07);
 			val >>= 3;
 			rp[1] = DIG(val & 03);
 			rp += 4;
 		}
 		else
 			*rp++ = *vp;
 	}
 	*rp = '\0';
 	PG_RETURN_CSTRING(result);
 }

 /*
  *		bytearecv			- converts external binary format to bytea
  */
 Datum
 bytearecv(PG_FUNCTION_ARGS)
 {
 	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
 	bytea	   *result;
 	int			nbytes;

 	nbytes = buf->len - buf->cursor;
 	result = (bytea *) palloc(nbytes + VARHDRSZ);
 	SET_VARSIZE(result, nbytes + VARHDRSZ);
 	pq_copymsgbytes(buf, VARDATA(result), nbytes);
 	PG_RETURN_BYTEA_P(result);
 }

 /*
  *		byteasend			- produce the binary wire form of a bytea
  *
  * The wire form is the bytea itself, so the function simply returns a
  * copy of its argument.
  */
 Datum
 byteasend(PG_FUNCTION_ARGS)
 {
 	bytea	   *copy;

 	copy = PG_GETARG_BYTEA_P_COPY(0);
 	PG_RETURN_BYTEA_P(copy);
 }


 /*
  *		textin			- turn a C string into the internal text form
  *
  * The string's bytes (without the trailing NUL) are copied behind a
  * full-size varlena header in a newly palloc'd value.
  */
 Datum
 textin(PG_FUNCTION_ARGS)
 {
 	char	   *cstr = PG_GETARG_CSTRING(0);
 	int			nbytes = strlen(cstr);
 	text	   *t = (text *) palloc(nbytes + VARHDRSZ);

 	SET_VARSIZE(t, nbytes + VARHDRSZ);
 	memcpy(VARDATA(t), cstr, nbytes);

 	PG_RETURN_TEXT_P(t);
 }

 /*
  *		textout			- turn the internal text form into a C string
  *
  * The payload is obtained through varattrib_untoast_ptr_len(), copied into
  * a palloc'd buffer and NUL-terminated.  If that helper hands back a
  * non-NULL "to free" pointer, it is pfree'd before returning.
  */
 Datum
 textout(PG_FUNCTION_ARGS)
 {
 	Datum		arg = PG_GETARG_DATUM(0);
 	char	   *data;
 	int			nbytes;
 	void	   *scratch;
 	char	   *cstr;

 	varattrib_untoast_ptr_len(arg, &data, &nbytes, &scratch);

 	cstr = (char *) palloc(nbytes + 1);
 	memcpy(cstr, data, nbytes);
 	cstr[nbytes] = '\0';

 	if (scratch != NULL)
 		pfree(scratch);

 	PG_RETURN_CSTRING(cstr);
 }

 /*
  *		textrecv			- build a text from its external binary form
  *
  * The rest of the message is fetched with pq_getmsgtext(), copied behind a
  * varlena header, and the intermediate string is released.
  */
 Datum
 textrecv(PG_FUNCTION_ARGS)
 {
 	StringInfo	msg = (StringInfo) PG_GETARG_POINTER(0);
 	int			nbytes;
 	char	   *payload;
 	text	   *t;

 	payload = pq_getmsgtext(msg, msg->len - msg->cursor, &nbytes);

 	t = (text *) palloc(nbytes + VARHDRSZ);
 	SET_VARSIZE(t, nbytes + VARHDRSZ);
 	memcpy(VARDATA(t), payload, nbytes);

 	pfree(payload);

 	PG_RETURN_TEXT_P(t);
 }

 /*
  *		textsend			- produce the binary wire form of a text
  *
  * The payload bytes are appended to a fresh send buffer with pq_sendtext()
  * and the finished buffer is returned as a bytea.
  */
 Datum
 textsend(PG_FUNCTION_ARGS)
 {
 	text	   *arg = PG_GETARG_TEXT_PP(0);
 	StringInfoData out;

 	pq_begintypsend(&out);
 	pq_sendtext(&out, VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg));

 	PG_RETURN_BYTEA_P(pq_endtypsend(&out));
 }


 /*
  *		unknownin			- input routine for the "unknown" type
  *
  * "unknown" is stored exactly like a cstring, so the input is simply
  * duplicated with pstrdup().
  */
 Datum
 unknownin(PG_FUNCTION_ARGS)
 {
 	char	   *dup;

 	dup = pstrdup(PG_GETARG_CSTRING(0));

 	PG_RETURN_CSTRING(dup);
 }

 /*
  *		unknownout			- output routine for the "unknown" type
  *
  * "unknown" is stored exactly like a cstring, so the output is a
  * pstrdup() copy of the argument.
  */
 Datum
 unknownout(PG_FUNCTION_ARGS)
 {
 	char	   *dup;

 	dup = pstrdup(PG_GETARG_CSTRING(0));

 	PG_RETURN_CSTRING(dup);
 }

 /*
  *		unknownrecv			- build an "unknown" from its external binary form
  *
  * The string returned by pq_getmsgtext() for the rest of the message is
  * returned directly, since "unknown" is represented like a cstring.  The
  * byte count reported by pq_getmsgtext() is not needed.
  */
 Datum
 unknownrecv(PG_FUNCTION_ARGS)
 {
 	StringInfo	msg = (StringInfo) PG_GETARG_POINTER(0);
 	int			ignored_len;
 	char	   *payload;

 	payload = pq_getmsgtext(msg, msg->len - msg->cursor, &ignored_len);

 	PG_RETURN_CSTRING(payload);
 }

 /*
  *		unknownsend			- produce the binary wire form of an "unknown"
  *
  * The argument is treated as a cstring; its strlen() bytes are appended to
  * a fresh send buffer and the finished buffer is returned as a bytea.
  */
 Datum
 unknownsend(PG_FUNCTION_ARGS)
 {
 	char	   *cstr = PG_GETARG_CSTRING(0);
 	StringInfoData out;

 	pq_begintypsend(&out);
 	pq_sendtext(&out, cstr, strlen(cstr));

 	PG_RETURN_BYTEA_P(pq_endtypsend(&out));
 }


 /* ========== PUBLIC ROUTINES ========== */

 /*
  * text_length -
  *	Does the real work for textlen()
  *
  *	Returns the length of the text value in characters.
  *
  *	This is broken out so it can be called directly by other string processing
  *	functions.	Note that the argument is passed as a Datum, to indicate that
  *	it may still be in compressed form.  We can avoid decompressing it at all
  *	in some cases.
  *
  *	This function returns a plain int32, not a Datum, so it uses ordinary
  *	return statements rather than the fmgr PG_RETURN_* macros.
  */
 static inline int32
 text_length(Datum str)
 {
 	/* fastpath when max encoding length is one: chars == bytes */
 	if (pg_database_encoding_max_length() == 1)
 		return toast_raw_datum_size(str) - VARHDRSZ;
 	else
 	{
 		int32 ret;
 		char *p; void *tofree; int len;
 		varattrib_untoast_ptr_len(str, &p, &len, &tofree);

 		/* count multibyte characters in the payload */
 		ret = pg_mbstrlen_with_len(p, len);
 		if(tofree)
 			pfree(tofree);

 		return ret;
 	}
 }

 /*
  * textlen -
  *	  returns the logical length of a text*
  *	   (which is less than the VARSIZE of the text*)
  */
 Datum
 textlen(PG_FUNCTION_ARGS)
 {
 	Datum		str = PG_GETARG_DATUM(0);

 	/* try to avoid decompressing argument */
 	PG_RETURN_INT32(text_length(str));
 }

 /*
  * textoctetlen -
  *	  returns the length of a text value in bytes, computed as the raw datum
  *	  size minus the varlena header.
  */
 Datum
 textoctetlen(PG_FUNCTION_ARGS)
 {
 	/* the size is read via toast_raw_datum_size(); no detoasting is done here */
 	PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
 }

 /*
  * Generic concatenation of two varlena values.  The varlena may come in
  * different flavors, but the work is the same for all of them, so the body
  * is written once here instead of being duplicated per type.
  *
  * Arguments 0 and 1 are the values to join.  The result is a freshly
  * palloc'd varlena holding the bytes of argument 0 followed by the bytes
  * of argument 1.  Any working copies handed back by
  * varattrib_untoast_ptr_len() are freed before returning.
  */
 static inline Datum generic_varlena_cat(PG_FUNCTION_ARGS)
 {
 	Datum d0 = PG_GETARG_DATUM(0);
 	char *p0; void *tofree0; int len0;

 	Datum d1 = PG_GETARG_DATUM(1);
 	char *p1; void *tofree1; int len1;

 	int len;
 	text *result;
 	char *ptr;

 	varattrib_untoast_ptr_len(d0, &p0, &len0, &tofree0);
 	varattrib_untoast_ptr_len(d1, &p1, &len1, &tofree1);

 	len = len0 + len1 + VARHDRSZ;
 	result = (text *) palloc(len);

 	/* Set size of result string... */
 	SET_VARSIZE(result, len);

 	/* Fill data field of result string... */
 	ptr = VARDATA(result);

 	if (len0 > 0)
 		memcpy(ptr, p0, len0);
 	if (len1 > 0)
 		memcpy(ptr + len0, p1, len1);

 	/* The inputs have been copied; release any detoasted working copies. */
 	if (tofree0)
 		pfree(tofree0);
 	if (tofree1)
 		pfree(tofree1);

 	PG_RETURN_TEXT_P(result);
 }

 /*
  * textcat -
  *	  SQL-callable concatenation of two text values.  The work is done by
  *	  generic_varlena_cat(), which always allocates new space for the
  *	  output.
  *
  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
  */
 Datum
 textcat(PG_FUNCTION_ARGS)
 {
 	Datum		joined = generic_varlena_cat(fcinfo);

 	return joined;
 }

 /*
  * charlen_to_bytelen()
  *	Return how many bytes the first n characters at *p occupy.
  *
  * The caller must guarantee that n characters are really present; no
  * NUL terminator is required or looked for.
  */
 static int
 charlen_to_bytelen(const char *p, int n)
 {
 	const char *cursor = p;

 	/* single-byte encodings: one character is one byte */
 	if (pg_database_encoding_max_length() == 1)
 		return n;

 	/* otherwise step over the characters one at a time */
 	while (n > 0)
 	{
 		cursor += pg_mblen(cursor);
 		n--;
 	}

 	return cursor - p;
 }

 /* find_memory_limited_substring()
  *	Computes the sub-string length in number of characters and number
  *	of bytes where the sub-string consumes up to "memoryLimit" amount of memory.
  *
  *	Parameters:
  *		strStart: starting pointer in the string
  * 		byteLen: number of bytes in the string, starting from strStart;
  *			asserted to be larger than memoryLimit
  * 		memoryLimit: max string size in terms of bytes
  *
  * 	Out parameters (both must be non-NULL):
  *		subStringByteLen: length of chosen sub-string in bytes
  *		subStringCharLen: length of chosen sub-string in character count
  *
  * It is caller's responsibility that there actually are byteLen bytes
  * starting from strStart; the string needs not be null-terminated.
  */
 static void
 find_memory_limited_substring(const char *strStart, int byteLen, int memoryLimit, int *subStringByteLen, int *subStringCharLen)
 {
 	AssertArg(byteLen > memoryLimit);
 	AssertArg(NULL != strStart);
 	AssertArg(NULL != subStringByteLen);
 	AssertArg(NULL != subStringCharLen);

 	if (pg_database_encoding_max_length() == 1)
 	{
 		/* Optimization for single-byte encodings */
 		*subStringByteLen = byteLen < memoryLimit ? byteLen : memoryLimit;
 		*subStringCharLen = *subStringByteLen;

 		return;
 	}
 	else
 	{
 		const char *strCurPointer = strStart;

 		int consumedBytes = 0;
 		int consumedChars = 0;

 		/*
 		 * Walk character by character.  As soon as the running byte count
 		 * exceeds memoryLimit, report the totals from just before the
 		 * character that crossed the limit.
 		 */
 		while (consumedBytes <= byteLen)
 		{
 			int curCharBytes = pg_mblen(strCurPointer);
 			strCurPointer += curCharBytes;
 			consumedChars++;
 			consumedBytes += curCharBytes;

 			if (consumedBytes > memoryLimit)
 			{
 				*subStringByteLen = consumedBytes - curCharBytes;
 				*subStringCharLen = consumedChars - 1;

 				/* the limit must leave room for at least one character */
 				Insist((*subStringByteLen > 0) && (*subStringCharLen > 0));

 				return;
 			}
 		}
 	}
 }


 /*
  * text_substr()
  * Return a substring starting at the specified position.
  * - thomas 1997-12-31
  *
  * Input:
  *	- string
  *	- starting position (is one-based)
  *	- string length
  *
  * If the starting position is zero or less, then return from the start of the string
  *	adjusting the length to be consistent with the "negative start" per SQL92.
  * If the length is less than zero, return the remaining string.
  *
  * Added multibyte support.
  * - Tatsuo Ishii 1998-4-21
  * Changed behavior if starting position is less than one to conform to SQL92 behavior.
  * Formerly returned the entire string; now returns a portion.
  * - Thomas Lockhart 1998-12-10
  * Now uses faster TOAST-slicing interface
  * - John Gray 2002-02-22
  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
  * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
  * error; if E < 1, return '', not entire string). Fixed MB related bug when
  * S > LC and < LC + 4 sometimes garbage characters are returned.
  * - Joe Conway 2002-08-10
  */
 Datum
 text_substr(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 									PG_GETARG_INT32(1),
 									PG_GETARG_INT32(2),
 									false));
 }

 /*
  * text_substr_no_len -
  *	  Wrapper to avoid opr_sanity failure due to
  *	  one function accepting a different number of args.
  */
 Datum
 text_substr_no_len(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 									PG_GETARG_INT32(1),
 									-1, true));
 }

 /*
  * text_substring -
  *	Does the real work for text_substr() and text_substr_no_len()
  *
  *	This is broken out so it can be called directly by other string processing
  *	functions.	Note that the argument is passed as a Datum, to indicate that
  *	it may still be in compressed/toasted form.  We can avoid detoasting all
  *	of it in some cases.
  *
  *	Parameters:
  *		str - the source text, possibly still toasted
  *		start - one-based start position; may be zero or negative
  *		length - requested length in characters; ignored when
  *			length_not_specified is true
  *		length_not_specified - true means "through the end of the string"
  *
  *	Errors: a negative length raises ERRCODE_SUBSTRING_ERROR.
  *
  *	The end position start + length is computed in 64-bit arithmetic so it
  *	cannot overflow.  If it lies beyond MaxAllocSize it is necessarily past
  *	the end of any string, so the request is treated as "through the end of
  *	the string".
  *
  *	The result is always a freshly palloc'd datum.
  */
 static text *
 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 {
 	int32		eml = pg_database_encoding_max_length();
 	int32		S = start;		/* start position */
 	int32		S1;				/* adjusted start position */
 	int32		L1;				/* adjusted substring length */

 	/* life is easy if the encoding max length is 1 */
 	if (eml == 1)
 	{
 		S1 = Max(S, 1);

 		if (length_not_specified)		/* special case - get length to end of
 										 * string */
 			L1 = -1;
 		else
 		{
 			/* end position; 64-bit so that S + length cannot overflow */
 			int64		E = (int64) S + (int64) length;

 			/*
 			 * A negative value for L is the only way for the end position to
 			 * be before the start. SQL99 says to throw an error.
 			 */
 			if (length < 0)
 				ereport(ERROR,
 						(errcode(ERRCODE_SUBSTRING_ERROR),
 						 errmsg("negative substring length not allowed"),
 								 errOmitLocation(true)));

 			/*
 			 * A zero or negative value for the end position can happen if the
 			 * start was negative or one. SQL99 says to return a zero-length
 			 * string.
 			 */
 			if (E < 1)
 				return cstring_to_text("");

 			/*
 			 * An end position beyond MaxAllocSize is past the end of any
 			 * possible string, so just take everything from S1 onward.
 			 * Otherwise E >= S1 holds here, so the difference is a valid
 			 * non-negative int32.
 			 */
 			if (E > (int64) MaxAllocSize)
 				L1 = -1;
 			else
 				L1 = (int32) (E - S1);
 		}

 		/*
 		 * If the start position is past the end of the string, SQL99 says to
 		 * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
 		 * that for us. Convert to zero-based starting position
 		 */
 		return DatumGetTextPSlice(str, S1 - 1, L1);
 	}
 	else if (eml > 1)
 	{
 		/*
 		 * When encoding max length is > 1, we can't get LC without
 		 * detoasting, so we'll grab a conservatively large slice now and go
 		 * back later to do the right thing
 		 */
 		int32		slice_start;
 		int32		slice_size;
 		int32		slice_strlen;
 		text	   *slice;
 		int32		E1;
 		int32		i;
 		char	   *p;
 		char	   *s;
 		text	   *ret;

 		/*
 		 * if S is past the end of the string, the tuple toaster will return a
 		 * zero-length string to us
 		 */
 		S1 = Max(S, 1);

 		/*
 		 * We need to start at position zero because there is no way to know
 		 * in advance which byte offset corresponds to the supplied start
 		 * position.
 		 */
 		slice_start = 0;

 		if (length_not_specified)		/* special case - get length to end of
 										 * string */
 			slice_size = L1 = -1;
 		else
 		{
 			/* end position; 64-bit so that S + length cannot overflow */
 			int64		E = (int64) S + (int64) length;

 			/*
 			 * A negative value for L is the only way for the end position to
 			 * be before the start. SQL99 says to throw an error.
 			 */
 			if (length < 0)
 				ereport(ERROR,
 						(errcode(ERRCODE_SUBSTRING_ERROR),
 						 errmsg("negative substring length not allowed"),
 								 errOmitLocation(true)));

 			/*
 			 * A zero or negative value for the end position can happen if the
 			 * start was negative or one. SQL99 says to return a zero-length
 			 * string.
 			 */
 			if (E < 1)
 				return cstring_to_text("");

 			if (E > (int64) MaxAllocSize)
 			{
 				/*
 				 * The end position is past the end of any possible string:
 				 * fetch everything and run to the end of the string.
 				 */
 				slice_size = L1 = -1;
 			}
 			else
 			{
 				int64		max_bytes;

 				/*
 				 * if E is past the end of the string, the tuple toaster will
 				 * truncate the length for us
 				 */
 				L1 = (int32) (E - S1);

 				/*
 				 * Total slice size in bytes can't be any longer than the
 				 * start position plus substring length times the encoding
 				 * max length.  (S1 + L1 equals E here.)  If that product
 				 * exceeds MaxAllocSize, simply fetch the whole value.
 				 */
 				max_bytes = E * eml;
 				if (max_bytes > (int64) MaxAllocSize)
 					slice_size = -1;
 				else
 					slice_size = (int32) max_bytes;
 			}
 		}

 		/*
 		 * If we're working with an untoasted source, no need to do an extra
 		 * copying step.
 		 */
 		if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
 			VARATT_IS_EXTERNAL(DatumGetPointer(str)))
 			slice = DatumGetTextPSlice(str, slice_start, slice_size);
 		else
 			slice = (text *) DatumGetPointer(str);

 		/* see if we got back an empty string */
 		if (VARSIZE_ANY_EXHDR(slice) == 0)
 		{
 			if (slice != (text *) DatumGetPointer(str))
 				pfree(slice);
 			return cstring_to_text("");
 		}

 		/* Now we can get the actual length of the slice in MB characters */
 		slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
 											VARSIZE_ANY_EXHDR(slice));

 		/*
 		 * Check that the start position wasn't > slice_strlen. If so, SQL99
 		 * says to return a zero-length string.
 		 */
 		if (S1 > slice_strlen)
 		{
 			if (slice != (text *) DatumGetPointer(str))
 				pfree(slice);
 			return cstring_to_text("");
 		}

 		/*
 		 * Adjust L1 and E1 now that we know the slice string length. Again
 		 * remember that S1 is one based, and slice_start is zero based.
 		 */
 		if (L1 > -1)
 			E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
 		else
 			E1 = slice_start + 1 + slice_strlen;

 		/*
 		 * Find the start position in the slice; remember S1 is not zero based
 		 */
 		p = VARDATA_ANY(slice);
 		for (i = 0; i < S1 - 1; i++)
 			p += pg_mblen(p);

 		/* hang onto a pointer to our start position */
 		s = p;

 		/*
 		 * Count the actual bytes used by the substring of the requested
 		 * length.
 		 */
 		for (i = S1; i < E1; i++)
 			p += pg_mblen(p);

 		ret = (text *) palloc(VARHDRSZ + (p - s));
 		SET_VARSIZE(ret, VARHDRSZ + (p - s));
 		memcpy(VARDATA(ret), s, (p - s));

 		if (slice != (text *) DatumGetPointer(str))
 			pfree(slice);

 		return ret;
 	}
 	else
 		elog(ERROR, "invalid backend encoding: encoding max length < 1");

 	/* not reached: suppress compiler warning */
 	return NULL;
 }

 /*
  * textpos -
  *	  SQL92 POSITION(): report where the second argument first occurs
  *	  inside the first argument.  The search is done by
  *	  text_position_ptr_len(); see there for the meaning of the result.
  *	  Ref: A Guide To The SQL Standard, Date & Darwen, 1997
  * - thomas 1997-07-27
  */
 Datum
 textpos(PG_FUNCTION_ARGS)
 {
 	char	   *hay;
 	char	   *needle;
 	int			hay_len;
 	int			needle_len;
 	void	   *hay_free;
 	void	   *needle_free;
 	int32		where;

 	varattrib_untoast_ptr_len(PG_GETARG_DATUM(0), &hay, &hay_len, &hay_free);
 	varattrib_untoast_ptr_len(PG_GETARG_DATUM(1), &needle, &needle_len, &needle_free);

 	where = text_position_ptr_len(hay, hay_len, needle, needle_len);

 	/* drop any working copies produced while untoasting */
 	if (hay_free)
 		pfree(hay_free);
 	if (needle_free)
 		pfree(needle_free);

 	PG_RETURN_INT32(where);
 }

 /*
  * text_position -
  *	Does the real work for textpos()
  *
  * Inputs:
  *		p1, len1 - string to be searched
  *		p2, len2 - pattern to match within t1
  * Result:
  *		Character index of the first matched char, starting from 1,
  *		or 0 if no match.
  *
  *	This is broken out so it can be called directly by other string processing
  *	functions.
  */
 static int
 text_position_ptr_len(char* p1, int len1, char* p2, int len2)
 {
 	TextPositionState state =
 		{
 		0, /* use_wchar */
 		NULL, /* str1 */
 		NULL, /* str2 */
 		NULL, /* wstr1 */
 		NULL, /* wstr2 */
 		0, /* len1 */
 		0, /* len2 */
 		};


 	int result;

 	text_position_setup_ptr_len(p1, len1, p2, len2, &state);

 	result = text_position_next(1, &state);
 	text_position_cleanup(&state);
 	return result;
 }

 /*
  * text_position_setup_ptr_len, text_position_next, text_position_cleanup -
  *	Component steps of text_position_ptr_len()
  *
  * These are broken out so that a string can be efficiently searched for
  * multiple occurrences of the same pattern.  text_position_next may be
  * called multiple times with increasing values of start_pos, which is
  * the 1-based character position to start the search from.  The "state"
  * variable is normally just a local variable in the caller.
  */

 /*
  * Set up a text position search from two (pointer, length) pairs.
  *
  * In a single-byte encoding the state simply points at the caller's
  * strings.  Otherwise both strings are converted to palloc'd pg_wchar
  * arrays and the stored lengths are the counts returned by
  * pg_mb2wchar_with_len().
  */
 static void
 text_position_setup_ptr_len(char* p1, int len1, char* p2, int len2, TextPositionState *state)
 {
 	pg_wchar   *wide1;
 	pg_wchar   *wide2;

 	if (pg_database_encoding_max_length() == 1)
 	{
 		/* single byte encoding: search the original bytes in place */
 		state->use_wchar = false;
 		state->str1 = p1;
 		state->str2 = p2;
 		state->len1 = len1;
 		state->len2 = len2;
 		return;
 	}

 	/* multibyte encoding: work on wide-character copies */
 	wide1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
 	len1 = pg_mb2wchar_with_len(p1, wide1, len1);
 	wide2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
 	len2 = pg_mb2wchar_with_len(p2, wide2, len2);

 	state->use_wchar = true;
 	state->wstr1 = wide1;
 	state->wstr2 = wide2;
 	state->len1 = len1;
 	state->len2 = len2;
 }
 /*
  * Find the next occurrence of the pattern at or after start_pos (a
  * one-based character position, which must be > 0).
  *
  * Returns the one-based position of the match, or 0 if there is none.
  * An empty pattern matches immediately, so start_pos itself is returned.
  */
 static int
 text_position_next(int start_pos, TextPositionState *state)
 {
 	/* last zero-based offset at which the pattern could still fit */
 	int			last_start = state->len1 - state->len2;
 	int			idx;

 	Assert(start_pos > 0);		/* else caller error */

 	if (state->len2 <= 0)
 		return start_pos;		/* result for empty pattern */

 	if (state->use_wchar)
 	{
 		/* multibyte encoding: compare wide characters */
 		pg_wchar   *hay = state->wstr1 + (start_pos - 1);
 		pg_wchar   *needle = state->wstr2;

 		for (idx = start_pos - 1; idx <= last_start; idx++, hay++)
 		{
 			if (*hay == *needle &&
 				pg_wchar_strncmp(hay, needle, state->len2) == 0)
 				return idx + 1;
 		}
 	}
 	else
 	{
 		/* single byte encoding: compare bytes */
 		char	   *hay = state->str1 + (start_pos - 1);
 		char	   *needle = state->str2;

 		for (idx = start_pos - 1; idx <= last_start; idx++, hay++)
 		{
 			if (*hay == *needle &&
 				strncmp(hay, needle, state->len2) == 0)
 				return idx + 1;
 		}
 	}

 	return 0;
 }

 /*
  * Release what text_position_setup_ptr_len() allocated.  Only the
  * wide-character copies are palloc'd, so there is nothing to do when
  * use_wchar is false.
  */
 static void
 text_position_cleanup(TextPositionState *state)
 {
 	if (!state->use_wchar)
 		return;

 	pfree(state->wstr1);
 	pfree(state->wstr2);
 }

 /* varstr_cmp()
  * Comparison function for text strings with given lengths.
  * Includes locale support, but must copy strings to temporary memory
  *	to allow null-termination for inputs to strcoll().
  *
  * arg1/len1 and arg2/len2 are the two strings and their byte lengths; the
  * inputs need not be null-terminated.
  *
  * Returns a negative value, zero, or a positive value as arg1 sorts before,
  * equal to, or after arg2.  The magnitude is whatever the underlying
  * comparison routine yields, except that a pure length tie-break gives
  * exactly -1 or 1.
  */
 int
 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
 {
 	int			result;

 	/*
 	 * Unfortunately, there is no strncoll(), so in the non-C locale case we
 	 * have to do some memory copying.	This turns out to be significantly
 	 * slower, so we optimize the case where LC_COLLATE is C.  We also try to
 	 * optimize relatively-short strings by avoiding palloc/pfree overhead.
 	 */
 	if (lc_collate_is_c())
 	{
 		/* compare the common prefix; on a tie the shorter string sorts first */
 		result = strncmp(arg1, arg2, Min(len1, len2));
 		if ((result == 0) && (len1 != len2))
 			result = (len1 < len2) ? -1 : 1;
 	}
 	else
 	{
 		/* size of the on-stack buffers used for short inputs */
 #define STACKBUFLEN		1024

 		char		a1buf[STACKBUFLEN];
 		char		a2buf[STACKBUFLEN];
 		char	   *a1p,
 				   *a2p;

 #ifdef WIN32
 		/* Win32 does not have UTF-8, so we need to map to UTF-16 */
 		if (GetDatabaseEncoding() == PG_UTF8)
 		{
 			int			a1len;
 			int			a2len;
 			int			r;

 			/*
 			 * Pick a buffer for each UTF-16 copy: the stack buffer when the
 			 * input is shorter than half of it, otherwise a palloc'd buffer
 			 * of len * 2 + 2 bytes.
 			 */
 			if (len1 >= STACKBUFLEN / 2)
 			{
 				a1len = len1 * 2 + 2;
 				a1p = palloc(a1len);
 			}
 			else
 			{
 				a1len = STACKBUFLEN;
 				a1p = a1buf;
 			}
 			if (len2 >= STACKBUFLEN / 2)
 			{
 				a2len = len2 * 2 + 2;
 				a2p = palloc(a2len);
 			}
 			else
 			{
 				a2len = STACKBUFLEN;
 				a2p = a2buf;
 			}

 			/* stupid Microsloth API does not work for zero-length input */
 			if (len1 == 0)
 				r = 0;
 			else
 			{
 				r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
 										(LPWSTR) a1p, a1len / 2);
 				if (!r)
 					ereport(ERROR,
 					 (errmsg("could not convert string to UTF-16: error %lu",
 							 GetLastError())));
 			}
 			/* r is the count of wide characters written; terminate there */
 			((LPWSTR) a1p)[r] = 0;

 			if (len2 == 0)
 				r = 0;
 			else
 			{
 				r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
 										(LPWSTR) a2p, a2len / 2);
 				if (!r)
 					ereport(ERROR,
 					 (errmsg("could not convert string to UTF-16: error %lu",
 							 GetLastError())));
 			}
 			((LPWSTR) a2p)[r] = 0;

 			/* clear errno so that %m below reports wcscoll's own failure */
 			errno = 0;
 			result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
 			if (result == 2147483647)	/* _NLSCMPERROR; missing from mingw
 										 * headers */
 				ereport(ERROR,
 						(errmsg("could not compare Unicode strings: %m")));

 			/*
 			 * In some locales wcscoll() can claim that nonidentical strings
 			 * are equal.  Believing that would be bad news for a number of
 			 * reasons, so we follow Perl's lead and sort "equal" strings
 			 * according to strcmp (on the UTF-8 representation).
 			 */
 			if (result == 0)
 			{
 				result = strncmp(arg1, arg2, Min(len1, len2));
 				if ((result == 0) && (len1 != len2))
 					result = (len1 < len2) ? -1 : 1;
 			}

 			/* free only the buffers that were palloc'd */
 			if (a1p != a1buf)
 				pfree(a1p);
 			if (a2p != a2buf)
 				pfree(a2p);

 			return result;
 		}
 #endif   /* WIN32 */

 		/* use the stack buffer unless the string plus its NUL won't fit */
 		if (len1 >= STACKBUFLEN)
 			a1p = (char *) palloc(len1 + 1);
 		else
 			a1p = a1buf;
 		if (len2 >= STACKBUFLEN)
 			a2p = (char *) palloc(len2 + 1);
 		else
 			a2p = a2buf;

 		/* make null-terminated copies for the collation routine */
 		memcpy(a1p, arg1, len1);
 		a1p[len1] = '\0';
 		memcpy(a2p, arg2, len2);
 		a2p[len2] = '\0';

 		/* NOTE(review): gp_strcoll is presumably a strcoll() wrapper -- confirm */
 		result = gp_strcoll(a1p, a2p);

 		/*
 		 * In some locales strcoll() can claim that nonidentical strings are
 		 * equal.  Believing that would be bad news for a number of reasons,
 		 * so we follow Perl's lead and sort "equal" strings according to
 		 * strcmp().
 		 */
 		if (result == 0)
 			result = strcmp(a1p, a2p);

 		/* free only the buffers that were palloc'd */
 		if (a1p != a1buf)
 			pfree(a1p);
 		if (a2p != a2buf)
 			pfree(a2p);
 	}

 	return result;
 }

 /*
  * Compare two text Datums with varstr_cmp() and return its result.  Any
  * working copies produced while untoasting are freed before returning.
  */
 static inline int
 text_cmp_datum(Datum d0, Datum d1)
 {
 	char	   *lhs;
 	char	   *rhs;
 	int			lhs_len;
 	int			rhs_len;
 	void	   *lhs_free;
 	void	   *rhs_free;
 	int			cmp;

 	varattrib_untoast_ptr_len(d0, &lhs, &lhs_len, &lhs_free);
 	varattrib_untoast_ptr_len(d1, &rhs, &rhs_len, &rhs_free);

 	cmp = varstr_cmp(lhs, lhs_len, rhs, rhs_len);

 	if (lhs_free)
 		pfree(lhs_free);
 	if (rhs_free)
 		pfree(rhs_free);

 	return cmp;
 }

 /*
  * generic_varlena_eq
  *		Bytewise equality test on the first two arguments.
  *
  * Only equal/not-equal matters here, so the cost of a locale-aware
  * comparison is avoided: the values are equal exactly when their lengths
  * match and their bytes are identical.
  */
 static inline Datum generic_varlena_eq(PG_FUNCTION_ARGS)
 {
 	Datum		lhs = PG_GETARG_DATUM(0);
 	Datum		rhs = PG_GETARG_DATUM(1);
 	char	   *lhs_data;
 	char	   *rhs_data;
 	int			lhs_len;
 	int			rhs_len;
 	void	   *lhs_copy;
 	void	   *rhs_copy;
 	bool		same;

 	varattrib_untoast_ptr_len(lhs, &lhs_data, &lhs_len, &lhs_copy);
 	varattrib_untoast_ptr_len(rhs, &rhs_data, &rhs_len, &rhs_copy);

 	/* The length test short-circuits, so memcmp only sees equal lengths. */
 	same = (lhs_len == rhs_len) &&
 		(memcmp(lhs_data, rhs_data, rhs_len) == 0);

 	if (lhs_copy)
 		pfree(lhs_copy);
 	if (rhs_copy)
 		pfree(rhs_copy);

 	PG_RETURN_BOOL(same);
 }

 /*
  * Comparison functions for text strings.
  *
  * Note: btree indexes need these routines not to leak memory; therefore,
  * be careful to free working copies of toasted datums.  Most places don't
  * need to be so careful.
  */

 Datum
 texteq(PG_FUNCTION_ARGS)
 {
 	/* text = text: delegate to the shared bytewise equality test */
 	Datum		is_equal = generic_varlena_eq(fcinfo);

 	return is_equal;
 }

 Datum
 textne(PG_FUNCTION_ARGS)
 {
 	/* text <> text: invert the result of the shared equality test */
 	Datum		is_equal = generic_varlena_eq(fcinfo);

 	if (is_equal == 0)
 		return 1;
 	return 0;
 }

 /* text < text, ordered by text_cmp_datum() */
 Datum
 text_lt(PG_FUNCTION_ARGS)
 {
 	int			cmp = text_cmp_datum(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1));

 	PG_RETURN_BOOL(cmp < 0);
 }

 /* text <= text, ordered by text_cmp_datum() */
 Datum
 text_le(PG_FUNCTION_ARGS)
 {
 	int			cmp = text_cmp_datum(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1));

 	PG_RETURN_BOOL(cmp <= 0);
 }

 /* text > text, ordered by text_cmp_datum() */
 Datum
 text_gt(PG_FUNCTION_ARGS)
 {
 	int			cmp = text_cmp_datum(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1));

 	PG_RETURN_BOOL(cmp > 0);
 }

 /* text >= text, ordered by text_cmp_datum() */
 Datum
 text_ge(PG_FUNCTION_ARGS)
 {
 	int			cmp = text_cmp_datum(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1));

 	PG_RETURN_BOOL(cmp >= 0);
 }

 /* Three-way text comparison: returns the raw text_cmp_datum() result */
 Datum
 bttextcmp(PG_FUNCTION_ARGS)
 {
 	int			cmp = text_cmp_datum(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1));

 	PG_RETURN_INT32(cmp);
 }


 /*
  * text_larger
  *		Return whichever argument sorts higher; on a tie the second argument
  *		is returned.  The chosen input datum is handed back as-is.
  */
 Datum
 text_larger(PG_FUNCTION_ARGS)
 {
 	Datum		first = PG_GETARG_DATUM(0);
 	Datum		second = PG_GETARG_DATUM(1);

 	if (text_cmp_datum(first, second) > 0)
 		return first;

 	return second;
 }

 /*
  * text_smaller
  *		Return whichever argument sorts lower; on a tie the second argument
  *		is returned.  The chosen input datum is handed back as-is.
  */
 Datum
 text_smaller(PG_FUNCTION_ARGS)
 {
 	Datum		first = PG_GETARG_DATUM(0);
 	Datum		second = PG_GETARG_DATUM(1);

 	if (text_cmp_datum(first, second) < 0)
 		return first;

 	return second;
 }


 /*
  * The following operators support character-by-character comparison
  * of text data types, to allow building indexes suitable for LIKE
  * clauses.
  */
 static inline int generic_varlena_cmp(PG_FUNCTION_ARGS)
 {
 	Datum		lhs = PG_GETARG_DATUM(0);
 	Datum		rhs = PG_GETARG_DATUM(1);
 	char	   *lhs_data;
 	char	   *rhs_data;
 	int			lhs_len;
 	int			rhs_len;
 	void	   *lhs_copy;
 	void	   *rhs_copy;
 	int			cmp;

 	varattrib_untoast_ptr_len(lhs, &lhs_data, &lhs_len, &lhs_copy);
 	varattrib_untoast_ptr_len(rhs, &rhs_data, &rhs_len, &rhs_copy);

 	/* Compare the common prefix byte by byte. */
 	cmp = memcmp(lhs_data, rhs_data, Min(lhs_len, rhs_len));

 	/* Identical prefix: the shorter value sorts first. */
 	if (cmp == 0 && lhs_len != rhs_len)
 		cmp = (lhs_len < rhs_len) ? -1 : 1;

 	if (lhs_copy)
 		pfree(lhs_copy);
 	if (rhs_copy)
 		pfree(rhs_copy);

 	return cmp;
 }

 /* Bytewise "less than" for text, via generic_varlena_cmp() */
 Datum
 text_pattern_lt(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_BOOL(generic_varlena_cmp(fcinfo) < 0);
 }


 /* Bytewise "less than or equal" for text, via generic_varlena_cmp() */
 Datum
 text_pattern_le(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_BOOL(generic_varlena_cmp(fcinfo) <= 0);
 }


 /* Bytewise "greater than or equal" for text, via generic_varlena_cmp() */
 Datum
 text_pattern_ge(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_BOOL(generic_varlena_cmp(fcinfo) >= 0);
 }


 /* Bytewise "greater than" for text, via generic_varlena_cmp() */
 Datum
 text_pattern_gt(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_BOOL(generic_varlena_cmp(fcinfo) > 0);
 }


 /* Three-way bytewise comparison for text, via generic_varlena_cmp() */
 Datum
 bttext_pattern_cmp(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_INT32(generic_varlena_cmp(fcinfo));
 }

 /*-------------------------------------------------------------
  * byteaoctetlen
  *
  * get the number of bytes contained in an instance of type 'bytea'
  *-------------------------------------------------------------
  */
 Datum
 byteaoctetlen(PG_FUNCTION_ARGS)
 {
 	/*
 	 * The raw size can be read without detoasting the value; subtracting
 	 * the header leaves the number of payload bytes.
 	 */
 	PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
 }

 /*
  * byteacat -
  *	  takes two bytea* and returns a bytea* that is the concatenation of
  *	  the two.
  *
  * Cloned from textcat and modified as required.
  */
 Datum
 byteacat(PG_FUNCTION_ARGS)
 {
 	/* Concatenation is delegated to generic_varlena_cat(). */
 	Datum		joined = generic_varlena_cat(fcinfo);

 	return joined;
 }

 /*
  * Build a bytea from a C string by passing it through byteain(), the bytea
  * counterpart of PG_STR_GET_TEXT.
  */
 #define PG_STR_GET_BYTEA(str_) \
 	DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))

 /*
  * bytea_substr()
  * Return a substring starting at the specified position.
  * Cloned from text_substr and modified as required.
  *
  * Input:
  *	- string
  *	- starting position (is one-based)
  *	- string length (optional)
  *
  * If the starting position is zero or less, then return from the start of the string
  * adjusting the length to be consistent with the "negative start" per SQL92.
  * If the length is less than zero, an ERROR is thrown. If no third argument
  * (length) is provided, the length to the end of the string is assumed.
  */
 Datum
 bytea_substr(PG_FUNCTION_ARGS)
 {
 	int			S = PG_GETARG_INT32(1); /* start position */
 	int			S1;				/* adjusted start position */
 	int			L1;				/* adjusted substring length */

 	/* Start positions below 1 are clamped to the first byte. */
 	S1 = Max(S, 1);

 	if (fcinfo->nargs == 2)
 	{
 		/*
 		 * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs everything to
 		 * the end of the string if we pass it a negative value for length.
 		 */
 		L1 = -1;
 	}
 	else
 	{
 		int			L = PG_GETARG_INT32(2); /* requested length */

 		/*
 		 * Test the length directly rather than comparing start and end
 		 * positions: computing S + L first could overflow and make a large,
 		 * perfectly valid request look like a negative length.  SQL99 says
 		 * to throw an error for a negative length.
 		 */
 		if (L < 0)
 			ereport(ERROR,
 					(errcode(ERRCODE_SUBSTRING_ERROR),
 					 errmsg("negative substring length not allowed"),
 							 errOmitLocation(true)));

 		if (S < 1)
 		{
 			/* S <= 0 and L >= 0 here, so the sum cannot overflow. */
 			int			E = S + L;	/* end position */

 			/*
 			 * A zero or negative value for the end position can happen if
 			 * the start was zero or negative. SQL99 says to return a
 			 * zero-length string.
 			 */
 			if (E < 1)
 				PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));

 			L1 = E - S1;
 		}
 		else if (L > 0x7FFFFFFF - S)
 		{
 			/*
 			 * S + L would exceed the int32 range, so the end position lies
 			 * beyond any possible value: take everything from the start
 			 * position to the end of the string.
 			 */
 			L1 = -1;
 		}
 		else
 		{
 			/* S1 == S, hence (S + L) - S1 is simply L. */
 			L1 = L;
 		}
 	}

 	/*
 	 * If the start position is past the end of the string, SQL99 says to
 	 * return a zero-length string -- PG_GETARG_BYTEA_P_SLICE() will do that
 	 * for us. Convert to zero-based starting position
 	 */
 	PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
 }

 /*
  * bytea_substr_no_len -
  *	  Wrapper to avoid opr_sanity failure due to
  *	  one function accepting a different number of args.
  */
 Datum
 bytea_substr_no_len(PG_FUNCTION_ARGS)
 {
 	/* bytea_substr() itself checks fcinfo->nargs for the missing length. */
 	Datum		piece = bytea_substr(fcinfo);

 	return piece;
 }

 /*
  * byteapos -
  *	  Return the position of the specified substring.
  *	  Implements the SQL92 POSITION() function.
  * Cloned from textpos and modified as required.
  */
 Datum
 byteapos(PG_FUNCTION_ARGS)
 {
 	Datum		d0 = PG_GETARG_DATUM(0);	/* string to search in */
 	Datum		d1 = PG_GETARG_DATUM(1);	/* pattern to search for */
 	char	   *p0;
 	char	   *p1;
 	void	   *tofree0;
 	void	   *tofree1;
 	int			len0;
 	int			len1;
 	int			pos = 0;		/* 1-based match position, 0 = not found */

 	varattrib_untoast_ptr_len(d0, &p0, &len0, &tofree0);
 	varattrib_untoast_ptr_len(d1, &p1, &len1, &tofree1);

 	if (len1 == 0)
 	{
 		/*
 		 * An empty pattern matches at position 1.  Fall through to the
 		 * common exit so the working copies are still released.
 		 */
 		pos = 1;
 	}
 	else
 	{
 		int			last_start = len0 - len1;	/* last offset that can match */
 		int			p;

 		for (p = 0; p <= last_start; p++)
 		{
 			/* cheap first-byte test before the full comparison */
 			if ((*p1 == p0[p]) && (memcmp(p0 + p, p1, len1) == 0))
 			{
 				pos = p + 1;
 				break;
 			}
 		}
 	}

 	if (tofree0)
 		pfree(tofree0);
 	if (tofree1)
 		pfree(tofree1);

 	PG_RETURN_INT32(pos);
 }

 /*-------------------------------------------------------------
  * byteaGetByte
  *
  * this routine treats "bytea" as an array of bytes.
  * It returns the Nth byte (a number between 0 and 255).
  *-------------------------------------------------------------
  */
 Datum
 byteaGetByte(PG_FUNCTION_ARGS)
 {
 	Datum		src = PG_GETARG_DATUM(0);
 	int32		n = PG_GETARG_INT32(1);
 	char	   *bytes;
 	void	   *bytes_copy;
 	int			nbytes;
 	int32		value;

 	varattrib_untoast_ptr_len(src, &bytes, &nbytes, &bytes_copy);

 	/* The index is zero-based and must address an existing byte. */
 	if (n < 0 || n >= nbytes)
 		ereport(ERROR,
 				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
 				 errmsg("index %d out of valid range, 0..%d",
 						n, nbytes - 1),
 								 errOmitLocation(true)));

 	/* Read through unsigned char so the result lies in 0..255. */
 	value = (unsigned char) bytes[n];

 	if (bytes_copy)
 		pfree(bytes_copy);

 	PG_RETURN_INT32(value);
 }

 /*-------------------------------------------------------------
  * byteaGetBit
  *
  * This routine treats a "bytea" type like an array of bits.
  * It returns the value of the Nth bit (0 or 1).
  *
  *-------------------------------------------------------------
  */
 Datum
 byteaGetBit(PG_FUNCTION_ARGS)
 {
 	Datum		src = PG_GETARG_DATUM(0);
 	int32		n = PG_GETARG_INT32(1);
 	char	   *bytes;
 	void	   *bytes_copy;
 	int			nbytes;
 	int			holder;			/* value of the byte containing bit n */
 	int			shift;			/* position of bit n within that byte */

 	varattrib_untoast_ptr_len(src, &bytes, &nbytes, &bytes_copy);

 	if (n < 0 || n >= nbytes * 8)
 		ereport(ERROR,
 				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
 				 errmsg("index %d out of valid range, 0..%d",
 						n, nbytes * 8 - 1),
 								 errOmitLocation(true)));

 	/* Bits are numbered from the least significant bit of byte 0. */
 	holder = (unsigned char) bytes[n / 8];
 	shift = n % 8;

 	if (bytes_copy)
 		pfree(bytes_copy);

 	PG_RETURN_INT32((holder >> shift) & 1);
 }

 /*-------------------------------------------------------------
  * byteaSetByte
  *
  * Given an instance of type 'bytea' creates a new one with
  * the Nth byte set to the given value.
  *
  *-------------------------------------------------------------
  */
 Datum
 byteaSetByte(PG_FUNCTION_ARGS)
 {
 	bytea	   *src = PG_GETARG_BYTEA_P(0);
 	int32		n = PG_GETARG_INT32(1);
 	int32		newByte = PG_GETARG_INT32(2);
 	int			payload_len = VARSIZE(src) - VARHDRSZ;
 	bytea	   *copy;
 	unsigned char *payload;

 	if (n < 0 || n >= payload_len)
 		ereport(ERROR,
 				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
 				 errmsg("index %d out of valid range, 0..%d",
 						n, payload_len - 1),
 								 errOmitLocation(true)));

 	/* Work on a private duplicate; the input value is left untouched. */
 	copy = (bytea *) palloc(VARSIZE(src));
 	memcpy((char *) copy, (char *) src, VARSIZE(src));

 	/* Store the new value (narrowed to one byte) at the requested index. */
 	payload = (unsigned char *) VARDATA(copy);
 	payload[n] = newByte;

 	PG_RETURN_BYTEA_P(copy);
 }

 /*-------------------------------------------------------------
  * byteaSetBit
  *
  * Given an instance of type 'bytea' creates a new one with
  * the Nth bit set to the given value.
  *
  *-------------------------------------------------------------
  */
 Datum
 byteaSetBit(PG_FUNCTION_ARGS)
 {
 	bytea	   *v = PG_GETARG_BYTEA_P(0);
 	int32		n = PG_GETARG_INT32(1);
 	int32		newBit = PG_GETARG_INT32(2);
 	bytea	   *res;
 	int			len;
 	int			oldByte,
 				newByte;
 	int			byteNo,
 				bitNo;

 	len = VARSIZE(v) - VARHDRSZ;

 	if (n < 0 || n >= len * 8)
 		ereport(ERROR,
 				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
 				 errmsg("index %d out of valid range, 0..%d",
 						n, len * 8 - 1),
 								 errOmitLocation(true)));

 	byteNo = n / 8;
 	bitNo = n % 8;

 	/*
 	 * sanity check!
 	 */
 	if (newBit != 0 && newBit != 1)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("new bit must be 0 or 1"),
 						 errOmitLocation(true)));

 	/*
 	 * Make a copy of the original varlena.
 	 */
 	res = (bytea *) palloc(VARSIZE(v));
 	memcpy((char *) res, (char *) v, VARSIZE(v));

 	/*
 	 * Update the byte.
 	 */
 	oldByte = ((unsigned char *) VARDATA(res))[byteNo];

 	if (newBit == 0)
 		newByte = oldByte & (~(1 << bitNo));
 	else
 		newByte = oldByte | (1 << bitNo);

 	((unsigned char *) VARDATA(res))[byteNo] = newByte;

 	PG_RETURN_BYTEA_P(res);
 }


 /* text_name()
  * Converts a text type to a Name type.
  */
 Datum
 text_name(PG_FUNCTION_ARGS)
 {
 	Datum		src = PG_GETARG_DATUM(0);
 	char	   *src_data;
 	void	   *src_copy;
 	int			src_len;
 	int			i;
 	Name		result;

 	varattrib_untoast_ptr_len(src, &src_data, &src_len, &src_copy);

 	/* Keep at most NAMEDATALEN - 1 bytes so a terminator always fits. */
 	if (src_len > NAMEDATALEN - 1)
 		src_len = NAMEDATALEN - 1;

 	result = (Name) palloc(NAMEDATALEN);
 	memcpy(NameStr(*result), src_data, src_len);

 	/* Zero-fill the rest of the fixed-size buffer. */
 	for (i = src_len; i < NAMEDATALEN; i++)
 		NameStr(*result)[i] = '\0';

 	if (src_copy)
 		pfree(src_copy);

 	PG_RETURN_NAME(result);
 }

 /* name_text()
  * Converts a Name type to a text type.
  */
 Datum
 name_text(PG_FUNCTION_ARGS)
 {
 	Name		s = PG_GETARG_NAME(0);

 	PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
 }


 /*
  * textToQualifiedNameList - convert a text object to list of names
  *
  * This implements the input parsing needed by nextval() and other
  * functions that take a text parameter representing a qualified name.
  * We split the name at dots, downcase if not double-quoted, and
  * truncate names if they're too long.
  */
 List *
 textToQualifiedNameList(text *textval)
 {
 	List	   *qualified = NIL;
 	List	   *pieces;
 	ListCell   *cell;
 	char	   *scratch;

 	/*
 	 * Work on a private C-string copy (this also handles possible
 	 * detoasting); SplitIdentifierString() modifies it in place.
 	 */
 	scratch = text_to_cstring(textval);

 	/* A syntax error and an empty name list are rejected the same way. */
 	if (!SplitIdentifierString(scratch, '.', &pieces) || pieces == NIL)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_NAME),
 				 errmsg("invalid name syntax"),
 						 errOmitLocation(true)));

 	/*
 	 * The list entries point into scratch, so each one is duplicated before
 	 * scratch is released below.
 	 */
 	foreach(cell, pieces)
 		qualified = lappend(qualified,
 							makeString(pstrdup((char *) lfirst(cell))));

 	pfree(scratch);
 	list_free(pieces);

 	return qualified;
 }

 /*
  * SplitIdentifierString --- parse a string containing identifiers
  *
  * This is the guts of textToQualifiedNameList, and is exported for use in
  * other situations such as parsing GUC variables.	In the GUC case, it's
  * important to avoid memory leaks, so the API is designed to minimize the
  * amount of stuff that needs to be allocated and freed.
  *
  * Inputs:
  *	rawstring: the input string; must be overwritable!	On return, it's
  *			   been modified to contain the separated identifiers.
  *	separator: the separator punctuation expected between identifiers
  *			   (typically '.' or ',').	Whitespace may also appear around
  *			   identifiers.
  * Outputs:
  *	namelist: filled with a palloc'd list of pointers to identifiers within
  *			  rawstring.  Caller should list_free() this even on error return.
  *
  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
  *
  * Note that an empty string is considered okay here, though not in
  * textToQualifiedNameList.
  */
 bool
 SplitIdentifierString(char *rawstring, char separator,
 					  List **namelist)
 {
 	/*
 	 * Splits rawstring in place: each identifier is NUL-terminated inside
 	 * the caller's buffer and a pointer to it is appended to *namelist.
 	 */
 	char	   *nextp = rawstring;
 	bool		done = false;

 	/* Start from an empty list so it is valid on every return path. */
 	*namelist = NIL;

 	while (isspace((unsigned char) *nextp))
 		nextp++;				/* skip leading whitespace */

 	if (*nextp == '\0')
 		return true;			/* allow empty string */

 	/* At the top of the loop, we are at start of a new identifier. */
 	do
 	{
 		char	   *curname;
 		char	   *endp;

 		if (*nextp == '\"')
 		{
 			/* Quoted name --- collapse quote-quote pairs, no downcasing */
 			curname = nextp + 1;
 			for (;;)
 			{
 				endp = strchr(nextp + 1, '\"');
 				if (endp == NULL)
 					return false;		/* mismatched quotes */
 				if (endp[1] != '\"')
 					break;		/* found end of quoted name */
 				/* Collapse adjacent quotes into one quote, and look again */
 				/*
 				 * strlen(endp) bytes starting at endp + 1 cover the rest of
 				 * the string plus its terminating NUL, so the whole tail
 				 * moves left by one byte over the first quote of the pair.
 				 */
 				memmove(endp, endp + 1, strlen(endp));
 				/* resume the search just after the quote we kept */
 				nextp = endp;
 			}
 			/* endp now points at the terminating quote */
 			nextp = endp + 1;
 		}
 		else
 		{
 			/* Unquoted name --- extends to separator or whitespace */
 			char	   *downname;
 			int			len;

 			curname = nextp;
 			while (*nextp && *nextp != separator &&
 				   !isspace((unsigned char) *nextp))
 				nextp++;
 			endp = nextp;
 			if (curname == nextp)
 				return false;	/* empty unquoted name not allowed */

 			/*
 			 * Downcase the identifier, using same code as main lexer does.
 			 *
 			 * XXX because we want to overwrite the input in-place, we cannot
 			 * support a downcasing transformation that increases the string
 			 * length.	This is not a problem given the current implementation
 			 * of downcase_truncate_identifier, but we'll probably have to do
 			 * something about this someday.
 			 */
 			len = endp - curname;
 			downname = downcase_truncate_identifier(curname, len, false);
 			Assert(strlen(downname) <= len);
 			/*
 			 * strncpy writes exactly len bytes (NUL-padding when downname is
 			 * shorter), so nothing past the identifier is touched; the
 			 * terminator is stored later through *endp.
 			 */
 			strncpy(curname, downname, len);
 			pfree(downname);
 		}

 		while (isspace((unsigned char) *nextp))
 			nextp++;			/* skip trailing whitespace */

 		if (*nextp == separator)
 		{
 			nextp++;
 			while (isspace((unsigned char) *nextp))
 				nextp++;		/* skip leading whitespace for next */
 			/* we expect another name, so done remains false */
 		}
 		else if (*nextp == '\0')
 			done = true;
 		else
 			return false;		/* invalid syntax */

 		/* Now safe to overwrite separator with a null */
 		/*
 		 * endp may be the separator, whitespace, the closing quote, or the
 		 * existing terminator; nextp has already moved past it.
 		 */
 		*endp = '\0';

 		/* Truncate name if it's overlength */
 		truncate_identifier(curname, strlen(curname), false);

 		/*
 		 * Finished isolating current name --- add it to list
 		 */
 		*namelist = lappend(*namelist, curname);

 		/* Loop back if we didn't reach end of string */
 	} while (!done);

 	return true;
 }


 /*****************************************************************************
  *	Comparison Functions used for bytea
  *
  * Note: btree indexes need these routines not to leak memory; therefore,
  * be careful to free working copies of toasted datums.  Most places don't
  * need to be so careful.
  *****************************************************************************/

 Datum
 byteaeq(PG_FUNCTION_ARGS)
 {
 	/* bytea = bytea: delegate to the shared bytewise equality test */
 	Datum		is_equal = generic_varlena_eq(fcinfo);

 	return is_equal;
 }

 Datum
 byteane(PG_FUNCTION_ARGS)
 {
 	/* bytea <> bytea: invert the result of the shared equality test */
 	Datum		is_equal = generic_varlena_eq(fcinfo);

 	if (is_equal == 0)
 		return 1;
 	return 0;
 }

 /* bytea < bytea, bytewise via generic_varlena_cmp() */
 Datum
 bytealt(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_BOOL(generic_varlena_cmp(fcinfo) < 0);
 }

 /* bytea <= bytea, bytewise via generic_varlena_cmp() */
 Datum
 byteale(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_BOOL(generic_varlena_cmp(fcinfo) <= 0);
 }

 /* bytea > bytea, bytewise via generic_varlena_cmp() */
 Datum
 byteagt(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_BOOL(generic_varlena_cmp(fcinfo) > 0);
 }

 /* bytea >= bytea, bytewise via generic_varlena_cmp() */
 Datum
 byteage(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_BOOL(generic_varlena_cmp(fcinfo) >= 0);
 }

 /* Three-way bytea comparison, bytewise via generic_varlena_cmp() */
 Datum
 byteacmp(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_INT32(generic_varlena_cmp(fcinfo));
 }

 /*
  * appendStringInfoText
  *
  * Append a text to str.
  * Like appendStringInfoString(str, PG_TEXT_GET_STR(s)) but faster.
  */
 static void
 appendStringInfoText(StringInfo str, const text *t)
 {
 	/* The varlena macros are applied to a non-const void pointer. */
 	void	   *raw = (void *) t;

 	/* Append just the payload bytes; the header is skipped. */
 	appendBinaryStringInfo(str, VARDATA_ANY(raw), VARSIZE_ANY_EXHDR(raw));
 }

 /*
  * replace_text
  * replace all occurrences of 'old_sub_str' in 'orig_str'
  * with 'new_sub_str' to form 'new_str'
  *
  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
  * otherwise returns 'new_str'
  */
 Datum
 replace_text(PG_FUNCTION_ARGS)
 {
 	/* arg 0: source string */
 	Datum d0 = PG_GETARG_DATUM(0);
 	char *p0; void *tofree0; int len0;

 	/* arg 1: substring to look for */
 	Datum d1 = PG_GETARG_DATUM(1);
 	char *p1; void *tofree1; int len1;

 	/* arg 2: replacement substring */
 	Datum d2 = PG_GETARG_DATUM(2);
 	char *p2; void *tofree2; int len2;

 	/* length of the search substring, in characters rather than bytes */
 	int from_sub_text_len;

 	TextPositionState state =
 		{
 		0, /* use_wchar */
 		NULL, /* str1 */
 		NULL, /* str2 */
 		NULL, /* wstr1 */
 		NULL, /* wstr2 */
 		0, /* len1 */
 		0, /* len2 */
 		};
 	text	   *ret_text;
 	int			start_posn;		/* 1-based char position matching start_ptr */
 	int			curr_posn;		/* 1-based char position of current match */
 	int			chunk_len;
 	char	   *start_ptr;
 	StringInfoData str;

 	varattrib_untoast_ptr_len(d0, &p0, &len0, &tofree0);
 	varattrib_untoast_ptr_len(d1, &p1, &len1, &tofree1);
 	varattrib_untoast_ptr_len(d2, &p2, &len2, &tofree2);

 	/* In a single-byte encoding the byte count is the character count. */
 	if(pg_database_encoding_max_length() == 1)
 		from_sub_text_len = len1;
 	else
 		from_sub_text_len = pg_mbstrlen_with_len(p1, len1);

 	/*
 	 * Nothing to replace in an empty source or for an empty search string:
 	 * free the working buffers and hand back the original argument datum.
 	 */
 	if (len0 == 0 || from_sub_text_len == 0)
 	{
 		if(tofree0)
 			pfree(tofree0);
 		if(tofree1)
 			pfree(tofree1);
 		if(tofree2)
 			pfree(tofree2);
 		return d0;
 	}

 	text_position_setup_ptr_len(p0, len0, p1, len1, &state);

 	start_posn = 1;
 	curr_posn = text_position_next(1, &state);

 	/* When the from_sub_text is not found, there is nothing to do. */
 	if (curr_posn == 0)
 	{
 		if(tofree0)
 			pfree(tofree0);
 		if(tofree1)
 			pfree(tofree1);
 		if(tofree2)
 			pfree(tofree2);

 		text_position_cleanup(&state);
 		return d0;
 	}

 	/* start_ptr points to the start_posn'th character of src_text */
 	start_ptr = p0;

 	initStringInfo(&str);

 	/*
 	 * For every match: emit the unmatched text before it, then the
 	 * replacement, then move both cursors (character position and byte
 	 * pointer) past the matched substring.
 	 */
 	do
 	{
 		/* copy the data skipped over by last text_position_next() */
 		chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
 		appendBinaryStringInfo(&str, start_ptr, chunk_len);
 		appendBinaryStringInfo(&str, p2, len2);

 		start_posn = curr_posn;
 		start_ptr += chunk_len;
 		start_posn += from_sub_text_len;
 		start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);

 		curr_posn = text_position_next(start_posn, &state);
 	}
 	while (curr_posn > 0);

 	/* copy trailing data */
 	chunk_len = ((char *) p0 + len0) - start_ptr;
 	appendBinaryStringInfo(&str, start_ptr, chunk_len);

 	text_position_cleanup(&state);

 	/* Build the result via textin() from the accumulated C string. */
 	ret_text = PG_STR_GET_TEXT(str.data);
 	pfree(str.data);
 	if(tofree0)
 		pfree(tofree0);
 	if(tofree1)
 		pfree(tofree1);
 	if(tofree2)
 		pfree(tofree2);

 	PG_RETURN_TEXT_P(ret_text);
 }

 /*
  * check_replace_text_has_escape_char
  *
  * check whether replace_text contains escape char.
  */
 static bool
 check_replace_text_has_escape_char(const text *replace_text)
 {
 	const char *p = VARDATA_ANY((void *) replace_text);

 	/*
 	 * p already points past the varlena header, so the end of the data is
 	 * found by adding the header-exclusive size.  Adding the total size
 	 * would let the scan run past the end of the value by the header length.
 	 */
 	const char *p_end = p + VARSIZE_ANY_EXHDR((void *) replace_text);

 	if (pg_database_encoding_max_length() == 1)
 	{
 		/* single-byte encoding: every byte is one character */
 		for (; p < p_end; p++)
 		{
 			if (*p == '\\')
 				return true;
 		}
 	}
 	else
 	{
 		/* multibyte encoding: step one whole character at a time */
 		for (; p < p_end; p += pg_mblen(p))
 		{
 			if (*p == '\\')
 				return true;
 		}
 	}

 	return false;
 }

 /*
  * appendStringInfoRegexpSubstr
  *
  * Append replace_text to str, substituting regexp back references for
  * \n escapes.	start_ptr is the start of the match in the source string,
  * at logical character position data_pos.
  */
 static void
 appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
 							 regmatch_t *pmatch,
 							 char *start_ptr, int data_pos)
 {
 	const char *p = VARDATA_ANY(replace_text);
 	const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
 	int			eml = pg_database_encoding_max_length();

 	/*
 	 * Each iteration copies one run of literal text and then handles the
 	 * escape sequence that ended it, until the replacement text is used up.
 	 */
 	for (;;)
 	{
 		const char *chunk_start = p;
 		int			so;			/* start offset of the referenced match */
 		int			eo;			/* end offset of the referenced match */

 		/* Find next escape char. */
 		if (eml == 1)
 		{
 			for (; p < p_end && *p != '\\'; p++)
 				 /* nothing */ ;
 		}
 		else
 		{
 			for (; p < p_end && *p != '\\'; p += pg_mblen(p))
 				 /* nothing */ ;
 		}

 		/* Copy the text we just scanned over, if any. */
 		if (p > chunk_start)
 			appendBinaryStringInfo(str, chunk_start, p - chunk_start);

 		/* Done if at end of string, else advance over escape char. */
 		if (p >= p_end)
 			break;
 		p++;

 		if (p >= p_end)
 		{
 			/* Escape at very end of input.  Treat same as unexpected char */
 			appendStringInfoChar(str, '\\');
 			break;
 		}

 		if (*p >= '1' && *p <= '9')
 		{
 			/* Use the back reference of regexp. */
 			int			idx = *p - '0';

 			so = pmatch[idx].rm_so;
 			eo = pmatch[idx].rm_eo;
 			p++;
 		}
 		else if (*p == '&')
 		{
 			/* Use the entire matched string. */
 			so = pmatch[0].rm_so;
 			eo = pmatch[0].rm_eo;
 			p++;
 		}
 		else if (*p == '\\')
 		{
 			/* \\ means transfer one \ to output. */
 			appendStringInfoChar(str, '\\');
 			p++;
 			continue;
 		}
 		else
 		{
 			/*
 			 * If escape char is not followed by any expected char, just treat
 			 * it as ordinary data to copy.  (XXX would it be better to throw
 			 * an error?)
 			 */
 			/*
 			 * p is deliberately not advanced here: the character after the
 			 * backslash is picked up by the next literal-text scan.
 			 */
 			appendStringInfoChar(str, '\\');
 			continue;
 		}

 		/* Offsets of -1 are skipped, so such a reference emits nothing. */
 		if (so != -1 && eo != -1)
 		{
 			/*
 			 * Copy the text that is back reference of regexp.	Note so and eo
 			 * are counted in characters not bytes.
 			 */
 			char	   *chunk_start;
 			int			chunk_len;

 			Assert(so >= data_pos);
 			/* walk forward from start_ptr to the start of the submatch */
 			chunk_start = start_ptr;
 			chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
 			chunk_len = charlen_to_bytelen(chunk_start, eo - so);
 			appendBinaryStringInfo(str, chunk_start, chunk_len);
 		}
 	}
 }

 /*
  * Number of regmatch_t slots requested from pg_regexec() by
  * replace_text_regexp(): slot 0 receives the whole match and slots 1..9 are
  * what the \1 .. \9 escapes in the replacement text index into.
  */
 #define REGEXP_REPLACE_BACKREF_CNT		10

 /*
  * replace_text_regexp
  *
  * replace text that matches to regexp in src_text to replace_text.
  *
  * Note: to avoid having to include regex.h in builtins.h, we declare
  * the regexp argument as void *, but really it's regex_t *.
  */
 text *
 replace_text_regexp(text *src_text, void *regexp,
 					text *replace_text, bool glob)
 {
 	text	   *ret_text;
 	regex_t    *re = (regex_t *) regexp;
 	int			src_text_len = VARSIZE_ANY_EXHDR(src_text);	/* in bytes */
 	StringInfoData buf;
 	regmatch_t	pmatch[REGEXP_REPLACE_BACKREF_CNT];
 	pg_wchar   *data;			/* wide-character copy of the source */
 	size_t		data_len;		/* length of data, in characters */
 	int			search_start;	/* character offset to search from */
 	int			data_pos;		/* character offset matching start_ptr */
 	char	   *start_ptr;		/* byte cursor into the original source */
 	bool		have_escape;

 	initStringInfo(&buf);

 	/* Convert data string to wide characters. */
 	/* Sized for one wide character per source byte, plus one spare slot. */
 	data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
 	data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);

 	/* Check whether replace_text has escape char. */
 	have_escape = check_replace_text_has_escape_char(replace_text);

 	/* start_ptr points to the data_pos'th character of src_text */
 	start_ptr = (char *) VARDATA_ANY(src_text);
 	data_pos = 0;

 	search_start = 0;
 	/* "<=" so a search is still attempted at the very end of the data. */
 	while (search_start <= data_len)
 	{
 		int			regexec_result;

 		CHECK_FOR_INTERRUPTS();

 		regexec_result = pg_regexec(re,
 									data,
 									data_len,
 									search_start,
 									NULL,		/* no details */
 									REGEXP_REPLACE_BACKREF_CNT,
 									pmatch,
 									0);

 		if (regexec_result == REG_NOMATCH)
 			break;

 		/* Any result other than "no match" or success is reported. */
 		if (regexec_result != REG_OKAY)
 		{
 			char		errMsg[100];

 			pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
 					 errmsg("regular expression failed: %s", errMsg),
 							 errOmitLocation(true)));
 		}

 		/*
 		 * Copy the text to the left of the match position.  Note we are given
 		 * character not byte indexes.
 		 */
 		if (pmatch[0].rm_so - data_pos > 0)
 		{
 			int			chunk_len;

 			chunk_len = charlen_to_bytelen(start_ptr,
 										   pmatch[0].rm_so - data_pos);
 			appendBinaryStringInfo(&buf, start_ptr, chunk_len);
 			/*
 			 * Advance start_ptr over that text, to avoid multiple rescans of
 			 * it if the replace_text contains multiple back-references.
 			 */
 			start_ptr += chunk_len;
 			data_pos = pmatch[0].rm_so;
 		}

 		/*
 		 * Copy the replace_text. Process back references when the
 		 * replace_text has escape characters.
 		 */
 		if (have_escape)
 			appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
 										 start_ptr, data_pos);
 		else
 			appendStringInfoText(&buf, replace_text);

 		/* Advance start_ptr and data_pos over the matched text. */
 		start_ptr += charlen_to_bytelen(start_ptr,
 										pmatch[0].rm_eo - data_pos);
 		data_pos = pmatch[0].rm_eo;

 		/*
 		 * When global option is off, replace the first instance only.
 		 */
 		if (!glob)
 			break;

 		/*
 		 * Search from next character when the matching text is zero width.
 		 */
 		search_start = data_pos;
 		if (pmatch[0].rm_so == pmatch[0].rm_eo)
 			search_start++;
 	}

 	/*
 	 * Copy the text to the right of the last match.
 	 */
 	if (data_pos < data_len)
 	{
 		int			chunk_len;

 		/*
 		 * The end of the value is the start of the varlena plus its total
 		 * size (header included); everything from start_ptr up to there is
 		 * still unread.
 		 */
 		chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
 		appendBinaryStringInfo(&buf, start_ptr, chunk_len);
 	}

 	ret_text = cstring_to_text_with_len(buf.data, buf.len);
 	pfree(buf.data);
 	pfree(data);

 	return ret_text;
 }

 /*
  * split_text
  * parse input string
  * return ord item (1 based)
  * based on provided field separator
  */
 Datum
 split_text(PG_FUNCTION_ARGS)
 {
 	/* arg 0: input string */
 	Datum d0 = PG_GETARG_DATUM(0);
 	char *p0; void *tofree0; int len0;

 	/* arg 1: field separator */
 	Datum d1 = PG_GETARG_DATUM(1);
 	char *p1; void *tofree1; int len1;

 	/* arg 2: 1-based number of the field to return */
 	int			fldnum = PG_GETARG_INT32(2);

 	/* both lengths below are measured in characters, not bytes */
 	int			inputstring_len;
 	int			fldsep_len;
 	TextPositionState state =
 		{
 		0, /* use_wchar */
 		NULL, /* str1 */
 		NULL, /* str2 */
 		NULL, /* wstr1 */
 		NULL, /* wstr2 */
 		0, /* len1 */
 		0, /* len2 */
 		};

 	int			start_posn;		/* 1-based char position where field starts */
 	int			end_posn;		/* position of the separator ending it, or 0 */
 	text	   *result_text;

 	varattrib_untoast_ptr_len(d0, &p0, &len0, &tofree0);
 	varattrib_untoast_ptr_len(d1, &p1, &len1, &tofree1);

 	/* In a single-byte encoding the byte counts are the character counts. */
 	if(pg_database_encoding_max_length() == 1)
 	{
 		inputstring_len = len0;
 		fldsep_len = len1;
 	}
 	else
 	{
 		inputstring_len = pg_mbstrlen_with_len(p0, len0);
 		fldsep_len = pg_mbstrlen_with_len(p1, len1);
 	}

 	/* field number is 1 based */
 	if (fldnum < 1)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("field position must be greater than zero"),
 						 errOmitLocation(true)));

 	/* return empty string for empty input string */
 	if (inputstring_len < 1)
 	{
 		if(tofree0)
 			pfree(tofree0);
 		if(tofree1)
 			pfree(tofree1);

 		PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
 	}

 	/* empty field separator */
 	if (fldsep_len < 1)
 	{
 		if(tofree0)
 			pfree(tofree0);
 		if(tofree1)
 			pfree(tofree1);

 		/* if first field, return input string, else empty string */
 		if (fldnum == 1)
 			return d0;
 		else
 			PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
 	}

 	text_position_setup_ptr_len(p0, len0, p1, len1, &state);

 	/* identify bounds of first field */
 	start_posn = 1;
 	end_posn = text_position_next(1, &state);

 	/* special case if fldsep not found at all */
 	if (end_posn == 0)
 	{
 		text_position_cleanup(&state);
 		if(tofree0)
 			pfree(tofree0);
 		if(tofree1)
 			pfree(tofree1);

 		/* if field 1 requested, return input string, else empty string */
 		if (fldnum == 1)
 			return d0;
 		else
 			PG_RETURN_TEXT_P(cstring_to_text(""));
 	}

 	/*
 	 * Step from field to field.  fldnum is decremented only while a
 	 * separator was found (the && short-circuits), so on exit fldnum == 0
 	 * means the requested field is bounded by a separator, while fldnum > 0
 	 * means the separators ran out first.
 	 */
 	while (end_posn > 0 && --fldnum > 0)
 	{
 		/* identify bounds of next field */
 		start_posn = end_posn + fldsep_len;
 		end_posn = text_position_next(start_posn, &state);
 	}

 	text_position_cleanup(&state);

 	if (fldnum > 0)
 	{
 		/* N'th field separator not found */
 		/* if last field requested, return it, else empty string */
 		/* NOTE(review): -1/true presumably mean "through end of string" */
 		if (fldnum == 1)
 			result_text = text_substring(d0,
 										 start_posn,
 										 -1,
 										 true);
 		else
 			result_text = cstring_to_text("");
 	}
 	else
 	{
 		/* non-last field requested */
 		result_text = text_substring(d0,
 									 start_posn,
 									 end_posn - start_posn,
 									 false);
 	}

 	if(tofree0)
 		pfree(tofree0);
 	if(tofree1)
 		pfree(tofree1);

 	PG_RETURN_TEXT_P(result_text);
 }


 /*
  * text_to_array_impl
  *		Carries out the actual tokenization and array conversion of an input string.
  *
  * Parameters:
  * 		string: Where to start in the input string
  * 		stringByteLen: Length of current string
  * 		delimiter: Which delimiter to use
  * 		delimiterByteLen: Length of delimiter in bytes
  * 		delimiterCharLen: Length of delimiter in chars
  * 		arrayState: State of the output array where we accumulate results
  * 		endOfString: Do we expect any more chunk of the main input string?
  *
  * Returns the pointer where the last match was found. Successively the
  * caller can splice more data starting from this address to find further
  * array elements.
  */
 static char* text_to_array_impl(char *string, int stringByteLen, char *delimiter,
 		int delimiterByteLen, int delimiterCharLen, ArrayBuildState **arrayState, bool endOfString)
 {
 	int start_posn = 1;		/* 1-based char position where current field starts */
 	int fldnum = 1;
 	int end_posn = 0;		/* char position of the next delimiter, 0 if none */
 	int chunk_len = 0;		/* byte length of the current field */
 	text	   *result_text;

 	/* byte cursor kept in step with start_posn */
 	char* cur_ptr = string;

 	TextPositionState state =
 		{
 		0, /* use_wchar */
 		NULL, /* str1 */
 		NULL, /* str2 */
 		NULL, /* wstr1 */
 		NULL, /* wstr2 */
 		0, /* len1 */
 		0, /* len2 */
 		};

 	text_position_setup_ptr_len(string, stringByteLen, delimiter, delimiterByteLen, &state);

 	for (fldnum = 1;; fldnum++) /* field number is 1 based */
 	{
 		end_posn = text_position_next(start_posn, &state);

 		if (end_posn == 0 && !endOfString)
 		{
 			/*
 			 * No further delimiter and more input is expected: leave the
 			 * trailing partial field unconsumed; cur_ptr tells the caller
 			 * where it starts.
 			 */
 			break;
 		}
 		else if (end_posn == 0)
 		{
 			/* fetch last field */
 			chunk_len = (string + stringByteLen) - cur_ptr;
 		}
 		else
 		{
 			/* fetch non-last field */
 			chunk_len = charlen_to_bytelen(cur_ptr, end_posn - start_posn);
 		}

 		/* must build a temp text datum to pass to accumArrayResult */
 		result_text = cstring_to_text_with_len(cur_ptr, chunk_len);

 		/* stash away this field */
 		*arrayState = accumArrayResult(*arrayState,
 								  PointerGetDatum(result_text),
 								  false,
 								  TEXTOID,
 								  CurrentMemoryContext);

 		/* NOTE(review): assumes accumArrayResult copied the datum -- confirm */
 		pfree(result_text);

 		if (end_posn == 0)
 		{
 			/* Process next sub-string if any */
 			break;
 		}

 		/* Move both cursors past the field and the delimiter after it. */
 		start_posn = end_posn;
 		cur_ptr += chunk_len;
 		start_posn += delimiterCharLen;
 		cur_ptr += charlen_to_bytelen(cur_ptr, delimiterCharLen);
 	}

 	text_position_cleanup(&state);

 	return cur_ptr;
 }


 /*
  * text_to_array_multi_pass
  *		Tokenizes the input string into array elements by running
  *		text_to_array_impl over it in one or more passes.  In a multibyte
  *		encoding, a pass is shortened via find_memory_limited_substring when
  *		the bytes still to be processed exceed
  *		MAX_STRING_BYTES / sizeof(pg_wchar).
  *
  * Parameters:
  * 		string: The start of the input string
  * 		stringByteLen: Length of the input string in bytes
  * 		delimiter: Which delimiter to use
  * 		delimiterByteLen: Length of delimiter in bytes
  * 		delimiterCharLen: Length of delimiter in chars
  *
  * Returns the ArrayBuildState containing all the array elements.  Raises an
  * error if a pass that is not the last one consumes nothing.
  */
 static ArrayBuildState* text_to_array_multi_pass(char *string, int stringByteLen, char *delimiter, int delimiterByteLen, int delimiterCharLen)
 {
 	ArrayBuildState *elements = NULL;
 	char	   *start_ptr = string;		/* where the current pass begins */
 	bool		lastPass;

 	do
 	{
 		char	   *nextStartPtr;

 		/* By default the pass covers everything that is still unconsumed */
 		int			curSubStringByteLen = (string + stringByteLen) - start_ptr;

 		/* Would this multibyte text be too big once converted to pg_wchar? */
 		if (pg_database_encoding_max_length() > 1 &&
 			curSubStringByteLen > ((MAX_STRING_BYTES) / sizeof(pg_wchar)))
 		{
 			/* character length is reported by the helper but not needed here */
 			int			ignoredCharLen = 0;

 			find_memory_limited_substring(start_ptr, string + stringByteLen - start_ptr,
 				(MAX_STRING_BYTES) / sizeof(pg_wchar), &curSubStringByteLen, &ignoredCharLen);
 		}

 		Insist(start_ptr + curSubStringByteLen <= string + stringByteLen);

 		/* This is the final pass iff it reaches the end of the input */
 		lastPass = ((start_ptr + curSubStringByteLen) == (string + stringByteLen));

 		nextStartPtr = text_to_array_impl(start_ptr, curSubStringByteLen, delimiter, delimiterByteLen, delimiterCharLen, &elements, lastPass);

 		Insist(nextStartPtr >= start_ptr);

 		/* A non-final pass that made no progress would repeat forever */
 		if (nextStartPtr == start_ptr && !lastPass)
 			elog(ERROR, "String size not supported.");

 		start_ptr = nextStartPtr;
 	} while (!lastPass);

 	return elements;
 }


 /*
  * text_to_array
  *		Parse the input string into fields using the provided field separator
  *		and return them as a text array.
  *
  * An empty input string yields NULL.  An empty separator yields a
  * one-element, one-dimensional array holding the input string.
  */
 Datum
 text_to_array(PG_FUNCTION_ARGS)
 {
 	Datum		inputDatum = PG_GETARG_DATUM(0);
 	Datum		sepDatum = PG_GETARG_DATUM(1);
 	char	   *input = NULL;
 	char	   *sep = NULL;
 	void	   *inputToFree = NULL;
 	void	   *sepToFree = NULL;
 	int			inputByteLen = 0;
 	int			sepByteLen = 0;
 	int			inputCharLen;
 	int			sepCharLen;
 	ArrayBuildState *fields = NULL;

 	varattrib_untoast_ptr_len(inputDatum, &input, &inputByteLen, &inputToFree);
 	varattrib_untoast_ptr_len(sepDatum, &sep, &sepByteLen, &sepToFree);

 	/* Character counts differ from byte counts only in multibyte encodings */
 	if (pg_database_encoding_max_length() != 1)
 	{
 		inputCharLen = pg_mbstrlen_with_len(input, inputByteLen);
 		sepCharLen = pg_mbstrlen_with_len(sep, sepByteLen);
 	}
 	else
 	{
 		inputCharLen = inputByteLen;
 		sepCharLen = sepByteLen;
 	}

 	/* Tokenize only when neither the input nor the separator is empty */
 	if (inputCharLen >= 1 && sepCharLen >= 1)
 		fields = text_to_array_multi_pass(input, inputByteLen, sep, sepByteLen, sepCharLen);

 	/* Release whatever the untoast calls handed back for freeing */
 	if (inputToFree)
 		pfree(inputToFree);
 	if (sepToFree)
 		pfree(sepToFree);

 	/* return NULL for empty input string */
 	if (inputCharLen < 1)
 		PG_RETURN_NULL();

 	/* empty field separator: single-element array built from the input datum */
 	if (sepCharLen < 1)
 		PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID, inputDatum, 1));

 	PG_RETURN_DATUM(makeArrayResult(fields, CurrentMemoryContext));
 }

 /*
  * array_to_text
  *		Concatenate the output-function strings of the input array's elements,
  *		placing the provided field separator between them.
  *
  * NULL elements are skipped and contribute no separator.  An array with no
  * elements yields an empty string.
  */
 Datum
 array_to_text(PG_FUNCTION_ARGS)
 {
 	ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
 	char	   *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
 	int			nitems = ArrayGetNItems(ARR_NDIM(v), ARR_DIMS(v));
 	Oid			element_type;
 	int			elem_len;
 	bool		elem_byval;
 	char		elem_align;
 	StringInfoData out;
 	bool		need_sep = false;	/* becomes true once one element is emitted */
 	char	   *cursor;
 	bits8	   *nullmap;
 	int			nullbit = 1;
 	int			idx;
 	ArrayMetaState *cache;

 	/* if there are no elements, return an empty string */
 	if (nitems == 0)
 		PG_RETURN_TEXT_P(cstring_to_text(""));

 	element_type = ARR_ELEMTYPE(v);
 	initStringInfo(&out);

 	/*
 	 * The element type's I/O information is cached in fn_extra so that it is
 	 * looked up only once per series of calls, as long as the element type
 	 * stays the same.
 	 */
 	cache = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
 	if (cache == NULL)
 	{
 		cache = (ArrayMetaState *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
 													  sizeof(ArrayMetaState));
 		/* seed with a value that cannot match, forcing the lookup below */
 		cache->element_type = ~element_type;
 		fcinfo->flinfo->fn_extra = cache;
 	}

 	if (cache->element_type != element_type)
 	{
 		/* Fetch element type details, including its output conversion proc */
 		get_type_io_data(element_type, IOFunc_output,
 						 &cache->typlen, &cache->typbyval,
 						 &cache->typalign, &cache->typdelim,
 						 &cache->typioparam, &cache->typiofunc);
 		fmgr_info_cxt(cache->typiofunc, &cache->proc,
 					  fcinfo->flinfo->fn_mcxt);
 		cache->element_type = element_type;
 	}
 	elem_len = cache->typlen;
 	elem_byval = cache->typbyval;
 	elem_align = cache->typalign;

 	cursor = ARR_DATA_PTR(v);
 	nullmap = ARR_NULLBITMAP(v);

 	for (idx = 0; idx < nitems; idx++)
 	{
 		/* a cleared bit in the null bitmap marks a NULL element */
 		bool		elem_is_null = (nullmap && (*nullmap & nullbit) == 0);

 		/* NULL elements are ignored; they occupy no space in the data area */
 		if (!elem_is_null)
 		{
 			Datum		elem = fetch_att(cursor, elem_byval, elem_len);
 			char	   *elem_str = OutputFunctionCall(&cache->proc, elem);

 			if (need_sep)
 				appendStringInfo(&out, "%s%s", fldsep, elem_str);
 			else
 				appendStringInfoString(&out, elem_str);
 			need_sep = true;

 			/* step past this element and realign for the next one */
 			cursor = att_addlength_pointer(cursor, elem_len, cursor);
 			cursor = (char *) att_align_nominal(cursor, elem_align);
 		}

 		/* advance bitmap pointer if any */
 		if (nullmap)
 		{
 			nullbit <<= 1;
 			if (nullbit == 0x100)
 			{
 				nullmap++;
 				nullbit = 1;
 			}
 		}
 	}

 	PG_RETURN_TEXT_P(cstring_to_text_with_len(out.data, out.len));
 }

 #define HEXBASE 16
 /*
  * to_hex32
  *		Convert an int32 to text holding its base 16 (hex) representation in
  *		lowercase digits.
  *
  * The argument is reinterpreted as uint32 before conversion.  Zero is
  * rendered as "0".
  */
 Datum
 to_hex32(PG_FUNCTION_ARGS)
 {
 	const char *hexchars = "0123456789abcdef";
 	char		out[32];		/* far more room than 8 digits + NUL need */
 	char	   *cursor = out + sizeof(out) - 1;
 	uint32		remaining = (uint32) PG_GETARG_INT32(0);

 	/* Digits come out least-significant first, so fill the buffer backwards */
 	*cursor = '\0';
 	for (;;)
 	{
 		cursor--;
 		*cursor = hexchars[remaining % HEXBASE];
 		remaining /= HEXBASE;

 		/* stop once the value is exhausted or the buffer is full */
 		if (remaining == 0 || cursor <= out)
 			break;
 	}

 	PG_RETURN_TEXT_P(cstring_to_text(cursor));
 }

 /*
  * to_hex64
  *		Convert an int64 to text holding its base 16 (hex) representation in
  *		lowercase digits.
  *
  * The argument is reinterpreted as uint64 before conversion.  Zero is
  * rendered as "0".
  */
 Datum
 to_hex64(PG_FUNCTION_ARGS)
 {
 	const char *hexchars = "0123456789abcdef";
 	char		out[32];		/* far more room than 16 digits + NUL need */
 	char	   *cursor = out + sizeof(out) - 1;
 	uint64		remaining = (uint64) PG_GETARG_INT64(0);

 	/* Digits come out least-significant first, so fill the buffer backwards */
 	*cursor = '\0';
 	for (;;)
 	{
 		cursor--;
 		*cursor = hexchars[remaining % HEXBASE];
 		remaining /= HEXBASE;

 		/* stop once the value is exhausted or the buffer is full */
 		if (remaining == 0 || cursor <= out)
 			break;
 	}

 	PG_RETURN_TEXT_P(cstring_to_text(cursor));
 }

 /*
  * md5_text
  *		Compute the md5 hash of a text value and return it as hex text.
  *
  * md5 produces a 16 byte (128 bit) hash; the hex form is twice that long.
  */
 #define MD5_HASH_LEN  32

 Datum
 md5_text(PG_FUNCTION_ARGS)
 {
 	text	   *input = PG_GETARG_TEXT_PP(0);
 	/* payload size, excluding the varlena header */
 	size_t		nbytes = VARSIZE_ANY_EXHDR(input);
 	char		hexsum[MD5_HASH_LEN + 1];	/* hex digits plus terminating NUL */

 	/* a failed hash is reported as an out-of-memory error */
 	if (!pg_md5_hash(VARDATA_ANY(input), nbytes, hexsum))
 		ereport(ERROR,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of memory")));

 	/* hand the hex digest back as text */
 	PG_RETURN_TEXT_P(cstring_to_text(hexsum));
 }

 /*
  * md5_bytea
  *		Compute the md5 hash of a bytea value and return it as hex text:
  *		the 16-byte digest is represented in 32 hex characters.
  */
 Datum
 md5_bytea(PG_FUNCTION_ARGS)
 {
 	bytea	   *input = PG_GETARG_BYTEA_PP(0);
 	/* payload size, excluding the varlena header */
 	size_t		nbytes = VARSIZE_ANY_EXHDR(input);
 	char		hexsum[MD5_HASH_LEN + 1];	/* hex digits plus terminating NUL */

 	/* a failed hash is reported as an out-of-memory error */
 	if (!pg_md5_hash(VARDATA_ANY(input), nbytes, hexsum))
 		ereport(ERROR,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of memory")));

 	PG_RETURN_TEXT_P(cstring_to_text(hexsum));
 }

 /*
  * pg_column_size
  *		Return the size of a datum, possibly compressed.
  *
  * Works on any data type.  The argument type's typlen is looked up on the
  * first call and cached in fn_extra for later calls.
  */
 Datum
 pg_column_size(PG_FUNCTION_ARGS)
 {
 	Datum		value = PG_GETARG_DATUM(0);
 	int		   *cached_typlen = (int *) fcinfo->flinfo->fn_extra;
 	int32		result;

 	if (cached_typlen == NULL)
 	{
 		/* First call: determine the datatype of the supplied argument */
 		Oid			argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
 		int			looked_up = get_typlen(argtypeid);

 		if (looked_up == 0)		/* should not happen */
 			elog(ERROR, "cache lookup failed for type %u", argtypeid);

 		/* Keep the answer in the function's own memory context */
 		cached_typlen = (int *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
 												   sizeof(int));
 		*cached_typlen = looked_up;
 		fcinfo->flinfo->fn_extra = cached_typlen;
 	}

 	switch (*cached_typlen)
 	{
 		case -1:
 			/* varlena type, possibly toasted */
 			result = toast_datum_size(value);
 			break;

 		case -2:
 			/* cstring: count the terminating NUL as well */
 			result = strlen(DatumGetCString(value)) + 1;
 			break;

 		default:
 			/* ordinary fixed-width type */
 			result = *cached_typlen;
 			break;
 	}

 	PG_RETURN_INT32(result);
 }


 /*
  * string_agg - Concatenates values and returns string.
  *
  * Syntax: string_agg(value text, delimiter text = '') RETURNS text
  *
  * Note: Any NULL values are ignored. The delimiter passed with the first
  * accumulated value is never used; on later calls the delimiter is
  * appended ahead of the associated value.
  */

 /*
  * makeStringAggState
  *		Allocate the empty StringInfo used as string_agg's transition state.
  *
  * The state is created in the aggregate's memory context so that it stays
  * valid across subsequent transition calls.  Raises an error when the
  * caller is not running inside an aggregate.
  */
 static StringInfo
 makeStringAggState(FunctionCallInfo fcinfo)
 {
 	MemoryContext callercontext;
 	StringInfo	accum;

 	/* cannot be called directly because of internal-type argument */
 	if (!fcinfo->context || !IsA(fcinfo->context, AggState))
 		elog(ERROR, "string_agg_transfn called in non-aggregate context");

 	/* Build the buffer inside the aggregate context, then switch back */
 	callercontext = MemoryContextSwitchTo(((AggState*)fcinfo->context)->aggcontext);
 	accum = makeStringInfo();
 	MemoryContextSwitchTo(callercontext);

 	return accum;
 }

 /*
  * string_agg_transfn
  *		Transition function for string_agg without a delimiter: appends the
  *		value in argument 1 to the state in argument 0.
  *
  * A NULL value leaves the state untouched.  The state is created on the
  * first non-NULL value, so the result is a NULL pointer until then.
  */
 Datum
 string_agg_transfn(PG_FUNCTION_ARGS)
 {
 	StringInfo	accum = NULL;

 	if (!PG_ARGISNULL(0))
 		accum = (StringInfo) PG_GETARG_POINTER(0);

 	/* NULL inputs are ignored: hand the state back unchanged */
 	if (PG_ARGISNULL(1))
 		PG_RETURN_POINTER(accum);

 	if (accum == NULL)
 		accum = makeStringAggState(fcinfo);
 	appendStringInfoText(accum, PG_GETARG_TEXT_PP(1));		/* value */

 	/*
 	 * string_agg()'s transition type is declared as "internal", a
 	 * pass-by-value type the same size as a pointer.
 	 */
 	PG_RETURN_POINTER(accum);
 }

 /*
  * string_agg_delim_transfn
  *		Transition function for string_agg with a delimiter: appends the
  *		delimiter in argument 2 (if not NULL) and then the value in argument 1
  *		to the state in argument 0.
  *
  * A NULL value leaves the state untouched.  No delimiter is written ahead
  * of the first accumulated value.
  */
 Datum
 string_agg_delim_transfn(PG_FUNCTION_ARGS)
 {
 	StringInfo	accum = NULL;

 	if (!PG_ARGISNULL(0))
 		accum = (StringInfo) PG_GETARG_POINTER(0);

 	/* Append the value unless null. */
 	if (!PG_ARGISNULL(1))
 	{
 		if (accum != NULL)
 		{
 			/* a value is already present, so separate it from the new one */
 			if (!PG_ARGISNULL(2))
 				appendStringInfoText(accum, PG_GETARG_TEXT_PP(2));	/* delimiter */
 		}
 		else
 		{
 			/* first value: create the state and skip the delimiter */
 			accum = makeStringAggState(fcinfo);
 		}

 		appendStringInfoText(accum, PG_GETARG_TEXT_PP(1));		/* value */
 	}

 	/*
 	 * string_agg()'s transition type is declared as "internal", a
 	 * pass-by-value type the same size as a pointer.
 	 */
 	PG_RETURN_POINTER(accum);
 }

 /*
  * string_agg_finalfn
  *		Final function for string_agg: converts the accumulated StringInfo
  *		state into a text result.
  *
  * Returns NULL when no state was ever created, i.e. argument 0 is NULL.
  * Raises an error when called outside an aggregate.
  */
 Datum
 string_agg_finalfn(PG_FUNCTION_ARGS)
 {
 	StringInfo	state;

 	/* cannot be called directly because of internal-type argument */
 	if (!(fcinfo->context && IsA(fcinfo->context, AggState)))
 		elog(ERROR, "string_agg_finalfn called in non-aggregate context");

 	state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);

 	/*
 	 * Build the result from the length the StringInfo already tracks rather
 	 * than re-scanning the buffer for a terminating NUL.
 	 */
 	if (state != NULL)
 		PG_RETURN_TEXT_P(cstring_to_text_with_len(state->data, state->len));
 	else
 		PG_RETURN_NULL();
 }