src/backend/utils/adt/varchar.c - cloudberry - Git at Google

 /*-------------------------------------------------------------------------
  *
  * varchar.c
  *	  Functions for the built-in types char(n) and varchar(n).
  *
  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
  *	  src/backend/utils/adt/varchar.c
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"

 #include "access/detoast.h"
 #include "access/htup_details.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_type.h"
 #include "common/hashfn.h"
 #include "libpq/pqformat.h"
 #include "mb/pg_wchar.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/supportnodes.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/pg_locale.h"
 #include "utils/varlena.h"

 /*
  * anychar_typmodin -- shared implementation of bpchartypmodin and
  * varchartypmodin.
  *
  * ta holds the type modifiers; typename is only used in error messages.
  * Exactly one modifier, in the range 1..MaxAttrSize, is accepted.  Returns
  * the typmod, which is that length plus VARHDRSZ.
  */
 static int32
 anychar_typmodin(ArrayType *ta, const char *typename)
 {
 	int			nmods;
 	int32	   *mods = ArrayGetIntegerTypmods(ta, &nmods);
 	int32		declared_len;

 	/*
 	 * The grammar shouldn't allow the wrong number of modifiers for CHAR, so
 	 * a terse error message is good enough here.
 	 */
 	if (nmods != 1)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("invalid type modifier")));

 	declared_len = mods[0];

 	if (declared_len < 1)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("length for type %s must be at least 1", typename)));

 	if (declared_len > MaxAttrSize)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("length for type %s cannot exceed %d",
 						typename, MaxAttrSize)));

 	/*
 	 * For largely historical reasons the typmod is the character count plus
 	 * VARHDRSZ; enough client-side code knows about that that we'd better
 	 * not change it.
 	 */
 	return VARHDRSZ + declared_len;
 }

 /*
  * anychar_typmodout -- shared implementation of bpchartypmodout and
  * varchartypmodout.
  *
  * Returns a palloc'd string: "(N)" where N is typmod minus VARHDRSZ when
  * typmod exceeds VARHDRSZ, otherwise the empty string.
  */
 static char *
 anychar_typmodout(int32 typmod)
 {
 	const int	buflen = 64;
 	char	   *res = (char *) palloc(buflen);

 	if (typmod <= VARHDRSZ)
 		res[0] = '\0';
 	else
 		snprintf(res, buflen, "(%d)", (int) (typmod - VARHDRSZ));

 	return res;
 }


 /*
  * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
  * is for a blank-padded string whose length is specified in CREATE TABLE.
  * VARCHAR() is for storing a string whose length is at most the length
  * specified at CREATE TABLE time.
  *
  * It's hard to implement these types because we cannot figure out
  * the length of the type from the type itself. I changed (hopefully all) the
  * fmgr calls that invoke input functions of a data type to supply the
  * length also. (eg. in INSERTs, we have the tupleDescriptor which contains
  * the length of the attributes and hence the exact length of the char() or
  * varchar(). We pass this to bpcharin() or varcharin().) In the case where
  * we cannot determine the length, we pass in -1 instead and the input
  * converter does not enforce any length check.
  *
  * We actually implement this as a varlena so that we don't have to pass in
  * the length for the comparison functions. (The difference between these
  * types and "text" is that we truncate and possibly blank-pad the string
  * at insertion time.)
  *
  *															  - ay 6/95
  */


 /*****************************************************************************
  *	 bpchar - char()														 *
  *****************************************************************************/

 /*
  * bpchar_input -- common guts of bpcharin and bpcharrecv
  *
  * s is the input text of length len bytes (may not be null-terminated);
  * atttypmod is the typmod value to apply.
  *
  * The typmod limit is counted in characters, which is not necessarily the
  * same as the number of bytes.
  *
  * Input longer than the limit is an error unless every extra byte is a
  * space, in which case the extras are dropped (per SQL).  Shorter input is
  * blank-padded up to the limit.  A typmod below VARHDRSZ (e.g. -1) disables
  * both the check and the padding.
  *
  * If escontext points to an ErrorSaveContext node, that is filled instead
  * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
  * to detect errors.
  */
 BpChar *
 bpchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext)
 {
 	BpChar	   *result;
 	char	   *dst;
 	size_t		copy_bytes = len;	/* bytes taken from the input */
 	size_t		total_bytes = len;	/* payload bytes, padding included */

 	if (atttypmod >= (int32) VARHDRSZ)
 	{
 		size_t		limit_chars = atttypmod - VARHDRSZ;
 		size_t		input_chars = pg_mbstrlen_with_len(s, len);

 		if (input_chars <= limit_chars)
 		{
 			/* one pad byte for each character we are short of the limit */
 			total_bytes = len + (limit_chars - input_chars);
 		}
 		else
 		{
 			/* byte length of the first limit_chars characters */
 			size_t		clip_bytes = pg_mbcharcliplen(s, len, limit_chars);
 			const char *p;

 			/* everything past the clip point must be a space */
 			for (p = s + clip_bytes; p < s + len; p++)
 			{
 				if (*p != ' ')
 					ereturn(escontext, NULL,
 							(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
 							 errmsg("value too long for type character(%d)",
 									(int) limit_chars)));
 			}

 			/* from here on we work in bytes, not characters */
 			copy_bytes = total_bytes = clip_bytes;
 		}
 	}

 	result = (BpChar *) palloc(total_bytes + VARHDRSZ);
 	SET_VARSIZE(result, total_bytes + VARHDRSZ);
 	dst = VARDATA(result);
 	memcpy(dst, s, copy_bytes);

 	/* blank pad the string if necessary */
 	if (total_bytes > copy_bytes)
 		memset(dst + copy_bytes, ' ', total_bytes - copy_bytes);

 	return result;
 }

 /*
  * bpcharin -- text input function for CHARACTER.
  *
  * Argument 0 is the C string to convert and argument 2 is the typmod
  * (declared length plus VARHDRSZ).  The work is done by bpchar_input(),
  * which is handed fcinfo->context as its error-save context.
  */
 Datum
 bpcharin(PG_FUNCTION_ARGS)
 {
 	char	   *input = PG_GETARG_CSTRING(0);
 #ifdef NOT_USED
 	Oid			typelem = PG_GETARG_OID(1);
 #endif
 	int32		typmod = PG_GETARG_INT32(2);

 	PG_RETURN_BPCHAR_P(bpchar_input(input, strlen(input), typmod,
 									fcinfo->context));
 }


 /*
  * bpcharout -- text output function for CHARACTER.
  *
  * Relies on the text-to-cstring conversion, which is only appropriate if
  * BpChar and text are equivalent types.
  */
 Datum
 bpcharout(PG_FUNCTION_ARGS)
 {
 	char	   *cstr = TextDatumGetCString(PG_GETARG_DATUM(0));

 	PG_RETURN_CSTRING(cstr);
 }

 /*
  *		bpcharrecv			- converts external binary format to bpchar
  *
  * Reads the rest of the message buffer (argument 0) as text and passes it
  * to bpchar_input() with the typmod from argument 2.  No error-save context
  * is supplied, so bpchar_input() receives NULL for escontext.
  */
 Datum
 bpcharrecv(PG_FUNCTION_ARGS)
 {
 	StringInfo	msg = (StringInfo) PG_GETARG_POINTER(0);
 #ifdef NOT_USED
 	Oid			typelem = PG_GETARG_OID(1);
 #endif
 	int32		typmod = PG_GETARG_INT32(2);
 	int			payload_len;
 	char	   *payload;
 	BpChar	   *converted;

 	payload = pq_getmsgtext(msg, msg->len - msg->cursor, &payload_len);
 	converted = bpchar_input(payload, payload_len, typmod, NULL);
 	pfree(payload);

 	PG_RETURN_BPCHAR_P(converted);
 }

 /*
  *		bpcharsend			- converts bpchar to binary format
  *
  * Delegates to textsend(), forwarding the call info unchanged.
  */
 Datum
 bpcharsend(PG_FUNCTION_ARGS)
 {
 	Datum		wire = textsend(fcinfo);

 	return wire;
 }


 /*
  * bpchar -- coerce a CHARACTER value to the specified size.
  *
  * Argument 1 is the typmod, ie, declared length plus VARHDRSZ bytes.
  * Argument 2 (isExplicit) is true if this is for an explicit cast to
  * char(N).
  *
  * Truncation rules: for an explicit cast, silently truncate to the given
  * length; for an implicit cast, raise error unless extra characters are
  * all spaces.  (This is sort-of per SQL: the spec would actually have us
  * raise a "completion condition" for the explicit cast case, but Postgres
  * hasn't got such a concept.)
  *
  * The source datum is returned as-is when the typmod is invalid or the
  * value already has exactly the requested number of characters.
  */
 Datum
 bpchar(PG_FUNCTION_ARGS)
 {
 	BpChar	   *source = PG_GETARG_BPCHAR_PP(0);
 	int32		typmod = PG_GETARG_INT32(1);
 	bool		isExplicit = PG_GETARG_BOOL(2);
 	BpChar	   *result;
 	char	   *src;
 	char	   *dst;
 	int32		limit_chars;	/* declared length, in characters */
 	int32		src_bytes;		/* bytes to copy from the source */
 	int32		out_bytes;		/* payload bytes of the result */
 	int			src_chars;		/* number of characters in the source */

 	/* No work if typmod is invalid */
 	if (typmod < (int32) VARHDRSZ)
 		PG_RETURN_BPCHAR_P(source);

 	limit_chars = typmod - VARHDRSZ;

 	src_bytes = VARSIZE_ANY_EXHDR(source);
 	src = VARDATA_ANY(source);
 	src_chars = pg_mbstrlen_with_len(src, src_bytes);

 	/* No work if supplied data matches typmod already */
 	if (src_chars == limit_chars)
 		PG_RETURN_BPCHAR_P(source);

 	if (src_chars < limit_chars)
 	{
 		/* too short: add one pad byte per missing character */
 		out_bytes = src_bytes + (limit_chars - src_chars);
 	}
 	else
 	{
 		/* too long: find the byte length of the first limit_chars chars */
 		size_t		clip_bytes = pg_mbcharcliplen(src, src_bytes, limit_chars);

 		if (!isExplicit)
 		{
 			int			pos = clip_bytes;

 			/* implicit casts may only drop trailing spaces */
 			while (pos < src_bytes)
 			{
 				if (src[pos] != ' ')
 					ereport(ERROR,
 							(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
 							 errmsg("value too long for type character(%d)",
 									limit_chars)));
 				pos++;
 			}
 		}

 		src_bytes = clip_bytes;
 		out_bytes = src_bytes;
 	}

 	/* out_bytes is a byte count from here on, not a character count */
 	Assert(out_bytes >= src_bytes);

 	result = palloc(out_bytes + VARHDRSZ);
 	SET_VARSIZE(result, out_bytes + VARHDRSZ);
 	dst = VARDATA(result);

 	memcpy(dst, src, src_bytes);

 	/* blank pad the string if necessary */
 	if (out_bytes > src_bytes)
 		memset(dst + src_bytes, ' ', out_bytes - src_bytes);

 	PG_RETURN_BPCHAR_P(result);
 }


 /* char_bpchar()
  * Convert char to bpchar(1).
  */
 Datum
 char_bpchar(PG_FUNCTION_ARGS)
 {
 	char		c = PG_GETARG_CHAR(0);
 	BpChar	   *result;

 	result = (BpChar *) palloc(VARHDRSZ + 1);

 	SET_VARSIZE(result, VARHDRSZ + 1);
 	*(VARDATA(result)) = c;

 	PG_RETURN_BPCHAR_P(result);
 }


 /* bpchar_name()
  * Converts a bpchar() type to a NameData type.
  *
  * Input of NAMEDATALEN bytes or more is first clipped to at most
  * NAMEDATALEN - 1 bytes with pg_mbcliplen(); trailing blanks are then
  * removed.
  */
 Datum
 bpchar_name(PG_FUNCTION_ARGS)
 {
 	BpChar	   *source = PG_GETARG_BPCHAR_PP(0);
 	int			nbytes = VARSIZE_ANY_EXHDR(source);
 	char	   *data = VARDATA_ANY(source);
 	Name		result;

 	/* Truncate oversize input */
 	if (nbytes >= NAMEDATALEN)
 		nbytes = pg_mbcliplen(data, nbytes, NAMEDATALEN - 1);

 	/* Remove trailing blanks */
 	while (nbytes > 0 && data[nbytes - 1] == ' ')
 		nbytes--;

 	/* We use palloc0 here to ensure result is zero-padded */
 	result = (Name) palloc0(NAMEDATALEN);
 	memcpy(NameStr(*result), data, nbytes);

 	PG_RETURN_NAME(result);
 }

 /* name_bpchar()
  * Converts a NameData type to a bpchar type.
  *
  * Builds the result with cstring_to_text(), which is only appropriate if
  * BpChar and text are equivalent types.
  */
 Datum
 name_bpchar(PG_FUNCTION_ARGS)
 {
 	Name		source = PG_GETARG_NAME(0);

 	PG_RETURN_BPCHAR_P((BpChar *) cstring_to_text(NameStr(*source)));
 }

 /* Typmod input for char(n); see anychar_typmodin for the validation. */
 Datum
 bpchartypmodin(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_INT32(anychar_typmodin(PG_GETARG_ARRAYTYPE_P(0), "char"));
 }

 /* Typmod output for char(n); formatting is done by anychar_typmodout. */
 Datum
 bpchartypmodout(PG_FUNCTION_ARGS)
 {
 	char	   *formatted = anychar_typmodout(PG_GETARG_INT32(0));

 	PG_RETURN_CSTRING(formatted);
 }


 /*****************************************************************************
  *	 varchar - varchar(n)
  *
  * Note: varchar piggybacks on type text for most operations, and so has no
  * C-coded functions except for I/O and typmod checking.
  *****************************************************************************/

 /*
  * varchar_input -- common guts of varcharin and varcharrecv
  *
  * s is the input text of length len bytes (may not be null-terminated);
  * atttypmod is the typmod value to apply.
  *
  * The typmod limit is counted in characters, which is not necessarily the
  * same as the number of bytes.
  *
  * Input longer than the limit is an error unless every extra byte is a
  * space, in which case the extras are dropped (per SQL).  A typmod below
  * VARHDRSZ disables the check.
  *
  * If escontext points to an ErrorSaveContext node, that is filled instead
  * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
  * to detect errors.
  */
 VarChar *
 varchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext)
 {
 	size_t		keep_bytes = len;

 	if (atttypmod >= (int32) VARHDRSZ)
 	{
 		size_t		limit_chars = atttypmod - VARHDRSZ;

 		/*
 		 * A byte length within the limit cannot exceed it in characters
 		 * either, so only longer inputs need the multibyte-aware check.
 		 */
 		if (len > limit_chars)
 		{
 			size_t		clip_bytes = pg_mbcharcliplen(s, len, limit_chars);
 			const char *p;

 			/* everything past the clip point must be a space */
 			for (p = s + clip_bytes; p < s + len; p++)
 			{
 				if (*p != ' ')
 					ereturn(escontext, NULL,
 							(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
 							 errmsg("value too long for type character varying(%d)",
 									(int) limit_chars)));
 			}

 			keep_bytes = clip_bytes;
 		}
 	}

 	/*
 	 * We can use cstring_to_text_with_len because VarChar and text are
 	 * binary-compatible types.
 	 */
 	return (VarChar *) cstring_to_text_with_len(s, keep_bytes);
 }

 /*
  * varcharin -- text input function for VARCHAR.
  *
  * Argument 0 is the C string to convert and argument 2 is the typmod
  * (declared length plus VARHDRSZ).  The work is done by varchar_input(),
  * which is handed fcinfo->context as its error-save context.
  */
 Datum
 varcharin(PG_FUNCTION_ARGS)
 {
 	char	   *input = PG_GETARG_CSTRING(0);
 #ifdef NOT_USED
 	Oid			typelem = PG_GETARG_OID(1);
 #endif
 	int32		typmod = PG_GETARG_INT32(2);

 	PG_RETURN_VARCHAR_P(varchar_input(input, strlen(input), typmod,
 									  fcinfo->context));
 }


 /*
  * varcharout -- text output function for VARCHAR.
  *
  * Relies on the text-to-cstring conversion, which is only appropriate if
  * VarChar and text are equivalent types.
  */
 Datum
 varcharout(PG_FUNCTION_ARGS)
 {
 	char	   *cstr = TextDatumGetCString(PG_GETARG_DATUM(0));

 	PG_RETURN_CSTRING(cstr);
 }

 /*
  *		varcharrecv			- converts external binary format to varchar
  *
  * Reads the rest of the message buffer (argument 0) as text and passes it
  * to varchar_input() with the typmod from argument 2.  No error-save
  * context is supplied, so varchar_input() receives NULL for escontext.
  */
 Datum
 varcharrecv(PG_FUNCTION_ARGS)
 {
 	StringInfo	msg = (StringInfo) PG_GETARG_POINTER(0);
 #ifdef NOT_USED
 	Oid			typelem = PG_GETARG_OID(1);
 #endif
 	int32		typmod = PG_GETARG_INT32(2);
 	int			payload_len;
 	char	   *payload;
 	VarChar    *converted;

 	payload = pq_getmsgtext(msg, msg->len - msg->cursor, &payload_len);
 	converted = varchar_input(payload, payload_len, typmod, NULL);
 	pfree(payload);

 	PG_RETURN_VARCHAR_P(converted);
 }

 /*
  *		varcharsend			- converts varchar to binary format
  *
  * Delegates to textsend(), forwarding the call info unchanged.
  */
 Datum
 varcharsend(PG_FUNCTION_ARGS)
 {
 	Datum		wire = textsend(fcinfo);

 	return wire;
 }


 /*
  * varchar_support()
  *
  * Planner support function for the varchar() length coercion function.
  *
  * Currently, the only interesting thing we can do is flatten calls that set
  * the new maximum length >= the previous maximum length.  We can ignore the
  * isExplicit argument, since that only affects truncation cases.
  */
 Datum
 varchar_support(PG_FUNCTION_ARGS)
 {
 	Node	   *rawreq = (Node *) PG_GETARG_POINTER(0);
 	Node	   *ret = NULL;

 	if (IsA(rawreq, SupportRequestSimplify))
 	{
 		SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
 		FuncExpr   *expr = req->fcall;
 		Node	   *typmod;

 		Assert(list_length(expr->args) >= 2);

 		typmod = (Node *) lsecond(expr->args);

 		if (IsA(typmod, Const) && !((Const *) typmod)->constisnull)
 		{
 			Node	   *source = (Node *) linitial(expr->args);
 			int32		old_typmod = exprTypmod(source);
 			int32		new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
 			int32		old_max = old_typmod - VARHDRSZ;
 			int32		new_max = new_typmod - VARHDRSZ;

 			if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max))
 				ret = relabel_to_typmod(source, new_typmod);
 		}
 	}

 	PG_RETURN_POINTER(ret);
 }

 /*
  * varchar -- coerce a VARCHAR value to the specified size.
  *
  * Argument 1 is the typmod, ie, declared length plus VARHDRSZ bytes.
  * Argument 2 (isExplicit) is true if this is for an explicit cast to
  * varchar(N).
  *
  * Truncation rules: for an explicit cast, silently truncate to the given
  * length; for an implicit cast, raise error unless extra characters are
  * all spaces.  (This is sort-of per SQL: the spec would actually have us
  * raise a "completion condition" for the explicit cast case, but Postgres
  * hasn't got such a concept.)
  */
 Datum
 varchar(PG_FUNCTION_ARGS)
 {
 	VarChar    *source = PG_GETARG_VARCHAR_PP(0);
 	int32		typmod = PG_GETARG_INT32(1);
 	bool		isExplicit = PG_GETARG_BOOL(2);
 	int32		src_bytes = VARSIZE_ANY_EXHDR(source);
 	char	   *src = VARDATA_ANY(source);
 	int32		limit = typmod - VARHDRSZ;
 	size_t		clip_bytes;

 	/* No work if typmod is invalid or supplied data fits it already */
 	if (limit < 0 || src_bytes <= limit)
 		PG_RETURN_VARCHAR_P(source);

 	/* only reach here if string is too long... */

 	/* truncate multibyte string preserving multibyte boundary */
 	clip_bytes = pg_mbcharcliplen(src, src_bytes, limit);

 	if (!isExplicit)
 	{
 		int			pos = clip_bytes;

 		/* implicit casts may only drop trailing spaces */
 		while (pos < src_bytes)
 		{
 			if (src[pos] != ' ')
 				ereport(ERROR,
 						(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
 						 errmsg("value too long for type character varying(%d)",
 								limit)));
 			pos++;
 		}
 	}

 	PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(src,
 															 clip_bytes));
 }

 /* Typmod input for varchar(n); see anychar_typmodin for the validation. */
 Datum
 varchartypmodin(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_INT32(anychar_typmodin(PG_GETARG_ARRAYTYPE_P(0), "varchar"));
 }

 /* Typmod output for varchar(n); formatting is done by anychar_typmodout. */
 Datum
 varchartypmodout(PG_FUNCTION_ARGS)
 {
 	char	   *formatted = anychar_typmodout(PG_GETARG_INT32(0));

 	PG_RETURN_CSTRING(formatted);
 }


 /*****************************************************************************
  * Exported functions
  *****************************************************************************/

 /* "True" length (not counting trailing blanks) of a BpChar, in bytes */
 static inline int
 bcTruelen(BpChar *arg)
 {
 	char	   *data = VARDATA_ANY(arg);
 	int			nbytes = VARSIZE_ANY_EXHDR(arg);

 	return bpchartruelen(data, nbytes);
 }

 /*
  * bpchartruelen -- length of s[0..len) with trailing spaces removed.
  *
  * s need not be null-terminated; only the first len bytes are examined.
  * Returns len unchanged when it is zero or negative.
  */
 int
 bpchartruelen(char *s, int len)
 {
 	int			truelen = len;

 	/*
 	 * Note that we rely on the assumption that ' ' is a singleton unit on
 	 * every supported multibyte server encoding.
 	 */
 	while (truelen > 0 && s[truelen - 1] == ' ')
 		truelen--;

 	return truelen;
 }

 /*
  * bpcharlen -- length of a bpchar value in characters, ignoring trailing
  * spaces.
  */
 Datum
 bpcharlen(PG_FUNCTION_ARGS)
 {
 	BpChar	   *arg = PG_GETARG_BPCHAR_PP(0);
 	int			nbytes = bcTruelen(arg);	/* bytes, sans trailing spaces */

 	/* with a single-byte encoding, bytes and characters coincide */
 	if (pg_database_encoding_max_length() == 1)
 		PG_RETURN_INT32(nbytes);

 	/* in multibyte encoding, convert to number of characters */
 	PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(arg), nbytes));
 }

 /*
  * bpcharoctetlen -- raw datum size minus VARHDRSZ, as reported by
  * toast_raw_datum_size().  The size is taken from the datum itself, so we
  * need not detoast the input at all.
  */
 Datum
 bpcharoctetlen(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
 }


 /*****************************************************************************
  *	Comparison Functions used for bpchar
  *
  * Note: btree indexes need these routines not to leak memory; therefore,
  * be careful to free working copies of toasted datums.  Most places don't
  * need to be so careful.
  *****************************************************************************/

 /*
  * check_collation_set -- raise an indeterminate-collation error unless
  * collid is a valid OID.
  */
 static void
 check_collation_set(Oid collid)
 {
 	if (OidIsValid(collid))
 		return;

 	/*
 	 * This typically means that the parser could not resolve a conflict of
 	 * implicit collations, so report it that way.
 	 */
 	ereport(ERROR,
 			(errcode(ERRCODE_INDETERMINATE_COLLATION),
 			 errmsg("could not determine which collation to use for string comparison"),
 			 errhint("Use the COLLATE clause to set the collation explicitly.")));
 }

 /*
  * bpchareq -- equality of two bpchar values, ignoring trailing spaces.
  *
  * With the C collation or a deterministic locale the values are compared
  * bytewise; otherwise the comparison goes through varstr_cmp().
  */
 Datum
 bpchareq(PG_FUNCTION_ARGS)
 {
 	BpChar	   *lhs = PG_GETARG_BPCHAR_PP(0);
 	BpChar	   *rhs = PG_GETARG_BPCHAR_PP(1);
 	Oid			collid = PG_GET_COLLATION();
 	int			lhs_len;
 	int			rhs_len;
 	bool		bytewise;
 	bool		equal;

 	check_collation_set(collid);

 	lhs_len = bcTruelen(lhs);
 	rhs_len = bcTruelen(rhs);

 	if (lc_collate_is_c(collid))
 		bytewise = true;
 	else
 	{
 		pg_locale_t locale = pg_newlocale_from_collation(collid);

 		bytewise = pg_locale_deterministic(locale);
 	}

 	if (bytewise)
 	{
 		/*
 		 * Since we only care about equality or not-equality, we can avoid all
 		 * the expense of strcoll() here, and just do bitwise comparison.
 		 */
 		equal = (lhs_len == rhs_len &&
 				 memcmp(VARDATA_ANY(lhs), VARDATA_ANY(rhs), lhs_len) == 0);
 	}
 	else
 	{
 		equal = (varstr_cmp(VARDATA_ANY(lhs), lhs_len,
 							VARDATA_ANY(rhs), rhs_len,
 							collid) == 0);
 	}

 	PG_FREE_IF_COPY(lhs, 0);
 	PG_FREE_IF_COPY(rhs, 1);

 	PG_RETURN_BOOL(equal);
 }

 /*
  * bpcharne -- inequality of two bpchar values, ignoring trailing spaces.
  *
  * With the C collation or a deterministic locale the values are compared
  * bytewise; otherwise the comparison goes through varstr_cmp().
  */
 Datum
 bpcharne(PG_FUNCTION_ARGS)
 {
 	BpChar	   *lhs = PG_GETARG_BPCHAR_PP(0);
 	BpChar	   *rhs = PG_GETARG_BPCHAR_PP(1);
 	Oid			collid = PG_GET_COLLATION();
 	int			lhs_len;
 	int			rhs_len;
 	bool		bytewise;
 	bool		differ;

 	check_collation_set(collid);

 	lhs_len = bcTruelen(lhs);
 	rhs_len = bcTruelen(rhs);

 	if (lc_collate_is_c(collid))
 		bytewise = true;
 	else
 	{
 		pg_locale_t locale = pg_newlocale_from_collation(collid);

 		bytewise = pg_locale_deterministic(locale);
 	}

 	if (bytewise)
 	{
 		/*
 		 * Since we only care about equality or not-equality, we can avoid all
 		 * the expense of strcoll() here, and just do bitwise comparison.
 		 */
 		differ = (lhs_len != rhs_len ||
 				  memcmp(VARDATA_ANY(lhs), VARDATA_ANY(rhs), lhs_len) != 0);
 	}
 	else
 	{
 		differ = (varstr_cmp(VARDATA_ANY(lhs), lhs_len,
 							 VARDATA_ANY(rhs), rhs_len,
 							 collid) != 0);
 	}

 	PG_FREE_IF_COPY(lhs, 0);
 	PG_FREE_IF_COPY(rhs, 1);

 	PG_RETURN_BOOL(differ);
 }

 /* bpcharlt -- "less than" on bpchar values, ignoring trailing spaces. */
 Datum
 bpcharlt(PG_FUNCTION_ARGS)
 {
 	BpChar	   *lhs = PG_GETARG_BPCHAR_PP(0);
 	BpChar	   *rhs = PG_GETARG_BPCHAR_PP(1);
 	int			lhs_len = bcTruelen(lhs);
 	int			rhs_len = bcTruelen(rhs);
 	int			order;

 	order = varstr_cmp(VARDATA_ANY(lhs), lhs_len,
 					   VARDATA_ANY(rhs), rhs_len,
 					   PG_GET_COLLATION());

 	/* release any detoasted copies before returning */
 	PG_FREE_IF_COPY(lhs, 0);
 	PG_FREE_IF_COPY(rhs, 1);

 	PG_RETURN_BOOL(order < 0);
 }

 /* bpcharle -- "less than or equal" on bpchar values, ignoring trailing spaces. */
 Datum
 bpcharle(PG_FUNCTION_ARGS)
 {
 	BpChar	   *lhs = PG_GETARG_BPCHAR_PP(0);
 	BpChar	   *rhs = PG_GETARG_BPCHAR_PP(1);
 	int			lhs_len = bcTruelen(lhs);
 	int			rhs_len = bcTruelen(rhs);
 	int			order;

 	order = varstr_cmp(VARDATA_ANY(lhs), lhs_len,
 					   VARDATA_ANY(rhs), rhs_len,
 					   PG_GET_COLLATION());

 	/* release any detoasted copies before returning */
 	PG_FREE_IF_COPY(lhs, 0);
 	PG_FREE_IF_COPY(rhs, 1);

 	PG_RETURN_BOOL(order <= 0);
 }

 /* bpchargt -- "greater than" on bpchar values, ignoring trailing spaces. */
 Datum
 bpchargt(PG_FUNCTION_ARGS)
 {
 	BpChar	   *lhs = PG_GETARG_BPCHAR_PP(0);
 	BpChar	   *rhs = PG_GETARG_BPCHAR_PP(1);
 	int			lhs_len = bcTruelen(lhs);
 	int			rhs_len = bcTruelen(rhs);
 	int			order;

 	order = varstr_cmp(VARDATA_ANY(lhs), lhs_len,
 					   VARDATA_ANY(rhs), rhs_len,
 					   PG_GET_COLLATION());

 	/* release any detoasted copies before returning */
 	PG_FREE_IF_COPY(lhs, 0);
 	PG_FREE_IF_COPY(rhs, 1);

 	PG_RETURN_BOOL(order > 0);
 }

 /* bpcharge -- "greater than or equal" on bpchar values, ignoring trailing spaces. */
 Datum
 bpcharge(PG_FUNCTION_ARGS)
 {
 	BpChar	   *lhs = PG_GETARG_BPCHAR_PP(0);
 	BpChar	   *rhs = PG_GETARG_BPCHAR_PP(1);
 	int			lhs_len = bcTruelen(lhs);
 	int			rhs_len = bcTruelen(rhs);
 	int			order;

 	order = varstr_cmp(VARDATA_ANY(lhs), lhs_len,
 					   VARDATA_ANY(rhs), rhs_len,
 					   PG_GET_COLLATION());

 	/* release any detoasted copies before returning */
 	PG_FREE_IF_COPY(lhs, 0);
 	PG_FREE_IF_COPY(rhs, 1);

 	PG_RETURN_BOOL(order >= 0);
 }

 /*
  * bpcharcmp -- three-way comparison of bpchar values, ignoring trailing
  * spaces; returns whatever varstr_cmp() reports.
  */
 Datum
 bpcharcmp(PG_FUNCTION_ARGS)
 {
 	BpChar	   *lhs = PG_GETARG_BPCHAR_PP(0);
 	BpChar	   *rhs = PG_GETARG_BPCHAR_PP(1);
 	int			lhs_len = bcTruelen(lhs);
 	int			rhs_len = bcTruelen(rhs);
 	int			order;

 	order = varstr_cmp(VARDATA_ANY(lhs), lhs_len,
 					   VARDATA_ANY(rhs), rhs_len,
 					   PG_GET_COLLATION());

 	/* release any detoasted copies before returning */
 	PG_FREE_IF_COPY(lhs, 0);
 	PG_FREE_IF_COPY(rhs, 1);

 	PG_RETURN_INT32(order);
 }

 /*
  * bpchar_sortsupport -- set up sort support for bpchar using the generic
  * string SortSupport code and the collation recorded in the SortSupport
  * struct.  The setup runs in ssup->ssup_cxt.
  */
 Datum
 bpchar_sortsupport(PG_FUNCTION_ARGS)
 {
 	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
 	MemoryContext caller_cxt = MemoryContextSwitchTo(ssup->ssup_cxt);

 	/* Use generic string SortSupport */
 	varstr_sortsupport(ssup, BPCHAROID, ssup->ssup_collation);

 	MemoryContextSwitchTo(caller_cxt);

 	PG_RETURN_VOID();
 }

 /*
  * bpchar_larger -- return whichever argument compares larger (trailing
  * spaces ignored); the first argument wins ties.
  */
 Datum
 bpchar_larger(PG_FUNCTION_ARGS)
 {
 	BpChar	   *lhs = PG_GETARG_BPCHAR_PP(0);
 	BpChar	   *rhs = PG_GETARG_BPCHAR_PP(1);
 	int			lhs_len = bcTruelen(lhs);
 	int			rhs_len = bcTruelen(rhs);
 	int			order;

 	order = varstr_cmp(VARDATA_ANY(lhs), lhs_len,
 					   VARDATA_ANY(rhs), rhs_len,
 					   PG_GET_COLLATION());

 	if (order >= 0)
 		PG_RETURN_BPCHAR_P(lhs);

 	PG_RETURN_BPCHAR_P(rhs);
 }

 /*
  * bpchar_smaller -- return whichever argument compares smaller (trailing
  * spaces ignored); the first argument wins ties.
  */
 Datum
 bpchar_smaller(PG_FUNCTION_ARGS)
 {
 	BpChar	   *lhs = PG_GETARG_BPCHAR_PP(0);
 	BpChar	   *rhs = PG_GETARG_BPCHAR_PP(1);
 	int			lhs_len = bcTruelen(lhs);
 	int			rhs_len = bcTruelen(rhs);
 	int			order;

 	order = varstr_cmp(VARDATA_ANY(lhs), lhs_len,
 					   VARDATA_ANY(rhs), rhs_len,
 					   PG_GET_COLLATION());

 	if (order <= 0)
 		PG_RETURN_BPCHAR_P(lhs);

 	PG_RETURN_BPCHAR_P(rhs);
 }


 /*
  * bpchar needs a specialized hash function because we want to ignore
  * trailing blanks in comparisons.
  *
  * For the C collation or a deterministic locale the blank-trimmed bytes are
  * hashed directly.  Otherwise the key is first transformed with
  * pg_strnxfrm() and the transformed bytes are hashed.
  *
  * Raises an error if no collation was supplied.
  */
 Datum
 hashbpchar(PG_FUNCTION_ARGS)
 {
 	BpChar	   *key = PG_GETARG_BPCHAR_PP(0);
 	Oid			collid = PG_GET_COLLATION();
 	char	   *keydata;
 	int			keylen;
 	pg_locale_t mylocale = 0;
 	Datum		result;

 	if (!collid)
 		ereport(ERROR,
 				(errcode(ERRCODE_INDETERMINATE_COLLATION),
 				 errmsg("could not determine which collation to use for string hashing"),
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));

 	keydata = VARDATA_ANY(key);
 	keylen = bcTruelen(key);

 	if (!lc_collate_is_c(collid))
 		mylocale = pg_newlocale_from_collation(collid);

 	if (pg_locale_deterministic(mylocale))
 	{
 		result = hash_any((unsigned char *) keydata, keylen);
 	}
 	else
 	{
 		Size		bsize,
 					rsize;
 		char	   *buf;

 		/* first call only measures; second call fills the buffer */
 		bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
 		buf = palloc(bsize + 1);

 		rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);

 		/* the second call may return a smaller value than the first */
 		if (rsize > bsize)
 			elog(ERROR, "pg_strnxfrm() returned unexpected result");

 		/*
 		 * In principle, there's no reason to include the terminating NUL
 		 * character in the hash, but it was done before and the behavior must
 		 * be preserved.
 		 *
 		 * Hash exactly the bytes the second call reported (rsize) plus the
 		 * NUL.  Using bsize here would, whenever rsize < bsize, also hash
 		 * the rest of the palloc'd buffer, which nothing here initializes,
 		 * making the hash value unpredictable.
 		 */
 		result = hash_any((uint8_t *) buf, rsize + 1);

 		pfree(buf);
 	}

 	/* Avoid leaking memory for toasted inputs */
 	PG_FREE_IF_COPY(key, 0);

 	return result;
 }

 /*
  * hashbpcharextended -- extended hash of a bpchar value, ignoring trailing
  * blanks.
  *
  * Same structure as hashbpchar, except that the int64 passed as argument 1
  * is forwarded to hash_any_extended() (presumably the hash seed -- confirm
  * against hash_any_extended's declaration).
  *
  * Raises an error if no collation was supplied.
  */
 Datum
 hashbpcharextended(PG_FUNCTION_ARGS)
 {
 	BpChar	   *key = PG_GETARG_BPCHAR_PP(0);
 	Oid			collid = PG_GET_COLLATION();
 	char	   *keydata;
 	int			keylen;
 	pg_locale_t mylocale = 0;
 	Datum		result;

 	if (!collid)
 		ereport(ERROR,
 				(errcode(ERRCODE_INDETERMINATE_COLLATION),
 				 errmsg("could not determine which collation to use for string hashing"),
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));

 	keydata = VARDATA_ANY(key);
 	keylen = bcTruelen(key);

 	if (!lc_collate_is_c(collid))
 		mylocale = pg_newlocale_from_collation(collid);

 	if (pg_locale_deterministic(mylocale))
 	{
 		result = hash_any_extended((unsigned char *) keydata, keylen,
 								   PG_GETARG_INT64(1));
 	}
 	else
 	{
 		Size		bsize,
 					rsize;
 		char	   *buf;

 		/* first call only measures; second call fills the buffer */
 		bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
 		buf = palloc(bsize + 1);

 		rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);

 		/* the second call may return a smaller value than the first */
 		if (rsize > bsize)
 			elog(ERROR, "pg_strnxfrm() returned unexpected result");

 		/*
 		 * In principle, there's no reason to include the terminating NUL
 		 * character in the hash, but it was done before and the behavior must
 		 * be preserved.
 		 *
 		 * Hash exactly the bytes the second call reported (rsize) plus the
 		 * NUL.  Using bsize here would, whenever rsize < bsize, also hash
 		 * the rest of the palloc'd buffer, which nothing here initializes,
 		 * making the hash value unpredictable.
 		 */
 		result = hash_any_extended((uint8_t *) buf, rsize + 1,
 								   PG_GETARG_INT64(1));

 		pfree(buf);
 	}

 	/* Avoid leaking memory for toasted inputs */
 	PG_FREE_IF_COPY(key, 0);

 	return result;
 }

 /*
  * The following operators support character-by-character comparison
  * of bpchar datums, to allow building indexes suitable for LIKE clauses.
  * Note that the regular bpchareq/bpcharne comparison operators, and
  * regular support functions 1 and 2 with "C" collation are assumed to be
  * compatible with these!
  */

 /*
  * internal_bpchar_pattern_compare -- bytewise comparison of two bpchar
  * values with trailing spaces ignored.
  *
  * Returns memcmp's result over the common prefix when that is nonzero;
  * otherwise -1, 0 or 1 according to which trimmed value is shorter.
  */
 static int
 internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
 {
 	int			lhs_len = bcTruelen(arg1);
 	int			rhs_len = bcTruelen(arg2);
 	int			diff;

 	diff = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
 				  Min(lhs_len, rhs_len));
 	if (diff != 0)
 		return diff;

 	/* common prefix is identical: the shorter value sorts first */
 	if (lhs_len == rhs_len)
 		return 0;

 	return (lhs_len < rhs_len) ? -1 : 1;
 }


 /* bpchar_pattern_lt -- bytewise "less than" for bpchar pattern ops. */
 Datum
 bpchar_pattern_lt(PG_FUNCTION_ARGS)
 {
 	BpChar	   *lhs = PG_GETARG_BPCHAR_PP(0);
 	BpChar	   *rhs = PG_GETARG_BPCHAR_PP(1);
 	int			order = internal_bpchar_pattern_compare(lhs, rhs);

 	/* release any detoasted copies before returning */
 	PG_FREE_IF_COPY(lhs, 0);
 	PG_FREE_IF_COPY(rhs, 1);

 	PG_RETURN_BOOL(order < 0);
 }


 /* bpchar_pattern_le -- bytewise "less than or equal" for bpchar pattern ops. */
 Datum
 bpchar_pattern_le(PG_FUNCTION_ARGS)
 {
 	BpChar	   *lhs = PG_GETARG_BPCHAR_PP(0);
 	BpChar	   *rhs = PG_GETARG_BPCHAR_PP(1);
 	int			order = internal_bpchar_pattern_compare(lhs, rhs);

 	/* release any detoasted copies before returning */
 	PG_FREE_IF_COPY(lhs, 0);
 	PG_FREE_IF_COPY(rhs, 1);

 	PG_RETURN_BOOL(order <= 0);
 }


 /* bpchar_pattern_ge -- bytewise "greater than or equal" for bpchar pattern ops. */
 Datum
 bpchar_pattern_ge(PG_FUNCTION_ARGS)
 {
 	BpChar	   *lhs = PG_GETARG_BPCHAR_PP(0);
 	BpChar	   *rhs = PG_GETARG_BPCHAR_PP(1);
 	int			order = internal_bpchar_pattern_compare(lhs, rhs);

 	/* release any detoasted copies before returning */
 	PG_FREE_IF_COPY(lhs, 0);
 	PG_FREE_IF_COPY(rhs, 1);

 	PG_RETURN_BOOL(order >= 0);
 }


 /* bpchar_pattern_gt -- bytewise "greater than" for bpchar pattern ops. */
 Datum
 bpchar_pattern_gt(PG_FUNCTION_ARGS)
 {
 	BpChar	   *lhs = PG_GETARG_BPCHAR_PP(0);
 	BpChar	   *rhs = PG_GETARG_BPCHAR_PP(1);
 	int			order = internal_bpchar_pattern_compare(lhs, rhs);

 	/* release any detoasted copies before returning */
 	PG_FREE_IF_COPY(lhs, 0);
 	PG_FREE_IF_COPY(rhs, 1);

 	PG_RETURN_BOOL(order > 0);
 }


 /*
  * btbpchar_pattern_cmp -- three-way bytewise comparison for bpchar pattern
  * ops; returns the result of internal_bpchar_pattern_compare().
  */
 Datum
 btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
 {
 	BpChar	   *lhs = PG_GETARG_BPCHAR_PP(0);
 	BpChar	   *rhs = PG_GETARG_BPCHAR_PP(1);
 	int			order = internal_bpchar_pattern_compare(lhs, rhs);

 	/* release any detoasted copies before returning */
 	PG_FREE_IF_COPY(lhs, 0);
 	PG_FREE_IF_COPY(rhs, 1);

 	PG_RETURN_INT32(order);
 }


 /*
  * btbpchar_pattern_sortsupport -- set up sort support for bpchar pattern
  * ops.  The setup runs in ssup->ssup_cxt.
  */
 Datum
 btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
 {
 	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
 	MemoryContext caller_cxt = MemoryContextSwitchTo(ssup->ssup_cxt);

 	/* Use generic string SortSupport, forcing "C" collation */
 	varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID);

 	MemoryContextSwitchTo(caller_cxt);

 	PG_RETURN_VOID();
 }