| /*------------------------------------------------------------------------- |
| * |
| * varchar.c |
| * Functions for the built-in types char(n) and varchar(n). |
| * |
| * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/utils/adt/varchar.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include "access/detoast.h" |
| #include "access/htup_details.h" |
| #include "catalog/pg_collation.h" |
| #include "catalog/pg_type.h" |
| #include "common/hashfn.h" |
| #include "libpq/pqformat.h" |
| #include "mb/pg_wchar.h" |
| #include "nodes/nodeFuncs.h" |
| #include "nodes/supportnodes.h" |
| #include "utils/array.h" |
| #include "utils/builtins.h" |
| #include "utils/lsyscache.h" |
| #include "utils/pg_locale.h" |
| #include "utils/varlena.h" |
| |
| /* common code for bpchartypmodin and varchartypmodin */ |
| static int32 |
| anychar_typmodin(ArrayType *ta, const char *typename) |
| { |
| int32 typmod; |
| int32 *tl; |
| int n; |
| |
| tl = ArrayGetIntegerTypmods(ta, &n); |
| |
| /* |
| * we're not too tense about good error message here because grammar |
| * shouldn't allow wrong number of modifiers for CHAR |
| */ |
| if (n != 1) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("invalid type modifier"))); |
| |
| if (*tl < 1) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("length for type %s must be at least 1", typename))); |
| if (*tl > MaxAttrSize) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("length for type %s cannot exceed %d", |
| typename, MaxAttrSize))); |
| |
| /* |
| * For largely historical reasons, the typmod is VARHDRSZ plus the number |
| * of characters; there is enough client-side code that knows about that |
| * that we'd better not change it. |
| */ |
| typmod = VARHDRSZ + *tl; |
| |
| return typmod; |
| } |
| |
| /* common code for bpchartypmodout and varchartypmodout */ |
| static char * |
| anychar_typmodout(int32 typmod) |
| { |
| char *res = (char *) palloc(64); |
| |
| if (typmod > VARHDRSZ) |
| snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ)); |
| else |
| *res = '\0'; |
| |
| return res; |
| } |
| |
| |
| /* |
| * CHAR() and VARCHAR() types are part of the SQL standard. CHAR() |
| * is for blank-padded string whose length is specified in CREATE TABLE. |
| * VARCHAR is for storing string whose length is at most the length specified |
| * at CREATE TABLE time. |
| * |
| * It's hard to implement these types because we cannot figure out |
| * the length of the type from the type itself. I changed (hopefully all) the |
| * fmgr calls that invoke input functions of a data type to supply the |
| * length also. (eg. in INSERTs, we have the tupleDescriptor which contains |
| * the length of the attributes and hence the exact length of the char() or |
| * varchar(). We pass this to bpcharin() or varcharin().) In the case where |
| * we cannot determine the length, we pass in -1 instead and the input |
| * converter does not enforce any length check. |
| * |
| * We actually implement this as a varlena so that we don't have to pass in |
| * the length for the comparison functions. (The difference between these |
| * types and "text" is that we truncate and possibly blank-pad the string |
| * at insertion time.) |
| * |
| * - ay 6/95 |
| */ |
| |
| |
| /***************************************************************************** |
| * bpchar - char() * |
| *****************************************************************************/ |
| |
| /* |
| * bpchar_input -- common guts of bpcharin and bpcharrecv |
| * |
| * s is the input text of length len (may not be null-terminated) |
| * atttypmod is the typmod value to apply |
| * |
| * Note that atttypmod is measured in characters, which |
| * is not necessarily the same as the number of bytes. |
| * |
| * If the input string is too long, raise an error, unless the extra |
| * characters are spaces, in which case they're truncated. (per SQL) |
| * |
| * If escontext points to an ErrorSaveContext node, that is filled instead |
| * of throwing an error; the caller must check SOFT_ERROR_OCCURRED() |
| * to detect errors. |
| */ |
| BpChar * |
| bpchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext) |
| { |
| BpChar *result; |
| char *r; |
| size_t maxlen; |
| |
| /* If typmod is -1 (or invalid), use the actual string length */ |
| if (atttypmod < (int32) VARHDRSZ) |
| maxlen = len; |
| else |
| { |
| size_t charlen; /* number of CHARACTERS in the input */ |
| |
| maxlen = atttypmod - VARHDRSZ; |
| charlen = pg_mbstrlen_with_len(s, len); |
| if (charlen > maxlen) |
| { |
| /* Verify that extra characters are spaces, and clip them off */ |
| size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen); |
| size_t j; |
| |
| /* |
| * at this point, len is the actual BYTE length of the input |
| * string, maxlen is the max number of CHARACTERS allowed for this |
| * bpchar type, mbmaxlen is the length in BYTES of those chars. |
| */ |
| for (j = mbmaxlen; j < len; j++) |
| { |
| if (s[j] != ' ') |
| ereturn(escontext, NULL, |
| (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), |
| errmsg("value too long for type character(%d)", |
| (int) maxlen))); |
| } |
| |
| /* |
| * Now we set maxlen to the necessary byte length, not the number |
| * of CHARACTERS! |
| */ |
| maxlen = len = mbmaxlen; |
| } |
| else |
| { |
| /* |
| * Now we set maxlen to the necessary byte length, not the number |
| * of CHARACTERS! |
| */ |
| maxlen = len + (maxlen - charlen); |
| } |
| } |
| |
| result = (BpChar *) palloc(maxlen + VARHDRSZ); |
| SET_VARSIZE(result, maxlen + VARHDRSZ); |
| r = VARDATA(result); |
| memcpy(r, s, len); |
| |
| /* blank pad the string if necessary */ |
| if (maxlen > len) |
| memset(r + len, ' ', maxlen - len); |
| |
| return result; |
| } |
| |
| /* |
| * Convert a C string to CHARACTER internal representation. atttypmod |
| * is the declared length of the type plus VARHDRSZ. |
| */ |
| Datum |
| bpcharin(PG_FUNCTION_ARGS) |
| { |
| char *s = PG_GETARG_CSTRING(0); |
| #ifdef NOT_USED |
| Oid typelem = PG_GETARG_OID(1); |
| #endif |
| int32 atttypmod = PG_GETARG_INT32(2); |
| BpChar *result; |
| |
| result = bpchar_input(s, strlen(s), atttypmod, fcinfo->context); |
| PG_RETURN_BPCHAR_P(result); |
| } |
| |
| |
| /* |
| * Convert a CHARACTER value to a C string. |
| * |
| * Uses the text conversion functions, which is only appropriate if BpChar |
| * and text are equivalent types. |
| */ |
| Datum |
| bpcharout(PG_FUNCTION_ARGS) |
| { |
| Datum txt = PG_GETARG_DATUM(0); |
| |
| PG_RETURN_CSTRING(TextDatumGetCString(txt)); |
| } |
| |
| /* |
| * bpcharrecv - converts external binary format to bpchar |
| */ |
| Datum |
| bpcharrecv(PG_FUNCTION_ARGS) |
| { |
| StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); |
| #ifdef NOT_USED |
| Oid typelem = PG_GETARG_OID(1); |
| #endif |
| int32 atttypmod = PG_GETARG_INT32(2); |
| BpChar *result; |
| char *str; |
| int nbytes; |
| |
| str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); |
| result = bpchar_input(str, nbytes, atttypmod, NULL); |
| pfree(str); |
| PG_RETURN_BPCHAR_P(result); |
| } |
| |
| /* |
| * bpcharsend - converts bpchar to binary format |
| */ |
| Datum |
| bpcharsend(PG_FUNCTION_ARGS) |
| { |
| /* Exactly the same as textsend, so share code */ |
| return textsend(fcinfo); |
| } |
| |
| |
| /* |
| * Converts a CHARACTER type to the specified size. |
| * |
| * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes. |
| * isExplicit is true if this is for an explicit cast to char(N). |
| * |
| * Truncation rules: for an explicit cast, silently truncate to the given |
| * length; for an implicit cast, raise error unless extra characters are |
| * all spaces. (This is sort-of per SQL: the spec would actually have us |
| * raise a "completion condition" for the explicit cast case, but Postgres |
| * hasn't got such a concept.) |
| */ |
| Datum |
| bpchar(PG_FUNCTION_ARGS) |
| { |
| BpChar *source = PG_GETARG_BPCHAR_PP(0); |
| int32 maxlen = PG_GETARG_INT32(1); |
| bool isExplicit = PG_GETARG_BOOL(2); |
| BpChar *result; |
| int32 len; |
| char *r; |
| char *s; |
| int i; |
| int charlen; /* number of characters in the input string + |
| * VARHDRSZ */ |
| |
| /* No work if typmod is invalid */ |
| if (maxlen < (int32) VARHDRSZ) |
| PG_RETURN_BPCHAR_P(source); |
| |
| maxlen -= VARHDRSZ; |
| |
| len = VARSIZE_ANY_EXHDR(source); |
| s = VARDATA_ANY(source); |
| |
| charlen = pg_mbstrlen_with_len(s, len); |
| |
| /* No work if supplied data matches typmod already */ |
| if (charlen == maxlen) |
| PG_RETURN_BPCHAR_P(source); |
| |
| if (charlen > maxlen) |
| { |
| /* Verify that extra characters are spaces, and clip them off */ |
| size_t maxmblen; |
| |
| maxmblen = pg_mbcharcliplen(s, len, maxlen); |
| |
| if (!isExplicit) |
| { |
| for (i = maxmblen; i < len; i++) |
| if (s[i] != ' ') |
| ereport(ERROR, |
| (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), |
| errmsg("value too long for type character(%d)", |
| maxlen))); |
| } |
| |
| len = maxmblen; |
| |
| /* |
| * At this point, maxlen is the necessary byte length, not the number |
| * of CHARACTERS! |
| */ |
| maxlen = len; |
| } |
| else |
| { |
| /* |
| * At this point, maxlen is the necessary byte length, not the number |
| * of CHARACTERS! |
| */ |
| maxlen = len + (maxlen - charlen); |
| } |
| |
| Assert(maxlen >= len); |
| |
| result = palloc(maxlen + VARHDRSZ); |
| SET_VARSIZE(result, maxlen + VARHDRSZ); |
| r = VARDATA(result); |
| |
| memcpy(r, s, len); |
| |
| /* blank pad the string if necessary */ |
| if (maxlen > len) |
| memset(r + len, ' ', maxlen - len); |
| |
| PG_RETURN_BPCHAR_P(result); |
| } |
| |
| |
| /* char_bpchar() |
| * Convert char to bpchar(1). |
| */ |
| Datum |
| char_bpchar(PG_FUNCTION_ARGS) |
| { |
| char c = PG_GETARG_CHAR(0); |
| BpChar *result; |
| |
| result = (BpChar *) palloc(VARHDRSZ + 1); |
| |
| SET_VARSIZE(result, VARHDRSZ + 1); |
| *(VARDATA(result)) = c; |
| |
| PG_RETURN_BPCHAR_P(result); |
| } |
| |
| |
| /* bpchar_name() |
| * Converts a bpchar() type to a NameData type. |
| */ |
| Datum |
| bpchar_name(PG_FUNCTION_ARGS) |
| { |
| BpChar *s = PG_GETARG_BPCHAR_PP(0); |
| char *s_data; |
| Name result; |
| int len; |
| |
| len = VARSIZE_ANY_EXHDR(s); |
| s_data = VARDATA_ANY(s); |
| |
| /* Truncate oversize input */ |
| if (len >= NAMEDATALEN) |
| len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1); |
| |
| /* Remove trailing blanks */ |
| while (len > 0) |
| { |
| if (s_data[len - 1] != ' ') |
| break; |
| len--; |
| } |
| |
| /* We use palloc0 here to ensure result is zero-padded */ |
| result = (Name) palloc0(NAMEDATALEN); |
| memcpy(NameStr(*result), s_data, len); |
| |
| PG_RETURN_NAME(result); |
| } |
| |
| /* name_bpchar() |
| * Converts a NameData type to a bpchar type. |
| * |
| * Uses the text conversion functions, which is only appropriate if BpChar |
| * and text are equivalent types. |
| */ |
| Datum |
| name_bpchar(PG_FUNCTION_ARGS) |
| { |
| Name s = PG_GETARG_NAME(0); |
| BpChar *result; |
| |
| result = (BpChar *) cstring_to_text(NameStr(*s)); |
| PG_RETURN_BPCHAR_P(result); |
| } |
| |
| Datum |
| bpchartypmodin(PG_FUNCTION_ARGS) |
| { |
| ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); |
| |
| PG_RETURN_INT32(anychar_typmodin(ta, "char")); |
| } |
| |
| Datum |
| bpchartypmodout(PG_FUNCTION_ARGS) |
| { |
| int32 typmod = PG_GETARG_INT32(0); |
| |
| PG_RETURN_CSTRING(anychar_typmodout(typmod)); |
| } |
| |
| |
| /***************************************************************************** |
| * varchar - varchar(n) |
| * |
| * Note: varchar piggybacks on type text for most operations, and so has no |
| * C-coded functions except for I/O and typmod checking. |
| *****************************************************************************/ |
| |
| /* |
| * varchar_input -- common guts of varcharin and varcharrecv |
| * |
| * s is the input text of length len (may not be null-terminated) |
| * atttypmod is the typmod value to apply |
| * |
| * Note that atttypmod is measured in characters, which |
| * is not necessarily the same as the number of bytes. |
| * |
| * If the input string is too long, raise an error, unless the extra |
| * characters are spaces, in which case they're truncated. (per SQL) |
| * |
| * If escontext points to an ErrorSaveContext node, that is filled instead |
| * of throwing an error; the caller must check SOFT_ERROR_OCCURRED() |
| * to detect errors. |
| */ |
| VarChar * |
| varchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext) |
| { |
| VarChar *result; |
| size_t maxlen; |
| |
| maxlen = atttypmod - VARHDRSZ; |
| |
| if (atttypmod >= (int32) VARHDRSZ && len > maxlen) |
| { |
| /* Verify that extra characters are spaces, and clip them off */ |
| size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen); |
| size_t j; |
| |
| for (j = mbmaxlen; j < len; j++) |
| { |
| if (s[j] != ' ') |
| ereturn(escontext, NULL, |
| (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), |
| errmsg("value too long for type character varying(%d)", |
| (int) maxlen))); |
| } |
| |
| len = mbmaxlen; |
| } |
| |
| /* |
| * We can use cstring_to_text_with_len because VarChar and text are |
| * binary-compatible types. |
| */ |
| result = (VarChar *) cstring_to_text_with_len(s, len); |
| return result; |
| } |
| |
| /* |
| * Convert a C string to VARCHAR internal representation. atttypmod |
| * is the declared length of the type plus VARHDRSZ. |
| */ |
| Datum |
| varcharin(PG_FUNCTION_ARGS) |
| { |
| char *s = PG_GETARG_CSTRING(0); |
| #ifdef NOT_USED |
| Oid typelem = PG_GETARG_OID(1); |
| #endif |
| int32 atttypmod = PG_GETARG_INT32(2); |
| VarChar *result; |
| |
| result = varchar_input(s, strlen(s), atttypmod, fcinfo->context); |
| PG_RETURN_VARCHAR_P(result); |
| } |
| |
| |
| /* |
| * Convert a VARCHAR value to a C string. |
| * |
| * Uses the text to C string conversion function, which is only appropriate |
| * if VarChar and text are equivalent types. |
| */ |
| Datum |
| varcharout(PG_FUNCTION_ARGS) |
| { |
| Datum txt = PG_GETARG_DATUM(0); |
| |
| PG_RETURN_CSTRING(TextDatumGetCString(txt)); |
| } |
| |
| /* |
| * varcharrecv - converts external binary format to varchar |
| */ |
| Datum |
| varcharrecv(PG_FUNCTION_ARGS) |
| { |
| StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); |
| #ifdef NOT_USED |
| Oid typelem = PG_GETARG_OID(1); |
| #endif |
| int32 atttypmod = PG_GETARG_INT32(2); |
| VarChar *result; |
| char *str; |
| int nbytes; |
| |
| str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); |
| result = varchar_input(str, nbytes, atttypmod, NULL); |
| pfree(str); |
| PG_RETURN_VARCHAR_P(result); |
| } |
| |
| /* |
| * varcharsend - converts varchar to binary format |
| */ |
| Datum |
| varcharsend(PG_FUNCTION_ARGS) |
| { |
| /* Exactly the same as textsend, so share code */ |
| return textsend(fcinfo); |
| } |
| |
| |
| /* |
| * varchar_support() |
| * |
| * Planner support function for the varchar() length coercion function. |
| * |
| * Currently, the only interesting thing we can do is flatten calls that set |
| * the new maximum length >= the previous maximum length. We can ignore the |
| * isExplicit argument, since that only affects truncation cases. |
| */ |
| Datum |
| varchar_support(PG_FUNCTION_ARGS) |
| { |
| Node *rawreq = (Node *) PG_GETARG_POINTER(0); |
| Node *ret = NULL; |
| |
| if (IsA(rawreq, SupportRequestSimplify)) |
| { |
| SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq; |
| FuncExpr *expr = req->fcall; |
| Node *typmod; |
| |
| Assert(list_length(expr->args) >= 2); |
| |
| typmod = (Node *) lsecond(expr->args); |
| |
| if (IsA(typmod, Const) && !((Const *) typmod)->constisnull) |
| { |
| Node *source = (Node *) linitial(expr->args); |
| int32 old_typmod = exprTypmod(source); |
| int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue); |
| int32 old_max = old_typmod - VARHDRSZ; |
| int32 new_max = new_typmod - VARHDRSZ; |
| |
| if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max)) |
| ret = relabel_to_typmod(source, new_typmod); |
| } |
| } |
| |
| PG_RETURN_POINTER(ret); |
| } |
| |
| /* |
| * Converts a VARCHAR type to the specified size. |
| * |
| * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes. |
| * isExplicit is true if this is for an explicit cast to varchar(N). |
| * |
| * Truncation rules: for an explicit cast, silently truncate to the given |
| * length; for an implicit cast, raise error unless extra characters are |
| * all spaces. (This is sort-of per SQL: the spec would actually have us |
| * raise a "completion condition" for the explicit cast case, but Postgres |
| * hasn't got such a concept.) |
| */ |
| Datum |
| varchar(PG_FUNCTION_ARGS) |
| { |
| VarChar *source = PG_GETARG_VARCHAR_PP(0); |
| int32 typmod = PG_GETARG_INT32(1); |
| bool isExplicit = PG_GETARG_BOOL(2); |
| int32 len, |
| maxlen; |
| size_t maxmblen; |
| int i; |
| char *s_data; |
| |
| len = VARSIZE_ANY_EXHDR(source); |
| s_data = VARDATA_ANY(source); |
| maxlen = typmod - VARHDRSZ; |
| |
| /* No work if typmod is invalid or supplied data fits it already */ |
| if (maxlen < 0 || len <= maxlen) |
| PG_RETURN_VARCHAR_P(source); |
| |
| /* only reach here if string is too long... */ |
| |
| /* truncate multibyte string preserving multibyte boundary */ |
| maxmblen = pg_mbcharcliplen(s_data, len, maxlen); |
| |
| if (!isExplicit) |
| { |
| for (i = maxmblen; i < len; i++) |
| if (s_data[i] != ' ') |
| ereport(ERROR, |
| (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), |
| errmsg("value too long for type character varying(%d)", |
| maxlen))); |
| } |
| |
| PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data, |
| maxmblen)); |
| } |
| |
| Datum |
| varchartypmodin(PG_FUNCTION_ARGS) |
| { |
| ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); |
| |
| PG_RETURN_INT32(anychar_typmodin(ta, "varchar")); |
| } |
| |
| Datum |
| varchartypmodout(PG_FUNCTION_ARGS) |
| { |
| int32 typmod = PG_GETARG_INT32(0); |
| |
| PG_RETURN_CSTRING(anychar_typmodout(typmod)); |
| } |
| |
| |
| /***************************************************************************** |
| * Exported functions |
| *****************************************************************************/ |
| |
| /* "True" length (not counting trailing blanks) of a BpChar */ |
| static inline int |
| bcTruelen(BpChar *arg) |
| { |
| return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg)); |
| } |
| |
| int |
| bpchartruelen(char *s, int len) |
| { |
| int i; |
| |
| /* |
| * Note that we rely on the assumption that ' ' is a singleton unit on |
| * every supported multibyte server encoding. |
| */ |
| for (i = len - 1; i >= 0; i--) |
| { |
| if (s[i] != ' ') |
| break; |
| } |
| return i + 1; |
| } |
| |
| Datum |
| bpcharlen(PG_FUNCTION_ARGS) |
| { |
| BpChar *arg = PG_GETARG_BPCHAR_PP(0); |
| int len; |
| |
| /* get number of bytes, ignoring trailing spaces */ |
| len = bcTruelen(arg); |
| |
| /* in multibyte encoding, convert to number of characters */ |
| if (pg_database_encoding_max_length() != 1) |
| len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len); |
| |
| PG_RETURN_INT32(len); |
| } |
| |
| Datum |
| bpcharoctetlen(PG_FUNCTION_ARGS) |
| { |
| Datum arg = PG_GETARG_DATUM(0); |
| |
| /* We need not detoast the input at all */ |
| PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ); |
| } |
| |
| |
| /***************************************************************************** |
| * Comparison Functions used for bpchar |
| * |
| * Note: btree indexes need these routines not to leak memory; therefore, |
| * be careful to free working copies of toasted datums. Most places don't |
| * need to be so careful. |
| *****************************************************************************/ |
| |
| static void |
| check_collation_set(Oid collid) |
| { |
| if (!OidIsValid(collid)) |
| { |
| /* |
| * This typically means that the parser could not resolve a conflict |
| * of implicit collations, so report it that way. |
| */ |
| ereport(ERROR, |
| (errcode(ERRCODE_INDETERMINATE_COLLATION), |
| errmsg("could not determine which collation to use for string comparison"), |
| errhint("Use the COLLATE clause to set the collation explicitly."))); |
| } |
| } |
| |
| Datum |
| bpchareq(PG_FUNCTION_ARGS) |
| { |
| BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
| BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
| int len1, |
| len2; |
| bool result; |
| Oid collid = PG_GET_COLLATION(); |
| bool locale_is_c = false; |
| pg_locale_t mylocale = 0; |
| |
| check_collation_set(collid); |
| |
| len1 = bcTruelen(arg1); |
| len2 = bcTruelen(arg2); |
| |
| if (lc_collate_is_c(collid)) |
| locale_is_c = true; |
| else |
| mylocale = pg_newlocale_from_collation(collid); |
| |
| if (locale_is_c || pg_locale_deterministic(mylocale)) |
| { |
| /* |
| * Since we only care about equality or not-equality, we can avoid all |
| * the expense of strcoll() here, and just do bitwise comparison. |
| */ |
| if (len1 != len2) |
| result = false; |
| else |
| result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0); |
| } |
| else |
| { |
| result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
| collid) == 0); |
| } |
| |
| PG_FREE_IF_COPY(arg1, 0); |
| PG_FREE_IF_COPY(arg2, 1); |
| |
| PG_RETURN_BOOL(result); |
| } |
| |
| Datum |
| bpcharne(PG_FUNCTION_ARGS) |
| { |
| BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
| BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
| int len1, |
| len2; |
| bool result; |
| Oid collid = PG_GET_COLLATION(); |
| bool locale_is_c = false; |
| pg_locale_t mylocale = 0; |
| |
| check_collation_set(collid); |
| |
| len1 = bcTruelen(arg1); |
| len2 = bcTruelen(arg2); |
| |
| if (lc_collate_is_c(collid)) |
| locale_is_c = true; |
| else |
| mylocale = pg_newlocale_from_collation(collid); |
| |
| if (locale_is_c || pg_locale_deterministic(mylocale)) |
| { |
| /* |
| * Since we only care about equality or not-equality, we can avoid all |
| * the expense of strcoll() here, and just do bitwise comparison. |
| */ |
| if (len1 != len2) |
| result = true; |
| else |
| result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0); |
| } |
| else |
| { |
| result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
| collid) != 0); |
| } |
| |
| PG_FREE_IF_COPY(arg1, 0); |
| PG_FREE_IF_COPY(arg2, 1); |
| |
| PG_RETURN_BOOL(result); |
| } |
| |
| Datum |
| bpcharlt(PG_FUNCTION_ARGS) |
| { |
| BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
| BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
| int len1, |
| len2; |
| int cmp; |
| |
| len1 = bcTruelen(arg1); |
| len2 = bcTruelen(arg2); |
| |
| cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
| PG_GET_COLLATION()); |
| |
| PG_FREE_IF_COPY(arg1, 0); |
| PG_FREE_IF_COPY(arg2, 1); |
| |
| PG_RETURN_BOOL(cmp < 0); |
| } |
| |
| Datum |
| bpcharle(PG_FUNCTION_ARGS) |
| { |
| BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
| BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
| int len1, |
| len2; |
| int cmp; |
| |
| len1 = bcTruelen(arg1); |
| len2 = bcTruelen(arg2); |
| |
| cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
| PG_GET_COLLATION()); |
| |
| PG_FREE_IF_COPY(arg1, 0); |
| PG_FREE_IF_COPY(arg2, 1); |
| |
| PG_RETURN_BOOL(cmp <= 0); |
| } |
| |
| Datum |
| bpchargt(PG_FUNCTION_ARGS) |
| { |
| BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
| BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
| int len1, |
| len2; |
| int cmp; |
| |
| len1 = bcTruelen(arg1); |
| len2 = bcTruelen(arg2); |
| |
| cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
| PG_GET_COLLATION()); |
| |
| PG_FREE_IF_COPY(arg1, 0); |
| PG_FREE_IF_COPY(arg2, 1); |
| |
| PG_RETURN_BOOL(cmp > 0); |
| } |
| |
| Datum |
| bpcharge(PG_FUNCTION_ARGS) |
| { |
| BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
| BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
| int len1, |
| len2; |
| int cmp; |
| |
| len1 = bcTruelen(arg1); |
| len2 = bcTruelen(arg2); |
| |
| cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
| PG_GET_COLLATION()); |
| |
| PG_FREE_IF_COPY(arg1, 0); |
| PG_FREE_IF_COPY(arg2, 1); |
| |
| PG_RETURN_BOOL(cmp >= 0); |
| } |
| |
| Datum |
| bpcharcmp(PG_FUNCTION_ARGS) |
| { |
| BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
| BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
| int len1, |
| len2; |
| int cmp; |
| |
| len1 = bcTruelen(arg1); |
| len2 = bcTruelen(arg2); |
| |
| cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
| PG_GET_COLLATION()); |
| |
| PG_FREE_IF_COPY(arg1, 0); |
| PG_FREE_IF_COPY(arg2, 1); |
| |
| PG_RETURN_INT32(cmp); |
| } |
| |
| Datum |
| bpchar_sortsupport(PG_FUNCTION_ARGS) |
| { |
| SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); |
| Oid collid = ssup->ssup_collation; |
| MemoryContext oldcontext; |
| |
| oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); |
| |
| /* Use generic string SortSupport */ |
| varstr_sortsupport(ssup, BPCHAROID, collid); |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| PG_RETURN_VOID(); |
| } |
| |
| Datum |
| bpchar_larger(PG_FUNCTION_ARGS) |
| { |
| BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
| BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
| int len1, |
| len2; |
| int cmp; |
| |
| len1 = bcTruelen(arg1); |
| len2 = bcTruelen(arg2); |
| |
| cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
| PG_GET_COLLATION()); |
| |
| PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2); |
| } |
| |
| Datum |
| bpchar_smaller(PG_FUNCTION_ARGS) |
| { |
| BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
| BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
| int len1, |
| len2; |
| int cmp; |
| |
| len1 = bcTruelen(arg1); |
| len2 = bcTruelen(arg2); |
| |
| cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
| PG_GET_COLLATION()); |
| |
| PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2); |
| } |
| |
| |
| /* |
| * bpchar needs a specialized hash function because we want to ignore |
| * trailing blanks in comparisons. |
| */ |
| Datum |
| hashbpchar(PG_FUNCTION_ARGS) |
| { |
| BpChar *key = PG_GETARG_BPCHAR_PP(0); |
| Oid collid = PG_GET_COLLATION(); |
| char *keydata; |
| int keylen; |
| pg_locale_t mylocale = 0; |
| Datum result; |
| |
| if (!collid) |
| ereport(ERROR, |
| (errcode(ERRCODE_INDETERMINATE_COLLATION), |
| errmsg("could not determine which collation to use for string hashing"), |
| errhint("Use the COLLATE clause to set the collation explicitly."))); |
| |
| keydata = VARDATA_ANY(key); |
| keylen = bcTruelen(key); |
| |
| if (!lc_collate_is_c(collid)) |
| mylocale = pg_newlocale_from_collation(collid); |
| |
| if (pg_locale_deterministic(mylocale)) |
| { |
| result = hash_any((unsigned char *) keydata, keylen); |
| } |
| else |
| { |
| Size bsize, |
| rsize; |
| char *buf; |
| |
| bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); |
| buf = palloc(bsize + 1); |
| |
| rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale); |
| |
| /* the second call may return a smaller value than the first */ |
| if (rsize > bsize) |
| elog(ERROR, "pg_strnxfrm() returned unexpected result"); |
| |
| /* |
| * In principle, there's no reason to include the terminating NUL |
| * character in the hash, but it was done before and the behavior must |
| * be preserved. |
| */ |
| result = hash_any((uint8_t *) buf, bsize + 1); |
| |
| pfree(buf); |
| } |
| |
| /* Avoid leaking memory for toasted inputs */ |
| PG_FREE_IF_COPY(key, 0); |
| |
| return result; |
| } |
| |
| Datum |
| hashbpcharextended(PG_FUNCTION_ARGS) |
| { |
| BpChar *key = PG_GETARG_BPCHAR_PP(0); |
| Oid collid = PG_GET_COLLATION(); |
| char *keydata; |
| int keylen; |
| pg_locale_t mylocale = 0; |
| Datum result; |
| |
| if (!collid) |
| ereport(ERROR, |
| (errcode(ERRCODE_INDETERMINATE_COLLATION), |
| errmsg("could not determine which collation to use for string hashing"), |
| errhint("Use the COLLATE clause to set the collation explicitly."))); |
| |
| keydata = VARDATA_ANY(key); |
| keylen = bcTruelen(key); |
| |
| if (!lc_collate_is_c(collid)) |
| mylocale = pg_newlocale_from_collation(collid); |
| |
| if (pg_locale_deterministic(mylocale)) |
| { |
| result = hash_any_extended((unsigned char *) keydata, keylen, |
| PG_GETARG_INT64(1)); |
| } |
| else |
| { |
| Size bsize, |
| rsize; |
| char *buf; |
| |
| bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); |
| buf = palloc(bsize + 1); |
| |
| rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale); |
| |
| /* the second call may return a smaller value than the first */ |
| if (rsize > bsize) |
| elog(ERROR, "pg_strnxfrm() returned unexpected result"); |
| |
| /* |
| * In principle, there's no reason to include the terminating NUL |
| * character in the hash, but it was done before and the behavior must |
| * be preserved. |
| */ |
| result = hash_any_extended((uint8_t *) buf, bsize + 1, |
| PG_GETARG_INT64(1)); |
| |
| pfree(buf); |
| } |
| |
| PG_FREE_IF_COPY(key, 0); |
| |
| return result; |
| } |
| |
| /* |
| * The following operators support character-by-character comparison |
| * of bpchar datums, to allow building indexes suitable for LIKE clauses. |
| * Note that the regular bpchareq/bpcharne comparison operators, and |
| * regular support functions 1 and 2 with "C" collation are assumed to be |
| * compatible with these! |
| */ |
| |
| static int |
| internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2) |
| { |
| int result; |
| int len1, |
| len2; |
| |
| len1 = bcTruelen(arg1); |
| len2 = bcTruelen(arg2); |
| |
| result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); |
| if (result != 0) |
| return result; |
| else if (len1 < len2) |
| return -1; |
| else if (len1 > len2) |
| return 1; |
| else |
| return 0; |
| } |
| |
| |
| Datum |
| bpchar_pattern_lt(PG_FUNCTION_ARGS) |
| { |
| BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
| BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
| int result; |
| |
| result = internal_bpchar_pattern_compare(arg1, arg2); |
| |
| PG_FREE_IF_COPY(arg1, 0); |
| PG_FREE_IF_COPY(arg2, 1); |
| |
| PG_RETURN_BOOL(result < 0); |
| } |
| |
| |
| Datum |
| bpchar_pattern_le(PG_FUNCTION_ARGS) |
| { |
| BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
| BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
| int result; |
| |
| result = internal_bpchar_pattern_compare(arg1, arg2); |
| |
| PG_FREE_IF_COPY(arg1, 0); |
| PG_FREE_IF_COPY(arg2, 1); |
| |
| PG_RETURN_BOOL(result <= 0); |
| } |
| |
| |
| Datum |
| bpchar_pattern_ge(PG_FUNCTION_ARGS) |
| { |
| BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
| BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
| int result; |
| |
| result = internal_bpchar_pattern_compare(arg1, arg2); |
| |
| PG_FREE_IF_COPY(arg1, 0); |
| PG_FREE_IF_COPY(arg2, 1); |
| |
| PG_RETURN_BOOL(result >= 0); |
| } |
| |
| |
| Datum |
| bpchar_pattern_gt(PG_FUNCTION_ARGS) |
| { |
| BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
| BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
| int result; |
| |
| result = internal_bpchar_pattern_compare(arg1, arg2); |
| |
| PG_FREE_IF_COPY(arg1, 0); |
| PG_FREE_IF_COPY(arg2, 1); |
| |
| PG_RETURN_BOOL(result > 0); |
| } |
| |
| |
| Datum |
| btbpchar_pattern_cmp(PG_FUNCTION_ARGS) |
| { |
| BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
| BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
| int result; |
| |
| result = internal_bpchar_pattern_compare(arg1, arg2); |
| |
| PG_FREE_IF_COPY(arg1, 0); |
| PG_FREE_IF_COPY(arg2, 1); |
| |
| PG_RETURN_INT32(result); |
| } |
| |
| |
| Datum |
| btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS) |
| { |
| SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); |
| MemoryContext oldcontext; |
| |
| oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); |
| |
| /* Use generic string SortSupport, forcing "C" collation */ |
| varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID); |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| PG_RETURN_VOID(); |
| } |