| /*------------------------------------------------------------------------- |
| * |
| * datum.c |
| * POSTGRES Datum (abstract data type) manipulation routines. |
| * |
| * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/utils/adt/datum.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| /* |
| * In the implementation of these routines we assume the following: |
| * |
| * A) if a type is "byVal" then all the information is stored in the |
| * Datum itself (i.e. no pointers involved!). In this case the |
| * length of the type is always greater than zero and not more than |
| * "sizeof(Datum)" |
| * |
| * B) if a type is not "byVal" and it has a fixed length (typlen > 0), |
| * then the "Datum" always contains a pointer to a stream of bytes. |
| * The number of significant bytes are always equal to the typlen. |
| * |
| * C) if a type is not "byVal" and has typlen == -1, |
| * then the "Datum" always points to a "struct varlena". |
| * This varlena structure has information about the actual length of this |
| * particular instance of the type and about its value. |
| * |
| * D) if a type is not "byVal" and has typlen == -2, |
| * then the "Datum" always points to a null-terminated C string. |
| * |
| * Note that we do not treat "toasted" datums specially; therefore what |
| * will be copied or compared is the compressed data or toast reference. |
| * An exception is made for datumCopy() of an expanded object, however, |
| * because most callers expect to get a simple contiguous (and pfree'able) |
| * result from datumCopy(). See also datumTransfer(). |
| */ |
| |
| #include "postgres.h" |
| |
| #include "access/detoast.h" |
| #include "catalog/pg_type_d.h" |
| #include "common/hashfn.h" |
| #include "fmgr.h" |
| #include "utils/builtins.h" |
| #include "utils/datum.h" |
| #include "utils/expandeddatum.h" |
| |
| |
| /*------------------------------------------------------------------------- |
| * datumGetSize |
| * |
| * Find the "real" size of a datum, given the datum value, |
| * whether it is a "by value", and the declared type length. |
| * (For TOAST pointer datums, this is the size of the pointer datum.) |
| * |
| * This is essentially an out-of-line version of the att_addlength_datum() |
| * macro in access/tupmacs.h. We do a tad more error checking though. |
| *------------------------------------------------------------------------- |
| */ |
| Size |
| datumGetSize(Datum value, bool typByVal, int typLen) |
| { |
| Size size; |
| |
| if (typByVal) |
| { |
| /* Pass-by-value types are always fixed-length */ |
| Assert(typLen > 0 && typLen <= sizeof(Datum)); |
| size = (Size) typLen; |
| } |
| else |
| { |
| if (typLen > 0) |
| { |
| /* Fixed-length pass-by-ref type */ |
| size = (Size) typLen; |
| } |
| else if (typLen == -1) |
| { |
| /* It is a varlena datatype */ |
| struct varlena *s = (struct varlena *) DatumGetPointer(value); |
| |
| if (!PointerIsValid(s)) |
| ereport(ERROR, |
| (errcode(ERRCODE_DATA_EXCEPTION), |
| errmsg("invalid Datum pointer"))); |
| |
| size = (Size) VARSIZE_ANY(s); |
| } |
| else if (typLen == -2) |
| { |
| /* It is a cstring datatype */ |
| char *s = (char *) DatumGetPointer(value); |
| |
| if (!PointerIsValid(s)) |
| ereport(ERROR, |
| (errcode(ERRCODE_DATA_EXCEPTION), |
| errmsg("invalid Datum pointer"))); |
| |
| size = (Size) (strlen(s) + 1); |
| } |
| else |
| { |
| elog(ERROR, "invalid typLen: %d", typLen); |
| size = 0; /* keep compiler quiet */ |
| } |
| } |
| |
| return size; |
| } |
| |
| /*------------------------------------------------------------------------- |
| * datumCopy |
| * |
| * Make a copy of a non-NULL datum. |
| * |
| * If the datatype is pass-by-reference, memory is obtained with palloc(). |
| * |
| * If the value is a reference to an expanded object, we flatten into memory |
| * obtained with palloc(). We need to copy because one of the main uses of |
| * this function is to copy a datum out of a transient memory context that's |
| * about to be destroyed, and the expanded object is probably in a child |
| * context that will also go away. Moreover, many callers assume that the |
| * result is a single pfree-able chunk. |
| * |
| * NOTICE: GPDB had a similar function datumCopyWithMemManager to acceleration |
| * aggreation, please keep datumCopyWithMemManager in sync with datumCopy. |
| *------------------------------------------------------------------------- |
| */ |
| Datum |
| datumCopy(Datum value, bool typByVal, int typLen) |
| { |
| Datum res; |
| |
| if (typByVal) |
| res = value; |
| else if (typLen == -1) |
| { |
| /* It is a varlena datatype */ |
| struct varlena *vl = (struct varlena *) DatumGetPointer(value); |
| |
| if (VARATT_IS_EXTERNAL_EXPANDED(vl)) |
| { |
| /* Flatten into the caller's memory context */ |
| ExpandedObjectHeader *eoh = DatumGetEOHP(value); |
| Size resultsize; |
| char *resultptr; |
| |
| resultsize = EOH_get_flat_size(eoh); |
| resultptr = (char *) palloc(resultsize); |
| EOH_flatten_into(eoh, (void *) resultptr, resultsize); |
| res = PointerGetDatum(resultptr); |
| } |
| else |
| { |
| /* Otherwise, just copy the varlena datum verbatim */ |
| Size realSize; |
| char *resultptr; |
| |
| realSize = (Size) VARSIZE_ANY(vl); |
| resultptr = (char *) palloc(realSize); |
| memcpy(resultptr, vl, realSize); |
| res = PointerGetDatum(resultptr); |
| } |
| } |
| else |
| { |
| /* Pass by reference, but not varlena, so not toasted */ |
| Size realSize; |
| char *resultptr; |
| |
| realSize = datumGetSize(value, typByVal, typLen); |
| |
| resultptr = (char *) palloc(realSize); |
| memcpy(resultptr, DatumGetPointer(value), realSize); |
| res = PointerGetDatum(resultptr); |
| } |
| return res; |
| } |
| |
| /*------------------------------------------------------------------------- |
| * datumTransfer |
| * |
| * Transfer a non-NULL datum into the current memory context. |
| * |
| * This is equivalent to datumCopy() except when the datum is a read-write |
| * pointer to an expanded object. In that case we merely reparent the object |
| * into the current context, and return its standard R/W pointer (in case the |
| * given one is a transient pointer of shorter lifespan). |
| *------------------------------------------------------------------------- |
| */ |
| Datum |
| datumTransfer(Datum value, bool typByVal, int typLen) |
| { |
| if (!typByVal && typLen == -1 && |
| VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(value))) |
| value = TransferExpandedObject(value, CurrentMemoryContext); |
| else |
| value = datumCopy(value, typByVal, typLen); |
| return value; |
| } |
| |
| /*------------------------------------------------------------------------- |
| * datumIsEqual |
| * |
| * Return true if two datums are equal, false otherwise |
| * |
| * NOTE: XXX! |
| * We just compare the bytes of the two values, one by one. |
| * This routine will return false if there are 2 different |
| * representations of the same value (something along the lines |
| * of say the representation of zero in one's complement arithmetic). |
| * Also, it will probably not give the answer you want if either |
| * datum has been "toasted". |
| * |
| * Do not try to make this any smarter than it currently is with respect |
| * to "toasted" datums, because some of the callers could be working in the |
| * context of an aborted transaction. |
| *------------------------------------------------------------------------- |
| */ |
| bool |
| datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen) |
| { |
| bool res; |
| |
| if (typByVal) |
| { |
| /* |
| * just compare the two datums. NOTE: just comparing "len" bytes will |
| * not do the work, because we do not know how these bytes are aligned |
| * inside the "Datum". We assume instead that any given datatype is |
| * consistent about how it fills extraneous bits in the Datum. |
| */ |
| res = (value1 == value2); |
| } |
| else |
| { |
| Size size1, |
| size2; |
| char *s1, |
| *s2; |
| |
| /* |
| * Compare the bytes pointed by the pointers stored in the datums. |
| */ |
| size1 = datumGetSize(value1, typByVal, typLen); |
| size2 = datumGetSize(value2, typByVal, typLen); |
| if (size1 != size2) |
| return false; |
| s1 = (char *) DatumGetPointer(value1); |
| s2 = (char *) DatumGetPointer(value2); |
| res = (memcmp(s1, s2, size1) == 0); |
| } |
| return res; |
| } |
| |
| /*------------------------------------------------------------------------- |
| * datum_image_eq |
| * |
| * Compares two datums for identical contents, based on byte images. Return |
| * true if the two datums are equal, false otherwise. |
| *------------------------------------------------------------------------- |
| */ |
| bool |
| datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen) |
| { |
| Size len1, |
| len2; |
| bool result = true; |
| |
| if (typByVal) |
| { |
| result = (value1 == value2); |
| } |
| else if (typLen > 0) |
| { |
| result = (memcmp(DatumGetPointer(value1), |
| DatumGetPointer(value2), |
| typLen) == 0); |
| } |
| else if (typLen == -1) |
| { |
| len1 = toast_raw_datum_size(value1); |
| len2 = toast_raw_datum_size(value2); |
| /* No need to de-toast if lengths don't match. */ |
| if (len1 != len2) |
| result = false; |
| else |
| { |
| struct varlena *arg1val; |
| struct varlena *arg2val; |
| |
| arg1val = PG_DETOAST_DATUM_PACKED(value1); |
| arg2val = PG_DETOAST_DATUM_PACKED(value2); |
| |
| result = (memcmp(VARDATA_ANY(arg1val), |
| VARDATA_ANY(arg2val), |
| len1 - VARHDRSZ) == 0); |
| |
| /* Only free memory if it's a copy made here. */ |
| if ((Pointer) arg1val != (Pointer) value1) |
| pfree(arg1val); |
| if ((Pointer) arg2val != (Pointer) value2) |
| pfree(arg2val); |
| } |
| } |
| else if (typLen == -2) |
| { |
| char *s1, |
| *s2; |
| |
| /* Compare cstring datums */ |
| s1 = DatumGetCString(value1); |
| s2 = DatumGetCString(value2); |
| len1 = strlen(s1) + 1; |
| len2 = strlen(s2) + 1; |
| if (len1 != len2) |
| return false; |
| result = (memcmp(s1, s2, len1) == 0); |
| } |
| else |
| elog(ERROR, "unexpected typLen: %d", typLen); |
| |
| return result; |
| } |
| |
| /*------------------------------------------------------------------------- |
| * datum_image_hash |
| * |
| * Generate a hash value based on the binary representation of 'value'. Most |
| * use cases will want to use the hash function specific to the Datum's type, |
| * however, some corner cases require generating a hash value based on the |
| * actual bits rather than the logical value. |
| *------------------------------------------------------------------------- |
| */ |
| uint32 |
| datum_image_hash(Datum value, bool typByVal, int typLen) |
| { |
| Size len; |
| uint32 result; |
| |
| if (typByVal) |
| result = hash_bytes((unsigned char *) &value, sizeof(Datum)); |
| else if (typLen > 0) |
| result = hash_bytes((unsigned char *) DatumGetPointer(value), typLen); |
| else if (typLen == -1) |
| { |
| struct varlena *val; |
| |
| len = toast_raw_datum_size(value); |
| |
| val = PG_DETOAST_DATUM_PACKED(value); |
| |
| result = hash_bytes((unsigned char *) VARDATA_ANY(val), len - VARHDRSZ); |
| |
| /* Only free memory if it's a copy made here. */ |
| if ((Pointer) val != (Pointer) value) |
| pfree(val); |
| } |
| else if (typLen == -2) |
| { |
| char *s; |
| |
| s = DatumGetCString(value); |
| len = strlen(s) + 1; |
| |
| result = hash_bytes((unsigned char *) s, len); |
| } |
| else |
| { |
| elog(ERROR, "unexpected typLen: %d", typLen); |
| result = 0; /* keep compiler quiet */ |
| } |
| |
| return result; |
| } |
| |
| /*------------------------------------------------------------------------- |
| * btequalimage |
| * |
| * Generic "equalimage" support function. |
| * |
| * B-Tree operator classes whose equality function could safely be replaced by |
| * datum_image_eq() in all cases can use this as their "equalimage" support |
| * function. |
| * |
| * Earlier minor releases erroneously associated this function with |
| * interval_ops. Detect that case to rescind deduplication support, without |
| * requiring initdb. |
| *------------------------------------------------------------------------- |
| */ |
| Datum |
| btequalimage(PG_FUNCTION_ARGS) |
| { |
| Oid opcintype = PG_GETARG_OID(0); |
| |
| PG_RETURN_BOOL(opcintype != INTERVALOID); |
| } |
| |
| /*------------------------------------------------------------------------- |
| * datumEstimateSpace |
| * |
| * Compute the amount of space that datumSerialize will require for a |
| * particular Datum. |
| *------------------------------------------------------------------------- |
| */ |
| Size |
| datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen) |
| { |
| Size sz = sizeof(int); |
| |
| if (!isnull) |
| { |
| /* no need to use add_size, can't overflow */ |
| if (typByVal) |
| sz += sizeof(Datum); |
| else if (typLen == -1 && |
| VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value))) |
| { |
| /* Expanded objects need to be flattened, see comment below */ |
| sz += EOH_get_flat_size(DatumGetEOHP(value)); |
| } |
| else |
| sz += datumGetSize(value, typByVal, typLen); |
| } |
| |
| return sz; |
| } |
| |
| /*------------------------------------------------------------------------- |
| * datumSerialize |
| * |
| * Serialize a possibly-NULL datum into caller-provided storage. |
| * |
| * Note: "expanded" objects are flattened so as to produce a self-contained |
| * representation, but other sorts of toast pointers are transferred as-is. |
| * This is because the intended use of this function is to pass the value |
| * to another process within the same database server. The other process |
| * could not access an "expanded" object within this process's memory, but |
| * we assume it can dereference the same TOAST pointers this one can. |
| * |
| * The format is as follows: first, we write a 4-byte header word, which |
| * is either the length of a pass-by-reference datum, -1 for a |
| * pass-by-value datum, or -2 for a NULL. If the value is NULL, nothing |
| * further is written. If it is pass-by-value, sizeof(Datum) bytes |
| * follow. Otherwise, the number of bytes indicated by the header word |
| * follow. The caller is responsible for ensuring that there is enough |
| * storage to store the number of bytes that will be written; use |
| * datumEstimateSpace() to find out how many will be needed. |
| * *start_address is updated to point to the byte immediately following |
| * those written. |
| *------------------------------------------------------------------------- |
| */ |
| void |
| datumSerialize(Datum value, bool isnull, bool typByVal, int typLen, |
| char **start_address) |
| { |
| ExpandedObjectHeader *eoh = NULL; |
| int header; |
| |
| /* Write header word. */ |
| if (isnull) |
| header = -2; |
| else if (typByVal) |
| header = -1; |
| else if (typLen == -1 && |
| VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value))) |
| { |
| eoh = DatumGetEOHP(value); |
| header = EOH_get_flat_size(eoh); |
| } |
| else |
| header = datumGetSize(value, typByVal, typLen); |
| memcpy(*start_address, &header, sizeof(int)); |
| *start_address += sizeof(int); |
| |
| /* If not null, write payload bytes. */ |
| if (!isnull) |
| { |
| if (typByVal) |
| { |
| memcpy(*start_address, &value, sizeof(Datum)); |
| *start_address += sizeof(Datum); |
| } |
| else if (eoh) |
| { |
| char *tmp; |
| |
| /* |
| * EOH_flatten_into expects the target address to be maxaligned, |
| * so we can't store directly to *start_address. |
| */ |
| tmp = (char *) palloc(header); |
| EOH_flatten_into(eoh, (void *) tmp, header); |
| memcpy(*start_address, tmp, header); |
| *start_address += header; |
| |
| /* be tidy. */ |
| pfree(tmp); |
| } |
| else |
| { |
| memcpy(*start_address, DatumGetPointer(value), header); |
| *start_address += header; |
| } |
| } |
| } |
| |
| /*------------------------------------------------------------------------- |
| * datumRestore |
| * |
| * Restore a possibly-NULL datum previously serialized by datumSerialize. |
| * *start_address is updated according to the number of bytes consumed. |
| *------------------------------------------------------------------------- |
| */ |
| Datum |
| datumRestore(char **start_address, bool *isnull) |
| { |
| int header; |
| void *d; |
| |
| /* Read header word. */ |
| memcpy(&header, *start_address, sizeof(int)); |
| *start_address += sizeof(int); |
| |
| /* If this datum is NULL, we can stop here. */ |
| if (header == -2) |
| { |
| *isnull = true; |
| return (Datum) 0; |
| } |
| |
| /* OK, datum is not null. */ |
| *isnull = false; |
| |
| /* If this datum is pass-by-value, sizeof(Datum) bytes follow. */ |
| if (header == -1) |
| { |
| Datum val; |
| |
| memcpy(&val, *start_address, sizeof(Datum)); |
| *start_address += sizeof(Datum); |
| return val; |
| } |
| |
| /* Pass-by-reference case; copy indicated number of bytes. */ |
| Assert(header > 0); |
| d = palloc(header); |
| memcpy(d, *start_address, header); |
| *start_address += header; |
| return PointerGetDatum(d); |
| } |