| /*------------------------------------------------------------------------- |
| * |
| * detoast.c |
| * Retrieve compressed or external variable size attributes. |
| * |
| * Copyright (c) 2000-2021, PostgreSQL Global Development Group |
| * |
| * IDENTIFICATION |
| * src/backend/access/common/detoast.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| |
| #include "access/detoast.h" |
| #include "access/table.h" |
| #include "access/tableam.h" |
| #include "access/toast_internals.h" |
| #include "common/int.h" |
| #include "common/pg_lzcompress.h" |
| #include "utils/expandeddatum.h" |
| #include "utils/rel.h" |
| |
| static struct varlena *toast_fetch_datum(struct varlena *attr); |
| static struct varlena *toast_fetch_datum_slice(struct varlena *attr, |
| int32 sliceoffset, |
| int32 slicelength); |
| static struct varlena *toast_decompress_datum(struct varlena *attr); |
| static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength); |
| |
| /* ---------- |
| * detoast_external_attr - |
| * |
| * Public entry point to get back a toasted value from |
| * external source (possibly still in compressed format). |
| * |
| * This will return a datum that contains all the data internally, ie, not |
| * relying on external storage or memory, but it can still be compressed or |
| * have a short header. Note some callers assume that if the input is an |
| * EXTERNAL datum, the result will be a pfree'able chunk. |
| * ---------- |
| */ |
| struct varlena * |
| detoast_external_attr(struct varlena *attr) |
| { |
| struct varlena *result; |
| |
| if (VARATT_IS_EXTERNAL_ONDISK(attr)) |
| { |
| /* |
| * This is an external stored plain value |
| */ |
| result = toast_fetch_datum(attr); |
| } |
| else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) |
| { |
| /* |
| * This is an indirect pointer --- dereference it |
| */ |
| struct varatt_indirect redirect; |
| |
| VARATT_EXTERNAL_GET_POINTER(redirect, attr); |
| attr = (struct varlena *) redirect.pointer; |
| |
| /* nested indirect Datums aren't allowed */ |
| Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); |
| |
| /* recurse if value is still external in some other way */ |
| if (VARATT_IS_EXTERNAL(attr)) |
| return detoast_external_attr(attr); |
| |
| /* |
| * Copy into the caller's memory context, in case caller tries to |
| * pfree the result. |
| */ |
| result = (struct varlena *) palloc(VARSIZE_ANY(attr)); |
| memcpy(result, attr, VARSIZE_ANY(attr)); |
| } |
| else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) |
| { |
| /* |
| * This is an expanded-object pointer --- get flat format |
| */ |
| ExpandedObjectHeader *eoh; |
| Size resultsize; |
| |
| eoh = DatumGetEOHP(PointerGetDatum(attr)); |
| resultsize = EOH_get_flat_size(eoh); |
| result = (struct varlena *) palloc(resultsize); |
| EOH_flatten_into(eoh, (void *) result, resultsize); |
| } |
| else |
| { |
| /* |
| * This is a plain value inside of the main tuple - why am I called? |
| */ |
| result = attr; |
| } |
| |
| return result; |
| } |
| |
| |
| /* ---------- |
| * detoast_attr - |
| * |
| * Public entry point to get back a toasted value from compression |
| * or external storage. The result is always non-extended varlena form. |
| * |
| * Note some callers assume that if the input is an EXTERNAL or COMPRESSED |
| * datum, the result will be a pfree'able chunk. |
| * ---------- |
| */ |
| struct varlena * |
| detoast_attr(struct varlena *attr) |
| { |
| if (VARATT_IS_EXTERNAL_ONDISK(attr)) |
| { |
| /* |
| * This is an externally stored datum --- fetch it back from there |
| */ |
| attr = toast_fetch_datum(attr); |
| /* If it's compressed, decompress it */ |
| if (VARATT_IS_COMPRESSED(attr)) |
| { |
| struct varlena *tmp = attr; |
| |
| attr = toast_decompress_datum(tmp); |
| pfree(tmp); |
| } |
| } |
| else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) |
| { |
| /* |
| * This is an indirect pointer --- dereference it |
| */ |
| struct varatt_indirect redirect; |
| |
| VARATT_EXTERNAL_GET_POINTER(redirect, attr); |
| attr = (struct varlena *) redirect.pointer; |
| |
| /* nested indirect Datums aren't allowed */ |
| Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); |
| |
| /* recurse in case value is still extended in some other way */ |
| attr = detoast_attr(attr); |
| |
| /* if it isn't, we'd better copy it */ |
| if (attr == (struct varlena *) redirect.pointer) |
| { |
| struct varlena *result; |
| |
| result = (struct varlena *) palloc(VARSIZE_ANY(attr)); |
| memcpy(result, attr, VARSIZE_ANY(attr)); |
| attr = result; |
| } |
| } |
| else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) |
| { |
| /* |
| * This is an expanded-object pointer --- get flat format |
| */ |
| attr = detoast_external_attr(attr); |
| /* flatteners are not allowed to produce compressed/short output */ |
| Assert(!VARATT_IS_EXTENDED(attr)); |
| } |
| else if (VARATT_IS_COMPRESSED(attr)) |
| { |
| /* |
| * This is a compressed value inside of the main tuple |
| */ |
| attr = toast_decompress_datum(attr); |
| } |
| else if (VARATT_IS_SHORT(attr)) |
| { |
| /* |
| * This is a short-header varlena --- convert to 4-byte header format |
| */ |
| Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT; |
| Size new_size = data_size + VARHDRSZ; |
| struct varlena *new_attr; |
| |
| new_attr = (struct varlena *) palloc(new_size); |
| SET_VARSIZE(new_attr, new_size); |
| memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size); |
| attr = new_attr; |
| } |
| |
| return attr; |
| } |
| |
| |
| /* ---------- |
| * detoast_attr_slice - |
| * |
| * Public entry point to get back part of a toasted value |
| * from compression or external storage. |
| * |
| * sliceoffset is where to start (zero or more) |
| * If slicelength < 0, return everything beyond sliceoffset |
| * ---------- |
| */ |
| struct varlena * |
| detoast_attr_slice(struct varlena *attr, |
| int32 sliceoffset, int32 slicelength) |
| { |
| struct varlena *preslice; |
| struct varlena *result; |
| char *attrdata; |
| int32 slicelimit; |
| int32 attrsize; |
| |
| if (sliceoffset < 0) |
| elog(ERROR, "invalid sliceoffset: %d", sliceoffset); |
| |
| /* |
| * Compute slicelimit = offset + length, or -1 if we must fetch all of the |
| * value. In case of integer overflow, we must fetch all. |
| */ |
| if (slicelength < 0) |
| slicelimit = -1; |
| else if (pg_add_s32_overflow(sliceoffset, slicelength, &slicelimit)) |
| slicelength = slicelimit = -1; |
| |
| if (VARATT_IS_EXTERNAL_ONDISK(attr)) |
| { |
| struct varatt_external toast_pointer; |
| |
| VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
| |
| /* fast path for non-compressed external datums */ |
| if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) |
| return toast_fetch_datum_slice(attr, sliceoffset, slicelength); |
| |
| /* |
| * For compressed values, we need to fetch enough slices to decompress |
| * at least the requested part (when a prefix is requested). |
| * Otherwise, just fetch all slices. |
| */ |
| if (slicelimit >= 0) |
| { |
| int32 max_size = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); |
| |
| /* |
| * Determine maximum amount of compressed data needed for a prefix |
| * of a given length (after decompression). |
| * |
| * At least for now, if it's LZ4 data, we'll have to fetch the |
| * whole thing, because there doesn't seem to be an API call to |
| * determine how much compressed data we need to be sure of being |
| * able to decompress the required slice. |
| */ |
| if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) == |
| TOAST_PGLZ_COMPRESSION_ID) |
| max_size = pglz_maximum_compressed_size(slicelimit, max_size); |
| |
| /* |
| * Fetch enough compressed slices (compressed marker will get set |
| * automatically). |
| */ |
| preslice = toast_fetch_datum_slice(attr, 0, max_size); |
| } |
| else |
| preslice = toast_fetch_datum(attr); |
| } |
| else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) |
| { |
| struct varatt_indirect redirect; |
| |
| VARATT_EXTERNAL_GET_POINTER(redirect, attr); |
| |
| /* nested indirect Datums aren't allowed */ |
| Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer)); |
| |
| return detoast_attr_slice(redirect.pointer, |
| sliceoffset, slicelength); |
| } |
| else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) |
| { |
| /* pass it off to detoast_external_attr to flatten */ |
| preslice = detoast_external_attr(attr); |
| } |
| else |
| preslice = attr; |
| |
| Assert(!VARATT_IS_EXTERNAL(preslice)); |
| |
| if (VARATT_IS_COMPRESSED(preslice)) |
| { |
| struct varlena *tmp = preslice; |
| |
| /* Decompress enough to encompass the slice and the offset */ |
| if (slicelimit >= 0) |
| preslice = toast_decompress_datum_slice(tmp, slicelimit); |
| else |
| preslice = toast_decompress_datum(tmp); |
| |
| if (tmp != attr) |
| pfree(tmp); |
| } |
| |
| if (VARATT_IS_SHORT(preslice)) |
| { |
| attrdata = VARDATA_SHORT(preslice); |
| attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT; |
| } |
| else |
| { |
| attrdata = VARDATA(preslice); |
| attrsize = VARSIZE(preslice) - VARHDRSZ; |
| } |
| |
| /* slicing of datum for compressed cases and plain value */ |
| |
| if (sliceoffset >= attrsize) |
| { |
| sliceoffset = 0; |
| slicelength = 0; |
| } |
| else if (slicelength < 0 || slicelimit > attrsize) |
| slicelength = attrsize - sliceoffset; |
| |
| result = (struct varlena *) palloc(slicelength + VARHDRSZ); |
| SET_VARSIZE(result, slicelength + VARHDRSZ); |
| |
| memcpy(VARDATA(result), attrdata + sliceoffset, slicelength); |
| |
| if (preslice != attr) |
| pfree(preslice); |
| |
| return result; |
| } |
| |
| /* ---------- |
| * toast_fetch_datum - |
| * |
| * Reconstruct an in memory Datum from the chunks saved |
| * in the toast relation |
| * ---------- |
| */ |
| static struct varlena * |
| toast_fetch_datum(struct varlena *attr) |
| { |
| Relation toastrel; |
| struct varlena *result; |
| struct varatt_external toast_pointer; |
| int32 attrsize; |
| |
| if (!VARATT_IS_EXTERNAL_ONDISK(attr)) |
| elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums"); |
| |
| /* Must copy to access aligned fields */ |
| VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
| |
| attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); |
| |
| result = (struct varlena *) palloc(attrsize + VARHDRSZ); |
| |
| if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) |
| SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ); |
| else |
| SET_VARSIZE(result, attrsize + VARHDRSZ); |
| |
| if (attrsize == 0) |
| return result; /* Probably shouldn't happen, but just in |
| * case. */ |
| |
| /* |
| * Open the toast relation and its indexes |
| */ |
| toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock); |
| |
| /* Fetch all chunks */ |
| table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid, |
| attrsize, 0, attrsize, result); |
| |
| /* Close toast table */ |
| table_close(toastrel, AccessShareLock); |
| |
| return result; |
| } |
| |
| /* ---------- |
| * toast_fetch_datum_slice - |
| * |
| * Reconstruct a segment of a Datum from the chunks saved |
| * in the toast relation |
| * |
| * Note that this function supports non-compressed external datums |
| * and compressed external datums (in which case the requested slice |
| * has to be a prefix, i.e. sliceoffset has to be 0). |
| * ---------- |
| */ |
| static struct varlena * |
| toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, |
| int32 slicelength) |
| { |
| Relation toastrel; |
| struct varlena *result; |
| struct varatt_external toast_pointer; |
| int32 attrsize; |
| |
| if (!VARATT_IS_EXTERNAL_ONDISK(attr)) |
| elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums"); |
| |
| /* Must copy to access aligned fields */ |
| VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
| |
| /* |
| * It's nonsense to fetch slices of a compressed datum unless when it's a |
| * prefix -- this isn't lo_* we can't return a compressed datum which is |
| * meaningful to toast later. |
| */ |
| Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset); |
| |
| attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); |
| |
| if (sliceoffset >= attrsize) |
| { |
| sliceoffset = 0; |
| slicelength = 0; |
| } |
| |
| /* |
| * When fetching a prefix of a compressed external datum, account for the |
| * space required by va_tcinfo, which is stored at the beginning as an |
| * int32 value. |
| */ |
| if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && slicelength > 0) |
| slicelength = slicelength + sizeof(int32); |
| |
| /* |
| * Adjust length request if needed. (Note: our sole caller, |
| * detoast_attr_slice, protects us against sliceoffset + slicelength |
| * overflowing.) |
| */ |
| if (((sliceoffset + slicelength) > attrsize) || slicelength < 0) |
| slicelength = attrsize - sliceoffset; |
| |
| result = (struct varlena *) palloc(slicelength + VARHDRSZ); |
| |
| if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) |
| SET_VARSIZE_COMPRESSED(result, slicelength + VARHDRSZ); |
| else |
| SET_VARSIZE(result, slicelength + VARHDRSZ); |
| |
| if (slicelength == 0) |
| return result; /* Can save a lot of work at this point! */ |
| |
| /* Open the toast relation */ |
| toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock); |
| |
| /* Fetch all chunks */ |
| table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid, |
| attrsize, sliceoffset, slicelength, |
| result); |
| |
| /* Close toast table */ |
| table_close(toastrel, AccessShareLock); |
| |
| return result; |
| } |
| |
| /* ---------- |
| * toast_decompress_datum - |
| * |
| * Decompress a compressed version of a varlena datum |
| */ |
| static struct varlena * |
| toast_decompress_datum(struct varlena *attr) |
| { |
| ToastCompressionId cmid; |
| |
| Assert(VARATT_IS_COMPRESSED(attr)); |
| |
| /* |
| * Fetch the compression method id stored in the compression header and |
| * decompress the data using the appropriate decompression routine. |
| */ |
| cmid = TOAST_COMPRESS_METHOD(attr); |
| switch (cmid) |
| { |
| case TOAST_PGLZ_COMPRESSION_ID: |
| return pglz_decompress_datum(attr); |
| case TOAST_LZ4_COMPRESSION_ID: |
| return lz4_decompress_datum(attr); |
| default: |
| elog(ERROR, "invalid compression method id %d", cmid); |
| return NULL; /* keep compiler quiet */ |
| } |
| } |
| |
| |
| /* ---------- |
| * toast_decompress_datum_slice - |
| * |
| * Decompress the front of a compressed version of a varlena datum. |
| * offset handling happens in detoast_attr_slice. |
| * Here we just decompress a slice from the front. |
| */ |
| static struct varlena * |
| toast_decompress_datum_slice(struct varlena *attr, int32 slicelength) |
| { |
| ToastCompressionId cmid; |
| |
| Assert(VARATT_IS_COMPRESSED(attr)); |
| |
| /* |
| * Some callers may pass a slicelength that's more than the actual |
| * decompressed size. If so, just decompress normally. This avoids |
| * possibly allocating a larger-than-necessary result object, and may be |
| * faster and/or more robust as well. Notably, some versions of liblz4 |
| * have been seen to give wrong results if passed an output size that is |
| * more than the data's true decompressed size. |
| */ |
| if ((uint32) slicelength >= TOAST_COMPRESS_EXTSIZE(attr)) |
| return toast_decompress_datum(attr); |
| |
| /* |
| * Fetch the compression method id stored in the compression header and |
| * decompress the data slice using the appropriate decompression routine. |
| */ |
| cmid = TOAST_COMPRESS_METHOD(attr); |
| switch (cmid) |
| { |
| case TOAST_PGLZ_COMPRESSION_ID: |
| return pglz_decompress_datum_slice(attr, slicelength); |
| case TOAST_LZ4_COMPRESSION_ID: |
| return lz4_decompress_datum_slice(attr, slicelength); |
| default: |
| elog(ERROR, "invalid compression method id %d", cmid); |
| return NULL; /* keep compiler quiet */ |
| } |
| } |
| |
| /* ---------- |
| * toast_raw_datum_size - |
| * |
| * Return the raw (detoasted) size of a varlena datum |
| * (including the VARHDRSZ header) |
| * ---------- |
| */ |
| Size |
| toast_raw_datum_size(Datum value) |
| { |
| struct varlena *attr = (struct varlena *) DatumGetPointer(value); |
| Size result; |
| |
| if (VARATT_IS_EXTERNAL_ONDISK(attr)) |
| { |
| /* va_rawsize is the size of the original datum -- including header */ |
| struct varatt_external toast_pointer; |
| |
| VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
| result = toast_pointer.va_rawsize; |
| } |
| else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) |
| { |
| struct varatt_indirect toast_pointer; |
| |
| VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
| |
| /* nested indirect Datums aren't allowed */ |
| Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer)); |
| |
| return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer)); |
| } |
| else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) |
| { |
| result = EOH_get_flat_size(DatumGetEOHP(value)); |
| } |
| else if (VARATT_IS_COMPRESSED(attr)) |
| { |
| /* here, va_rawsize is just the payload size */ |
| result = VARDATA_COMPRESSED_GET_EXTSIZE(attr) + VARHDRSZ; |
| } |
| else if (VARATT_IS_SHORT(attr)) |
| { |
| /* |
| * we have to normalize the header length to VARHDRSZ or else the |
| * callers of this function will be confused. |
| */ |
| result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ; |
| } |
| else |
| { |
| /* plain untoasted datum */ |
| result = VARSIZE(attr); |
| } |
| return result; |
| } |
| |
| /* ---------- |
| * toast_datum_size |
| * |
| * Return the physical storage size (possibly compressed) of a varlena datum |
| * ---------- |
| */ |
| Size |
| toast_datum_size(Datum value) |
| { |
| struct varlena *attr = (struct varlena *) DatumGetPointer(value); |
| Size result; |
| |
| if (VARATT_IS_EXTERNAL_ONDISK(attr)) |
| { |
| /* |
| * Attribute is stored externally - return the extsize whether |
| * compressed or not. We do not count the size of the toast pointer |
| * ... should we? |
| */ |
| struct varatt_external toast_pointer; |
| |
| VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
| result = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); |
| } |
| else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) |
| { |
| struct varatt_indirect toast_pointer; |
| |
| VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
| |
| /* nested indirect Datums aren't allowed */ |
| Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); |
| |
| return toast_datum_size(PointerGetDatum(toast_pointer.pointer)); |
| } |
| else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) |
| { |
| result = EOH_get_flat_size(DatumGetEOHP(value)); |
| } |
| else if (VARATT_IS_SHORT(attr)) |
| { |
| result = VARSIZE_SHORT(attr); |
| } |
| else |
| { |
| /* |
| * Attribute is stored inline either compressed or not, just calculate |
| * the size of the datum in either case. |
| */ |
| result = VARSIZE(attr); |
| } |
| return result; |
| } |
| |
| /* |
| * If this function is changed then update varattrib_untoast_len as well |
| */ |
| void |
| varattrib_untoast_ptr_len(Datum d, char **datastart, int *len, void **tofree) |
| { |
| if (DatumGetPointer(d) == NULL) |
| { |
| ereport(ERROR, |
| (errcode(ERRCODE_INTERNAL_ERROR), |
| errmsg(" Unable to detoast datum "), |
| errprintstack(true))); |
| } |
| |
| struct varlena *va = (struct varlena *) DatumGetPointer(d); |
| struct varlena *attr = va; |
| |
| *len = -1; |
| *tofree = NULL; |
| |
| if (VARATT_IS_EXTENDED(va)) |
| { |
| if (VARATT_IS_EXTERNAL(va)) |
| { |
| attr = toast_fetch_datum(va); |
| /* toast_fetch_datum will palloc, so set it up for free */ |
| *tofree = attr; |
| } |
| |
| if (VARATT_IS_COMPRESSED(attr)) |
| { |
| struct varlena *tmp = attr; |
| attr = toast_decompress_datum(tmp); |
| |
| /* If tofree is set, that is, we get it from toast_fetch_datum. |
| * We need to free it here |
| */ |
| if(*tofree) |
| pfree(*tofree); |
| *tofree = attr; |
| } |
| else if (VARATT_IS_SHORT(attr)) |
| { |
| /* Warning! Return unaligned pointer! */ |
| *len = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT; |
| *datastart = VARDATA_SHORT(attr); |
| attr = NULL; |
| } |
| } |
| |
| if (*len == -1) |
| { |
| *datastart = VARDATA(attr); |
| *len = VARSIZE(attr) - VARHDRSZ; |
| } |
| |
| Assert(*len >= 0); |
| } |