src/backend/access/common/toast_internals.c - cloudberry - Git at Google

 /*-------------------------------------------------------------------------
  *
  * toast_internals.c
  *	  Functions for internal use by the TOAST system.
  *
  * Copyright (c) 2000-2023, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
  *	  src/backend/access/common/toast_internals.c
  *
  *-------------------------------------------------------------------------
  */

 #include "postgres.h"

 #include "access/detoast.h"
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/heaptoast.h"
 #include "access/table.h"
 #include "access/toast_internals.h"
 #include "access/xact.h"
 #include "catalog/catalog.h"
 #include "common/pg_lzcompress.h"
 #include "miscadmin.h"
 #include "utils/fmgroids.h"
 #include "utils/rel.h"
 #include "utils/snapmgr.h"

 static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
 static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);

 /* ----------
  * toast_compress_datum -
  *
  *	Try to produce a compressed copy of an inline varlena datum.
  *
  * value: the datum to compress.  Its payload size is read with
  *		VARSIZE_ANY_EXHDR, so a short-header varlena is acceptable, but it
  *		must be neither external nor already compressed (both asserted).
  * cmethod: requested compression method; when it is not a valid method,
  *		default_toast_compression is used instead.
  *
  *	Returns the compressed varlena, or a NULL pointer Datum if the
  *	compression routine returned NULL or the compressed result did not pass
  *	the size recheck below.  An unrecognized method raises an ERROR.
  * ----------
  */
 Datum
 toast_compress_datum(Datum value, char cmethod)
 {
 	const struct varlena *src = (const struct varlena *) DatumGetPointer(value);
 	struct varlena *compressed = NULL;
 	ToastCompressionId method_id = TOAST_INVALID_COMPRESSION_ID;
 	int32		rawsize;

 	Assert(!VARATT_IS_EXTERNAL(src));
 	Assert(!VARATT_IS_COMPRESSED(src));

 	/* Payload size only; works for both short and regular headers */
 	rawsize = VARSIZE_ANY_EXHDR(src);

 	/* Fall back to the configured default when no valid method was given */
 	if (!CompressionMethodIsValid(cmethod))
 		cmethod = default_toast_compression;

 	/*
 	 * Dispatch to the compressor for the chosen method, remembering which
 	 * compression ID must be recorded in the result's header.
 	 */
 	switch (cmethod)
 	{
 		case TOAST_PGLZ_COMPRESSION:
 			compressed = pglz_compress_datum(src);
 			method_id = TOAST_PGLZ_COMPRESSION_ID;
 			break;
 		case TOAST_LZ4_COMPRESSION:
 			compressed = lz4_compress_datum(src);
 			method_id = TOAST_LZ4_COMPRESSION_ID;
 			break;
 		default:
 			elog(ERROR, "invalid compression method %c", cmethod);
 	}

 	/* The compressor itself declined to produce a result */
 	if (compressed == NULL)
 		return PointerGetDatum(NULL);

 	/*
 	 * Recheck the real size even though the compressor reported success: it
 	 * may be content with saving a single byte, which can become a net loss
 	 * once header and alignment padding are counted.  In the worst case the
 	 * compressed form needs three padding bytes (its header is already part
 	 * of VARSIZE(compressed)), while the uncompressed form could get away
 	 * with a one-byte header and no padding.  Hence anything that does not
 	 * save more than 2 bytes is treated as incompressible.
 	 */
 	if (VARSIZE(compressed) >= rawsize - 2)
 	{
 		pfree(compressed);
 		return PointerGetDatum(NULL);
 	}

 	/* Worthwhile: stamp raw size and method into the compressed header */
 	Assert(method_id != TOAST_INVALID_COMPRESSION_ID);
 	TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(compressed, rawsize, method_id);
 	return PointerGetDatum(compressed);
 }

 /* ----------
  * toast_save_datum -
  *
  *	Save one single datum into the secondary relation and return
  *	a Datum reference for it.
  *
  * rel: the main relation we're working with (not the toast rel!)
  * value: datum to be pushed to toast storage; must not itself be an
  *		external datum (asserted), but may be short-header or compressed
  * oldexternal: if not NULL, toast pointer previously representing the datum
  * options: options to be passed to heap_insert() for toast rows
  *
  * The result is a freshly palloc'd on-disk TOAST pointer of
  * TOAST_POINTER_SIZE bytes.  The toast relation and its indexes are closed
  * before returning, but the locks taken on them are not released here.
  * ----------
  */
 Datum
 toast_save_datum(Relation rel, Datum value,
 				 struct varlena *oldexternal, int options)
 {
 	Relation	toastrel;
 	Relation   *toastidxs;
 	HeapTuple	toasttup;
 	TupleDesc	toasttupDesc;
 	Datum		t_values[3];
 	bool		t_isnull[3];
 	TransactionId xid = GetCurrentTransactionId();
 	CommandId	mycid = GetCurrentCommandId(true);
 	struct varlena *result;
 	struct varatt_external toast_pointer;
 	union
 	{
 		struct varlena hdr;
 		/* this is to make the union big enough for a chunk: */
 		char		data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ];
 		/* ensure union is aligned well enough: */
 		int32		align_it;
 	}			chunk_data;
 	int32		chunk_size;
 	int32		chunk_seq = 0;
 	char	   *data_p;
 	int32		data_todo;
 	Pointer		dval = DatumGetPointer(value);
 	int			num_indexes;
 	int			validIndex;

 	/*
 	 * The varlena inspection macros operate on pointers, so test the
 	 * converted pointer rather than the raw Datum.
 	 */
 	Assert(!VARATT_IS_EXTERNAL(dval));

 	/*
 	 * Open the toast relation and its indexes.  We can use the index to check
 	 * uniqueness of the OID we assign to the toasted item, even though it has
 	 * additional columns besides OID.
 	 */
 	toastrel = table_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
 	toasttupDesc = toastrel->rd_att;

 	/* Open all the toast indexes and look for the valid one */
 	validIndex = toast_open_indexes(toastrel,
 									RowExclusiveLock,
 									&toastidxs,
 									&num_indexes);

 	/*
 	 * Get the data pointer and length, and compute va_rawsize and va_extinfo.
 	 *
 	 * va_rawsize is the size of the equivalent fully uncompressed datum, so
 	 * we have to adjust for short headers.
 	 *
 	 * va_extinfo stores the actual size of the data payload in the toast
 	 * records and the compression method in first 2 bits if data is
 	 * compressed.
 	 */
 	if (VARATT_IS_SHORT(dval))
 	{
 		data_p = VARDATA_SHORT(dval);
 		data_todo = VARSIZE_SHORT(dval) - VARHDRSZ_SHORT;
 		toast_pointer.va_rawsize = data_todo + VARHDRSZ;	/* as if not short */
 		toast_pointer.va_extinfo = data_todo;
 	}
 	else if (VARATT_IS_COMPRESSED(dval))
 	{
 		data_p = VARDATA(dval);
 		data_todo = VARSIZE(dval) - VARHDRSZ;
 		/* rawsize in a compressed datum is just the size of the payload */
 		toast_pointer.va_rawsize = VARDATA_COMPRESSED_GET_EXTSIZE(dval) + VARHDRSZ;

 		/* set external size and compression method */
 		VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, data_todo,
 													 VARDATA_COMPRESSED_GET_COMPRESS_METHOD(dval));
 		/* Assert that the numbers look like it's compressed */
 		Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
 	}
 	else
 	{
 		data_p = VARDATA(dval);
 		data_todo = VARSIZE(dval) - VARHDRSZ;
 		toast_pointer.va_rawsize = VARSIZE(dval);
 		toast_pointer.va_extinfo = data_todo;
 	}

 	/*
 	 * Insert the correct table OID into the result TOAST pointer.
 	 *
 	 * Normally this is the actual OID of the target toast table, but during
 	 * table-rewriting operations such as CLUSTER, we have to insert the OID
 	 * of the table's real permanent toast table instead.  rd_toastoid is set
 	 * if we have to substitute such an OID.
 	 */
 	if (OidIsValid(rel->rd_toastoid))
 		toast_pointer.va_toastrelid = rel->rd_toastoid;
 	else
 		toast_pointer.va_toastrelid = RelationGetRelid(toastrel);

 	/*
 	 * Choose an OID to use as the value ID for this toast value.
 	 *
 	 * Normally we just choose an unused OID within the toast table.  But
 	 * during table-rewriting operations where we are preserving an existing
 	 * toast table OID, we want to preserve toast value OIDs too.  So, if
 	 * rd_toastoid is set and we had a prior external value from that same
 	 * toast table, re-use its value ID.  If we didn't have a prior external
 	 * value (which is a corner case, but possible if the table's attstorage
 	 * options have been changed), we have to pick a value ID that doesn't
 	 * conflict with either new or existing toast value OIDs.
 	 */
 	if (!OidIsValid(rel->rd_toastoid))
 	{
 		/* normal case: just choose an unused OID */
 		toast_pointer.va_valueid =
 			GetNewOidWithIndex(toastrel,
 							   RelationGetRelid(toastidxs[validIndex]),
 							   (AttrNumber) 1);
 	}
 	else
 	{
 		/* rewrite case: check to see if value was in old toast table */
 		toast_pointer.va_valueid = InvalidOid;
 		if (oldexternal != NULL)
 		{
 			struct varatt_external old_toast_pointer;

 			Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
 			/* Must copy to access aligned fields */
 			VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
 			if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
 			{
 				/* This value came from the old toast table; reuse its OID */
 				toast_pointer.va_valueid = old_toast_pointer.va_valueid;

 				/*
 				 * There is a corner case here: the table rewrite might have
 				 * to copy both live and recently-dead versions of a row, and
 				 * those versions could easily reference the same toast value.
 				 * When we copy the second or later version of such a row,
 				 * reusing the OID will mean we select an OID that's already
 				 * in the new toast table.  Check for that, and if so, just
 				 * fall through without writing the data again.
 				 *
 				 * While annoying and ugly-looking, this is a good thing
 				 * because it ensures that we wind up with only one copy of
 				 * the toast value when there is only one copy in the old
 				 * toast table.  Before we detected this case, we'd have made
 				 * multiple copies, wasting space; and what's worse, the
 				 * copies belonging to already-deleted heap tuples would not
 				 * be reclaimed by VACUUM.
 				 */
 				if (toastrel_valueid_exists(toastrel,
 											toast_pointer.va_valueid))
 				{
 					/* Match, so short-circuit the data storage loop below */
 					data_todo = 0;
 				}
 			}
 		}
 		if (toast_pointer.va_valueid == InvalidOid)
 		{
 			/*
 			 * new value; must choose an OID that doesn't conflict in either
 			 * old or new toast table
 			 */
 			do
 			{
 				toast_pointer.va_valueid =
 					GetNewOidWithIndex(toastrel,
 									   RelationGetRelid(toastidxs[validIndex]),
 									   (AttrNumber) 1);
 			} while (toastid_valueid_exists(rel->rd_toastoid,
 											toast_pointer.va_valueid));
 		}
 	}

 	/*
 	 * Initialize constant parts of the tuple data.  Column 0 is the value
 	 * ID and column 2 always points at the reusable chunk buffer; column 1
 	 * (the chunk sequence number) is filled in per chunk below.
 	 */
 	t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid);
 	t_values[2] = PointerGetDatum(&chunk_data);
 	t_isnull[0] = false;
 	t_isnull[1] = false;
 	t_isnull[2] = false;

 	/*
 	 * Split up the item into chunks
 	 */
 	while (data_todo > 0)
 	{
 		int			i;

 		CHECK_FOR_INTERRUPTS();

 		/*
 		 * Calculate the size of this chunk
 		 */
 		chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo);

 		/*
 		 * Build a tuple and store it
 		 */
 		t_values[1] = Int32GetDatum(chunk_seq++);
 		SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ);
 		memcpy(VARDATA(&chunk_data), data_p, chunk_size);
 		toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);

 		heap_insert(toastrel, toasttup, mycid, options, NULL, xid);

 		/*
 		 * Create the index entry.  We cheat a little here by not using
 		 * FormIndexDatum: this relies on the knowledge that the index columns
 		 * are the same as the initial columns of the table for all the
 		 * indexes.  We also cheat by not providing an IndexInfo: this is okay
 		 * for now because btree doesn't need one, but we might have to be
 		 * more honest someday.
 		 *
 		 * Note also that there had better not be any user-created index on
 		 * the TOAST table, since we don't bother to update anything else.
 		 */
 		for (i = 0; i < num_indexes; i++)
 		{
 			/* Only index relations marked as ready can be updated */
 			if (toastidxs[i]->rd_index->indisready)
 				index_insert(toastidxs[i], t_values, t_isnull,
 							 &(toasttup->t_self),
 							 toastrel,
 							 toastidxs[i]->rd_index->indisunique ?
 							 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
 							 false, NULL);
 		}

 		/*
 		 * Free memory
 		 */
 		heap_freetuple(toasttup);

 		/*
 		 * Move on to next chunk
 		 */
 		data_todo -= chunk_size;
 		data_p += chunk_size;
 	}

 	/*
 	 * Done - close toast relation and its indexes but keep the lock until
 	 * commit, so as a concurrent reindex done directly on the toast relation
 	 * would be able to wait for this transaction.
 	 */
 	toast_close_indexes(toastidxs, num_indexes, NoLock);
 	table_close(toastrel, NoLock);

 	/*
 	 * Create the TOAST pointer value that we'll return
 	 */
 	result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
 	SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK);
 	memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));

 	return PointerGetDatum(result);
 }

 /* ----------
  * toast_delete_datum -
  *
  *	Remove every chunk of one externally stored value.
  *
  * rel: not referenced here; the toast relation is located through the
  *		va_toastrelid recorded in the toast pointer itself
  * value: the datum to delete; anything that is not an on-disk external
  *		toast pointer is silently ignored
  * is_speculative: when true, chunks are removed with
  *		heap_abort_speculative() instead of simple_heap_delete()
  * ----------
  */
 void
 toast_delete_datum(Relation rel, Datum value, bool is_speculative)
 {
 	struct varlena *extdatum = (struct varlena *) DatumGetPointer(value);
 	struct varatt_external tptr;
 	Relation	toastrel;
 	Relation   *toastidxs;
 	int			num_indexes;
 	int			validIndex;
 	ScanKeyData chunk_key;
 	SysScanDesc chunk_scan;
 	HeapTuple	chunk;
 	SnapshotData toast_snap;

 	/* Nothing to do unless the value really lives in a toast table */
 	if (!VARATT_IS_EXTERNAL_ONDISK(extdatum))
 		return;

 	/* Copy the pointer out so that its fields can be read aligned */
 	VARATT_EXTERNAL_GET_POINTER(tptr, extdatum);

 	/* Open the toast relation named by the pointer, plus its indexes */
 	toastrel = table_open(tptr.va_toastrelid, RowExclusiveLock);
 	validIndex = toast_open_indexes(toastrel,
 									RowExclusiveLock,
 									&toastidxs,
 									&num_indexes);

 	/* Scan key: first column equal to the value's va_valueid */
 	ScanKeyInit(&chunk_key,
 				(AttrNumber) 1,
 				BTEqualStrategyNumber, F_OIDEQ,
 				ObjectIdGetDatum(tptr.va_valueid));

 	/*
 	 * Visit all matching chunks.  The visiting order is irrelevant for
 	 * deletion, but as the index is already locked we may as well go
 	 * through systable_beginscan_ordered.
 	 */
 	init_toast_snapshot(&toast_snap);
 	chunk_scan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
 											&toast_snap, 1, &chunk_key);
 	for (;;)
 	{
 		chunk = systable_getnext_ordered(chunk_scan, ForwardScanDirection);
 		if (chunk == NULL)
 			break;

 		/* Got a chunk; get rid of it */
 		if (is_speculative)
 			heap_abort_speculative(toastrel, &chunk->t_self);
 		else
 			simple_heap_delete(toastrel, &chunk->t_self);
 	}

 	/*
 	 * Finish the scan and close the relations, but hold on to the locks
 	 * until commit so that a concurrent reindex run directly on the toast
 	 * relation is able to wait for this transaction.
 	 */
 	systable_endscan_ordered(chunk_scan);
 	toast_close_indexes(toastidxs, num_indexes, NoLock);
 	table_close(toastrel, NoLock);
 }

 /* ----------
  * toastrel_valueid_exists -
  *
  *	Report whether the given (already open) toast relation contains any row
  *	whose first column equals valueid.  The scan uses SnapshotAny, so dead
  *	rows count as well as live ones; see notes for GetNewOidWithIndex().
  * ----------
  */
 static bool
 toastrel_valueid_exists(Relation toastrel, Oid valueid)
 {
 	Relation   *idxs;
 	int			nidxs;
 	int			valid;
 	ScanKeyData key;
 	SysScanDesc scan;
 	bool		found;

 	/* Open the indexes to learn which one is the valid one */
 	valid = toast_open_indexes(toastrel, RowExclusiveLock, &idxs, &nidxs);

 	/* Look for chunks whose value ID column matches */
 	ScanKeyInit(&key,
 				(AttrNumber) 1,
 				BTEqualStrategyNumber, F_OIDEQ,
 				ObjectIdGetDatum(valueid));

 	/* A single hit is enough to answer the question */
 	scan = systable_beginscan(toastrel,
 							  RelationGetRelid(idxs[valid]),
 							  true, SnapshotAny, 1, &key);
 	found = (systable_getnext(scan) != NULL);
 	systable_endscan(scan);

 	/* Close the indexes again, giving back the lock taken above */
 	toast_close_indexes(idxs, nidxs, RowExclusiveLock);

 	return found;
 }

 /* ----------
  * toastid_valueid_exists -
  *
  *	Same check as toastrel_valueid_exists(), but starting from the toast
  *	relation's OID: the relation is opened with AccessShareLock for the
  *	duration of the check and closed again with the same lock mode.
  * ----------
  */
 static bool
 toastid_valueid_exists(Oid toastrelid, Oid valueid)
 {
 	Relation	rel = table_open(toastrelid, AccessShareLock);
 	bool		exists = toastrel_valueid_exists(rel, valueid);

 	table_close(rel, AccessShareLock);

 	return exists;
 }

 /* ----------
  * toast_get_valid_index
  *
  *	Return the OID of the valid index of the toast relation identified by
  *	toastoid.  A toast relation can have only one valid index at the same
  *	time.
  *
  *	The relation and all of its indexes are opened with the given lock mode
  *	and closed again with NoLock before returning.
  */
 Oid
 toast_get_valid_index(Oid toastoid, LOCKMODE lock)
 {
 	Relation	rel;
 	Relation   *idxs;
 	int			nidxs;
 	int			valid;
 	Oid			index_oid;

 	rel = table_open(toastoid, lock);

 	/* Open every index and pick out the valid one */
 	valid = toast_open_indexes(rel, lock, &idxs, &nidxs);
 	index_oid = RelationGetRelid(idxs[valid]);

 	/* Release the relcache references, indexes first */
 	toast_close_indexes(idxs, nidxs, NoLock);
 	table_close(rel, NoLock);

 	return index_oid;
 }

 /* ----------
  * toast_open_indexes
  *
  *	Open every index of the given toast relation with the requested lock
  *	mode.
  *
  * toastrel: the open toast relation
  * lock: lock mode passed to index_open() for each index
  * toastidxs: output; receives a palloc'd array of the opened indexes
  * num_indexes: output; receives the number of entries in that array
  *
  *	Returns the array position of the first index marked valid, and raises
  *	an ERROR if none is.  Closing the indexes and freeing the array is the
  *	caller's job.
  */
 int
 toast_open_indexes(Relation toastrel,
 				   LOCKMODE lock,
 				   Relation **toastidxs,
 				   int *num_indexes)
 {
 	List	   *index_oids;
 	ListCell   *cell;
 	Relation   *opened;
 	int			count;
 	int			pos;
 	int			valid_pos = -1;

 	/* Collect the OIDs of all indexes on the toast relation */
 	index_oids = RelationGetIndexList(toastrel);
 	Assert(index_oids != NIL);

 	count = list_length(index_oids);
 	*num_indexes = count;

 	/* Allocate the output array and open each index into it */
 	opened = (Relation *) palloc(count * sizeof(Relation));
 	*toastidxs = opened;

 	pos = 0;
 	foreach(cell, index_oids)
 	{
 		opened[pos] = index_open(lfirst_oid(cell), lock);
 		pos++;
 	}

 	/* Stop at the first index flagged as valid */
 	for (pos = 0; pos < count && valid_pos < 0; pos++)
 	{
 		if (opened[pos]->rd_index->indisvalid)
 			valid_pos = pos;
 	}

 	/* The OID list has served its purpose now that the indexes are open */
 	list_free(index_oids);

 	/*
 	 * A toast relation is expected to have one valid index, so coming up
 	 * empty-handed means something is badly wrong.
 	 */
 	if (valid_pos < 0)
 		elog(ERROR, "no valid index found for toast relation with Oid %u",
 			 RelationGetRelid(toastrel));

 	return valid_pos;
 }

 /* ----------
  * toast_close_indexes
  *
  *	Counterpart of toast_open_indexes: close each of the num_indexes index
  *	relations in toastidxs using the given lock mode, then pfree the array
  *	itself.
  */
 void
 toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
 {
 	int			pos = 0;

 	/* Close the indexes in array order */
 	while (pos < num_indexes)
 	{
 		index_close(toastidxs[pos], lock);
 		pos++;
 	}

 	/* The array came from palloc in toast_open_indexes */
 	pfree(toastidxs);
 }

 /* ----------
  * init_toast_snapshot
  *
  *	Fill in *toast_snapshot as a TOAST snapshot.  A TOAST snapshot has to be
  *	derived from an MVCC snapshot; as we cannot tell which one would be the
  *	right one, the oldest is taken.  That is safe: the worst outcome is a
  *	"snapshot too old" error that could otherwise have been avoided.
  *
  *	Raises an ERROR when GetOldestSnapshot() returns no snapshot at all.
  */
 void
 init_toast_snapshot(Snapshot toast_snapshot)
 {
 	Snapshot	oldest = GetOldestSnapshot();

 	/*
 	 * A NULL result means the session holds no active snapshot.  One way to
 	 * get here is a procedure that fetches a toasted value into a local
 	 * variable, commits, and only then tries to detoast it.  That pattern is
 	 * unsafe: after the commit nothing stops the toast data from being
 	 * deleted, so detoasting *must* take place in the same transaction that
 	 * fetched the toast pointer.  Instead of papering over the problem we
 	 * report it.  (The protection is weak, since in many scenarios the
 	 * procedure will already have created a new transaction snapshot and the
 	 * mistake goes unnoticed.  Still, it beats nothing, and no code should be
 	 * spent on hiding the problem further.)
 	 */
 	if (oldest == NULL)
 		elog(ERROR, "cannot fetch toast data without an active snapshot");

 	/*
 	 * GetOldestSnapshot() can hand back a catalog snapshot even when that
 	 * snapshot is neither registered nor active.  Since a valid catalog
 	 * snapshot exists most of the time, this readily conceals bugs where no
 	 * snapshot was set up properly.  So also require that a registered or
 	 * active snapshot is present.
 	 */
 	Assert(HaveRegisteredOrActiveSnapshot());

 	InitToastSnapshot(*toast_snapshot, oldest->lsn, oldest->whenTaken);
 }
	/*-------------------------------------------------------------------------
	*
	* toast_internals.c
	* Functions for internal use by the TOAST system.
	*
	* Copyright (c) 2000-2023, PostgreSQL Global Development Group
	*
	* IDENTIFICATION
	* src/backend/access/common/toast_internals.c
	*
	*-------------------------------------------------------------------------
	*/

	#include "postgres.h"

	#include "access/detoast.h"
	#include "access/genam.h"
	#include "access/heapam.h"
	#include "access/heaptoast.h"
	#include "access/table.h"
	#include "access/toast_internals.h"
	#include "access/xact.h"
	#include "catalog/catalog.h"
	#include "common/pg_lzcompress.h"
	#include "miscadmin.h"
	#include "utils/fmgroids.h"
	#include "utils/rel.h"
	#include "utils/snapmgr.h"

	static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
	static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);

	/* ----------
	 * toast_compress_datum -
	 *
	 *	Try to produce a compressed copy of an inline varlena datum.
	 *
	 * value: the datum to compress.  Its payload size is read with
	 *		VARSIZE_ANY_EXHDR, so a short-header varlena is acceptable, but it
	 *		must be neither external nor already compressed (both asserted).
	 * cmethod: requested compression method; when it is not a valid method,
	 *		default_toast_compression is used instead.
	 *
	 *	Returns the compressed varlena, or a NULL pointer Datum if the
	 *	compression routine returned NULL or the compressed result did not pass
	 *	the size recheck below.  An unrecognized method raises an ERROR.
	 * ----------
	 */
	Datum
	toast_compress_datum(Datum value, char cmethod)
	{
		const struct varlena *src = (const struct varlena *) DatumGetPointer(value);
		struct varlena *compressed = NULL;
		ToastCompressionId method_id = TOAST_INVALID_COMPRESSION_ID;
		int32		rawsize;

		Assert(!VARATT_IS_EXTERNAL(src));
		Assert(!VARATT_IS_COMPRESSED(src));

		/* Payload size only; works for both short and regular headers */
		rawsize = VARSIZE_ANY_EXHDR(src);

		/* Fall back to the configured default when no valid method was given */
		if (!CompressionMethodIsValid(cmethod))
			cmethod = default_toast_compression;

		/*
		 * Dispatch to the compressor for the chosen method, remembering which
		 * compression ID must be recorded in the result's header.
		 */
		switch (cmethod)
		{
			case TOAST_PGLZ_COMPRESSION:
				compressed = pglz_compress_datum(src);
				method_id = TOAST_PGLZ_COMPRESSION_ID;
				break;
			case TOAST_LZ4_COMPRESSION:
				compressed = lz4_compress_datum(src);
				method_id = TOAST_LZ4_COMPRESSION_ID;
				break;
			default:
				elog(ERROR, "invalid compression method %c", cmethod);
		}

		/* The compressor itself declined to produce a result */
		if (compressed == NULL)
			return PointerGetDatum(NULL);

		/*
		 * Recheck the real size even though the compressor reported success: it
		 * may be content with saving a single byte, which can become a net loss
		 * once header and alignment padding are counted.  In the worst case the
		 * compressed form needs three padding bytes (its header is already part
		 * of VARSIZE(compressed)), while the uncompressed form could get away
		 * with a one-byte header and no padding.  Hence anything that does not
		 * save more than 2 bytes is treated as incompressible.
		 */
		if (VARSIZE(compressed) >= rawsize - 2)
		{
			pfree(compressed);
			return PointerGetDatum(NULL);
		}

		/* Worthwhile: stamp raw size and method into the compressed header */
		Assert(method_id != TOAST_INVALID_COMPRESSION_ID);
		TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(compressed, rawsize, method_id);
		return PointerGetDatum(compressed);
	}

	/* ----------
	 * toast_save_datum -
	 *
	 *	Save one single datum into the secondary relation and return
	 *	a Datum reference for it.
	 *
	 * rel: the main relation we're working with (not the toast rel!)
	 * value: datum to be pushed to toast storage; must not itself be an
	 *		external datum (asserted), but may be short-header or compressed
	 * oldexternal: if not NULL, toast pointer previously representing the datum
	 * options: options to be passed to heap_insert() for toast rows
	 *
	 * The result is a freshly palloc'd on-disk TOAST pointer of
	 * TOAST_POINTER_SIZE bytes.  The toast relation and its indexes are closed
	 * before returning, but the locks taken on them are not released here.
	 * ----------
	 */
	Datum
	toast_save_datum(Relation rel, Datum value,
					 struct varlena *oldexternal, int options)
	{
		Relation	toastrel;
		Relation   *toastidxs;
		HeapTuple	toasttup;
		TupleDesc	toasttupDesc;
		Datum		t_values[3];
		bool		t_isnull[3];
		TransactionId xid = GetCurrentTransactionId();
		CommandId	mycid = GetCurrentCommandId(true);
		struct varlena *result;
		struct varatt_external toast_pointer;
		union
		{
			struct varlena hdr;
			/* this is to make the union big enough for a chunk: */
			char		data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ];
			/* ensure union is aligned well enough: */
			int32		align_it;
		}			chunk_data;
		int32		chunk_size;
		int32		chunk_seq = 0;
		char	   *data_p;
		int32		data_todo;
		Pointer		dval = DatumGetPointer(value);
		int			num_indexes;
		int			validIndex;

		/*
		 * The varlena inspection macros operate on pointers, so test the
		 * converted pointer rather than the raw Datum.
		 */
		Assert(!VARATT_IS_EXTERNAL(dval));

		/*
		 * Open the toast relation and its indexes.  We can use the index to check
		 * uniqueness of the OID we assign to the toasted item, even though it has
		 * additional columns besides OID.
		 */
		toastrel = table_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
		toasttupDesc = toastrel->rd_att;

		/* Open all the toast indexes and look for the valid one */
		validIndex = toast_open_indexes(toastrel,
										RowExclusiveLock,
										&toastidxs,
										&num_indexes);

		/*
		 * Get the data pointer and length, and compute va_rawsize and va_extinfo.
		 *
		 * va_rawsize is the size of the equivalent fully uncompressed datum, so
		 * we have to adjust for short headers.
		 *
		 * va_extinfo stores the actual size of the data payload in the toast
		 * records and the compression method in first 2 bits if data is
		 * compressed.
		 */
		if (VARATT_IS_SHORT(dval))
		{
			data_p = VARDATA_SHORT(dval);
			data_todo = VARSIZE_SHORT(dval) - VARHDRSZ_SHORT;
			toast_pointer.va_rawsize = data_todo + VARHDRSZ;	/* as if not short */
			toast_pointer.va_extinfo = data_todo;
		}
		else if (VARATT_IS_COMPRESSED(dval))
		{
			data_p = VARDATA(dval);
			data_todo = VARSIZE(dval) - VARHDRSZ;
			/* rawsize in a compressed datum is just the size of the payload */
			toast_pointer.va_rawsize = VARDATA_COMPRESSED_GET_EXTSIZE(dval) + VARHDRSZ;

			/* set external size and compression method */
			VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, data_todo,
														 VARDATA_COMPRESSED_GET_COMPRESS_METHOD(dval));
			/* Assert that the numbers look like it's compressed */
			Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
		}
		else
		{
			data_p = VARDATA(dval);
			data_todo = VARSIZE(dval) - VARHDRSZ;
			toast_pointer.va_rawsize = VARSIZE(dval);
			toast_pointer.va_extinfo = data_todo;
		}

		/*
		 * Insert the correct table OID into the result TOAST pointer.
		 *
		 * Normally this is the actual OID of the target toast table, but during
		 * table-rewriting operations such as CLUSTER, we have to insert the OID
		 * of the table's real permanent toast table instead.  rd_toastoid is set
		 * if we have to substitute such an OID.
		 */
		if (OidIsValid(rel->rd_toastoid))
			toast_pointer.va_toastrelid = rel->rd_toastoid;
		else
			toast_pointer.va_toastrelid = RelationGetRelid(toastrel);

		/*
		 * Choose an OID to use as the value ID for this toast value.
		 *
		 * Normally we just choose an unused OID within the toast table.  But
		 * during table-rewriting operations where we are preserving an existing
		 * toast table OID, we want to preserve toast value OIDs too.  So, if
		 * rd_toastoid is set and we had a prior external value from that same
		 * toast table, re-use its value ID.  If we didn't have a prior external
		 * value (which is a corner case, but possible if the table's attstorage
		 * options have been changed), we have to pick a value ID that doesn't
		 * conflict with either new or existing toast value OIDs.
		 */
		if (!OidIsValid(rel->rd_toastoid))
		{
			/* normal case: just choose an unused OID */
			toast_pointer.va_valueid =
				GetNewOidWithIndex(toastrel,
								   RelationGetRelid(toastidxs[validIndex]),
								   (AttrNumber) 1);
		}
		else
		{
			/* rewrite case: check to see if value was in old toast table */
			toast_pointer.va_valueid = InvalidOid;
			if (oldexternal != NULL)
			{
				struct varatt_external old_toast_pointer;

				Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
				/* Must copy to access aligned fields */
				VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
				if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
				{
					/* This value came from the old toast table; reuse its OID */
					toast_pointer.va_valueid = old_toast_pointer.va_valueid;

					/*
					 * There is a corner case here: the table rewrite might have
					 * to copy both live and recently-dead versions of a row, and
					 * those versions could easily reference the same toast value.
					 * When we copy the second or later version of such a row,
					 * reusing the OID will mean we select an OID that's already
					 * in the new toast table.  Check for that, and if so, just
					 * fall through without writing the data again.
					 *
					 * While annoying and ugly-looking, this is a good thing
					 * because it ensures that we wind up with only one copy of
					 * the toast value when there is only one copy in the old
					 * toast table.  Before we detected this case, we'd have made
					 * multiple copies, wasting space; and what's worse, the
					 * copies belonging to already-deleted heap tuples would not
					 * be reclaimed by VACUUM.
					 */
					if (toastrel_valueid_exists(toastrel,
												toast_pointer.va_valueid))
					{
						/* Match, so short-circuit the data storage loop below */
						data_todo = 0;
					}
				}
			}
			if (toast_pointer.va_valueid == InvalidOid)
			{
				/*
				 * new value; must choose an OID that doesn't conflict in either
				 * old or new toast table
				 */
				do
				{
					toast_pointer.va_valueid =
						GetNewOidWithIndex(toastrel,
										   RelationGetRelid(toastidxs[validIndex]),
										   (AttrNumber) 1);
				} while (toastid_valueid_exists(rel->rd_toastoid,
												toast_pointer.va_valueid));
			}
		}

		/*
		 * Initialize constant parts of the tuple data.  Column 0 is the value
		 * ID and column 2 always points at the reusable chunk buffer; column 1
		 * (the chunk sequence number) is filled in per chunk below.
		 */
		t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid);
		t_values[2] = PointerGetDatum(&chunk_data);
		t_isnull[0] = false;
		t_isnull[1] = false;
		t_isnull[2] = false;

		/*
		 * Split up the item into chunks
		 */
		while (data_todo > 0)
		{
			int			i;

			CHECK_FOR_INTERRUPTS();

			/*
			 * Calculate the size of this chunk
			 */
			chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo);

			/*
			 * Build a tuple and store it
			 */
			t_values[1] = Int32GetDatum(chunk_seq++);
			SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ);
			memcpy(VARDATA(&chunk_data), data_p, chunk_size);
			toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);

			heap_insert(toastrel, toasttup, mycid, options, NULL, xid);

			/*
			 * Create the index entry.  We cheat a little here by not using
			 * FormIndexDatum: this relies on the knowledge that the index columns
			 * are the same as the initial columns of the table for all the
			 * indexes.  We also cheat by not providing an IndexInfo: this is okay
			 * for now because btree doesn't need one, but we might have to be
			 * more honest someday.
			 *
			 * Note also that there had better not be any user-created index on
			 * the TOAST table, since we don't bother to update anything else.
			 */
			for (i = 0; i < num_indexes; i++)
			{
				/* Only index relations marked as ready can be updated */
				if (toastidxs[i]->rd_index->indisready)
					index_insert(toastidxs[i], t_values, t_isnull,
								 &(toasttup->t_self),
								 toastrel,
								 toastidxs[i]->rd_index->indisunique ?
								 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
								 false, NULL);
			}

			/*
			 * Free memory
			 */
			heap_freetuple(toasttup);

			/*
			 * Move on to next chunk
			 */
			data_todo -= chunk_size;
			data_p += chunk_size;
		}

		/*
		 * Done - close toast relation and its indexes but keep the lock until
		 * commit, so as a concurrent reindex done directly on the toast relation
		 * would be able to wait for this transaction.
		 */
		toast_close_indexes(toastidxs, num_indexes, NoLock);
		table_close(toastrel, NoLock);

		/*
		 * Create the TOAST pointer value that we'll return
		 */
		result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
		SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK);
		memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));

		return PointerGetDatum(result);
	}

	/* ----------
	 * toast_delete_datum -
	 *
	 *	Delete a single external stored value.
	 *
	 *	value is the datum to examine.  If it is not an on-disk external
	 *	TOAST pointer the function returns without doing anything.
	 *	Otherwise every chunk row whose first column matches the pointer's
	 *	va_valueid is removed from the toast relation named by the pointer's
	 *	va_toastrelid.
	 *
	 *	is_speculative selects how each chunk is removed:
	 *	heap_abort_speculative() when true, simple_heap_delete() otherwise.
	 *
	 *	rel is accepted for interface compatibility; it is not referenced
	 *	in the body.
	 * ----------
	 */
	void
	toast_delete_datum(Relation rel, Datum value, bool is_speculative)
	{
		struct varlena *attr = (struct varlena *) DatumGetPointer(value);
		struct varatt_external toast_pointer;
		Relation	toastrel;
		Relation   *toastidxs;
		ScanKeyData toastkey;
		SysScanDesc toastscan;
		HeapTuple	toasttup;
		int			num_indexes;
		int			validIndex;
		SnapshotData SnapshotToast;

		/* Nothing to delete unless the value is stored out of line on disk */
		if (!VARATT_IS_EXTERNAL_ONDISK(attr))
			return;

		/* Must copy to access aligned fields */
		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);

		/*
		 * Open the toast relation and its indexes
		 */
		toastrel = table_open(toast_pointer.va_toastrelid, RowExclusiveLock);

		/* Open all indexes and remember the array position of the valid one */
		validIndex = toast_open_indexes(toastrel,
										RowExclusiveLock,
										&toastidxs,
										&num_indexes);

		/*
		 * Setup a scan key to find chunks with matching va_valueid
		 */
		ScanKeyInit(&toastkey,
					(AttrNumber) 1,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(toast_pointer.va_valueid));

		/*
		 * Find all the chunks.  (We don't actually care whether we see them in
		 * sequence or not, but since we've already locked the index we might as
		 * well use systable_beginscan_ordered.)
		 */
		init_toast_snapshot(&SnapshotToast);
		toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
											   &SnapshotToast, 1, &toastkey);
		while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
		{
			/*
			 * Have a chunk, delete it
			 */
			if (is_speculative)
				heap_abort_speculative(toastrel, &toasttup->t_self);
			else
				simple_heap_delete(toastrel, &toasttup->t_self);
		}

		/*
		 * End scan and close relations but keep the lock until commit, so as a
		 * concurrent reindex done directly on the toast relation would be able to
		 * wait for this transaction.
		 */
		systable_endscan_ordered(toastscan);
		toast_close_indexes(toastidxs, num_indexes, NoLock);
		table_close(toastrel, NoLock);
	}

	/* ----------
	 * toastrel_valueid_exists -
	 *
	 *	Report whether the given toast relation holds at least one chunk row
	 *	carrying the given value ID.  The lookup runs under SnapshotAny, so
	 *	dead rows count as well as live ones; see notes for
	 *	GetNewOidWithIndex().
	 * ----------
	 */
	static bool
	toastrel_valueid_exists(Relation toastrel, Oid valueid)
	{
		Relation   *idxrels;
		int			idxcount;
		int			validpos;
		ScanKeyData key;
		SysScanDesc scan;
		bool		exists;

		/* Open every index of the toast relation and locate the valid one */
		validpos = toast_open_indexes(toastrel, RowExclusiveLock,
									  &idxrels, &idxcount);

		/* Match on the first column (the value ID) by OID equality */
		ScanKeyInit(&key,
					(AttrNumber) 1,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(valueid));

		/* A single returned tuple is enough to answer the question */
		scan = systable_beginscan(toastrel,
								  RelationGetRelid(idxrels[validpos]),
								  true, SnapshotAny, 1, &key);
		exists = (systable_getnext(scan) != NULL);
		systable_endscan(scan);

		/* Release the indexes opened above */
		toast_close_indexes(idxrels, idxcount, RowExclusiveLock);

		return exists;
	}

	/* ----------
	 * toastid_valueid_exists -
	 *
	 *	Same check as toastrel_valueid_exists, but the toast relation is
	 *	identified by OID; it is opened and closed here under
	 *	AccessShareLock.
	 * ----------
	 */
	static bool
	toastid_valueid_exists(Oid toastrelid, Oid valueid)
	{
		Relation	rel = table_open(toastrelid, AccessShareLock);
		bool		exists = toastrel_valueid_exists(rel, valueid);

		table_close(rel, AccessShareLock);

		return exists;
	}

	/* ----------
	 * toast_get_valid_index
	 *
	 *	Return the OID of the valid index of the toast relation identified by
	 *	toastoid.  A toast relation can have only one valid index at the same
	 *	time.
	 *
	 *	The relation and its indexes are opened with the supplied lock mode
	 *	and closed again with NoLock before returning.
	 */
	Oid
	toast_get_valid_index(Oid toastoid, LOCKMODE lock)
	{
		Relation	rel;
		Relation   *idxrels;
		int			idxcount;
		int			validpos;
		Oid			indexoid;

		rel = table_open(toastoid, lock);

		/* Open all indexes; validpos is the array slot of the valid one */
		validpos = toast_open_indexes(rel, lock, &idxrels, &idxcount);
		indexoid = RelationGetRelid(idxrels[validpos]);

		/* Everything opened above is closed here */
		toast_close_indexes(idxrels, idxcount, NoLock);
		table_close(rel, NoLock);

		return indexoid;
	}

	/* ----------
	 * toast_open_indexes
	 *
	 *	Get an array of the indexes associated to the given toast relation
	 *	and return as well the position of the valid index used by the toast
	 *	relation in this array.  It is the responsibility of the caller of this
	 *	function to close the indexes as well as free them.
	 *
	 *	toastrel	- the open toast relation whose indexes are wanted
	 *	lock		- lock mode passed to index_open() for every index
	 *	toastidxs	- output: palloc'd array of the opened index relations
	 *	num_indexes	- output: number of entries in *toastidxs
	 *
	 *	Returns the array position of the first index marked indisvalid.
	 *	Raises an error if no index is marked valid.
	 */
	int
	toast_open_indexes(Relation toastrel,
					   LOCKMODE lock,
					   Relation **toastidxs,
					   int *num_indexes)
	{
		int			i = 0;
		int			res = 0;
		bool		found = false;
		List	   *indexlist;
		ListCell   *lc;

		/* Get index list of the toast relation */
		indexlist = RelationGetIndexList(toastrel);
		Assert(indexlist != NIL);

		*num_indexes = list_length(indexlist);

		/* Open all the index relations, storing them in list order */
		*toastidxs = (Relation *) palloc(*num_indexes * sizeof(Relation));
		foreach(lc, indexlist)
			(*toastidxs)[i++] = index_open(lfirst_oid(lc), lock);

		/* Fetch the first valid index in list */
		for (i = 0; i < *num_indexes; i++)
		{
			Relation	toastidx = (*toastidxs)[i];

			if (toastidx->rd_index->indisvalid)
			{
				res = i;
				found = true;
				break;
			}
		}

		/*
		 * Free index list, not necessary anymore as relations are opened and a
		 * valid index has been found.
		 */
		list_free(indexlist);

		/*
		 * The toast relation should have one valid index, so something is going
		 * wrong if there is nothing.
		 */
		if (!found)
			elog(ERROR, "no valid index found for toast relation with Oid %u",
				 RelationGetRelid(toastrel));

		return res;
	}

	/* ----------
	 * toast_close_indexes
	 *
	 *	Close each of the num_indexes index relations in toastidxs using the
	 *	given lock mode, then free the array itself.  Intended as the
	 *	counterpart of toast_open_indexes.
	 */
	void
	toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
	{
		int			pos = 0;

		/* Close the indexes in array order */
		while (pos < num_indexes)
		{
			index_close(toastidxs[pos], lock);
			pos++;
		}

		/* The array is released here as well */
		pfree(toastidxs);
	}

	/* ----------
	 * init_toast_snapshot
	 *
	 *	Fill in *toast_snapshot as a TOAST snapshot.  An MVCC snapshot is
	 *	needed as the basis; since we don't know which one to use, the oldest
	 *	one is taken.  This is safe: at worst, we will get a "snapshot too
	 *	old" error that might have been avoided otherwise.
	 */
	void
	init_toast_snapshot(Snapshot toast_snapshot)
	{
		Snapshot	oldest;

		oldest = GetOldestSnapshot();

		/*
		 * A NULL result means the session has no active snapshots.  That can
		 * happen if, for example, a procedure fetches a toasted value into a
		 * local variable, commits, and then tries to detoast the value.  Such
		 * coding is unsafe, because once we commit there is nothing to prevent
		 * the toast data from being deleted: detoasting must happen in the
		 * same transaction that originally fetched the toast pointer.  So we
		 * throw an error instead of papering over the problem.  (This is weak
		 * protection, since in many scenarios the procedure would already have
		 * created a new transaction snapshot and the problem would go
		 * undetected.  It is still better than nothing, and masking the
		 * problem further is not worth any code.)
		 */
		if (oldest == NULL)
		{
			elog(ERROR, "cannot fetch toast data without an active snapshot");
		}

		/*
		 * GetOldestSnapshot() can hand back a catalog snapshot even when it is
		 * neither registered nor active.  Because a valid catalog snapshot
		 * exists most of the time, that easily hides bugs around not having a
		 * snapshot set up; hence also insist that some snapshot is registered
		 * or active.
		 */
		Assert(HaveRegisteredOrActiveSnapshot());

		InitToastSnapshot(*toast_snapshot, oldest->lsn, oldest->whenTaken);
	}