src/backend/access/tablesample/system.c - cloudberry - Git at Google

 /*-------------------------------------------------------------------------
  *
  * system.c
  *	  support routines for SYSTEM tablesample method
  *
  * To ensure repeatability of samples, it is necessary that selection of a
  * given tuple be history-independent; otherwise syncscanning would break
  * repeatability, to say nothing of logically-irrelevant maintenance such
  * as physical extension or shortening of the relation.
  *
  * To achieve that, we proceed by hashing each candidate block number together
  * with the active seed, and then selecting it if the hash is less than the
  * cutoff value computed from the selection probability by BeginSampleScan.
  *
  *
  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
  *	  src/backend/access/tablesample/system.c
  *
  *-------------------------------------------------------------------------
  */

 #include "postgres.h"

 #include <math.h>

 #include "access/relscan.h"
 #include "access/tsmapi.h"
 #include "catalog/pg_type.h"
 #include "common/hashfn.h"
 #include "optimizer/optimizer.h"
 #include "utils/builtins.h"

 #include "optimizer/cost.h"

 /* Private state */
 typedef struct
 {
 	uint64		cutoff;			/* select blocks with hash less than this */
 	uint32		seed;			/* random seed */
 	BlockNumber nextblock;		/* next block to consider sampling */
 	OffsetNumber lt;			/* last tuple returned from current block */
 } SystemSamplerData;


 static void system_samplescangetsamplesize(PlannerInfo *root,
 										   RelOptInfo *baserel,
 										   List *paramexprs,
 										   BlockNumber *pages,
 										   double *tuples);
 static void system_initsamplescan(SampleScanState *node,
 								  int eflags);
 static void system_beginsamplescan(SampleScanState *node,
 								   Datum *params,
 								   int nparams,
 								   uint32 seed);
 BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
 static OffsetNumber system_nextsampletuple(SampleScanState *node,
 										   BlockNumber blockno,
 										   OffsetNumber maxoffset);


 /*
  * Create a TsmRoutine descriptor for the SYSTEM method.
  */
 Datum
 tsm_system_handler(PG_FUNCTION_ARGS)
 {
 	TsmRoutine *tsm = makeNode(TsmRoutine);

 	tsm->parameterTypes = list_make1_oid(FLOAT4OID);
 	tsm->repeatable_across_queries = true;
 	tsm->repeatable_across_scans = true;
 	tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
 	tsm->InitSampleScan = system_initsamplescan;
 	tsm->BeginSampleScan = system_beginsamplescan;
 	tsm->NextSampleBlock = system_nextsampleblock;
 	tsm->NextSampleTuple = system_nextsampletuple;
 	tsm->EndSampleScan = NULL;

 	PG_RETURN_POINTER(tsm);
 }

 /*
  * Sample size estimation.
  */
 static void
 system_samplescangetsamplesize(PlannerInfo *root,
 							   RelOptInfo *baserel,
 							   List *paramexprs,
 							   BlockNumber *pages,
 							   double *tuples)
 {
 	Node	   *pctnode;
 	float4		samplefract;

 	/* Try to extract an estimate for the sample percentage */
 	pctnode = (Node *) linitial(paramexprs);
 	pctnode = estimate_expression_value(root, pctnode);

 	if (IsA(pctnode, Const) &&
 		!((Const *) pctnode)->constisnull)
 	{
 		samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
 		if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
 			samplefract /= 100.0f;
 		else
 		{
 			/* Default samplefract if the value is bogus */
 			samplefract = 0.1f;
 		}
 	}
 	else
 	{
 		/* Default samplefract if we didn't obtain a non-null Const */
 		samplefract = 0.1f;
 	}

 	/* We'll visit a sample of the pages ... */
 	*pages = clamp_row_est(baserel->pages * samplefract);

 	/* ... and hopefully get a representative number of tuples from them */
 	*tuples = clamp_row_est(baserel->tuples * samplefract);
 }

 /*
  * Initialize during executor setup.
  */
 static void
 system_initsamplescan(SampleScanState *node, int eflags)
 {
 	node->tsm_state = palloc0(sizeof(SystemSamplerData));
 }

 /*
  * Examine parameters and prepare for a sample scan.
  */
 static void
 system_beginsamplescan(SampleScanState *node,
 					   Datum *params,
 					   int nparams,
 					   uint32 seed)
 {
 	SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
 	double		percent = DatumGetFloat4(params[0]);
 	double		dcutoff;

 	if (percent < 0 || percent > 100 || isnan(percent))
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
 				 errmsg("sample percentage must be between 0 and 100")));

 	/*
 	 * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
 	 * store that as a uint64, of course.  Note that this gives strictly
 	 * correct behavior at the limits of zero or one probability.
 	 */
 	dcutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
 	sampler->cutoff = (uint64) dcutoff;
 	sampler->seed = seed;
 	sampler->nextblock = 0;
 	sampler->lt = InvalidOffsetNumber;

 	/*
 	 * Bulkread buffer access strategy probably makes sense unless we're
 	 * scanning a very small fraction of the table.  The 1% cutoff here is a
 	 * guess.  We should use pagemode visibility checking, since we scan all
 	 * tuples on each selected page.
 	 */
 	node->use_bulkread = (percent >= 1);
 	node->use_pagemode = true;
 }

 /*
  * Select next block to sample.
  */
 BlockNumber
 system_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
 {
 	SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
 	BlockNumber nextblock = sampler->nextblock;
 	uint32		hashinput[2];

 	/*
 	 * We compute the hash by applying hash_any to an array of 2 uint32's
 	 * containing the block number and seed.  This is efficient to set up, and
 	 * with the current implementation of hash_any, it gives
 	 * machine-independent results, which is a nice property for regression
 	 * testing.
 	 *
 	 * These words in the hash input are the same throughout the block:
 	 */
 	hashinput[1] = sampler->seed;

 	/*
 	 * Loop over block numbers until finding suitable block or reaching end of
 	 * relation.
 	 */
 	for (; nextblock < nblocks; nextblock++)
 	{
 		uint32		hash;

 		hashinput[0] = nextblock;

 		hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
 									   (int) sizeof(hashinput)));
 		if (hash < sampler->cutoff)
 			break;
 	}

 	if (nextblock < nblocks)
 	{
 		/* Found a suitable block; remember where we should start next time */
 		sampler->nextblock = nextblock + 1;
 		return nextblock;
 	}

 	/* Done, but let's reset nextblock to 0 for safety. */
 	sampler->nextblock = 0;
 	return InvalidBlockNumber;
 }

 /*
  * Select next sampled tuple in current block.
  *
  * In block sampling, we just want to sample all the tuples in each selected
  * block.
  *
  * It is OK here to return an offset without knowing if the tuple is visible
  * (or even exists); nodeSamplescan.c will deal with that.
  *
  * When we reach end of the block, return InvalidOffsetNumber which tells
  * SampleScan to go to next block.
  */
 static OffsetNumber
 system_nextsampletuple(SampleScanState *node,
 					   BlockNumber blockno,
 					   OffsetNumber maxoffset)
 {
 	SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
 	OffsetNumber tupoffset = sampler->lt;

 	/* Advance to next possible offset on page */
 	if (tupoffset == InvalidOffsetNumber)
 		tupoffset = FirstOffsetNumber;
 	else
 		tupoffset++;

 	/* Done? */
 	if (tupoffset > maxoffset)
 		tupoffset = InvalidOffsetNumber;

 	sampler->lt = tupoffset;

 	return tupoffset;
 }
	/*-------------------------------------------------------------------------
	*
	* system.c
	* support routines for SYSTEM tablesample method
	*
	* To ensure repeatability of samples, it is necessary that selection of a
	* given tuple be history-independent; otherwise syncscanning would break
	* repeatability, to say nothing of logically-irrelevant maintenance such
	* as physical extension or shortening of the relation.
	*
	* To achieve that, we proceed by hashing each candidate block number together
	* with the active seed, and then selecting it if the hash is less than the
	* cutoff value computed from the selection probability by BeginSampleScan.
	*
	*
	* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
	* Portions Copyright (c) 1994, Regents of the University of California
	*
	* IDENTIFICATION
	* src/backend/access/tablesample/system.c
	*
	*-------------------------------------------------------------------------
	*/

	#include "postgres.h"

	#include <math.h>

	#include "access/relscan.h"
	#include "access/tsmapi.h"
	#include "catalog/pg_type.h"
	#include "common/hashfn.h"
	#include "optimizer/optimizer.h"
	#include "utils/builtins.h"

	#include "optimizer/cost.h"

	/* Private state */
	typedef struct
	{
	uint64 cutoff; /* select blocks with hash less than this */
	uint32 seed; /* random seed */
	BlockNumber nextblock; /* next block to consider sampling */
	OffsetNumber lt; /* last tuple returned from current block */
	} SystemSamplerData;


	static void system_samplescangetsamplesize(PlannerInfo *root,
	RelOptInfo *baserel,
	List *paramexprs,
	BlockNumber *pages,
	double *tuples);
	static void system_initsamplescan(SampleScanState *node,
	int eflags);
	static void system_beginsamplescan(SampleScanState *node,
	Datum *params,
	int nparams,
	uint32 seed);
	BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
	static OffsetNumber system_nextsampletuple(SampleScanState *node,
	BlockNumber blockno,
	OffsetNumber maxoffset);


	/*
	* Create a TsmRoutine descriptor for the SYSTEM method.
	*/
	Datum
	tsm_system_handler(PG_FUNCTION_ARGS)
	{
	TsmRoutine *tsm = makeNode(TsmRoutine);

	tsm->parameterTypes = list_make1_oid(FLOAT4OID);
	tsm->repeatable_across_queries = true;
	tsm->repeatable_across_scans = true;
	tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
	tsm->InitSampleScan = system_initsamplescan;
	tsm->BeginSampleScan = system_beginsamplescan;
	tsm->NextSampleBlock = system_nextsampleblock;
	tsm->NextSampleTuple = system_nextsampletuple;
	tsm->EndSampleScan = NULL;

	PG_RETURN_POINTER(tsm);
	}

	/*
	* Sample size estimation.
	*/
	static void
	system_samplescangetsamplesize(PlannerInfo *root,
	RelOptInfo *baserel,
	List *paramexprs,
	BlockNumber *pages,
	double *tuples)
	{
	Node *pctnode;
	float4 samplefract;

	/* Try to extract an estimate for the sample percentage */
	pctnode = (Node *) linitial(paramexprs);
	pctnode = estimate_expression_value(root, pctnode);

	if (IsA(pctnode, Const) &&
	!((Const *) pctnode)->constisnull)
	{
	samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
	if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
	samplefract /= 100.0f;
	else
	{
	/* Default samplefract if the value is bogus */
	samplefract = 0.1f;
	}
	}
	else
	{
	/* Default samplefract if we didn't obtain a non-null Const */
	samplefract = 0.1f;
	}

	/* We'll visit a sample of the pages ... */
	pages = clamp_row_est(baserel->pages samplefract);

	/* ... and hopefully get a representative number of tuples from them */
	tuples = clamp_row_est(baserel->tuples samplefract);
	}

	/*
	* Initialize during executor setup.
	*/
	static void
	system_initsamplescan(SampleScanState *node, int eflags)
	{
	node->tsm_state = palloc0(sizeof(SystemSamplerData));
	}

	/*
	* Examine parameters and prepare for a sample scan.
	*/
	static void
	system_beginsamplescan(SampleScanState *node,
	Datum *params,
	int nparams,
	uint32 seed)
	{
	SystemSamplerData sampler = (SystemSamplerData ) node->tsm_state;
	double percent = DatumGetFloat4(params[0]);
	double dcutoff;

	if (percent < 0 \|\| percent > 100 \|\| isnan(percent))
	ereport(ERROR,
	(errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
	errmsg("sample percentage must be between 0 and 100")));

	/*
	* The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
	* store that as a uint64, of course. Note that this gives strictly
	* correct behavior at the limits of zero or one probability.
	*/
	dcutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
	sampler->cutoff = (uint64) dcutoff;
	sampler->seed = seed;
	sampler->nextblock = 0;
	sampler->lt = InvalidOffsetNumber;

	/*
	* Bulkread buffer access strategy probably makes sense unless we're
	* scanning a very small fraction of the table. The 1% cutoff here is a
	* guess. We should use pagemode visibility checking, since we scan all
	* tuples on each selected page.
	*/
	node->use_bulkread = (percent >= 1);
	node->use_pagemode = true;
	}

	/*
	* Select next block to sample.
	*/
	BlockNumber
	system_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
	{
	SystemSamplerData sampler = (SystemSamplerData ) node->tsm_state;
	BlockNumber nextblock = sampler->nextblock;
	uint32 hashinput[2];

	/*
	* We compute the hash by applying hash_any to an array of 2 uint32's
	* containing the block number and seed. This is efficient to set up, and
	* with the current implementation of hash_any, it gives
	* machine-independent results, which is a nice property for regression
	* testing.
	*
	* These words in the hash input are the same throughout the block:
	*/
	hashinput[1] = sampler->seed;

	/*
	* Loop over block numbers until finding suitable block or reaching end of
	* relation.
	*/
	for (; nextblock < nblocks; nextblock++)
	{
	uint32 hash;

	hashinput[0] = nextblock;

	hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
	(int) sizeof(hashinput)));
	if (hash < sampler->cutoff)
	break;
	}

	if (nextblock < nblocks)
	{
	/* Found a suitable block; remember where we should start next time */
	sampler->nextblock = nextblock + 1;
	return nextblock;
	}

	/* Done, but let's reset nextblock to 0 for safety. */
	sampler->nextblock = 0;
	return InvalidBlockNumber;
	}

	/*
	* Select next sampled tuple in current block.
	*
	* In block sampling, we just want to sample all the tuples in each selected
	* block.
	*
	* It is OK here to return an offset without knowing if the tuple is visible
	* (or even exists); nodeSamplescan.c will deal with that.
	*
	* When we reach end of the block, return InvalidOffsetNumber which tells
	* SampleScan to go to next block.
	*/
	static OffsetNumber
	system_nextsampletuple(SampleScanState *node,
	BlockNumber blockno,
	OffsetNumber maxoffset)
	{
	SystemSamplerData sampler = (SystemSamplerData ) node->tsm_state;
	OffsetNumber tupoffset = sampler->lt;

	/* Advance to next possible offset on page */
	if (tupoffset == InvalidOffsetNumber)
	tupoffset = FirstOffsetNumber;
	else
	tupoffset++;

	/* Done? */
	if (tupoffset > maxoffset)
	tupoffset = InvalidOffsetNumber;

	sampler->lt = tupoffset;

	return tupoffset;
	}