/* src/include/executor/hashjoin.h - hawq - Git at Google */

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /*-------------------------------------------------------------------------
  *
  * hashjoin.h
  *	  internal structures for hash joins
  *
  *
  * Portions Copyright (c) 2007-2008, Greenplum inc
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.41 2006/07/13 18:01:02 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef HASHJOIN_H
 #define HASHJOIN_H

 #include "fmgr.h"
 #include "executor/execWorkfile.h"
 #include "cdb/cdbpublic.h"                 /* CdbExplain_Agg */
 #include "utils/workfile_mgr.h"

 struct StringInfoData;                  /* #include "lib/stringinfo.h" */


 /* ----------------------------------------------------------------
  *				hash-join hash table structures
  *
  * Each active hashjoin has a HashJoinTable control block, which is
  * palloc'd in the executor's per-query context.  All other storage needed
  * for the hashjoin is kept in private memory contexts, two for each hashjoin.
  * This makes it easy and fast to release the storage when we don't need it
  * anymore.  (Exception: data associated with the temp files lives in the
  * per-query context too, since we always call buffile.c in that context.)
  *
  * The hashtable contexts are made children of the per-query context, ensuring
  * that they will be discarded at end of statement even if the join is
  * aborted early by an error.  (Likewise, any temporary files we make will
  * be cleaned up by the virtual file manager in event of an error.)
  *
  * Storage that should live through the entire join is allocated from the
  * "hashCxt", while storage that is only wanted for the current batch is
  * allocated in the "batchCxt".  By resetting the batchCxt at the end of
  * each batch, we free all the per-batch storage reliably and without tedium.
  *
  * During first scan of inner relation, we get its tuples from executor.
  * If nbatch > 1 then tuples that don't belong in first batch get saved
  * into inner-batch temp files. The same statements apply for the
  * first scan of the outer relation, except we write tuples to outer-batch
  * temp files.	After finishing the first scan, we do the following for
  * each remaining batch:
  *	1. Read tuples from inner batch file, load into hash buckets.
  *	2. Read tuples from outer batch file, match to hash buckets and output.
  *
  * It is possible to increase nbatch on the fly if the in-memory hash table
  * gets too big.  The hash-value-to-batch computation is arranged so that this
  * can only cause a tuple to go into a later batch than previously thought,
  * never into an earlier batch.  When we increase nbatch, we rescan the hash
  * table and dump out any tuples that are now of a later batch to the correct
  * inner batch file.  Subsequently, while reading either inner or outer batch
  * files, we might find tuples that no longer belong to the current batch;
  * if so, we just dump them out to the correct batch file.
  * ----------------------------------------------------------------
  */

 /* these are in nodes/execnodes.h: */
 /* typedef struct HashJoinTupleData *HashJoinTuple; */
 /* typedef struct HashJoinTableData *HashJoinTable; */

 /*
  * HashJoinTupleData
  *
  * Fixed-size header describing one tuple held in a hash bucket chain.
  */
 typedef struct HashJoinTupleData
 {
 	/* Next tuple chained into the same bucket. */
 	struct HashJoinTupleData *next;

 	/* Hash code of this tuple. */
 	uint32		hashvalue;

 	/*
 	 * The tuple data is not a declared member: it follows this header, in
 	 * MinimalTuple format, starting on a MAXALIGN boundary.
 	 */
 } HashJoinTupleData;

 /* Size of the HashJoinTupleData header after MAXALIGN is applied to it. */
 #define HJTUPLE_OVERHEAD \
 	MAXALIGN(sizeof(HashJoinTupleData))

 /*
  * Given a pointer to a HashJoinTupleData header, yield the address
  * HJTUPLE_OVERHEAD bytes past it, cast to MemTuple.
  */
 #define HJTUPLE_MINTUPLE(hjtup) \
 	((MemTuple) (((char *) (hjtup)) + HJTUPLE_OVERHEAD))


 /* Statistics collection workareas for EXPLAIN ANALYZE */
 typedef struct HashJoinBatchStats
 {
 	/*
 	 * NOTE(review): presumably the sizes of this batch's outer and inner
 	 * workfiles; neither meaning nor unit is stated here -- confirm.
 	 */
 	uint64		outerfilesize;
 	uint64		innerfilesize;

 	/* Bytes read from workfile: inner side, then outer side. */
 	uint64		irdbytes;
 	uint64		ordbytes;

 	/* Bytes written (to later batches): inner side, then outer side. */
 	uint64		iwrbytes;
 	uint64		owrbytes;

 	/* work_mem for tuples kept in hash table. */
 	uint64		hashspace_final;

 	/* work_mem from lower batches to this one. */
 	uint64		spillspace_in;

 	/* work_mem from this batch to higher ones. */
 	uint64		spillspace_out;

 	/* Rows spilled from this batch to higher ones. */
 	uint64		spillrows_out;
 } HashJoinBatchStats;

 typedef struct HashJoinTableStats
 {
 	/* Report buffer of the Join operator. */
 	struct StringInfoData *joinexplainbuf;

 	/* Per-batch statistics: points to array[0..nbatchstats-1]. */
 	HashJoinBatchStats *batchstats;

 	/* Number of slots in batchstats. */
 	int			nbatchstats;

 	/* Index of the last batch that ended. */
 	int			endedbatch;

 	/*
 	 * The remaining members are cumulative over all nontrivial batches.
 	 */

 	/* Number of nontrivial batches. */
 	int			nonemptybatches;

 	/* work_mem high water mark. */
 	Size		workmem_max;

 	/* Hash chain length statistics. */
 	CdbExplain_Agg chainlength;
 } HashJoinTableStats;


 /*
  * HashJoinBatchSide
  *
  * State of the outer or inner side of one batch.
  */
 typedef struct HashJoinBatchSide
 {
 	/*
 	 * Stays NULL until the first tuple is written to this side of the
 	 * batch; the file is opened only at that point.  Batch zero never has
 	 * files, because its tuples are processed rather than dumped out.
 	 */
 	ExecWorkFile *workfile;

 	/*
 	 * NOTE(review): presumably the number of tuples on this side of the
 	 * batch; not documented here -- confirm against the code that updates it.
 	 */
 	int			total_tuples;
 } HashJoinBatchSide;


 /*
  * HashJoinBatchData
  *
  * State of one batch.
  */
 typedef struct HashJoinBatchData
 {
 	/* work_mem bytes for inner tuples. */
 	Size		innerspace;

 	/* Number of inner tuples. */
 	unsigned int innertuples;

 	/* Per-side state: inner side first, then outer side. */
 	HashJoinBatchSide innerside;
 	HashJoinBatchSide outerside;
 } HashJoinBatchData;


 /*
  * HashJoinTableData
  */
 typedef struct HashJoinTableData
 {
 	/* Number of buckets in the in-memory hash table. */
 	int			nbuckets;

 	/*
 	 * buckets[i] heads the list of tuples in the i'th in-memory bucket.
 	 * The buckets array is per-batch storage, as are all the tuples.
 	 */
 	struct HashJoinTupleData **buckets;

 	/*
 	 * NOTE(review): BloomFilter is declared outside this header; how the
 	 * filter is populated and probed is not visible here.
 	 */
 	BloomFilter bloomfilter;

 	/* Number of batches. */
 	int			nbatch;

 	/* Current batch number; 0 during the first pass. */
 	int			curbatch;

 	/* nbatch at the time the inner scan started. */
 	int			nbatch_original;

 	/* nbatch at the time the outer scan started. */
 	int			nbatch_outstart;

 	/* Flag to shut off nbatch increases. */
 	bool		growEnabled;

 	/* Number of tuples obtained from the inner plan. */
 	double		totalTuples;

 	/* Array [0..nbatch-1] of pointers to HashJoinBatchData. */
 	HashJoinBatchData **batches;

 	/* Representation of all spill file names, for spill file reuse. */
 	workfile_set *work_set;

 	/*
 	 * NOTE(review): purpose not documented in this header -- confirm
 	 * against the code that opens and writes it.
 	 */
 	ExecWorkFile *state_file;

 	/*
 	 * Lookup data for the datatype-specific hash functions of the datatypes
 	 * being hashed.  The inner and outer sides of each hashclause are
 	 * assumed to be the same type, or at least to share the same hash
 	 * function.  The array has one entry per hash key.
 	 */
 	FmgrInfo   *hashfunctions;

 	/* For each hash join operator: is it strict? */
 	bool	   *hashStrict;

 	/* Upper limit for space used. */
 	Size		spaceAllowed;

 	/* Context for whole-hash-join storage. */
 	MemoryContext hashCxt;

 	/* Context for this-batch-only storage. */
 	MemoryContext batchCxt;

 	/* Context for the Bloom filter. */
 	MemoryContext bloomfilterCtx;

 	/* CDB: context for temp buf file. */
 	MemoryContext bfCxt;

 	/* Statistics workarea for EXPLAIN ANALYZE. */
 	HashJoinTableStats *stats;

 	/* Has this hash table been eagerly released? */
 	bool		eagerlyReleased;

 	/* Reference to the enclosing HashJoinState. */
 	HashJoinState *hjstate;

 } HashJoinTableData;

 #endif   /* HASHJOIN_H */
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/*-------------------------------------------------------------------------
	*
	* hashjoin.h
	* internal structures for hash joins
	*
	*
	* Portions Copyright (c) 2007-2008, Greenplum inc
	* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
	* Portions Copyright (c) 1994, Regents of the University of California
	*
	* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.41 2006/07/13 18:01:02 momjian Exp $
	*
	*-------------------------------------------------------------------------
	*/
	#ifndef HASHJOIN_H
	#define HASHJOIN_H

	#include "fmgr.h"
	#include "executor/execWorkfile.h"
	#include "cdb/cdbpublic.h" /* CdbExplain_Agg */
	#include "utils/workfile_mgr.h"

	struct StringInfoData; /* #include "lib/stringinfo.h" */


	/* ----------------------------------------------------------------
	* hash-join hash table structures
	*
	* Each active hashjoin has a HashJoinTable control block, which is
	* palloc'd in the executor's per-query context. All other storage needed
	* for the hashjoin is kept in private memory contexts, two for each hashjoin.
	* This makes it easy and fast to release the storage when we don't need it
	* anymore. (Exception: data associated with the temp files lives in the
	* per-query context too, since we always call buffile.c in that context.)
	*
	* The hashtable contexts are made children of the per-query context, ensuring
	* that they will be discarded at end of statement even if the join is
	* aborted early by an error. (Likewise, any temporary files we make will
	* be cleaned up by the virtual file manager in event of an error.)
	*
	* Storage that should live through the entire join is allocated from the
	* "hashCxt", while storage that is only wanted for the current batch is
	* allocated in the "batchCxt". By resetting the batchCxt at the end of
	* each batch, we free all the per-batch storage reliably and without tedium.
	*
	* During first scan of inner relation, we get its tuples from executor.
	* If nbatch > 1 then tuples that don't belong in first batch get saved
	* into inner-batch temp files. The same statements apply for the
	* first scan of the outer relation, except we write tuples to outer-batch
	* temp files. After finishing the first scan, we do the following for
	* each remaining batch:
	* 1. Read tuples from inner batch file, load into hash buckets.
	* 2. Read tuples from outer batch file, match to hash buckets and output.
	*
	* It is possible to increase nbatch on the fly if the in-memory hash table
	* gets too big. The hash-value-to-batch computation is arranged so that this
	* can only cause a tuple to go into a later batch than previously thought,
	* never into an earlier batch. When we increase nbatch, we rescan the hash
	* table and dump out any tuples that are now of a later batch to the correct
	* inner batch file. Subsequently, while reading either inner or outer batch
	* files, we might find tuples that no longer belong to the current batch;
	* if so, we just dump them out to the correct batch file.
	* ----------------------------------------------------------------
	*/

	/* these are in nodes/execnodes.h: */
	/* typedef struct HashJoinTupleData *HashJoinTuple; */
	/* typedef struct HashJoinTableData *HashJoinTable; */

	/*
	 * HashJoinTupleData
	 *
	 * Fixed-size header describing one tuple held in a hash bucket chain.
	 * "next" must be a pointer: a struct cannot contain a member of its own
	 * (still incomplete) type.
	 */
	typedef struct HashJoinTupleData
	{
		struct HashJoinTupleData *next;	/* link to next tuple in same bucket */
		uint32		hashvalue;	/* tuple's hash code */
		/* Tuple data, in MinimalTuple format, follows on a MAXALIGN boundary */
	} HashJoinTupleData;

	/* Size of the HashJoinTupleData header after MAXALIGN is applied to it. */
	#define HJTUPLE_OVERHEAD \
		MAXALIGN(sizeof(HashJoinTupleData))

	/*
	 * Given a pointer to a HashJoinTupleData header, yield the address
	 * HJTUPLE_OVERHEAD bytes past it, cast to MemTuple.
	 */
	#define HJTUPLE_MINTUPLE(hjtup) \
		((MemTuple) (((char *) (hjtup)) + HJTUPLE_OVERHEAD))


	/* Statistics collection workareas for EXPLAIN ANALYZE */
	typedef struct HashJoinBatchStats
	{
		/*
		 * NOTE(review): presumably the sizes of this batch's outer and inner
		 * workfiles; neither meaning nor unit is stated here -- confirm.
		 */
		uint64		outerfilesize;
		uint64		innerfilesize;

		/* Bytes read from workfile: inner side, then outer side. */
		uint64		irdbytes;
		uint64		ordbytes;

		/* Bytes written (to later batches): inner side, then outer side. */
		uint64		iwrbytes;
		uint64		owrbytes;

		/* work_mem for tuples kept in hash table. */
		uint64		hashspace_final;

		/* work_mem from lower batches to this one. */
		uint64		spillspace_in;

		/* work_mem from this batch to higher ones. */
		uint64		spillspace_out;

		/* Rows spilled from this batch to higher ones. */
		uint64		spillrows_out;
	} HashJoinBatchStats;

	typedef struct HashJoinTableStats
	{
	struct StringInfoData joinexplainbuf; / Join operator's report buf */
	HashJoinBatchStats batchstats; / -> array[0..nbatchstats-1] */
	int nbatchstats; /* num of batchstats slots */
	int endedbatch; /* index of last batch ended */

	/* These statistics are cumulative over all nontrivial batches... */
	int nonemptybatches; /* num of nontrivial batches */
	Size workmem_max; /* work_mem high water mark */
	CdbExplain_Agg chainlength; /* hash chain length stats */
	} HashJoinTableStats;


	/*
	* HashJoinBatchSide
	*
	* State of the outer or inner side of one batch.
	*/
	typedef struct HashJoinBatchSide
	{
		/*
		 * Stays NULL until the first tuple is written to this side of the
		 * batch; the file is opened only at that point.  Batch zero never has
		 * files, because its tuples are processed rather than dumped out.
		 */
		ExecWorkFile *workfile;

		/*
		 * NOTE(review): presumably the number of tuples on this side of the
		 * batch; not documented here -- confirm against the code that
		 * updates it.
		 */
		int			total_tuples;
	} HashJoinBatchSide;


	/*
	* HashJoinBatchData
	*
	* State of one batch.
	*/
	typedef struct HashJoinBatchData
	{
		/* work_mem bytes for inner tuples. */
		Size		innerspace;

		/* Number of inner tuples. */
		unsigned int innertuples;

		/* Per-side state: inner side first, then outer side. */
		HashJoinBatchSide innerside;
		HashJoinBatchSide outerside;
	} HashJoinBatchData;


	/*
	* HashJoinTableData
	*/
	typedef struct HashJoinTableData
	{
	int nbuckets; /* # buckets in the in-memory hash table */
	/* buckets[i] is head of list of tuples in i'th in-memory bucket */
	struct HashJoinTupleData **buckets;

	BloomFilter bloomfilter;

	/* buckets array is per-batch storage, as are all the tuples */

	int nbatch; /* number of batches */
	int curbatch; /* current batch #; 0 during 1st pass */

	int nbatch_original; /* nbatch when we started inner scan */
	int nbatch_outstart; /* nbatch when we started outer scan */

	bool growEnabled; /* flag to shut off nbatch increases */

	double totalTuples; /* # tuples obtained from inner plan */

	HashJoinBatchData *batches; / array [0..nbatch-1] of ptr to HJBD */

	/* Representation of all spill file names, for spill file reuse */
	workfile_set * work_set;

	ExecWorkFile * state_file;

	/*
	* Info about the datatype-specific hash functions for the datatypes being
	* hashed. We assume that the inner and outer sides of each hashclause
	* are the same type, or at least share the same hash function. This is an
	* array of the same length as the number of hash keys.
	*/
	FmgrInfo hashfunctions; / lookup data for hash functions */

	bool hashStrict; / is each hash join operator strict? */

	Size spaceAllowed; /* upper limit for space used */

	MemoryContext hashCxt; /* context for whole-hash-join storage */
	MemoryContext batchCxt; /* context for this-batch-only storage */
	MemoryContext bloomfilterCtx; /* context for Bloom filter */
	MemoryContext bfCxt; /* CDB / / context for temp buf file */

	HashJoinTableStats stats; / statistics workarea for EXPLAIN ANALYZE */
	bool eagerlyReleased; /* Has this hash-table been eagerly released? */

	HashJoinState * hjstate; /* reference to the enclosing HashJoinState */

	} HashJoinTableData;

	#endif /* HASHJOIN_H */