blob: 9e250c8b32652dc2da2557b7cb50167c71bd4433 [file] [log] [blame]
/*
* gp_partition_functions.c
* Define dynamic partition selection related functions in GPDB.
*
* gp_partition_propagation: This function accumulates unique partition
* oids for a specified dynamic table scan. A dynamic table scan node
* will be executed only after this function is called.
*
* gp_partition_selection: This function finds the child partition of
* a given parent partition oid, which satisfies a given partition
* key value.
*
* gp_partition_expansion: This function finds all child partition oids
* for the given parent oid.
*
* gp_partition_inverse: This function returns all child partition oids
* with their constraints for a given parent oid.
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
#include "postgres.h"
#include "access/heapam.h"
#include "cdb/cdbpartition.h"
#include "funcapi.h"
#include "nodes/execnodes.h"
#include "utils/array.h"
#include "utils/hsearch.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/palloc.h"
#include "utils/elog.h"
#include "utils/guc.h"
/*
 * Layout of the record produced by gp_partition_inverse.
 *
 * PARTITION_INVERSE_RECORD_NUM_ATTRS is the number of columns in the record.
 * The *_ATTNO constants are the 1-based attribute numbers of each column;
 * code in this file subtracts 1 to index the values/nulls arrays.
 */
#define PARTITION_INVERSE_RECORD_NUM_ATTRS 5
#define PARTITION_INVERSE_RECORD_PARCHILDRELID_ATTNO 1
#define PARTITION_INVERSE_RECORD_MINKEY_ATTNO 2
#define PARTITION_INVERSE_RECORD_MININCLUDED_ATTNO 3
#define PARTITION_INVERSE_RECORD_MAXKEY_ATTNO 4
#define PARTITION_INVERSE_RECORD_MAXINCLUDED_ATTNO 5
/*
 * increaseScanArraySize
 *		Grow the per-scan arrays (pidIndexes and iterators) stored in
 *		dynamicTableScanInfo.
 *
 * The new array length is the larger of:
 *		(1) dynamicTableScanInfo->numScans + NUM_PID_INDEXES_ADDED
 *		(2) newMaxPartIndex
 *
 * Every slot that did not exist before the call ends up NULL.  The only
 * caller in this file, InsertPidIntoDynamicTableScanInfo, switches to
 * dynamicTableScanInfo->memoryContext before calling.
 */
static void
increaseScanArraySize(int newMaxPartIndex)
{
	const int	prevSize = dynamicTableScanInfo->numScans;
	int			grownSize = prevSize + NUM_PID_INDEXES_ADDED;
	int			slot;

	/* Make sure the requested index fits even after the fixed-step growth. */
	grownSize = (grownSize < newMaxPartIndex) ? newMaxPartIndex : grownSize;
	dynamicTableScanInfo->numScans = grownSize;

	if (dynamicTableScanInfo->pidIndexes != NULL)
	{
		/* Arrays already exist: enlarge them and clear only the new tail. */
		dynamicTableScanInfo->pidIndexes = (HTAB **)
			repalloc(dynamicTableScanInfo->pidIndexes,
					 grownSize * sizeof(HTAB*));
		dynamicTableScanInfo->iterators =
			repalloc(dynamicTableScanInfo->iterators,
					 grownSize * sizeof(DynamicPartitionIterator*));

		slot = prevSize;
		while (slot < grownSize)
		{
			dynamicTableScanInfo->pidIndexes[slot] = NULL;
			dynamicTableScanInfo->iterators[slot] = NULL;
			slot++;
		}
		return;
	}

	/* First allocation: palloc0 hands back zero-filled (all-NULL) arrays. */
	dynamicTableScanInfo->pidIndexes = (HTAB **)
		palloc0(grownSize * sizeof(HTAB*));
	Assert(dynamicTableScanInfo->iterators == NULL);
	dynamicTableScanInfo->iterators =
		palloc0(grownSize * sizeof(DynamicPartitionIterator*));
}
/*
 * createPidIndex
 *		Build an empty hash table, keyed by partition Oid and holding
 *		PartOidEntry entries, for the dynamic table scan at the given
 *		1-based index.
 *
 * The table is created in dynamicTableScanInfo->memoryContext.  The slot
 * for this index is expected to be empty; the caller stores the result.
 */
static HTAB *
createPidIndex(int index)
{
	HASHCTL		ctl;
	int			flags = HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION;

	Assert(dynamicTableScanInfo->pidIndexes[index - 1] == NULL);

	MemSet(&ctl, 0, sizeof(HASHCTL));
	ctl.hcxt = dynamicTableScanInfo->memoryContext;
	ctl.hash = oid_hash;
	ctl.keysize = sizeof(Oid);
	ctl.entrysize = sizeof(PartOidEntry);

	return hash_create("Dynamic Table Scan Pid Index",
					   INITIAL_NUM_PIDS,
					   &ctl,
					   flags);
}
/*
 * InsertPidIntoDynamicTableScanInfo
 *		Record a partition oid (and the selector that produced it) in the
 *		pid index of the dynamic table scan identified by 'index'.
 *
 * 'index' is 1-based.  The per-scan arrays are grown and the hash table for
 * this scan is created on demand.  When partOid is InvalidOid nothing is
 * inserted, but the hash table for the scan is still guaranteed to exist
 * on return.
 *
 * For a partOid that is already present, selectorId is added to the entry's
 * selector list unless it is already there.
 *
 * All work is done in dynamicTableScanInfo->memoryContext; the caller's
 * memory context is restored before returning.
 */
void
InsertPidIntoDynamicTableScanInfo(int32 index, Oid partOid, int32 selectorId)
{
	MemoryContext callerCxt;

	Assert(dynamicTableScanInfo != NULL &&
		   dynamicTableScanInfo->memoryContext != NULL);
	/* Scan indexes start at 1. */
	Assert(index > 0);

	callerCxt = MemoryContextSwitchTo(dynamicTableScanInfo->memoryContext);

	/* Make room for this scan if the arrays are too short. */
	if (dynamicTableScanInfo->numScans < index)
	{
		increaseScanArraySize(index);
	}
	Assert(index <= dynamicTableScanInfo->numScans);

	/* Lazily create the hash table for this scan. */
	if (dynamicTableScanInfo->pidIndexes[index - 1] == NULL)
	{
		dynamicTableScanInfo->pidIndexes[index - 1] = createPidIndex(index);
	}
	Assert(dynamicTableScanInfo->pidIndexes[index - 1] != NULL);

	if (partOid != InvalidOid)
	{
		bool		alreadyPresent = false;
		PartOidEntry *entry =
			hash_search(dynamicTableScanInfo->pidIndexes[index - 1],
						&partOid, HASH_ENTER, &alreadyPresent);

		if (!alreadyPresent)
		{
			/* Fresh entry: initialise its payload. */
			entry->partOid = partOid;
			entry->selectorList = list_make1_int(selectorId);
		}
		else
		{
			Assert(entry->partOid == partOid);
			Assert(NIL != entry->selectorList);
			entry->selectorList = list_append_unique_int(entry->selectorList, selectorId);
		}
	}

	MemoryContextSwitchTo(callerCxt);
}
PG_FUNCTION_INFO_V1(gp_partition_propagation);
/*
 * gp_partition_propagation
 *		SQL-callable wrapper that adds a partition oid to a pid index.
 *
 * Argument 0 (int4) is the 1-based dynamic table scan index, argument 1
 * (oid) is the partition oid.  The oid is recorded with
 * InvalidPartitionSelectorId as its selector.  Returns void.
 */
Datum
gp_partition_propagation(PG_FUNCTION_ARGS)
{
	int32		scanIndex = PG_GETARG_INT32(0);
	Oid			childOid = PG_GETARG_OID(1);

	InsertPidIntoDynamicTableScanInfo(scanIndex,
									  childOid,
									  InvalidPartitionSelectorId);

	PG_RETURN_VOID();
}
PG_FUNCTION_INFO_V1(gp_partition_selection);
/*
 * gp_partition_selection
 *		Return the oid of the child partition of the given parent
 *		(argument 0) whose constraint accepts the given partition key value
 *		(argument 1, which may be NULL).
 *
 * Only a single partition key at this level is supported.
 *
 * Returns SQL NULL when no partition metadata is loaded, when the parent
 * has no metadata entry, or when no child partition accepts the value.
 */
Datum
gp_partition_selection(PG_FUNCTION_ARGS)
{
	Oid			rootOid = PG_GETARG_OID(0);
	PartitionNode *node = NULL;
	PartitionAccessMethods *methods = NULL;
	Partition  *partInfo;
	AttrNumber	keyAttno;
	Relation	parentRel;
	TupleDesc	parentDesc;
	Datum	   *keyValues = NULL;
	bool	   *keyNulls = NULL;
	MemoryContext callerCxt;
	Oid			selectedOid;

	Assert(dynamicTableScanInfo != NULL);
	Assert(dynamicTableScanInfo->memoryContext != NULL);

	/* Without any metadata there is nothing to select from. */
	if (dynamicTableScanInfo->partsMetadata == NULL)
	{
		PG_RETURN_NULL();
	}

	findPartitionMetadataEntry(dynamicTableScanInfo->partsMetadata,
							   rootOid,
							   &node,
							   &methods);
	if (node == NULL)
	{
		PG_RETURN_NULL();
	}
	Assert(methods != NULL);

	partInfo = node->part;
	Assert(partInfo->parnatts == 1);
	keyAttno = partInfo->paratts[0];

	parentRel = relation_open(rootOid, NoLock);
	parentDesc = RelationGetDescr(parentRel);
	Assert(parentDesc->natts >= keyAttno);

	/* Build value/null arrays with only the partition key slot filled in. */
	createValueArrays(keyAttno, &keyValues, &keyNulls);
	keyNulls[keyAttno - 1] = PG_ARGISNULL(1);
	if (!keyNulls[keyAttno - 1])
	{
		keyValues[keyAttno - 1] = PG_GETARG_DATUM(1);
	}

	/* The access methods use the dynamic table scan memory context. */
	methods->part_cxt = dynamicTableScanInfo->memoryContext;

	callerCxt = MemoryContextSwitchTo(dynamicTableScanInfo->memoryContext);
	selectedOid = selectPartition(node,
								  keyValues,
								  keyNulls,
								  parentDesc,
								  methods);
	MemoryContextSwitchTo(callerCxt);

	freeValueArrays(keyValues, keyNulls);
	relation_close(parentRel, NoLock);

	/* An invalid oid means that no child partition accepts the value. */
	if (!OidIsValid(selectedOid))
	{
		PG_RETURN_NULL();
	}

	PG_RETURN_OID(selectedOid);
}
/*
 * PartitionIterator
 *		State needed to walk through all child partitions of one parent,
 *		one partition at a time.
 *
 * Used by the set-returning partition functions in this file
 * (gp_partition_expansion and gp_partition_inverse).  Instances are built
 * by createPartitionIterator.
 */
typedef struct PartitionIterator
{
/*
 * Partition metadata for the parent, as filled in by
 * findPartitionMetadataEntry.  Its 'rules' list and 'default_part' are
 * what the iteration walks over.
 */
PartitionNode *partsAndRules;
/*
 * The list cell holding the next PartitionRule to return, or NULL once
 * all non-default rules have been consumed.
 */
ListCell *nextRuleCell;
/*
 * The child partition rule that is currently being processed; NULL
 * until the first rule has been fetched.
 */
PartitionRule *currentRule;
/*
 * Whether the default partition has already been returned.  It starts
 * out true when the parent has no default partition, so that the
 * iteration never tries to return one.
 */
bool defaultPartReturned;
} PartitionIterator;
/*
 * createPartitionIterator
 *		Allocate and initialize a PartitionIterator for the given parent oid.
 *
 * The metadata for the parent is looked up in
 * dynamicTableScanInfo->partsMetadata.  The iterator is palloc'd in the
 * current memory context and is positioned at the first partition rule.
 * defaultPartReturned starts out false only when the parent has a default
 * partition.
 *
 * The parent is expected to have a metadata entry; callers rely on
 * partsAndRules being non-NULL in the returned iterator.
 */
static PartitionIterator *
createPartitionIterator(Oid parentOid)
{
	PartitionIterator *partitionIterator = palloc(sizeof(PartitionIterator));
	PartitionAccessMethods *accessMethods = NULL;

	/*
	 * palloc does not zero the memory, and the lookup below only reports its
	 * result through the output pointer.  Start from a well-defined state so
	 * that a failed lookup is seen as NULL by the Assert below instead of
	 * leaving a garbage pointer behind (gp_partition_selection pre-sets its
	 * output to NULL before the same lookup and tests it afterwards).
	 */
	partitionIterator->partsAndRules = NULL;
	partitionIterator->currentRule = NULL;
	partitionIterator->nextRuleCell = NULL;
	partitionIterator->defaultPartReturned = true;

	findPartitionMetadataEntry(dynamicTableScanInfo->partsMetadata,
							   parentOid,
							   &(partitionIterator->partsAndRules),
							   &accessMethods);
	Assert(NULL != partitionIterator->partsAndRules);

	partitionIterator->nextRuleCell = list_head(partitionIterator->partsAndRules->rules);

	/* Only schedule the default partition for output if there is one. */
	if (NULL != partitionIterator->partsAndRules->default_part)
	{
		partitionIterator->defaultPartReturned = false;
	}

	return partitionIterator;
}
PG_FUNCTION_INFO_V1(gp_partition_expansion);
/*
* gp_partition_expansion
* Find all child partition oids for the given parent oid.
*
* This function is a set-returning function, returning a set of
* child oids.
*/
Datum
gp_partition_expansion(PG_FUNCTION_ARGS)
{
FuncCallContext *funcCallContext = NULL;
/*
* Setup the function call context for set-returning functions.
* At the first time of calling this function, we find the partition
* metadata for the given parent oid, and store that in an PartitionIterator
* structure.
*/
if (SRF_IS_FIRSTCALL())
{
funcCallContext = SRF_FIRSTCALL_INIT();
Oid parentOid = PG_GETARG_OID(0);
MemoryContext oldContext = MemoryContextSwitchTo(funcCallContext->multi_call_memory_ctx);
funcCallContext->user_fctx = createPartitionIterator(parentOid);
MemoryContextSwitchTo(oldContext);
}
funcCallContext = SRF_PERCALL_SETUP();
PartitionIterator *partitionIterator = (PartitionIterator *)funcCallContext->user_fctx;
Assert(partitionIterator != NULL);
ListCell *ruleCell = partitionIterator->nextRuleCell;
if (ruleCell != NULL)
{
partitionIterator->nextRuleCell = lnext(ruleCell);
partitionIterator->currentRule = (PartitionRule *)lfirst(ruleCell);
Oid childOid = partitionIterator->currentRule->parchildrelid;
SRF_RETURN_NEXT(funcCallContext, ObjectIdGetDatum(childOid));
}
/*
* Return default partition oid if any.
*/
if (!partitionIterator->defaultPartReturned)
{
Assert(NULL != partitionIterator->partsAndRules);
Assert(NULL != partitionIterator->partsAndRules->default_part);
PartitionRule *defaultPart = partitionIterator->partsAndRules->default_part;
Oid childOid = defaultPart->parchildrelid;
partitionIterator->defaultPartReturned = true;
SRF_RETURN_NEXT(funcCallContext, ObjectIdGetDatum(childOid));
}
pfree(partitionIterator);
SRF_RETURN_DONE(funcCallContext);
}
/*
 * createInverseTupleDesc
 *		Build the tuple descriptor of the record returned by
 *		gp_partition_inverse.
 *
 * typeOid/typeMod describe the partition key; they are used for the two
 * key columns.  The record columns are, in order:
 *
 *		partchildrelid (oid)	child partition oid
 *		minkey (key type)		low end of a range partition, or the value
 *								of a list partition
 *		minincluded (bool)		whether the low end is inclusive; always
 *								true for a list partition
 *		maxkey (key type)		high end of a range partition
 *		maxincluded (bool)		whether the high end is inclusive
 */
static TupleDesc
createInverseTupleDesc(Oid typeOid, int32 typeMod)
{
	TupleDesc	desc;

	desc = CreateTemplateTupleDesc(PARTITION_INVERSE_RECORD_NUM_ATTRS, false);

	/* child partition oid */
	TupleDescInitEntry(desc,
					   (AttrNumber) PARTITION_INVERSE_RECORD_PARCHILDRELID_ATTNO,
					   "partchildrelid", OIDOID, -1, 0);

	/* lower bound and its inclusiveness */
	TupleDescInitEntry(desc,
					   (AttrNumber) PARTITION_INVERSE_RECORD_MINKEY_ATTNO,
					   "minkey", typeOid, typeMod, 0);
	TupleDescInitEntry(desc,
					   (AttrNumber) PARTITION_INVERSE_RECORD_MININCLUDED_ATTNO,
					   "minincluded", BOOLOID, -1, 0);

	/* upper bound and its inclusiveness */
	TupleDescInitEntry(desc,
					   (AttrNumber) PARTITION_INVERSE_RECORD_MAXKEY_ATTNO,
					   "maxkey", typeOid, typeMod, 0);
	TupleDescInitEntry(desc,
					   (AttrNumber) PARTITION_INVERSE_RECORD_MAXINCLUDED_ATTNO,
					   "maxincluded", BOOLOID, -1, 0);

	return desc;
}
/*
 * InverseContext
 *		Per-query state for the gp_partition_inverse function.
 *
 * This is the base structure shared by all partition kinds.  It is created
 * by createInverseContext, which also picks the findNextRecord callback
 * according to the partition kind, and released by freeInverseContext.
 */
typedef struct InverseContext InverseContext;
struct InverseContext
{
/*
 * The iterator used to walk through all child partitions,
 * one at a time.
 */
PartitionIterator *partitionIterator;
/*
 * Output record under construction: one value and one null flag per
 * column of the gp_partition_inverse record.  Filled in by
 * findNextRecord and then passed to heap_form_tuple.
 */
Datum values[PARTITION_INVERSE_RECORD_NUM_ATTRS];
bool nulls[PARTITION_INVERSE_RECORD_NUM_ATTRS];
/*
 * Callback that produces the next output record into values/nulls.
 * Returns true when a record was produced and false when there are no
 * more records.
 */
bool (*findNextRecord)(InverseContext *inverseContext);
};
/*
 * InverseContextForRange
 *		Context data for gp_partition_inverse on range partitions.
 *
 * Range partitions need no state beyond the base InverseContext, so this
 * is just another name for it.
 */
typedef InverseContext InverseContextForRange;
/*
 * InverseContextForList
 *		Context data for gp_partition_inverse on list partitions.
 *
 * The base InverseContext is the first member: code in this file casts
 * between InverseContext * and InverseContextForList *.
 */
typedef struct InverseContextForList
{
InverseContext context;
/*
 * The cell for the next value of the current list partition, or NULL
 * when all values of the current partition have been returned.
 */
ListCell *listValueCell;
}InverseContextForList;
/*
 * setInverseRecordForRange
 *		Fill the values/nulls arrays of a gp_partition_inverse record from a
 *		(non-default) range partition rule.
 *
 * 'values' and 'nulls' must each have numAttrs
 * (PARTITION_INVERSE_RECORD_NUM_ATTRS) elements.  Every column starts out
 * NULL; the child oid is always set, and the min/max columns are set only
 * when the rule has the corresponding range start/end.  An open bound
 * therefore shows up as NULL key and NULL inclusiveness.
 *
 * Range partitions can be of the form:
 *
 *		(-inf, e], (-inf, e), (s, e), [s, e], (s, e], [s, e), (s, inf),
 *		and [s, inf).
 *
 * The default partition is handled by setInverseRecordForDefaultPart, not
 * here.
 */
static void
setInverseRecordForRange(PartitionRule *rule,
						 Datum *values,
						 bool *nulls,
						 int numAttrs)
{
	Assert(numAttrs == PARTITION_INVERSE_RECORD_NUM_ATTRS);
	Assert(rule != NULL);
	/* Default partitions should not be handled here. */
	Assert(!rule->parisdefault);

	MemSet(nulls, true, sizeof(bool) * PARTITION_INVERSE_RECORD_NUM_ATTRS);
	MemSet(values, 0, sizeof(Datum) * PARTITION_INVERSE_RECORD_NUM_ATTRS);

	if (NULL != rule->parrangestart)
	{
		Assert(IsA(rule->parrangestart, List) &&
			   list_length((List *)rule->parrangestart) == 1);

		Node	   *rangeStart = (Node *)linitial((List *)rule->parrangestart);
		Assert(IsA(rangeStart, Const));
		Const	   *rangeStartConst = (Const *)rangeStart;

		values[PARTITION_INVERSE_RECORD_MINKEY_ATTNO - 1] = rangeStartConst->constvalue;
		nulls[PARTITION_INVERSE_RECORD_MINKEY_ATTNO - 1] = rangeStartConst->constisnull;

		values[PARTITION_INVERSE_RECORD_MININCLUDED_ATTNO - 1] = BoolGetDatum(rule->parrangestartincl);
		nulls[PARTITION_INVERSE_RECORD_MININCLUDED_ATTNO - 1] = false;
	}

	if (NULL != rule->parrangeend)
	{
		Assert(IsA(rule->parrangeend, List) &&
			   list_length((List *)rule->parrangeend) == 1);

		Node	   *rangeEnd = (Node *)linitial((List *)rule->parrangeend);
		Assert(IsA(rangeEnd, Const));
		Const	   *rangeEndConst = (Const *)rangeEnd;

		values[PARTITION_INVERSE_RECORD_MAXKEY_ATTNO - 1] = rangeEndConst->constvalue;
		nulls[PARTITION_INVERSE_RECORD_MAXKEY_ATTNO - 1] = rangeEndConst->constisnull;

		/*
		 * Clear the null flag of the inclusiveness column, mirroring the
		 * range-start branch.  Clearing the MAXKEY flag here instead would
		 * discard constisnull for the key and leave maxincluded NULL.
		 */
		values[PARTITION_INVERSE_RECORD_MAXINCLUDED_ATTNO - 1] = BoolGetDatum(rule->parrangeendincl);
		nulls[PARTITION_INVERSE_RECORD_MAXINCLUDED_ATTNO - 1] = false;
	}

	values[PARTITION_INVERSE_RECORD_PARCHILDRELID_ATTNO - 1] = ObjectIdGetDatum(rule->parchildrelid);
	nulls[PARTITION_INVERSE_RECORD_PARCHILDRELID_ATTNO - 1] = false;
}
/*
 * setInverseRecordForList
 *		Fill the values/nulls arrays of a gp_partition_inverse record for
 *		one value of a list partition.
 *
 * 'listValueCell' is a cell of rule->parlistvalues.  'values' and 'nulls'
 * must each have numAttrs (PARTITION_INVERSE_RECORD_NUM_ATTRS) elements;
 * all of them are overwritten.
 *
 * A list value is reported as the degenerate closed range [v, v]: the same
 * constant is used for minkey and maxkey, and both ends are inclusive.
 *
 * Only a single-column partition key at this level is supported.
 */
static void
setInverseRecordForList(PartitionRule *rule,
						ListCell *listValueCell,
						Datum *values,
						bool *nulls,
						int numAttrs)
{
	Assert(numAttrs == PARTITION_INVERSE_RECORD_NUM_ATTRS);
	Assert(rule != NULL &&
		   rule->parlistvalues != NULL &&
		   listValueCell != NULL);

	/*
	 * Note that in a partition rule, list values are stored in a list of
	 * lists to support multi-column partitions.
	 */
	List	   *listValue = (List *)lfirst(listValueCell);

	/* This function only supports a single-column partition key. */
	Assert(list_length(listValue) == 1);

	Const	   *listValueConst = (Const *)linitial(listValue);
	Assert(IsA(listValueConst, Const));

	values[PARTITION_INVERSE_RECORD_PARCHILDRELID_ATTNO - 1] = ObjectIdGetDatum(rule->parchildrelid);
	nulls[PARTITION_INVERSE_RECORD_PARCHILDRELID_ATTNO - 1] = false;

	values[PARTITION_INVERSE_RECORD_MINKEY_ATTNO - 1] = listValueConst->constvalue;
	nulls[PARTITION_INVERSE_RECORD_MINKEY_ATTNO - 1] = listValueConst->constisnull;

	values[PARTITION_INVERSE_RECORD_MININCLUDED_ATTNO - 1] = BoolGetDatum(true);
	nulls[PARTITION_INVERSE_RECORD_MININCLUDED_ATTNO - 1] = false;

	/*
	 * maxkey carries the same constant as minkey, so it must carry the same
	 * null flag.  Marking it non-null for a NULL constant would hand an
	 * invalid datum to heap_form_tuple.
	 */
	values[PARTITION_INVERSE_RECORD_MAXKEY_ATTNO - 1] = listValueConst->constvalue;
	nulls[PARTITION_INVERSE_RECORD_MAXKEY_ATTNO - 1] = listValueConst->constisnull;

	values[PARTITION_INVERSE_RECORD_MAXINCLUDED_ATTNO - 1] = BoolGetDatum(true);
	nulls[PARTITION_INVERSE_RECORD_MAXINCLUDED_ATTNO - 1] = false;
}
/*
 * setInverseRecordForDefaultPart
 *		Fill the values/nulls arrays of a gp_partition_inverse record for a
 *		default partition (range or list).
 *
 * A default partition carries no constraint information, so the record
 * simply holds the default partition's oid, with every other column NULL.
 */
static void
setInverseRecordForDefaultPart(PartitionRule *rule,
							   Datum *values,
							   bool *nulls,
							   int numAttrs)
{
	int			col;

	Assert(numAttrs == PARTITION_INVERSE_RECORD_NUM_ATTRS);
	Assert(rule != NULL &&
		   ((rule->parrangestart == NULL &&
			 rule->parrangeend == NULL) ||
			(rule->parlistvalues == NULL)));

	/* Start with an all-NULL record ... */
	for (col = 0; col < PARTITION_INVERSE_RECORD_NUM_ATTRS; col++)
	{
		values[col] = (Datum) 0;
		nulls[col] = true;
	}

	/* ... and then fill in the only column a default partition has. */
	nulls[PARTITION_INVERSE_RECORD_PARCHILDRELID_ATTNO - 1] = false;
	values[PARTITION_INVERSE_RECORD_PARCHILDRELID_ATTNO - 1] = ObjectIdGetDatum(rule->parchildrelid);
}
/*
 * findNextRecordForRange
 *		Produce the next gp_partition_inverse record for a range-partitioned
 *		parent into inverseContext->values / inverseContext->nulls.
 *
 * Each non-default partition rule yields one record; after the last rule
 * the default partition, if any, yields one more.
 *
 * Returns true when a record was produced and false when there is nothing
 * left to return.
 */
static bool
findNextRecordForRange(InverseContext *inverseContext)
{
	PartitionIterator *iter;
	ListCell   *cell;
	PartitionRule *defaultRule;

	Assert(inverseContext != NULL &&
		   inverseContext->partitionIterator);

	iter = inverseContext->partitionIterator;
	cell = iter->nextRuleCell;

	if (cell != NULL)
	{
		/* Consume the next regular partition rule. */
		iter->nextRuleCell = lnext(cell);
		iter->currentRule = (PartitionRule *)lfirst(cell);

		setInverseRecordForRange(iter->currentRule,
								 inverseContext->values,
								 inverseContext->nulls,
								 PARTITION_INVERSE_RECORD_NUM_ATTRS);
		return true;
	}

	if (iter->defaultPartReturned)
	{
		/* Rules are exhausted and there is no pending default partition. */
		return false;
	}

	/* Return the default partition, once. */
	Assert(NULL != iter->partsAndRules);
	Assert(NULL != iter->partsAndRules->default_part);

	defaultRule = iter->partsAndRules->default_part;
	setInverseRecordForDefaultPart(defaultRule,
								   inverseContext->values,
								   inverseContext->nulls,
								   PARTITION_INVERSE_RECORD_NUM_ATTRS);
	iter->defaultPartReturned = true;

	return true;
}
/*
 * findNextRecordForList
 *		Produce the next gp_partition_inverse record for a list-partitioned
 *		parent into inverseContext->values / inverseContext->nulls.
 *
 * A list partition may hold several values; each call emits a record for
 * exactly one value.  Once all values of all partitions have been emitted,
 * the default partition, if any, yields one final record.
 *
 * Returns true when a record was produced and false when there is nothing
 * left to return.
 */
static bool
findNextRecordForList(InverseContext *inverseContext)
{
	PartitionIterator *iter;
	InverseContextForList *listContext;
	ListCell   *valueCell;
	ListCell   *ruleCell;
	PartitionRule *defaultRule;

	Assert(inverseContext != NULL);
	iter = inverseContext->partitionIterator;
	Assert(iter != NULL);

	listContext = (InverseContextForList *)inverseContext;

	/* Case 1: the current partition still has values left. */
	valueCell = listContext->listValueCell;
	if (valueCell != NULL)
	{
		listContext->listValueCell = lnext(valueCell);
		setInverseRecordForList(iter->currentRule, valueCell,
								inverseContext->values, inverseContext->nulls,
								PARTITION_INVERSE_RECORD_NUM_ATTRS);
		return true;
	}

	/* Case 2: move on to the first value of the next partition. */
	ruleCell = iter->nextRuleCell;
	if (ruleCell != NULL)
	{
		iter->nextRuleCell = lnext(ruleCell);
		iter->currentRule = (PartitionRule *)lfirst(ruleCell);
		Assert(iter->currentRule->parlistvalues != NULL);

		valueCell = list_head(iter->currentRule->parlistvalues);
		listContext->listValueCell = lnext(valueCell);
		setInverseRecordForList(iter->currentRule, valueCell,
								inverseContext->values, inverseContext->nulls,
								PARTITION_INVERSE_RECORD_NUM_ATTRS);
		return true;
	}

	/* Case 3: everything consumed except, possibly, the default partition. */
	if (iter->defaultPartReturned)
	{
		return false;
	}

	Assert(NULL != iter->partsAndRules);
	Assert(NULL != iter->partsAndRules->default_part);

	defaultRule = iter->partsAndRules->default_part;
	setInverseRecordForDefaultPart(defaultRule,
								   inverseContext->values,
								   inverseContext->nulls,
								   PARTITION_INVERSE_RECORD_NUM_ATTRS);
	iter->defaultPartReturned = true;

	return true;
}
/*
 * createInverseContext
 *		Build the gp_partition_inverse context for the given parent oid.
 *
 * The kind of context and the findNextRecord callback depend on the
 * parent's partitioning kind: 'r' (range) or 'l' (list).  Any other kind
 * raises an ERROR.  The context and its iterator are palloc'd in the
 * current memory context.
 */
static InverseContext*
createInverseContext(Oid parentOid)
{
	InverseContext *ctx = NULL;
	PartitionIterator *iter = createPartitionIterator(parentOid);
	char		kind;

	Assert(NULL != iter->partsAndRules);
	Assert(NULL != iter->partsAndRules->part);

	kind = iter->partsAndRules->part->parkind;

	if (kind == 'r')
	{
		ctx = palloc(sizeof(InverseContextForRange));
		ctx->partitionIterator = iter;
		ctx->findNextRecord = findNextRecordForRange;
	}
	else if (kind == 'l')
	{
		ctx = palloc(sizeof(InverseContextForList));
		ctx->partitionIterator = iter;
		ctx->findNextRecord = findNextRecordForList;
		/* No partition is being walked yet. */
		((InverseContextForList *)ctx)->listValueCell = NULL;
	}
	else
	{
		elog(ERROR, "partitioning kind '%c' not allowed", kind);
	}

	return ctx;
}
/*
 * freeInverseContext
 *		Release a gp_partition_inverse context together with the partition
 *		iterator it owns.
 */
static void
freeInverseContext(InverseContext *inverseContext)
{
	PartitionIterator *ownedIterator;

	Assert(inverseContext != NULL);

	/* Fetch the iterator first; the context is freed right after it. */
	ownedIterator = inverseContext->partitionIterator;
	pfree(ownedIterator);
	pfree(inverseContext);
}
/*
 * findPartitionKeyType
 *		Look up the type oid and type modifier of a partition key column.
 *
 * 'keyAttNo' is the 1-based attribute number of the key in the relation
 * 'parentOid'.  The results are stored in *typeOid and *typeMod.  The
 * relation is opened and closed with NoLock.
 */
static void
findPartitionKeyType(Oid parentOid,
					 int keyAttNo,
					 Oid *typeOid,
					 int32 *typeMod)
{
	const int	keySlot = keyAttNo - 1;
	Relation	parentRel;
	TupleDesc	parentDesc;

	parentRel = relation_open(parentOid, NoLock);
	parentDesc = RelationGetDescr(parentRel);
	Assert(parentDesc->natts >= keyAttNo);

	*typeOid = parentDesc->attrs[keySlot]->atttypid;
	*typeMod = parentDesc->attrs[keySlot]->atttypmod;

	relation_close(parentRel, NoLock);
}
/*
* gp_partition_inverse
* Returns all child partition oids with their constraints for a given parent oid.
*
* Currently, this function assumes that the parent partition is the root partition.
*
* This function is a set-returning function.
*/
Datum
gp_partition_inverse(PG_FUNCTION_ARGS)
{
FuncCallContext *funcCallContext = NULL;
InverseContext *inverseContext = NULL;
/*
* Setup the function call context for set-returning functions.
* At the first time of calling this function, we create and initialize
* necessary context data in inverseContext, such as finding the partition
* metadata for the given parent oid.
*/
if (SRF_IS_FIRSTCALL())
{
funcCallContext = SRF_FIRSTCALL_INIT();
Oid parentOid = PG_GETARG_OID(0);
MemoryContext oldContext = MemoryContextSwitchTo(funcCallContext->multi_call_memory_ctx);
funcCallContext->user_fctx = createInverseContext(parentOid);
inverseContext = (InverseContext *)funcCallContext->user_fctx;
Assert(NULL != inverseContext);
Assert(NULL != inverseContext->partitionIterator);
Assert(NULL != inverseContext->partitionIterator->partsAndRules);
Partition *part = inverseContext->partitionIterator->partsAndRules->part;
Assert(NULL != part);
Oid typeOid = 0;
int32 typeMod = 0;
findPartitionKeyType(parentOid, part->paratts[0], &typeOid, &typeMod);
TupleDesc tupleDesc = createInverseTupleDesc(typeOid, typeMod);
funcCallContext->tuple_desc = BlessTupleDesc(tupleDesc);
MemoryContextSwitchTo(oldContext);
}
funcCallContext = SRF_PERCALL_SETUP();
inverseContext = (InverseContext *)funcCallContext->user_fctx;
Assert(inverseContext != NULL &&
inverseContext->partitionIterator != NULL);
if (inverseContext->findNextRecord(inverseContext))
{
HeapTuple tuple = heap_form_tuple(funcCallContext->tuple_desc,
inverseContext->values,
inverseContext->nulls);
Datum result = HeapTupleGetDatum(tuple);
SRF_RETURN_NEXT(funcCallContext, result);
}
freeInverseContext(inverseContext);
SRF_RETURN_DONE(funcCallContext);
}
/*
 * dumpDynamicTableScanPidIndex
 *		Write the partition oids recorded for one dynamic table scan to the
 *		server log.
 *
 * 'index' is used directly as the position in
 * dynamicTableScanInfo->pidIndexes, i.e. it is 0-based here, unlike the
 * 1-based index taken by InsertPidIntoDynamicTableScanInfo.
 *
 * Nothing is logged when no dynamic table scan info exists, when the index
 * is out of range, or when no pid index has been created for that position.
 */
void
dumpDynamicTableScanPidIndex(int index)
{
	/*
	 * Valid positions are 0 .. numScans - 1, so index == numScans must be
	 * rejected as well; the array itself may not have been allocated yet.
	 */
	if (index < 0 ||
		dynamicTableScanInfo == NULL ||
		dynamicTableScanInfo->pidIndexes == NULL ||
		index >= dynamicTableScanInfo->numScans ||
		dynamicTableScanInfo->pidIndexes[index] == NULL)
	{
		return;
	}

	HASH_SEQ_STATUS status;
	hash_seq_init(&status, dynamicTableScanInfo->pidIndexes[index]);

	StringInfoData pids;
	initStringInfo(&pids);

	/*
	 * Each hash entry is read through an Oid pointer.
	 * NOTE(review): this relies on the partition oid (the hash key) being
	 * the first field of the entry -- confirm against PartOidEntry.
	 */
	Oid		   *partOid = NULL;
	while ((partOid = (Oid *)hash_seq_search(&status)) != NULL)
	{
		/* Oid is unsigned; %d would print large oids as negative numbers. */
		appendStringInfo(&pids, "%u ", *partOid);
	}

	elog(LOG, "Dynamic Table Scan %d pids: %s", index, pids.data);
	pfree(pids.data);
}