blob: f5f320f652ee3b1505a41af28337b258defac8fe [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
#ifndef PARTFUNC_H
#define PARTFUNC_H
/* -*-C++-*-
*************************************************************************
*
* File: PartFunc.h
* Description: Partitioning Function
* Created: 01/03/96
* Language: C++
*
*
*
*
*************************************************************************
*/
// -----------------------------------------------------------------------
#include "Int64.h"
#include "Collections.h"
#include "ItemExpr.h"
#include "ItemExprList.h"
#include "RelExpr.h"
#include "NodeMap.h"
// ----------------------------------------------------------------------
// contents of this file
// ----------------------------------------------------------------------
class PartitioningFunction;
class SinglePartitionPartitioningFunction;
class ReplicateViaBroadcastPartitioningFunction;
class ReplicateNoBroadcastPartitioningFunction;
class HashPartitioningFunction;
class TableHashPartitioningFunction;
class HashDistPartitioningFunction;
class Hash2PartitioningFunction;
class RangePartitionBoundaries;
class RangePartitioningFunction;
class LogPhysPartitioningFunction;
class RoundRobinPartitioningFunction;
class HivePartitioningFunction;
class SkewedDataPartitioningFunction;
// ----------------------------------------------------------------------
// forward declarations
// ----------------------------------------------------------------------
class PartitioningRequirement;
class NormWA;
class Generator;
class Attributes;
class NAColumnArray;
class SearchKey;
class SkewedValueList;
// A list of Int64 values, declared through the project's LIST() macro.
typedef LIST(Int64) Int64List;
// Pointer to a comparison callback that receives a low bound, a key and a
// high bound (all as raw byte strings), the key length, and a checkLast
// flag, and returns an NABoolean.
// NOTE(review): the exact comparison semantics (bound inclusiveness, the
// meaning of checkLast) are defined by the functions bound to this type --
// confirm at the assignment sites.
typedef NABoolean (*compFuncPtrT)(const char* low, const char* key, const char* high, Int32 keyLen, NABoolean checkLast);
// ----------------------------------------------------------------------
// literals for special numbers of partitions (don't care, exactly one)
// ----------------------------------------------------------------------
enum
{
  // "don't care" about the number of partitions
  ANY_NUMBER_OF_PARTITIONS = -1,

  // exactly one partition
  EXACTLY_ONE_PARTITION    = 1
};
// ----------------------------------------------------------------------
// literals for partition grouping distribution to use
// (use default, uniform # of physical parts, uniform # of active parts)
// ----------------------------------------------------------------------
enum PartitionGroupingDistEnum
{
  // use the default grouping distribution
  DEFAULT_PARTITION_GROUPING          = -1,

  // uniform number of physical partitions per group
  UNIFORM_PHYSICAL_PARTITION_GROUPING = 0,

  // uniform number of active partitions per group
  UNIFORM_ACTIVE_PARTITION_GROUPING   = 1
};
// A class representing skew property
class skewProperty : public NABasicObject {
public:
// -----------------------------------------------------------------------
// literals for skew data handling
// -----------------------------------------------------------------------
enum skewDataHandlingEnum
{ ANY, // Any skew
UNIFORM_DISTRIBUTE, // skewed values are uniformly distributed
// (e.g., through round-robin)
BROADCAST // skewed values are broadcasted
};
skewProperty(enum skewDataHandlingEnum x = ANY,
SkewedValueList* v= NULL,
Int32 numEsps = -1,
NAMemory* heap = CmpCommon::statementHeap()
):
indicator_(x), skewValues_(v), heap_(heap), numESPs_(numEsps),
broadcastOneRow_(FALSE){};
skewProperty(const skewProperty& sk):
indicator_(sk.indicator_), skewValues_(sk.skewValues_), heap_(sk.heap_),
numESPs_(sk.numESPs_),
broadcastOneRow_(sk.broadcastOneRow_) {}
~skewProperty() {};
enum skewDataHandlingEnum getIndicator() const { return indicator_; };
void setIndicator(enum skewDataHandlingEnum x) { indicator_ = x; };
NABoolean getBroadcastOneRow() const { return broadcastOneRow_; };
void setBroadcastOneRow(NABoolean x) { broadcastOneRow_ = x; };
const SkewedValueList* getSkewValues() const { return skewValues_; };
void setSkewValues(const SkewedValueList* v) { skewValues_ = v; };
void set(skewProperty& sk)
{
setIndicator(sk.getIndicator());
setSkewValues(sk.getSkewValues());
setBroadcastOneRow(sk.getBroadcastOneRow());
};
NABoolean operator ==(const skewProperty&) const;
// If abbre. form
// = FALSE: skewed distribution method name and values are returned
// = TRUE: only the abbreviation of the skewed distribution method
// is returned
const NAString getText(NABoolean inAbbreviatedForm = FALSE) const;
NABoolean isAnySkew() const { return indicator_ == skewProperty::ANY; };
NABoolean isUniformDistributed() const { return indicator_ == skewProperty::UNIFORM_DISTRIBUTE; };
NABoolean isBroadcasted() const { return indicator_ == skewProperty::BROADCAST; };
NABoolean hasSkewValues() const
{ return skewValues_ AND skewValues_->entries() > 0; };
Int32 getAntiSkewESPs() const { return numESPs_; };
NABoolean skewedListHasOnlyNonSkewedNull() const
{
return skewValues_->hasOnlyNonSkewedNull();
}
protected:
enum skewDataHandlingEnum indicator_; // How the data is skewed
const SkewedValueList* skewValues_; // The skewed values. Multiple skew
// property objects can share
// a single skew value list
NAMemory * heap_; // the heap
NABoolean broadcastOneRow_;
// the number of ESPs that will deal with skew. -1 means "use all"
Int32 numESPs_;
};
// Define one useful object: the any-skew object
extern const skewProperty ANY_SKEW_PROPERTY;
// -----------------------------------------------------------------------
// PartitioningFunction
//
// A base class for defining the partitioning characteristics for
// horizontally partitioned data. We envisage its use for hash,
// range or any other built-in (system supported) partitioning schemes.
//
// The partition function specifies
// 1) the number of partitions (>= 1) and
// 2) a partitioning key and
// 3) an expression that can be used for distributing
// data over the partitions.
//
// -----------------------------------------------------------------------
class PartitioningFunction : public NABasicObject
{
protected:
// --------------------------------------------------------------------
// Partitioning function type identifier.
// It should be visible only to the derived classes.
// --------------------------------------------------------------------
// One tag per kind of partitioning function; stored in functionType_ and
// compared by the isA...PartitioningFunction() tests. Values are assigned
// implicitly, so the order of the enumerators determines their numbers.
enum PartitioningFunctionTypeEnum
{
  SINGLE_PARTITION_PARTITIONING_FUNCTION,

  REPLICATE_VIA_BROADCAST_PARTITIONING_FUNCTION,
  REPLICATE_NO_BROADCAST_PARTITIONING_FUNCTION,

  HASH_PARTITIONING_FUNCTION,
  HASH_DIST_PARTITIONING_FUNCTION,
  HASH2_PARTITIONING_FUNCTION,

  RANGE_PARTITIONING_FUNCTION,
  LOGPHYS_PARTITIONING_FUNCTION,
  ROUND_ROBIN_PARTITIONING_FUNCTION,
  SKEWEDDATA_PARTITIONING_FUNCTION,
  HIVE_PARTITIONING_FUNCTION
};
public:
// --------------------------------------------------------------------
// Constructor functions
// --------------------------------------------------------------------
// Build a partitioning function of type ftype without a partitioning key.
//   ftype   - type tag stored in functionType_
//   nodeMap - optional node map; the pointer is stored as is (may be null)
//   heap    - heap remembered in heap_
// Expressions start out NULL, all flags FALSE, the restricted partition
// range is unset (-1/-1) and the active stream count is zero.
PartitioningFunction(const PartitioningFunctionTypeEnum ftype,
                     NodeMap* nodeMap = NULL,
                     NAMemory* heap = CmpCommon::statementHeap())
  : functionType_(ftype),
    partitioningExpression_(NULL),
    dataConversionErrorFlag_(NULL),
    assignPartition_(FALSE),
    partKeyPredsCreated_(FALSE),
    partitionSelectionExpr_(NULL),
    nodeMap_(nodeMap),
    setupForStatement_(FALSE),
    resetAfterStatement_(FALSE),
    restrictedBeginPartNumber_(-1),
    restrictedEndPartNumber_(-1),
    activeStreams_(0.0),
    heap_(heap)
{}
// Build a partitioning function of type ftype for a given partitioning key.
//   ftype           - type tag stored in functionType_
//   partitioningKey - copied into partitioningKeyColumns_
//   nodeMap         - optional node map; the pointer is stored as is
//   heap            - heap remembered in heap_
// All remaining members get the same initial values as in the keyless
// constructor: NULL expressions, FALSE flags, -1/-1 restricted range and
// zero active streams.
PartitioningFunction(const PartitioningFunctionTypeEnum ftype,
                     const ValueIdSet& partitioningKey,
                     NodeMap* nodeMap = NULL,
                     NAMemory* heap = CmpCommon::statementHeap())
  : functionType_(ftype),
    partitioningKeyColumns_(partitioningKey),
    partitioningExpression_(NULL),
    dataConversionErrorFlag_(NULL),
    assignPartition_(FALSE),
    partKeyPredsCreated_(FALSE),
    partitionSelectionExpr_(NULL),
    nodeMap_(nodeMap),
    setupForStatement_(FALSE),
    resetAfterStatement_(FALSE),
    restrictedBeginPartNumber_(-1),
    restrictedEndPartNumber_(-1),
    activeStreams_(0.0),
    heap_(heap)
{}
PartitioningFunction(const PartitioningFunction& other,
NAMemory* heap = CmpCommon::statementHeap())
: functionType_(other.functionType_),
partitioningKeyColumns_(other.partitioningKeyColumns_),
partitioningKeyPredicates_(other.partitioningKeyPredicates_),
partitionInputValues_(other.partitionInputValues_),
partitionInputValuesLayout_(other.partitionInputValuesLayout_),
partKeyPredsCreated_(other.partKeyPredsCreated_),
partitioningExpression_(other.partitioningExpression_),
dataConversionErrorFlag_(other.dataConversionErrorFlag_),
assignPartition_(other.assignPartition_),
partitionSelectionExpr_(other.partitionSelectionExpr_),
partitionSelectionExprInputs_(other.partitionSelectionExprInputs_),
nodeMap_((other.nodeMap_) ? other.nodeMap_->copy(heap) : 0),
setupForStatement_(FALSE),
resetAfterStatement_(FALSE),
restrictedBeginPartNumber_(other.restrictedBeginPartNumber_),
restrictedEndPartNumber_(other.restrictedEndPartNumber_),
activeStreams_(other.activeStreams_),
heap_(heap)
{
}
// --------------------------------------------------------------------
// Destructor functions
// --------------------------------------------------------------------
virtual ~PartitioningFunction();
// ---------------------------------------------------------------------
// Perform type-safe pointer casts.
// ---------------------------------------------------------------------
// One virtual cast per concrete partitioning function class. Each returns
// a const pointer to the named type and can be overridden by subclasses.
// NOTE(review): presumably the base-class versions return NULL and each
// subclass overrides its own cast to return `this` -- confirm in the
// implementation file.
virtual const
LogPhysPartitioningFunction* castToLogPhysPartitioningFunction() const;
virtual const
SinglePartitionPartitioningFunction*
castToSinglePartitionPartitioningFunction() const;
virtual const
ReplicateViaBroadcastPartitioningFunction*
castToReplicateViaBroadcastPartitioningFunction() const;
virtual const
ReplicateNoBroadcastPartitioningFunction*
castToReplicateNoBroadcastPartitioningFunction() const;
virtual const
HashPartitioningFunction* castToHashPartitioningFunction() const;
virtual const
TableHashPartitioningFunction* castToTableHashPartitioningFunction() const;
virtual const
HashDistPartitioningFunction* castToHashDistPartitioningFunction() const;
virtual const
Hash2PartitioningFunction* castToHash2PartitioningFunction() const;
virtual const
RangePartitioningFunction* castToRangePartitioningFunction() const;
virtual const
RoundRobinPartitioningFunction* castToRoundRobinPartitioningFunction() const;
virtual const
SkewedDataPartitioningFunction* castToSkewedDataPartitioningFunction() const;
virtual const
HivePartitioningFunction* castToHivePartitioningFunction() const;
// ---------------------------------------------------------------------
// Accessor method for the partitioning key.
// ---------------------------------------------------------------------
const ValueIdSet& getPartitioningKey() const
{
  // the stored key columns are returned by reference, no copy is made
  return partitioningKeyColumns_;
}
// ---------------------------------------------------------------------
// Accessor method for the partial partitioning key. A partial key
// describes part of the data. By default, the partial key is the
// full partitioning key. This is not true for SkewedDataPartitioningFunction.
// ---------------------------------------------------------------------
virtual const ValueIdSet& getPartialPartitioningKey() const
{
  // base-class behaviour: the partial key is the complete partitioning key
  const ValueIdSet& fullKey = getPartitioningKey();
  return fullKey;
}
// ---------------------------------------------------------------------
// Accessor method for the number of partitions.
// ---------------------------------------------------------------------
virtual Lng32 getCountOfPartitions() const;
// --------------------------------------------------------------------
// Method used for run-time type identification.
// --------------------------------------------------------------------
PartitioningFunctionTypeEnum getPartitioningFunctionType() const
{
  // the tag that was handed to the constructor
  return functionType_;
}
// ---------------------------------------------------------------------
// Partitioning Function Type Tests
// ---------------------------------------------------------------------
// TRUE if this function describes more than one partition.
NABoolean isPartitioned() const
{
  const Lng32 numParts = getCountOfPartitions();
  return (numParts > EXACTLY_ONE_PARTITION);
}
// Each test below simply compares functionType_ with one (or two) of the
// PartitioningFunctionTypeEnum tags.
inline NABoolean isASinglePartitionPartitioningFunction() const
{
  return (functionType_ == SINGLE_PARTITION_PARTITIONING_FUNCTION);
}

inline NABoolean isAReplicateViaBroadcastPartitioningFunction() const
{
  return (functionType_ == REPLICATE_VIA_BROADCAST_PARTITIONING_FUNCTION);
}

inline NABoolean isAReplicateNoBroadcastPartitioningFunction() const
{
  return (functionType_ == REPLICATE_NO_BROADCAST_PARTITIONING_FUNCTION);
}

// Either flavour of replication (via broadcast or without broadcast).
inline NABoolean isAReplicationPartitioningFunction() const
{
  return ((functionType_ == REPLICATE_VIA_BROADCAST_PARTITIONING_FUNCTION) OR
          (functionType_ == REPLICATE_NO_BROADCAST_PARTITIONING_FUNCTION));
}

inline NABoolean isAHashPartitioningFunction() const
{
  return (functionType_ == HASH_PARTITIONING_FUNCTION);
}

// "Table hash" covers both the HashDist and the Hash2 tags.
inline NABoolean isATableHashPartitioningFunction() const
{
  return (isAHashDistPartitioningFunction() ||
          isAHash2PartitioningFunction());
}

inline NABoolean isAHashDistPartitioningFunction() const
{
  return (functionType_ == HASH_DIST_PARTITIONING_FUNCTION);
}

inline NABoolean isAHash2PartitioningFunction() const
{
  return (functionType_ == HASH2_PARTITIONING_FUNCTION);
}

inline NABoolean isARangePartitioningFunction() const
{
  return (functionType_ == RANGE_PARTITIONING_FUNCTION);
}

inline NABoolean isALogPhysPartitioningFunction() const
{
  return (functionType_ == LOGPHYS_PARTITIONING_FUNCTION);
}

inline NABoolean isARoundRobinPartitioningFunction() const
{
  return (functionType_ == ROUND_ROBIN_PARTITIONING_FUNCTION);
}

inline NABoolean isASkewedDataPartitioningFunction() const
{
  return (functionType_ == SKEWEDDATA_PARTITIONING_FUNCTION);
}

inline NABoolean isAHivePartitioningFunction() const
{
  return (functionType_ == HIVE_PARTITIONING_FUNCTION);
}

// A Hash2 function whose key columns report hasRandom(). The key is only
// inspected when the type tag is Hash2.
inline NABoolean isARandomPartitioningFunction() const
{
  return (functionType_ == HASH2_PARTITIONING_FUNCTION) &&
         partitioningKeyColumns_.hasRandom();
}
// ---------------------------------------------------------------------
// Method to test if the partitioning key contains any approximate
// numeric type columns. Necessary because in some cases certain
// parallel operations do not function properly if the partitioning
// key of the table contains approximate numeric columns.
// ---------------------------------------------------------------------
NABoolean partKeyContainsFloatColumn() const;
// ---------------------------------------------------------------------
// Accessor function for retrieving entries from node map.
// ---------------------------------------------------------------------
// Return the node map entry at the given position of this function's own
// node map (nodeMap_), or NULL if no node map is attached.
//
// The node map is optional: the constructors default it to a null
// pointer, and the copy constructor and hasRemotePartitions() both test
// for that. The previous version dereferenced nodeMap_ unconditionally.
inline const NodeMapEntry* getNodeMapEntry(CollIndex position) const
{
  if (nodeMap_ == NULL)
    return NULL;
  return nodeMap_->getNodeMapEntry(position);
}
// ---------------------------------------------------------------------
// Retrieve a pointer to partitioning function's node map.
// ---------------------------------------------------------------------
virtual const NodeMap* getNodeMap() const;
// use any existing nodemap from my req or my child (or synthesize one) that
// matches my partition count requirement
void useNodeMapFromReqOrChild(PartitioningRequirement *req,
PartitioningFunction *childPF,
NABoolean forESP);
// ---------------------------------------------------------------------
// Replace existing node map with a specified node map.
// ---------------------------------------------------------------------
virtual void replaceNodeMap(NodeMap* nodeMap);
// --------------------------------------------------------------------
// A method for copying the partitioning function.
// --------------------------------------------------------------------
virtual PartitioningFunction* copy() const;
// --------------------------------------------------------------------
// Rewrite the partitioning keys of the partitioning function in
// terms of the VEGReference for the VEG to which the partitioning
// key column belongs.
// --------------------------------------------------------------------
virtual void normalizePartitioningKeys(NormWA& normWARef);
// --------------------------------------------------------------------
// Each partitioning function constructs a set of partitioning key
// predicates. They are used for restricting accesses to a specific
// partition or to a specific set of partitions. This method is
// used by the optimizer.
//
// Some partitioning functions can not create partitioning key
// predicates (they're either not smart enough or it is impossible
// to do so, e.g. because the partitioning key is not available to
// them).
// --------------------------------------------------------------------
virtual NABoolean canProducePartitioningKeyPredicates() const;
const ValueIdSet& getPartitioningKeyPredicates() const;
// ----
// The partition input values are the "variables" that appear
// in a partitioning key predicate. They are used for identifying
// the specific partition that is defined by the key. They are
// created for constructing the partitioning key predicates.
// ----
const ValueIdSet& getPartitionInputValues() const;
// ----
// Method for obtaining the layout of the partition input values
// in the buffers that are allocated for the partitioning key.
// It is used by the code generator.
// ----
const ValueIdList& getPartitionInputValuesLayout() const;
// Create the above expressions (partitioning key predicates and
// partition input values including layout) for a non-const object
virtual void createPartitioningKeyPredicates();
// Replace the pivs, partitioning key predicates and partitioning
// expression with those passed in.
virtual void replacePivs(
const ValueIdList& newPivs,
const ValueIdSet& newPartKeyPreds);
// Compare two part funcs on push-down compatibility
// Base-class answer is always FALSE; the other partitioning function is
// not looked at. Subclasses may override.
virtual NABoolean
partFuncAndFuncPushDownCompatible(const PartitioningFunction& /*other*/) const
{
  return FALSE;
}
// Test whether it is necessary to check two search keys specify
// the same partition.
virtual NABoolean checkSamePartitionNeeded() const
{
  // base-class answer: yes, the check is needed
  return TRUE;
}
// --------------------------------------------------------------------
// A method that is used by optimizer for comparing partitioning
// function with the random number partitioning function i.e. it only
// compares the partitioning function type and number of partitions.
// It does not compare partitioning key.
// --------------------------------------------------------------------
virtual COMPARE_RESULT comparePartFuncsForUnion
(const PartitioningFunction &other) const;
NABoolean isKnownReplicaPartFunc() const;
// --------------------------------------------------------------------
// A method that is used for comparing two partitioning functions
// by the optimizer.
// --------------------------------------------------------------------
virtual COMPARE_RESULT comparePartFuncToFunc
(const PartitioningFunction &other) const;
// ---------------------------------------------------------------------
// Check whether one partitioning function is a grouping of another.
// A grouping of a partitioning function can be created by combining
// two of its partitions zero or more times. The combined partitions
// do not have to be adjacent.
// ---------------------------------------------------------------------
virtual NABoolean isAGroupingOf(const PartitioningFunction &other,
Lng32* maxPartsPerGroup = NULL) const;
// ---------------------------------------------------------------------
// Transform a partitioning function into a partitioning requirement
// ---------------------------------------------------------------------
virtual PartitioningRequirement* makePartitioningRequirement();
// --------------------------------------------------------------------
// Change the number of partitions in the partitioning function, if
// possible. Because of limitations in the code to do this, the
// new number of partitions is merely a suggestion, and the actual
// number used is returned as the result value.
// --------------------------------------------------------------------
virtual PartitioningFunction *
scaleNumberOfPartitions(Lng32 &suggestedNewNumberOfPartitions,
PartitionGroupingDistEnum partGroupDist =
DEFAULT_PARTITION_GROUPING);
// --------------------------------------------------------------------
// Copy this partitioning function and rewrite the copy in terms of
// the top or bottom values that are contained in the map.
// If the parameter mapItUp is set to TRUE, then the partitioning
// function is rewritten in terms of the values in the top map.
// Otherwise, it is rewritten in terms of values in the bottom map.
// The virtual function remapIt() implements the remapping.
// --------------------------------------------------------------------
virtual PartitioningFunction* copyAndRemap
(ValueIdMap& map, NABoolean mapItUp) const;
virtual void remapIt(const PartitioningFunction* opf,
ValueIdMap& map, NABoolean mapItUp);
// --------------------------------------------------------------------
// A method for creating a partitioning function for the IndexDesc
// based on the partitioning function for the NAFileSet
// --------------------------------------------------------------------
virtual PartitioningFunction* createPartitioningFunctionForIndexDesc
(IndexDesc *idesc) const;
// --------------------------------------------------------------------
// Constructor for the partitioning expression.
// It is a bound ItemExpr tree, i.e, each operator has a ValueId
// and a NAType assigned to it.
//
// This method is used by the code generator.
//
// Notionally, the partitioning expression would have the following
// prototype in the C language:
// int getPartitionNumber(<param1>, <param2>, ..., <paramN>)
// The role of the partitioning expression is to receive zero or more
// values as parametric inputs and return an index that denotes the
// data stream/partition to which the row of interest should belong.
// The index is usually called the "partition number". The values
// that it receives as parametric inputs are called the
// "partitioning key values". Incidentally, getPartitioningKey()
// returns precisely those expressions that are evaluated at
// run-time for computing the partitioning key values.
// --------------------------------------------------------------------
virtual ItemExpr* createPartitioningExpression();
// Return the stored partitioning expression; NULL until one has been
// stored (the constructors initialize it to NULL).
ItemExpr *getPartitioningExpression() const
{
  return partitioningExpression_;
}
// Create the partition selection expression. 'Partition selection'
// means that an expression is used to determine the partition to
// access as opposed to using the File System to determine the range
// of partitions to access based on a set of partitioning key
// predicates. 'Partition assignment' is a form of partition
// selection in which the expression calculates the partition into
// which a row is inserted. Partition assignment is currently only
// used for Round Robin partitioning. Partition selection is
// currently used for Hash Dist and Round Robin Partitioning. And
// the File System is used for Range Partitioning. The default
// implementation of this method returns NULL, meaning no partition
// selection expression can be created. If a partitioning selection
// expression is created, it is cached in the data member
// 'partitionSelectionExpr_' and the partition selection inputs are
// generated and stored in 'partitionSelectionExprInputs_'. This
// method is redefined for HashDistPartitioningFunction and
// RoundRobinPartitioningFunction.
//
// Base-class version: ignores both arguments and returns NULL, i.e. no
// partition selection expression is created.
virtual ItemExpr *
createPartitionSelectionExpr(const SearchKey *partSearchKey,
                             const ValueIdSet &availableValues)
{
  return NULL;
}
// Return the cached partition selection expression. See
// createPartitionSelectionExpr() above.
//
// read-only access to the cached partition selection expression
ItemExpr *partitionSelectionExpr() const
{
  return partitionSelectionExpr_;
}
// writable access: hands out a reference to the cached pointer itself, so
// the caller can replace the expression through it
ItemExpr * &partitionSelectionExpr()
{
  return partitionSelectionExpr_;
}
// PartitionAccess::codeGen() calls createPartSelectionExprFromSearchKey
// to create a partition selection expression based on the search key
// expressions. Derived classes, such as Hash2PartitioningFunction,
// may have a more complicated expression than the default case, which
// simply copies the search key expressions into the begin and end
// partition selection expressions.
virtual void
createPartSelectionExprFromSearchKey(const ValueId beginPartSelId,
const ValueId endPartSelId,
ValueIdList &partSelectionValIds) const;
// partitionSelectionExprInputs(): A list of inputs to the partition
// selection expression. This list is populated when the partition
// selection expression is created. The partition selection
// expression needs two inputs: the partition number and the total
// number of partitions. These are set up as internal host
// variables when the partition selection expression is created. The
// inputs should be laid out as follows:
//
// |-----------------------------------------|
// | partition number | number of partitions |
// | (4 byte integer) | (4 byte integer) |
// |-----------------------------------------|
//
// These variables are actually both inputs and the output of the
// partition selection function. Round Robin partitioning uses
// both fields as inputs and produces its output in the first
// field. HashDist partitioning uses the second field as an
// input and produces its output in the first field. It is
// important that the variables be laid out in this order, because
// when the partition selection expressions are generated, the
// resulting integer must map to the same location as the 'partition
// number' field. This happens (luckily or by design) because they
// are both the first values in their respective lists.
//
// read-only view of the partition selection expression inputs
const ValueIdList &partitionSelectionExprInputs() const
{
  return partitionSelectionExprInputs_;
}
// modifiable view of the same list
ValueIdList &partitionSelectionExprInputs()
{
  return partitionSelectionExprInputs_;
}
// ---------------------------------------------------------------------
// True if partition assignment needs to be done when inserting
// rows. 'Partition assignment' is a form of partition selection in
// which the expression calculates the partition into which a row is
// inserted. Partition assignment is currently only used for Round
// Robin partitioning. This flag is set to TRUE (during binding)
// when inserting into a Round Robin partitioned table. This will
// cause the partitioning function to generate a partition selection
// expression which does the proper calculation. This flag is
// initialized to false by all constructors.
// ---------------------------------------------------------------------
// current value of the 'assignPartition' indicator
inline NABoolean assignPartition() const
{
  return assignPartition_;
}
// set the 'assignPartition' indicator to the specified value.
//
inline void setAssignPartition(NABoolean assignPartition)
{
  assignPartition_ = assignPartition;
}
// Accessors for the active stream count (initialized to zero by the
// constructors that do not copy from another function).
inline CostScalar getActiveStreams() const
{
  return activeStreams_;
}
inline void setActiveStreams(CostScalar streams)
{
  activeStreams_ = streams;
}
// This method indicates if the partitioning function uses the File
// System to determine the range of partitions to access. If the
// method returns TRUE, the File System is used. If it returns
// FALSE, then a partition selection expression is used to determine
// the range. The default implementation of this method returns
// FALSE (use partition selection expression). Currently, only the
// range partitioning function redefines this method to return TRUE.
//
virtual NABoolean usesFSForPartitionSelection() const
{
  // base-class answer: a partition selection expression is used
  return FALSE;
}
// --------------------------------------------------------------------
// Rewrite the partitioning keys of the partitioning function that
// are expressed using VEGReferences in terms of the available values.
// --------------------------------------------------------------------
virtual void preCodeGen(const ValueIdSet& availableValues);
// --------------------------------------------------------------------
// Generate an equivalent executor structure.
// --------------------------------------------------------------------
virtual short codeGen(Generator* generator, Lng32 partInputDataLength);
// --------------------------------------------------------------------
// Assign offsets to partition input values that are sent in a tuple.
// This unusual method is used because the layout gets determined
// in two different places (the sender and the receiver) and it must
// match. Also, range partitioning requires a non-standard alignment.
// --------------------------------------------------------------------
virtual void generatePivLayout(Generator *generator,
Lng32 &partitionInputDataLength,
Lng32 atp,
Lng32 atpIndex,
Attributes ***pivAttrs);
// Make a new partSearchKey with the partitioning key preds of
// the partitioning function, if there are any. Note that ignoring
// the part key preds will result in a wrong answer if we use
// PA_PARTITION_GROUPING, since the PA node is the node responsible
// for the grouping. If it doesn't select a subgroup of partitions,
// too much data may be returned. For now we only consider a
// search key for the PA node, MDAM to be implemented later.
// MDAM will be useful for combining user-specified part key preds
// with logicalPartFunc->getPartitioningKeyPredicates().
// Base-class version: all three arguments are ignored and no search key
// is produced (NULL is returned).
virtual SearchKey *createSearchKey(const IndexDesc *indexDesc,
                                   ValueIdSet availInputs,
                                   ValueIdSet additionalPreds) const
{
  return NULL;
}
virtual NABoolean shouldUseSynchronousAccess(
const ReqdPhysicalProperty* rpp,
const EstLogPropSharedPtr& inputLogProp,
GroupAttributes* ga) const;
// Return the stored data conversion error flag expression (NULL unless
// one was stored via storeConvErrorExpr() or copied from another object).
ItemExpr* getConvErrorExpr() const
{
  return dataConversionErrorFlag_;
}
// if begin or endPartNumber_ is specified, then the range selected
// by this partitioning function is restricted to the specified range
// when partitions are selected at runtime.
// Used when users specify a begin/end (to/from) partition number
// range in the table name specification.
// TRUE if a begin and/or an end partition number greater than zero has
// been set; the constructors start both out at -1.
NABoolean partitionRangeRestricted() const
{
  const NABoolean beginGiven = (restrictedBeginPartNumber_ > 0);
  const NABoolean endGiven   = (restrictedEndPartNumber_ > 0);
  return (beginGiven || endGiven);
}
// plain getters/setters for the restricted partition number range
Lng32 getRestrictedBeginPartNumber() const
{
  return restrictedBeginPartNumber_;
}
Lng32 getRestrictedEndPartNumber() const
{
  return restrictedEndPartNumber_;
}
void setRestrictedBeginPartNumber(Lng32 v)
{
  restrictedBeginPartNumber_ = v;
}
void setRestrictedEndPartNumber(Lng32 v)
{
  restrictedEndPartNumber_ = v;
}
// ---------------------------------------------------------------------
// Print and get a short descriptive text
// ---------------------------------------------------------------------
virtual const NAString getText() const;
virtual void print(FILE* ofd = stdout,
const char* indent = DEFAULT_INDENT,
const char* title = "PartitioningFunction") const;
void display() const;
virtual void setupForStatement();
virtual void resetAfterStatement();
// Does this partitioning function refer to any remote partitions
inline NABoolean hasRemotePartitions() const
{
  // ask the (virtual) getNodeMap(); without a node map the answer is FALSE
  const NodeMap* map = getNodeMap();
  return (map && map->hasRemotePartitions());
}
// Base-class defaults; subclasses may override.
// canHandleSkew(): FALSE.
virtual NABoolean canHandleSkew() const
{
  return FALSE;
}
// computeHashValue(): ignores its arguments and returns 0.
virtual UInt32 computeHashValue(char* data, UInt32 flags, Int32 len)
{
  return 0;
}
// getHashingExpression(): there is none, NULL is returned.
virtual ItemExpr *getHashingExpression() const
{
  return NULL;
}
// A virtual method returning a compiler-time hashing expression that
// hashes a skew value into a hash value. Called during codeGen phase.
virtual ItemExpr *buildHashingExpressionForExpr(ItemExpr* /*expr*/) const
{
  // base-class default: no hashing expression is built
  return NULL;
}
// Marks the partitioning key predicates as created (sets
// partKeyPredsCreated_ to TRUE) without storing any predicates.
inline void hasNoPartitioningKeyPredicates()
{
  partKeyPredsCreated_ = TRUE;
}
protected:
// --------------------------------------------------------------------
// Modify key, key predicates and partition input values
// --------------------------------------------------------------------
// Replaces the set of partitioning key columns with the given set.
inline void setPartKey(const ValueIdSet &key)
{
  partitioningKeyColumns_ = key;
}
// Read-only access to the stored partitioning key predicates.
inline const ValueIdSet & partitioningKeyPredicates()
{
  return partitioningKeyPredicates_;
}
// Stores the given partitioning key predicates and records that the
// predicates have now been created.
inline void storePartitioningKeyPredicates(const ValueIdSet& partKeyPreds)
{
  partKeyPredsCreated_ = TRUE;
  partitioningKeyPredicates_ = partKeyPreds;
}
// Stores the partition input values in both representations kept by this
// class: the list form in partitionInputValuesLayout_ and the set form in
// partitionInputValues_.
void storePartitionInputValues(const ValueIdList& partInputValues)
{
  partitionInputValuesLayout_ = partInputValues;
  partitionInputValues_ = partInputValues;
}
// Tells whether the partitioning key predicates have been flagged as
// created (see partKeyPredsCreated_).
inline NABoolean partKeyPredsCreated() const
{
  return partKeyPredsCreated_;
}
// --------------------------------------------------------------------
// The get() and store() primitives permit each derived class to store
// and retrieve the expression after it is created.
// --------------------------------------------------------------------
// Returns the stored partitioning expression (whatever storeExpression()
// last put there).
ItemExpr* getExpression() const
{
  return partitioningExpression_;
}
// Remembers the given partitioning expression so that getExpression()
// can hand it back later.
void storeExpression(ItemExpr* partExpr)
{
  partitioningExpression_ = partExpr;
}
// Remembers the given expression as the data conversion error flag
// (dataConversionErrorFlag_).
void storeConvErrorExpr(ItemExpr* convErrExpr)
{
  dataConversionErrorFlag_ = convErrExpr;
}
//heap
NAMemory * heap_;
// A helper function to create a cast expression casting the input
// expression iv to otype. Also create the data conversion error expression
// dataConversionErrorFlag_ if it is not null.
ItemExpr* getCastedItemExpre(ItemExpr* iv, const NAType& otype, CollHeap*) ;
// Helper function to create a simple partitioning key predicate of the form
// <partNum> between <piv1> and <piv2>
// Can be called a first time with partNumExpr = NULL (e.g. in the
// binder) and then a second time with something like _SALT_
// specified for partNumExpr (e.g. in preCodeGen).
void createBetweenPartitioningKeyPredicates(
const char * pivLoName,
const char * pivHiName,
ItemExpr * partNumExpr = NULL,
NABoolean useHash2Split = FALSE);
private:
// --------------------------------------------------------------------
// For run-time type identification.
// --------------------------------------------------------------------
PartitioningFunctionTypeEnum functionType_;
// --------------------------------------------------------------------
// A set of key columns that are used for determining the specific
// partition to which a row of a table should belong.
// If an ordering exists on the key columns, its implementation is
// specific to the partitioning function, i.e., a derived class.
// --------------------------------------------------------------------
ValueIdSet partitioningKeyColumns_;
// --------------------------------------------------------------------
// Storage for the partitioning key predicates and the partition
// input values that are built.
// The partition input values are a set of values (variables) that
// identify the specific partition or a set of partitions that
// must be accessed.
// --------------------------------------------------------------------
ValueIdSet partitioningKeyPredicates_;
ValueIdSet partitionInputValues_;
ValueIdList partitionInputValuesLayout_;
NABoolean partKeyPredsCreated_;
// --------------------------------------------------------------------
// The implementation for the partitioning function.
// --------------------------------------------------------------------
ItemExpr* partitioningExpression_;
// ---------------------------------------------------------------------
// An integer variable that will hold the status of a data
// conversion in the partitioning expression. The variable may
// indicate that we couldn't convert a value and that therefore the
// result of the expression is invalid. Depending on the context,
// this is either an internal error or it means that we need to
// agree on a default partition number for those cases, or it means
// that we can discard the row that caused this error.
// ---------------------------------------------------------------------
ItemExpr *dataConversionErrorFlag_;
// ---------------------------------------------------------------------
// For Round Robin, and possibly other types of partitioning, it is
// necessary at insertion to assign a partition to the row being
// inserted. The partitioning function determines if the partition
// selection expression is to do partition assignment through use of
// this boolean. The binder sets the boolean to TRUE:
// - If the table is being inserted into and the partitioning function is
// RoundRobin.
// ---------------------------------------------------------------------
NABoolean assignPartition_;
// ------------------------------------------------------------------------
// An association of a partition's process (either ESP or DP2) with an SMP
// node and its cluster. The node map has an entry for each partition.
// ------------------------------------------------------------------------
NodeMap* nodeMap_;
// The partition selection expression. 'Partition selection' means
// that an expression is used to determine the partition to access
// as opposed to using the File System to determine the range of
// partitions to access based on a set of partitioning key
// predicates.
//
ItemExpr *partitionSelectionExpr_;
// A list of inputs to the partition selection expression. This
// list is populated when the partition selection expression is
// created. The partition selection expression needs two inputs:
// the partition number and the total number of partitions.
//
ValueIdList partitionSelectionExprInputs_;
// search key of a (select) predicate. Used by in-DP2
// Compound Statement to assure all its containing
// statements share the same search key.
NABoolean setupForStatement_;
NABoolean resetAfterStatement_;
Lng32 restrictedBeginPartNumber_;
Lng32 restrictedEndPartNumber_;
// to store number of active streams
CostScalar activeStreams_;
}; // class PartitioningFunction
// -----------------------------------------------------------------------
// SINGLE PARTITION PARTITIONING FUNCTION
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------
// A function for creating a single partition.
// Set of partitioning keys is empty.
// The partitioning expression is a ConstValue(0).
// -----------------------------------------------------------------------
class SinglePartitionPartitioningFunction : public PartitioningFunction
{
public:

  // --------------------------------------------------------------------
  // Constructors. Each one flags the partitioning key predicates as
  // already created by calling hasNoPartitioningKeyPredicates().
  // --------------------------------------------------------------------

  // Builds the function from an optional node map and a heap; both are
  // handed on to the base class.
  SinglePartitionPartitioningFunction(
       NodeMap* nodeMap = 0,
       NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(SINGLE_PARTITION_PARTITIONING_FUNCTION,
                           nodeMap,
                           heap)
  {
    hasNoPartitioningKeyPredicates();
  }

  // Builds the function from another instance; the base class part is
  // constructed from "other" and the given heap.
  SinglePartitionPartitioningFunction(
       const SinglePartitionPartitioningFunction& other,
       NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(other, heap)
  {
    hasNoPartitioningKeyPredicates();
  }

  // --------------------------------------------------------------------
  // Destructor
  // --------------------------------------------------------------------
  virtual ~SinglePartitionPartitioningFunction();

  // --------------------------------------------------------------------
  // Virtual methods; the base class carries the explanations.
  // --------------------------------------------------------------------
  virtual const SinglePartitionPartitioningFunction*
    castToSinglePartitionPartitioningFunction() const;

  virtual Lng32 getCountOfPartitions() const;

  virtual NABoolean isAGroupingOf(const PartitioningFunction &other,
                                  Lng32* maxPartsPerGroup = NULL) const;

  virtual PartitioningRequirement* makePartitioningRequirement();

  virtual PartitioningFunction* copy() const;

  virtual void createPartitioningKeyPredicates();

  // Replace the pivs, partitioning key predicates and partitioning
  // expression with those passed in.
  virtual void replacePivs(const ValueIdList& newPivs,
                           const ValueIdSet& newPartKeyPreds);

  virtual ItemExpr* createPartitioningExpression();

  virtual short codeGen(Generator* generator, Lng32 partInputDataLength);

  virtual NABoolean shouldUseSynchronousAccess(
       const ReqdPhysicalProperty* rpp,
       const EstLogPropSharedPtr& inputLogProp,
       GroupAttributes* ga) const;

  // Always answers FALSE for a single partition function.
  NABoolean checkSamePartitionNeeded() const
  {
    return FALSE;
  }

  NABoolean partFuncAndFuncPushDownCompatible(
       const PartitioningFunction&) const;

  virtual const NAString getText() const;

  virtual void print(FILE* ofd = stdout,
                     const char* indent = DEFAULT_INDENT,
                     const char* title = "PartitioningFunction") const;

}; // class SinglePartitionPartitioningFunction
// -----------------------------------------------------------------------
// REPLICATE VIA BROADCAST PARTITIONING FUNCTION
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------
// A function for replicating data via a broadcast - i.e. repartitioning
// the data but sending all the data to all the consumers.
// Set of partitioning keys is empty.
// The partitioning expression is absent.
// It allows the optimizer to ask for the replication of data
// such as for the parallel execution PLAN2 for hash joins.
// The ReplicateViaBroadcastPartitioningFunction has an empty partitioning
// key. This causes it to assign every row that is supplied to it to
// be assigned to each partition that it forms.
// -----------------------------------------------------------------------
class ReplicateViaBroadcastPartitioningFunction : public PartitioningFunction
{
public:

  // --------------------------------------------------------------------
  // Constructors. Each one records the desired number of partitions and
  // flags the partitioning key predicates as already created.
  // --------------------------------------------------------------------

  // Variant without a node map: NULL is passed to the base class.
  ReplicateViaBroadcastPartitioningFunction(
       Lng32 numberOfPartitions,
       NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(REPLICATE_VIA_BROADCAST_PARTITIONING_FUNCTION,
                           NULL,
                           heap),
      numberOfPartitions_(numberOfPartitions)
  {
    hasNoPartitioningKeyPredicates();
  }

  // Variant with an explicit node map.
  ReplicateViaBroadcastPartitioningFunction(
       Lng32 numberOfPartitions,
       NodeMap *nodemap,
       NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(REPLICATE_VIA_BROADCAST_PARTITIONING_FUNCTION,
                           nodemap,
                           heap),
      numberOfPartitions_(numberOfPartitions)
  {
    hasNoPartitioningKeyPredicates();
  }

  // Variant that takes its state from another instance.
  ReplicateViaBroadcastPartitioningFunction(
       const ReplicateViaBroadcastPartitioningFunction& other,
       NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(other, heap),
      numberOfPartitions_(other.numberOfPartitions_)
  {
    hasNoPartitioningKeyPredicates();
  }

  // --------------------------------------------------------------------
  // Destructor
  // --------------------------------------------------------------------
  virtual ~ReplicateViaBroadcastPartitioningFunction();

  // --------------------------------------------------------------------
  // Virtual methods; the base class carries the explanations.
  // --------------------------------------------------------------------
  virtual const ReplicateViaBroadcastPartitioningFunction*
    castToReplicateViaBroadcastPartitioningFunction() const;

  virtual Lng32 getCountOfPartitions() const;

  virtual PartitioningRequirement* makePartitioningRequirement();

  virtual PartitioningFunction* copy() const;

  virtual void createPartitioningKeyPredicates();

  // Replace the pivs, partitioning key predicates and partitioning
  // expression with those passed in.
  virtual void replacePivs(const ValueIdList& newPivs,
                           const ValueIdSet& newPartKeyPreds);

  virtual ItemExpr* createPartitioningExpression();

  virtual NABoolean isAGroupingOf(const PartitioningFunction &other,
                                  Lng32* maxPartsPerGroup = NULL) const;

  virtual PartitioningFunction* scaleNumberOfPartitions(
       Lng32 &suggestedNewNumberOfPartitions,
       PartitionGroupingDistEnum partGroupDist = DEFAULT_PARTITION_GROUPING);

  virtual short codeGen(Generator* generator, Lng32 partInputDataLength);

  virtual const NAString getText() const;

  virtual void print(FILE* ofd = stdout,
                     const char* indent = DEFAULT_INDENT,
                     const char* title = "PartitioningFunction") const;

private:

  // The number of partitions that are desired.
  Lng32 numberOfPartitions_;

}; // class ReplicateViaBroadcastPartitioningFunction
// -----------------------------------------------------------------------
// REPLICATE NO BROADCAST PARTITIONING FUNCTION
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------
// A function for replicating data with no broadcast - i.e. each ESP
// process asks his child to access all partitions, instead of only 1/N
// of the data.
// Set of partitioning keys is empty.
// The partitioning expression is absent.
// It allows the optimizer to ask for the replication of data
// such as for the parallel execution PLAN2 for nested joins.
// The ReplicateNoBroadcastPartitioningFunction has an empty partitioning
// key. This causes it to assign every row that is supplied to it to
// be assigned to each partition that it forms.
// -----------------------------------------------------------------------
class ReplicateNoBroadcastPartitioningFunction : public PartitioningFunction
{
public:
  // --------------------------------------------------------------------
  // Constructor functions. Each one records the desired number of
  // partitions and flags the partitioning key predicates as already
  // created.
  // --------------------------------------------------------------------

  // Variant without a node map. The base class constructor takes
  // (function type, node map, heap); NULL is passed explicitly for the
  // node map so that the heap lands in the heap position, exactly as the
  // corresponding ReplicateViaBroadcastPartitioningFunction constructor
  // does.
  ReplicateNoBroadcastPartitioningFunction(Lng32 numberOfPartitions,
                         NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(REPLICATE_NO_BROADCAST_PARTITIONING_FUNCTION,
                           NULL, heap),
      numberOfPartitions_(numberOfPartitions)
  { hasNoPartitioningKeyPredicates(); }

  // Variant with an explicit node map.
  ReplicateNoBroadcastPartitioningFunction
    (Lng32 numberOfPartitions, NodeMap *nodemap,
     NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(REPLICATE_NO_BROADCAST_PARTITIONING_FUNCTION,
                           nodemap, heap)
    , numberOfPartitions_(numberOfPartitions)
  { hasNoPartitioningKeyPredicates(); }

  // Variant that takes its state from another instance.
  ReplicateNoBroadcastPartitioningFunction(
       const ReplicateNoBroadcastPartitioningFunction& other,
       NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(other,heap),
      numberOfPartitions_(other.numberOfPartitions_)
  { hasNoPartitioningKeyPredicates(); }

  // --------------------------------------------------------------------
  // Destructor functions
  // --------------------------------------------------------------------
  virtual ~ReplicateNoBroadcastPartitioningFunction();

  // ---------------------------------------------------------------------
  // see base class for explanations of the virtual methods
  // ---------------------------------------------------------------------
  virtual const
  ReplicateNoBroadcastPartitioningFunction*
  castToReplicateNoBroadcastPartitioningFunction() const;

  virtual Lng32 getCountOfPartitions() const;

  virtual PartitioningRequirement* makePartitioningRequirement();

  virtual PartitioningFunction* copy() const;

  virtual void createPartitioningKeyPredicates();

  // Replace the pivs, partitioning key predicates and partitioning
  // expression with those passed in.
  virtual void replacePivs(
       const ValueIdList& newPivs,
       const ValueIdSet& newPartKeyPreds);

  virtual ItemExpr* createPartitioningExpression() ;

  virtual NABoolean isAGroupingOf(const PartitioningFunction &other,
                                  Lng32* maxPartsPerGroup = NULL) const;

  virtual PartitioningFunction *
  scaleNumberOfPartitions(Lng32 &suggestedNewNumberOfPartitions,
                          PartitionGroupingDistEnum partGroupDist =
                            DEFAULT_PARTITION_GROUPING);

  virtual short codeGen(Generator* generator, Lng32 partInputDataLength);

  virtual const NAString getText() const;

  virtual void print( FILE* ofd = stdout,
                      const char* indent = DEFAULT_INDENT,
                      const char* title = "PartitioningFunction") const;

private :
  // ---------------------------------------------------------------------
  // The number of partitions that are desired.
  // ---------------------------------------------------------------------
  Lng32 numberOfPartitions_;
}; // class ReplicateNoBroadcastPartitioningFunction
// -----------------------------------------------------------------------
// HASH PARTITIONING FUNCTION
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------
// HashPartitioningFunction
// -----------------------------------------------------------------------
class HashPartitioningFunction : public PartitioningFunction
{
public:

  // --------------------------------------------------------------------
  // Constructors. Member initializers are written in the order in which
  // the data members are declared, which is the order they run in.
  // --------------------------------------------------------------------

  // Variant that supplies only the number of hash partitions and a node
  // map; the key column lists start out default-constructed.
  HashPartitioningFunction(Lng32 numberOfHashPartitions,
                           NodeMap* nodeMap,
                           NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(HASH_PARTITIONING_FUNCTION, nodeMap, heap),
      numberOfHashPartitions_(numberOfHashPartitions)
  {
  }

  // Variant that supplies the partitioning key, both as a set (for the
  // base class) and as an ordered list. The function type defaults to
  // HASH_PARTITIONING_FUNCTION.
  HashPartitioningFunction(const ValueIdSet& partitioningKeyColumns,
                           const ValueIdList& partitioningKeyColumnList,
                           Lng32 numberOfHashPartitions,
                           NodeMap* nodeMap = 0,
                           NAMemory* heap = CmpCommon::statementHeap(),
                           const PartitioningFunctionTypeEnum ftype
                             = HASH_PARTITIONING_FUNCTION)
    : PartitioningFunction(ftype, partitioningKeyColumns, nodeMap, heap),
      numberOfHashPartitions_(numberOfHashPartitions),
      keyColumnList_(partitioningKeyColumnList),
      originalKeyColumnList_(partitioningKeyColumnList)
  {
    // Partitioning keys and a hash table size are mandatory here.
    CMPASSERT((NOT getPartitioningKey().isEmpty()) AND
              (NOT getKeyColumnList().isEmpty()) AND
              numberOfHashPartitions_);
  }

  // Variant that takes its state from another instance. The same
  // assertion as above is applied to the copied state.
  HashPartitioningFunction(const HashPartitioningFunction& other,
                           NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(other, heap),
      numberOfHashPartitions_(other.numberOfHashPartitions_),
      keyColumnList_(other.keyColumnList_),
      originalKeyColumnList_(other.originalKeyColumnList_)
  {
    // Partitioning keys and a hash table size are mandatory here.
    CMPASSERT((NOT getPartitioningKey().isEmpty()) AND
              (NOT getKeyColumnList().isEmpty()) AND
              numberOfHashPartitions_);
  }

  // --------------------------------------------------------------------
  // Destructor
  // --------------------------------------------------------------------
  virtual ~HashPartitioningFunction();

  // --------------------------------------------------------------------
  // Virtual methods; the base class carries the explanations.
  // --------------------------------------------------------------------
  virtual const HashPartitioningFunction*
    castToHashPartitioningFunction() const;

  virtual Lng32 getCountOfPartitions() const;

  // Ordered key column list, current and original (see the data members).
  const ValueIdList& getKeyColumnList() const
  {
    return keyColumnList_;
  }
  const ValueIdList& getOriginalKeyColumnList() const
  {
    return originalKeyColumnList_;
  }

  virtual PartitioningRequirement* makePartitioningRequirement();

  virtual PartitioningFunction* copy() const;

  virtual COMPARE_RESULT comparePartFuncToFunc(
       const PartitioningFunction &other) const;

  virtual void createPartitioningKeyPredicates();

  // Replace the pivs, partitioning key predicates and partitioning
  // expression with those passed in.
  virtual void replacePivs(const ValueIdList& newPivs,
                           const ValueIdSet& newPartKeyPreds);

  // build the part expr using the part key columns as input
  virtual ItemExpr* createPartitioningExpression();

  virtual void remapIt(const PartitioningFunction* opf,
                       ValueIdMap& map,
                       NABoolean mapItUp);

  virtual PartitioningFunction* scaleNumberOfPartitions(
       Lng32 &suggestedNewNumberOfPartitions,
       PartitionGroupingDistEnum partGroupDist = DEFAULT_PARTITION_GROUPING);

  virtual void preCodeGen(const ValueIdSet& availableValues);

  virtual short codeGen(Generator* generator, Lng32 partInputDataLength);

  virtual const NAString getText() const;

  virtual void print(FILE* ofd = stdout,
                     const char* indent = DEFAULT_INDENT,
                     const char* title = "PartitioningFunction") const;

  ItemExpr* buildHashingExpressionForExpr(ItemExpr* expr) const;

  ItemExpr* getHashingExpression() const;

  UInt32 computeHashValue(char* data, UInt32 flags, Int32 len);

protected:

  virtual const NAString getTextImp(const char*) const;

  COMPARE_RESULT comparePartKeyToKey(const PartitioningFunction &other) const;

  // The size of the hash table that is built using this partitioning
  // function.
  Lng32 numberOfHashPartitions_;

  // An order-sensitive representation for the partitioning keys.
  ValueIdList keyColumnList_;

  // The original keyColumnList_. This is different from keyColumnList_
  // only if the original PartitioningFunction got remapped with remapIt.
  // Used to determine the original data types of the key columns.
  ValueIdList originalKeyColumnList_;

}; // class HashPartitioningFunction
// -----------------------------------------------------------------------
// TableHashPartitioningFunction
// -----------------------------------------------------------------------
class TableHashPartitioningFunction : public PartitioningFunction
{
  friend class SkewedDataPartitioningFunction;

public:
  // --------------------------------------------------------------------
  // Constructor functions. Member initializers are listed in the order
  // in which the data members are declared, which is the order in which
  // they are actually run.
  // --------------------------------------------------------------------

  // Variant that supplies only the function type, the number of hash
  // partitions and a node map. Both the original and the current
  // partition count start out as numberOfHashPartitions; the key column
  // lists are default-constructed.
  TableHashPartitioningFunction(const PartitioningFunctionTypeEnum ftype,
                                Lng32 numberOfHashPartitions,
                                NodeMap* nodeMap,
                                NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(ftype, nodeMap, heap),
      numberOfOrigHashPartitions_(numberOfHashPartitions),
      numberOfPartitions_(numberOfHashPartitions),
      setupForStatement_(FALSE),
      resetAfterStatement_(FALSE),
      doVarCharCast_(FALSE)
  {}

  // Variant that also supplies the partitioning key, both as a set (for
  // the base class) and as an ordered list.
  TableHashPartitioningFunction(const PartitioningFunctionTypeEnum ftype,
                                const ValueIdSet& partitioningKeyColumns,
                                const ValueIdList& partitioningKeyColumnList,
                                Lng32 numberOfHashPartitions,
                                NodeMap* nodeMap = 0,
                                NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(ftype,
                           partitioningKeyColumns,
                           nodeMap,
                           heap),
      numberOfOrigHashPartitions_(numberOfHashPartitions),
      numberOfPartitions_(numberOfHashPartitions),
      keyColumnList_(partitioningKeyColumnList),
      originalKeyColumnList_(partitioningKeyColumnList),
      setupForStatement_(FALSE),
      resetAfterStatement_(FALSE),
      doVarCharCast_(FALSE)
  {
    // MUST be given some partitioning keys and a hash table size.
    CMPASSERT((NOT getPartitioningKey().isEmpty()) AND
              (NOT getKeyColumnList().isEmpty()) AND
              numberOfHashPartitions);
  }

  // Variant that takes its state from another instance.
  TableHashPartitioningFunction(const TableHashPartitioningFunction& other,
                                NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(other, heap),
      numberOfOrigHashPartitions_(other.numberOfOrigHashPartitions_),
      numberOfPartitions_(other.numberOfPartitions_),
      keyColumnList_(other.keyColumnList_),
      originalKeyColumnList_(other.originalKeyColumnList_),
      setupForStatement_(other.setupForStatement_),
      resetAfterStatement_(other.resetAfterStatement_),
      doVarCharCast_(other.doVarCharCast_)
  {
    // MUST be given some partitioning keys and a hash table size.
    CMPASSERT((NOT getPartitioningKey().isEmpty()) AND
              (NOT getKeyColumnList().isEmpty()) AND
              numberOfOrigHashPartitions_ AND
              numberOfPartitions_);
  }

  // --------------------------------------------------------------------
  // Destructor functions
  // --------------------------------------------------------------------
  virtual ~TableHashPartitioningFunction();

  // ---------------------------------------------------------------------
  // see base class for explanations of the virtual methods
  // ---------------------------------------------------------------------
  virtual const
  TableHashPartitioningFunction* castToTableHashPartitioningFunction() const;

  virtual Lng32 getCountOfPartitions() const;

  virtual void normalizePartitioningKeys(NormWA& normWARef);

  virtual void createPartitioningKeyPredicates();

  void createPartitioningKeyPredicatesForSaltedTable(ValueId saltCol);

  // Replace the pivs, partitioning key predicates and partitioning
  // expression with those passed in.
  virtual void replacePivs(
       const ValueIdList& newPivs,
       const ValueIdSet& newPartKeyPreds);

  virtual PartitioningRequirement* makePartitioningRequirement();

  virtual void remapIt(const PartitioningFunction* opf,
                       ValueIdMap& map, NABoolean mapItUp);

  virtual PartitioningFunction *
  createPartitioningFunctionForIndexDesc(IndexDesc *idesc) const;

  virtual void preCodeGen(const ValueIdSet& availableValues);

  // Pure virtual: each concrete table hash function supplies its own
  // code generation.
  virtual short codeGen(Generator* generator, Lng32 partInputDataLength) = 0;

  // Make a new partSearchKey that indicates that
  // PA_PARTITION_GROUPING is being done. Note that a search key can
  // not be generated which can group hashed partitions. For
  // TableHashPartitioning, a flag in the search key is used to
  // indicate that PA_PARTITION_GROUPING is being done and the
  // begin/end key values of the search key are set to the partition
  // input values of the partitioning function.
  virtual SearchKey *createSearchKey(const IndexDesc *indexDesc,
                                     ValueIdSet availInputs,
                                     ValueIdSet additionalPreds) const;

  virtual ItemExpr* createPartitioningExpression() ;

  void createPartitionSelectionExprInputs();

  virtual ItemExpr *
  createPartitionSelectionExpr(const SearchKey *partSearchKey,
                               const ValueIdSet &availableValues);

  // ---------------------------------------------------------------------
  // The original (physical) number of hash partitions before any scaling.
  // ---------------------------------------------------------------------
  Lng32 getCountOfOrigHashPartitions() const
  { return numberOfOrigHashPartitions_; }

  // ---------------------------------------------------------------------
  // Accessor method for the list of key columns.
  // TableHashPartitioningFunction is sensitive to the order of the
  // partitioning keys.
  // ---------------------------------------------------------------------
  const ValueIdList& getKeyColumnList() const { return keyColumnList_; }

  const ValueIdList& getOriginalKeyColumnList() const
  { return originalKeyColumnList_; }

  void setupForStatement();
  void resetAfterStatement();

  // Pure virtual: each concrete table hash function supplies its own text.
  virtual const NAString getText() const = 0;

  ItemExpr *buildHashingExpressionForExpr(ItemExpr*) const;
  ItemExpr *getHashingExpression() const;
  UInt32 computeHashValue(char* data, UInt32 flags, Int32 len);

protected :
  // ---------------------------------------------------------------------
  // The number of partitions of the original (physical) partitioning
  // function before any scaling.
  // ---------------------------------------------------------------------
  Lng32 numberOfOrigHashPartitions_;

  // After any scaling
  //
  Lng32 numberOfPartitions_;

  // ----------------------------------------------------------------------
  // An order-sensitive representation for the partitioning keys.
  // ----------------------------------------------------------------------
  ValueIdList keyColumnList_;

  // ---------------------------------------------------------------------
  // The original keyColumnList_. This is different from keyColumnList_
  // only if the original PartitioningFunction got remapped with remapIt.
  // Used to determine the original data types of the key columns.
  // Note that the original data types of a requirement must match the
  // actual data types exactly if we want to match it with an actual table.
  // ---------------------------------------------------------------------
  ValueIdList originalKeyColumnList_;

  NABoolean setupForStatement_;
  NABoolean resetAfterStatement_;

private:
  // Builders that the concrete table hash functions must provide.
  virtual ItemExpr *buildPartitioningExpression(
       const ValueIdList &keyCols) const = 0;

  virtual ItemExpr *buildPartitioningSelectionExpr(
       const ValueIdList &keyCols,
       ItemExpr *numParts) const = 0;

  ItemExpr* createPartitioningExpressionImp(NABoolean doVarCharCast) ;

  NABoolean doVarCharCast_;
}; // class TableHashPartitioningFunction
// -----------------------------------------------------------------------
// HASH DISTRIBUTION PARTITIONING FUNCTION for Hash Fragmentation of a table
// The external hash partitioning function.
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------
// HashDistPartitioningFunction
// -----------------------------------------------------------------------
class HashDistPartitioningFunction : public TableHashPartitioningFunction
{
public:

  // --------------------------------------------------------------------
  // Constructors. All of them forward to TableHashPartitioningFunction
  // with the function type HASH_DIST_PARTITIONING_FUNCTION (or with the
  // instance to take the state from).
  // --------------------------------------------------------------------

  // Variant that supplies only the number of hash partitions and a node
  // map.
  HashDistPartitioningFunction(
       Lng32 numberOfHashPartitions,
       NodeMap* nodeMap,
       NAMemory* heap = CmpCommon::statementHeap())
    : TableHashPartitioningFunction(HASH_DIST_PARTITIONING_FUNCTION,
                                    numberOfHashPartitions,
                                    nodeMap,
                                    heap)
  {
  }

  // Variant that also supplies the partitioning key as a set and as an
  // ordered list.
  HashDistPartitioningFunction(
       const ValueIdSet& partitioningKeyColumns,
       const ValueIdList& partitioningKeyColumnList,
       Lng32 numberOfHashPartitions,
       NodeMap* nodeMap = 0,
       NAMemory* heap = CmpCommon::statementHeap())
    : TableHashPartitioningFunction(HASH_DIST_PARTITIONING_FUNCTION,
                                    partitioningKeyColumns,
                                    partitioningKeyColumnList,
                                    numberOfHashPartitions,
                                    nodeMap,
                                    heap)
  {
  }

  // Variant that takes its state from another instance.
  HashDistPartitioningFunction(
       const HashDistPartitioningFunction& other,
       NAMemory* heap = CmpCommon::statementHeap())
    : TableHashPartitioningFunction(other, heap)
  {
  }

  // --------------------------------------------------------------------
  // Destructor
  // --------------------------------------------------------------------
  virtual ~HashDistPartitioningFunction();

  // --------------------------------------------------------------------
  // Virtual methods; the base class carries the explanations.
  // --------------------------------------------------------------------
  virtual const HashDistPartitioningFunction*
    castToHashDistPartitioningFunction() const;

  virtual PartitioningRequirement* makePartitioningRequirement();

  virtual short codeGen(Generator* generator, Lng32 partInputDataLength);

  virtual PartitioningFunction* copy() const;

  virtual const NAString getText() const;

  virtual void print(FILE* ofd = stdout,
                     const char* indent = DEFAULT_INDENT,
                     const char* title = "PartitioningFunction") const;

  virtual PartitioningFunction*
    createPartitioningFunctionForIndexDesc(IndexDesc *idesc) const;

  virtual COMPARE_RESULT comparePartFuncToFunc(
       const PartitioningFunction &other) const;

  virtual PartitioningFunction* scaleNumberOfPartitions(
       Lng32 &suggestedNewNumberOfPartitions,
       PartitionGroupingDistEnum partGroupDist = DEFAULT_PARTITION_GROUPING);

  virtual NABoolean isAGroupingOf(const PartitioningFunction &other,
                                  Lng32* maxPartsPerGroup = NULL) const;

private:

  // Implementations of the builders that TableHashPartitioningFunction
  // declares as pure virtual.
  virtual ItemExpr* buildPartitioningExpression(
       const ValueIdList &keyCols) const;

  virtual ItemExpr* buildPartitioningSelectionExpr(
       const ValueIdList &keyCols,
       ItemExpr *numParts) const;

}; // class HashDistPartitioningFunction
// -----------------------------------------------------------------------
// Hash2PartitioningFunction
// -----------------------------------------------------------------------
class Hash2PartitioningFunction : public TableHashPartitioningFunction
{
public:
// --------------------------------------------------------------------
// Constructor functions
// --------------------------------------------------------------------
// Variant that supplies only the number of hash partitions and a node map;
// forwards to TableHashPartitioningFunction with the function type
// HASH2_PARTITIONING_FUNCTION.
Hash2PartitioningFunction(
     Lng32 numberOfHashPartitions,
     NodeMap* nodeMap,
     NAMemory* heap = CmpCommon::statementHeap())
  : TableHashPartitioningFunction(HASH2_PARTITIONING_FUNCTION,
                                  numberOfHashPartitions,
                                  nodeMap,
                                  heap)
{
}
// Variant that also supplies the partitioning key as a set and as an
// ordered list; forwards everything to TableHashPartitioningFunction with
// the function type HASH2_PARTITIONING_FUNCTION.
Hash2PartitioningFunction(
     const ValueIdSet& partitioningKeyColumns,
     const ValueIdList& partitioningKeyColumnList,
     Lng32 numberOfHashPartitions,
     NodeMap* nodeMap = 0,
     NAMemory* heap = CmpCommon::statementHeap())
  : TableHashPartitioningFunction(HASH2_PARTITIONING_FUNCTION,
                                  partitioningKeyColumns,
                                  partitioningKeyColumnList,
                                  numberOfHashPartitions,
                                  nodeMap,
                                  heap)
{
}
Hash2PartitioningFunction(const Hash2PartitioningFunction& other,
NAMemory* heap = CmpCommon::statementHeap())
: TableHashPartitioningFunction(other, heap)
{}
// --------------------------------------------------------------------
// Destructor functions
// --------------------------------------------------------------------
virtual ~Hash2PartitioningFunction();
// ---------------------------------------------------------------------
// see base class for explanations of the virtual methods
// ---------------------------------------------------------------------
virtual const
Hash2PartitioningFunction* castToHash2PartitioningFunction() const;
virtual PartitioningRequirement* makePartitioningRequirement();
virtual short codeGen(Generator* generator, Lng32 partInputDataLength);
virtual PartitioningFunction* copy() const;
virtual const NAString getText() const;
virtual void print( FILE* ofd = stdout,
const char* indent = DEFAULT_INDENT,
const char* title = "PartitioningFunction") const;
virtual PartitioningFunction *
createPartitioningFunctionForIndexDesc(IndexDesc *idesc) const;
virtual void
createPartSelectionExprFromSearchKey(const ValueId beginPartSelId,
const ValueId endPartSelId,
ValueIdList &partSelectionValIds) const;
virtual COMPARE_RESULT comparePartFuncToFunc
(const PartitioningFunction &other) const;
virtual PartitioningFunction *
scaleNumberOfPartitions(Lng32 &suggestedNewNumberOfPartitions,
PartitionGroupingDistEnum partGroupDist =
DEFAULT_PARTITION_GROUPING);
virtual NABoolean isAGroupingOf(const PartitioningFunction &other,
Lng32* maxPartsPerGroup = NULL) const;
NABoolean canHandleSkew() const { return TRUE; };
private :
ItemExpr *buildPartitioningExpression(const ValueIdList &keyCols) const;
virtual ItemExpr *buildPartitioningSelectionExpr(const ValueIdList &keyCols,
ItemExpr *numParts) const;
}; // class Hash2PartitioningFunction
// -----------------------------------------------------------------------
// SkewedDataPartitioningFunction
//
// A partitioning function describing the partitioning of skewed data.
//
// One unique feature of this function is that its partitioning key
// contains a special value such that this function will
// not match with any other non-skewed-data partfunc.
//
// In addition, this partitioning function contains a partial
// partitioning function describing the non-skewed data
// portion. This function is accessible through the virtual method
// getPartialPartitioningFunction(). All other non-skew partitioning functions
// implement this method by returning their original partitioning keys
// (i.e., getPartitioningKey() == getPartialPartitioningKey() for all non-skew
// partfuncs).
//
// Most required methods for this class are delegated to the contained
// partial partitioning function.
// -----------------------------------------------------------------------
class SkewedDataPartitioningFunction : public PartitioningFunction
{
public:

  // --------------------------------------------------------------------
  // Constructors (both defined out of line)
  // --------------------------------------------------------------------

  // Build from the partitioning function for the unskewed data and a
  // description of the skew.
  SkewedDataPartitioningFunction(PartitioningFunction* partFuncForUnskewed,
                                 const skewProperty& sk,
                                 NAMemory* heap = CmpCommon::statementHeap());

  // Copy constructor.
  SkewedDataPartitioningFunction(const SkewedDataPartitioningFunction& other,
                                 NAMemory* heap = CmpCommon::statementHeap());

  // --------------------------------------------------------------------
  // Destructor: empty body.
  // --------------------------------------------------------------------
  virtual ~SkewedDataPartitioningFunction() {}

  // --------------------------------------------------------------------
  // Access to the contained partial (non-skewed) partitioning function
  // --------------------------------------------------------------------

  // The partitioning key of the function for the non-skewed data.
  const ValueIdSet& getPartialPartitioningKey() const
  {
    return partialPartFunc_->getPartitioningKey();
  }

  // The partitioning function for the non-skewed data itself.
  const PartitioningFunction* getPartialPartitioningFunction() const
  {
    return partialPartFunc_;
  }

  void createPartitioningKeyPredicates();

  Lng32 getCountOfPartitions() const;

  void createPIV(ValueIdList &partInputValues);

  void replacePivs(const ValueIdList& newPivs,
                   const ValueIdSet& newPartKeyPreds);

  // --------------------------------------------------------------------
  // Virtual methods; the base class carries the explanations.
  // --------------------------------------------------------------------

  // Type-safe cast: this object is a SkewedDataPartitioningFunction.
  virtual const SkewedDataPartitioningFunction*
    castToSkewedDataPartitioningFunction() const
  {
    return this;
  }

  virtual PartitioningRequirement* makePartitioningRequirement();

  virtual PartitioningFunction* copyAndRemap(ValueIdMap& map,
                                             NABoolean mapItUp) const;

  ItemExpr* createPartitioningExpression();

  virtual void preCodeGen(const ValueIdSet& availableValues);

  virtual short codeGen(Generator* generator, Lng32 partInputDataLength);

  virtual PartitioningFunction* copy() const;

  virtual const NAString getText() const;

  virtual void print(FILE* ofd = stdout,
                     const char* indent = DEFAULT_INDENT,
                     const char* title = "PartitioningFunction") const;

  virtual COMPARE_RESULT
    comparePartFuncToFunc(const PartitioningFunction &other) const;

  virtual PartitioningFunction*
    scaleNumberOfPartitions(Lng32 &suggestedNewNumberOfPartitions,
                            PartitionGroupingDistEnum partGroupDist =
                              DEFAULT_PARTITION_GROUPING);

  virtual NABoolean isAGroupingOf(const PartitioningFunction &other,
                                  Lng32* maxPartsPerGroup = NULL) const;

  // --------------------------------------------------------------------
  // Helpers dealing with the skewed values
  // --------------------------------------------------------------------

  // Read / overwrite the stored description of the skew.
  const skewProperty& getSkewProperty() const { return skewProperty_; }
  void setSkewProperty(const skewProperty& sk) { skewProperty_ = sk; }

  // Returns the list of hash values for the skewed values. The list is
  // intended for use at run-time to identify input rows that contain a
  // skew value.
  virtual Int64List* buildHashListForSkewedValues();

  // Run-time hashing expression that hashes a skew value into a hash
  // value; called during the codeGen phase. The request is forwarded to
  // the partial partitioning function.
  ItemExpr *getHashingExpression() const
  {
    return partialPartFunc_->getHashingExpression();
  }

  // Hash of a single skew value, as computed by the partial partitioning
  // function.
  UInt32 computeHashValue(char* data, UInt32 flags, Int32 len)
  {
    return partialPartFunc_->computeHashValue(data, flags, len);
  }

  // Compile-time hashing expression that hashes the given expression into
  // a hash value; called during the codeGen phase. The request is
  // forwarded to the partial partitioning function.
  ItemExpr *buildHashingExpressionForExpr(ItemExpr* ie) const
  {
    return partialPartFunc_->buildHashingExpressionForExpr(ie);
  }

protected:

  // The partfunc that describes the non-skewed part of the data. Every
  // inline forwarder above dereferences it without a null check.
  PartitioningFunction* partialPartFunc_;

  // The description of the skewed part of the data.
  skewProperty skewProperty_;

  // A hash list for the skewed values, kept to speed up identification
  // of skew values during run-time.
  Int64List* skewHashList_;

}; // class SkewedDataPartitioningFunction
// -----------------------------------------------------------------------
// HivePartitioningFunction
// -----------------------------------------------------------------------
class HivePartitioningFunction : public HashPartitioningFunction
{
public:

  // --------------------------------------------------------------------
  // Constructors
  // --------------------------------------------------------------------

  // Build from a partition count and a node map, without key columns.
  //
  // The count and the HIVE_PARTITIONING_FUNCTION tag are forwarded in the
  // same relative order the key-column constructor below uses
  // (count, nodeMap, heap, type). Earlier, the type tag was passed as the
  // first base-class argument and 'numberOfHashPartitions' was never
  // used, so the requested partition count was silently dropped.
  // NOTE(review): assumes HashPartitioningFunction's (count, nodeMap,
  // heap) constructor takes the function type as a trailing argument,
  // like its key-column overload does -- confirm against the base class.
  HivePartitioningFunction(Lng32 numberOfHashPartitions,
                           NodeMap* nodeMap,
                           NAMemory* heap = CmpCommon::statementHeap())
    : HashPartitioningFunction(numberOfHashPartitions,
                               nodeMap,
                               heap,
                               HIVE_PARTITIONING_FUNCTION)
  {
  }

  // Build from the partitioning key (as a set and as an ordered list),
  // a partition count and an optional node map.
  HivePartitioningFunction(const ValueIdSet& partitioningKeyColumns,
                           const ValueIdList& partitioningKeyColumnList,
                           Lng32 numberOfHashPartitions,
                           NodeMap* nodeMap = 0,
                           NAMemory* heap = CmpCommon::statementHeap())
    : HashPartitioningFunction(partitioningKeyColumns,
                               partitioningKeyColumnList,
                               numberOfHashPartitions,
                               nodeMap,
                               heap,
                               HIVE_PARTITIONING_FUNCTION)
  {
  }

  // Copy-style constructor. Note that the source is declared as the base
  // type HashPartitioningFunction, not HivePartitioningFunction, and is
  // handed to the base class unchanged.
  // NOTE(review): if 'other' is not a Hive function, presumably the new
  // object inherits other's function type -- confirm this is intended.
  HivePartitioningFunction(const HashPartitioningFunction& other,
                           NAMemory* heap = CmpCommon::statementHeap())
    : HashPartitioningFunction(other, heap)
  {
  }

  // --------------------------------------------------------------------
  // Destructor (defined out of line)
  // --------------------------------------------------------------------
  virtual ~HivePartitioningFunction();

  // --------------------------------------------------------------------
  // Virtual methods; the base class carries the explanations.
  // --------------------------------------------------------------------

  // Type-safe cast: this object is a HivePartitioningFunction.
  virtual const HivePartitioningFunction*
    castToHivePartitioningFunction() const
  {
    return this;
  }

  virtual PartitioningRequirement* makePartitioningRequirement();

  virtual PartitioningFunction* copy() const;

  virtual void createPartitioningKeyPredicates();

  // The following overrides are currently disabled:
  // build the part expr using the part key columns as input
  //virtual ItemExpr* createPartitioningExpression() ;
  //virtual void remapIt(const PartitioningFunction* opf,
  //                     ValueIdMap& map, NABoolean mapItUp);
  //virtual short codeGen(Generator* generator, Lng32 partInputDataLength);

  virtual const NAString getText() const;

  virtual void print(FILE* ofd = stdout,
                     const char* indent = DEFAULT_INDENT,
                     const char* title = "PartitioningFunction") const;

  // Hashing helpers (defined out of line).
  ItemExpr *buildHashingExpressionForExpr(ItemExpr* expr) const;

  ItemExpr *getHashingExpression() const;

  UInt32 computeHashValue(char* data, UInt32 flags, Int32 len);

  PartitioningFunction*
    createPartitioningFunctionForIndexDesc(IndexDesc *idesc) const;

  virtual NABoolean isAGroupingOf(const PartitioningFunction &other,
                                  Lng32* maxPartsPerGroup = NULL) const;

  virtual void normalizePartitioningKeys(NormWA& normWARef);

private:

}; // class HivePartitioningFunction
// -----------------------------------------------------------------------
// RANGE PARTITIONING FUNCTION
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------
// A range partitioning scheme distributes data amongst a set of
// partitions according to a predefined range of values that each
// partition can contain. Each partition therefore publishes the
// minimum value that it can contain in each of the partitioning
// key columns. We call a tuple of minimum permissible key values
// for a partition a partition boundary. A range-partitioned table
// with n partitions has n+1 boundaries, numbered 0 through n, where
// boundary n is the maximum possible partitioning key value in the table.
//
// If the number of partitions is n, then there are n+1 partition
// boundaries. Entries 1 through n-1 define the actual boundaries
// between two consecutive partitions. Entries 0 and n contain the
// minimum and maximum permissible values, respectively. They are
// initialized to NULL in the constructor and updated later by the
// method RangePartitioningFunction::completePartitionBoundaries()
// -----------------------------------------------------------------------
class RangePartitionBoundaries : public NABasicObject
{
public:

  // --------------------------------------------------------------------
  // Constructors
  // --------------------------------------------------------------------

  // Allocates an array with 'numberOfPartitions+1' elements
  // (defined out of line).
  RangePartitionBoundaries(Lng32 numberOfPartitions,
                           Lng32 numberOfPartitioningKeyColumns,
                           NAMemory *h = CmpCommon::statementHeap());

  // Copy constructor. Scalar members are copied from 'other'; the four
  // arrays are copy-constructed on heap 'h', which also becomes heap_.
  // NOTE(review): the arrays hold pointers, so presumably the pointed-to
  // boundary values are shared with 'other' -- confirm against ARRAY's
  // copy semantics and the destructor.
  RangePartitionBoundaries(const RangePartitionBoundaries& other,
                           NAMemory *h = CmpCommon::statementHeap())
    : partKeyColumnCount_(other.partKeyColumnCount_),
      origPartKeyColumnCount_(other.origPartKeyColumnCount_),
      boundaryValuesList_(other.boundaryValuesList_, h),
      boundaryStringsList_(other.boundaryStringsList_, h),
      boundaryValues_(other.boundaryValues_, h),
      binaryBoundaryValues_(other.binaryBoundaryValues_, h),
      partitionCount_(other.partitionCount_),
      origPartitionCount_(other.origPartitionCount_),
      encodedBoundaryKeyLength_(other.encodedBoundaryKeyLength_),
      setupForStatement_(other.setupForStatement_),
      resetAfterStatement_(other.resetAfterStatement_),
      setBinaryBoundaryFirstLastKey_(other.setBinaryBoundaryFirstLastKey_),
      heap_(h)
  {
  }

  // --------------------------------------------------------------------
  // Destructor (defined out of line)
  // --------------------------------------------------------------------
  virtual ~RangePartitionBoundaries();

  // --------------------------------------------------------------------
  // Defining boundaries. Each partition boundary is a tuple that
  // contains as many values as there are partitioning key columns.
  // --------------------------------------------------------------------

  // Record a boundary given as a single, still unbound, expression.
  void defineUnboundBoundary(Lng32 partitionNumber,
                             const ItemExpr* boundaryValue,
                             const char *encodedKeyValue);

  // Record a boundary given as a list of expressions.
  void defineBoundary(Lng32 partitionNumber,
                      const ItemExprList* boundaryValue,
                      const char *encodedKeyValue);

  // Bind an unbound boundary value and add it to boundaryValues_.
  void bindAddBoundaryValue(Lng32 partitionNumber);

  void checkConsistency(const Lng32 numberOfPartitions) const;

  // --------------------------------------------------------------------
  // Simple accessors
  // --------------------------------------------------------------------

  // The number of partitions, as stored in partitionCount_.
  Lng32 getCountOfPartitions() const { return partitionCount_; }

  // The length of the encoded partition boundary key.
  Lng32 getEncodedBoundaryKeyLength() const
  {
    return encodedBoundaryKeyLength_;
  }

  // --------------------------------------------------------------------
  // Like for partitioning functions, change the number of partitions
  // to be close to a suggested new value and return the chosen new value.
  // --------------------------------------------------------------------
  Lng32 scaleNumberOfPartitions(Lng32 suggestedNewNumberOfPartitions,
                                const NodeMap* nodeMap,
                                PartitionGroupingDistEnum partGroupDist =
                                  DEFAULT_PARTITION_GROUPING);

  // --------------------------------------------------------------------
  // Check whether one set of boundaries is a grouping of another.
  // --------------------------------------------------------------------
  NABoolean isAGroupingOf(const RangePartitionBoundaries &other,
                          Lng32* maxPartsPerGroup = NULL) const;

  // --------------------------------------------------------------------
  // Indexed access to a boundary, as expressions or in binary form.
  // --------------------------------------------------------------------
  const ItemExprList* getBoundaryValues(Lng32 index) const;

  const char* getBinaryBoundaryValue(Lng32 index) const;

  // --------------------------------------------------------------------
  // Used by the optimizer to compare two sets of range partition
  // boundaries.
  // --------------------------------------------------------------------
  NABoolean compareRangePartitionBoundaries(
       const RangePartitionBoundaries& other,
       NABoolean groupingAllowed = FALSE,
       Lng32* maxPartsPerGroup = NULL) const;

  // --------------------------------------------------------------------
  // Merge two compatible sets of boundaries and produce a corresponding
  // node map for the new boundaries.
  // --------------------------------------------------------------------
  RangePartitionBoundaries* merge(const RangePartitionBoundaries& other,
                                  const NodeMap& thisNodeMap,
                                  NodeMap& resultNodeMap) const;

  // --------------------------------------------------------------------
  // Determine the minimum number of partitioning keys based on the
  // start key values that are specified. Columns for which no explicit
  // start key values are specified need not be part of the part key.
  // --------------------------------------------------------------------
  Lng32 getOptimizedNumberOfPartKeys();

  // --------------------------------------------------------------------
  // Add the start boundary for the first partition (min key) and the
  // end boundary for the last range partition (max key).
  // This method is defined in PartFunc.cpp.
  // --------------------------------------------------------------------
  void completePartitionBoundaries(const ValueIdList& partitioningKeyOrder,
                                   Lng32 encodedBoundaryKeyLength);

  // Find the boundary pair [low, high) with the smallest low value in
  // which the key falls and return the index of boundary low. Returns -1
  // if there is no such pair or if the key lengths are different.
  Int32 findBeginBoundary(char* encodedKey,
                          Int32 keyLen,
                          compFuncPtrT compFunc) const;

  // Find the boundary pair [low, high) with the largest low value in
  // which the key falls and return the index of boundary low. Returns -1
  // if there is no such pair or if the key lengths are different.
  Int32 findEndBoundary(char* encodedKey,
                        Int32 keyLen,
                        compFuncPtrT compFunc) const;

  // Per-statement setup and reset hooks (defined out of line).
  void setupForStatement(NABoolean useStringVersion);
  void resetAfterStatement();

  // --------------------------------------------------------------------
  // Printing. display() is print() with all default arguments.
  // --------------------------------------------------------------------
  void display() const { print(); }

  void print(FILE* ofd = stdout,
             const char* indent = DEFAULT_INDENT,
             const char* title = "RangePartitionBoundaries") const;

private:

  // --------------------------------------------------------------------
  // Column counts, maintained simply to check the consistency of the
  // values supplied for each partition boundary.
  // --------------------------------------------------------------------
  Lng32 partKeyColumnCount_;
  Lng32 origPartKeyColumnCount_;

  ARRAY(const ItemExpr *) boundaryValuesList_;

  // The string version of the boundary values, needed to reconstruct
  // boundaryValuesList_ after the compilation of a statement.
  // Each string is a comma separated list of SQL constants, representing
  // one partition boundary.
  ARRAY(const NAString*) boundaryStringsList_;

  // --------------------------------------------------------------------
  // Each partition boundary is a tuple that contains as many values
  // as there are key columns.
  // --------------------------------------------------------------------
  ARRAY(const ItemExprList *) boundaryValues_;
  ARRAY(const char *) binaryBoundaryValues_;

  Lng32 partitionCount_;
  Lng32 origPartitionCount_;

  // Length of the encoded boundary keys.
  Lng32 encodedBoundaryKeyLength_;

  // State flags; all three are copied verbatim by the copy constructor.
  NABoolean setupForStatement_;
  NABoolean resetAfterStatement_;
  NABoolean setBinaryBoundaryFirstLastKey_;

  // The heap handed to the constructor.
  NAMemory* heap_;

}; // class RangePartitionBoundaries
// -----------------------------------------------------------------------
// RangePartitioningFunction
// -----------------------------------------------------------------------
class RangePartitioningFunction : public PartitioningFunction
{
public:

  // --------------------------------------------------------------------
  // Constructors
  // --------------------------------------------------------------------

  // Build from boundaries and a node map only. The three key column
  // lists are left default-constructed and, unlike the constructor
  // below, no assertion is made about 'partitionBoundaries'.
  RangePartitioningFunction(RangePartitionBoundaries* partitionBoundaries,
                            NodeMap* nodeMap,
                            NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(RANGE_PARTITIONING_FUNCTION, nodeMap, heap),
      partitionBoundaries_(partitionBoundaries),
      setupForStatement_(FALSE),
      resetAfterStatement_(FALSE)
  {
  }

  // Build from the partitioning key (as a set and as an ordered list),
  // the per-column ordering expressions, the boundaries and a node map.
  // The ordered key list is stored twice: as the current key column list
  // and as the original key column list.
  RangePartitioningFunction(const ValueIdSet& partitioningKeyColumns,
                            const ValueIdList& partitioningKeyColumnsList,
                            const ValueIdList& partitioningKeyColumnsOrder,
                            RangePartitionBoundaries* partitionBoundaries,
                            NodeMap* nodeMap,
                            NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(RANGE_PARTITIONING_FUNCTION,
                           partitioningKeyColumns,
                           nodeMap,
                           heap),
      keyColumnList_(partitioningKeyColumnsList),
      orderOfKeyValues_(partitioningKeyColumnsOrder),
      originalKeyColumnList_(partitioningKeyColumnsList),
      partitionBoundaries_(partitionBoundaries),
      setupForStatement_(FALSE),
      resetAfterStatement_(FALSE)
  {
    // The caller MUST supply a non-empty key set, a non-empty key list
    // with exactly one ordering entry per key column, and boundaries.
    CMPASSERT(partitioningKeyColumns.entries() AND
              partitioningKeyColumnsList.entries() AND
              partitioningKeyColumnsList.entries() ==
                partitioningKeyColumnsOrder.entries() AND
              partitionBoundaries);
  }

  // Copy constructor (defined out of line).
  RangePartitioningFunction(const RangePartitioningFunction& other,
                            NAMemory* heap = CmpCommon::statementHeap());

  // --------------------------------------------------------------------
  // Destructor (defined out of line)
  // --------------------------------------------------------------------
  virtual ~RangePartitioningFunction();

  // --------------------------------------------------------------------
  // Perform a type-safe pointer cast.
  // --------------------------------------------------------------------
  virtual const RangePartitioningFunction*
    castToRangePartitioningFunction() const;

  // --------------------------------------------------------------------
  // The number of range partitions that will be formed using this scheme.
  // --------------------------------------------------------------------
  virtual Lng32 getCountOfPartitions() const;

  // --------------------------------------------------------------------
  // Accessors
  // --------------------------------------------------------------------

  // The ordered list of key columns.
  const ValueIdList& getKeyColumnList() const { return keyColumnList_; }

  // The range partition boundaries.
  const RangePartitionBoundaries* getRangePartitionBoundaries() const
  {
    return partitionBoundaries_;
  }

  // A list of expressions that define the sort order on the values
  // contained in each key column. If the values in a certain key column
  // appear in ascending sequence, the list holds the ValueId of the key
  // column itself; otherwise it holds the ValueId of an
  // Inverse(key column) expression.
  const ValueIdList& getOrderOfKeyValues() const
  {
    return orderOfKeyValues_;
  }

  // --------------------------------------------------------------------
  // Virtual methods; the base class carries the explanations.
  // --------------------------------------------------------------------
  virtual COMPARE_RESULT
    comparePartFuncToFunc(const PartitioningFunction &other) const;

  virtual NABoolean isAGroupingOf(const PartitioningFunction &other,
                                  Lng32* maxPartsPerGroup = NULL) const;

  virtual PartitioningRequirement* makePartitioningRequirement();

  virtual PartitioningFunction* copy() const;

  virtual void normalizePartitioningKeys(NormWA& normWARef);

  virtual void createPartitioningKeyPredicates();

  // Replace the pivs, partitioning key predicates and partitioning
  // expression with those passed in.
  virtual void replacePivs(const ValueIdList& newPivs,
                           const ValueIdSet& newPartKeyPreds);

  virtual PartitioningFunction*
    scaleNumberOfPartitions(Lng32 &suggestedNewNumberOfPartitions,
                            PartitionGroupingDistEnum partGroupDist =
                              DEFAULT_PARTITION_GROUPING);

  virtual void remapIt(const PartitioningFunction* opf,
                       ValueIdMap& map,
                       NABoolean mapItUp);

  virtual PartitioningFunction*
    createPartitioningFunctionForIndexDesc(IndexDesc *idesc) const;

  virtual ItemExpr* createPartitioningExpression();

  virtual void preCodeGen(const ValueIdSet& availableValues);

  virtual short codeGen(Generator* generator, Lng32 partInputDataLength);

  virtual void generatePivLayout(Generator *generator,
                                 Lng32 &partitionInputDataLength,
                                 Lng32 atp,
                                 Lng32 atpIndex,
                                 Attributes ***pivAttrs);

  // Per-statement setup and reset hooks (defined out of line).
  void setupForStatement();
  void resetAfterStatement();

  // Make a new partSearchKey with the partitioning key preds of
  // the partitioning function, if there are any. Ignoring the part key
  // preds would give a wrong answer under PA_PARTITION_GROUPING, since
  // the PA node is the node responsible for the grouping: if it does not
  // select a subgroup of partitions, too much data may be returned.
  // For now only a search key for the PA node is considered; MDAM is to
  // be implemented later and will be useful for combining user-specified
  // part key preds with logicalPartFunc->getPartitioningKeyPredicates().
  virtual SearchKey *createSearchKey(const IndexDesc *indexDesc,
                                     ValueIdSet availInputs,
                                     ValueIdSet additionalPreds) const;

  // Always TRUE for a range partitioning function.
  virtual NABoolean usesFSForPartitionSelection() const { return TRUE; }

  virtual NABoolean shouldUseSynchronousAccess(
       const ReqdPhysicalProperty* rpp,
       const EstLogPropSharedPtr& inputLogProp,
       GroupAttributes* ga) const;

  NABoolean
    partFuncAndFuncPushDownCompatible(const PartitioningFunction&) const;

  // --------------------------------------------------------------------
  // Compute the number of active partitions. Active partitions are those
  // that will be accessed when applying the search key skey.
  // --------------------------------------------------------------------
  Int32 computeNumOfActivePartitions(SearchKey* skey,
                                     const TableDesc* tDesc) const;

  virtual const NAString getText() const;

  virtual void print(FILE* ofd = stdout,
                     const char* indent = DEFAULT_INDENT,
                     const char* title = "PartitioningFunction") const;

private:

  // An order-sensitive representation for the keys.
  ValueIdList keyColumnList_;

  // One entry per key column: the ValueId of the key column if its
  // values appear in ascending sequence, otherwise the ValueId of an
  // Inverse(key column) expression.
  ValueIdList orderOfKeyValues_;

  // The original keyColumnList_. This differs from keyColumnList_
  // only if the original PartitioningFunction got remapped with remapIt.
  // Used to determine the original data types of the key columns.
  ValueIdList originalKeyColumnList_;

  // The partition boundaries for range-partitioned data.
  RangePartitionBoundaries* partitionBoundaries_;

  // State flags; both inline constructors initialize them to FALSE.
  NABoolean setupForStatement_;
  NABoolean resetAfterStatement_;

}; // class RangePartitioningFunction
// -----------------------------------------------------------------------
// LogPhysPartitioningFunction
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------
// This partitioning function is actually an overlay of two different
// partitioning functions and it is generated by a DP2 scan node.
// This partitioning function reflects both the physical organization
// of the data in the DP2 partitions and the logical partitioning of
// the result, represented by the data returned by one or more DP2
// exchange nodes.
// -----------------------------------------------------------------------
class LogPhysPartitioningFunction : public PartitioningFunction
{
public:
// ---------------------------------------------------------------------
// An enumerated type that indicates the type of logical partitioning.
//
// ANY_LOGICAL_PARTITIONING
// This literal only occurs in requirements as a "don't care"
// entry. It is never actually synthesized.
//
// PA_PARTITION_GROUPING
// In this case, the PA node on top of a DP2 scan groups one or
// more DP2 partitions and represents them to its parent as a
// single partition. No two PA nodes access the same DP2 partition.
// This also covers the most simple cases where there is only one
// DP2 partition and one PA or where the PA's partitioning function
// is the same as the table's "natural" partitioning function.
//
// LOGICAL_SUBPARTITIONING
// More than one PA may access a given DP2 partition and a PA may
// also access more than one DP2 partition. The PA nodes divide
// the table into exclusive ranges that are defined by the
// clustering key of the table which is also the partitioning key.
//
// HORIZONTAL_PARTITION_SLICING
// Similar to LOGICAL_SUBPARTITIONING, except that the clustering
// key is not the partitioning key of the table.
//
// PA_GROUPED_REPARTITIONING
// The PA nodes on top of the DP2 scan will perform
// PA_PARTITION_GROUPING to read the data and then repartition it,
// using an additional ESP exchange on top of the PA node. The number
// of ESPs that read DP2 data and repartition is determined by the
// DP2 exchange in this case, only the number of clients (PA nodes)
// is determined by the DP2 scan node.
//
// ---------------------------------------------------------------------
enum logPartType
{
ANY_LOGICAL_PARTITIONING,
PA_PARTITION_GROUPING,
LOGICAL_SUBPARTITIONING,
HORIZONTAL_PARTITION_SLICING,
PA_GROUPED_REPARTITIONING
};
// --------------------------------------------------------------------
// Constructor functions
// --------------------------------------------------------------------
LogPhysPartitioningFunction(
PartitioningFunction * logPartFunc,
PartitioningFunction * physPartFunc,
logPartType logPartType,
Lng32 numOfClients,
NABoolean usePapa,
NABoolean synchronousAccess,
NAMemory* heap = CmpCommon::statementHeap());
LogPhysPartitioningFunction (const LogPhysPartitioningFunction& other,
NAMemory* heap = CmpCommon::statementHeap())
: PartitioningFunction(other,heap),
logPartFunc_(other.logPartFunc_),physPartFunc_(other.physPartFunc_),
realPartFunc_(other.realPartFunc_),
logPartType_(other.logPartType_),numOfClients_(other.numOfClients_),
usePapa_(other.usePapa_),
synchronousAccess_(other.synchronousAccess_)
{}
// --------------------------------------------------------------------
// Destructor functions
// --------------------------------------------------------------------
virtual ~LogPhysPartitioningFunction();
// ---------------------------------------------------------------------
// accessor methods
// ---------------------------------------------------------------------
inline PartitioningFunction * getLogPartitioningFunction() const
{ return logPartFunc_; }
inline PartitioningFunction * getPhysPartitioningFunction() const
{ return physPartFunc_; }
inline PartitioningFunction * getRealPartitioningFunction() const
{ return realPartFunc_; }
inline logPartType getLogPartType() const { return logPartType_; }
inline Lng32 getNumOfClients() const { return numOfClients_; }
inline NABoolean getUsePapa() const { return usePapa_; }
inline NABoolean getSynchronousAccess() const
{ return synchronousAccess_; }
// ---------------------------------------------------------------------
// mutator methods
// ---------------------------------------------------------------------
inline void setNumOfClients(Lng32 numOfClients)
{ numOfClients_ = numOfClients; }
// ---------------------------------------------------------------------
// see base class for explanations of the virtual methods
// ---------------------------------------------------------------------
virtual const
LogPhysPartitioningFunction * castToLogPhysPartitioningFunction() const;
virtual Lng32 getCountOfPartitions() const;
virtual PartitioningRequirement* makePartitioningRequirement();
// ---------------------------------------------------------------------
// Retrieve a pointer to the partitioning function's node map.
// ---------------------------------------------------------------------
virtual const NodeMap* getNodeMap() const;
// Get any existing (logical or physical) node map, or synthesize one, that
// matches logPartFunc_'s partition count requirement.
// NOTE(review): the effect of forESP is determined by the out-of-line
// definition; presumably it selects a node map suitable for ESPs -- confirm.
NodeMap* getOrMakeSuitableNodeMap(NABoolean forESP) const;
// Returns a copy of this function as a base-class pointer.
// NOTE(review): the heap used for the copy and who owns it are not visible
// in this header -- confirm in the definition.
virtual PartitioningFunction* copy() const;
// Per the original author's trailing note, the answer for this class is "yes".
virtual NABoolean canProducePartitioningKeyPredicates() const; // yes
virtual void createPartitioningKeyPredicates();
// Replace the pivs, partitioning key predicates and partitioning
// expression with those passed in.
// (pivs: presumably "partition input values" -- confirm.)
virtual void replacePivs(
const ValueIdList& newPivs,
const ValueIdSet& newPartKeyPreds);
virtual ItemExpr* createPartitioningExpression() ;
// Non-virtual, non-const; returns a PartitioningFunction pointer.
// NOTE(review): presumably this derives the "real" partitioning function
// from the logical and physical ones and relates to realPartFunc_ -- confirm
// against the definition.
PartitioningFunction* createRealPartitioningFunction();
// Can this logPhys partitioning function maintain the order of an
// individual partition of the physical partitioning function? In
// order to maintain the order, a merge expression may be required.
//
// sortOrder - the ordering in question, given as a list of value ids.
// Returns an NABoolean answering the question above.
NABoolean canMaintainSortOrder(const ValueIdList& sortOrder) const;
// Declarations only; the definitions are out of line and not part of this
// header.
//
// Compares this partitioning function with 'other' and reports the outcome
// as a COMPARE_RESULT.
virtual COMPARE_RESULT comparePartFuncToFunc
(const PartitioningFunction &other) const;
// Grouping test against 'other'. maxPartsPerGroup is an optional pointer
// argument (defaults to NULL).
// NOTE(review): presumably an out-parameter receiving the largest number of
// partitions per group when non-NULL -- confirm in the definition.
virtual NABoolean isAGroupingOf(const PartitioningFunction &other,
Lng32* maxPartsPerGroup = NULL) const;
// Remaps this function through 'map'; mapItUp selects the direction.
// NOTE(review): the role of 'opf' is not visible here -- confirm.
virtual void remapIt(const PartitioningFunction* opf,
ValueIdMap& map, NABoolean mapItUp);
// Code generation hook; returns a short status/result code whose meaning is
// defined by the out-of-line body.
virtual short codeGen(Generator* generator, Lng32 partInputDataLength);
// Textual description of this partitioning function, returned by value.
virtual const NAString getText() const;
// The combined output of getLogForSplitTop and getPhysForSplitTop is similar
// to getText; however, they are used whenever a LogPhysPartitioningFunction
// is the bottom partitioning function of the split_top node.
virtual const NAString getLogForSplitTop() const;
virtual const NAString getPhysForSplitTop() const;
// Print routine. By default it writes to stdout, uses DEFAULT_INDENT as the
// indent string and "PartitioningFunction" as the title.
virtual void print(
FILE* ofd = stdout,
const char* indent = DEFAULT_INDENT,
const char* title = "PartitioningFunction") const;
private:
// ---------------------------------------------------------------------
// The "logical" part of the partitioning function. This is the top
// partitioning function that the DP2 exchange will produce. Its
// number of partitions indicates the number of processes with a PA
// node and therefore the number of partitions in the ESP (or
// master) in the non-DP2 plan fragment above. If we perform
// PA_GROUPED_REPARTITIONING then the logical partitioning function
// is the top partitioning function of the ESP exchange.
// ---------------------------------------------------------------------
PartitioningFunction * logPartFunc_;
// ---------------------------------------------------------------------
// The "physical" part of the partitioning function. It describes
// the partitioning scheme of the DP2 table (or the DP2 tables in
// case we are performing a join in DP2).
// ---------------------------------------------------------------------
PartitioningFunction * physPartFunc_;
// ---------------------------------------------------------------------
// The "real" partitioning scheme is calculated as an overlay of the
// logical and physical partitioning scheme, except for
// PA_GROUPED_REPARTITIONING where it is a grouping of the physical
// partitioning function.
//
// For example, if both the logical and the physical partitioning function
// are range partitioning functions on the clustering key and if we
// are not performing PA_GROUPED_REPARTITIONING, then the "real"
// partitioning function can be obtained by combining the partition
// boundaries of both the logical and the physical partitioning function.
// This would be needed to determine the partitioning key.
//
// Note that for any correctness conditions for operators in DP2, the
// real partitioning function is what counts. So, if we want to
// perform a type 1 join in DP2, we have to make sure that the *real*
// partitioning functions of both its children are equivalent and
// have the equi-join columns as their partitioning keys.
// ---------------------------------------------------------------------
PartitioningFunction * realPartFunc_;
// ---------------------------------------------------------------------
// The logical partitioning type indicates to the DP2 exchange above
// which type of logical partitioning should be performed. See the
// comment above in the enum declaration about possible types.
// ---------------------------------------------------------------------
logPartType logPartType_;
// ---------------------------------------------------------------------
// The number of clients is the actual number of PA nodes used. This
// does not need to be identical to the number of logical partitions
// if we use PAPA nodes. If the number of clients is different from the
// number of logical partitions, it must be a multiple of the number
// of logical partitions. We will then generate a PAPA node with
// #log parts / numOfClients_ PA nodes.
//
// NOTE(review): if numOfClients_ is a multiple of the number of logical
// partitions, the PA count per PAPA would be numOfClients_ / #log parts
// rather than #log parts / numOfClients_ as written above (the same
// ratio is used in the usePapa_ description below). The ratio looks
// inverted -- confirm against the code that consumes these values
// before relying on either comment.
// ---------------------------------------------------------------------
Lng32 numOfClients_;
// ---------------------------------------------------------------------
// Indicator whether to use a PAPA node. The number of partitions of
// the logical partitioning function divided by the number of clients
// should already indicate when it is necessary to use a PAPA. This
// flag is always set when the above ratio is > 1, and it may be set
// when we want a PAPA with only one PA underneath it (e.g. for
// certain situations where a PAPA is needed, like insert VSBB).
// ---------------------------------------------------------------------
NABoolean usePapa_;
// ---------------------------------------------------------------------
// Indicator whether synchronous access will be used to satisfy a
// required order or arrangement that came from above the DP2 exchange.
// ---------------------------------------------------------------------
NABoolean synchronousAccess_;
}; // class LogPhysPartitioningFunction
// -----------------------------------------------------------------------
// ROUND ROBIN PARTITIONING FUNCTION
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------
// A Round Robin partitioning scheme distributes data amongst a set of
// partitions in a random fashion so as to balance the number of records
// in each partition.
// -----------------------------------------------------------------------
class RoundRobinPartitioningFunction : public PartitioningFunction
{
public:

  // --------------------------------------------------------------------
  // Constructors
  // --------------------------------------------------------------------

  // Keyless form: only a partition count and a node map are supplied.
  // Both the current and the original partition count start out equal to
  // partitionCount. Nothing is validated in this constructor.
  RoundRobinPartitioningFunction(const Lng32 partitionCount,
                                 NodeMap* nodeMap,
                                 NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(ROUND_ROBIN_PARTITIONING_FUNCTION, nodeMap, heap)
    , numberOfOrigRRPartitions_(partitionCount)
    , partitionCount_(partitionCount)
    , setupForStatement_(FALSE)
    , resetAfterStatement_(FALSE)
  {
  }

  // Keyed form: additionally hands partitioningKeyColumns to the base
  // class. Both counts start out equal to partitionCount.
  RoundRobinPartitioningFunction(const Lng32 partitionCount,
                                 const ValueIdSet& partitioningKeyColumns,
                                 NodeMap* nodeMap,
                                 NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(ROUND_ROBIN_PARTITIONING_FUNCTION,
                           partitioningKeyColumns,
                           nodeMap,
                           heap)
    , numberOfOrigRRPartitions_(partitionCount)
    , partitionCount_(partitionCount)
    , setupForStatement_(FALSE)
    , resetAfterStatement_(FALSE)
  {
    // Exactly one partitioning key column (the SYSKEY) and a partition
    // count greater than zero are mandatory.
    CMPASSERT((getPartitioningKey().entries() == 1) AND
              (partitionCount_ > 0));
  }

  // Copy form: base-class state is copied via PartitioningFunction(other,
  // heap); the four members of this class are taken over from 'other'.
  RoundRobinPartitioningFunction(const RoundRobinPartitioningFunction& other,
                                 NAMemory* heap = CmpCommon::statementHeap())
    : PartitioningFunction(other, heap)
    , numberOfOrigRRPartitions_(other.numberOfOrigRRPartitions_)
    , partitionCount_(other.partitionCount_)
    , setupForStatement_(other.setupForStatement_)
    , resetAfterStatement_(other.resetAfterStatement_)
  {
    // Same invariant as for the keyed constructor: exactly one (SYSKEY)
    // partitioning key column and a partition count greater than zero.
    CMPASSERT((getPartitioningKey().entries() == 1) AND
              (partitionCount_ > 0));
  }

  // --------------------------------------------------------------------
  // Destructor
  // --------------------------------------------------------------------
  virtual ~RoundRobinPartitioningFunction() {}

  // --------------------------------------------------------------------
  // Virtual methods; the base class explains their contracts.
  // --------------------------------------------------------------------

  // Downcast helper: this object is a round robin function, so it
  // answers with itself.
  virtual const RoundRobinPartitioningFunction*
  castToRoundRobinPartitioningFunction() const
  {
    return this;
  }

  // Current partition count (partitionCount_).
  virtual Lng32 getCountOfPartitions() const
  {
    return partitionCount_;
  }

  virtual PartitioningRequirement* makePartitioningRequirement();

  virtual PartitioningFunction* copy() const;

  virtual COMPARE_RESULT comparePartFuncToFunc(
       const PartitioningFunction &other) const;

  virtual NABoolean isAGroupingOf(const PartitioningFunction &other,
                                  Lng32* maxPartsPerGroup = NULL) const;

  virtual void createPartitioningKeyPredicates();

  // Swap in the given pivs and partitioning key predicates; the
  // partitioning expression is replaced along with them.
  virtual void replacePivs(const ValueIdList& newPivs,
                           const ValueIdSet& newPartKeyPreds);

  virtual PartitioningFunction* scaleNumberOfPartitions(
       Lng32 &suggestedNewNumberOfPartitions,
       PartitionGroupingDistEnum partGroupDist = DEFAULT_PARTITION_GROUPING);

  virtual PartitioningFunction* createPartitioningFunctionForIndexDesc(
       IndexDesc *idesc) const;

  virtual ItemExpr* createPartitioningExpression();

  virtual ItemExpr* createPartitionSelectionExpr(
       const SearchKey *partSearchKey,
       const ValueIdSet &availableValues);

  virtual short codeGen(Generator* generator, Lng32 partInputDataLength);

  // Builds a new partSearchKey signalling that PA_PARTITION_GROUPING is
  // in effect. No search key can be generated that groups RR partitions,
  // so for round robin partitioning a flag in the search key marks
  // PA_PARTITION_GROUPING, and the begin/end key values of the search key
  // are set to the partition input values of the partitioning function.
  virtual SearchKey* createSearchKey(const IndexDesc *indexDesc,
                                     ValueIdSet availInputs,
                                     ValueIdSet additionalPreds) const;

  // --------------------------------------------------------------------
  // Number of RR partitions of the original (physical) function, i.e.
  // the count before any scaling was applied.
  // --------------------------------------------------------------------
  Lng32 getCountOfOrigRRPartitions() const
  {
    return numberOfOrigRRPartitions_;
  }

  void setupForStatement();
  void resetAfterStatement();

  virtual const NAString getText() const;

  virtual void print(FILE* ofd = stdout,
                     const char* indent = DEFAULT_INDENT,
                     const char* title = "PartitioningFunction") const;

private:

  // --------------------------------------------------------------------
  // Partition count of the original (physical) partitioning function,
  // before any scaling.
  // --------------------------------------------------------------------
  Lng32 numberOfOrigRRPartitions_;

  // --------------------------------------------------------------------
  // The number of partitions.
  // --------------------------------------------------------------------
  Lng32 partitionCount_;

  // Both flags start out FALSE in the non-copy constructors.
  // NOTE(review): presumably they record whether setupForStatement() /
  // resetAfterStatement() have already run -- confirm in the definitions.
  NABoolean setupForStatement_;
  NABoolean resetAfterStatement_;

}; // class RoundRobinPartitioningFunction
//
#endif /* PARTFUNC_H */