blob: 51bb6fbf8bb9968bec333a55db2688fba2e8658b [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
******************************************************************************
*
* File: NAFileSet.C
* Description: Implementations for the FileSet class.
* Created: 12/27/95
* Language: C++
*
*
*
*
******************************************************************************
*/
// -----------------------------------------------------------------------
#include "Sqlcomp.h"
#include "PartFunc.h"
#include "ComSysUtils.h"
#include "NAFileSet.h"
#include "opt.h"
#include "HDFSHook.h"
#include "CliSemaphore.h"
#include "ExpHbaseDefs.h"
NAFileSet::NAFileSet(const QualifiedName & fileSetName,
const QualifiedName & extFileSetObj,
const NAString & extFileSetName,
enum FileOrganizationEnum org,
NABoolean isSystemTable,
Lng32 numberOfFiles,
Cardinality estimatedNumberOfRecords,
Lng32 recordLength,
Lng32 blockSize,
Int32 indexLevels,
const NAColumnArray & allColumns,
const NAColumnArray & indexKeyColumns,
const NAColumnArray & horizontalPartKeyColumns,
PartitioningFunction * forHorizontalPartitioning,
short keytag,
Int64 redefTime,
NABoolean audited,
NABoolean auditCompressed,
NABoolean compressed,
ComCompressionType dcompressed,
NABoolean icompressed,
NABoolean buffered,
NABoolean clearOnPurge,
NABoolean packedRows,
NABoolean hasRemotePartition,
NABoolean isUniqueSecondaryIndex,
NABoolean isDecoupledRangePartitioned,
Lng32 fileCode,
NABoolean isVolatile,
NABoolean inMemObjectDefn,
Int64 indexUID,
TrafDesc *keysDesc,
HHDFSTableStats *hHDFSTableStats,
Lng32 numSaltPartns,
NAList<HbaseCreateOption*>* hbaseCreateOptions,
CollHeap * h)
: fileSetName_(fileSetName, h),
extFileSetObj_(extFileSetObj, h),
extFileSetName_(extFileSetName, h),
fileOrganization_(org),
isSystemTable_(isSystemTable),
countOfFiles_(numberOfFiles),
estimatedNumberOfRecords_(estimatedNumberOfRecords),
recordLength_(recordLength),
blockSize_(blockSize),
indexLevels_(indexLevels),
allColumns_(allColumns, h),
indexKeyColumns_(indexKeyColumns, h),
partitioningKeyColumns_(horizontalPartKeyColumns, h),
partFunc_(forHorizontalPartitioning),
keytag_(keytag),
redefTime_(redefTime),
audited_(audited),
auditCompressed_(auditCompressed),
compressed_(compressed),
dcompressed_(dcompressed),
icompressed_(icompressed),
buffered_(buffered),
clearOnPurge_(clearOnPurge),
packedRows_(packedRows),
hasRemotePartition_(hasRemotePartition),
setupForStatement_(FALSE),
resetAfterStatement_(FALSE),
bitFlags_(0),
keyLength_(0),
encodedKeyLength_(0),
thisRemoteIndexGone_(FALSE),
isDecoupledRangePartitioned_(isDecoupledRangePartitioned),
fileCode_(fileCode),
indexUID_(indexUID),
keysDesc_(keysDesc),
hHDFSTableStats_(hHDFSTableStats),
numSaltPartns_(numSaltPartns),
hbaseCreateOptions_(hbaseCreateOptions),
numMaxVersions_(1)
{
setUniqueIndex(isUniqueSecondaryIndex);
setIsVolatile(isVolatile);
setInMemoryObjectDefn(inMemObjectDefn);
if (hbaseCreateOptions_)
{
for (Lng32 i = 0; i < hbaseCreateOptions_->entries(); i++)
{
HbaseCreateOption * hco = (*hbaseCreateOptions_)[i];
if (hco->key() == "MAX_VERSIONS")
{
numMaxVersions_ = atoInt64(hco->val().data());
}
}
}
}
NAFileSet::~NAFileSet()
{
delete partFunc_;
if (hHDFSTableStats_)
delete hHDFSTableStats_;
}
NABoolean NAFileSet::isPartitioned() const
{
return partFunc_ && partFunc_->getCountOfPartitions() > 1;
}
// returns the length of the key in bytes for this index
Lng32 NAFileSet::getKeyLength()
{
if(keyLength_ >0) return keyLength_;
for(CollIndex i=0;i<indexKeyColumns_.entries();i++)
{
keyLength_ += indexKeyColumns_[i]->getType()->getTotalSize();
}
return keyLength_;
}
// returns the length of the encoded key in bytes for this index
Lng32 NAFileSet::getEncodedKeyLength()
{
if(encodedKeyLength_ >0) return encodedKeyLength_;
for(CollIndex i=0;i<indexKeyColumns_.entries();i++)
{
encodedKeyLength_ += indexKeyColumns_[i]->getType()->getEncodedKeyLength();
}
return encodedKeyLength_;
}
Lng32 NAFileSet::getCountOfPartitions() const
{
return partFunc_ ? partFunc_->getCountOfPartitions() : 1;
}
NABoolean NAFileSet::containsPartition(const NAString &partitionName) const
{
return partFunc_ &&
partFunc_->getNodeMap()->containsPartition(partitionName);
}
NABoolean NAFileSet::isSyskeyLeading() const
{
return (indexKeyColumns_.entries() > 0 &&
indexKeyColumns_[0]->getPosition() == 0 &&
indexKeyColumns_[0]->isSyskeyColumn());
}
Int32 NAFileSet::getSysKeyPosition() const
{
for(CollIndex i=0;i<indexKeyColumns_.entries();i++)
{
if ( indexKeyColumns_[i]->isSyskeyColumn() )
return i;
}
return -1;
}
NABoolean NAFileSet::hasSyskey() const
{
// check the NAColumn class of the key column
// ++MV - 7/3/01 bug fix
// If the table only contains a clustering key, it is last.
// If it contains a clustering and hash key and the hash key has columns
// not in the clustering key, then the system key falls between the store
// by columns and the hash columns.
Int32 numKeyCols = indexKeyColumns_.entries();
if (indexKeyColumns_[numKeyCols - 1]->isSyskeyColumn())
return TRUE;
// the array element index right before the first partition key col in the
// indexKeyColumns_[]
Int32 otherSyskeyLoc = numKeyCols - partitioningKeyColumns_.entries() - 1;
return ( otherSyskeyLoc >= 0 ) ?
indexKeyColumns_[otherSyskeyLoc]->isSyskeyColumn()
: FALSE;
}
NABoolean NAFileSet::hasOnlySyskey() const
{
// Syskey is the only clustering key
return ((indexKeyColumns_.entries() == 1) &&
(indexKeyColumns_[0]->isSyskeyColumn()));
}
NABoolean NAFileSet::hasSingleColVarcharKey() const
{
// clustering key is single column varchar
return ((indexKeyColumns_.entries() == 1) &&
(indexKeyColumns_[0]->getType()->isVaryingLen()));
}
//cleanup after statement, so that this can be used for the next statement
void NAFileSet::resetAfterStatement()
{
if(resetAfterStatement_)
return;
if(partFunc_)
partFunc_->resetAfterStatement();
if (hHDFSTableStats_)
hHDFSTableStats_->resetAfterStatement();
for (UInt32 i = 0; i < allColumns_.entries(); i++)
{
//reset each NAColumn
if(allColumns_[i])
allColumns_[i]->resetAfterStatement();
}
for (UInt32 j = 0; j < indexKeyColumns_.entries(); j++)
{
//reset each NAColumn
if(indexKeyColumns_[j])
indexKeyColumns_[j]->resetAfterStatement();
}
resetAfterStatement_ = TRUE;
setupForStatement_ = FALSE;
return;
}
//setup before being used in a statement
void NAFileSet::setupForStatement()
{
if(setupForStatement_)
return;
if(partFunc_)
partFunc_->setupForStatement();
if (hHDFSTableStats_)
hHDFSTableStats_->setupForStatement();
setupForStatement_ = TRUE;
resetAfterStatement_= FALSE;
}
Lng32 NAFileSet::getCountOfColumns(
NABoolean excludeNonKeyColumns,
NABoolean excludeNonUserSpecifiedAlternateIndexColumns,
NABoolean excludeSystemColumns,
NABoolean excludeAlwaysComputedSystemColumns) const
{
Lng32 numCols = 0;
const NAColumnArray *colArray = &allColumns_;
if (excludeNonKeyColumns ||
excludeNonUserSpecifiedAlternateIndexColumns)
colArray = &indexKeyColumns_;
for (CollIndex i=0; i < colArray->entries(); i++)
{
// figure out the various exclusion conditions other
// that non-key columns
const NAColumn *nac = (*colArray)[i];
if ( NOT ((excludeNonUserSpecifiedAlternateIndexColumns &&
nac->getIndexColName() == nac->getColName())
||
(excludeSystemColumns && nac->isSystemColumn())
||
(excludeAlwaysComputedSystemColumns &&
nac->isComputedColumnAlways() &&
nac->isSystemColumn())))
numCols++;
}
return numCols;
}
// load-time initialization
static THREAD_P RandomSequence* random_ = NULL;
static THREAD_P NABoolean seeded_ = FALSE;
// seed random number generator
static void seedIt()
{
if (seeded_)
return;
// else
Lng32 seed = CmpCommon::getDefaultLong(FLOAT_ESP_RANDOM_NUM_SEED);
if (!random_)
random_ = new(GetCliGlobals()->exCollHeap()) RandomSequence();
CLISemaphore * sema = GetCliGlobals()->getSemaphore();
sema->get();
if (!seeded_) {
if (seed == 0) {
TimeVal tim;
GETTIMEOFDAY(&tim, 0);
seed = tim.tv_usec;
}
seed = seed % 65535;
random_->initialize(seed);
seeded_ = TRUE;
}
sema->release();
}
static double srandom()
{
seedIt();
double rv;
CLISemaphore * sema = GetCliGlobals()->getSemaphore();
sema->get();
rv = random_->random();
sema->release();
return rv;
};
const QualifiedName& NAFileSet::getRandomPartition() const
{
if (!partFunc_)
return getFileSetName();
Int32 numEntries = partFunc_->getNodeMap()->getNumEntries();
const NodeMapEntry *nme =
partFunc_->getNodeMapEntry((CollIndex)floor(srandom()*numEntries));
QualifiedName *partQName =
new (STMTHEAP) QualifiedName(nme->getPartitionName(), 3, STMTHEAP, NULL);
return *partQName;
}
NAString NAFileSet::getBestPartitioningKeyColumns(char separator) const
{
const NAColumnArray & partKeyCols = getPartitioningKeyColumns();
if ( partKeyCols.entries() > 0 ) {
return partKeyCols.getColumnNamesAsString(separator);
} else {
const NAColumnArray& allCols = getAllColumns();
UInt32 ct = allCols.entries();
if ( ct > 2 ) ct=2;
return allCols.getColumnNamesAsString(separator, ct);
}
}