blob: 4fd24709a6530baec258a15a1799288f971d39eb [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
******************************************************************************
*
* File: PackedColDesc.cpp
* Description: All the methods of PackedColDesc, PackedAPDesc and PackedTableDesc
*
* Created: 6/27/97
* Language: C++
*
*
******************************************************************************
*/
// exclude this whole file from coverage since this code is related to vertical
// partitioning and that feature is not active anymore
// -----------------------------------------------------------------------
// This file contains all the methods for the class PackedAPDesc
// (Packed Access Path Descriptor) and PackedTableDesc. (Packed
// Table Descriptor). These classes are used during table creation
// (catman) and binding, to determine how the table should be packed.
// Currently, no packing information is stored in the catalog. Only
// a flag indicating that the table is packed. Therefore, the binder
// must determine the packing information at run time in the same way
// that was done at create time.
//
#include "PackedColDesc.h"
#include "NumericType.h"
// The layout of a packed table is illustrated in the following diagram:
//
//
// Original Row Definition
// -----------------------
// SYSKEY Col1 Col2 Col3
// LARGEINT INT CHAR(3) CHAR(2)
// Not Nullable Nullable Not Nullable Nullable
//
// Packed Row Definition for a 'packing factor' of 5
// -------------------------------------------------
// SYSKEY Col1_Packed Col2_Packed Col3_Packed
// LARGEINT CHAR(25) CHAR(19) CHAR(15)
// Not Nullable Not Nullable Not Nullable Not Nullable
//
// Each Char field in the packed row contains the following sub-fields.
// --------------------------------------------------------------------
// NUMROWS NULL_BITMAP DATA
// INT CHAR(N) CHAR(M)
// Not Nullable Not Nullable NotNullable
//
// These fields are not defined (ie. the system just sees the packed columns
// as CHAR columns.) and the UnPackCol ItemExpr is used to extract the
// information.
//
// NUMROWS - This contains the number of actual values packed into a
// packed column. This value should be between 1 and the packing factor.
// The value of NUMROWS should be the same in each packed column of a
// packed row. If this value is less than the packing factor, the values
// are packed into the lower bytes of the char field.
//
// NULL_BITMAP - This field contains enough bytes to hold a bit for each
// Null indicator ('packing factor' bits).
//
// DATA - This field contains enough bytes to hold the packed values.
// (datasize * 'packing factor')
//
// In the above example, with a packing factor of 5, the first column
// is a nullable int. The data size for an int is 4 bytes, so the
// packed column will need:
//
// 4 bytes for the NUMROWS field
// 1 byte for 5 bits of NULL_BITMAP
// 20 bytes for 5 ints ( 5 * 4)
// --
// 25 total bytes required.
//
// The packing factor is determined based on two limiting factors.
// First the packed row from each access path (vertical partition) must
// fit within MaxPackedAPSize (currently 4000 bytes). Secondly, the
// sum of the sizes of the packed rows from all access paths for this
// table must fit within MaxPackedTableSize (currently 24000 bytes).
// These two limits are somewhat arbitrary. The first limits a row
// of an access path to fit within a block. The second is needed due
// to the way inserts into packed VP Tables are done. The complete
// packed row is buffered in DP2 and then split into the VP's. Having
// this buffer bigger than 32K may cause problems.
//
// PackedColDesc::determinePackedColSize() -------------------------------
// Determine the size in bytes of a packed column for the column
// given a packingFactor.
//
Lng32
PackedColDesc::determinePackedColSize(Lng32 packingFactor) const
{
  const Int32 BitsPerByte = 8;
  const NAType *type = getType();

  // Only fixed-length columns can be packed.
  //
  CMPASSERT(NOT DFS2REC::isAnyVarChar(type->getFSDatatype()));

  // NULL_BITMAP field: one bit per packed value, rounded up to whole
  // bytes. Non-nullable columns carry no bitmap at all.
  //
  Lng32 bitmapBytes = 0;
  if (type->supportsSQLnull()) {
    bitmapBytes = ((packingFactor - 1) / BitsPerByte) + 1;
  }

  // Width in bits of a single packed value. Unsigned bit precision
  // integers are compressed down to their precision when packed;
  // every other type occupies its full nominal size.
  //
  const bool compressToPrecision =
    (type->getTypeQualifier() == NA_NUMERIC_TYPE) &&
    ((NumericType *)type)->binaryPrecision() &&
    ((NumericType *)type)->isUnsigned();

  const Lng32 bitsPerValue = compressToPrecision
    ? ((NumericType *)type)->getPrecision()
    : type->getNominalSize() * BitsPerByte;

  // DATA field: packingFactor values, rounded up to whole bytes.
  //
  const Lng32 dataBytes =
    (((bitsPerValue * packingFactor) - 1) / BitsPerByte) + 1;

  // Packed column = NUM_ROWS field (SQL_INT_SIZE) + NULL_BITMAP + DATA.
  //
  return SQL_INT_SIZE + bitmapBytes + dataBytes;
}
// PackedColDesc::generatePackingInfo()---------------------------------
// Generate the packing information (dataOffset_, dataSize_, totalSize_,
// and nullBitmapPresent_) for this column given a packing factor.
// This packing information is:
//
// long dataOffset_: The offset in bytes to the start of the DATA field.
// In the above example, the dataOffset_ would be 5 (4 bytes for the NUMROWS
// field plus 1 byte for the NULL_BITMAP field).
//
// long dataSize_: The size in bits of a single data item. In the above
// example, the dataSize_ would be 32 bits (4 bytes).
//
// long totalSize_: The total size in bytes of the packed column. This
// includes the size of the NUM_ROWS field, the size of the NULL_BITMAP
// field and the size of the DATA field. In the above example, totalSize_
// would be 25 (4 + 1 + 20).
//
// NABoolean nullBitmapPresent_: A boolean flag indicating if there is
// a NULL_BITMAP field present in this packed column.
//
//
void
PackedColDesc::generatePackingInfo(Lng32 packingFactor)
{
  // A packing factor of 1 or less means "not packed"; callers must not
  // ask for packing information in that case.
  //
  CMPASSERT(packingFactor > 1);

  const Int32 BitsPerByte = 8;
  const NAType *type = getType();

  // Bytes needed for the NULL_BITMAP field (one bit per packed value,
  // rounded up). Zero when the column is not nullable.
  //
  Lng32 bitmapBytes = 0;
  if (type->supportsSQLnull()) {
    bitmapBytes = ((packingFactor - 1) / BitsPerByte) + 1;
  }

  // Bits occupied by one packed value. Unsigned bit precision
  // integers are compressed to their precision; all other types use
  // their nominal size.
  //
  Lng32 bitsPerValue;
  if ((type->getTypeQualifier() == NA_NUMERIC_TYPE) &&
      ((NumericType *)type)->binaryPrecision() &&
      ((NumericType *)type)->isUnsigned()) {
    bitsPerValue = ((NumericType *)type)->getPrecision();
  } else {
    bitsPerValue = type->getNominalSize() * BitsPerByte;
  }

  // Bytes needed for the DATA field, rounded up to a whole byte.
  //
  const Lng32 dataBytes =
    (((bitsPerValue * packingFactor) - 1) / BitsPerByte) + 1;

  // Record the layout: DATA starts right after NUM_ROWS and the
  // NULL_BITMAP; the total adds the DATA field on top of that.
  //
  dataOffset_ = SQL_INT_SIZE + bitmapBytes;
  dataSize_ = bitsPerValue;
  totalSize_ = SQL_INT_SIZE + bitmapBytes + dataBytes;
  nullBitmapPresent_ = type->supportsSQLnull();
}
// PackedAPDesc::determinePackingFactor() ------------------------
// Determine the maximum packing factor for this AP limited by the
// given maxPackedRecLen.
//
Lng32
PackedAPDesc::determinePackingFactor(Lng32 maxPackedRecLen) const
{
  const Int32 BitsPerByte = 8;
  PackedColDescList apCols = getAPColumns();

  // Number of columns requiring a null bitmap.
  //
  Lng32 numNullFlags = 0;

  // Size of the key for this AP.
  //
  Lng32 keySizeInBytes = getKeySize();

  // Size of the data for all columns of this AP.
  //
  Lng32 dataSizeInBits = 0;

  // Number of user columns in this AP. (SYSKEY is handled separately.)
  //
  CollIndex numUserColumns = apCols.entries();

  for(CollIndex c = 0; c < numUserColumns; c++) {
    const NAType *colType = apCols[c]->getType();

    // Variable length columns can not be packed at this time.
    // If any column of this AP is variable length, then the AP
    // cannot be packed.
    //
    if(colType->isVaryingLen())
      return 0;

    // Does this column require a null bitmap.
    //
    numNullFlags += (colType->supportsSQLnull() ? 1 : 0);

    // If the column is a bit precision integer, get the
    // number of bits used by this column. When we pack we
    // also compress the bit precision integers.
    //
    if((colType->getTypeQualifier() == NA_NUMERIC_TYPE) &&
       ((NumericType *)colType)->binaryPrecision() &&
       ((NumericType *)colType)->isUnsigned()) {
      dataSizeInBits += ((NumericType *)colType)->getPrecision();
    } else {
      // Calculate how many bits required to store this column.
      //
      dataSizeInBits += colType->getNominalSize() * BitsPerByte;
    }
  }

  // For now we only support packing with SYSKEYs of 8 bytes.
  //
  CMPASSERT(keySizeInBytes == 8);

  // Bits consumed by one logical row across all packed columns
  // (one null flag per nullable column plus the data bits).
  // If this is zero (for instance an AP without user columns) there is
  // nothing to divide by; report the AP as not packable instead of
  // dividing by zero.
  //
  const Lng32 bitsPerRow = numNullFlags + dataSizeInBits;
  if(bitsPerRow <= 0)
    return 0;

  // Bytes left for null bitmaps and data once the fixed overhead is
  // removed:
  //   - keySizeInBytes for the SYSKEY (one per AP),
  //   - SQL_INT_SIZE per user column for the NUM_ROWS fields.
  // The computation is done in signed arithmetic on purpose:
  // numUserColumns is an unsigned CollIndex, and letting it drag the
  // whole expression to unsigned would turn a negative remainder into a
  // huge positive value (and thus a bogus packing factor).
  //
  const Lng32 numRowsOverhead = (Lng32)SQL_INT_SIZE * (Lng32)numUserColumns;
  const Lng32 availableBytes =
    maxPackedRecLen - keySizeInBytes - numRowsOverhead;
  if(availableBytes <= 0)
    return 0;

  // Calculate the packing factor. This does not take into account
  // rounding each packed field to the closest byte, but this should
  // be close enough.
  //
  Lng32 packingFactor = (availableBytes * BitsPerByte) / bitsPerRow;
  return packingFactor;
}
// PackedAPDesc::determinePackedAPSize() -------------------------------
// Determine the size in bytes of all the packed columns of this AP
// given a packing factor.
//
Lng32
PackedAPDesc::determinePackedAPSize(Lng32 packingFactor) const
{
  PackedColDescList columns = getAPColumns();
  const CollIndex columnCount = columns.entries();

  // Accumulate the packed size of every user column of this AP.
  //
  Lng32 totalBytes = 0;
  CollIndex idx = 0;
  while (idx < columnCount) {
    totalBytes += columns[idx]->determinePackedColSize(packingFactor);
    idx++;
  }

  return totalBytes;
}
// PackedAPDesc::generatePackingInfo() ---------------------------------
// Generate the packing information for this AP and all its
// columns given a packing factor. After this call the packing
// info can be retrieved for a given column of the base table.
//
void
PackedAPDesc::generatePackingInfo(Lng32 packingFactor)
{
  // A packing factor of 1 or less means this AP is not packed; record
  // that as 0 and leave the columns untouched.
  //
  if (packingFactor <= 1) {
    packingFactor_ = 0;
    return;
  }

  // Packed: remember the factor and let every column of this AP
  // derive its own packing layout from it.
  //
  packingFactor_ = packingFactor;

  PackedColDescList columns = getAPColumns();
  for (CollIndex idx = 0; idx < columns.entries(); idx++) {
    columns[idx]->generatePackingInfo(packingFactor);
  }
}
// PackedAPDesc::getPackingInfoForColumn() --------------------------------
// Retrieve the packing information for a column given the column's
// ordinal position in the base table.
//
PackedColDesc *
PackedAPDesc::getPackingInfoForColumn(Lng32 position)
{
  // Linear search of this AP's columns for the one whose position
  // matches the requested position.
  //
  const CollIndex columnCount = cols_.entries();
  for (CollIndex idx = 0; idx < columnCount; idx++) {
    PackedColDesc *candidate = cols_[idx];
    if (candidate->getPosition() == position)
      return candidate;
  }

  // No column of this AP has the requested position.
  //
  return (PackedColDesc *)NULL;
}
// PackedAPDesc::addColumn() ------------------------------------------
// Add a column (PackedColDesc) to this PackedAPDesc.
//
void
PackedAPDesc::addColumn(const NAType *type, Lng32 position, CollHeap *h)
{
PackedColDesc *packedColDesc = new (h) PackedColDesc(position, type);
cols_.insert(packedColDesc);
}
// PackedTableDesc::PackedTableDesc() ------------------------------------
// Constructor called from the binder. Constructs and populates a
// PackedTableDesc given a NATable. The result will indicate if the
// table can be packed and if so, will describe how each of the columns
// of each of the access paths (read vertical partitions) of this table
// is packed.
//
PackedTableDesc::PackedTableDesc(const NATable *naTable, CollHeap *h)
{
  // This constructor is the binder's entry point (the NATable already
  // exists), so it is never a call from catman.
  //
  callFromCatMan_ = FALSE;

  // For now, only vertically partitioned tables are considered for
  // packing. Without any VP there is nothing to describe.
  //
  if (naTable->getVerticalPartitionList().entries() == 0)
    return;

  // Build one PackedAPDesc per vertical partition.
  //
  NAFileSetList partitions = naTable->getVerticalPartitionList();
  for (CollIndex vp = 0; vp < partitions.entries(); vp++) {
    NAColumnArray columns = partitions[vp]->getAllColumns();
    const CollIndex columnCount = columns.entries();
    CollIndex col;

    // First pass: the key size is the sum of the nominal sizes of the
    // clustering key columns. (For now it must come out as 8, the
    // size of a SYSKEY.)
    //
    Lng32 keyBytes = 0;
    for (col = 0; col < columnCount; col++) {
      if (columns[col]->isClusteringKey())
        keyBytes += columns[col]->getType()->getNominalSize();
    }

    // Second pass: every non-key column becomes a PackedColDesc of
    // this AP, remembered under its position.
    //
    PackedAPDesc *accessPath = new(h) PackedAPDesc(keyBytes);
    for (col = 0; col < columnCount; col++) {
      if (columns[col]->isClusteringKey())
        continue;
      accessPath->addColumn(columns[col]->getType(),
                            columns[col]->getPosition(),
                            h);
    }

    addAP(accessPath);
  }

  // With all APs registered, work out the packing layout.
  //
  generatePackingInfo();
}
// PackedTableDesc::generatePackingInfo() ---------------------------------
// Generate the packing information of each column of each AP of this
// table. If this table can be packed, the resulting packing information
// will be (for each column):
//
// long dataOffset_: The offset in bytes to the start of the DATA field.
// In the above example, the dataOffset_ would be 5 (4 bytes for the NUMROWS
// field plus 1 byte for the NULL_BITMAP field).
//
// long dataSize_: The size in bits of a single data item. In the above
// example, the dataSize_ would be 32 bits (4 bytes).
//
// long totalSize_: The total size in bytes of the packed column. This
// includes the size of the NUM_ROWS field, the size of the NULL_BITMAP
// field and the size of the DATA field. In the above example, totalSize_
// would be 25 (4 + 1 + 20).
//
// NABoolean nullBitmapPresent_: A boolean flag indicating if there is
// a NULL_BITMAP field present in this packed column.
void
PackedTableDesc::generatePackingInfo()
{
// The packing factor is determined based on two limiting factors.
// First the packed row from each access path (vertical partition)
// must fit within MaxPackedAPSize (currently 4000 bytes). Secondly,
// the sum of the sizes of the packed rows from all access paths for
// this table must fit within MaxPackedTableSize (currently 32000 bytes).
// These two limits are somewhat arbitrary. The first limits a row of an
// access path to fit within a block. The second is needed due to the way
// inserts into packed VP Tables are done. The complete packed row is
// buffered in DP2 and then split into the VP's. Having this buffer
// bigger than 32K may cause problems.
//
// The maximum size of any packed AP (which could have more than
// one column.
//
const Int32 MaxPackedAPSize = 4000;
// The maximum size of all the packed AP's of this table.
//
const Int32 MaxPackedTableSize = 24000;
// The current minimum packing factor. This will be the packing
// factor limited by MaxPackedAPSize for the worst case AP.
// Initialize to the max possible packing factor.
//
Lng32 minPackingFactor = MaxPackedAPSize * 8;
// IF packing is turned off. Check to see if we're creating a new table.
// Only check when we are creating a new table. If we are scanning
// then the table is already packed and we cannot do anything about
// it. If a table that could have been packed is create with
// "PACKING_OFF" set, then the packed flag in the catalog will not
// be set and we will not reach this code when scanning the table.
//
if(getenv("PACKING_OFF") != NULL AND callFromCatMan_) {
// If we are, don't create a packed table.
//
minPackingFactor = 0;
} else {
// For each AP determine its packing factor given MaxPackedAPSize.
// Keep track of the minimum. This is the limiting AP.
//
CollIndex i = 0;
for (i = 0; i < packedAPDescList_.entries(); i++) {
Lng32 packingFactor =
packedAPDescList_[i]->determinePackingFactor(MaxPackedAPSize);
minPackingFactor = (packingFactor < minPackingFactor)
? packingFactor
: minPackingFactor;
}
if(minPackingFactor > 1) {
// Determine the size of all the packed AP's given the packing
// factor limited by MaxPackedAPSize.
//
Lng32 packedTableSize = 0;
for (i = 0; i < packedAPDescList_.entries(); i++) {
packedTableSize +=
packedAPDescList_[i]->determinePackedAPSize(minPackingFactor);
}
// If the total size is larger than the max allowed, adjust the
// packing factor by the same ratio.
//
if(packedTableSize > MaxPackedTableSize) {
float packingFactorAdj = MaxPackedTableSize/(float)packedTableSize;
minPackingFactor = (Int32)(minPackingFactor * packingFactorAdj) - 1;
}
}
}
// Now that a suitable packing factor has been determined
// generate the packing information for each column of each
// AP of this table.
//
for (CollIndex i = 0; i < packedAPDescList_.entries(); i++) {
packedAPDescList_[i]->generatePackingInfo(minPackingFactor);
}
}
// PackedTableDesc::getPackingInfoForColumn()------------------------------
// Retrieve the packing information for a column given its
// ordinal position in the base table.
//
PackedColDesc *
PackedTableDesc::getPackingInfoForColumn(Lng32 position)
{
  // Ask each AP in turn; the first one that knows the column wins.
  //
  for (CollIndex ap = 0; ap < packedAPDescList_.entries(); ap++) {
    PackedColDesc *match =
      packedAPDescList_[ap]->getPackingInfoForColumn(position);
    if (match != NULL)
      return match;
  }

  // No AP of this table contains a column at that position.
  //
  return (PackedColDesc *)NULL;
}