/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
******************************************************************************
*
* File:         PackedColDesc.cpp
* Description:  All the methods of PackedAPDesc PackedTableDesc
*
* Created:      6/27/97
* Language:     C++
*
*
******************************************************************************
*/

// exclude this whole file from coverage since this code is related to vertical
// partitioning and that feature is not active anymore

// -----------------------------------------------------------------------
// This file contains all the methods for the class PackedAPDesc
// (Packed Access Path Descriptor) and PackedTableDesc. (Packed
// Table Descriptor).  These classes are used during table creation
// (catman) and binding, to determine how the table should be packed.
// Currently, no packing information is stored in the catalog.  Only
// a flag indicating that the table is packed.  Therefore, the binder
// must determine the packing information at run time in the same way
// that was done at create time.
//
#include "PackedColDesc.h"
#include "NumericType.h"

// The layout of a packed table is illustrated in the following diagram:
//
//
//   Original Row Definition
//   -----------------------
//     SYSKEY        Col1          Col2          Col3
//     LARGEINT      INT           CHAR(3)       CHAR(2)
//     Not Nullable  Nullable      Not Nullable  Nullable
//
//   Packed Row Definition for a 'packing factor' of 5
//   -------------------------------------------------
//     SYSKEY        Col1_Packed   Col2_Packed   Col3_Packed
//     LARGEINT      CHAR(25)      CHAR(19)      CHAR(15)
//     Not Nullable  Not Nullable  Not Nullable  Not Nullable
//
//   Each Char field in the packed row contains the following sub-fields.
//   --------------------------------------------------------------------
//     NUMROWS       NULL_BITMAP   DATA
//     INT           CHAR(N)       CHAR(M)
//     Not Nullable  Not Nullable  Not Nullable
//
//   These fields are not defined (ie. the system just sees the packed columns
//   as CHAR columns.) and the UnPackCol ItemExpr is used to extract the
//   information.
//
//   NUMROWS - This contains the number of actual values packed into a
//   packed column.  This value should be between 1 and the packing factor.
//   The value of NUMROWS should be the same in each packed column of a
//   packed row.  If this value is less than the packing factor, the values
//   are packed into the lower bytes of the char field.
//
//   NULL_BITMAP - This field contains enough bytes to hold a bit for each
//   Null indicator ('packing factor' bits).
//
//   DATA - This field contains enough bytes to hold the packed values.
//   (datasize * 'packing factor')
//
//   In the above example, with a packing factor of 5, the first column
//   is a nullable int.  The data size for an int is 4 bytes, so the
//   packed column will need:
//
//           4 bytes for the NUMROWS field
//           1 byte for 5 bits of NULL_BITMAP
//          20 bytes for 5 ints ( 5 * 4)
//          --
//          25 total bytes required.
//
// The packing factor is determined based on two limiting factors.
// First, the packed row from each access path (vertical partition) must
// fit within MaxPackedAPSize (currently 4000 bytes).  Secondly, the
// sum of the sizes of the packed rows from all access paths for this
// table must fit within MaxPackedTableSize (currently 24000 bytes).
// These two limits are somewhat arbitrary.  The first limits a row
// of an access path to fit within a block.  The second is needed due
// to the way inserts into packed VP Tables are done.  The complete
// packed row is buffered in DP2 and then split into the VP's.  Having
// this buffer bigger than 32K may cause problems.
//

// PackedColDesc::determinePackedColSize() -------------------------------
// Compute the number of bytes this column occupies once 'packingFactor'
// values of it have been packed into a single field.  The result is the
// sum of:
//
//   - SQL_INT_SIZE bytes for the NUM_ROWS field,
//   - the NULL_BITMAP field (one bit per packed value, rounded up to
//     whole bytes; absent for columns that do not support SQL null),
//   - the DATA field (bits per value * packingFactor, rounded up to
//     whole bytes).
//
// Asserts that the column is not a varchar type.
//
Lng32
PackedColDesc::determinePackedColSize(Lng32 packingFactor) const
{
  const Int32 bitsInByte = 8;

  const NAType *type = getType();

  // Only fixed length columns can be packed.
  //
  CMPASSERT(NOT DFS2REC::isAnyVarChar(type->getFSDatatype()));

  // Bytes needed for the null indicators: one bit per packed value,
  // rounded up.  Columns that cannot be null carry no bitmap.
  //
  Lng32 bitmapBytes = 0;
  if(type->supportsSQLnull())
    bitmapBytes = ((packingFactor - 1) / bitsInByte) + 1;

  // Width in bits of a single value.  Unsigned bit precision integers
  // are compressed when packed, so they only take 'precision' bits;
  // every other type takes its full nominal size.
  //
  const Lng32 bitsPerValue =
    ((type->getTypeQualifier() == NA_NUMERIC_TYPE) &&
     ((NumericType *)type)->binaryPrecision() &&
     ((NumericType *)type)->isUnsigned())
    ? ((NumericType *)type)->getPrecision()
    : type->getNominalSize() * bitsInByte;

  // Bytes needed for the DATA field, rounded up to a whole byte.
  //
  const Lng32 dataBytes =
    (((bitsPerValue * packingFactor) - 1) / bitsInByte) + 1;

  // NUM_ROWS field + NULL_BITMAP field + DATA field.
  //
  return SQL_INT_SIZE + bitmapBytes + dataBytes;
}

// PackedColDesc::generatePackingInfo()---------------------------------
// Record, for this column and the given packing factor, how the packed
// field is laid out.  The packing factor must be greater than 1
// (asserted).  The members set are:
//
//   dataOffset_: offset in bytes from the start of the packed field to
//   the DATA field, i.e. the size of the NUM_ROWS field plus the size
//   of the NULL_BITMAP field.  For the nullable INT of the example at
//   the top of this file (packing factor 5) this is 5.
//
//   dataSize_: size in bits of a single data item (32 for the INT of
//   the example).
//
//   totalSize_: size in bytes of the whole packed field: NUM_ROWS +
//   NULL_BITMAP + DATA (4 + 1 + 20 = 25 for the INT of the example).
//
//   nullBitmapPresent_: whether the packed field contains a NULL_BITMAP
//   field, i.e. whether the column type supports SQL null.
//
void
PackedColDesc::generatePackingInfo(Lng32 packingFactor)
{
  CMPASSERT(packingFactor > 1);

  const Int32 bitsInByte = 8;

  const NAType *type = getType();

  // One null indicator bit per packed value, rounded up to whole bytes.
  // No bitmap at all for columns that cannot be null.
  //
  Lng32 bitmapBytes = 0;
  if(type->supportsSQLnull())
    bitmapBytes = ((packingFactor - 1) / bitsInByte) + 1;

  // Width in bits of a single value.  Unsigned bit precision integers
  // are compressed to 'precision' bits when packed; every other type
  // uses its nominal size.
  //
  const Lng32 bitsPerValue =
    ((type->getTypeQualifier() == NA_NUMERIC_TYPE) &&
     ((NumericType *)type)->binaryPrecision() &&
     ((NumericType *)type)->isUnsigned())
    ? ((NumericType *)type)->getPrecision()
    : type->getNominalSize() * bitsInByte;

  // Bytes needed for the DATA field, rounded up to a whole byte.
  //
  const Lng32 dataBytes =
    (((bitsPerValue * packingFactor) - 1) / bitsInByte) + 1;

  // The DATA field starts right after NUM_ROWS and NULL_BITMAP.
  //
  dataOffset_ = SQL_INT_SIZE + bitmapBytes;
  dataSize_ = bitsPerValue;
  totalSize_ = SQL_INT_SIZE + bitmapBytes + dataBytes;
  nullBitmapPresent_ = type->supportsSQLnull();
}

// PackedAPDesc::determinePackingFactor() ------------------------
// Determine the maximum packing factor for this AP such that its
// packed row fits (approximately) within maxPackedRecLen bytes.
//
// Returns 0 when the AP cannot be packed:
//   - one of its columns is variable length,
//   - the key and the NUM_ROWS fields alone already use up
//     maxPackedRecLen, or
//   - the AP has no user column data to pack.
//
// Asserts that the key of the AP is 8 bytes (a SYSKEY).
//
Lng32
PackedAPDesc::determinePackingFactor(Lng32 maxPackedRecLen) const
{

  const Int32 BitsPerByte = 8;

  PackedColDescList apCols = getAPColumns();

  // Number of columns requiring a null bitmap.
  //
  Lng32 numNullFlags = 0;

  // Size of the key for this AP.
  //
  Lng32 keySizeInBytes = getKeySize();

  // Size of the data for all columns of this AP.
  //
  Lng32 dataSizeInBits = 0;

  // Number of user columns in this AP. (SYSKEY is handled separately.)
  //
  CollIndex numUserColumns = apCols.entries();

  for(CollIndex c = 0; c < numUserColumns; c++) {

    const NAType *colType = apCols[c]->getType();

    // Variable length columns can not be packed at this time.
    // If any column of this AP is variable length, then the AP
    // cannot be packed.
    //
    if(colType->isVaryingLen())
      return 0;

    // Does this column require a null bitmap.
    //
    numNullFlags += (colType->supportsSQLnull() ? 1 : 0);

    // If the column is a bit precision integer, get the
    // number of bits used by this column.  When we pack we
    // also compress the bit precision integers.
    //
    if((colType->getTypeQualifier() == NA_NUMERIC_TYPE) &&
       ((NumericType *)colType)->binaryPrecision() &&
       ((NumericType *)colType)->isUnsigned()) {

      dataSizeInBits += ((NumericType *)colType)->getPrecision();

    } else {

      // Calculate how many bits required to store this column.
      //
      dataSizeInBits += colType->getNominalSize() * BitsPerByte;
    }
  }

  // For now we only support packing with SYSKEYs of 8 bytes.
  //
  CMPASSERT(keySizeInBytes == 8);

  // Bytes taken by the NUM_ROWS fields (one per user column).  The
  // column count is an unsigned CollIndex; convert explicitly so that
  // the budget below is computed in signed arithmetic.  In unsigned
  // arithmetic an exhausted budget would wrap around to a huge positive
  // value and yield a bogus, very large packing factor.
  //
  const Lng32 numRowsFieldsInBytes =
    static_cast<Lng32>(SQL_INT_SIZE) * static_cast<Lng32>(numUserColumns);

  // Bytes left for null bitmaps and data once the SYSKEY (one per AP)
  // and the NUM_ROWS fields have been accounted for.
  //
  const Lng32 availableBytes =
    maxPackedRecLen - keySizeInBytes - numRowsFieldsInBytes;

  // Bits required by one (unpacked) row: one null indicator bit per
  // nullable column plus the data bits of every column.
  //
  const Lng32 bitsPerRow = numNullFlags + dataSizeInBits;

  // No room left, or nothing to pack (which would otherwise be a
  // division by zero): this AP cannot be packed.
  //
  if(availableBytes <= 0 || bitsPerRow <= 0)
    return 0;

  // Calculate the packing factor.  This does not take into account
  // rounding each packed field to the closest byte, but this should
  // be close enough.
  //
  return (availableBytes * BitsPerByte) / bitsPerRow;
}

// PackedAPDesc::determinePackedAPSize() -------------------------------
// Return the combined size in bytes of all the packed columns of this
// AP for the given packing factor, i.e. the sum of
// determinePackedColSize(packingFactor) over every column of the AP.
//
Lng32
PackedAPDesc::determinePackedAPSize(Lng32 packingFactor) const
{
  PackedColDescList columns = getAPColumns();
  const CollIndex columnCount = columns.entries();

  // Accumulate the packed size of each column in turn.
  //
  Lng32 totalBytes = 0;
  for(CollIndex idx = 0; idx < columnCount; idx++)
    totalBytes += columns[idx]->determinePackedColSize(packingFactor);

  return totalBytes;
}


// PackedAPDesc::generatePackingInfo() ---------------------------------
// Record the packing factor of this AP and, when the AP is packed,
// generate the packing information of each of its columns.  A packing
// factor of 1 or less means the AP is not packed; packingFactor_ is
// then set to 0 and the columns are left untouched.
//
void
PackedAPDesc::generatePackingInfo(Lng32 packingFactor)
{
  PackedColDescList columns = getAPColumns();

  // Not packed.
  //
  if(packingFactor <= 1) {
    packingFactor_ = 0;
    return;
  }

  packingFactor_ = packingFactor;

  // Let every column work out its own layout for this packing factor.
  //
  for(CollIndex col = 0; col < columns.entries(); col++)
    columns[col]->generatePackingInfo(packingFactor);
}


// PackedAPDesc::getPackingInfoForColumn() --------------------------------
// Look up the packing information of the column whose ordinal position
// in the base table is 'position'.  Returns the first matching column
// descriptor of this AP, or NULL if this AP has no such column.
//
PackedColDesc *
PackedAPDesc::getPackingInfoForColumn(Lng32 position)
{
  CollIndex idx = 0;

  while(idx < cols_.entries()) {
    PackedColDesc *candidate = cols_[idx];

    if(candidate->getPosition() == position)
      return candidate;

    idx++;
  }

  // No column of this AP sits at the requested position.
  //
  return (PackedColDesc *)NULL;
}

// PackedAPDesc::addColumn() ------------------------------------------
// Add a column (PackedColDesc) to this PackedAPDesc.
//
void 
PackedAPDesc::addColumn(const NAType *type, Lng32 position, CollHeap *h)
{
  PackedColDesc *packedColDesc = new (h) PackedColDesc(position, type);

  cols_.insert(packedColDesc);
  
}


// PackedTableDesc::PackedTableDesc() ------------------------------------
// Constructor called from the binder, when the NATable is already
// available.  Builds one PackedAPDesc per vertical partition (VP) of
// the table and then generates the packing information.  Afterwards
// this object describes whether the table is packed and, if so, how
// each column of each access path (read: vertical partition) is packed.
//
// Tables without vertical partitions are not considered for packing;
// for those nothing beyond clearing callFromCatMan_ is done.
//
PackedTableDesc::PackedTableDesc(const NATable *naTable, CollHeap *h)
{
  // This constructor is never used by catman.
  //
  callFromCatMan_ = FALSE;

  // For now, only vertically partitioned tables are considered for
  // packing.
  //
  if(naTable->getVerticalPartitionList().entries() == 0)
    return;

  NAFileSetList partitions = naTable->getVerticalPartitionList();

  for(CollIndex vp = 0; vp < partitions.entries(); vp++) {
    NAColumnArray columns = partitions[vp]->getAllColumns();
    CollIndex c;

    // First pass: the key size of the VP is the sum of the nominal
    // sizes of its clustering key columns.  (determinePackingFactor()
    // currently requires this to be 8, the size of a SYSKEY.)
    //
    Lng32 keyBytes = 0;
    for(c = 0; c < columns.entries(); c++) {
      if(columns[c]->isClusteringKey())
        keyBytes += columns[c]->getType()->getNominalSize();
    }

    // Second pass: every column that is not part of the clustering key
    // is a user column and gets a PackedColDesc in the AP.
    //
    PackedAPDesc *accessPath = new(h) PackedAPDesc(keyBytes);
    for(c = 0; c < columns.entries(); c++) {
      if(columns[c]->isClusteringKey())
        continue;

      accessPath->addColumn(columns[c]->getType(),
                            columns[c]->getPosition(),
                            h);
    }

    addAP(accessPath);
  }

  // All APs are known; work out the packing factor and the layout of
  // each column.
  //
  generatePackingInfo();
}


// PackedTableDesc::generatePackingInfo() ---------------------------------
// Generate the packing information of each column of each AP of this
// table.  If this table can be packed, the resulting packing information
// will be (for each column):
//
//   long dataOffset_: The offset in bytes to the start of the DATA field.
//   In the above example, the dataOffset_ would be 5 (4 bytes for the NUMROWS
//   field plus 1 byte for the NULL_BITMAP field).
//
//   long dataSize_: The size in bits of a single data item. In the above
//   example, the dataSize_ would be 32 bits (4 bytes).
//
//   long totalSize_: The total size in bytes of the packed column.  This
//   includes the size of the NUM_ROWS field, the size of the NULL_BITMAP
//   field and the size of the DATA field.  In the above example, totalSize_
//   would be 25 (4 + 1 + 20).
//
//   NABoolean nullBitmapPresent_: A boolean flag indicating if there is
//   a NULL_BITMAP field present in this packed column.


void
PackedTableDesc::generatePackingInfo()
{

  // The packing factor is determined based on two limiting factors.
  // First the packed row from each access path (vertical partition)
  // must fit within MaxPackedAPSize (currently 4000 bytes).  Secondly,
  // the sum of the sizes of the packed rows from all access paths for
  // this table must fit within MaxPackedTableSize (currently 32000 bytes).
  // These two limits are somewhat arbitrary.  The first limits a row of an
  // access path to fit within a block.  The second is needed due to the way
  // inserts into packed VP Tables are done.  The complete packed row is
  // buffered in DP2 and then split into the VP's.  Having this buffer 
  // bigger than 32K may cause problems.
  //
  // The maximum size of any packed AP (which could have more than
  // one column.
  //
  const Int32 MaxPackedAPSize = 4000;

  // The maximum size of all the packed AP's of this table.
  //
  const Int32 MaxPackedTableSize = 24000;

  // The current minimum packing factor.  This will be the packing
  // factor limited by MaxPackedAPSize for the worst case AP.
  // Initialize to the max possible packing factor.
  //
  Lng32 minPackingFactor = MaxPackedAPSize * 8;
    
  // IF packing is turned off. Check to see if we're creating a new table.
  // Only check when we are creating a new table.  If we are scanning
  // then the table is already packed and we cannot do anything about
  // it.  If a table that could have been packed is create with
  // "PACKING_OFF" set, then the packed flag in the catalog will not
  // be set and we will not reach this code when scanning the table.
  //
  if(getenv("PACKING_OFF") != NULL AND callFromCatMan_) {
    // If we are, don't create a packed table.
    //
    minPackingFactor = 0;

  }  else {
    // For each AP determine its packing factor given MaxPackedAPSize.
    // Keep track of the minimum. This is the limiting AP.
    //
    CollIndex i = 0;
    for (i = 0; i < packedAPDescList_.entries(); i++) {

      Lng32 packingFactor = 
        packedAPDescList_[i]->determinePackingFactor(MaxPackedAPSize);

      minPackingFactor = (packingFactor < minPackingFactor)
        ? packingFactor
        : minPackingFactor;
    }


    if(minPackingFactor > 1) {
      // Determine the size of all the packed AP's given the packing
      // factor limited by MaxPackedAPSize.
      //
      Lng32 packedTableSize = 0;
      for (i = 0; i < packedAPDescList_.entries(); i++) {
        
        packedTableSize +=
          packedAPDescList_[i]->determinePackedAPSize(minPackingFactor);
      }
      
      // If the total size is larger than the max allowed, adjust the
      // packing factor by the same ratio.
      //
      if(packedTableSize > MaxPackedTableSize) {

        float packingFactorAdj = MaxPackedTableSize/(float)packedTableSize;
        minPackingFactor = (Int32)(minPackingFactor * packingFactorAdj) - 1;
      }
    }
  }

  // Now that a suitable packing factor has been determined
  // generate the packing information for each column of each
  // AP of this table.
  //
  for (CollIndex i = 0; i < packedAPDescList_.entries(); i++) {
    packedAPDescList_[i]->generatePackingInfo(minPackingFactor);
  }

}

// PackedTableDesc::getPackingInfoForColumn()------------------------------
// Look up the packing information of the column whose ordinal position
// in the base table is 'position'.  Each AP of the table is asked in
// turn; the first non-NULL answer is returned.  Returns NULL when no AP
// has a column at that position.
//
PackedColDesc *
PackedTableDesc::getPackingInfoForColumn(Lng32 position)
{
  for(CollIndex ap = 0; ap < packedAPDescList_.entries(); ap++) {
    PackedColDesc *found =
      packedAPDescList_[ap]->getPackingInfoForColumn(position);

    if(found)
      return found;
  }

  // None of the APs knows this column.
  //
  return (PackedColDesc *)NULL;
}

