core/sql/optimizer/EncodedValue.cpp - trafodion - Git at Google

 /**********************************************************************
 // @@@ START COPYRIGHT @@@
 //
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 //
 // @@@ END COPYRIGHT @@@
 **********************************************************************/
 /* -*-C++-*-
  *****************************************************************************
  *
  * File:         EncodedValue.C
  * Description:  Encoded Value class contains the encoded format for (possibly
  *               multi-attribute) values, used for storing histogram values.
  * Created:      June 7, 1995
  * Language:     C++
  *
  *
  *
  *
  *****************************************************************************
  */

 #include "Platform.h"
 #include "Sqlcomp.h"	       /* must be first included file */
 #include "EncodedValue.h"
 #include "ItemColRef.h"
 #include "parser.h"
 #include "str.h"
 #include <wchar.h>
 #include "NLSConversion.h"
 #include "hs_const.h"          /* for HS_MAX_BOUNDARY_LEN */
 #include "wstr.h"
 #include <exp_function.h>


 #include <ostream>

 #include "CompException.h"

 #include "exp_function.h"

 // File-scope encoded constants.
 // NULL_ENCODEDVALUE is built from the wide-character boundary text "(NULL)";
 // UNINIT_ENCODEDVALUE is built from the _ENCODEDVALUE_UNINIT_VALUE_ sentinel,
 // the same value NormValue::display() tests for to report an uninitialized value.
 // NOTE(review): being globals, these are constructed before main() runs, so
 // their constructors presumably cannot rely on other global state -- confirm.
 const EncodedValue NULL_ENCODEDVALUE (WIDE_("(NULL)")) ;
 const EncodedValue UNINIT_ENCODEDVALUE (_ENCODEDVALUE_UNINIT_VALUE_) ;

 //
 // NOTE: This code came from encodeString() in .../common/CharType.cpp and
 // was moved here because
 // (1) this code needed to start calling ex_function_encode::encodeCollationKey(),
 // which is in the Executor and therefore not callable from the common library
 // code, and
 // (2) the places in this source file (EncodedValue.cpp) that called
 // CharType.cpp's encodeString() routine were the ONLY places that needed
 // that functionality, so the encodeString() logic more properly belonged here.
 //
 double EncVal_encodeString(const char * str, Lng32 strLen, CharType *cType)
 {
   double result;

   // the blank-padded first 8 bytes of string <str>
   unsigned char stringValues[8];

   // two long integers that, when concatenated, contain the result
   // as a 64 bit integer
   ULng32 hiResult = 0;
   ULng32 loResult = 0;

   // ---------------------------------------------------------------------
   // the encode function performs blank-padding just as the comparison
   // operator does, in order to make encoded values of all character data
   // types compatible
   // ---------------------------------------------------------------------
   // for the NCHAR prototype we want to treat nchars just like chars.
   // redo this when we do the real thing.
   if (cType->getCharSet() != CharInfo::UNICODE ) // ... used to be ... if (cType->getBytesPerChar() == 1)
     {
       // fill "stringValues" with 8 bit blanks
       unsigned char blank = (unsigned char) cType->getBlankCharacterValue();

       for (Int32 i = 0; i < 8; i++)
 	stringValues[i] = blank;

     } // bytes per char == 1
   else
     {
       ComASSERT(cType->getBytesPerChar() == SQL_DBCHAR_SIZE);

       // 2/19/98: copy the blank char as byte string to stringValues[]
       // so that endian-ness will not be an issue.

       unsigned short blank = (unsigned short) cType->getBlankCharacterValue();
       for (Int32 i = 0; i < 4; i++)
 	{
           str_cpy_all((char*)&stringValues[SQL_DBCHAR_SIZE*i],
 		      (char*)&blank, sizeof(blank));
 	}

     } // bytes per char != 1

   // NOTE: Caseinsensitive is not supported with Czech Collation
   if ((cType->isCaseinsensitive()) &&
       (NOT cType->isUpshifted()) && cType->getBytesPerChar() == 1)
     {
       // upshift and copy the first 8 bytes of str
       // (or all bytes if it is shorter than 8)
       str_cpy_convert((char *) stringValues, (char *)str, MINOF(strLen,8), -1);
     }
   else
     {
 #define MAXPASSES 4
 #define ENCKEYBUFLEN  (8 * (MAXPASSES+1) + 2) //Ensure Temp buffer big enough!
       UInt8 encodeKeyBuf1[ ENCKEYBUFLEN ];     //Temp buffer
       UInt8 * tmpstr = (UInt8 *)str;
       Int32    tmpLen = strLen;
       CharInfo::Collation collation = cType->getCollation();
       if (collation != CharInfo::DefaultCollation) {
           memset(encodeKeyBuf1, 0, ENCKEYBUFLEN);
           Int16 nPasses = CollationInfo::getCollationNPasses(collation);
           ex_function_encode::encodeCollationKey( (const UInt8 *)str,
                                                   MINOF(strLen,8),
                                                   encodeKeyBuf1,
                                                   ENCKEYBUFLEN,
                                                   nPasses,
                                                   collation,
                                                   FALSE /*remove trailing spaces? */
                                                 );
           tmpstr = encodeKeyBuf1;
           tmpLen = ENCKEYBUFLEN;
       }
       // copy the first 8 bytes of str (or all bytes if it is shorter than 8)
       str_cpy_all((char *) stringValues, (char *)tmpstr, MINOF(tmpLen,8));
     }

 #ifdef NA_LITTLE_ENDIAN
   if (cType->getBytesPerChar() == SQL_DBCHAR_SIZE)
   {
       wc_swap_bytes((NAWchar*)stringValues, 4);
   }
 #endif

   // leave the upper 12 bits (mask = 0xfff00000) of hiResult empty
   //
   // We only use the 52 fraction bits of the floating point double so that the round trip conversion
   // from decimal to double and then from double to decimal results in exactly the original decimal.
   // From Wiki "If a decimal string with at most 15 significant digits is converted to IEEE 754 double
   // precision representation and then converted back to a string with the same number of significant digits,
   // then the final string should match the original"

    // 8 bits of the first character
    hiResult += (ULng32) stringValues[0] << 12; // char 0
    hiResult += (ULng32) stringValues[1] <<  4; // char 1
    hiResult += (ULng32) stringValues[2] >>  4; // 4 bits of char 2

    loResult += (ULng32) stringValues[2] << 28; // 4 bits of char 2
    loResult += (ULng32) stringValues[3] << 20; // char 3
    loResult += (ULng32) stringValues[4] << 12; // char 4
    loResult += (ULng32) stringValues[5] <<  4; // char 5
    loResult += (ULng32) stringValues[6] >>  4; // 4 bits of char 6


   // combine the two 32 bit integers to a floating point number
   // (2**32 * hiResult + loResult)
   result = hiResult * .4294967296E10 + loResult;

   return result;
 }

 double EncVal_Char_encode(const char * theValue, const NAType *theType)
 {
    CharType *cType = (CharType *)theType;
    char *charBufPtr = (char *) theValue;

    if (cType->supportsSQLnull())
       charBufPtr += cType->getSQLnullHdrSize();
    if (cType->isVaryingLen())
    {
       // copy the actual length of the string into an aligned variable
       short actualLenShort;
       Lng32 actualLen;

       //      ComASSERT(sizeof(short) == cType->getVarLenHdrSize());
       if (cType->getVarLenHdrSize() == sizeof(short))
 	{
 	  str_cpy_all((char *) &actualLenShort, charBufPtr, sizeof(short));
 	  actualLen = actualLenShort;
 	}
       else
 	str_cpy_all((char *) &actualLen, charBufPtr, sizeof(Lng32));

       return EncVal_encodeString(&charBufPtr[cType->getVarLenHdrSize()],
                                  actualLen, cType);
    }
    else // Fixed length
    {
       return EncVal_encodeString(charBufPtr, cType->getNominalSize(), cType);
    }
 }
 // -----------------------------------------------------------------------
 //  Methods for NormValue
 // -----------------------------------------------------------------------
 // Build a NormValue from a constant.
 //
 //   constant - the constant to encode; must not be NULL (asserted)
 //   negate   - when set, the encoded value is multiplied by -1
 //
 // Character constants are encoded with EncVal_Char_encode(), everything
 // else with the type's own encode(). The NULL flag is always cleared.
 NormValue::NormValue(const ConstValue * constant, NABoolean negate)
 {
   CMPASSERT(constant);				// Genesis 10-980626-6634
   const NAType * constType = constant->getType();
   void * rawValue = constant->getConstValue();

   // Under MODE_SPECIAL_1 a case-sensitive single-byte character type is
   // switched to case-insensitive for the duration of the encoding only;
   // the flag remembers that the switch has to be undone afterwards.
   NABoolean switchedToCaseInsensitive = FALSE;
   if ((CmpCommon::getDefault(::MODE_SPECIAL_1) == DF_ON) &&
       (constType->getTypeQualifier() == NA_CHARACTER_TYPE))
     {
       CharType *charType = (CharType *)constType;
       if (!charType->isCaseinsensitive() && (charType->getBytesPerChar() == 1))
         {
           charType->setCaseinsensitive(TRUE);
           switchedToCaseInsensitive = TRUE;
         }
     }

   theValue_ = (constType->getTypeQualifier() == NA_CHARACTER_TYPE)
                 ? EncVal_Char_encode((char *)rawValue, constType)
                 : constType->encode (rawValue);

   // restore the type's original case sensitivity
   if (switchedToCaseInsensitive)
     ((CharType *)constType)->setCaseinsensitive(FALSE);

   if (negate)
     theValue_ *= -1;

   isNullFlag_ = FALSE;
 }

 // Print this value, wrapped in <prefix> and <suffix>.
 //
 // The text is "NULL" when the NULL flag is set, "UNINIT" when the value
 // equals the _ENCODEDVALUE_UNINIT_VALUE_ sentinel, and otherwise the value
 // with four decimals. The formatted text is handed to the PRINTIT macro
 // together with f, c (also viewed as a Space) and buf, which decides where
 // the text goes.
 void NormValue::display (FILE *f, const char * prefix, const char * suffix,
                          CollHeap *c, char *buf) const
 {
   Space * space = (Space *)c;
   char mybuf[1000];

   if (isNullFlag_)
     snprintf(mybuf, sizeof(mybuf), "%sNULL%s", prefix, suffix);
   else if (theValue_ == _ENCODEDVALUE_UNINIT_VALUE_ )
     // was printed as "zINIT", which does not name anything; the sentinel
     // being reported is the UNINIT value
     snprintf(mybuf, sizeof(mybuf), "%sUNINIT%s", prefix, suffix);
   else
     snprintf(mybuf, sizeof(mybuf), "%s%.4f%s", prefix, theValue_, suffix);

   PRINTIT(f, c, space, buf, mybuf);
 }

 // Copy constructor: creates a list with room for nvl.entries() values on
 // heap h (the statement heap when h is NULL) and copies every value of nvl
 // into the same position.
 NormValueList::NormValueList(const NormValueList & nvl, NAMemory *h)
   :NAArray<NormValue>(h ? h : CmpCommon::statementHeap(),nvl.entries()),
   heap_(h ? h : CmpCommon::statementHeap())
 {
   const CollIndex count = nvl.entries();
   for (CollIndex pos = 0; pos < count; pos++)
   {
     NormValue element(nvl[pos]);
     insertAt(pos, element);
   }
 }

 // Assignment: inserts every value of <other> at the same position in this
 // list. Self-assignment is a no-op. Positions of this list beyond
 // other.entries() are not touched.
 NormValueList & NormValueList::operator= (const NormValueList& other)
 {
   if (this == &other)
     return *this;

   const CollIndex count = other.entries();
   for (CollIndex pos = 0; pos < count; pos++)
   {
     NormValue element(other[pos]);
     insertAt(pos, element);
   }
   return *this;
 }

 // Element-wise addition. Note that this operator modifies *this in place
 // (each value becomes the sum of itself and the matching value of <other>)
 // and returns a reference to *this. Both lists must have the same number
 // of entries (asserted).
 NormValueList & NormValueList::operator+ (const NormValueList& other)
 {
   CMPASSERT(this->entries() == other.entries());

   const CollIndex count = other.entries();
   for (CollIndex pos = 0; pos < count; pos++)
     at(pos).setValue(at(pos).getValue() + other[pos].getValue());

   return *this;
 }

 // Element-wise subtraction. Note that this operator modifies *this in place
 // (the matching value of <other> is subtracted from each value) and returns
 // a reference to *this. Both lists must have the same number of entries
 // (asserted).
 NormValueList & NormValueList::operator- (const NormValueList& other)
 {
   CMPASSERT(this->entries() == other.entries());

   const CollIndex count = other.entries();
   for (CollIndex pos = 0; pos < count; pos++)
     at(pos).setValue(at(pos).getValue() - other[pos].getValue());

   return *this;
 }

 // Scale every value of this list by <factor>. The list is modified in
 // place and a reference to it is returned.
 NormValueList & NormValueList::operator* (const double factor)
 {
   const CollIndex count = entries();
   for (CollIndex pos = 0; pos < count; pos++)
     at(pos).setValue(at(pos).getValue() * factor);

   return *this;
 }

 void NormValueList::round ()
 {
   for (CollIndex i = 0; i < entries(); i++)
   {
      CostScalar newVal = this->at(i).getValue();
      newVal = newVal.round();
      this->at(i).setValue(newVal.getValue());
   }
 }

 //
 // A helper data structure to sort the columns by column position in descending order.
 // Used by MCSB to compute the run-time hash function correctly.
 // See HashDistPartHash::preCodeGen() for details on how the part key column list is
 // converted to the partExpression.
 //
 // Pairs a column position with the index the column has in the original
 // column array, so that the array can be sorted by position while still
 // remembering where each column came from.
 struct pos_index
 {
     // accessors for the column position
     void  setPos(Int32 position) { pos_ = position; }
     Int32 getPos()               { return pos_; }

     // accessors for the index into the original column array
     void  setIdx(Int32 index)    { idx_ = index; }
     Int32 getIdx()               { return idx_; }

     Int32 pos_;   // column position (the sort key)
     Int32 idx_;   // index into the original column array
 };

 // qsort() comparator for pos_index entries that yields a DESCENDING order
 // of column positions: returns -1 when the first position is larger, 1 when
 // it is smaller and 0 when both are equal.
 Int32 comparePosIdx(const void* p1, const void* p2)
 {
   const Int32 firstPos  = ((pos_index*)p1)->pos_;
   const Int32 secondPos = ((pos_index*)p2)->pos_;

   if ( firstPos > secondPos )
     return -1;

   return ( firstPos == secondPos ) ? 0 : 1;
 }


 // A helper method to compute a run-time hash value for a composite
 // value.
 //
 // The ith component of the composite value is represented as follows,
 // depending on the type of the component value.
 //
 // SQL Integer types:  in NormValue_[i]
 // NUMERIC types:      in NormValue_[i]
 // CHAR types:         in cvPtrs[i]

 // Compute the run-time hash value of this (composite) value.
 //
 //   colArray - the columns of the group; entry i describes component i
 //   boundary - not used by this method
 //   cvPtrs   - optional array of constants; entry i is consulted for
 //              character components (may be NULL, and entries may be NULL)
 //
 // Returns the combined hash value, or 0 when it cannot be computed: when
 // this value has no value list, or when the number of columns differs from
 // the number of values (not all columns of a multi-column group fit in the
 // boundary value).
 //
 // The per-column hash values are combined in descending order of column
 // position, using the same rotate-and-xor step as ExHDPHashComb::eval().
 UInt32 EncodedValue::computeRunTimeHashValue(const NAColumnArray & colArray, const NAWchar * boundary, ConstValue* cvPtrs[])
 {
   const NormValueList *nvl = getValueList();

   // A value without a value list has no components to combine; the list
   // pointer used to be dereferenced without this check.
   if (!nvl)
     return 0;

   const CollIndex colEntries = nvl->entries();

   // If not all columns of a multi-column group fit in the boundary value,
   // return 0 as the final hash value.
   if (colArray.entries() != colEntries)
      return 0;

   // Re-arrange the colArray in the descending order of col positions. The
   // result is saved in a pos_index array below
   pos_index* posIndexArray =
          (pos_index*)(new (STMTHEAP) char[sizeof(pos_index)*colEntries]);

   for (CollIndex k = 0; k < colEntries; k++)
   {
      posIndexArray[k].setPos(colArray[k]->getPosition());
      posIndexArray[k].setIdx(k);
   }

   // Sort based on the column position numbers.
   qsort(posIndexArray, colEntries, sizeof(pos_index), comparePosIdx);

   UInt32 hashValueForColGrp = 0;

   for (CollIndex j = 0; j < colEntries; j++)
   {
     // Indirectly find out the corresponding actual NAColumn and
     // constantValue, for the jth column position in descending order.
     const CollIndex i = posIndexArray[j].getIdx();

     const NAType * type = colArray[i]->getType();
     const NABoolean useHashValue = type->useHashRepresentation();

     NormValue nV = nvl->at(i);

     UInt32 hashValueForCurCol = 0;

     if (nV.isNull())
     {
       hashValueForCurCol = ExHDPHash::nullHashValue;
     }
     else if (!useHashValue)
     {
       // Hash the binary image of the value, as produced by
       // outputToBufferToComputeRTHash() (at most 8 bytes are written).
       char data[10]; Int32 len = 0;
       UInt32 flags = ExHDPHash::NO_FLAGS;

       EncodedValue ev(nV.getValue());

       ev.outputToBufferToComputeRTHash(type, data, len, flags);
       hashValueForCurCol = ExHDPHash::hash(data, flags, len);
     }
     else if (type->getTypeQualifier() == NA_NUMERIC_TYPE &&
              type->getTypeName() == LiteralNumeric)
     {
       hashValueForCurCol = nV.computeHashForNumeric((SQLNumeric*)type);
     }
     else if (type->getTypeQualifier() == NA_CHARACTER_TYPE && cvPtrs)
     {
       ConstValue* cv = cvPtrs[i];

       if (cv)
       {
         // Case-insensitive, non-Unicode values are hashed in upper case.
         if (((CharType*)type)->isCaseinsensitive() &&
             ((CharType*)type)->getCharSet() != CharInfo::UNICODE)
           cv = cv->toUpper(HISTHEAP);

         hashValueForCurCol = cv->computeHashValue(*type);
       }
       // without a constant the hash value of this column stays 0
     }
     // else: arbitrarily make up a hash value (0). SB code will not form a
     // SB plan when one of the join columns is of a SQL type (i.e. a float)
     // not capable of being SB processed.

     //
     // Combine the current hash value into the final hash value
     //
     if (j == 0) // First time, directly copy the hash value to the result
       hashValueForColGrp = hashValueForCurCol;
     else
     {
       // The following code is from ExHDPHashComb::eval() method in
       // exp_function.cpp and shouldn't be changed.
       hashValueForColGrp = ((hashValueForColGrp << 1) | (hashValueForColGrp >> 31));
       hashValueForColGrp = hashValueForColGrp ^ hashValueForCurCol;
     }
   }
   NADELETEBASIC(posIndexArray, STMTHEAP);

   return  hashValueForColGrp;
 }

 void
 EncodedValue::outputToBufferToComputeRTHash(
          const NAType* naType,
          char* data, // output buffer to hold the data to be hashed
          Int32& len, // length of the data
          UInt32& flags // flags to be used during hash
          ) const
 {
       double x = getDblValue();
       flags = ExHDPHash::NO_FLAGS;
       switch (naType->getFSDatatype()) {
         case REC_BIN8_UNSIGNED:
         case REC_BOOLEAN:
            len = 1;
            { UInt8 y = (UInt8)x; memcpy(data, &y, len); }
            break;
         case REC_BIN8_SIGNED:
            len = 1;
            { Int8 y = (Int8)x; memcpy(data, &y, len); }
            break;
         case REC_BIN16_UNSIGNED:
            len = 2;
            flags =ExHDPHash::SWAP_TWO;
            { unsigned short y = (unsigned short)x; memcpy(data, &y, len); }
            break;
         case REC_BIN16_SIGNED:
            len = 2;
            flags =ExHDPHash::SWAP_TWO;
            { short y = (short)x; memcpy(data, &y, len); }
            break;
          case REC_BIN32_UNSIGNED:
            len = 4;
            flags =ExHDPHash::SWAP_FOUR;
            { UInt32 y = (UInt32)x; memcpy(data, &y, len); }
            break;
         case REC_BIN32_SIGNED:
            len = 4;
            flags =ExHDPHash::SWAP_FOUR;
            { Int32 y = (Int32)x; memcpy(data, &y, len); }
            break;
         case REC_BIN64_SIGNED:
            len = 8;
            flags =ExHDPHash::SWAP_EIGHT;
            { Int64 y = (Int64)x; len = 8; memcpy(data, &y, len); }
            break;
         case REC_BIN64_UNSIGNED:
            len = 8;
            flags =ExHDPHash::SWAP_EIGHT;
            { UInt64 y = (UInt64)x; len = 8; memcpy(data, &y, len); }
            break;
         default:
           len = 0; // For column types not supported by SB, we just
                    // skip the column value here.
        }
 }

 // -----------------------------------------------------------------------
 //  Methods for EncodedValue
 // -----------------------------------------------------------------------

 // It's very important to make sure that something that's not really NULL
 // doesn't have a double-encoding that's so close to NULL that it might
 // be sorted == or > than NULL (!)

 // Clamp a non-NULL value whose encoding is above
 // _ENCODEDVALUE_CLOSE_TO_NULL_ down to exactly that limit. Values flagged
 // as NULL are left alone.
 void EncodedValue::enforceNullMechanism()
 {
   if ( value_.isNull() )
     return;

   if ( value_.getValue() > _ENCODEDVALUE_CLOSE_TO_NULL_ )
     value_.setValue (_ENCODEDVALUE_CLOSE_TO_NULL_) ;
 }

 // Encode a constant expression and store the result in *thisPtr.
 //
 //   thisPtr - the EncodedValue that receives the value
 //   exprPtr - expression that must evaluate to a constant; anything else
 //             ends in ABORT
 //   negate  - not consulted here; the negation flag passed on to NormValue
 //             is the one reported by castToConstValue()
 void EncodedValue::addANormValue(EncodedValue * thisPtr,
 				 ItemExpr * exprPtr,
 				 NABoolean negate)
 {
   if (!exprPtr->doesExprEvaluateToConstant(TRUE))
     {
       ABORT("EncodedValue::addANormValue encountered an illegal expression");
       return;
     }

   NABoolean isNegated = FALSE;
   ConstValue * constant = exprPtr->castToConstValue(isNegated);

   NormValue encoded (constant, isNegated); // *might* generate NULL
   if ( encoded.getValue() == _ENCODEDVALUE_NULL_VALUE_ )
     encoded.setNull() ; // explicitly set NULL flag

   thisPtr->setValue (encoded) ;

 } // EncodedValue::addANormValue()

 double
 EncodedValue::minMaxValue(const NAType *pType, const NABoolean wantMin)
 {
   Lng32 len = pType->getTotalSize();
   char *buf = new char[len + 2+2];
   char *pt = buf;

   if (pType->supportsSQLnullPhysical())
     {
       Lng32 nullHdrSize = pType->getSQLnullHdrSize();
       pt = &buf[nullHdrSize];
       buf[0] = buf[1] = '\0';
       len -= nullHdrSize;
     }


   if (wantMin)
     pType->minRepresentableValue(pt, &len, NULL, CmpCommon::statementHeap());
   else
     pType->maxRepresentableValue(pt, &len, NULL, CmpCommon::statementHeap());

   double dblVal ;
   if (pType->getTypeQualifier() == NA_CHARACTER_TYPE)
        dblVal = EncVal_Char_encode(buf, pType);
   else
        dblVal = pType->encode(buf);

   delete [] buf;
   return dblVal;
 }

 // Copy constructor; allocates on heap h (the statement heap when h is NULL).
 // When <other> has a value list, a copy of that list is made and the single
 // value of <other> is not copied; otherwise the single value is copied via
 // setValue().
 EncodedValue::EncodedValue (const EncodedValue & other, NAMemory * h)
 : valueList_(NULL), heap_(h ? h : CmpCommon::statementHeap())
 {
   const NormValueList * sourceList = other.getValueList();
   if (!sourceList)
   {
     this->setValue (other.value_);
     return;
   }

   valueList_ = new (heap_) NormValueList(sourceList->entries(), heap_);
   *valueList_ = *sourceList;
 }

 // Build a multi-attribute value from a list of normalized values. The list
 // is copied onto heap h (the statement heap when h is NULL); the single
 // value is set from the first entry of the list, so the list is expected to
 // have at least one entry.
 EncodedValue::EncodedValue (const NormValueList& nvl, NAMemory * h)
 : valueList_(NULL), heap_(h ? h : CmpCommon::statementHeap())
 {
   NormValueList * listCopy = new (heap_) NormValueList(nvl.entries(), heap_);
   *listCopy = nvl;
   valueList_ = listCopy;

   this->setValue (nvl[0].getValue());
 }

 // Build a single value from an already encoded double. No value list is
 // created; HISTHEAP is recorded as the heap.
 EncodedValue::EncodedValue (double val)
 : valueList_(NULL), heap_(HISTHEAP)
 {
   // A NormValue built from a double never carries the NULL flag;
   // setValue() filters out non-NULLs with NULL double-value.
   NormValue encoded (val) ;
   this->setValue (encoded) ;
 }

 // Build a single value from a constant expression. No value list is
 // created; HISTHEAP is recorded as the heap. The encoding itself is done
 // by addANormValue().
 EncodedValue::EncodedValue (ItemExpr *expr,
                             NABoolean negate)
 : valueList_(NULL), heap_(HISTHEAP)
 {
   addANormValue(this, expr, negate);
 }

 void
 EncodedValue::constructorFunction (const NAWchar * theValue,
                                    const NAColumnArray &columns,
                                    NABoolean okToReportErrors,
                                    ConstValue* cvPtrs[])
 {
   // Some notes about error reporting for this function:
   //
   // The error reporting for this function is a little strange
   // and should be re-engineered when we can imagine a better
   // design for it.
   //
   // This function is called from two very different contexts.
   //
   // One is from the constructors of global objects, to provide
   // convenient encoded constants. Being global objects, this
   // call is made as a result of global constructor calls before
   // the C++ main for the process is invoked. As such, we cannot
   // depend on other global objects being constructed. So, for
   // example, we cannot depend on CmpCommon::diags() being
   // initialized, as C++ makes no guarantees about the order in
   // which global objects are created. Too, it does no good to
   // throw a C++ exception as there would be nothing to catch it
   // and process it. So, if an error happens in this code path,
   // we'll simply assert.
   //
   // The other is in the course of histogram processing. Histograms
   // have been read in, and now we want to encode the boundary
   // values in the histogram. These might be stale or corrupted
   // so that condition has to be detected. When detected, this
   // routine raises a warning in CmpCommon::diags(), and then
   // throws a C++ exception.
   //
   // This warning processing has to be done carefully. The way
   // it works is that lower level routines report errors into
   // CmpCommon::diags(). This routine checks for such errors and
   // if it sees any, it throws them away, replacing them with
   // a warning in CmpCommon::diags(). It then throws a C++
   // exception which is typically caught by HSHistogrmCursor::fetch
   // (ustat/hs_read.cpp). We use default histograms in that case.
   //
   // Why do we throw away the errors? We do this because of the
   // way the Normalizer handles CmpCommon::diags(). During
   // synthesise logical properties processing, histograms may
   // be read and processed. (Note: They can be read and processed
   // from other phases as well, such as table analysis.) The
   // Normalizer checks for errors in CmpCommon::diags(), and if
   // found, retries compilation. On the retry, CmpCommon::diags()
   // will be cleared, and the histograms code will simply use
   // a default histogram. So, if there is any error in
   // CmpCommon::diags(), we will lose the histogram warnings
   // generated in this method.
   //
   // Note that if there is already an error in CmpCommon::diags()
   // when this method is called, we'll lose the histogram warnings
   // anyway. Sigh.

   Lng32 mark = okToReportErrors ? CmpCommon::diags()->mark() : -1;

   // Find the first non-blank char.
   const NAWchar *item = theValue;
   while (*item == L' ')
     item++;

   if ( *item != L'(' ) // must be '('
     {
       if (okToReportErrors)
         {
           *CmpCommon::diags() << DgSqlCode(CATALOG_HISTOGRM_HISTINTS_TABLES_CONTAIN_BAD_VALUE)
              << DgWString0(theValue)
              << DgString1(columns[0]->getFullColRefNameAsAnsiString());
           CmpInternalException("Bad Interval Boundary", __FILE__ , __LINE__).throwException();
         }
       else
         {
           CMPASSERT(FALSE); // developer needs to fix the bug
         }
     }

   item++;

   NAWchar *next;
   NABoolean boundaryValueTruncated = FALSE;

   const Int32 BOUNDARY_LEN = HS_MAX_BOUNDARY_LEN + 10; // +10 just in case
   NAWchar buf[BOUNDARY_LEN];

   NormValue val;
   double dblVal;

   // If read from cache.
   if (*item == L'=')
     {
 #ifndef NDEBUG
       //this is supposedly dead code
       //putting the assert here to see
       //if we ever get to this part of
       //the code
       CMPASSERT(FALSE);
 #endif //NDEBUG
       item++;
       switch (*item)
       {
       case L'N':
 	{
 	  val.setNull(); // NULL flag explicitly set
 	  break;
 	}
       case L'L':
 	{ // generate min as default.
 	  dblVal = minMaxValue(columns[0]->getType(), TRUE);
 	  val.setValue(dblVal); // will never generate NULL
 	  break;
 	}
       case L'H':
 	{ // generate max as default.
 	  dblVal = minMaxValue(columns[0]->getType(), FALSE);
 	  val.setValue(dblVal); // will never generate NULL
 	  break;
 	}
       default:
 	{
 	  dblVal = na_wcstod(item,NULL);
 	  val.setValue(dblVal); // will never generate NULL
 	}
       }
       this->setValue (val) ; // filters out non-NULLs with NULL double-value
       return;
     }

   // If read from histogram tables.
   while (*item == L' ')
     item++;
   next = (NAWchar *)item;

   CollIndex entries = columns.entries() ;
   if(entries == 0)
     entries = 1;

   // CMPASSERT (entries == 1) ;
   // this assertion's not true! when reading in MCH's, we have multiple
   // columns --> however, for the EncodedValue we can only encode one of
   // them, so just ignore the others

   for (CollIndex i = 0 ; i < entries; i++)
   {
     // Check if a quoted item first.
     // Need to skip quotes within quotes and find the
     // corresponding right quote.
     if (*next++ == L'\'')
     {
       while ((*next != L'\0') &&
              ((*next++ != L'\'') || (*next++ == L'\'')))
         ;
       next--;
     }

     // Scan for delimiter (either ',' or ')' from next instead of item, because
     // in the case of a string, next has been advanced to the closing quote while
     // item still points to the beginning of the string. Scanning for a comma
     // starting at item may find a comma that is part of the string.
     // Note: In the case of INTERVAL literals, we might have a nasty SECOND(m,n)
     // qualifier at the end. We don't want to mistake a possible comma within such
     // a qualifier for our delimiter, so we have to use na_wcschrSkipOverParenText
     // instead of na_wcschr to search for the comma.
     if ( i == entries-1 OR entries==0 ) // sometimes columns is an empty list
       next = na_wcsrchr(next, L')') ;
     else  // it's an MCH
       {
     	NAWchar* nextSave = next;
         next = na_wcschrSkipOverParenText(next, L',');
         if ( next == NULL )
           {
             // Number of components of boundary value is less than the number of
             // cols in the MCH; they must have not all fit in the max boundary size.
             next = na_wcsrchr(nextSave, L')');
             boundaryValueTruncated = TRUE;
           }
       }

     if ( next == NULL ) // should never happen!
       {
         if (okToReportErrors)
           {
             CmpCommon::diags()->rewind(mark,TRUE); // get rid of any diags we may have added
             *CmpCommon::diags()
              << DgSqlCode(CATALOG_HISTOGRM_HISTINTS_TABLES_CONTAIN_BAD_VALUE)
              << DgWString0(theValue)
              << DgString1(columns[i]->getFullColRefNameAsAnsiString());
             CmpInternalException("Bad Interval Boundary", __FILE__ , __LINE__).throwException();
           }
         else
           {
             CMPASSERT(FALSE);  // developer needs to fix the bug
           }
       }

     Lng32 len = BOUNDARY_LEN;
     Lng32 storageSize = 0;
     Lng32 offset = 0;

     switch(*item)
     {
     case L'>':
       { // generate min as default.
 	dblVal = minMaxValue(columns[i]->getType(), TRUE);
 	val.setValue(dblVal); // will never generate NULL
         break;
       }
     case L'<':
       { // generate max as default.
 	dblVal = minMaxValue(columns[i]->getType(), FALSE);
 	val.setValue(dblVal); // will never generate NULL
 	break;
       }

     default:
       {
 	if ((item[0] == L'N') &&
 	    (item[1] == L'U') &&
 	    (item[2] == L'L') &&
 	    (item[3] == L'L'))
 	{
 	  val.setNull();
 	  break;
 	}

         // Parser assumes CmpCommon::diags() is initialized and available
         // so okToReportErrors better be true in this code path.
         CMPASSERT(okToReportErrors);

 	// invoke parser to parse the char string and generate a ConstValue
         Parser parser(CmpCommon::context());

                          // Leave space for both semi-colon and null
                          // next points to the next char after the value
         Int32 numChars = MINOF(BOUNDARY_LEN-2,na_wcslen(item)-na_wcslen(next));

         // Genesis solution 10-031101-0981
         // When fetching histogram for UNICODE column we need to prefix
         // the buffer to be analyzed by parser with _USC2
         const NAType* colType = columns[i]->getType();
         Int32 prefixLen = 0;
         if ( colType->getTypeQualifier() == NA_CHARACTER_TYPE )
         {
            switch (((CharType *)colType)->getCharSet()) {
              case CharInfo::UNICODE:
                 prefixLen = 5;
                 na_wcsncpy(buf,WIDE_("_UCS2"), prefixLen);
                 break;
              case CharInfo::UTF8:
                 prefixLen = 5;
                 na_wcsncpy(buf,WIDE_("_UTF8"), prefixLen);
                 break;
 /*********                          Uncomment if we ever support SJIS
              case CharInfo::SJIS:
                 prefixLen = 5;
                 na_wcsncpy(buf,WIDE_("_SJIS"), prefixLen);
                 break;
 */
              case CharInfo::KANJI_MP:
                 prefixLen = 6;
                 na_wcsncpy(buf,WIDE_("_KANJI"), prefixLen);
                 break;
              case CharInfo::KSC5601_MP:
                 prefixLen = 8;
                 na_wcsncpy(buf,WIDE_("_KSC5601"), prefixLen);
                 break;
              default: // no prefix is needed (ISO88591)
                 break;
            }
         }
         na_wcsncpy(buf+prefixLen,item,numChars);
         buf[numChars+prefixLen] = L';';
         buf[numChars+prefixLen+1] = L'\0';


 	NABoolean negate = FALSE;

 	ItemExpr *ie = parser.get_w_ItemExprTree(buf);
 	ConstValue * constVal = ie ? ie->castToConstValue(negate) : NULL;

         // Genesis 10-980626-6634, 10-040322-4395 and 10-090206-9004
         // ALM 5956 -- don't do the type check for MCH. Overflowed MCH boundary
         //             value may skip 1 or more column values, so pair-wise type
         //             matching may not work.
         if (!constVal ||
              (entries == 1 &&
               constVal->getType()->getTypeQualifier() != colType->getTypeQualifier()))
         {
           CmpCommon::diags()->rewind(mark,TRUE);  // get rid of any diags parser may have added
           *CmpCommon::diags()
               << DgSqlCode(CATALOG_HISTOGRM_HISTINTS_TABLES_CONTAIN_BAD_VALUE)
               << DgWString0(theValue)
               << DgString1(columns[i]->getFullColRefNameAsAnsiString());

           CmpInternalException("Bad Interval Boundary", __FILE__ , __LINE__).throwException();
         }

         // For a case-insensitive char type, it is necessary to upper-case the
         // boundary value first before compute the hash. This is to mimic the
         // run-time behavior where values of such char type are always
         // upper shifted before case-insensitive comparison, being hashed or
         // encoded.
         // skip the upshifting for Unicode characters, as that is how these
         // are treated in USTAT. 10-090225-9553
         if ((colType->getTypeQualifier() == NA_CHARACTER_TYPE) &&
             ((CharType*)colType)->isCaseinsensitive() &&
             (((CharType*)colType)->getCharSet() != CharInfo::UNICODE))
           constVal = constVal->toUpper(HISTHEAP);

         if (colType->getTypeQualifier() == NA_CHARACTER_TYPE)
         {
           CharInfo::Collation Col_Coll ;
           Col_Coll = ((CharType*)colType)->getCollation();

           if (Col_Coll != CharInfo::DefaultCollation) {
              ((CharType *)(constVal->getType()))->setCollation(Col_Coll);
           }
         }

 	val = NormValue (constVal, negate);

         // populate the ith entry in cvPtrs[]
         if ( cvPtrs ) cvPtrs[i] = constVal;

 	break;
       }
     }

     if(entries == 1)
       this->setValue(val); // filters out non-NULLs with NULL double-value
     else
     {
       if(!valueList_)
         valueList_ = new (heap_) NormValueList(entries);
       valueList_->insertAt(i,val);
     }

     // If boundaryValueTruncated is true, we have just processed the last
     // component of the MCH boundary value that was present.
     if (i == (entries - 1) || boundaryValueTruncated)
       return;

     // next should now point to ','
     item = next + 1;
     while (*item == L' ')
       item++;
     next = (NAWchar *)item;

     // Check if trailing part has been truncated due to size limit.
     if ((next[0] == L'.') &&
         (next[1] == L'.'))
       return;
   } // For loop

   return;
 }

 // -----------------------------------------------------------------------
 //  Given an upper and lower bound, represented in multi-attribute
 //  encoded version where:
 //    lowBound    = (x1, y1, ...)
 //    *this value = (x2, y2, ...)
 //    upperBound  = (x3, y3, ...)
 //
 //  this method returns a ratio stating where this value lies between
 //  the two bounds.
 //
 //  ratio = *this value - lowBound
 //          ----------------------
 //          upperBound - lowBound
 //
 //  For the general multi-column case this expands to:
 //
 //        = x2 - x1 + (y2-y1)/(Maxy-Miny) + (z2-z1)/(Maxz-Minz)*#(Maxy-Miny) ...
 //         -------------------------------------------------------------------
 //          x3 - x1 + (y3-y1)/(Maxy-Miny) + (z3-z1)/(Maxz-Minz)*#(Maxy-Miny) ...
 //
 //
 // -----------------------------------------------------------------------
 double EncodedValue::ratio (const EncodedValue & lowBound,
                             const EncodedValue & upperBound) const
 {
   // Only the single double value of each EncodedValue takes part in the
   // computation; the result is the position of *this between the two
   // bounds, i.e. (me - lo) / (hi - lo).

   // Fetch the three doubles: upper bound, lower bound, then this value.
   double hi = upperBound.getDblValue() ;
   double lo = lowBound.getDblValue() ;
   double me = this->getDblValue() ;

   // -1.0 marks "no result computed yet"; it is checked before returning.
   double retval = -1.0 ;

   if ( hi < lo )
   {
     // The caller handed the bounds over in the wrong order: swap them
     // and evaluate again so that a sensible result is still returned.
     retval = ratio (upperBound,lowBound) ;
   }
   else if ( hi > lo )
   {
     // hi > lo guarantees a non-zero denominator below.
     CMPASSERT(hi>=me);
     CMPASSERT(lo<=me);

     // The two end points are answered exactly, without a division.
     retval = ( me == hi ) ? 1.0
            : ( me == lo ) ? 0.0
            :                (me-lo)/(hi-lo) ;
   }
   else
   {
     // Neither bound is greater than the other (e.g. hi == lo): there is
     // no interval to take a ratio in -- misuse of this function!
     CMPASSERT(FALSE) ;
   }

   // A result must have been produced ...
   CMPASSERT(retval != -1.0) ;

   // ... and it must be a valid number.
   CMPASSERT ( NOT isnan(retval) ) ;

   return retval;
 }

 // Three-way comparison of two NormValues: MORE when *this > other,
 // LESS when *this < other, and SAME when neither relation holds.
 COMPARE_RESULT NormValue::compare (const NormValue &other) const
 {
   if ( *this > other )
     return MORE ;

   return ( *this < other ) ? LESS : SAME ;
 }

 // Element-by-element comparison of two value lists.  The result of the
 // first position whose values are not SAME is returned; SAME is returned
 // when every position of this list compares SAME.
 // NOTE(review): 'other' is indexed up to this list's entries() without
 // checking other->entries() -- presumably callers always pass a list of
 // at least the same length; confirm.
 COMPARE_RESULT NormValueList::compare (const NormValueList * other) const
 {
   for (CollIndex pos = 0; pos < entries(); pos++)
   {
     const COMPARE_RESULT outcome = this->at(pos).compare(other->at(pos));
     if (outcome != SAME)
       return outcome;
   }
   return SAME;
 }

 // Compares this encoded value with 'other'.
 //  - both sides carry a value list : the lists are compared element-wise
 //  - only one side carries a list  : that list's first element is compared
 //                                    with the other side's single value
 //  - neither side carries a list   : the two single values are compared
 COMPARE_RESULT EncodedValue::compare (const EncodedValue &other) const
 {
   const NormValueList * otherList = other.getValueList();

   if (valueList_)
   {
     if (otherList)
       return valueList_->compare(otherList);
     return valueList_->at(0).compare(other.getValue());
   }

   if (otherList)
     return getValue().compare(otherList->at(0));

   return getValue().compare(other.getValue());
 }

 void EncodedValue::display (FILE *f, const char * prefix, const char * suffix,
                             CollHeap *c, char *buf) const
 {
   Space * space = (Space *)c;
   char mybuf[1000];

   snprintf(mybuf, sizeof(mybuf), "%s( ", prefix);
   PRINTIT(f, c, space, buf, mybuf);

   if(valueList_)
   {
       CollIndex i = 0;
       CollIndex noOfCols = valueList_->entries();
       while(i < noOfCols)
       {
         valueList_->at(i).display(f, prefix, suffix, c, buf);
         snprintf(mybuf, sizeof(mybuf), "%s,", prefix);
         i++;
       };
   }
   else
     getValue().display(f, prefix, suffix, c, buf);

   snprintf(mybuf, sizeof(mybuf), " )%s", suffix);
   PRINTIT(f, c, space, buf, mybuf);
 }

 // Returns the text form of this value.
 //
 //  parenthesized      : when TRUE the text is wrapped as "( ... )".
 //  showFractionalPart : when TRUE the double value is printed with four
 //                       fractional digits ("%.4f"), otherwise with none
 //                       ("%.0f").
 //
 // A value whose getValue().isNull() is true is rendered as "NULL".
 const NAString EncodedValue::getText(NABoolean parenthesized, NABoolean showFractionalPart) const
 {
   // "%.4f" prints every integer digit of the double.  An IEEE double can
   // be as large as ~1.8e308, i.e. 309 digits plus sign, decimal point and
   // four fractional digits; adding the optional "( " / " )" wrapper and
   // the terminating NUL gives roughly 320 bytes.  The previous 100-byte
   // buffer filled with unbounded sprintf() could therefore be overrun.
   // The buffer is now large enough for any finite double and every write
   // is bounded by snprintf() as a second line of defence.
   char text[400];

   const char * const opener = ( parenthesized == TRUE ) ? "( " : "";
   const char * const closer = ( parenthesized == TRUE ) ? " )" : "";

   if ( getValue().isNull() )
     snprintf (text, sizeof(text), "%sNULL%s", opener, closer);
   else if ( showFractionalPart == TRUE )
     snprintf (text, sizeof(text), "%s%.4f%s", opener, getDblValue(), closer);
   else
     snprintf (text, sizeof(text), "%s%.0f%s", opener, getDblValue(), closer);

   return text;
 }

 // Forwards to computeHashForNumeric() of the value returned by getValue()
 // and hands back that result unchanged.
 UInt32 EncodedValue::computeHashForNumeric(SQLNumeric* nt)
 {
   return this->getValue().computeHashForNumeric(nt);
 }

 // Hashes this value as an integer of the numeric type 'nt'.
 //
 // The double value is multiplied by 10^scale, converted to an integer
 // whose width and signedness are chosen from nt->getNominalSize() and
 // nt->isUnsigned(), and the first 'len' bytes of that integer are passed
 // to ExHDPHash::hash() together with the matching SWAP_* flag.
 //
 //  nt : numeric type describing size, scale and signedness; must not be
 //       NULL.
 //
 // Returns the value produced by ExHDPHash::hash().
 UInt32 NormValue::computeHashForNumeric(SQLNumeric* nt)
 {
    CMPASSERT(nt);
    Lng32 len = nt->getNominalSize();
    CMPASSERT(len >= 0);

    char result[8];

    // Every branch below memcpy()s 'len' bytes into 'result'.  Nothing used
    // to bound 'len' from above, so a nominal size larger than the buffer
    // would have overrun 'result' (and read past the source temporary).
    CMPASSERT(len <= (Lng32)sizeof(result));

    // Fix 10-091117-6417.
    // Warning: some of the following conversions can be lossy when the
    // scale is relatively big.  For example, 2.9998611111111111
    // will become 2.9998611111111112 after converting to Int64,
    // where 2.9998611111111111 is of type NUMERIC(18, 16). When that value
    // is skewed, we will miss it in the skew buster.

    // The scaled value is the same in every branch, so compute it once.
    const double scaled = getValue() * pow(10.0, nt->getScale());

    UInt32 flags = ExHDPHash::NO_FLAGS;

    if ( len <= 2 ) {
      flags = ExHDPHash::SWAP_TWO;
      if ( nt->isUnsigned() ) {
         unsigned short usShrtTemp = (unsigned short)scaled;
         memcpy(result, (char*)&usShrtTemp, len);
      } else {
         short shrtTemp = (short)scaled;
         memcpy(result, (char*)&shrtTemp, len);
      }
    } else if ( len <= 4 ) {
      flags = ExHDPHash::SWAP_FOUR;
      if ( nt->isUnsigned() ) {
         ULng32 usLongTemp = (ULng32)scaled;
         memcpy(result, (char*)&usLongTemp, len);
      } else {
         Lng32 longTemp = (Lng32)scaled;
         memcpy(result, (char*)&longTemp, len);
      }
    } else {
      // Sizes above four bytes are hashed from an Int64; signedness is not
      // consulted in this branch.
      flags = ExHDPHash::SWAP_EIGHT;
      Int64 int64Temp = (Int64)scaled;
      memcpy(result, (char*)&int64Temp, len);
    }

    return ExHDPHash::hash(result, flags, len);
 }