blob: f8c6b7bb985054d1df48e8b19f7f910f8790d0f0 [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
*****************************************************************************
*
* File: EncodedValue.C
* Description: Encoded Value class contains the encoded format for (possibly
* multi-attribute) values, used for storing histogram values.
* Created: June 7, 1995
* Language: C++
*
*
*
*
*****************************************************************************
*/
#include "Platform.h"
#include "Sqlcomp.h" /* must be first included file */
#include "EncodedValue.h"
#include "ItemColRef.h"
#include "parser.h"
#include "str.h"
#include <wchar.h>
#include "NLSConversion.h"
#include "hs_const.h" /* for HS_MAX_BOUNDARY_LEN */
#include "wstr.h"
#include <exp_function.h>
#include <ostream>
#include "CompException.h"
#include "exp_function.h"
const EncodedValue NULL_ENCODEDVALUE (WIDE_("(NULL)")) ;
const EncodedValue UNINIT_ENCODEDVALUE (_ENCODEDVALUE_UNINIT_VALUE_) ;
//
// NOTE: This code came from encodeString() in .../common/CharType.cpp and
// was moved here because
// (1) this coded needed to start calling ex_function_encode::encodeCollationKey()
// which is in the Executor and therefore not callable from the common library
// code and
// (2) the places in this source file (EncodedValue.cpp) that called
// CharType.cpp's encodeString() routine were the ONLY places that needed
// that functionality, so the encodeString() logic more properly belonged here.
//
double EncVal_encodeString(const char * str, Lng32 strLen, CharType *cType)
{
double result;
// the blank-padded first 8 bytes of string <str>
unsigned char stringValues[8];
// two long integers that, when concatenated, contain the result
// as a 64 bit integer
ULng32 hiResult = 0;
ULng32 loResult = 0;
// ---------------------------------------------------------------------
// the encode function performs blank-padding just as the comparison
// operator does, in order to make encoded values of all character data
// types compatible
// ---------------------------------------------------------------------
// for the NCHAR prototype we want to treat nchars just like chars.
// redo this when we do the real thing.
if (cType->getCharSet() != CharInfo::UNICODE ) // ... used to be ... if (cType->getBytesPerChar() == 1)
{
// fill "stringValues" with 8 bit blanks
unsigned char blank = (unsigned char) cType->getBlankCharacterValue();
for (Int32 i = 0; i < 8; i++)
stringValues[i] = blank;
} // bytes per char == 1
else
{
ComASSERT(cType->getBytesPerChar() == SQL_DBCHAR_SIZE);
// 2/19/98: copy the blank char as byte string to stringValues[]
// so that endian-ness will not be an issue.
unsigned short blank = (unsigned short) cType->getBlankCharacterValue();
for (Int32 i = 0; i < 4; i++)
{
str_cpy_all((char*)&stringValues[SQL_DBCHAR_SIZE*i],
(char*)&blank, sizeof(blank));
}
} // bytes per char != 1
// NOTE: Caseinsensitive is not supported with Czech Collation
if ((cType->isCaseinsensitive()) &&
(NOT cType->isUpshifted()) && cType->getBytesPerChar() == 1)
{
// upshift and copy the first 8 bytes of str
// (or all bytes if it is shorter than 8)
str_cpy_convert((char *) stringValues, (char *)str, MINOF(strLen,8), -1);
}
else
{
#define MAXPASSES 4
#define ENCKEYBUFLEN (8 * (MAXPASSES+1) + 2) //Ensure Temp buffer big enough!
UInt8 encodeKeyBuf1[ ENCKEYBUFLEN ]; //Temp buffer
UInt8 * tmpstr = (UInt8 *)str;
Int32 tmpLen = strLen;
CharInfo::Collation collation = cType->getCollation();
if (collation != CharInfo::DefaultCollation) {
memset(encodeKeyBuf1, 0, ENCKEYBUFLEN);
Int16 nPasses = CollationInfo::getCollationNPasses(collation);
ex_function_encode::encodeCollationKey( (const UInt8 *)str,
MINOF(strLen,8),
encodeKeyBuf1,
ENCKEYBUFLEN,
nPasses,
collation,
FALSE /*remove trailing spaces? */
);
tmpstr = encodeKeyBuf1;
tmpLen = ENCKEYBUFLEN;
}
// copy the first 8 bytes of str (or all bytes if it is shorter than 8)
str_cpy_all((char *) stringValues, (char *)tmpstr, MINOF(tmpLen,8));
}
#ifdef NA_LITTLE_ENDIAN
if (cType->getBytesPerChar() == SQL_DBCHAR_SIZE)
{
wc_swap_bytes((NAWchar*)stringValues, 4);
}
#endif
// leave the upper 12 bits (mask = 0xfff00000) of hiResult empty
//
// We only use the 52 fraction bits of the floating point double so that the round trip conversion
// from decimal to double and then from double to decimal results in exactly the original decimal.
// From Wiki "If a decimal string with at most 15 significant digits is converted to IEEE 754 double
// precision representation and then converted back to a string with the same number of significant digits,
// then the final string should match the original"
// 8 bits of the first character
hiResult += (ULng32) stringValues[0] << 12; // char 0
hiResult += (ULng32) stringValues[1] << 4; // char 1
hiResult += (ULng32) stringValues[2] >> 4; // 4 bits of char 2
loResult += (ULng32) stringValues[2] << 28; // 4 bits of char 2
loResult += (ULng32) stringValues[3] << 20; // char 3
loResult += (ULng32) stringValues[4] << 12; // char 4
loResult += (ULng32) stringValues[5] << 4; // char 5
loResult += (ULng32) stringValues[6] >> 4; // 4 bits of char 6
// combine the two 32 bit integers to a floating point number
// (2**32 * hiResult + loResult)
result = hiResult * .4294967296E10 + loResult;
return result;
}
double EncVal_Char_encode(const char * theValue, const NAType *theType)
{
CharType *cType = (CharType *)theType;
char *charBufPtr = (char *) theValue;
if (cType->supportsSQLnull())
charBufPtr += cType->getSQLnullHdrSize();
if (cType->isVaryingLen())
{
// copy the actual length of the string into an aligned variable
short actualLenShort;
Lng32 actualLen;
// ComASSERT(sizeof(short) == cType->getVarLenHdrSize());
if (cType->getVarLenHdrSize() == sizeof(short))
{
str_cpy_all((char *) &actualLenShort, charBufPtr, sizeof(short));
actualLen = actualLenShort;
}
else
str_cpy_all((char *) &actualLen, charBufPtr, sizeof(Lng32));
return EncVal_encodeString(&charBufPtr[cType->getVarLenHdrSize()],
actualLen, cType);
}
else // Fixed length
{
return EncVal_encodeString(charBufPtr, cType->getNominalSize(), cType);
}
}
// -----------------------------------------------------------------------
// Methods for NormValue
// -----------------------------------------------------------------------
NormValue::NormValue(const ConstValue * constant, NABoolean negate)
{
CMPASSERT(constant); // Genesis 10-980626-6634
const NAType * theType = constant->getType();
void * theValue = constant->getConstValue();
NABoolean reset = FALSE;
if ((CmpCommon::getDefault(::MODE_SPECIAL_1) == DF_ON) &&
(theType->getTypeQualifier() == NA_CHARACTER_TYPE))
{
CharType *cType = (CharType *)theType;
if((! cType->isCaseinsensitive()) &&
(cType->getBytesPerChar() == 1))
{
cType->setCaseinsensitive(TRUE);
reset = TRUE;
}
}
if (theType->getTypeQualifier() == NA_CHARACTER_TYPE)
{
theValue_ = EncVal_Char_encode((char *)theValue, theType);
}
else theValue_ = theType->encode (theValue);
if (reset)
{
CharType *cType = (CharType *)theType;
cType->setCaseinsensitive(FALSE);
}
if (negate)
theValue_ *= -1;
isNullFlag_ = FALSE;
}
void NormValue::display (FILE *f, const char * prefix, const char * suffix,
CollHeap *c, char *buf) const
{
Space * space = (Space *)c;
char mybuf[1000];
if (isNullFlag_)
{
snprintf(mybuf, sizeof(mybuf), "%sNULL%s", prefix, suffix);
PRINTIT(f, c, space, buf, mybuf);
}
else if (theValue_ == _ENCODEDVALUE_UNINIT_VALUE_ )
{
snprintf(mybuf, sizeof(mybuf), "%szINIT%s", prefix, suffix);
PRINTIT(f, c, space, buf, mybuf);
}
else
{
snprintf(mybuf, sizeof(mybuf), "%s%.4f%s", prefix, theValue_, suffix);
PRINTIT(f, c, space, buf, mybuf);
}
}
NormValueList::NormValueList(const NormValueList & nvl, NAMemory *h)
:NAArray<NormValue>(h ? h : CmpCommon::statementHeap(),nvl.entries()),
heap_(h ? h : CmpCommon::statementHeap())
{
for(CollIndex i=0; i<nvl.entries(); i++)
{
NormValue newVal = nvl[i];
insertAt(i, newVal);
}
}
NormValueList & NormValueList::operator= (const NormValueList& other)
{
if (this != &other)
{
for (CollIndex i = 0; i < other.entries(); i++)
{
NormValue newVal = other[i];
insertAt(i, newVal);
}
}
return *this;
}
NormValueList & NormValueList::operator+ (const NormValueList& other)
{
CMPASSERT(this->entries() == other.entries());
for (CollIndex i = 0; i < other.entries(); i++)
{
double newVal = this->at(i).getValue() + other[i].getValue();
this->at(i).setValue(newVal);
}
return *this;
}
NormValueList & NormValueList::operator- (const NormValueList& other)
{
CMPASSERT(this->entries() == other.entries());
for (CollIndex i = 0; i < other.entries(); i++)
{
double newVal = this->at(i).getValue() - other[i].getValue();
this->at(i).setValue(newVal);
}
return *this;
}
NormValueList & NormValueList::operator* (const double factor)
{
for (CollIndex i = 0; i < entries(); i++)
{
double newVal = this->at(i).getValue() * factor;
this->at(i).setValue(newVal);
}
return *this;
}
void NormValueList::round ()
{
for (CollIndex i = 0; i < entries(); i++)
{
CostScalar newVal = this->at(i).getValue();
newVal = newVal.round();
this->at(i).setValue(newVal.getValue());
}
}
//
// A help data structure to sort the columns by column positions in descending order.
// Used by MCSB to compute the run-time hash function correctly.
// See HashDistPartHash::preCodeGen() for detail on the part key column list is converted
// to the partExpression.
//
struct pos_index
{
void setPos(Int32 x) { pos_ = x; }
void setIdx(Int32 x) { idx_ = x; }
Int32 getPos() { return pos_; }
Int32 getIdx() { return idx_; }
Int32 pos_;
Int32 idx_;
};
Int32 comparePosIdx(const void* p1, const void* p2)
{
pos_index* pi1 = (pos_index*)p1;
pos_index* pi2 = (pos_index*)p2;
if ( pi1->pos_ == pi2->pos_ )
return 0;
if ( pi1->pos_ > pi2->pos_ )
return -1;
else
return 1;
}
// A helper method to compute a run-time hash value for a composite
// value.
//
// The ith component of the composite value is represented as follows,
// depending on the type of the component value.
//
// SQL Integer types: in NormValue_[i]
// NUMERIC types: in NormValue_[i]
// CHAR types: in cvPtrs[i]
UInt32 EncodedValue::computeRunTimeHashValue(const NAColumnArray & colArray, const NAWchar * boundary, ConstValue* cvPtrs[])
{
UInt32 hashValueForCurCol = 0, hashValueForColGrp = 0;
NABoolean useHashValue = TRUE;
const NormValueList *nvl = getValueList();
CollIndex colEntries = nvl->entries();
// If not all columns of a multi-column group fit in the boundary value,
// return 0 as the final hash value.
if (colArray.entries() != colEntries)
return 0;
// Re-arrage the colArray in the descending order of col positions. The result
// is saved in a pos_index array below
pos_index* posIndexArray =
(pos_index*)(new (STMTHEAP) char[sizeof(pos_index)*colArray.entries()]);
for(CollIndex k=0; k<colEntries; k++) {
posIndexArray[k].setPos(colArray[k]->getPosition());
posIndexArray[k].setIdx(k);
}
// Sort based on the column position numbers.
qsort(posIndexArray, colArray.entries(), sizeof(pos_index), comparePosIdx);
for(CollIndex j=0; j<colEntries; j++)
{
// Indirectly find out the corresponding actual NAColumn and constantValue, for the jth
// column position in decending order.
CollIndex i = posIndexArray[j].getIdx();
const NAType * type = colArray[i]->getType();
useHashValue = type->useHashRepresentation();
NormValue nV = nvl->at(i);
hashValueForCurCol = 0;
if(nV.isNull()) {
hashValueForCurCol = ExHDPHash::nullHashValue;
} else
if( !useHashValue )
{
char data[10]; Int32 len = 0;
UInt32 flags = ExHDPHash::NO_FLAGS;
EncodedValue ev(nV.getValue());
ev.outputToBufferToComputeRTHash(type, data, len, flags);
hashValueForCurCol = ExHDPHash::hash(data, flags, len);
} else if (type->getTypeQualifier() == NA_NUMERIC_TYPE &&
type->getTypeName() == LiteralNumeric)
{
hashValueForCurCol = nV.computeHashForNumeric((SQLNumeric*)type);
} else if ( type->getTypeQualifier() == NA_CHARACTER_TYPE && cvPtrs ) {
ConstValue* cv = cvPtrs[i];
if ( cv ) {
if ( type->getTypeQualifier() == NA_CHARACTER_TYPE &&
((CharType*)type)->isCaseinsensitive() &&
((CharType*)type)->getCharSet() != CharInfo::UNICODE
)
cv = cv->toUpper(HISTHEAP);
hashValueForCurCol = cv->computeHashValue(*type);
}
} else {
// arbitrarily make up a hash value. SB code will not form a SB
// plan when one of the join columns is of a SQL type (i.e. a float)
// not capable of being SB processed.
hashValueForCurCol = 0;
}
//
// Combine the current hash value into the final hash value
//
if(j == 0) // First time, directly copy the hash value to the result
hashValueForColGrp = hashValueForCurCol;
else
{
// The following code is from ExHDPHashComb::eval() method in exp_function.cpp
// and shouldnt be changed.
hashValueForColGrp = ((hashValueForColGrp << 1) | (hashValueForColGrp >> 31));
hashValueForColGrp = hashValueForColGrp ^ hashValueForCurCol;
}
}
NADELETEBASIC(posIndexArray, STMTHEAP);
return hashValueForColGrp;
}
void
EncodedValue::outputToBufferToComputeRTHash(
const NAType* naType,
char* data, // output buffer to hold the data to be hashed
Int32& len, // length of the data
UInt32& flags // flags to be used during hash
) const
{
double x = getDblValue();
flags = ExHDPHash::NO_FLAGS;
switch (naType->getFSDatatype()) {
case REC_BIN8_UNSIGNED:
case REC_BOOLEAN:
len = 1;
{ UInt8 y = (UInt8)x; memcpy(data, &y, len); }
break;
case REC_BIN8_SIGNED:
len = 1;
{ Int8 y = (Int8)x; memcpy(data, &y, len); }
break;
case REC_BIN16_UNSIGNED:
len = 2;
flags =ExHDPHash::SWAP_TWO;
{ unsigned short y = (unsigned short)x; memcpy(data, &y, len); }
break;
case REC_BIN16_SIGNED:
len = 2;
flags =ExHDPHash::SWAP_TWO;
{ short y = (short)x; memcpy(data, &y, len); }
break;
case REC_BIN32_UNSIGNED:
len = 4;
flags =ExHDPHash::SWAP_FOUR;
{ UInt32 y = (UInt32)x; memcpy(data, &y, len); }
break;
case REC_BIN32_SIGNED:
len = 4;
flags =ExHDPHash::SWAP_FOUR;
{ Int32 y = (Int32)x; memcpy(data, &y, len); }
break;
case REC_BIN64_SIGNED:
len = 8;
flags =ExHDPHash::SWAP_EIGHT;
{ Int64 y = (Int64)x; len = 8; memcpy(data, &y, len); }
break;
case REC_BIN64_UNSIGNED:
len = 8;
flags =ExHDPHash::SWAP_EIGHT;
{ UInt64 y = (UInt64)x; len = 8; memcpy(data, &y, len); }
break;
default:
len = 0; // For column types not supported by SB, we just
// skip the column value here.
}
}
// -----------------------------------------------------------------------
// Methods for EncodedValue
// -----------------------------------------------------------------------
// It's very important to make sure that something that's not really NULL
// doesn't have a double-encoding that's so close to NULL that it might
// be sorted == or > than NULL (!)
void EncodedValue::enforceNullMechanism()
{
if ( (value_.getValue() > _ENCODEDVALUE_CLOSE_TO_NULL_) AND NOT value_.isNull() )
value_.setValue (_ENCODEDVALUE_CLOSE_TO_NULL_) ;
}
void EncodedValue::addANormValue(EncodedValue * thisPtr,
ItemExpr * exprPtr,
NABoolean negate)
{
if(exprPtr->doesExprEvaluateToConstant(TRUE))
{
NABoolean neg = FALSE;
ConstValue * cv = exprPtr->castToConstValue(neg);
NormValue value (cv, neg); // *might* generate NULL
if ( value.getValue() == _ENCODEDVALUE_NULL_VALUE_ )
value.setNull() ; // explicitly set NULL flag
thisPtr->setValue (value) ;
return;
}
ABORT("EncodedValue::addANormValue encountered an illegal expression");
return;
} // EncodedValue::addANormValue()
double
EncodedValue::minMaxValue(const NAType *pType, const NABoolean wantMin)
{
Lng32 len = pType->getTotalSize();
char *buf = new char[len + 2+2];
char *pt = buf;
if (pType->supportsSQLnullPhysical())
{
Lng32 nullHdrSize = pType->getSQLnullHdrSize();
pt = &buf[nullHdrSize];
buf[0] = buf[1] = '\0';
len -= nullHdrSize;
}
if (wantMin)
pType->minRepresentableValue(pt, &len, NULL, CmpCommon::statementHeap());
else
pType->maxRepresentableValue(pt, &len, NULL, CmpCommon::statementHeap());
double dblVal ;
if (pType->getTypeQualifier() == NA_CHARACTER_TYPE)
dblVal = EncVal_Char_encode(buf, pType);
else
dblVal = pType->encode(buf);
delete [] buf;
return dblVal;
}
EncodedValue::EncodedValue (const EncodedValue & other, NAMemory * h)
: valueList_(NULL), heap_(h ? h : CmpCommon::statementHeap())
{
const NormValueList * nvl = other.getValueList();
if(nvl)
{
valueList_ = new (heap_) NormValueList(nvl->entries(), heap_);
*valueList_ = *nvl;
}
else
this->setValue (other.value_);
}
EncodedValue::EncodedValue (const NormValueList& nvl, NAMemory * h)
: valueList_(NULL), heap_(h ? h : CmpCommon::statementHeap())
{
valueList_ = new (heap_) NormValueList(nvl.entries(), heap_);
*valueList_ = nvl;
this->setValue (nvl[0].getValue());
}
EncodedValue::EncodedValue (double val)
: valueList_(NULL), heap_(HISTHEAP)
{
NormValue value (val) ; // will never generate NULL
this->setValue (value) ; // filters out non-NULLs with NULL double-value
}
EncodedValue::EncodedValue (ItemExpr *expr,
NABoolean negate)
: valueList_(NULL), heap_(HISTHEAP)
{
addANormValue(this,expr,negate);
}
void
EncodedValue::constructorFunction (const NAWchar * theValue,
const NAColumnArray &columns,
NABoolean okToReportErrors,
ConstValue* cvPtrs[])
{
// Some notes about error reporting for this function:
//
// The error reporting for this function is a little strange
// and should be re-engineered when we can imagine a better
// design for it.
//
// This function is called from two very different contexts.
//
// One is from the constructors of global objects, to provide
// convenient encoded constants. Being global objects, this
// call is made as a result of global constructor calls before
// the C++ main for the process is invoked. As such, we cannot
// depend on other global objects being constructed. So, for
// example, we cannot depend on CmpCommon::diags() being
// initialized, as C++ makes no guarantees about the order in
// which global objects are created. Too, it does no good to
// throw a C++ exception as there would be nothing to catch it
// and process it. So, if an error happens in this code path,
// we'll simply assert.
//
// The other is in the course of histogram processing. Histograms
// have been read in, and now we want to encode the boundary
// values in the histogram. These might be stale or corrupted
// so that condition has to be detected. When detected, this
// routine raises a warning in CmpCommon::diags(), and then
// throws a C++ exception.
//
// This warning processing has to be done carefully. The way
// it works is that lower level routines report errors into
// CmpCommon::diags(). This routine checks for such errors and
// if it sees any, it throws them away, replacing them with
// a warning in CmpCommon::diags(). It then throws a C++
// exception which is typically caught by HSHistogrmCursor::fetch
// (ustat/hs_read.cpp). We use default histograms in that case.
//
// Why do we throw away the errors? We do this because of the
// way the Normalizer handles CmpCommon::diags(). During
// synthesise logical properties processing, histograms may
// be read and processed. (Note: They can be read and processed
// from other phases as well, such as table analysis.) The
// Normalizer checks for errors in CmpCommon::diags(), and if
// found, retries compilation. On the retry, CmpCommon::diags()
// will be cleared, and the histograms code will simply use
// a default histogram. So, if there is any error in
// CmpCommon::diags(), we will lose the histogram warnings
// generated in this method.
//
// Note that if there is already an error in CmpCommon::diags()
// when this method is called, we'll lose the histogram warnings
// anyway. Sigh.
Lng32 mark = okToReportErrors ? CmpCommon::diags()->mark() : -1;
// Find the first non-blank char.
const NAWchar *item = theValue;
while (*item == L' ')
item++;
if ( *item != L'(' ) // must be '('
{
if (okToReportErrors)
{
*CmpCommon::diags() << DgSqlCode(CATALOG_HISTOGRM_HISTINTS_TABLES_CONTAIN_BAD_VALUE)
<< DgWString0(theValue)
<< DgString1(columns[0]->getFullColRefNameAsAnsiString());
CmpInternalException("Bad Interval Boundary", __FILE__ , __LINE__).throwException();
}
else
{
CMPASSERT(FALSE); // developer needs to fix the bug
}
}
item++;
NAWchar *next;
NABoolean boundaryValueTruncated = FALSE;
const Int32 BOUNDARY_LEN = HS_MAX_BOUNDARY_LEN + 10; // +10 just in case
NAWchar buf[BOUNDARY_LEN];
NormValue val;
double dblVal;
// If read from cache.
if (*item == L'=')
{
#ifndef NDEBUG
//this is supposedly dead code
//putting the assert here to see
//if we ever get to this part of
//the code
CMPASSERT(FALSE);
#endif //NDEBUG
item++;
switch (*item)
{
case L'N':
{
val.setNull(); // NULL flag explicitly set
break;
}
case L'L':
{ // generate min as default.
dblVal = minMaxValue(columns[0]->getType(), TRUE);
val.setValue(dblVal); // will never generate NULL
break;
}
case L'H':
{ // generate max as default.
dblVal = minMaxValue(columns[0]->getType(), FALSE);
val.setValue(dblVal); // will never generate NULL
break;
}
default:
{
dblVal = na_wcstod(item,NULL);
val.setValue(dblVal); // will never generate NULL
}
}
this->setValue (val) ; // filters out non-NULLs with NULL double-value
return;
}
// If read from histogram tables.
while (*item == L' ')
item++;
next = (NAWchar *)item;
CollIndex entries = columns.entries() ;
if(entries == 0)
entries = 1;
// CMPASSERT (entries == 1) ;
// this assertion's not true! when reading in MCH's, we have multiple
// columns --> however, for the EncodedValue we can only encode one of
// them, so just ignore the others
for (CollIndex i = 0 ; i < entries; i++)
{
// Check if a quoted item first.
// Need to skip quotes within quotes and find the
// corresponding right quote.
if (*next++ == L'\'')
{
while ((*next != L'\0') &&
((*next++ != L'\'') || (*next++ == L'\'')))
;
next--;
}
// Scan for delimiter (either ',' or ')' from next instead of item, because
// in the case of a string, next has been advanced to the closing quote while
// item still points to the beginning of the string. Scanning for a comma
// starting at item may find a comma that is part of the string.
// Note: In the case of INTERVAL literals, we might have a nasty SECOND(m,n)
// qualifier at the end. We don't want to mistake a possible comma within such
// a qualifier for our delimiter, so we have to use na_wcschrSkipOverParenText
// instead of na_wcschr to search for the comma.
if ( i == entries-1 OR entries==0 ) // sometimes columns is an empty list
next = na_wcsrchr(next, L')') ;
else // it's an MCH
{
NAWchar* nextSave = next;
next = na_wcschrSkipOverParenText(next, L',');
if ( next == NULL )
{
// Number of components of boundary value is less than the number of
// cols in the MCH; they must have not all fit in the max boundary size.
next = na_wcsrchr(nextSave, L')');
boundaryValueTruncated = TRUE;
}
}
if ( next == NULL ) // should never happen!
{
if (okToReportErrors)
{
CmpCommon::diags()->rewind(mark,TRUE); // get rid of any diags we may have added
*CmpCommon::diags()
<< DgSqlCode(CATALOG_HISTOGRM_HISTINTS_TABLES_CONTAIN_BAD_VALUE)
<< DgWString0(theValue)
<< DgString1(columns[i]->getFullColRefNameAsAnsiString());
CmpInternalException("Bad Interval Boundary", __FILE__ , __LINE__).throwException();
}
else
{
CMPASSERT(FALSE); // developer needs to fix the bug
}
}
Lng32 len = BOUNDARY_LEN;
Lng32 storageSize = 0;
Lng32 offset = 0;
switch(*item)
{
case L'>':
{ // generate min as default.
dblVal = minMaxValue(columns[i]->getType(), TRUE);
val.setValue(dblVal); // will never generate NULL
break;
}
case L'<':
{ // generate max as default.
dblVal = minMaxValue(columns[i]->getType(), FALSE);
val.setValue(dblVal); // will never generate NULL
break;
}
default:
{
if ((item[0] == L'N') &&
(item[1] == L'U') &&
(item[2] == L'L') &&
(item[3] == L'L'))
{
val.setNull();
break;
}
// Parser assumes CmpCommon::diags() is initialized and available
// so okToReportErrors better be true in this code path.
CMPASSERT(okToReportErrors);
// invoke parser to parse the char string and generate a ConstValue
Parser parser(CmpCommon::context());
// Leave space for both semi-colon and null
// next points to the next char after the value
Int32 numChars = MINOF(BOUNDARY_LEN-2,na_wcslen(item)-na_wcslen(next));
// Genesis solution 10-031101-0981
// When fetching histogram for UNICODE column we need to prefix
// the buffer to be analyzed by parser with _USC2
const NAType* colType = columns[i]->getType();
Int32 prefixLen = 0;
if ( colType->getTypeQualifier() == NA_CHARACTER_TYPE )
{
switch (((CharType *)colType)->getCharSet()) {
case CharInfo::UNICODE:
prefixLen = 5;
na_wcsncpy(buf,WIDE_("_UCS2"), prefixLen);
break;
case CharInfo::UTF8:
prefixLen = 5;
na_wcsncpy(buf,WIDE_("_UTF8"), prefixLen);
break;
/********* Uncomment if we ever support SJIS
case CharInfo::SJIS:
prefixLen = 5;
na_wcsncpy(buf,WIDE_("_SJIS"), prefixLen);
break;
*/
case CharInfo::KANJI_MP:
prefixLen = 6;
na_wcsncpy(buf,WIDE_("_KANJI"), prefixLen);
break;
case CharInfo::KSC5601_MP:
prefixLen = 8;
na_wcsncpy(buf,WIDE_("_KSC5601"), prefixLen);
break;
default: // no prefix is needed (ISO88591)
break;
}
}
na_wcsncpy(buf+prefixLen,item,numChars);
buf[numChars+prefixLen] = L';';
buf[numChars+prefixLen+1] = L'\0';
NABoolean negate = FALSE;
ItemExpr *ie = parser.get_w_ItemExprTree(buf);
ConstValue * constVal = ie ? ie->castToConstValue(negate) : NULL;
// Genesis 10-980626-6634, 10-040322-4395 and 10-090206-9004
// ALM 5956 -- don't do the type check for MCH. Overflowed MCH boundary
// value may skip 1 or more column values, so pair-wise type
// matching may not work.
if (!constVal ||
(entries == 1 &&
constVal->getType()->getTypeQualifier() != colType->getTypeQualifier()))
{
CmpCommon::diags()->rewind(mark,TRUE); // get rid of any diags parser may have added
*CmpCommon::diags()
<< DgSqlCode(CATALOG_HISTOGRM_HISTINTS_TABLES_CONTAIN_BAD_VALUE)
<< DgWString0(theValue)
<< DgString1(columns[i]->getFullColRefNameAsAnsiString());
CmpInternalException("Bad Interval Boundary", __FILE__ , __LINE__).throwException();
}
// For a case-insensitive char type, it is necessary to upper-case the
// boundary value first before compute the hash. This is to mimic the
// run-time behavior where values of such char type are always
// upper shifted before case-insensitive comparison, being hashed or
// encoded.
// skip the upshifting for Unicode characters, as that is how these
// are treated in USTAT. 10-090225-9553
if ((colType->getTypeQualifier() == NA_CHARACTER_TYPE) &&
((CharType*)colType)->isCaseinsensitive() &&
(((CharType*)colType)->getCharSet() != CharInfo::UNICODE))
constVal = constVal->toUpper(HISTHEAP);
if (colType->getTypeQualifier() == NA_CHARACTER_TYPE)
{
CharInfo::Collation Col_Coll ;
Col_Coll = ((CharType*)colType)->getCollation();
if (Col_Coll != CharInfo::DefaultCollation) {
((CharType *)(constVal->getType()))->setCollation(Col_Coll);
}
}
val = NormValue (constVal, negate);
// populate the ith entry in cvPtrs[]
if ( cvPtrs ) cvPtrs[i] = constVal;
break;
}
}
if(entries == 1)
this->setValue(val); // filters out non-NULLs with NULL double-value
else
{
if(!valueList_)
valueList_ = new (heap_) NormValueList(entries);
valueList_->insertAt(i,val);
}
// If boundaryValueTruncated is true, we have just processed the last
// component of the MCH boundary value that was present.
if (i == (entries - 1) || boundaryValueTruncated)
return;
// next should now point to ','
item = next + 1;
while (*item == L' ')
item++;
next = (NAWchar *)item;
// Check if trailing part has been truncated due to size limit.
if ((next[0] == L'.') &&
(next[1] == L'.'))
return;
} // For loop
return;
}
// -----------------------------------------------------------------------
// Given an upper and lower bound, represented in multi-attribute
// encoded version where:
// lowBound = (x1, y1, ...)
// *this value = (x2, y2, ...)
// upperBound = (x3, y3, .. )
//
// this method returns a ratio stating where this value lies.
//
// ratio = *thisvalue - lowBound
// ---------------------
// upperBound - lowBound
//
// = x2 - x1 + (y2-y1)/(Maxy-Miny) + (z2-z1)/(Maxz-Minz)*#(Maxy-Miny) ...
// -------------------------------------------------------------------
// x3 - x1 + (y3-y1)/(Maxy-Miny) + (z3-z1)/(Maxz-Minz)*#(Maxy-Miny) ...
//
//
// -----------------------------------------------------------------------
double EncodedValue::ratio (const EncodedValue & lowBound,
const EncodedValue & upperBound) const
{
// NB: ignore the comments above -- we only have 1 column in our
// EncodedValues now, so this function is simpler than what's described
double retval = -1.0 ;
double hi = upperBound.getDblValue() ;
double lo = lowBound.getDblValue() ;
double me = this->getDblValue() ;
if ( hi > lo ) // implies hi-lo != 0 -- avoid div-by-zero!
{
CMPASSERT(hi>=me);
CMPASSERT(lo<=me);
if ( me == hi )
retval = 1.0 ;
else if ( me == lo )
retval = 0.0 ;
else
retval = (me-lo)/(hi-lo);
}
// if someone passes the parameters in incorrectly, we should
// still return a reasonable result!
else if ( hi < lo )
retval = ratio (upperBound,lowBound) ;
else // hi == lo
CMPASSERT(FALSE) ; // misuse of this function!
CMPASSERT(retval != -1.0) ;
// make sure we didn't generate any invalid numbers!
CMPASSERT ( NOT isnan(retval) ) ;
return retval;
}
COMPARE_RESULT NormValue::compare (const NormValue &other) const
{
if ( *this > other )
return (MORE) ;
else if ( *this < other )
return (LESS) ;
else
return (SAME) ;
}
COMPARE_RESULT NormValueList::compare (const NormValueList * other) const
{
COMPARE_RESULT result = SAME;
CollIndex i = 0;
while(result == SAME && i < entries())
{
result = this->at(i).compare(other->at(i));
i++;
}
return result;
}
COMPARE_RESULT EncodedValue::compare (const EncodedValue &other) const
{
COMPARE_RESULT result = SAME;
const NormValueList * nvl = other.getValueList();
if(valueList_ && nvl)
result = valueList_->compare(nvl);
else if(valueList_ && !nvl)
result = valueList_->at(0).compare(other.getValue());
else if(!valueList_ && nvl)
result = getValue().compare(nvl->at(0));
else
result = getValue().compare(other.getValue());
return result;
}
void EncodedValue::display (FILE *f, const char * prefix, const char * suffix,
CollHeap *c, char *buf) const
{
Space * space = (Space *)c;
char mybuf[1000];
snprintf(mybuf, sizeof(mybuf), "%s( ", prefix);
PRINTIT(f, c, space, buf, mybuf);
if(valueList_)
{
CollIndex i = 0;
CollIndex noOfCols = valueList_->entries();
while(i < noOfCols)
{
valueList_->at(i).display(f, prefix, suffix, c, buf);
snprintf(mybuf, sizeof(mybuf), "%s,", prefix);
i++;
};
}
else
getValue().display(f, prefix, suffix, c, buf);
snprintf(mybuf, sizeof(mybuf), " )%s", suffix);
PRINTIT(f, c, space, buf, mybuf);
}
const NAString EncodedValue::getText(NABoolean parenthesized, NABoolean showFractionalPart) const
{
char cp [100]; // max double value ~= 1.16e77, hence 100 bytes is plenty.
char *s = cp;
char *t = s;
if ( parenthesized == TRUE ) {
sprintf (s, "( ");
t += strlen(s);
}
if ( getValue().isNull() )
sprintf (t, "NULL");
else {
if ( showFractionalPart == TRUE )
sprintf (t, "%.4f", getDblValue());
else
sprintf (t, "%.0f", getDblValue());
}
if ( parenthesized == TRUE ) {
t = s + strlen(s);
sprintf (t, " )");
}
return cp;
}
UInt32 EncodedValue::computeHashForNumeric(SQLNumeric* nt)
{
return getValue().computeHashForNumeric(nt);
}
UInt32 NormValue::computeHashForNumeric(SQLNumeric* nt)
{
CMPASSERT(nt);
Lng32 len = nt->getNominalSize();
CMPASSERT(len >= 0);
Lng32 longTemp;
ULng32 usLongTemp;
short shrtTemp;
unsigned short usShrtTemp;
double x = getValue();
char result[8];
// Fix 10-091117-6417.
// Warning: some of the following conversions can be lossy when the
// scale is relatively big. For example, 2.9998611111111111
// will become 2.9998611111111112 after converting to Int64,
// where 2.9998611111111111 is of type NUMERIC(18, 16). When that value
// is skewed, we will miss it in the skew buster.
UInt32 flags = ExHDPHash::NO_FLAGS;
if ( len <= 2 ) {
flags = ExHDPHash::SWAP_TWO;
if ( nt->isUnsigned()) {
usShrtTemp = (unsigned short)(x*pow(10.0, nt->getScale()));
memcpy(result, (char*)&usShrtTemp, len);
} else {
shrtTemp = (short)(x*pow(10.0, nt->getScale()));
memcpy(result, (char*)&shrtTemp, len);
}
} else
if ( len <= 4 ) {
flags = ExHDPHash::SWAP_FOUR;
if ( nt->isUnsigned()) {
usLongTemp = (ULng32)(x*pow(10.0, nt->getScale()));
memcpy(result, (char*)&usLongTemp, len);
} else {
longTemp = (Lng32)(x*pow(10.0, nt->getScale()));
memcpy(result, (char*)&longTemp, len);
}
} else {
flags = ExHDPHash::SWAP_EIGHT;
Int64 int64Temp = (Int64) (x * pow(10.0, nt->getScale()));
memcpy(result, (char*)&int64Temp, len);
}
UInt32 hash = ExHDPHash::hash(result, flags, len);
return hash;
}