blob: 982403acff4c7ab9d5ae8d984cef1a5321ddce47 [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
*****************************************************************************
*
* File: hs_util.C
* Description: Utility functions.
* Created: 03/25/96
* Language: C++
*
*
*
*****************************************************************************
*/
#define HS_FILE "hs_util"
#include "Platform.h"
#define SQLPARSERGLOBALS_NADEFAULTS // must be first
#include <math.h>
#include <time.h>
#include "hs_util.h"
#include "hs_log.h"
#include "hs_globals.h"
#include "CompException.h" // CmpInternalException()
#include "ComCextdecs.h" // NA_JulianTimestamp()
#include "NAHeap.h" // For NADELETEARRAY.
#include "CmpContext.h"
#include "CmpSeabaseDDL.h"
#include "ComSmallDefs.h"
#include "NLSConversion.h"
#include "SqlParserGlobals.h" // must be last #include
static NAWString appendFraction (Lng32 scale);
// Map a 4-bit value to its upper-case hexadecimal character: 0..9 yield
// '0'..'9'; anything larger is offset from 'A' (so 10..15 yield 'A'..'F').
// The argument is not range-checked.
static unsigned char toHexDecimalDigit(unsigned char c)
{
  if (c <= 9)
    return c + '0';

  return c - 10 + 'A';
}
// Report whether an unqualified table name is exactly one of the histogram
// table names recognized here: HISTOGRM, HISTINTS, HISTOGRAMS,
// HISTOGRAM_INTERVALS, HBASE_HIST_NAME or HBASE_HISTINT_NAME.
NABoolean isSpecialObject(const NAString &tableName)
{
  NABoolean matches = (tableName == "HISTOGRM");
  matches = matches || (tableName == "HISTINTS");
  matches = matches || (tableName == "HISTOGRAMS");
  matches = matches || (tableName == "HISTOGRAM_INTERVALS");
  matches = matches || (tableName == HBASE_HIST_NAME);
  matches = matches || (tableName == HBASE_HISTINT_NAME);
  return matches;
}
// A ComObjectName is "special" when its object-name part (as an ANSI string)
// is one of the histogram table names, or when the whole name designates an
// HBase metadata object (see isHBaseMeta).
NABoolean isSpecialObject(const ComObjectName& objectName)
{
  const NABoolean isHistogramTable =
    isSpecialObject(objectName.getObjectNamePartAsAnsiString());

  // The metadata check is only needed when the name test did not match.
  return isHistogramTable ? TRUE : isHBaseMeta(objectName);
}
// True when the object-name part is recognized by
// HSGlobalsClass::isHBaseUMDHistogram() and the catalog-name part is
// recognized by HSGlobalsClass::isTrafodionCatalog(). The catalog is only
// examined when the object name matched.
NABoolean isHBaseUmdHistograms(const ComObjectName& objectName)
{
  const NABoolean isHistogramObject =
    HSGlobalsClass::isHBaseUMDHistogram(
      objectName.getObjectNamePart().getInternalName());

  return isHistogramObject &&
         HSGlobalsClass::isTrafodionCatalog(
           objectName.getCatalogNamePart().getInternalName());
}
// True when the object is Seabase metadata, Seabase privilege-manager
// metadata, or an HBase UMD histogram table. The checks run in that order
// and stop at the first one that succeeds.
NABoolean isHBaseMeta(const ComObjectName& objectName)
{
  NABoolean isMeta = CmpSeabaseDDL::isSeabaseMD(objectName);
  isMeta = isMeta || CmpSeabaseDDL::isSeabasePrivMgrMD(objectName);
  isMeta = isMeta || isHBaseUmdHistograms(objectName);
  return isMeta;
}
// QualifiedName flavor of isHBaseMeta(): true for Seabase metadata, Seabase
// privilege-manager metadata, or an HBase UMD histogram table. The checks
// run in that order and stop at the first one that succeeds.
NABoolean isHBaseMeta(const QualifiedName& qualifiedName)
{
  NABoolean isMeta = qualifiedName.isSeabaseMD();
  isMeta = isMeta || qualifiedName.isSeabasePrivMgrMD();
  isMeta = isMeta || isHBaseUmdHistograms(qualifiedName);
  return isMeta;
}
// QualifiedName flavor of isHBaseUmdHistograms(): the name must be recognized
// by HSGlobalsClass::isHBaseUMDHistogram() and its catalog by
// HSGlobalsClass::isTrafodionCatalog(). The catalog is only examined when the
// name matched.
//
// NOTE(review): the full qualified ANSI string is passed to
// isHBaseUMDHistogram() here, whereas the ComObjectName overload passes only
// the object-name part -- confirm that this difference is intended.
NABoolean isHBaseUmdHistograms(const QualifiedName& qualifiedName)
{
  const NABoolean isHistogramObject =
    HSGlobalsClass::isHBaseUMDHistogram(
      qualifiedName.getQualifiedNameAsAnsiString());

  return isHistogramObject &&
         HSGlobalsClass::isTrafodionCatalog(qualifiedName.getCatalogName());
}
// A QualifiedName is "special" when its object name (converted with
// ToAnsiIdentifier) is one of the histogram table names, or when the name
// designates an HBase metadata object (see isHBaseMeta).
NABoolean isSpecialObject(const QualifiedName& qualifiedName)
{
  const NABoolean isHistogramTable =
    isSpecialObject(ToAnsiIdentifier(qualifiedName.getObjectName()));

  // The metadata check is only needed when the name test did not match.
  return isHistogramTable ? TRUE : isHBaseMeta(qualifiedName);
}
// Convert a fully-qualified Trafodion table name, in place, to the name used
// for its backing sample table, which is a Hive table. Hive table names only
// allow letters, digits, and underscores, and are case-insensitive.
//
// The conversion replaces every '.' with '_' and appends "_SAMPLE".
// TODO: characters other than periods are copied unchanged, so delimited
// identifiers containing other special characters are not yet handled.
void TrafToHiveSampleTableName(NAString& name)
{
  static const char sampleSuffix[] = "_SAMPLE";

  const size_t len = name.length();
  const char* oldName = name.data();

  // The buffer must hold the converted name plus the suffix and its
  // terminating NUL; sizeof(sampleSuffix) already counts the NUL. (The buffer
  // used to be only len bytes, so appending the suffix wrote past its end.)
  // The buffer comes from STMTHEAP and is not released here.
  char* newName = new(STMTHEAP) char[len + sizeof(sampleSuffix)];

  for (size_t i = 0; i < len; i++)
    newName[i] = (oldName[i] == '.') ? '_' : oldName[i];

  strcpy(newName + len, sampleSuffix);
  name = newName;
}
// -----------------------------------------------------------------------
// Return the root of fx = 0 for uec estimate.
// -----------------------------------------------------------------------
double xValue( const double x0
, const double n
)
{
double x = x0;
double fx, gx, t;
for (Int32 i = 0; i < 100; i++)
{
t = exp(-n/x);
fx = x * (1 - t) - x0;
// ok to return if fx is very close to 0.
if (fx < 1e-8 &&
fx > -1e-8)
break;
gx = 1 - (1 + n/x) * t;
x = x - fx / gx;
}
return x;
}
// Compute UEC using a first-order unsmoothed jacknife estimator.
//
// For details on this method, see the paper Estimating the Number of
// Classes in a Finite Population, IBM Research Report, by Haas and
// Stokes, pg. 6.
//
// Parameters:
//   d  - number of distinct values in sample
//   n  - number of rows in sample
//   N  - number of rows in full table
//   q  - sampling fraction (e.g., .01 for 1% sample)
//   f1 - number of distinct values that occur exactly once in sample
//
// Returns min(Duj, N), where Duj = d / (1 - (1-q)*f1/n) is the unsmoothed
// jacknife estimate of the number of distinct values in the full table, so
// that the UEC estimate never exceeds the row count.
static double computeUJack(double d, double n, double N,
                           double q, double f1)
{
  const double singletonTerm = ((1.0 - q)*f1) / n;
  const double Duj = d / (1.0 - singletonTerm);

  if (Duj > N)
    return N;

  return Duj;
}
//
// Compute an estimate, gamma squared, of the squared coefficient
// of variation of the class sizes (i.e., the sizes, or number of
// rows, with each unique value in the sample). This quantity is
// used by the second order unsmoothed jacknife estimator.
//
// For more information, see the paper Estimating the Number of
// Classes in a Finite Population, IBM Research Report, by Haas and
// Stokes, pgs. 7-8.
//
// Parameters:
//   fi - sample frequency counts, indexed by occurrence count i
//   D  - estimated number of distinct values in the full table
//   n  - number of rows in sample (truncated to ULng32 for the loop bound)
//   N  - number of rows in full table
//
// Returns max(0, D * sum(i*(i-1)*fi[i]/n^2) + D/N - 1).
//
static double gamma_2(FrequencyCounts &fi, double D, double n, double N)
{
  const double n2 = n*n;
  const ULng32 lastFrequency = (ULng32) n;
  double sum = 0;

  for (ULng32 freq = 1; freq <= lastFrequency; freq++)
    {
      const ULng32 cnt = fi[freq];
      if (cnt == 0)
        continue;   // empty frequency classes contribute nothing

      sum = sum + ((double)freq * (double)(freq-1) * (double)cnt/n2);
    }

  const double g2 = D * sum + D/N - 1;
  if (g2 < 0)
    return 0;

  return g2;
}
// Compute UEC using a second-order unsmoothed jacknife estimator.
//
// For details on this method, see the paper Estimating the Number of
// Classes in a Finite Population, IBM Research Report, by Haas and
// Stokes, pg. 9.
//
// Parameters:
//   d  - number of distinct values in sample
//   n  - number of rows in sample
//   N  - number of rows in full table
//   q  - sampling fraction (e.g., .01 for 1% sample)
//   fi - sample frequency counts
//   D  - number of distinct values in full table (estimate)
//
// Returns min(Duj2, N), where Duj2 is the second order unsmoothed jacknife
// estimate of the number of distinct values in the full table, so that the
// UEC estimate never exceeds the row count.
//
static double computeUJack2(double d, double n, double N,
                            double q, FrequencyCounts &fi, double D)
{
  const double g2 = gamma_2(fi, D, n, N);

  // The correction term contains (1-q) * log(1-q). For q == 1 (the sample is
  // the whole table) that is 0 * -infinity, which evaluates to NaN and would
  // propagate into the estimate. The mathematical limit of x*log(x) as x
  // approaches 0 is 0, so the correction is taken to be 0 when q >= 1.
  double correction = 0;
  if (q < 1)
    correction = (fi[1] * (1-q) * log(1-q) * g2)/q;

  const double numer = d - correction;
  const double denom = 1 - (fi[1] * (1-q) / n);
  const double Duj2  = numer / denom;

  // return min(Duj2, N), to make sure that UEC estimate never
  // exceeds the row count
  //
  return Duj2 > N ? N : Duj2;
}
//
// Compute UEC using a variant of Shlosser's method.
//
// For details on this method, see the paper Estimating the Number of
// Classes in a Finite Population, IBM Research Report, by Haas and
// Stokes, pg. 14.
//
static double computeShloss(double d, Int64 n, double N,
double q, FrequencyCounts &fi)
{
// input parameters:
// d - number of distinct values in sample
// n - number of rows in sample
// N - number of rows in full table
// q - sampling fraction (e.g., .01 for 1% sample)
// fi - frequency counts (for each i, the number of distinct
// values in the sample that occur exactly i times)
// the Shloss estimator, Dsh, is defined as follows:
//
// Dsh = d + fi[1] * (n2i/d2i) * (ni/di)^2
//
// where d and fi are as described above. the other terms
// (ni, n2i, di, d2i) are summations that are accumulated as
// i goes from 1 to n. the summations involve i, q, and fi[i]
// (see the reference above for details).
// the summations include (1-q)^i, (1-q^2)^i, and (1+q)^i. these
// values are stored in the following variables at each step in
// the summation
//
double qi = 1; // (1-q)^i
double q2i = 1; // (1-q^2)^i
double q3i = 1; // (1+q)^i
double di = 0, d2i = 0;
double ni = 0, n2i = 0;
// used to update qi, q2i and q3i, at each step in the summation
//
double q1 = 1.0 - q;
double q2 = 1.0 - q*q;
double q3 = 1.0 + q;
double qsquared = q*q;
// for each i value from 1 to sample size, accumulate
// summations ni, di, n2i, d2i
//
for (Int64 i=1; i<=n; i++)
{
double idbl = (double) i;
ULng32 cnt = fi[i];
if (cnt) di = di + qi * cnt * idbl * q;
qi = qi * q1;
if (cnt) ni = ni + qi * cnt;
if (cnt) n2i = n2i + idbl * qsquared * q2i * cnt;
q2i = q2i * q2;
q3i = q3i * q3;
if (cnt) d2i = d2i + qi * (q3i - 1) * cnt;
// break if qi or q2i get too small
if (qi < .000001 || q2i < .000001) break;
}
double Dsh =
(fi[1]==0) ? d : (d + fi[1] * (n2i/d2i) * ((ni/di) * (ni/di)));
// return min(Dsh, N), to ensure UEC estimate doesn't exceed row count
//
return Dsh > N ? N : Dsh;
}
//
// Compute the coefficient of variation of the class sizes
// in the sample. (Each distinct value in the sample represents
// a "class" and the class size is the number of times the
// class value appears in the sample.)
//
// The coefficient of variation is a measure of the skew of
// the class sizes.
//
// Parameters:
//   d  - number of distinct values in sample (not used by the computation)
//   n  - number of rows in sample
//   D  - number of distinct values in full table (estimate)
//   N  - number of rows in full table
//   fi - frequency counts (for each i, the number of distinct
//        values in the sample that occur exactly i times)
//
// Returns max(0, (D / n^2) * sum(i*(i-1)*fi[i]) + D/N - 1).
//
static double computeCoeffOfVar(double d, Int64 n, double D, double N,
                                FrequencyCounts &fi)
{
  // All products are formed in double precision, as gamma_2() does.
  // Multiplying i * (i-1) * fi[i] and n * n as integers can overflow for
  // large samples; the double results are identical whenever the integer
  // product is exactly representable.
  const double nDbl = (double) n;
  double sum = 0;

  for (Int64 i = 1; i <= n; i++)
    {
      const double cnt = (double) fi[i];
      if (cnt != 0)
        sum += (double) i * (double) (i-1) * cnt;
    }

  const double est = (D / (nDbl * nDbl)) * sum + D/N - 1;
  return est > 0 ? est : 0;
}
double lwcUecEstimate(double sampleUec, double sampleRowCnt,
double estTotalRowCnt, FrequencyCounts *fi,
double DshMax, double &coeffOfVar, double &Duj, double &Dsh)
{
// q is the sample fraction, e.g., .01 for a 1% sample
double q = sampleRowCnt / estTotalRowCnt;
// Duj (first order unsmoothed jacknife) is used as the
// estimate of D, when computing Duj2
//
Duj = computeUJack(sampleUec, sampleRowCnt, estTotalRowCnt,
q, (double) ((*fi)[1]));
// Duj2 - second order unsmoothed jacknife
//
double Duj2 = computeUJack2(sampleUec, sampleRowCnt, estTotalRowCnt,
q, *fi, Duj);
// Dsh - Shloss estimate
//
Dsh = computeShloss(sampleUec, (Int64) sampleRowCnt, estTotalRowCnt,
q, *fi);
coeffOfVar = computeCoeffOfVar(sampleUec, (Int64) sampleRowCnt,
Duj, estTotalRowCnt, *fi);
// at this point, we have two estimates of D (the actual UEC), Duj2
// and Dsh. Dlwc is a weighted combination of these two estimates.
// two weights are computed, DujWt and DshWt. the sum of these
// weights is 1, and each is in the range 0-1. Dlwc is
// DujWt * Duj2 + DshWt * Dsh.
//
// DshWt is coeffOfVar / DshMax, with a max value of 1. (DshMax has
// a default value of 50.) So DshWt becomes larger as the coefficient
// of variation gets larger, and is capped at 1 for coeffOfVar values
// above DshMax. This weights Dsh more as the skew of the data increases.
//
// DujWt is 1 - DshWt.
double DshWt = coeffOfVar / DshMax;
if (DshWt > 1.0) DshWt = 1;
double DujWt = 1.0 - DshWt;
double Dlwc = ceil(DujWt*Duj2 + DshWt*Dsh);
// make sure Dlwc <= max(Duj2,Dsh)
//
if (Dlwc > Duj2 && Dlwc > Dsh)
{
Dlwc = (Duj2 < Dsh) ? Dsh : Duj2;
}
// pass Duj2 estimate back to caller through ref param Duj
//
Duj = Duj2;
return Dlwc;
}
Lng32 FormatRow(const HSColumnStruct *srcDesc,
const char *src,
HSDataBuffer &target)
{
const Lng32 REC_INTERVAL = REC_MIN_INTERVAL;
Lng32 retcode = 0;
const Lng32 workBufLen = 4096;
NAWchar workBuf[workBufLen];
Lng32 type = srcDesc->datatype;
NAWString wStr;
//The input source buffer will always be in the following form and will
//contain unicode format. We need to separate the buffer accordingly.
// |-------|--------------|
// SRC -->| LEN | DATA |
// |-------|--------------|
short inDataLen;
memcpy((char*)&inDataLen, src, sizeof(short));
const NAWchar *inData = (NAWchar*)(src + sizeof(short));
if (DFS2REC::isInterval(type))
type = REC_INTERVAL;
if (DFS2REC::isAnyCharacter(type))
{
wStr = WIDE_("'");
for (short i = 0; i < inDataLen/sizeof(NAWchar); i++)
{
if (inData[i] == NAWchar('\0'))
wStr += NAWchar('\1'); /* convert x00 to x01 */
else
{
wStr += inData[i];
if (inData[i] == NAWchar('\''))
wStr.append(WIDE_("'"));
}
}
wStr.append(WIDE_("'"));
target = wStr.data();
}
else
{
switch (type)
{
case REC_DATETIME:
{
switch (srcDesc->precision)
{
case REC_DTCODE_DATE:
{
wStr = WIDE_("DATE '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("'"));
break;
}
case REC_DTCODE_TIME:
{
wStr = WIDE_("TIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("'"));
break;
}
case REC_DTCODE_TIMESTAMP:
{
wStr = WIDE_("TIMESTAMP '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("'"));
break;
}
// Here begin a number of cases that are only possible with MP datetime types.
case REC_DTCODE_YEAR:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("' YEAR"));
break;
}
case REC_DTCODE_YEAR_MONTH:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("' YEAR TO MONTH"));
break;
}
case REC_DTCODE_YEAR_HOUR:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("' YEAR TO HOUR"));
break;
}
case REC_DTCODE_YEAR_MINUTE:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("' YEAR TO MINUTE"));
break;
}
case REC_DTCODE_MONTH:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("' MONTH"));
break;
}
case REC_DTCODE_MONTH_DAY:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("' MONTH TO DAY"));
break;
}
case REC_DTCODE_MONTH_HOUR:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("' MONTH TO HOUR"));
break;
}
case REC_DTCODE_MONTH_MINUTE:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("' MONTH TO MINUTE"));
break;
}
case REC_DTCODE_MONTH_SECOND:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
if (srcDesc->scale == 0)
wStr.append(WIDE_("' MONTH TO SECOND"));
else
{
wStr.append(WIDE_("' MONTH TO "));
wStr.append(appendFraction(srcDesc->scale));
}
break;
}
case REC_DTCODE_DAY:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("' DAY"));
break;
}
case REC_DTCODE_DAY_HOUR:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("' DAY TO HOUR"));
break;
}
case REC_DTCODE_DAY_MINUTE:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("' DAY TO MINUTE"));
break;
}
case REC_DTCODE_DAY_SECOND:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
if (srcDesc->scale == 0)
wStr.append(WIDE_("' DAY TO SECOND"));
else
{
wStr.append(WIDE_("' DAY TO "));
wStr.append(appendFraction(srcDesc->scale));
}
break;
}
case REC_DTCODE_HOUR:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("' HOUR"));
break;
}
case REC_DTCODE_HOUR_MINUTE:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("' HOUR TO MINUTE"));
break;
}
case REC_DTCODE_MINUTE:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
wStr.append(WIDE_("' MINUTE"));
break;
}
case REC_DTCODE_MINUTE_SECOND:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
if (srcDesc->scale == 0)
wStr.append(WIDE_("' MINUTE TO SECOND"));
else
{
wStr.append(WIDE_("' MINUTE TO "));
wStr.append(appendFraction(srcDesc->scale));
}
break;
}
case REC_DTCODE_SECOND:
{
wStr = WIDE_("DATETIME '");
wStr.append(inData, inDataLen/sizeof(NAWchar));
if (srcDesc->scale == 0)
wStr.append(WIDE_("' SECOND"));
else
{
wStr.append(WIDE_("' SECOND TO "));
wStr.append(appendFraction(srcDesc->scale));
}
break;
}
default:
{
HS_ASSERT(FALSE);
break;
}
}
target = wStr.data();
break;
}
case REC_INTERVAL:
{
//The INTERVAL may contain spaces and the negative sign
//in front of the number.
//We must capture the sign, but do not copy the extra character.
Int32 spaceLen = 0;
NABoolean signPresent = FALSE;
spaceLen = wcsspn(inData, L" ");
if (inData[spaceLen] == L'-')
{
signPresent = TRUE;
wStr = WIDE_("INTERVAL -'");
}
else
wStr = WIDE_("INTERVAL '");
for (short i=0; i < spaceLen; i++)
wStr.append(L" ");
wStr.append( (inData+((signPresent) ? 1 : 0)+spaceLen),
(inDataLen/sizeof(NAWchar)-((signPresent) ? 1 : 0)-spaceLen));
wStr.append(WIDE_("'"));
switch (srcDesc->datatype)
{
case REC_INT_YEAR:
{
na_wsprintf(workBuf, WIDE_("%s YEAR(%d)"), wStr.data(), srcDesc->precision);
break;
}
case REC_INT_YEAR_MONTH:
{
na_wsprintf(workBuf, WIDE_("%s YEAR(%d) TO MONTH"), wStr.data(), srcDesc->precision);
break;
}
case REC_INT_MONTH:
{
na_wsprintf(workBuf, WIDE_("%s MONTH(%d)"), wStr.data(), srcDesc->precision);
break;
}
case REC_INT_DAY:
{
na_wsprintf(workBuf, WIDE_("%s DAY(%d)"), wStr.data(), srcDesc->precision);
break;
}
case REC_INT_DAY_HOUR:
{
na_wsprintf(workBuf, WIDE_("%s DAY(%d) TO HOUR"), wStr.data(), srcDesc->precision);
break;
}
case REC_INT_DAY_MINUTE:
{
na_wsprintf(workBuf, WIDE_("%s DAY(%d) TO MINUTE"), wStr.data(), srcDesc->precision);
break;
}
case REC_INT_DAY_SECOND:
{
na_wsprintf(workBuf, WIDE_("%s DAY(%d) TO SECOND(%d)"), wStr.data(), srcDesc->precision, srcDesc->scale);
break;
}
case REC_INT_HOUR:
{
na_wsprintf(workBuf, WIDE_("%s HOUR(%d)"), wStr.data(), srcDesc->precision);
break;
}
case REC_INT_HOUR_MINUTE:
{
na_wsprintf(workBuf, WIDE_("%s HOUR(%d) TO MINUTE"), wStr.data(), srcDesc->precision);
break;
}
case REC_INT_HOUR_SECOND:
{
na_wsprintf(workBuf, WIDE_("%s HOUR(%d) TO SECOND(%d)"), wStr.data(), srcDesc->precision, srcDesc->scale);
break;
}
case REC_INT_MINUTE:
{
na_wsprintf(workBuf, WIDE_("%s MINUTE(%d)"), wStr.data(), srcDesc->precision);
break;
}
case REC_INT_MINUTE_SECOND:
{
na_wsprintf(workBuf, WIDE_("%s MINUTE(%d) TO SECOND(%d)"), wStr.data(), srcDesc->precision, srcDesc->scale);
break;
}
case REC_INT_SECOND:
{
na_wsprintf(workBuf, WIDE_("%s SECOND(%d, %d)"), wStr.data(), srcDesc->precision, srcDesc->scale);
break;
}
default:
{
HS_ASSERT(FALSE);
break;
}
}
target = workBuf;
break;
}
default:
{
wStr.replace(0, wStr.length(), inData, inDataLen/sizeof(NAWchar));
target = wStr.data();
break;
}
}
}
return retcode;
}
void ConvWcharToHexadecimal(const NAWchar *source,
ULng32 sourceLength,
NAString &output)
{
unsigned char ucs2Hex[9];
unsigned char ascii2Hex[7];
unsigned char ascii[2];
ucs2Hex[0] = ' ';
ucs2Hex[1] = '0';
ucs2Hex[2] = 'x';
ucs2Hex[7] = ' ';
ucs2Hex[8] = 0;
ascii2Hex[0] = ' ';
ascii2Hex[1] = '0';
ascii2Hex[2] = 'x';
ascii2Hex[5] = ' ';
ascii2Hex[6] = 0;
ascii[1] = 0;
output = "";
for (ULng32 i = 0; i < sourceLength; i++)
{
if ( source[i] <= (NAWchar)0xFF )
{
if (isprint((unsigned char)source[i]))
{
ascii[0] = (unsigned char)source[i];
output += (const char*)ascii;
}
else
{
ascii2Hex[3] = toHexDecimalDigit((unsigned char)((source[i] >> 4) & 0xF));
ascii2Hex[4] = toHexDecimalDigit((unsigned char)((source[i]) & 0xF));
output += (const char*)ascii2Hex;
}
}
else
{
ucs2Hex[3] = toHexDecimalDigit((unsigned char)((source[i] >> 12) & 0xF));
ucs2Hex[4] = toHexDecimalDigit((unsigned char)((source[i] >> 8) & 0xF));
ucs2Hex[5] = toHexDecimalDigit((unsigned char)((source[i] >> 4) & 0xF));
ucs2Hex[6] = toHexDecimalDigit((unsigned char)((source[i]) & 0xF));
output += (const char*)ucs2Hex;
}
}
}
// Return the text used to reference an object of the given name space:
// IUD log tables and indexes are wrapped in the TABLE(...) syntax; every
// other name space (including plain tables) yields the name unchanged.
NAString getTableName(const NAString name, const ComAnsiNameSpace nameSpace)
{
  if (nameSpace == COM_IUD_LOG_TABLE_NAME)
    return "TABLE(IUD_LOG_TABLE " + name + " )";

  if (nameSpace == COM_INDEX_NAME)
    return "TABLE(INDEX_TABLE " + name + " )";

  return name;
}
// Return the text "FRACTION(n)" for a scale n in the range 1..6. Any other
// scale trips HS_ASSERT; an empty string is returned if the assertion
// returns control.
static NAWString appendFraction (Lng32 scale)
{
  NAWString wStr;

  if (scale >= 1 && scale <= 6)
    {
      const NAWchar* const fractionText[] =
        {
          WIDE_("FRACTION(1)"),
          WIDE_("FRACTION(2)"),
          WIDE_("FRACTION(3)"),
          WIDE_("FRACTION(4)"),
          WIDE_("FRACTION(5)"),
          WIDE_("FRACTION(6)")
        };
      wStr = fractionText[scale - 1];
    }
  else
    {
      NABoolean invalid_FRACTION = FALSE;
      HS_ASSERT(invalid_FRACTION);
    }

  return wStr;
}
// Calculate default sample size for table: the row count scaled by the
// HIST_DEFAULT_SAMPLE_RATIO default (rounded up), but no more than the
// HIST_DEFAULT_SAMPLE_MAX default.
Int64 getDefaultSampleSize(Int64 tblRowCount)
{
  const Int64 scaledRows =
    (Int64) ceil(convertInt64ToDouble(tblRowCount) *
                 CmpCommon::getDefaultNumeric(HIST_DEFAULT_SAMPLE_RATIO));
  const Int64 maxRows =
    (Int64)CmpCommon::getDefaultLong(HIST_DEFAULT_SAMPLE_MAX);

  return (scaledRows < maxRows) ? scaledRows : maxRows;
}
// Calculate default sample size for table, given its cardinality and current
// CQD values:
//   - below the minimum row count for sampling, the whole table is used;
//   - below the minimum row count for the lowest sampling rate, the minimum
//     sample size (HIST_DEFAULT_SAMPLE_MIN) is used;
//   - otherwise the row count is scaled by HIST_DEFAULT_SAMPLE_RATIO and the
//     result is kept between HIST_DEFAULT_SAMPLE_MIN and
//     HIST_DEFAULT_SAMPLE_MAX.
Int64 getDefaultSlidingSampleSize(Int64 tblRowCount)
{
  // Bounds on the sample size.
  const Int64 minSampleRows = (Int64)CmpCommon::getDefaultLong(HIST_DEFAULT_SAMPLE_MIN);
  const Int64 maxSampleRows = (Int64)CmpCommon::getDefaultLong(HIST_DEFAULT_SAMPLE_MAX);

  // Table-size thresholds: for using sampling at all, and for using the
  // lowest sampling rate.
  const Int64 minTblRows = HSGlobalsClass::getMinRowCountForSample();
  const Int64 minTblRowsLowSamp = HSGlobalsClass::getMinRowCountForLowSample();

  // We won't ordinarily be sampling if the following condition is true, but
  // this function is called when the bulk load utility is creating a persistent
  // Hive sample table, and in that case the sample will be done anyway.
  if (tblRowCount < minTblRows)
    return tblRowCount;

  if (tblRowCount < minTblRowsLowSamp)
    return minSampleRows;

  const Int64 scaledRows = (Int64)
    ((double)CmpCommon::getDefaultNumeric(HIST_DEFAULT_SAMPLE_RATIO)
     * tblRowCount);

  if (scaledRows < minSampleRows)
    return minSampleRows;
  if (scaledRows > maxSampleRows)
    return maxSampleRows;
  return scaledRows;
}
// Return the schema version of a table. The table name is not consulted:
// the current schema version (COM_VERS_CURR_SCHEMA) is always returned.
COM_VERSION getTableSchemaVersion(const NAString& tableName)
{
  const COM_VERSION currentVersion = COM_VERS_CURR_SCHEMA;
  return currentVersion;
}
/***************************************************************************/
/* METHOD:  hs_getBaseTime()                                               */
/* PURPOSE: Gets the base time (in seconds) for zero hour Jan 1, 1970,     */
/*          i.e. the pre-computed HS_EPOCH_TIMESTAMP divided by 1000000.   */
/* NOTES :  Debug builds recompute the timestamp with COMPUTETIMESTAMP on  */
/*          every call and throw CmpInternalException if the computation   */
/*          fails or disagrees with HS_EPOCH_TIMESTAMP.                    */
/* INPUT :  None                                                           */
/* RETURNS: base time in seconds                                           */
/***************************************************************************/
Int64 hs_getBaseTime()
{
#ifndef _DEBUG
  return HS_EPOCH_TIMESTAMP / 1000000;
#else
  static Int64 baseTs;
  HSLogMan *LM = HSLogMan::Instance();

  // Verify that the number that we have pre-computed (HS_EPOCH_TIMESTAMP) is the correct value
  // Adjust the time to time since zero hour Jan 1, 1970
  short baseTsStr[] = {1970, 1, 1, 0, 0, 0, 0, 0};
  short error;
  baseTs = COMPUTETIMESTAMP(baseTsStr, &error);

  bool failed = true;
  if (error)
    {
      sprintf(LM->msg,
              "INTERNAL ERROR: error in COMPUTETIMESTAMP: %d", error);
    }
  else if (baseTs != HS_EPOCH_TIMESTAMP)
    {
      sprintf(LM->msg,
              "INTERNAL ERROR: wrong baseTS in getEpochTime(): " PF64", HS_EPOCH_TIMESTAMP=" PF64"",
              baseTs, HS_EPOCH_TIMESTAMP);
    }
  else
    failed = false;

  if (failed)
    {
      if (LM->LogNeeded())
        {
          LM->Log(LM->msg);
        }
      throw CmpInternalException("failure in getEpochTime()",
                                 __FILE__, __LINE__);
    }

  // At this point baseTs == HS_EPOCH_TIMESTAMP.
  return baseTs / 1000000;
#endif
}
/***************************************************************************/
/* METHOD:  hs_getEpochTime()                                              */
/* PURPOSE: Gets the current time (in seconds) since zero hour Jan 1, 1970 */
/*          as (NA_JulianTimestamp() - HS_EPOCH_TIMESTAMP) / 1000000.      */
/* NOTES :  Debug builds verify HS_EPOCH_TIMESTAMP against                 */
/*          COMPUTETIMESTAMP the first time through and throw              */
/*          CmpInternalException if the check fails.                       */
/*          This method is overloaded; see below                           */
/* INPUT :  None                                                           */
/* RETURNS: current time in seconds                                        */
/***************************************************************************/
Int64 hs_getEpochTime()
{
#ifndef _DEBUG
  return (NA_JulianTimestamp() - HS_EPOCH_TIMESTAMP) / 1000000;
#else
  static Int64 baseTs;

  // The verification is skipped once baseTs holds a non-zero value.
  if (! baseTs)
    {
      HSLogMan *LM = HSLogMan::Instance();

      // Verify that the number that we have pre-computed (HS_EPOCH_TIMESTAMP) is the correct value
      // Adjust the time to time since zero hour Jan 1, 1970
      short baseTsStr[] = {1970, 1, 1, 0, 0, 0, 0, 0};
      short error;
      baseTs = COMPUTETIMESTAMP(baseTsStr, &error);

      bool failed = true;
      if (error)
        {
          sprintf(LM->msg,
                  "INTERNAL ERROR: error in COMPUTETIMESTAMP: %d", error);
        }
      else if (baseTs != HS_EPOCH_TIMESTAMP)
        {
          sprintf(LM->msg,
                  "INTERNAL ERROR: wrong baseTS in getEpochTime(): " PF64 "; HS_EPOCH_TIMESTAMP=" PF64 ,
                  baseTs, HS_EPOCH_TIMESTAMP);
        }
      else
        failed = false;

      if (failed)
        {
          if (LM->LogNeeded())
            {
              LM->Log(LM->msg);
            }
          throw CmpInternalException("failure in getEpochTime()",
                                     __FILE__, __LINE__);
        }
      // At this point baseTs == HS_EPOCH_TIMESTAMP.
    }

  const Int64 jt = NA_JulianTimestamp();
  return (jt - baseTs) / 1000000;
#endif
}
/***************************************************************************/
/* METHOD:  hs_getEpochTime()                                              */
/* PURPOSE: Converts the Julian time passed in as argument to an Epoch Time*/
/* NOTES :  This method is overloaded; see above                           */
/* INPUT :  tm - JULIANTIMESTAMP                                           */
/* RETURNS: (tm - HS_EPOCH_TIMESTAMP) / 1000000, or                        */
/*          0 - if the Julian timestamp passed in is not later than        */
/*              HS_EPOCH_TIMESTAMP (the UNIX Epoch)                        */
/***************************************************************************/
Int64 hs_getEpochTime(Int64 tm)
{
  if (tm <= HS_EPOCH_TIMESTAMP)
    return 0;

  return (tm - HS_EPOCH_TIMESTAMP) / 1000000;
}
/***************************************************************************/
/* METHOD: hs_formatTimestamp() */
/* PURPOSE: Formats the current Epoch timestamp in a TIMESTAMP(0) format */
/* string. The time is converted to GMT before formatting. */
/* NOTES : This method is overloaded; see below. */
/* INPUT : - */
/* OUTPUT : time_string - the formatted Epoch Timestamp */
/* RETURNS: - */
/***************************************************************************/
char *hs_formatTimestamp(char *time_string)
{
time_t tm = (time_t) hs_getEpochTime();
strftime(time_string, HS_TIMESTAMP_SIZE, "%Y-%m-%d %H:%M:%S", gmtime(&tm));
return time_string;
}
/***************************************************************************/
/* METHOD:  hs_formatTimestamp()                                           */
/* PURPOSE: Formats the timestamp passed in as a "YYYY-MM-DD HH:MM:SS"     */
/*          string, i.e. TIMESTAMP(0) format.                              */
/*          The time is converted to GMT before formatting.                */
/* NOTES :  This method is overloaded; see above.                          */
/* INPUT :  tm - Epoch timestamp to be formatted                           */
/* OUTPUT : time_string - the formatted Epoch Timestamp; at most           */
/*                        HS_TIMESTAMP_SIZE bytes are written. Set to the  */
/*                        empty string if tm cannot be converted to a      */
/*                        calendar time.                                   */
/* RETURNS: time_string                                                    */
/***************************************************************************/
char *hs_formatTimestamp(Int64 tm, char *time_string)
{
  const time_t epochSecs = (time_t) tm;

  // gmtime() returns a null pointer when the value cannot be represented as
  // a calendar time; passing that on to strftime() is undefined behavior.
  const struct tm *utc = gmtime(&epochSecs);
  if (utc == NULL)
    {
      time_string[0] = '\0';
      return time_string;
    }

  strftime(time_string, HS_TIMESTAMP_SIZE, "%Y-%m-%d %H:%M:%S", utc);
  return time_string;
}
/***********************************************/
/* METHOD:  getTimeDiff()                      */
/* PURPOSE: get time difference                */
/*          between calls.                     */
/* INPUT:   reset flag - when set, only the    */
/*          reference time is recorded and 0   */
/*          is returned                        */
/* OUTPUT:  elapsed time in seconds since the  */
/*          previous call                      */
/***********************************************/
Int64 getTimeDiff(NABoolean reset)
{
  // Time of the previous call; persists across calls.
  static Int64 prevTm;

  const Int64 now = hs_getEpochTime();
  const Int64 elapsed = reset ? 0 : (now - prevTm);

  prevTm = now;
  return elapsed;
}
//==========================================================================
// This method returns the location of histogram tables.
//  - For an InMemory table, the location is the volatile catalog and schema
//    of the current SQL session.
//  - For a native HBase or Hive catalog, the location is the corresponding
//    statistics catalog and schema.
//  - Otherwise, if CQD HISTOGRAMS_SCHEMA is set and not null, its contents
//    are returned as the location.
//  - Otherwise, the input regularLocation is returned.
//
// INPUT: regularLocation: contains the location that was generated by
//        the caller. See caller for how this is generated.
//        inMemObj: if this is an InMemory object.
//==========================================================================
NAString getHistogramsTableLocation(
     NAString regularLocation, NABoolean inMemObj)
{
  NAString histLoc;

  if (inMemObj)
    {
      histLoc =
        CmpCommon::context()->sqlSession()->volatileCatalogName()
        + "."
        + CmpCommon::context()->sqlSession()->volatileSchemaName();
      return histLoc;
    }

  // The catalog is the part of regularLocation in front of the first period.
  NAString catName = regularLocation;
  catName.remove(catName.first('.'));

  if ( HSGlobalsClass::isNativeHbaseCat(catName) )
    {
      histLoc = HBASE_STATS_CATALOG "." HBASE_STATS_SCHEMA;
      return histLoc;
    }

  if (HSGlobalsClass::isHiveCat(catName))
    {
      histLoc = HIVE_STATS_CATALOG "." HIVE_STATS_SCHEMA;
      return histLoc;
    }

  CmpCommon::getDefault(HISTOGRAMS_SCHEMA, histLoc, FALSE);
  if (histLoc.isNull())
    return regularLocation;

  return histLoc;
}
//==========================================================================
// getRowCountForFetchFuncs() - returns the row count of the table, as
// reported by HSTableDef::getRowCount(), converted to double.
//
// For tables that are neither HBase nor Hive tables, the CQD
// USTAT_FETCHCOUNT_ACTIVE is turned 'ON' around the call and 'OFF' again
// afterwards. isEstimate is cleared before the call and passed to
// getRowCount(), which may update it.
//==========================================================================
double getRowCountForFetchFuncs(HSTableDef *tabDef, NABoolean &isEstimate)
{
  const NABoolean isHbaseTable = HSGlobalsClass::isHbaseCat(tabDef->getCatName());
  const NABoolean isHiveTable  = HSGlobalsClass::isHiveCat(tabDef->getCatName());

  // getRowCount below does not use SQL for Hbase and Hive tables, so there
  // is no need to set the CQD for these tables
  const NABoolean needsFetchCountCqd = !(isHbaseTable || isHiveTable);

  if (needsFetchCountCqd)
    HSFuncExecQuery("CONTROL QUERY DEFAULT USTAT_FETCHCOUNT_ACTIVE 'ON'");

  isEstimate = FALSE;

  // The error code and bread crumb reported by getRowCount() are not
  // examined here.
  Int32 errorCode = 0;
  Int32 breadCrumb = 0;
  const Int64 rows =
    tabDef->getRowCount(isEstimate, errorCode /* out */, breadCrumb /* out */);

  if (needsFetchCountCqd)
    HSFuncExecQuery("CONTROL QUERY DEFAULT USTAT_FETCHCOUNT_ACTIVE 'OFF'");

  return convertInt64ToDouble(rows);
}