blob: 497419b9412fe294f4d684ab6d55e4c10ef9f167 [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
*****************************************************************************
*
* File: hs_parser.C
* Description: Functions used by parser.
* Created: 09/25/96
* Language: C++
*
*
*****************************************************************************/
#define SQLPARSERGLOBALS_NADEFAULTS // must be first
#define HS_FILE "hs_parser"
#include <time.h>
#include <sys/time.h>
#include "CmpCommon.h"
#include "CmpContext.h"
#include "ComAnsiNamePart.h"
#include "SchemaDB.h"
//IMPORTANT: The following header is required or else the main sqlparser will be invoked
#include "hs_parser_defs.h"
#include "hs_parser.h"
#include "hs_globals.h"
#include "hs_cli.h"
#include "hs_la.h"
#include "SqlParserGlobals.h" // must be last #include
#include "ComSchemaName.h" // for ComSchemaName
extern Int32 yyparse(void*);
extern void HSFuncResetLexer(void*);
extern void init_scanner (void* &);
extern void destroy_scanner(void* &scanner);
Lng32 AddSingleColumn(const Lng32 colNumber);
// -----------------------------------------------------------------------
// Invoke yyparse, then complete the hs_globals setup for the statement:
//   - for every multi-column group, make sure each of its columns also has
//     a single-column group (skipped for a plain CLEAR request), and
//   - record the statistics time as text and as a 10-digit integer.
// Returns 0 when it runs to completion. Error codes are handed to
// HSHandleError (macro defined elsewhere), which is expected to return them
// to the caller.
// -----------------------------------------------------------------------
Lng32 HSFuncParseStmt()
{
  HSGlobalsClass *hs_globals = GetHSContext();
  Lng32 retcode;
  HSColGroupStruct *mgroup = NULL;
  void* scanner;

  init_scanner(scanner);
  HSFuncResetLexer(scanner);
  retcode = yyparse(scanner);

  // Release the scanner before the parse result is examined, so that leaving
  // this function through HSHandleError cannot skip the cleanup.
  destroy_scanner(scanner);
  HSHandleError(retcode);

  // The parser does not always return immediately following an error, so that
  // it can report as many errors as possible. This can result in a nonzero
  // return code being overwritten. To detect this case, we check the diagnostics
  // area and return with the appropriate sqlcode if necessary.
  retcode = hs_globals->getRetcodeFromDiags();
  HSHandleError(retcode);

  hs_globals->parserError = HSGlobalsClass::ERROR_SEMANTICS;

  // We are done here if it is the showstats command
  if (hs_globals->optFlags & SHOWSTATS_OPT)
    return 0;

  // Automatically generate single-column histograms for all multi-column
  // histograms requested. This is required to calculate UEC counts for
  // multi-column histograms.
  // However, do not generate them when the CLEAR option is requested, unless
  // ON EVERY COLUMN or ON EVERY KEY is also specified.
  if (NOT (hs_globals->optFlags & CLEAR_OPT) ||
      hs_globals->optFlags & EVERYCOL_OPT ||
      hs_globals->optFlags & EVERYKEY_OPT)
    {
      mgroup = hs_globals->multiGroup;
      while (mgroup != NULL)
        {
          for (Int32 i = 0; i < mgroup->colCount; i++)
            {
              HSColumnStruct &col = mgroup->colSet[i];
              if (NOT ColumnExists(col.colnum))
                {
                  retcode = AddSingleColumn(col.colnum);
                  HSHandleError(retcode);
                }
            }
          mgroup = mgroup->next;
        }
    }

  // ----------------------------------------------------------------------
  // Construct statistics time in the format: YYYY-MM-DD:HH:MM:SS.
  // We use GMT time.
  // ----------------------------------------------------------------------
  time_t t;
  time(&t);
  char pt[30];
  strftime(pt, sizeof(pt), "%Y-%m-%d:%H:%M:%S", gmtime(&t));
  *hs_globals->statstime = pt;

  // Also store a numerical timestamp with 10 digits MMDDHHMMSS. The indexes
  // below pick the month, day, hour, minute and second digits out of the
  // formatted string, skipping the separator characters.
  hs_globals->statsTimeInt = (pt[5]  - '0') * 1000000000 +
                             (pt[6]  - '0') * 100000000  +
                             (pt[8]  - '0') * 10000000   +
                             (pt[9]  - '0') * 1000000    +
                             (pt[11] - '0') * 100000     +
                             (pt[12] - '0') * 10000      +
                             (pt[14] - '0') * 1000       +
                             (pt[15] - '0') * 100        +
                             (pt[17] - '0') * 10         +
                             (pt[18] - '0');
  return 0;
}
// -----------------------------------------------------------------------
// Construct a fully qualified table name, locate the table definition and
// record the resulting names in hs_globals.
//
// type     - kind of table; GUARDIAN_TABLE names are handled separately.
// table    - object name as given in the statement.
// schema   - schema name, or NULL to use the default schema.
// catalog  - catalog name, or NULL to use the default catalog.
//
// Lookup order for the table definition (hs_globals->objDef):
//   1. the volatile schema, when one is in use, the table is not a Guardian
//      table and no catalog was specified;
//   2. catalog.schema as specified or defaulted;
//   3. the public schema (PUBLIC_SCHEMA_NAME default), when no schema was
//      specified and step 2 found nothing.
//
// Returns 0 on success. Failures are reported through HSFuncMergeDiags and
// HSHandleError (macro defined elsewhere, expected to return the error code),
// or by returning -1 directly.
// -----------------------------------------------------------------------
Lng32 AddTableName( const hs_table_type type
                  , const char *table
                  , const char *schema
                  , const char *catalog
                  )
{
  HSGlobalsClass *hs_globals = GetHSContext();
  NAString catName, schName, objName;
  NAString extName;
  Lng32 retcode = 0;

  hs_globals->tableType = type;
  HSLogMan *LM = HSLogMan::Instance();

  // SET MPLOC is converted to CQD (setting values for default
  // attributes MP_SYSTEM, MP_VOLUME, MP_SUBVOLUME). It does not
  // update the global MPLOC value stored in SqlParser_MPLOC. The
  // following updates the global MPLOC value to be consistent with
  // the default attribute values set by SET MPLOC/CQD.
  ActiveSchemaDB()->getDefaults().getSqlParser_NADefaults();

  if (type == GUARDIAN_TABLE)
    {
      if (*table == '$')
        { // Qualify with system name.
          extName = SqlParser_MPLOC.getSystemName();
          extName += ".";
          extName += table;
        }
      else
        extName = table;
      hs_globals->tableFormat = SQLMP;
    }
  else
    {
      if (catalog)
        catName = catalog;
      else
        catName = ActiveSchemaDB()->getDefaultSchema().getCatalogName();

      if (schema)
        schName = schema;
      else
        schName = ActiveSchemaDB()->getDefaultSchema().getSchemaName();

      objName = table;
      extName = catName + "." + schName + "." + objName;
    }

  hs_globals->objDef = NULL;

  // Search in volatile schema first. If not found, search in regular cat/sch.
  if ((CmpCommon::context()->sqlSession()->volatileSchemaInUse()) &&
      (type != GUARDIAN_TABLE) &&
      (! catalog))
    {
      // search using the volatile schema name.
      NAString &volCatName = CmpCommon::context()->sqlSession()->volatileCatalogName();
      NAString &volSchName = CmpCommon::context()->sqlSession()->volatileSchemaName();
      NAString volObjName = table;
      ComObjectName volIntName(volCatName, volSchName, volObjName,
                               COM_UNKNOWN_NAME,
                               ComAnsiNamePart::INTERNAL_FORMAT);
      if (NOT volIntName.isValid())
        {
          LM->Log("***[ERROR] Unable to create an ObjectClass");
          HSFuncMergeDiags(-UERR_OBJECT_INACCESSIBLE, extName);
          retcode = -1;
          HSHandleError(retcode);
        }

      if (LM->LogNeeded())
        {
          LM->Log("Searching in volatile schema, since catalog not specified.\n");
          // Bounded write: the three name parts are caller-controlled.
          snprintf(LM->msg, sizeof(LM->msg),
                   "Checking volatile name (volIntName) %s.%s.%s\n",
                   volIntName.getCatalogNamePart().getInternalName().data(),
                   volIntName.getSchemaNamePart().getInternalName().data(),
                   volIntName.getObjectNamePart().getInternalName().data());
          LM->Log(LM->msg);
        }

      hs_globals->objDef = HSTableDef::create(STMTHEAP,
                                              volIntName,
                                              hs_globals->tableType,
                                              hs_globals->nameSpace);

      if (NOT hs_globals->objDef->objExists(hs_globals->isUpdatestatsStmt))
        {
          // now look into the regular schema
          delete hs_globals->objDef;
          hs_globals->objDef = NULL;
        }
      else
        {
          // if schema name was specified, validate that it is the
          // current username.
          if (schema)
            {
              QualifiedName qn(volObjName, schName);
              if (NOT CmpCommon::context()->sqlSession()->validateVolatileQualifiedName(qn))
                {
                  // table was found in the volatile schema but it is
                  // not a valid volatile name.
                  // Look for it in regular schema.
                  // error info was moved to CmpCommon::diags. Clear it.
                  CmpCommon::diags()->clear();
                  delete hs_globals->objDef;
                  hs_globals->objDef = NULL;
                }
            }
        }
    }

  if (hs_globals->objDef == NULL)
    {
      ComObjectName intName(catName, schName, objName,
                            COM_UNKNOWN_NAME,
                            ComAnsiNamePart::INTERNAL_FORMAT);
      if (NOT intName.isValid())
        {
          LM->Log("***[ERROR] Unable to create an ObjectClass");
          HSFuncMergeDiags(-UERR_OBJECT_INACCESSIBLE, extName);
          retcode = -1;
          HSHandleError(retcode);
        }

      hs_globals->objDef = HSTableDef::create(STMTHEAP,
                                              intName,
                                              hs_globals->tableType,
                                              hs_globals->nameSpace);

      // just do this check once since it side effects diags (not to mention
      // multiple calls do multiple metadata lookups in failure scenarios)
      NABoolean objExists = hs_globals->objDef->objExists(hs_globals->isUpdatestatsStmt);

      // try public schema if an object is not qualified and not found
      if ((NOT schema) &&
          (NOT objExists))
        {
          NAString pubSch = ActiveSchemaDB()->getDefaults().getValue(PUBLIC_SCHEMA_NAME);
          ComSchemaName pubSchema(pubSch);
          if (NOT pubSchema.getSchemaNamePart().isEmpty())
            {
              NAString pubSchName = pubSchema.getSchemaNamePart().getInternalName();
              NAString pubCatName = (pubSchema.getCatalogNamePart().isEmpty() ?
                                     catName : pubSchema.getCatalogNamePart().getInternalName());
              ComObjectName pubIntName(pubCatName, pubSchName, objName,
                                       COM_UNKNOWN_NAME, ComAnsiNamePart::INTERNAL_FORMAT);
              if (pubIntName.isValid())
                {
                  HSTableDef *pubObjDef = HSTableDef::create(STMTHEAP,
                                                             pubIntName,
                                                             hs_globals->tableType,
                                                             hs_globals->nameSpace);
                  if (pubObjDef->objExists(hs_globals->isUpdatestatsStmt))
                    {
                      // The public-schema definition replaces the one that
                      // was not found; release the latter the same way the
                      // volatile-schema path above does.
                      delete hs_globals->objDef;
                      hs_globals->objDef = pubObjDef;
                      objExists = TRUE;
                    }
                  else
                    {
                      // Probe failed; the temporary definition is not needed.
                      delete pubObjDef;
                    }
                }
            }
        }

      if (NOT objExists)
        {
          ComDiagsArea & diagsArea = GetHSContext()->diagsArea;
          if (!diagsArea.findCondition(-UERR_OBJECT_INACCESSIBLE))
            {
              // only add this error in if objExists check didn't already
              // (it's annoying to have the same error repeated)
              HSFuncMergeDiags(-UERR_OBJECT_INACCESSIBLE, extName);
            }
          retcode = -1;
          HSHandleError(retcode);
        }
    }

  //10-040123-2660 We only support tables. We do not allow views.
  // Tables cannot be metadata tables.
  if (((hs_globals->objDef->getObjectType() != COM_BASE_TABLE_OBJECT) &&
       (hs_globals->objDef->getObjectType() != COM_MV_OBJECT)) ||
      (hs_globals->objDef->getNATable()->isSeabaseMDTable()) ||
      (hs_globals->objDef->getNATable()->isSeabasePrivSchemaTable()))
    {
      HSFuncMergeDiags(-UERR_INVALID_OBJECT, extName);
      retcode = -1;
      HSHandleError(retcode);
    }

  // The CQD reset is issued before the return code is examined, so it is
  // executed whether or not getColumnNames() succeeded.
  retcode = hs_globals->objDef->getColumnNames();
  HSFuncExecQuery("CONTROL QUERY DEFAULT DISPLAY_DIVISION_BY_COLUMNS RESET");
  HSHandleError(retcode);

  hs_globals->tableFormat = hs_globals->objDef->getObjectFormat();
  *hs_globals->catSch     = hs_globals->objDef->getPrimaryLoc(HSTableDef::EXTERNAL_FORMAT);
  *hs_globals->user_table = hs_globals->objDef->getObjectFullName();
  // NOTE(review): these two flags are derived from the catalog name as
  // requested/defaulted above, not from the catalog of the definition that
  // was actually found - confirm that this is intended.
  hs_globals->isHbaseTable = HSGlobalsClass::isHbaseCat(catName);
  hs_globals->isHiveTable  = HSGlobalsClass::isHiveCat(catName);

  if (hs_globals->tableFormat == SQLMX)
    {
      // Determine the schema version for this MX table.
      if (LM->LogNeeded())
        {
          snprintf(LM->msg, sizeof(LM->msg),
                   "\nCHECK SCHEMA VERSION FOR TABLE: %s\n",
                   hs_globals->user_table->data());
          LM->Log(LM->msg);
        }
      HSGlobalsClass::schemaVersion = getTableSchemaVersion(hs_globals->user_table->data());
      if (HSGlobalsClass::schemaVersion == COM_VERS_UNKNOWN)
        {
          HSFuncMergeDiags(-UERR_INTERNAL_ERROR, "GET_SCHEMA_VERSION");
          return -1;
        }

      if (HSGlobalsClass::schemaVersion >= COM_VERS_2300)
        HSGlobalsClass::autoInterval = CmpCommon::getDefaultLong(USTAT_AUTOMATION_INTERVAL);

      if (LM->LogNeeded())
        {
          snprintf(LM->msg, sizeof(LM->msg),
                   "\nUpdateStats: TABLE: %s; SCHEMA VERSION: %d; AUTOMATION INTERVAL: %d\n",
                   hs_globals->user_table->data(),
                   HSGlobalsClass::schemaVersion, HSGlobalsClass::autoInterval);
          LM->Log(LM->msg);
        }

      // Catalog of the definition that was found; kept under its own name so
      // it does not hide the requested catalog name declared above.
      NAString objCatName(hs_globals->objDef->getCatName());

      *hs_globals->hstogram_table   = getHistogramsTableLocation(hs_globals->catSch->data(), FALSE);
      *hs_globals->hsintval_table   = getHistogramsTableLocation(hs_globals->catSch->data(), FALSE);
      *hs_globals->hsperssamp_table = getHistogramsTableLocation(hs_globals->catSch->data(), FALSE);

      NABoolean isHbaseOrHive = HSGlobalsClass::isHbaseCat(objCatName) ||
                                HSGlobalsClass::isHiveCat(objCatName);
      if (isHbaseOrHive)
        {
          hs_globals->hstogram_table->append(".").append(HBASE_HIST_NAME);
          hs_globals->hsintval_table->append(".").append(HBASE_HISTINT_NAME);
          hs_globals->hsperssamp_table->append(".").append(HBASE_PERS_SAMP_NAME);
        }
      else
        {
          hs_globals->hstogram_table->append(".HISTOGRAMS");
          hs_globals->hsintval_table->append(".HISTOGRAM_INTERVALS");
          hs_globals->hsperssamp_table->append(".PERSISTENT_SAMPLES");
        }
    }
  else
    {
      *hs_globals->hstogram_table = hs_globals->objDef->getCatalogLoc(HSTableDef::EXTERNAL_FORMAT);
      hs_globals->hstogram_table->append(".HISTOGRM");
      *hs_globals->hsintval_table = hs_globals->objDef->getCatalogLoc(HSTableDef::EXTERNAL_FORMAT);
      hs_globals->hsintval_table->append(".HISTINTS");

      // RESET CQDS:
      HSFuncExecQuery("CONTROL QUERY DEFAULT POS RESET");
      HSFuncExecQuery("CONTROL QUERY DEFAULT POS_NUM_OF_PARTNS RESET");
    }

  return 0;
}
// Returns TRUE if the single-column group list (hs_globals->singleGroup)
// already holds a one-column group for table column colNumber, else FALSE.
NABoolean ColumnExists(const Lng32 colNumber)
{
  for (HSColGroupStruct *candidate = GetHSContext()->singleGroup;
       candidate != NULL;
       candidate = candidate->next)
    {
      const HSColumnStruct &firstCol = candidate->colSet[0];
      if (candidate->colCount == 1 && firstCol.colnum == colNumber)
        return TRUE;
    }
  return FALSE;
}
// Create a one-column group for table column colNumber and link it into the
// doubly-linked list whose head is groupStart.
//
// colNumber  - column position used to fetch the column info from objDef.
// groupStart - head of the list; updated when the list was empty or when the
//              new group is prepended.
// prepend    - TRUE: the new group becomes the head; FALSE: it is appended
//              after the current tail.
//
// Also sets hs_globals->hasOversizedColumns when the column is a character
// column longer than hs_globals->maxCharColumnLengthInBytes.
// Returns the newly allocated group (allocated on STMTHEAP).
HSColGroupStruct* AddSingleColumn(const Lng32 colNumber, HSColGroupStruct*& groupStart, NABoolean prepend = TRUE)
{
  HSGlobalsClass *ctx = GetHSContext();
  HSColGroupStruct *created = new(STMTHEAP) HSColGroupStruct;
  HSColumnStruct column(ctx->objDef->getColInfo(colNumber));

  if (DFS2REC::isAnyCharacter(column.datatype) &&
      column.length > ctx->maxCharColumnLengthInBytes)
    ctx->hasOversizedColumns = TRUE;

  column.colnum = colNumber;
  created->colSet.insert(column);
  created->colCount = 1;
  // Note: ToAnsiIdentifier() determines whether a name needs to be delimited
  // with quotes. This function works for shift-JIS but may not work for other
  // non-ISO88591 char sets such as Korean, BIG5, GB2312, and GB18030, ...
  *created->colNames = ToAnsiIdentifier(column.colname->data());

  // Empty list: the new group simply becomes the head.
  if (groupStart == NULL)
    {
      groupStart = created;
      return created;
    }

  if (prepend)
    {
      // Link in front of the current head.
      created->next = groupStart;
      groupStart->prev = created;
      groupStart = created;
    }
  else
    {
      // Walk to the last node and link behind it.
      HSColGroupStruct *last = groupStart;
      while (last->next)
        last = last->next;
      last->next = created;
      created->prev = last;
      created->next = NULL;
    }
  return created;
}
// Add a one-column group for table column colNumber to the front of
// hs_globals->singleGroup and bump both group counters. Always returns 0.
Lng32 AddSingleColumn(const Lng32 colNumber)
{
  HSGlobalsClass *ctx = GetHSContext();

  // The list overload uses its default placement (prepend).
  AddSingleColumn(colNumber, ctx->singleGroup);

  ++ctx->groupCount;
  ++ctx->singleGroupCount;
  return 0;
}
// Add a multi-column group built from colSet to hs_globals->multiGroup.
//
// For each column the table column number is looked up by name and its
// position within the group is recorded (colSet is updated in place).
//
// Returns:
//   0          - the group was added at the front of the multi-column list.
//   HS_WARNING - the set has fewer than 2 columns, or an identical group is
//                already present; nothing is added.
//   negative   - a column name could not be resolved, or the list repeats a
//                column (UERR_COLUMNLIST_NOT_UNIQUE is added to the diags).
//                These go through HSHandleError (macro defined elsewhere,
//                expected to return the error code).
Lng32 AddColumnSet(HSColSet &colSet)
{
  HSGlobalsClass *hs_globals = GetHSContext();
  HSLogMan *LM = HSLogMan::Instance();
  Lng32 retcode = 0;
  Lng32 colCount = 0;
  NABoolean badColList = FALSE;
  NAString colNames = "";
  NAString temp;
  const Int32 numCols = colSet.entries();

  if (numCols < 2) // Must have at least 2 columns in multi-col set.
    {
      if (LM->LogNeeded())
        {
          // An empty set has no element 0 to name in the message.
          const char *onlyName = (numCols > 0) ? colSet[0].colname->data() : "";
          snprintf(LM->msg, sizeof(LM->msg),
                   "\t\tIgnoring Column Group with single unique entry (%s)",
                   onlyName);
          LM->Log(LM->msg);
        }
      return HS_WARNING;
    }

  for (Int32 i = 0; i < numCols; i++) // update column numbers, position & NO DUPLICATES
    {
      HSColumnStruct &col = colSet[i];

      // Each name is stored as " name," so that contains() can only match a
      // complete entry of the list built so far.
      temp = " ";
      temp += ToAnsiIdentifier(col.colname->data());
      // Note: ToAnsiIdentifier() determines whether a name needs to be delimited
      // with quotes. This function works for shift-JIS but may not work for other
      // non-ISO88591 char sets such as Korean, BIG5, GB2312, and GB18030, ...
      temp += ",";

      if (colNames.contains(temp))
        badColList = TRUE;
      else
        {
          col.colnum = hs_globals->objDef->getColNum(col.colname->data());
          if (col.colnum < 0)
            {
              retcode = -1;
              HSHandleError(retcode);
            }
          col.position = colCount;
          colCount++;
        }
      colNames += temp;
    }

  colNames.remove(0,1);                    // remove first blank
  colNames.remove(colNames.length() - 1);  // remove last comma

  if (badColList) // column list contains repeating columns
    {
      if (LM->LogNeeded())
        {
          // The list can be arbitrarily long; keep the write bounded.
          snprintf(LM->msg, sizeof(LM->msg),
                   "\t\tNon-Unique Column Group (%s)", colNames.data());
          LM->Log(LM->msg);
        }
      HSFuncMergeDiags(- UERR_COLUMNLIST_NOT_UNIQUE, colNames.data());
      retcode = -1;
      HSHandleError(retcode);
    }
  else if (GroupExists(colSet))
    {
      if (LM->LogNeeded())
        {
          snprintf(LM->msg, sizeof(LM->msg),
                   "\t\tDuplicate Column Group (%s) has been ignored.", colNames.data());
          LM->Log(LM->msg);
        }
      retcode = HS_WARNING;
    }
  else
    {
      HSColGroupStruct *newGroup = new(STMTHEAP) HSColGroupStruct;
      newGroup->colSet = colSet;
      newGroup->colCount = colCount;
      *newGroup->colNames = colNames.data();

      if (hs_globals->multiGroup == NULL) // first group entry
        {
          hs_globals->multiGroup = newGroup;
        }
      else // insert at the front of the list
        {
          newGroup->next = hs_globals->multiGroup;
          hs_globals->multiGroup->prev = newGroup;
          hs_globals->multiGroup = newGroup;
        }
      hs_globals->groupCount++;
    }

  return retcode;
}
// For debugging only.
// When logging is enabled, writes title followed by one line per column of
// colSet (index, column name, column number) to the log. Does nothing when
// logging is not needed.
void showColSet(HSColSet &colSet, const char *title)
{
  HSLogMan *LM = HSLogMan::Instance();
  if (!LM->LogNeeded())
    return;

  // title is data, not a format string: print it through "%s" so that any
  // '%' it may contain is logged literally.
  snprintf(LM->msg, sizeof(LM->msg), "%s", title);
  LM->Log(LM->msg);

  for (UInt32 i = 0; i < colSet.entries(); i++)
    {
      snprintf(LM->msg, sizeof(LM->msg), "\t\tcolSet[%u]: :%s: %d",
               i, colSet[i].colname->data(), colSet[i].colnum);
      LM->Log(LM->msg);
    }
}
// Returns TRUE when hs_globals->multiGroup already contains a group whose
// column set compares equal to colSet, so that the caller can avoid adding a
// duplicate; FALSE otherwise. The sets examined are dumped to the log through
// showColSet().
NABoolean GroupExists(HSColSet &colSet)
{
  HSGlobalsClass *ctx = GetHSContext();

  showColSet(colSet, "GroupExists: argument: colSet");

  for (HSColGroupStruct *candidate = ctx->multiGroup;
       candidate != NULL;
       candidate = candidate->next)
    {
      showColSet(candidate->colSet, "GroupExists: mgroup->colSet");
      if (candidate->colSet == colSet)
        return TRUE;
    }
  return FALSE;
}
// Decides whether the n columns of group coincide, in any order, with a
// leading run of the index key: either key columns 0..n-1, or key columns
// 1..n (key column 0 being "_SALT_"). On a TRUE return, saltMatched tells
// whether one of the group's columns is the salt column.
//
// The group holds no duplicate columns, so it suffices to verify that every
// group column equals one of the first n+1 key columns and then to rule out
// the case where a middle key column was skipped.
//
// Returns FALSE as well when the group consists of the salt column alone.
// saltMatched is left untouched when the group has more columns than the key.
NABoolean MatchesIndexPrefix(HSColGroupStruct* group,
                             NAFileSet* index,
                             NABoolean& saltMatched)
{
  const NAColumnArray& keyColumns = index->getIndexKeyColumns();
  const CollIndex keyColCount = keyColumns.entries();
  const Lng32 groupColCount = group->colCount;

  // more group cols than index cols; no chance of match
  if (groupColCount > keyColCount)
    return FALSE;

  // Highest key position a group column may match: n when the key is longer
  // than the group (room for an omitted salt column), else n-1.
  const CollIndex lastKeyIdx = (groupColCount < keyColCount) ? groupColCount
                                                             : groupColCount - 1;

  NABoolean sawFirstKeyCol = FALSE;
  NABoolean sawLastKeyCol = FALSE;
  saltMatched = FALSE;

  for (Int32 g = 0; g < groupColCount; g++)
    {
      const Lng32 tablePos = group->colSet[g].colnum;
      NABoolean matched = FALSE;

      for (CollIndex k = 0; k <= lastKeyIdx; k++)
        {
          if (tablePos != keyColumns[k]->getPosition())
            continue;

          matched = TRUE;
          if (keyColumns[k]->isSaltColumn())
            saltMatched = TRUE;
          if (k == 0)
            sawFirstKeyCol = TRUE;
          else if (k == lastKeyIdx)
            sawLastKeyCol = TRUE;
          break;   // first hit is enough for this group column
        }

      if (!matched)
        return FALSE;
    }

  // If _SALT_ alone is specified, no action is needed.
  if (groupColCount == 1 && saltMatched)
    return FALSE;

  // Every group column matched one of the first n+1 key columns. When the key
  // is longer than the group and both ends of that window were hit, a column
  // in between must be missing, so the group is not a prefix of the key.
  const NABoolean gapInPrefix = (groupColCount < keyColCount) &&
                                sawFirstKeyCol && sawLastKeyCol;
  if (gapInPrefix)
    return FALSE;
  return TRUE;
}
// This is called by AddSaltToIndexPrefixes() when a group (SC or MC) is found
// that coincides with the first n columns of an index (possibly excluding _SALT_),
// where n is the number of columns in the group. The function adds an MC group
// that adds _SALT_ (if missing) to the columns of the index contained in matchedGroup,
// in index order.
//
// index        - index whose leading key columns form the new group.
// matchedGroup - the group that matched; removed from the group list first
//                when groupHasSalt is TRUE.
// groupHasSalt - TRUE if matchedGroup already contains the salt column.
//
// Returns whatever AddColumnSet() returns for the new column set.
Lng32 AddSaltedIndexPrefix(NAFileSet* index,
                           HSColGroupStruct* matchedGroup,
                           NABoolean groupHasSalt)
{
  HSLogMan *LM = HSLogMan::Instance();
  HSGlobalsClass *hs_globals = GetHSContext();
  const NAColumnArray& inxCols = index->getIndexKeyColumns();

  // AddColumnSet() copies the set into the group it creates, so a local set
  // is sufficient here (AddKeyGroups() builds its sets the same way).
  HSColSet saltedColSet(STMTHEAP);

  // If the group already contains _SALT_, there will be 1 less column of the
  // index to include.
  CollIndex lastColInxToInclude = matchedGroup->colCount;
  if (groupHasSalt)
    lastColInxToInclude--;

  // Create an MC that includes the columns of the matching MC with the columns
  // in index order, adding _SALT_ at the beginning if it wasn't present in the
  // original MC.
  for (CollIndex inxColInx = 0; inxColInx <= lastColInxToInclude; inxColInx++)
    {
      HSColumnStruct colStruct;
      colStruct = hs_globals->objDef
                     ->getColInfo(inxCols[inxColInx]->getPosition());
      colStruct.position = inxColInx; // position in MC
      saltedColSet.insert(colStruct); // the set keeps its own copy
    }

  if (!groupHasSalt && LM->LogNeeded())
    {
      snprintf(LM->msg, sizeof(LM->msg),
               "Adding an MC to duplicate index subset (%s) with \"_SALT_\" prefix added.",
               matchedGroup->colNames->data());
      LM->Log(LM->msg);
    }

  // If we formed the new, index-ordered group and had to add _SALT_, we leave
  // the original group in place. However, if the new group has the same cols
  // and only the order was changed, delete the original first, or the new one
  // will be rejected as a duplicate when we try to add it.
  if (groupHasSalt)
    hs_globals->removeGroup(matchedGroup);

  return AddColumnSet(saltedColSet);
}
// Look for groups that constitute a leading prefix of the primary key, possibly
// excluding the "_SALT_" column. For each such group that omits "_SALT_", add
// another group consisting of that group plus "_SALT_", with the columns in index
// order. For each such group that already includes "_SALT_", replace it with a
// group containing the same set of columns, but in index order. This function
// should only be called for a salted table.
//
// The single-column list is walked first, then the multi-column list.
// Returns the code of the last AddSaltedIndexPrefix() call (0 if none was
// made). An error code from any call is passed to HSHandleError (macro
// defined elsewhere, expected to return it) instead of being overwritten by
// the result for a later group.
Lng32 AddSaltToIndexPrefixes()
{
  Lng32 retcode = 0;
  HSGlobalsClass *hs_globals = GetHSContext();
  NATable* naTbl = hs_globals->objDef->getNATable();
  NAFileSet* clusteringIndex = naTbl->getClusteringIndex();
  NABoolean groupHasSalt;
  HSColGroupStruct* nextGroup;

  NABoolean doingSingles = TRUE;
  HSColGroupStruct* group = hs_globals->singleGroup;
  if (!group)
    {
      group = hs_globals->multiGroup;
      doingSingles = FALSE;
    }

  while (group)
    {
      // AddSaltedIndexPrefix may remove the group it is passed from the group
      // list and deallocate it, so we grab the link to the next one from it first.
      nextGroup = group->next;

      // See if the group matches a prefix of the key, allowing _SALT_ to not
      // be present. groupHasSalt will indicate whether it was present. If it
      // matches, add the appropriate group.
      if (MatchesIndexPrefix(group, clusteringIndex, groupHasSalt))
        {
          retcode = AddSaltedIndexPrefix(clusteringIndex, group, groupHasSalt);
          // Do not let a later group's result hide this failure.
          HSHandleError(retcode);
        }

      group = nextGroup;
      if (!group && doingSingles)
        {
          // Finished the single-column list; continue with the MC list.
          doingSingles = FALSE;
          group = hs_globals->multiGroup;
        }
    }

  return retcode;
}
// Add column groups for the key of the table (its clustering index) and for
// every other index returned by NATable::getIndexList().
//
// A one-column key or index yields a single-column group unless one already
// exists. A multi-column key or index yields multi-column groups for leading
// prefixes of its columns; how many is governed by the
// USTAT_NUM_MC_GROUPS_FOR_KEYS default (see the comments below).
//
// Returns -1 (after adding UERR_NO_ONEVERYKEY to the diags) for a table in
// the hive catalog. Otherwise returns the code of the last group added;
// error codes from AddColumnSet() go through HSHandleError (macro defined
// elsewhere, expected to return them).
Lng32 AddKeyGroups()
{
  HSGlobalsClass *hs_globals = GetHSContext();
  if (HSGlobalsClass::isHiveCat(hs_globals->objDef->getCatName()))
    {
      // HSHiveTableDef::getKeyList()/getIndexArray() not yet implemented.
      *CmpCommon::diags() << DgSqlCode(-UERR_NO_ONEVERYKEY) << DgString0("hive");
      return -1;
    }

  Lng32 retcode = 0;
  HSColumnStruct col;
  NAString tempColList = "";
  NAString tempCol;
  NAString autoGroup;
  ULng32 numKeys;
  ULng32 i, j;
  Lng32 colPos;
  NATable* naTbl = hs_globals->objDef->getNATable();
  HSLogMan *LM = HSLogMan::Instance();

  // ----------------------------------------------------------
  // Generate histograms for KEY
  // ----------------------------------------------------------
  // The clustering index is included in the list of indices returned by
  // NATable::getIndexList(), so we store its pointer so we can skip it
  // when the other indexes are processed below.
  NAFileSet* clusteringIndex = naTbl->getClusteringIndex();
  const NAColumnArray& keyCols = clusteringIndex->getIndexKeyColumns();
  numKeys = keyCols.entries();

  if (numKeys == 1) // SINGLE-COLUMN KEY
    {
      colPos = keyCols[0]->getPosition();
      if (LM->LogNeeded())
        {
          snprintf(LM->msg, sizeof(LM->msg), "\t\tKEY:\t\t(%s)",
                   hs_globals->objDef->getColName(colPos));
          LM->Log(LM->msg);
        }

      if (ColumnExists(colPos)) // avoid duplicates
        {
          LM->Log("\t\t** duplicate column group has been ignored.");
        }
      else // add to single-column group list
        {
          retcode = AddSingleColumn(colPos);
        }
    }
  else if (numKeys > 1) // MULTI-COLUMN KEY
    {
      // Create MC groups from leading prefixes of the key, longest first,
      // down to 2 columns. No MC has more columns than the value of CQD
      // USTAT_NUM_MC_GROUPS_FOR_KEYS; e.g. with a value of 5 and a 10-column
      // key, MC groups with 5, 4, 3, and 2 columns are created.
      ULng32 minMCGroupSz = 2;
      ULng32 maxMCGroups = (ULng32)
          CmpCommon::getDefaultNumeric(USTAT_NUM_MC_GROUPS_FOR_KEYS);

      // Generate no MCs with more cols than specified by the cqd.
      if (numKeys > maxMCGroups)
        numKeys = maxMCGroups;

      // For salted table, generate only the longest MC for the key (subject
      // to max cols determined above) unless a cqd is set to gen all MCs of
      // allowable sizes.
      // The lower bound is only ever raised: if the cqd clamped numKeys below
      // 2, lowering the bound to it would let the unsigned countdown in the
      // loop below wrap around instead of terminating.
      if (CmpCommon::getDefault(USTAT_ADD_SALTED_KEY_PREFIXES_FOR_MC) == DF_OFF &&
          hs_globals->objDef->getColNum("_SALT_", FALSE) >= 0 &&
          numKeys > minMCGroupSz)
        minMCGroupSz = numKeys;

      while (numKeys >= minMCGroupSz) // Create only MC groups not single cols
        {
          HSColSet colSet(STMTHEAP);
          autoGroup = "(";
          for (j = 0; j < numKeys; j++)
            {
              colPos = keyCols[j]->getPosition();
              col = hs_globals->objDef->getColInfo(colPos);
              col.colnum = colPos;
              colSet.insert(col);
              autoGroup += col.colname->data();
              autoGroup += ",";
            }

          if (LM->LogNeeded())
            {
              autoGroup.replace(autoGroup.length()-1,1,")"); // replace comma with close parenthesis
              snprintf(LM->msg, sizeof(LM->msg), "\t\tKEY:\t\t%s", autoGroup.data());
              LM->Log(LM->msg);
            }

          retcode = AddColumnSet(colSet);
          HSHandleError(retcode);

          numKeys--;
        }
    }

  // ----------------------------------------------------------
  // Generate histograms for all INDEXES
  // ----------------------------------------------------------
  const NAFileSetList& indexes = naTbl->getIndexList();
  for (i = 0; i < indexes.entries(); i++)
    {
      NAFileSet* index = indexes[i];
      if (index == clusteringIndex)
        continue; // clustering index processed above already

      const NAColumnArray& inxKeyCols = index->getIndexKeyColumns();
      numKeys = inxKeyCols.entries();

      if (numKeys == 1) // SINGLE-COLUMN INDEX
        {
          colPos = inxKeyCols[0]->getPosition();
          if (LM->LogNeeded())
            {
              snprintf(LM->msg, sizeof(LM->msg), "\t\tINDEX[%u]\t(%s)",
                       static_cast<UInt32>(i),
                       hs_globals->objDef->getColName(colPos));
              LM->Log(LM->msg);
            }

          if (ColumnExists(colPos)) // avoid duplicates
            {
              LM->Log("\t\t*** duplicate column group has been ignored.");
            }
          else // add to single-column group list
            {
              retcode = AddSingleColumn(colPos);
            }
        }
      else // MULTI-COLUMN INDEX
        {
          // Create MC groups from leading prefixes of the index columns,
          // longest first. At most USTAT_NUM_MC_GROUPS_FOR_KEYS (a CQD)
          // prefixes are tried, and none shorter than 2 columns; e.g. with a
          // value of 5 and a 10-column index, the prefixes with 10, 9, 8, 7
          // and 6 columns are used.
          ULng32 minMCGroupSz = 2;
          ULng32 maxMCGroups = (ULng32)
              CmpCommon::getDefaultNumeric(USTAT_NUM_MC_GROUPS_FOR_KEYS);
          if (numKeys > maxMCGroups)
            minMCGroupSz = numKeys - maxMCGroups + 1;

          while (numKeys >= minMCGroupSz) // MinMCGroupSz is greater than 1.
            {
              HSColSet colSet(STMTHEAP);
              Lng32 numColsInGroup = 0;   // distinct columns in this prefix
              tempColList = "";
              autoGroup = "(";
              for (j = 0; j < numKeys; j++)
                {
                  colPos = inxKeyCols[j]->getPosition();
                  // Positions are recorded as ".N." so that contains() can
                  // only match a complete position number.
                  tempCol = ".";
                  tempCol += LongToNAString(colPos);
                  tempCol += ".";

                  // Eliminate duplicate columns in the index;
                  // They may have been introduced by appending the key to the specified index.
                  if (!tempColList.contains(tempCol))
                    {
                      col = hs_globals->objDef->getColInfo(colPos);
                      col.colnum = colPos;
                      colSet.insert(col);
                      tempColList += tempCol.data();
                      numColsInGroup++;
                      autoGroup += col.colname->data();
                      autoGroup += ",";
                    }
                }

              // Only a prefix with at least two distinct columns is an MC.
              if (numColsInGroup > 1)
                {
                  if (LM->LogNeeded())
                    {
                      autoGroup.replace(autoGroup.length()-1,1,")"); // replace comma with close parenthesis
                      snprintf(LM->msg, sizeof(LM->msg), "\t\tINDEX[%u]\t%s",
                               static_cast<UInt32>(i), autoGroup.data());
                      LM->Log(LM->msg);
                    }

                  retcode = AddColumnSet(colSet);
                  HSHandleError(retcode);
                }

              numKeys--;
            }
        }
    }

  return retcode;
}
// -----------------------------------------------------------------------
// Add a single-column group for every column in the range startColumn to
// endColumn (given in either order). With startColumn == NULL the call is
// for the ON EVERY COLUMN clause: all columns of the table are added and,
// except for tables in the hive catalog, the key groups as well.
// Columns that already have a group are skipped (and logged); LOB columns
// are skipped silently.
// Returns 0 when it runs to completion; error codes are passed to
// HSHandleError (macro defined elsewhere, expected to return them).
// -----------------------------------------------------------------------
Lng32 AddEveryColumn(const char *startColumn, const char *endColumn)
{
  HSGlobalsClass *hs_globals = GetHSContext();
  HSLogMan *LM = HSLogMan::Instance();
  Lng32 retcode;
  Lng32 firstCol, lastCol;
  NAString colName;

  hs_globals->parserError = HSGlobalsClass::ERROR_SEMANTICS;

  // Can't use EVERYCOL_OPT flag for this test, it may have been set on a
  // previous call (making this a redundant, or incorrect, request to ustat
  // an individual column name). startColumn will always be NULL if this fn
  // is called to add all the columns for a table.
  if (startColumn)
    {
      firstCol = hs_globals->objDef->getColNum(startColumn);
      if (firstCol < 0)
        {
          retcode = -1;
          HSHandleError(retcode);
        }

      lastCol = hs_globals->objDef->getColNum(endColumn);
      if (lastCol < 0)
        {
          retcode = -1;
          HSHandleError(retcode);
        }

      // Accept the two bounds in either order.
      if (firstCol > lastCol)
        {
          const Lng32 swapped = lastCol;
          lastCol = firstCol;
          firstCol = swapped;
        }
    }
  else
    {
      HS_ASSERT(hs_globals->optFlags & EVERYCOL_OPT);
      firstCol = 0;
      lastCol = hs_globals->objDef->getNumCols() - 1;
    }

  for (Lng32 colNumber = firstCol; colNumber <= lastCol; colNumber++)
    {
      if (!ColumnExists(colNumber))
        {
          // A LOB column is silently excluded (the column was only
          // implicitly referenced); anything else gets a single-column group.
          if (!DFS2REC::isLOB(hs_globals->objDef->getColInfo(colNumber).datatype))
            retcode = AddSingleColumn(colNumber);
          continue;
        }

      // A group for this column is already present; avoid duplicates.
      colName = hs_globals->objDef->getColName(colNumber);
      if (LM->LogNeeded())
        {
          sprintf(LM->msg, "\t\t****Duplicate Column group (%s) has been ignored", colName.data());
          LM->Log(LM->msg);
        }
    }

  // ON EVERY COLUMN causes key groups to be added as well.
  // No ustat on keys yet for hive tables.
  if (!startColumn &&
      !HSGlobalsClass::isHiveCat(hs_globals->objDef->getCatName()))
    {
      retcode = AddKeyGroups();
      HSHandleError(retcode);
    }

  hs_globals->parserError = HSGlobalsClass::ERROR_SYNTAX;
  return 0;
}
// Get the current set of histogrammed column groups for the table, then add
// each of these to either the singleGroup or multiGroup list for histograms
// to be created.
//
// Sets hs_globals->hasOversizedColumns when any column of a group is a
// character column longer than hs_globals->maxCharColumnLengthInBytes.
// Returns the value left in retcode after the group list has been read;
// error codes go through HSHandleError (macro defined elsewhere, expected to
// return them).
//
Lng32 AddExistingColumns()
{
  HSGlobalsClass *hs_globals = GetHSContext();
  // Start from an empty list so that the loop below never walks an
  // indeterminate pointer, whatever groupListFromTable() leaves in it.
  HSColGroupStruct *groupList = NULL;
  Lng32 retcode = 0;

  // Introduce new scope for instance of HSTranController; its destructor
  // terminates the transaction. We need a transaction for this because
  // groupListFromTable() executes a query to get the current histograms,
  // and we want any acquired locks to be released before we proceed. An
  // implicit transaction started by the query would remain open and retain
  // the locks.
  {
    HSTranController TC("GET GROUP LIST FOR EXISTING", &retcode);
    HSHandleError(retcode);
    retcode = hs_globals->groupListFromTable(groupList,
                                             TRUE); // pass 'TRUE' to skip empty histograms.
    HSHandleError(retcode);
  }

  while (groupList != NULL)
    {
      // Detach the current first node from the list. It will be added to a
      // different list (hs_globals->singleGroup or hs_globals->multiGroup).
      // Don't change group->next until groupList points to something else.
      //
      HSColGroupStruct *group = groupList;
      groupList = groupList->next;
      if (groupList)
        groupList->prev = NULL;
      group->next = NULL;

      // Set oldHistid to 0 so hist id will be reread during FlushStatistics.
      // Reading it in the same transaction that writes the histograms keeps
      // 2 or more concurrent Update Stats statements from coming up with the
      // same new hist id.
      group->oldHistid = 0;

      // Look through the columns in this group for any oversized columns.
      for (UInt32 i = 0; i < group->colSet.entries(); i++)
        {
          bool isOverSized = DFS2REC::isAnyCharacter(group->colSet[i].datatype) &&
                             (group->colSet[i].length > hs_globals->maxCharColumnLengthInBytes);
          if (isOverSized)
            {
              hs_globals->hasOversizedColumns = TRUE;
            }
        }

      hs_globals->addGroup(group);
    }

  return retcode;
}