| /********************************************************************** |
| // @@@ START COPYRIGHT @@@ |
| // |
| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // |
| // @@@ END COPYRIGHT @@@ |
| **********************************************************************/ |
| /* -*-C++-*- |
| ***************************************************************************** |
| * |
| * File: hs_parser.C |
| * Description: Functions used by parser. |
| * Created: 09/25/96 |
| * Language: C++ |
| * |
| * |
| *****************************************************************************/ |
| #define SQLPARSERGLOBALS_NADEFAULTS // must be first |
| #define HS_FILE "hs_parser" |
| |
| #include <time.h> |
| #include <sys/time.h> |
| #include "CmpCommon.h" |
| #include "CmpContext.h" |
| #include "ComAnsiNamePart.h" |
| #include "SchemaDB.h" |
| |
| |
| //IMPORTANT: The following header is required or else the main sqlparser will be invoked |
| #include "hs_parser_defs.h" |
| #include "hs_parser.h" |
| #include "hs_globals.h" |
| #include "hs_cli.h" |
| #include "hs_la.h" |
| #include "SqlParserGlobals.h" // must be last #include |
| |
| #include "ComSchemaName.h" // for ComSchemaName |
| |
| extern Int32 yyparse(void*); |
| extern void HSFuncResetLexer(void*); |
| extern void init_scanner (void* &); |
| extern void destroy_scanner(void* &scanner); |
| Lng32 AddSingleColumn(const Lng32 colNumber); |
| |
| |
| // ----------------------------------------------------------------------- |
| // Invoke yyparse, set hsGlobal structure for each column group. |
| // ----------------------------------------------------------------------- |
| Lng32 HSFuncParseStmt() |
| { |
| HSGlobalsClass *hs_globals = GetHSContext(); |
| Lng32 retcode; |
| HSColGroupStruct *mgroup = NULL; |
| |
| void* scanner; |
| init_scanner (scanner); |
| HSFuncResetLexer(scanner); |
| retcode = yyparse(scanner); |
| HSHandleError(retcode); |
| destroy_scanner (scanner); |
| |
| // The parser does not always return immediately following an error, so that |
| // it can report as many errors as possible. This can result in a nonzero |
| // return code being overwritten. To detect this case, we check the diagnostics |
| // area and return with the appropriate sqlcode if necessary. |
| retcode = hs_globals->getRetcodeFromDiags(); |
| HSHandleError(retcode); |
| |
| hs_globals->parserError = HSGlobalsClass::ERROR_SEMANTICS; |
| |
| // We are done here if it is the showstats command |
| if (hs_globals->optFlags & SHOWSTATS_OPT) |
| return 0; |
| |
| // Automatically generate single-column histograms for all multi-column |
| // histograms requested. This is required to calculate UEC counts for |
| // multi-column histograms. |
| // However, do not generate them when the CLEAR option is requested, unless |
| // ON EVERY COLUMN or ON EVERY KEY is also specified. |
| if (NOT (hs_globals->optFlags & CLEAR_OPT) || |
| hs_globals->optFlags & EVERYCOL_OPT || |
| hs_globals->optFlags & EVERYKEY_OPT) |
| { |
| mgroup = hs_globals->multiGroup; |
| while (mgroup != NULL) |
| { |
| for (Int32 i=0; i<mgroup->colCount; i++) |
| { |
| HSColumnStruct &col = mgroup->colSet[i]; |
| if (NOT ColumnExists(col.colnum)) |
| { |
| retcode = AddSingleColumn(col.colnum); |
| HSHandleError(retcode); |
| } |
| } |
| mgroup = mgroup->next; |
| } |
| } |
| |
| // ---------------------------------------------------------------------- |
| // Construct statistics time in the format: YYYY-MM-DD:HH:MM:SS. |
| // We use GMT time. |
| // ---------------------------------------------------------------------- |
| time_t t; |
| time(&t); |
| char pt[30]; |
| |
| strftime(pt, 30, "%Y-%m-%d:%H:%M:%S", gmtime(&t)); |
| *hs_globals->statstime = pt; |
| |
| // Also store a numerical timestamp with 10 digits MMDDHHMMSS. |
| hs_globals->statsTimeInt = (pt[5] - '0') * 1000000000 + |
| (pt[6] - '0') * 100000000 + |
| (pt[8] - '0') * 10000000 + |
| (pt[9] - '0') * 1000000 + |
| (pt[11] - '0') * 100000 + |
| (pt[12] - '0') * 10000 + |
| (pt[14] - '0') * 1000 + |
| (pt[15] - '0') * 100 + |
| (pt[17] - '0') * 10 + |
| (pt[18] - '0'); |
| return 0; |
| } |
| |
| // ----------------------------------------------------------------------- |
| // Construct a fully qualified table name. |
| // ----------------------------------------------------------------------- |
| Lng32 AddTableName( const hs_table_type type |
| , const char *table |
| , const char *schema |
| , const char *catalog |
| ) |
| { |
| HSGlobalsClass *hs_globals = GetHSContext(); |
| |
| NAString catName, schName, objName; |
| NAString extName; |
| NAString defaultCat, defaultSch; |
| NAString userLocation; |
| Lng32 retcode = 0; |
| |
| hs_globals->tableType = type; |
| HSLogMan *LM = HSLogMan::Instance(); |
| |
| // SET MPLOC is converted to CQD (setting values for default |
| // attributes MP_SYSTEM, MP_VOLUME, MP_SUBVOLUME). It does not |
| // update the global MPLOC value stored in SqlParser_MPLOC. The |
| // following updates the global MPLOC value to be consistent with |
| // the default attribute values set by SET MPLOC/CQD. |
| ActiveSchemaDB()->getDefaults().getSqlParser_NADefaults(); |
| |
| if (type == GUARDIAN_TABLE) |
| { |
| if (*table == '$') |
| { // Qualify with system name. |
| extName = SqlParser_MPLOC.getSystemName(); |
| extName += "."; |
| extName += table; |
| } |
| else |
| extName = table; |
| hs_globals->tableFormat = SQLMP; |
| } |
| else |
| { |
| if (catalog) |
| catName = catalog; |
| else |
| { |
| catName = ActiveSchemaDB()->getDefaultSchema().getCatalogName(); |
| } |
| |
| if (schema) |
| schName = schema; |
| else |
| { |
| schName = ActiveSchemaDB()->getDefaultSchema().getSchemaName(); |
| } |
| |
| objName = table; |
| extName = catName + "." + schName + "." + objName; |
| } |
| |
| hs_globals->objDef = NULL; |
| |
| // Search in volatile schema first. If not found, search in regular cat/sch. |
| if ((CmpCommon::context()->sqlSession()->volatileSchemaInUse()) && |
| (type != GUARDIAN_TABLE) && |
| (! catalog)) |
| { |
| // search using the volatile schema name. |
| NAString &volCatName = CmpCommon::context()->sqlSession()->volatileCatalogName(); |
| NAString &volSchName = CmpCommon::context()->sqlSession()->volatileSchemaName(); |
| NAString volObjName = table; |
| |
| |
| ComObjectName volIntName(volCatName, volSchName, volObjName, |
| COM_UNKNOWN_NAME, |
| ComAnsiNamePart::INTERNAL_FORMAT); |
| if (NOT volIntName.isValid()) |
| { |
| LM->Log("***[ERROR] Unable to create an ObjectClass"); |
| HSFuncMergeDiags(-UERR_OBJECT_INACCESSIBLE, extName); |
| retcode = -1; |
| HSHandleError(retcode); |
| } |
| |
| if (LM->LogNeeded()) |
| { |
| LM->Log("Searching in volatile schema, since catalog not specified.\n"); |
| sprintf(LM->msg, "Checking volatile name (volIntName) %s.%s.%s\n", |
| volIntName.getCatalogNamePart().getInternalName().data(), |
| volIntName.getSchemaNamePart().getInternalName().data(), |
| volIntName.getObjectNamePart().getInternalName().data()); |
| LM->Log(LM->msg); |
| } |
| |
| hs_globals->objDef = HSTableDef::create(STMTHEAP, |
| volIntName, |
| hs_globals->tableType, |
| hs_globals->nameSpace); |
| |
| if (NOT hs_globals->objDef->objExists(hs_globals->isUpdatestatsStmt)) |
| { |
| // now look into the regular schema |
| delete hs_globals->objDef; |
| hs_globals->objDef = NULL; |
| } |
| else |
| { |
| // if schema name was specified, validate that it is the |
| // current username. |
| if (schema) |
| { |
| QualifiedName qn(volObjName, schName); |
| if (NOT CmpCommon::context()->sqlSession()->validateVolatileQualifiedName(qn)) |
| { |
| // table was found in the volatile schema but it is |
| // not a valid volatile name. |
| // Look for it in regular schema. |
| // error info was moved to CmpCommon::diags. Clear it. |
| CmpCommon::diags()->clear(); |
| delete hs_globals->objDef; |
| hs_globals->objDef = NULL; |
| } |
| } |
| } |
| } |
| |
| if (hs_globals->objDef == NULL) |
| { |
| ComObjectName intName(catName, schName, objName, |
| COM_UNKNOWN_NAME, |
| ComAnsiNamePart::INTERNAL_FORMAT); |
| if (NOT intName.isValid()) |
| { |
| LM->Log("***[ERROR] Unable to create an ObjectClass"); |
| HSFuncMergeDiags(-UERR_OBJECT_INACCESSIBLE, extName); |
| retcode = -1; |
| HSHandleError(retcode); |
| } |
| |
| |
| hs_globals->objDef = HSTableDef::create(STMTHEAP, |
| intName, |
| hs_globals->tableType, |
| hs_globals->nameSpace); |
| |
| // just do this check once since it side effects diags (not to mention |
| // multiple calls do multiple metadata lookups in failure scenarios) |
| NABoolean objExists = hs_globals->objDef->objExists(hs_globals->isUpdatestatsStmt); |
| |
| // try public schema if an object is not qualified and not found |
| if ((NOT schema) && |
| (NOT objExists)) |
| { |
| NAString pubSch = ActiveSchemaDB()->getDefaults().getValue(PUBLIC_SCHEMA_NAME); |
| ComSchemaName pubSchema(pubSch); |
| if (NOT pubSchema.getSchemaNamePart().isEmpty()) |
| { |
| NAString pubSchName = pubSchema.getSchemaNamePart().getInternalName(); |
| NAString pubCatName = (pubSchema.getCatalogNamePart().isEmpty() ? |
| catName:pubSchema.getCatalogNamePart().getInternalName()); |
| ComObjectName pubIntName(pubCatName, pubSchName, objName, |
| COM_UNKNOWN_NAME, ComAnsiNamePart::INTERNAL_FORMAT); |
| |
| if (pubIntName.isValid()) |
| { |
| HSTableDef *pubObjDef = HSTableDef::create(STMTHEAP, |
| pubIntName, |
| hs_globals->tableType, |
| hs_globals->nameSpace); |
| |
| if (pubObjDef->objExists(hs_globals->isUpdatestatsStmt)) |
| { |
| hs_globals->objDef = pubObjDef; |
| objExists = TRUE; |
| } |
| } |
| } |
| } |
| |
| if (NOT objExists) |
| { |
| ComDiagsArea & diagsArea = GetHSContext()->diagsArea; |
| if (!diagsArea.findCondition(-UERR_OBJECT_INACCESSIBLE)) |
| { |
| // only add this error in if objExists check didn't already |
| // (it's annoying to have the same error repeated) |
| HSFuncMergeDiags(-UERR_OBJECT_INACCESSIBLE, extName); |
| } |
| retcode = -1; |
| HSHandleError(retcode); |
| } |
| } |
| |
| //10-040123-2660 We only support tables. We do not allow views. |
| // Tables cannot be metadata tables. |
| if (((hs_globals->objDef->getObjectType() != COM_BASE_TABLE_OBJECT) && |
| (hs_globals->objDef->getObjectType() != COM_MV_OBJECT)) || |
| (hs_globals->objDef->getNATable()->isSeabaseMDTable()) || |
| (hs_globals->objDef->getNATable()->isSeabasePrivSchemaTable())) |
| { |
| HSFuncMergeDiags(-UERR_INVALID_OBJECT, extName); |
| retcode = -1; |
| HSHandleError(retcode); |
| } |
| retcode = hs_globals->objDef->getColumnNames(); |
| HSFuncExecQuery("CONTROL QUERY DEFAULT DISPLAY_DIVISION_BY_COLUMNS RESET"); |
| HSHandleError(retcode); |
| |
| hs_globals->tableFormat = hs_globals->objDef->getObjectFormat(); |
| *hs_globals->catSch = hs_globals->objDef->getPrimaryLoc(HSTableDef::EXTERNAL_FORMAT); |
| *hs_globals->user_table = hs_globals->objDef->getObjectFullName(); |
| hs_globals->tableFormat = hs_globals->objDef->getObjectFormat(); |
| hs_globals->isHbaseTable = HSGlobalsClass::isHbaseCat(catName); |
| hs_globals->isHiveTable = HSGlobalsClass::isHiveCat(catName); |
| |
| if (hs_globals->tableFormat == SQLMX) |
| { |
| // Determine the schema version for this MX table. |
| if (LM->LogNeeded()) |
| { |
| sprintf(LM->msg, "\nCHECK SCHEMA VERSION FOR TABLE: %s\n", |
| hs_globals->user_table->data()); |
| LM->Log(LM->msg); |
| } |
| HSGlobalsClass::schemaVersion = getTableSchemaVersion(hs_globals->user_table->data()); |
| if (HSGlobalsClass::schemaVersion == COM_VERS_UNKNOWN) |
| { |
| HSFuncMergeDiags(-UERR_INTERNAL_ERROR, "GET_SCHEMA_VERSION"); |
| return -1; |
| } |
| |
| |
| if (HSGlobalsClass::schemaVersion >= COM_VERS_2300) |
| HSGlobalsClass::autoInterval = CmpCommon::getDefaultLong(USTAT_AUTOMATION_INTERVAL); |
| if (LM->LogNeeded()) |
| { |
| sprintf(LM->msg, "\nUpdateStats: TABLE: %s; SCHEMA VERSION: %d; AUTOMATION INTERVAL: %d\n", |
| hs_globals->user_table->data(), |
| HSGlobalsClass::schemaVersion, HSGlobalsClass::autoInterval); |
| LM->Log(LM->msg); |
| } |
| |
| NAString catName(hs_globals->objDef->getCatName()); |
| |
| *hs_globals->hstogram_table = getHistogramsTableLocation(hs_globals->catSch->data(), FALSE); |
| |
| *hs_globals->hsintval_table = getHistogramsTableLocation(hs_globals->catSch->data(), FALSE); |
| |
| *hs_globals->hsperssamp_table = getHistogramsTableLocation(hs_globals->catSch->data(), FALSE); |
| |
| NABoolean isHbaseOrHive = HSGlobalsClass::isHbaseCat(catName) || |
| HSGlobalsClass::isHiveCat(catName); |
| |
| if (isHbaseOrHive) { |
| hs_globals->hstogram_table->append(".").append(HBASE_HIST_NAME); |
| hs_globals->hsintval_table->append(".").append(HBASE_HISTINT_NAME); |
| hs_globals->hsperssamp_table->append(".").append(HBASE_PERS_SAMP_NAME); |
| } else { |
| hs_globals->hstogram_table->append(".HISTOGRAMS"); |
| hs_globals->hsintval_table->append(".HISTOGRAM_INTERVALS"); |
| hs_globals->hsperssamp_table->append(".PERSISTENT_SAMPLES"); |
| } |
| } |
| else |
| { |
| *hs_globals->hstogram_table = hs_globals->objDef->getCatalogLoc(HSTableDef::EXTERNAL_FORMAT); |
| hs_globals->hstogram_table->append(".HISTOGRM"); |
| |
| *hs_globals->hsintval_table = hs_globals->objDef->getCatalogLoc(HSTableDef::EXTERNAL_FORMAT); |
| hs_globals->hsintval_table->append(".HISTINTS"); |
| |
| // RESET CQDS: |
| HSFuncExecQuery("CONTROL QUERY DEFAULT POS RESET"); |
| HSFuncExecQuery("CONTROL QUERY DEFAULT POS_NUM_OF_PARTNS RESET"); |
| } |
| |
| return 0; |
| } |
| |
| |
| NABoolean ColumnExists(const Lng32 colNumber) |
| { |
| HSGlobalsClass *hs_globals = GetHSContext(); |
| HSColGroupStruct *group = hs_globals->singleGroup; |
| NABoolean found = FALSE; |
| |
| while (!found && group != NULL) |
| { |
| HSColumnStruct &column = group->colSet[0]; |
| if (group->colCount == 1 && |
| column.colnum == colNumber) |
| found = TRUE; |
| group = group->next; |
| } |
| |
| return found; |
| } |
| |
| HSColGroupStruct* AddSingleColumn(const Lng32 colNumber, HSColGroupStruct*& groupStart, NABoolean prepend = TRUE) |
| { |
| HSGlobalsClass *hs_globals = GetHSContext(); |
| HSColGroupStruct *newGroup = new(STMTHEAP) HSColGroupStruct; |
| HSColumnStruct newColumn = HSColumnStruct(hs_globals->objDef->getColInfo(colNumber)); |
| |
| bool isOverSized = DFS2REC::isAnyCharacter(newColumn.datatype) && |
| (newColumn.length > hs_globals->maxCharColumnLengthInBytes); |
| if (isOverSized) |
| { |
| hs_globals->hasOversizedColumns = TRUE; |
| } |
| |
| newColumn.colnum = colNumber; |
| newGroup->colSet.insert((const HSColumnStruct) newColumn); |
| newGroup->colCount = 1; |
| *newGroup->colNames = ToAnsiIdentifier(newColumn.colname->data()); |
| // Note: ToAnsiIdentifier() determines whether a name needs to be delimited |
| // with quotes. This function works for shift-JIS but may not work for other |
| // non-ISO88591 char sets such as Korean, BIG5, GB2312, and GB18030, ... |
| if (groupStart == NULL) // first group entry |
| { |
| groupStart = newGroup; |
| } |
| else // append to front of list |
| { |
| if ( prepend ) { |
| newGroup->next = groupStart; |
| groupStart->prev = newGroup; |
| groupStart = newGroup; |
| } else { |
| HSColGroupStruct* tailGroup = groupStart; |
| while ( tailGroup->next ) |
| tailGroup = tailGroup->next; |
| |
| tailGroup->next = newGroup; |
| newGroup->prev = tailGroup; |
| newGroup->next = NULL; |
| } |
| } |
| |
| return newGroup; |
| } |
| |
| Lng32 AddSingleColumn(const Lng32 colNumber) |
| { |
| HSGlobalsClass *hs_globals = GetHSContext(); |
| AddSingleColumn(colNumber, hs_globals->singleGroup); |
| hs_globals->groupCount++; |
| hs_globals->singleGroupCount++; |
| return 0; |
| } |
| |
| Lng32 AddColumnSet(HSColSet &colSet) |
| { |
| HSGlobalsClass *hs_globals = GetHSContext(); |
| Lng32 retcode = 0; |
| HSColGroupStruct *newGroup = NULL; |
| Lng32 colCount = 0; |
| NABoolean badColList = FALSE; |
| NAString colNames = ""; |
| NAString temp; |
| HSLogMan *LM = HSLogMan::Instance(); |
| Int32 numCols = colSet.entries(); |
| Int32 i; |
| |
| if (numCols < 2) // Must have at least 2 columns in multi-col set. |
| { |
| if (LM->LogNeeded()) |
| { |
| sprintf(LM->msg, "\t\tIgnoring Column Group with single unique entry (%s)", |
| colSet[0].colname->data()); |
| LM->Log(LM->msg); |
| } |
| return HS_WARNING; |
| } |
| |
| for (i=0; i<numCols; i++) // update column numbers, position & NO DUPLICATES |
| { |
| HSColumnStruct &col = colSet[i]; |
| temp = " "; |
| temp += ToAnsiIdentifier(col.colname->data()); |
| // Note: ToAnsiIdentifier() determines whether a name needs to be delimited |
| // with quotes. This function works for shift-JIS but may not work for other |
| // non-ISO88591 char sets such as Korean, BIG5, GB2312, and GB18030, ... |
| temp += ","; |
| |
| if (colNames.contains(temp)) |
| badColList = TRUE; |
| else |
| { |
| col.colnum = hs_globals->objDef->getColNum((char*)col.colname->data()); |
| if (col.colnum < 0) |
| { |
| retcode = -1; |
| HSHandleError(retcode); |
| } |
| col.position = colCount; |
| colCount++; |
| } |
| colNames += temp; |
| } |
| colNames.remove(0,1); // remove first blank |
| colNames.remove(colNames.length() - 1); // remove last comma |
| |
| if (badColList) // column list contains repeating columns |
| { |
| if (LM->LogNeeded()) |
| { |
| sprintf(LM->msg, "\t\tNon-Unique Column Group (%s)", colNames.data()); |
| LM->Log(LM->msg); |
| } |
| HSFuncMergeDiags(- UERR_COLUMNLIST_NOT_UNIQUE, colNames.data()); |
| retcode = -1; |
| HSHandleError(retcode); |
| } |
| else |
| { |
| if (GroupExists(colSet)) |
| { |
| if (LM->LogNeeded()) |
| { |
| sprintf(LM->msg, "\t\tDuplicate Column Group (%s) has been ignored.", colNames.data()); |
| LM->Log(LM->msg); |
| } |
| retcode = HS_WARNING; |
| } |
| else |
| { |
| newGroup = new(STMTHEAP) HSColGroupStruct; |
| newGroup->colSet = colSet; |
| newGroup->colCount = colCount; |
| *newGroup->colNames = colNames.data(); |
| |
| if (hs_globals->multiGroup == NULL) // first group entry |
| { |
| hs_globals->multiGroup = newGroup; |
| } |
| else // append to front of list |
| { |
| newGroup->next = hs_globals->multiGroup; |
| hs_globals->multiGroup->prev = newGroup; |
| hs_globals->multiGroup = newGroup; |
| } |
| |
| hs_globals->groupCount++; |
| } |
| } |
| |
| return retcode; |
| } |
| |
| // For debugging only.. |
| void showColSet(HSColSet &colSet, const char *title) |
| { |
| HSLogMan *LM = HSLogMan::Instance(); |
| if (LM->LogNeeded()) |
| { |
| UInt32 i; |
| sprintf(LM->msg, title); |
| LM->Log(LM->msg); |
| for (i=0; i<colSet.entries(); i++) |
| { |
| sprintf(LM->msg, "\t\tcolSet[%d]: :%s: %d", i, colSet[i].colname->data(), colSet[i].colnum); |
| LM->Log(LM->msg); |
| } |
| } |
| } |
| |
| // TRUE if there exists a multi-column group list that has identical columns |
| // to what we want to add now (colSet); i.e. we will not add duplicates. |
| NABoolean GroupExists(HSColSet &colSet) |
| { |
| HSGlobalsClass *hs_globals = GetHSContext(); |
| NABoolean found = FALSE; |
| HSColGroupStruct *mgroup; |
| |
| mgroup = hs_globals->multiGroup; |
| showColSet(colSet, "GroupExists: argument: colSet"); |
| while (mgroup != NULL) |
| { |
| showColSet(mgroup->colSet, "GroupExists: mgroup->colSet"); |
| if (mgroup->colSet == colSet) |
| return TRUE; |
| mgroup = mgroup->next; |
| } |
| return FALSE; |
| } |
| |
| // Returns TRUE iff the n columns of the group match, without regard to order, |
| // columns 1 through n of the index (column 0 of the index being "_SALT_"), |
| // or columns 0 through n-1 of the index. The saltMatched output parameter is |
| // set to indicate which of these is the case (if the function returns TRUE). |
| // Duplicate columns have been removed from the group, so it is enough to |
| // check that each one matches one of the target columns of the index. |
| NABoolean MatchesIndexPrefix(HSColGroupStruct* group, |
| NAFileSet* index, |
| NABoolean& saltMatched) |
| { |
| const NAColumnArray& inxCols = index->getIndexKeyColumns(); |
| CollIndex numInxCols = inxCols.entries(); |
| Lng32 numGrpCols = group->colCount; |
| CollIndex lastColInxToCheck; |
| |
| if (numGrpCols < numInxCols) |
| lastColInxToCheck = numGrpCols; |
| else if (numGrpCols == numInxCols) |
| lastColInxToCheck = numGrpCols - 1; |
| else // more group cols than index cols; no chance of match |
| return FALSE; |
| |
| HSColumnStruct* col; |
| NABoolean match; |
| NABoolean firstInxColMatched = FALSE; |
| NABoolean lastInxColMatched = FALSE; |
| saltMatched = FALSE; |
| for (Int32 grpColInx=0; grpColInx<numGrpCols; grpColInx++) |
| { |
| Lng32 grpColPosInTable = group->colSet[grpColInx].colnum; |
| match = FALSE; |
| for (CollIndex inxColInx=0; inxColInx<=lastColInxToCheck && !match; inxColInx++) |
| { |
| if (grpColPosInTable == inxCols[inxColInx]->getPosition()) |
| { |
| match = TRUE; |
| if (inxCols[inxColInx]->isSaltColumn()) |
| saltMatched = TRUE; |
| if (inxColInx == 0) |
| firstInxColMatched = TRUE; |
| else if (inxColInx == lastColInxToCheck) |
| lastInxColMatched = TRUE; |
| } |
| } |
| |
| if (!match) |
| return FALSE; |
| } |
| |
| // If _SALT_ alone is specified, no action is needed. |
| if (numGrpCols == 1 && saltMatched) |
| return FALSE; |
| |
| // Each of the n group columns matched one of the initial n+1 index columns. |
| // If both the first and last index columns were matched and the index has more |
| // columns than the group, then something in the middle was left out, and we |
| // can not say the group matches the index. |
| if (numGrpCols < numInxCols && firstInxColMatched && lastInxColMatched) |
| return FALSE; |
| else |
| return TRUE; |
| } |
| |
| // This is called by AddSaltToIndexPrefixes() when a group (SC or MC) is found |
| // that coincides with the first n columns of an index (possibly excluding _SALT_), |
| // where n is the number of columns in the group. The function adds an MC group |
| // that adds _SALT_ (if missing) to the columns of the index contained in matchedGroup, |
| // in index order. |
| Lng32 AddSaltedIndexPrefix(NAFileSet* index, |
| HSColGroupStruct* matchedGroup, |
| NABoolean groupHasSalt) |
| { |
| HSLogMan *LM = HSLogMan::Instance(); |
| HSGlobalsClass *hs_globals = GetHSContext(); |
| const NAColumnArray& inxCols = index->getIndexKeyColumns(); |
| HSColSet* saltedColSet = new(STMTHEAP) HSColSet(STMTHEAP); |
| HSColumnStruct* colStruct; |
| |
| // If the group already contains _SALT_, there will be 1 less column of the |
| // index to include. |
| CollIndex lastColInxToInclude = matchedGroup->colCount; |
| if (groupHasSalt) |
| lastColInxToInclude--; |
| |
| // Create an MC that includes the columns of the matching MC with the columns |
| // in index order, adding _SALT_ at the beginning if it wasn't present in the |
| // original MC. |
| for (CollIndex inxColInx=0; inxColInx<=lastColInxToInclude; inxColInx++) |
| { |
| colStruct = new(STMTHEAP) HSColumnStruct; |
| *colStruct = hs_globals->objDef |
| ->getColInfo(inxCols[inxColInx]->getPosition()); |
| colStruct->position = inxColInx; // position in MC |
| saltedColSet->insert(*colStruct); |
| } |
| |
| if (!groupHasSalt && LM->LogNeeded()) |
| { |
| snprintf(LM->msg, sizeof(LM->msg), |
| "Adding an MC to duplicate index subset (%s) with \"_SALT_\" prefix added.", |
| matchedGroup->colNames->data()); |
| LM->Log(LM->msg); |
| } |
| |
| // If we formed the new, index-ordered group and had to add _SALT_, we leave |
| // the original group in place. However, if the new group has the same cols |
| // and only the order was changed, delete the original first, or the new one |
| // will be rejected as a duplicate when we try to add it. |
| if (groupHasSalt) |
| hs_globals->removeGroup(matchedGroup); |
| |
| return AddColumnSet(*saltedColSet); |
| } |
| |
| // Look for groups that constitute a leading prefix of the primary key, possibly |
| // excluding the "_SALT_" column. For each such group that omits "_SALT_", add |
| // another group consisting of that group plus "_SALT_", with the columns in index |
| // order. For each such group that already includes "_SALT_", replace it with a |
| // group containing the same set of columns, but in index order. This function |
| // should only be called for a salted table. |
| Lng32 AddSaltToIndexPrefixes() |
| { |
| Lng32 retcode = 0; |
| HSGlobalsClass *hs_globals = GetHSContext(); |
| NATable* naTbl = hs_globals->objDef->getNATable(); |
| NAFileSet* clusteringIndex = naTbl->getClusteringIndex(); |
| NABoolean groupHasSalt; |
| HSColGroupStruct* nextGroup; |
| |
| NABoolean doingSingles = TRUE; |
| HSColGroupStruct* group = hs_globals->singleGroup; |
| if (!group) |
| { |
| group = hs_globals->multiGroup; |
| doingSingles = FALSE; |
| } |
| |
| while (group) |
| { |
| // AddSaltedIndexPrefix may remove the group it is passed from the group |
| // list and deallocate it, so we grab the link to the next one from it first. |
| nextGroup = group->next; |
| |
| // See if the group matches a prefix of the key, allowing _SALT_ to not |
| // be present. groupHasSalt will indicate whether it was present. If it |
| // matches, add the appropriate group. |
| if (MatchesIndexPrefix(group, clusteringIndex, groupHasSalt)) |
| retcode = AddSaltedIndexPrefix(clusteringIndex, group, groupHasSalt); |
| |
| group = nextGroup; |
| if (!group && doingSingles) |
| { |
| doingSingles = FALSE; |
| group = hs_globals->multiGroup; |
| } |
| } |
| |
| return retcode; |
| } |
| |
| Lng32 AddKeyGroups() |
| { |
| HSGlobalsClass *hs_globals = GetHSContext(); |
| if (HSGlobalsClass::isHiveCat(hs_globals->objDef->getCatName())) |
| { |
| // HSHiveTableDef::getKeyList()/getIndexArray() not yet implemented. |
| *CmpCommon::diags() << DgSqlCode(-UERR_NO_ONEVERYKEY) << DgString0("hive"); |
| return -1; |
| } |
| |
| Lng32 retcode = 0; |
| Lng32 numColsInGroup = 0; |
| HSColumnStruct col; |
| NAString tempColList = ""; |
| NAString tempCol; |
| NAString autoGroup; |
| ULng32 numKeys; |
| ULng32 i, j; |
| NATable* naTbl = hs_globals->objDef->getNATable(); |
| HSLogMan *LM = HSLogMan::Instance(); |
| |
| // ---------------------------------------------------------- |
| // Generate histograms for KEY |
| // ---------------------------------------------------------- |
| // The clustering index is included in the list of indices returned by |
| // NATable::getIndexList(), so we store its pointer so we can skip it |
| // when the other indexes are processed below. |
| NAFileSet* clusteringIndex = naTbl->getClusteringIndex(); |
| const NAColumnArray& keyCols = clusteringIndex->getIndexKeyColumns(); |
| Lng32 colPos; |
| numKeys = keyCols.entries(); |
| |
| if (numKeys == 1) // SINGLE-COLUMN KEY |
| { |
| colPos = keyCols[0]->getPosition(); |
| if (LM->LogNeeded()) |
| { |
| sprintf(LM->msg, "\t\tKEY:\t\t(%s)", hs_globals->objDef->getColName(colPos)); |
| LM->Log(LM->msg); |
| } |
| |
| if (ColumnExists(colPos)) // avoid duplicates |
| { |
| LM->Log("\t\t** duplicate column group has been ignored."); |
| } |
| else // add to single-column group list |
| { |
| retcode = AddSingleColumn(colPos); |
| } |
| } |
| else if (numKeys > 1) // MULTI-COLUMN KEY |
| { |
| // Create multiple MC group(s) if numkeys > 1. Subset MC groups will |
| // also be created if numkeys > 2, E.g. If numkeys = 5, then |
| // MC groups with 5, 4, 3, and 2 columns will be created using |
| // the key columns. Note that if numkeys is larger than CQD |
| // USTAT_NUM_MC_GROUPS_FOR_KEYS (default = 5), then the number |
| // of groups created will be limited by this value. So, e.g. if |
| // numkeys = 10, then MC groups with 5, 4, 3, and 2 columns will |
| // be created (that is, 5 groups will be created - incl the single). |
| |
| ULng32 minMCGroupSz = 2; |
| ULng32 maxMCGroups = (ULng32) |
| CmpCommon::getDefaultNumeric(USTAT_NUM_MC_GROUPS_FOR_KEYS); |
| |
| // Generate no MCs with more cols than specified by the cqd. |
| if (numKeys > maxMCGroups) |
| numKeys = maxMCGroups; |
| |
| // For salted table, generate only the longest MC for the key (subject |
| // to max cols determined above) unless a cqd is set to gen all MCs of |
| // allowable sizes. |
| if (CmpCommon::getDefault(USTAT_ADD_SALTED_KEY_PREFIXES_FOR_MC) == DF_OFF && |
| hs_globals->objDef->getColNum("_SALT_", FALSE) >= 0) |
| minMCGroupSz = numKeys; |
| |
| while (numKeys >= minMCGroupSz) // Create only MC groups not single cols |
| { |
| HSColSet colSet(STMTHEAP); |
| |
| autoGroup = "("; |
| for (j = 0; j < numKeys; j++) |
| { |
| colPos = keyCols[j]->getPosition(); |
| col = hs_globals->objDef->getColInfo(colPos); |
| col.colnum = colPos; |
| colSet.insert(col); |
| autoGroup += col.colname->data(); |
| autoGroup += ","; |
| } |
| |
| if (LM->LogNeeded()) |
| { |
| autoGroup.replace(autoGroup.length()-1,1,")"); // replace comma with close parenthesis |
| sprintf(LM->msg, "\t\tKEY:\t\t%s", autoGroup.data()); |
| LM->Log(LM->msg); |
| } |
| |
| if (retcode = AddColumnSet(colSet)) |
| { |
| HSHandleError(retcode); |
| } |
| numKeys--; |
| } |
| } |
| |
| // ---------------------------------------------------------- |
| // Generate histograms for all INDEXES |
| // ---------------------------------------------------------- |
| const NAFileSetList& indexes = naTbl->getIndexList(); |
| NAFileSet* index; |
| for (i = 0; i < indexes.entries(); i++ ) |
| { |
| index = indexes[i]; |
| if (index == clusteringIndex) |
| continue; // clustering index processed above already |
| const NAColumnArray& keyCols = index->getIndexKeyColumns(); |
| numKeys = keyCols.entries(); |
| if (numKeys == 1) // SINGLE-COLUMN INDEX |
| { |
| colPos = keyCols[0]->getPosition(); |
| if (LM->LogNeeded()) |
| { |
| sprintf(LM->msg, "\t\tINDEX[%d]\t(%s)", i, |
| hs_globals->objDef->getColName(colPos)); |
| LM->Log(LM->msg); |
| } |
| if (ColumnExists(colPos)) // avoid duplicates |
| { |
| LM->Log("\t\t*** duplicate column group has been ignored."); |
| } |
| else // add to single-column group list |
| { |
| retcode = AddSingleColumn(colPos); |
| } |
| } |
| else // MULTI-COLUMN INDEX |
| { |
| // Create multiple MC group(s) if numkeys > 1. Subset MC groups will |
| // also be created if numkeys > 2, E.g. If numkeys = 5, then |
| // MC groups with 5, 4, 3, and 2 columns will be created using |
| // the key columns. Note that if numkeys is larger than CQD |
| // USTAT_NUM_MC_GROUPS_FOR_KEYS (default = 5), then the number |
| // of groups created will be limited by this value. So, e.g. if |
| // numkeys = 10, then MC groups with 10, 9, 8, 7, 6 columns will |
| // be created (that is, 5 groups will be created). |
| |
| ULng32 minMCGroupSz = 2; |
| ULng32 maxMCGroups = (ULng32) |
| CmpCommon::getDefaultNumeric(USTAT_NUM_MC_GROUPS_FOR_KEYS); |
| if (numKeys > maxMCGroups) |
| minMCGroupSz = numKeys - maxMCGroups + 1; |
| while (numKeys >= minMCGroupSz) // MinMCGroupSz is greater than 1. |
| { |
| HSColSet colSet(STMTHEAP); |
| |
| tempColList = ""; |
| autoGroup = "("; |
| for (j = 0; j < numKeys; j++) |
| { |
| colPos = keyCols[j]->getPosition(); |
| tempCol = "."; |
| tempCol += LongToNAString(colPos); |
| tempCol += "."; |
| |
| // Eliminate duplicate columns in the index; |
| // They may have been introduced by appending the key to the specified index. |
| if (!tempColList.contains(tempCol)) |
| { |
| col = hs_globals->objDef->getColInfo(colPos); |
| col.colnum = colPos; |
| colSet.insert((const struct HSColumnStruct) col); |
| |
| tempColList += tempCol.data(); |
| numColsInGroup++; |
| autoGroup += col.colname->data(); |
| autoGroup += ","; |
| } |
| } |
| |
| if (colSet.entries()) |
| { |
| if (numColsInGroup > 1) |
| { |
| if (LM->LogNeeded()) |
| { |
| autoGroup.replace(autoGroup.length()-1,1,")"); // replace comma with close parenthesis |
| sprintf(LM->msg, "\t\tINDEX[%d]\t%s", i, autoGroup.data()); |
| LM->Log(LM->msg); |
| } |
| |
| if (retcode = AddColumnSet(colSet)) |
| { |
| HSHandleError(retcode); |
| } |
| } |
| numColsInGroup = 0; |
| } |
| numKeys--; |
| } |
| } |
| } |
| |
| return retcode; |
| } |
| |
| |
| // ----------------------------------------------------------------------- |
| // Add the single-column groups from startColumn to endColumn. If these |
| // parameters are NULL, the function has been called for the ON EVERY COLUMN |
| // clause, and we will add all single column groups, as well as key groups. |
| // ----------------------------------------------------------------------- |
| Lng32 AddEveryColumn(const char *startColumn, const char *endColumn) |
| { |
| HSGlobalsClass *hs_globals = GetHSContext(); |
| Lng32 colNumber, retcode; |
| NAString colName; |
| Lng32 start, upto; |
| HSLogMan *LM = HSLogMan::Instance(); |
| hs_globals->parserError = HSGlobalsClass::ERROR_SEMANTICS; |
| |
| // Can't use EVERYCOL_OPT flag for this test, it may have been set on a |
| // previous call (making this a redundant, or incorrect, request to ustat |
| // an individual column name). startColumn will always be NULL if this fn |
| // is called to add all the columns for a table. |
| if (!startColumn) |
| { |
| HS_ASSERT(hs_globals->optFlags & EVERYCOL_OPT); |
| start = 0; |
| upto = hs_globals->objDef->getNumCols() - 1; |
| } |
| else |
| { |
| start = hs_globals->objDef->getColNum(startColumn); |
| if (start < 0) |
| { |
| retcode = -1; |
| HSHandleError(retcode); |
| } |
| upto = hs_globals->objDef->getColNum(endColumn); |
| if (upto < 0) |
| { |
| retcode = -1; |
| HSHandleError(retcode); |
| } |
| |
| if (start > upto) |
| { |
| Lng32 tmp = upto; |
| upto = start; |
| start = tmp; |
| } |
| } |
| |
| for (colNumber = start; colNumber <= upto; colNumber++) |
| { |
| if (ColumnExists(colNumber)) // avoid duplicates |
| { |
| colName = hs_globals->objDef->getColName(colNumber); |
| if (LM->LogNeeded()) |
| { |
| sprintf(LM->msg, "\t\t****Duplicate Column group (%s) has been ignored", colName.data()); |
| LM->Log(LM->msg); |
| } |
| } |
| else if (!DFS2REC::isLOB(hs_globals->objDef->getColInfo(colNumber).datatype)) |
| { |
| // add to single-column group list |
| retcode = AddSingleColumn(colNumber); |
| } |
| // else it's a LOB column; silently exclude it (the column was only |
| // implicitly referenced) |
| } |
| |
| if (!startColumn && // ON EVERY COLUMN causes key groups to be added as well |
| !HSGlobalsClass::isHiveCat(hs_globals->objDef->getCatName())) // No ustat on keys yet for hive tables |
| { |
| retcode = AddKeyGroups(); |
| HSHandleError(retcode); |
| } |
| |
| hs_globals->parserError = HSGlobalsClass::ERROR_SYNTAX; |
| return 0; |
| } |
| |
| |
| // Get the current set of histogrammed column groups for the table, then add |
| // each of these to either the singleGroup or multiGroup list for histograms |
| // to be created. |
| // |
| Lng32 AddExistingColumns() |
| { |
| HSGlobalsClass *hs_globals = GetHSContext(); |
| HSColGroupStruct *group, *groupList; |
| Lng32 retcode = 0; |
| |
| // Introduce new scope for instance of HSTranController; its destructor |
| // terminates the transaction. We need a transaction for this because |
| // groupListFromTable() executes a query to get the current histograms, |
| // and we want any acquired locks to be released before we proceed. An |
| // implicit transaction started by the query would remain open and retain |
| // the locks. |
| { |
| HSTranController TC("GET GROUP LIST FOR EXISTING", &retcode); |
| HSHandleError(retcode); |
| retcode = hs_globals->groupListFromTable(groupList, |
| TRUE); // pass 'TRUE' to skip empty histograms. |
| HSHandleError(retcode); |
| } |
| |
| while (groupList != NULL) |
| { |
| // Detach the current first node from the list. It will be added to a |
| // different list (hs_globals->singleGroup or hs_globals->multiGroup). |
| // Don't change group->next until groupList points to something else. |
| // |
| group = groupList; |
| groupList = groupList->next; |
| if (groupList) |
| groupList->prev = NULL; |
| group->next = NULL; |
| |
| // Set oldHistid to 0 so hist id will be reread during FlushStatistics. |
| // Reading it in the same transaction that writes the histograms keeps |
| // 2 or more concurrent Update Stats statements from coming up with the |
| // same new hist id. |
| group->oldHistid = 0; |
| |
| // Look through the columns in this group for any oversized columns. |
| for (UInt32 i = 0; i < group->colSet.entries(); i++) |
| { |
| bool isOverSized = DFS2REC::isAnyCharacter(group->colSet[i].datatype) && |
| (group->colSet[i].length > hs_globals->maxCharColumnLengthInBytes); |
| if (isOverSized) |
| { |
| hs_globals->hasOversizedColumns = TRUE; |
| } |
| } |
| |
| hs_globals->addGroup(group); |
| } |
| return retcode; |
| } |