/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.plan;

import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.signature.Signature;
import org.apache.hadoop.hive.ql.parse.TableSample;
import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.mapred.TextInputFormat;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
 * Table Scan Descriptor.
 *
 * Currently, data is only read from a base source as part of the map-reduce
 * framework, so nothing is stored in the descriptor. More will be added here
 * as table scan is invoked as part of local work.
 */
@Explain(displayName = "TableScan", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
public class TableScanDesc extends AbstractOperatorDesc implements IStatsGatherDesc {
private static final long serialVersionUID = 1L;
private String alias;
private List<VirtualColumn> virtualCols;
private String statsAggKeyPrefix; // stats publishing/aggregating key prefix
/**
* A list of the partition columns of the table.
* Set by the semantic analyzer only in case of the analyze command.
*/
private List<String> partColumns;
/**
 * Used for split sampling (row count per split).
 * For example,
 *   SELECT count(1) FROM ss_src2 TABLESAMPLE (10 ROWS) s;
 * returns the first 10 rows from each input split.
 */
private int rowLimit = -1;
/**
 * Set to true by the semantic analyzer only in case of the analyze command.
 */
private boolean gatherStats;
private boolean statsReliable;
private String tmpStatsDir;
private ExprNodeGenericFuncDesc filterExpr;
private Serializable filterObject;
private String serializedFilterExpr;
private String serializedFilterObject;
// Both neededColumnIDs and neededColumns should never be null.
// When neededColumnIDs is an empty list, no columns are needed (e.g. we do
// not need any column to evaluate SELECT count(*) FROM t).
private List<Integer> neededColumnIDs;
private List<String> neededColumns;
private List<String> neededNestedColumnPaths;
// all column names referenced including virtual columns. used in ColumnAccessAnalyzer
private List<String> referencedColumns;
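// Job configuration keys used to hand the (serialized) filter expression, its
// text form, an arbitrary filter object, the pruning filter, and time-travel
// parameters down to input formats and record readers. As a rough sketch (the
// exact deserialization helper may differ between Hive versions), a reader
// could recover the pushed-down predicate like this:
//
//   String serialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
//   if (serialized != null) {
//     ExprNodeGenericFuncDesc pushedFilter =
//         SerializationUtilities.deserializeExpression(serialized);
//   }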
public static final String FILTER_EXPR_CONF_STR =
"hive.io.filter.expr.serialized";
public static final String FILTER_TEXT_CONF_STR =
"hive.io.filter.text";
public static final String FILTER_OBJECT_CONF_STR =
"hive.io.filter.object";
public static final String PARTITION_PRUNING_FILTER =
"hive.io.pruning.filter";
public static final String AS_OF_TIMESTAMP =
"hive.io.as.of.timestamp";
public static final String AS_OF_VERSION =
"hive.io.as.of.version";
public static final String FROM_VERSION =
"hive.io.version.from";
public static final String BRANCH_NAME =
"hive.io.branch.name";
// input file name (of the big table) to bucket number
private Map<String, Integer> bucketFileNameMapping;
private String dbName = null;
private String tableName = null;
private boolean isMetadataOnly = false;
private boolean isTranscationalTable;
private boolean vectorized;
private AcidUtils.AcidOperationalProperties acidOperationalProperties = null;
private TableScanOperator.ProbeDecodeContext probeDecodeContext = null;
private TableSample tableSample;
private Table tableMetadata;
private BitSet includedBuckets;
private int numBuckets = -1;
private String asOfVersion = null;
private String versionIntervalFrom = null;
private String asOfTimestamp = null;
private String branchName = null;
public TableScanDesc() {
this(null, null);
}
@SuppressWarnings("nls")
public TableScanDesc(Table tblMetadata) {
this(null, tblMetadata);
}
public TableScanDesc(final String alias, Table tblMetadata) {
this(alias, null, tblMetadata, null);
}
public TableScanDesc(final String alias, List<VirtualColumn> vcs, Table tblMetadata) {
this(alias, vcs, tblMetadata, null);
}
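/**
 * Main constructor; the other constructors delegate here. Caches table
 * identity (database/table name, bucket count) and time-travel metadata from
 * the table, and resolves ACID operational properties when the table is
 * transactional.
 */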
public TableScanDesc(final String alias, List<VirtualColumn> vcs, Table tblMetadata,
TableScanOperator.ProbeDecodeContext probeDecodeContext) {
this.alias = alias;
this.virtualCols = vcs;
this.tableMetadata = tblMetadata;
if (tblMetadata != null) {
dbName = tblMetadata.getDbName();
tableName = tblMetadata.getTableName();
numBuckets = tblMetadata.getNumBuckets();
asOfTimestamp = tblMetadata.getAsOfTimestamp();
asOfVersion = tblMetadata.getAsOfVersion();
versionIntervalFrom = tblMetadata.getVersionIntervalFrom();
branchName = tblMetadata.getBranchName();
}
isTranscationalTable = AcidUtils.isTransactionalTable(this.tableMetadata);
if (isTranscationalTable) {
acidOperationalProperties = AcidUtils.getAcidOperationalProperties(this.tableMetadata);
}
this.probeDecodeContext = probeDecodeContext;
}
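/**
 * Clones only the core scan identity: alias, virtual columns (copied into a
 * new list), table metadata, and probe-decode context. Other mutable state is
 * not carried over.
 */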
@Override
public Object clone() {
List<VirtualColumn> vcs = new ArrayList<VirtualColumn>(getVirtualCols());
return new TableScanDesc(getAlias(), vcs, this.tableMetadata, this.probeDecodeContext);
}
@Explain(displayName = "alias")
public String getAlias() {
return alias;
}
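/**
 * Human-readable form of the filter predicate; part of the operator signature
 * used to compare plans.
 */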
@Signature
public String getPredicateString() {
if (filterExpr == null) {
return null;
}
return PlanUtils.getExprListString(Arrays.asList(filterExpr));
}
@Explain(displayName = "table", jsonOnly = true)
public String getTableName() {
return this.tableName;
}
@Explain(displayName = "database", jsonOnly = true)
public String getDatabaseName() {
return this.dbName;
}
@Explain(displayName = "columns", jsonOnly = true)
public List<String> getColumnNamesForExplain() {
return this.neededColumns;
}
@Explain(displayName = "isTempTable", jsonOnly = true)
public boolean isTemporary() {
return tableMetadata.isTemporary();
}
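/**
 * One-line summary for user-level EXPLAIN: complete table name, alias, ACID
 * flavor, and the basic/column statistics states.
 */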
@Explain(explainLevels = { Level.USER })
public String getTbl() {
StringBuilder sb = new StringBuilder();
sb.append(this.tableMetadata.getCompleteName());
sb.append("," + alias);
if (AcidUtils.isFullAcidTable(tableMetadata)) {
sb.append(", ACID table");
} else if (isTranscationalTable()) {
sb.append(", transactional table");
}
sb.append(",Tbl:");
sb.append(this.statistics.getBasicStatsState());
sb.append(",Col:");
sb.append(this.statistics.getColumnStatsState());
return sb.toString();
}
public boolean isTranscationalTable() {
return isTranscationalTable;
}
public AcidUtils.AcidOperationalProperties getAcidOperationalProperties() {
return acidOperationalProperties;
}
@Explain(displayName = "Output", explainLevels = { Level.USER })
public List<String> getOutputColumnNames() {
return this.neededColumns;
}
@Explain(displayName = "filterExpr")
public String getFilterExprString() {
if (filterExpr == null) {
return null;
}
return PlanUtils.getExprListString(Arrays.asList(filterExpr));
}
// @Signature // XXX
public ExprNodeGenericFuncDesc getFilterExpr() {
return filterExpr;
}
public void setFilterExpr(ExprNodeGenericFuncDesc filterExpr) {
this.filterExpr = filterExpr;
}
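/**
 * Probe-decode context; when set by the compiler, it lets the scan skip
 * decoding rows that cannot match the hash table of a downstream MapJoin.
 */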
@Explain(displayName = "probeDecodeDetails", explainLevels = { Level.DEFAULT, Level.EXTENDED })
public String getProbeDecodeString() {
if (probeDecodeContext == null) {
return null;
}
return probeDecodeContext.toString();
}
public void setProbeDecodeContext(TableScanOperator.ProbeDecodeContext probeDecodeContext) {
this.probeDecodeContext = probeDecodeContext;
}
public Serializable getFilterObject() {
return filterObject;
}
public void setFilterObject(Serializable filterObject) {
this.filterObject = filterObject;
}
public void setNeededColumnIDs(List<Integer> neededColumnIDs) {
this.neededColumnIDs = neededColumnIDs;
}
public List<Integer> getNeededColumnIDs() {
return neededColumnIDs;
}
public List<String> getNeededNestedColumnPaths() {
return neededNestedColumnPaths;
}
public void setNeededNestedColumnPaths(List<String> neededNestedColumnPaths) {
this.neededNestedColumnPaths = neededNestedColumnPaths;
}
public void setNeededColumns(List<String> neededColumns) {
this.neededColumns = neededColumns;
}
public List<String> getNeededColumns() {
return neededColumns;
}
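/**
 * The subset of needed nested column paths that contain a '.' separator,
 * i.e. the paths that were actually pruned below a top-level column.
 */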
@Explain(displayName = "Pruned Column Paths")
public List<String> getPrunedColumnPaths() {
List<String> result = new ArrayList<>();
for (String p : neededNestedColumnPaths) {
if (p.indexOf('.') >= 0) {
result.add(p);
}
}
return result;
}
public void setReferencedColumns(List<String> referencedColumns) {
this.referencedColumns = referencedColumns;
}
public List<String> getReferencedColumns() {
return referencedColumns;
}
public void setAlias(String alias) {
this.alias = alias;
}
public void setPartColumns(List<String> partColumns) {
this.partColumns = partColumns;
}
public List<String> getPartColumns() {
return partColumns;
}
public void setGatherStats(boolean gatherStats) {
this.gatherStats = gatherStats;
}
@Override
@Explain(displayName = "GatherStats", explainLevels = { Level.EXTENDED })
@Signature
public boolean isGatherStats() {
return gatherStats;
}
@Override
public String getTmpStatsDir() {
return tmpStatsDir;
}
public void setTmpStatsDir(String tmpStatsDir) {
this.tmpStatsDir = tmpStatsDir;
}
public List<VirtualColumn> getVirtualCols() {
return virtualCols;
}
public void setVirtualCols(List<VirtualColumn> virtualCols) {
this.virtualCols = virtualCols;
}
public void addVirtualCols(List<VirtualColumn> virtualCols) {
this.virtualCols.addAll(virtualCols);
}
public boolean hasVirtualCols() {
return virtualCols != null && !virtualCols.isEmpty();
}
public void setStatsAggPrefix(String k) {
statsAggKeyPrefix = k;
}
@Override
@Explain(displayName = "Statistics Aggregation Key Prefix", explainLevels = { Level.EXTENDED })
public String getStatsAggPrefix() {
return statsAggKeyPrefix;
}
public boolean isStatsReliable() {
return statsReliable;
}
public void setStatsReliable(boolean statsReliable) {
this.statsReliable = statsReliable;
}
public void setRowLimit(int rowLimit) {
this.rowLimit = rowLimit;
}
@Signature
public int getRowLimit() {
return rowLimit;
}
@Explain(displayName = "Row Limit Per Split")
public Integer getRowLimitExplain() {
return rowLimit >= 0 ? rowLimit : null;
}
public Map<String, Integer> getBucketFileNameMapping() {
return bucketFileNameMapping;
}
public void setBucketFileNameMapping(Map<String, Integer> bucketFileNameMapping) {
this.bucketFileNameMapping = bucketFileNameMapping;
}
public void setIsMetadataOnly(boolean metadataOnly) {
isMetadataOnly = metadataOnly;
}
public boolean getIsMetadataOnly() {
return isMetadataOnly;
}
@Signature
public String getQualifiedTable() {
return dbName + "." + tableName;
}
public Table getTableMetadata() {
return tableMetadata;
}
public void setTableMetadata(Table tableMetadata) {
this.tableMetadata = tableMetadata;
}
public TableSample getTableSample() {
return tableSample;
}
public void setTableSample(TableSample tableSample) {
this.tableSample = tableSample;
}
public String getSerializedFilterExpr() {
return serializedFilterExpr;
}
public void setSerializedFilterExpr(String serializedFilterExpr) {
this.serializedFilterExpr = serializedFilterExpr;
}
public String getSerializedFilterObject() {
return serializedFilterObject;
}
public void setSerializedFilterObject(String serializedFilterObject) {
this.serializedFilterObject = serializedFilterObject;
}
public void setIncludedBuckets(BitSet bitset) {
this.includedBuckets = bitset;
}
public BitSet getIncludedBuckets() {
return this.includedBuckets;
}
@Explain(displayName = "buckets included", explainLevels = { Level.EXTENDED })
public String getIncludedBucketExplain() {
if (this.includedBuckets == null) {
return null;
}
StringBuilder sb = new StringBuilder();
sb.append("[");
for (int i = 0; i < this.includedBuckets.size(); i++) {
if (this.includedBuckets.get(i)) {
sb.append(i);
sb.append(',');
}
}
sb.append(String.format("] of %d", numBuckets));
return sb.toString();
}
public int getNumBuckets() {
return numBuckets;
}
public void setNumBuckets(int numBuckets) {
this.numBuckets = numBuckets;
}
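/**
 * True when the table is text-based and declares a non-zero header or footer
 * line count in its table properties, in which case readers must skip those
 * lines.
 */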
public boolean isNeedSkipHeaderFooters() {
boolean rtn = false;
if (tableMetadata != null && tableMetadata.getTTable() != null
&& TextInputFormat.class
.isAssignableFrom(tableMetadata.getInputFormatClass())) {
Map<String, String> params = tableMetadata.getTTable().getParameters();
if (params != null) {
String skipHVal = params.get(serdeConstants.HEADER_COUNT);
int hcount = skipHVal == null ? 0 : Integer.parseInt(skipHVal);
String skipFVal = params.get(serdeConstants.FOOTER_COUNT);
int fcount = skipFVal == null ? 0 : Integer.parseInt(skipFVal);
rtn = (hcount != 0 || fcount != 0);
}
}
return rtn;
}
@Override public Map<String, String> getOpProps() {
return opProps;
}
@Explain(displayName = "properties", explainLevels = { Level.DEFAULT, Level.USER, Level.EXTENDED })
public Map<String, String> getOpPropsWithStorageHandlerProps() {
HiveStorageHandler storageHandler = tableMetadata.getStorageHandler();
return storageHandler == null
? opProps
: storageHandler.getOperatorDescProperties(this, opProps);
}
@Explain(displayName = "As of version")
public String getAsOfVersion() {
return asOfVersion;
}
@Explain(displayName = "Version interval from")
public String getVersionIntervalFrom() {
return versionIntervalFrom;
}
@Explain(displayName = "As of timestamp")
public String getAsOfTimestamp() {
return asOfTimestamp;
}
@Explain(displayName = "branch name")
public String getBranchName() {
return branchName;
}
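/**
 * EXPLAIN helper that reports vectorization details for this table scan, in
 * particular the projected schema columns of the vectorized row batch.
 */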
public class TableScanOperatorExplainVectorization extends OperatorExplainVectorization {
private final TableScanDesc tableScanDesc;
private final VectorTableScanDesc vectorTableScanDesc;
public TableScanOperatorExplainVectorization(TableScanDesc tableScanDesc,
VectorTableScanDesc vectorTableScanDesc) {
// Native vectorization supported.
super(vectorTableScanDesc, true);
this.tableScanDesc = tableScanDesc;
this.vectorTableScanDesc = vectorTableScanDesc;
}
@Explain(vectorization = Vectorization.DETAIL, displayName = "vectorizationSchemaColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED })
public String getSchemaColumns() {
String[] projectedColumnNames = vectorTableScanDesc.getProjectedColumnNames();
TypeInfo[] projectedColumnTypeInfos = vectorTableScanDesc.getProjectedColumnTypeInfos();
// We currently include all data columns, partition columns, and any
// virtual columns available under vectorization in the VRB.
final int size = projectedColumnNames.length;
int[] projectionColumns = new int[size];
for (int i = 0; i < size; i++) {
projectionColumns[i] = i;
}
DataTypePhysicalVariation[] projectedColumnDataTypePhysicalVariations =
vectorTableScanDesc.getProjectedColumnDataTypePhysicalVariations();
return BaseExplainVectorization.getColumnAndTypes(
projectionColumns,
projectedColumnNames,
projectedColumnTypeInfos,
projectedColumnDataTypePhysicalVariations).toString();
}
}
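/**
 * Returns the vectorization EXPLAIN helper, or null when no
 * VectorTableScanDesc is attached (i.e. the scan was not vectorized).
 */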
@Explain(vectorization = Vectorization.OPERATOR, displayName = "TableScan Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
public TableScanOperatorExplainVectorization getTableScanVectorization() {
VectorTableScanDesc vectorTableScanDesc = (VectorTableScanDesc) getVectorDesc();
if (vectorTableScanDesc == null) {
return null;
}
return new TableScanOperatorExplainVectorization(this, vectorTableScanDesc);
}
/*
* This TableScanDesc flag is strictly set by the Vectorizer class for vectorized MapWork
* vertices.
*/
public void setVectorized(boolean vectorized) {
this.vectorized = vectorized;
}
public boolean isVectorized() {
return vectorized;
}
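/**
 * Two table scan descriptors are considered the same when they read the same
 * qualified table with equal filter expressions, row limits, and
 * stats-gathering flags; used to detect equivalent operators (for example, by
 * shared-work optimization).
 */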
@Override
public boolean isSame(OperatorDesc other) {
if (getClass().getName().equals(other.getClass().getName())) {
TableScanDesc otherDesc = (TableScanDesc) other;
return Objects.equals(getQualifiedTable(), otherDesc.getQualifiedTable()) &&
ExprNodeDescUtils.isSame(getFilterExpr(), otherDesc.getFilterExpr()) &&
getRowLimit() == otherDesc.getRowLimit() &&
isGatherStats() == otherDesc.isGatherStats();
}
return false;
}
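/**
 * A table is full ACID when it is transactional and its ACID operational
 * properties are not insert-only, i.e. it supports row-level updates and
 * deletes.
 */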
public boolean isFullAcidTable() {
return isTranscationalTable() && !getAcidOperationalProperties().isInsertOnly();
}
}