/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.ddl.table.info.desc;

import java.io.DataOutputStream;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HMSHandler;
import org.apache.hadoop.hive.metastore.StatObjectConverter;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.ddl.DDLOperation;
import org.apache.hadoop.hive.ql.ddl.DDLOperationContext;
import org.apache.hadoop.hive.ql.ddl.ShowUtils;
import org.apache.hadoop.hive.ql.ddl.table.info.desc.formatter.DescTableFormatter;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.PartitionIterable;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.TableConstraintsInfo;
import org.apache.hadoop.hive.ql.parse.HiveTableName;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

import com.google.common.collect.Lists;

/**
* Operation process of describing a table.
*/
public class DescTableOperation extends DDLOperation<DescTableDesc> {

  public DescTableOperation(DDLOperationContext context, DescTableDesc desc) {
    super(context, desc);
  }

  @Override
  public int execute() throws Exception {
    Table table = getTable();
    Partition part = getPartition(table);
    final String dbTableName = desc.getDbTableName();

    try (DataOutputStream outStream = ShowUtils.getOutputStream(new Path(desc.getResFile()), context)) {
      LOG.debug("DDLTask: got data for {}", dbTableName);

      List<FieldSchema> cols = new ArrayList<>();
      List<ColumnStatisticsObj> colStats = new ArrayList<>();

      Deserializer deserializer = getDeserializer(table);

      if (desc.getColumnPath() == null) {
        getColumnsNoColumnPath(table, part, cols);
      } else {
        if (desc.isFormatted()) {
          getColumnDataColPathSpecified(table, part, cols, colStats, deserializer);
        } else {
          cols.addAll(Hive.getFieldsFromDeserializer(desc.getColumnPath(), deserializer, context.getConf()));
        }
      }
      fixDecimalColumnTypeName(cols);

      setConstraintsAndStorageHandlerInfo(table);
      handleMaterializedView(table);

      // In case the query is served by HiveServer2, don't pad it with spaces,
      // as HiveServer2 output is consumed by JDBC/ODBC clients.
      boolean isOutputPadded = !SessionState.get().isHiveServerQuery();

      DescTableFormatter formatter = DescTableFormatter.getFormatter(context.getConf());
      formatter.describeTable(context.getConf(), outStream, desc.getColumnPath(), dbTableName, table, part, cols,
          desc.isFormatted(), desc.isExtended(), isOutputPadded, colStats);

      LOG.debug("DDLTask: written data for {}", dbTableName);
    } catch (SQLException e) {
      throw new HiveException(e, ErrorMsg.GENERIC_ERROR, dbTableName);
    }

    return 0;
  }
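
  /**
   * Resolves the table referenced by the DESCRIBE statement; fails with INVALID_TABLE if it does not exist.
   */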
  private Table getTable() throws HiveException {
    Table table = context.getDb().getTable(desc.getTableName().getDb(), desc.getTableName().getTable(),
        desc.getTableName().getTableMetaRef(), false, false, false);
    if (table == null) {
      throw new HiveException(ErrorMsg.INVALID_TABLE, desc.getDbTableName());
    }
    return table;
  }
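
  /**
   * Resolves the partition referenced by the DESCRIBE statement, if a partition spec was given;
   * fails with INVALID_PARTITION when no matching partition exists.
   */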
  private Partition getPartition(Table table) throws HiveException {
    Partition part = null;
    if (desc.getPartitionSpec() != null) {
      part = context.getDb().getPartition(table, desc.getPartitionSpec(), false);
      if (part == null) {
        throw new HiveException(ErrorMsg.INVALID_PARTITION,
            StringUtils.join(desc.getPartitionSpec().keySet(), ','), desc.getDbTableName());
      }
    }
    return part;
  }
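
  /**
   * Returns the deserializer used to read the table's column schema.
   */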
  private Deserializer getDeserializer(Table table) throws SQLException {
    return table.getDeserializer(true);
  }
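
  /**
   * Collects the columns to describe when no column path is specified. For describe extended or
   * formatted on a partitioned table without a partition spec, the basic statistics of all
   * partitions are aggregated into the table's parameters.
   */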
  private void getColumnsNoColumnPath(Table table, Partition partition, List<FieldSchema> cols) throws HiveException {
    cols.addAll(partition == null || table.getTableType() == TableType.VIRTUAL_VIEW ?
        table.getCols() : partition.getCols());
    if (!desc.isFormatted()) {
      cols.addAll(table.getPartCols());
    }

    // Fetch partition statistics only for describe extended or formatted.
    if (desc.isExtended() || desc.isFormatted()) {
      boolean disablePartitionStats = HiveConf.getBoolVar(context.getConf(),
          HiveConf.ConfVars.HIVE_DESCRIBE_PARTITIONED_TABLE_IGNORE_STATS);
      if (table.isPartitioned() && partition == null && !disablePartitionStats) {
        // No partition was specified for a partitioned table, so let's fetch all of them.
        Map<String, String> tblProps = table.getParameters() == null ?
            new HashMap<String, String>() : table.getParameters();

        Map<String, Long> valueMap = new HashMap<>();
        Map<String, Boolean> stateMap = new HashMap<>();
        for (String stat : StatsSetupConst.SUPPORTED_STATS) {
          valueMap.put(stat, 0L);
          stateMap.put(stat, true);
        }

        PartitionIterable partitions = new PartitionIterable(context.getDb(), table, null,
            MetastoreConf.getIntVar(context.getConf(), MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX));
        int numParts = 0;
        for (Partition p : partitions) {
          Map<String, String> partitionProps = p.getParameters();
          Boolean state = StatsSetupConst.areBasicStatsUptoDate(partitionProps);
          for (String stat : StatsSetupConst.SUPPORTED_STATS) {
            // A stat is up to date for the table only if it is up to date in every partition.
            stateMap.put(stat, stateMap.get(stat) && state);
            if (partitionProps != null && partitionProps.get(stat) != null) {
              valueMap.put(stat, valueMap.get(stat) + Long.parseLong(partitionProps.get(stat)));
            }
          }
          numParts++;
        }

        tblProps.put(StatsSetupConst.NUM_PARTITIONS, Integer.toString(numParts));
        for (String stat : StatsSetupConst.SUPPORTED_STATS) {
          StatsSetupConst.setBasicStatsState(tblProps, Boolean.toString(stateMap.get(stat)));
          tblProps.put(stat, valueMap.get(stat).toString());
        }
        table.setParameters(tblProps);
      }
    }
  }
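
  /**
   * Collects the column schema and column statistics when a column is explicitly named in the
   * DESCRIBE statement. Partition key columns, other columns of partitioned tables, columns of
   * non-partitioned tables, and columns of a single partition each take a different statistics path.
   */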
  private void getColumnDataColPathSpecified(Table table, Partition part, List<FieldSchema> cols,
      List<ColumnStatisticsObj> colStats, Deserializer deserializer)
      throws SemanticException, HiveException, MetaException {
    // When a column name is specified in the DESCRIBE TABLE statement, colPath has the form
    // db_name.table_name.column_name.
    String colName = desc.getColumnPath().split("\\.")[2];
    List<String> colNames = Lists.newArrayList(colName.toLowerCase());

    TableName tableName = HiveTableName.of(desc.getDbTableName());
    if (null == part) {
      if (table.isPartitioned()) {
        Map<String, String> tableProps = table.getParameters() == null ?
            new HashMap<String, String>() : table.getParameters();
        if (table.isPartitionKey(colNames.get(0))) {
          getColumnDataForPartitionKeyColumn(table, cols, colStats, colNames, tableProps);
        } else {
          getColumnsForNotPartitionKeyColumn(cols, colStats, deserializer, colNames, tableName, tableProps);
        }
        table.setParameters(tableProps);
      } else {
        cols.addAll(Hive.getFieldsFromDeserializer(desc.getColumnPath(), deserializer, context.getConf()));
        colStats.addAll(context.getDb().getTableColumnStatistics(tableName.getDb().toLowerCase(),
            tableName.getTable().toLowerCase(), colNames, false));
      }
    } else {
      List<String> partitions = new ArrayList<String>();
      // The partition name is converted to lowercase before the stats are generated, so the same
      // lowercase name must be used to look them up.
      String partName = HMSHandler.lowerCaseConvertPartName(part.getName());
      partitions.add(partName);
      cols.addAll(Hive.getFieldsFromDeserializer(desc.getColumnPath(), deserializer, context.getConf()));
      Map<String, List<ColumnStatisticsObj>> partitionColumnStatistics = context.getDb().getPartitionColumnStatistics(
          tableName.getDb().toLowerCase(), tableName.getTable().toLowerCase(), partitions, colNames, false);
      List<ColumnStatisticsObj> partitionColStat = partitionColumnStatistics.get(partName);
      if (partitionColStat != null) {
        colStats.addAll(partitionColStat);
      }
    }
  }
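
  /**
   * Builds statistics for a partition key column from the values of the table's partitions and
   * converts them into a metastore-style ColumnStatisticsObj.
   */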
  private void getColumnDataForPartitionKeyColumn(Table table, List<FieldSchema> cols,
      List<ColumnStatisticsObj> colStats, List<String> colNames, Map<String, String> tableProps)
      throws HiveException, MetaException {
    FieldSchema partCol = table.getPartColByName(colNames.get(0));
    cols.add(partCol);

    PartitionIterable parts = new PartitionIterable(context.getDb(), table, null,
        MetastoreConf.getIntVar(context.getConf(), MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX));
    ColumnInfo ci = new ColumnInfo(partCol.getName(),
        TypeInfoUtils.getTypeInfoFromTypeString(partCol.getType()), null, false);
    ColStatistics cs = StatsUtils.getColStatsForPartCol(ci, parts, context.getConf());

    ColumnStatisticsData data = new ColumnStatisticsData();
    ColStatistics.Range r = cs.getRange();
    StatObjectConverter.fillColumnStatisticsData(partCol.getType(), data, r == null ? null : r.minValue,
        r == null ? null : r.maxValue, r == null ? null : r.minValue, r == null ? null : r.maxValue,
        r == null ? null : r.minValue.toString(), r == null ? null : r.maxValue.toString(),
        cs.getNumNulls(), cs.getCountDistint(), null, null, cs.getAvgColLen(),
        cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses());

    ColumnStatisticsObj cso = new ColumnStatisticsObj(partCol.getName(), partCol.getType(), data);
    colStats.add(cso);
    StatsSetupConst.setColumnStatsState(tableProps, colNames);
  }
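
  /**
   * Fetches aggregated column statistics over all partitions for a non partition key column of a
   * partitioned table; the statistics are only marked accurate when every partition contributed.
   */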
  private void getColumnsForNotPartitionKeyColumn(List<FieldSchema> cols, List<ColumnStatisticsObj> colStats,
      Deserializer deserializer, List<String> colNames, TableName tableName, Map<String, String> tableProps)
      throws HiveException {
    cols.addAll(Hive.getFieldsFromDeserializer(desc.getColumnPath(), deserializer, context.getConf()));

    List<String> parts = context.getDb().getPartitionNames(tableName.getDb().toLowerCase(),
        tableName.getTable().toLowerCase(), (short) -1);
    AggrStats aggrStats = context.getDb().getAggrColStatsFor(
        tableName.getDb().toLowerCase(), tableName.getTable().toLowerCase(), colNames, parts, false);
    colStats.addAll(aggrStats.getColStats());

    if (parts.size() == aggrStats.getPartsFound()) {
      StatsSetupConst.setColumnStatsState(tableProps, colNames);
    } else {
      StatsSetupConst.removeColumnStatsState(tableProps, colNames);
    }
  }

  /**
   * Fixes the type name of a decimal column that was stored without explicit precision/scale. This makes
   * DESCRIBE TABLE show "decimal(10,0)" instead of "decimal", even when the type recorded in the
   * metastore is plain "decimal", which was possible with earlier versions of Hive.
   *
   * @param cols the columns whose type names should be fixed
   */
  private static void fixDecimalColumnTypeName(List<FieldSchema> cols) {
    for (FieldSchema col : cols) {
      if (serdeConstants.DECIMAL_TYPE_NAME.equals(col.getType())) {
        col.setType(DecimalTypeInfo.getQualifiedName(HiveDecimal.USER_DEFAULT_PRECISION,
            HiveDecimal.USER_DEFAULT_SCALE));
      }
    }
  }
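
  /**
   * For describe extended or formatted, attaches the table's constraints and storage handler
   * information so that the formatter can render them.
   */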
  private void setConstraintsAndStorageHandlerInfo(Table table) throws HiveException {
    if (desc.isExtended() || desc.isFormatted()) {
      TableConstraintsInfo tableConstraintsInfo = context.getDb().getTableConstraints(table.getDbName(),
          table.getTableName(), false, false, table.getTTable() != null ? table.getTTable().getId() : -1);
      table.setTableConstraintsInfo(tableConstraintsInfo);
      table.setStorageHandlerInfo(context.getDb().getStorageHandlerInfo(table));
    }
  }
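
  /**
   * For a materialized view, records whether the view is outdated for automatic query rewriting,
   * based on the current state of its source tables.
   */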
  private void handleMaterializedView(Table table) throws HiveException {
    if (table.isMaterializedView()) {
      table.setOutdatedForRewriting(context.getDb().isOutdatedMaterializedView(
          table,
          table.getMVMetadata().getSourceTableNames(),
          false,
          SessionState.get().getTxnMgr()));
    }
  }
}