| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.hadoop.hive.ql.parse; |
| |
| import static java.util.Objects.nonNull; |
| import static org.apache.commons.lang3.StringUtils.isNotBlank; |
| import static org.apache.hadoop.hive.common.AcidConstants.SOFT_DELETE_TABLE; |
| import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.DYNAMICPARTITIONCONVERT; |
| import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVEARCHIVEENABLED; |
| import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DEFAULT_STORAGE_HANDLER; |
| import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS; |
| import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION; |
| import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE; |
| import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS; |
| import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.DEFAULT_TABLE_TYPE; |
| import static org.apache.hadoop.hive.ql.ddl.view.create.AbstractCreateViewAnalyzer.validateTablesUsed; |
| import static org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.NON_FK_FILTERED; |
| |
| import java.io.FileNotFoundException; |
| import java.io.IOException; |
| import java.security.AccessControlException; |
| import java.util.ArrayDeque; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.Deque; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.LinkedHashMap; |
| import java.util.LinkedList; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| import java.util.Optional; |
| import java.util.Queue; |
| import java.util.Set; |
| import java.util.SortedMap; |
| import java.util.SortedSet; |
| import java.util.TreeMap; |
| import java.util.TreeSet; |
| import java.util.UUID; |
| import java.util.concurrent.atomic.AtomicBoolean; |
| import java.util.function.Supplier; |
| import java.util.regex.Pattern; |
| import java.util.regex.PatternSyntaxException; |
| import java.util.stream.Collectors; |
| |
| import org.antlr.runtime.ClassicToken; |
| import org.antlr.runtime.CommonToken; |
| import org.antlr.runtime.Token; |
| import org.antlr.runtime.TokenRewriteStream; |
| import org.antlr.runtime.tree.Tree; |
| import org.antlr.runtime.tree.TreeVisitor; |
| import org.antlr.runtime.tree.TreeVisitorAction; |
| import org.apache.commons.collections.CollectionUtils; |
| import org.apache.commons.lang3.StringUtils; |
| import org.apache.commons.lang3.tuple.Pair; |
| import org.apache.hadoop.fs.FSDataOutputStream; |
| import org.apache.hadoop.fs.FileStatus; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.fs.permission.FsAction; |
| import org.apache.hadoop.hdfs.DFSUtilClient; |
| import org.apache.hadoop.hive.common.FileUtils; |
| import org.apache.hadoop.hive.common.StatsSetupConst; |
| import org.apache.hadoop.hive.common.StatsSetupConst.StatDB; |
| import org.apache.hadoop.hive.common.StringInternUtils; |
| import org.apache.hadoop.hive.common.TableName; |
| import org.apache.hadoop.hive.common.ValidTxnList; |
| import org.apache.hadoop.hive.common.ValidTxnWriteIdList; |
| import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; |
| import org.apache.hadoop.hive.conf.Constants; |
| import org.apache.hadoop.hive.conf.HiveConf; |
| import org.apache.hadoop.hive.conf.HiveConf.ConfVars; |
| import org.apache.hadoop.hive.conf.HiveConf.ResultFileFormat; |
| import org.apache.hadoop.hive.conf.HiveConf.StrictChecks; |
| import org.apache.hadoop.hive.metastore.TableType; |
| import org.apache.hadoop.hive.metastore.TransactionalValidationListener; |
| import org.apache.hadoop.hive.metastore.Warehouse; |
| import org.apache.hadoop.hive.metastore.api.Database; |
| import org.apache.hadoop.hive.metastore.api.FieldSchema; |
| import org.apache.hadoop.hive.metastore.api.MetaException; |
| import org.apache.hadoop.hive.metastore.api.Order; |
| import org.apache.hadoop.hive.metastore.api.SQLCheckConstraint; |
| import org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint; |
| import org.apache.hadoop.hive.metastore.api.SQLForeignKey; |
| import org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint; |
| import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; |
| import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint; |
| import org.apache.hadoop.hive.metastore.api.SourceTable; |
| import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; |
| import org.apache.hadoop.hive.metastore.conf.MetastoreConf; |
| import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; |
| import org.apache.hadoop.hive.ql.CompilationOpContext; |
| import org.apache.hadoop.hive.ql.Context; |
| import org.apache.hadoop.hive.ql.ErrorMsg; |
| import org.apache.hadoop.hive.ql.QueryProperties; |
| import org.apache.hadoop.hive.ql.QueryState; |
| import org.apache.hadoop.hive.ql.cache.results.CacheUsage; |
| import org.apache.hadoop.hive.ql.cache.results.QueryResultsCache; |
| import org.apache.hadoop.hive.ql.ddl.DDLWork; |
| import org.apache.hadoop.hive.ql.ddl.misc.hooks.InsertCommitHookDesc; |
| import org.apache.hadoop.hive.ql.ddl.table.constraint.ConstraintsUtils; |
| import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc; |
| import org.apache.hadoop.hive.ql.ddl.table.create.like.CreateTableLikeDesc; |
| import org.apache.hadoop.hive.ql.ddl.table.misc.preinsert.PreInsertTableDesc; |
| import org.apache.hadoop.hive.ql.ddl.table.misc.properties.AlterTableUnsetPropertiesDesc; |
| import org.apache.hadoop.hive.ql.ddl.table.storage.skewed.SkewedTableUtils; |
| import org.apache.hadoop.hive.ql.ddl.view.create.CreateMaterializedViewDesc; |
| import org.apache.hadoop.hive.ql.ddl.view.materialized.update.MaterializedViewUpdateDesc; |
| import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; |
| import org.apache.hadoop.hive.ql.exec.ArchiveUtils; |
| import org.apache.hadoop.hive.ql.exec.ColumnInfo; |
| import org.apache.hadoop.hive.ql.exec.Description; |
| import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; |
| import org.apache.hadoop.hive.ql.exec.FetchTask; |
| import org.apache.hadoop.hive.ql.exec.FileSinkOperator; |
| import org.apache.hadoop.hive.ql.exec.FilterOperator; |
| import org.apache.hadoop.hive.ql.exec.FunctionInfo; |
| import org.apache.hadoop.hive.ql.exec.FunctionRegistry; |
| import org.apache.hadoop.hive.ql.exec.FunctionUtils; |
| import org.apache.hadoop.hive.ql.exec.GroupByOperator; |
| import org.apache.hadoop.hive.ql.exec.JoinOperator; |
| import org.apache.hadoop.hive.ql.exec.LimitOperator; |
| import org.apache.hadoop.hive.ql.exec.Operator; |
| import org.apache.hadoop.hive.ql.exec.OperatorFactory; |
| import org.apache.hadoop.hive.ql.exec.RecordReader; |
| import org.apache.hadoop.hive.ql.exec.RecordWriter; |
| import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; |
| import org.apache.hadoop.hive.ql.exec.RowSchema; |
| import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; |
| import org.apache.hadoop.hive.ql.exec.SelectOperator; |
| import org.apache.hadoop.hive.ql.exec.TableScanOperator; |
| import org.apache.hadoop.hive.ql.exec.Task; |
| import org.apache.hadoop.hive.ql.exec.TaskFactory; |
| import org.apache.hadoop.hive.ql.exec.UnionOperator; |
| import org.apache.hadoop.hive.ql.exec.Utilities; |
| import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField; |
| import org.apache.hadoop.hive.ql.exec.WindowFunctionInfo; |
| import org.apache.hadoop.hive.ql.exec.tez.TezTask; |
| import org.apache.hadoop.hive.ql.hooks.Entity; |
| import org.apache.hadoop.hive.ql.hooks.ReadEntity; |
| import org.apache.hadoop.hive.ql.hooks.WriteEntity; |
| import org.apache.hadoop.hive.ql.hooks.WriteEntity.WriteType; |
| import org.apache.hadoop.hive.ql.io.AcidInputFormat; |
| import org.apache.hadoop.hive.ql.io.AcidOutputFormat; |
| import org.apache.hadoop.hive.ql.io.AcidUtils; |
| import org.apache.hadoop.hive.ql.io.AcidUtils.Operation; |
| import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; |
| import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; |
| import org.apache.hadoop.hive.ql.io.HiveOutputFormat; |
| import org.apache.hadoop.hive.ql.io.NullRowsInputFormat; |
| import org.apache.hadoop.hive.ql.io.SchemaInferenceUtils; |
| import org.apache.hadoop.hive.ql.io.arrow.ArrowColumnarBatchSerDe; |
| import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; |
| import org.apache.hadoop.hive.ql.lib.Node; |
| import org.apache.hadoop.hive.ql.lib.SemanticDispatcher; |
| import org.apache.hadoop.hive.ql.lib.SemanticGraphWalker; |
| import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; |
| import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; |
| import org.apache.hadoop.hive.ql.lockmgr.LockException; |
| import org.apache.hadoop.hive.ql.metadata.DefaultConstraint; |
| import org.apache.hadoop.hive.ql.metadata.DummyPartition; |
| import org.apache.hadoop.hive.ql.metadata.Hive; |
| import org.apache.hadoop.hive.ql.metadata.HiveException; |
| import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; |
| import org.apache.hadoop.hive.ql.metadata.HiveUtils; |
| import org.apache.hadoop.hive.ql.metadata.InvalidTableException; |
| import org.apache.hadoop.hive.ql.metadata.Partition; |
| import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient; |
| import org.apache.hadoop.hive.ql.metadata.Table; |
| import org.apache.hadoop.hive.ql.metadata.VirtualColumn; |
| import org.apache.hadoop.hive.ql.optimizer.Optimizer; |
| import org.apache.hadoop.hive.ql.optimizer.QueryPlanPostProcessor; |
| import org.apache.hadoop.hive.ql.optimizer.Transform; |
| import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; |
| import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; |
| import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTBuilder; |
| import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc; |
| import org.apache.hadoop.hive.ql.optimizer.lineage.Generator; |
| import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; |
| import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec.SpecType; |
| import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; |
| import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression; |
| import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec; |
| import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFInputSpec; |
| import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputSpec; |
| import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputType; |
| import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression; |
| import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec; |
| import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionedTableFunctionSpec; |
| import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec; |
| import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType; |
| import org.apache.hadoop.hive.ql.parse.SubQueryUtils.ISubQueryJoinInfo; |
| import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec; |
| import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction; |
| import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec; |
| import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFrameSpec; |
| import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec; |
| import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec; |
| import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType; |
| import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck; |
| import org.apache.hadoop.hive.ql.parse.type.TypeCheckCtx; |
| import org.apache.hadoop.hive.ql.parse.type.TypeCheckProcFactory; |
| import org.apache.hadoop.hive.ql.plan.AggregationDesc; |
| import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; |
| import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; |
| import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; |
| import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; |
| import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; |
| import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; |
| import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; |
| import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; |
| import org.apache.hadoop.hive.ql.plan.FileSinkDesc; |
| import org.apache.hadoop.hive.ql.plan.FilterDesc; |
| import org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc; |
| import org.apache.hadoop.hive.ql.plan.ForwardDesc; |
| import org.apache.hadoop.hive.ql.plan.GroupByDesc; |
| import org.apache.hadoop.hive.ql.plan.HiveOperation; |
| import org.apache.hadoop.hive.ql.plan.JoinCondDesc; |
| import org.apache.hadoop.hive.ql.plan.JoinDesc; |
| import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc; |
| import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc; |
| import org.apache.hadoop.hive.ql.plan.LimitDesc; |
| import org.apache.hadoop.hive.ql.plan.ListBucketingCtx; |
| import org.apache.hadoop.hive.ql.plan.LoadFileDesc; |
| import org.apache.hadoop.hive.ql.plan.LoadTableDesc; |
| import org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType; |
| import org.apache.hadoop.hive.ql.plan.MapJoinDesc; |
| import org.apache.hadoop.hive.ql.plan.OperatorDesc; |
| import org.apache.hadoop.hive.ql.plan.PTFDesc; |
| import org.apache.hadoop.hive.ql.plan.PlanUtils; |
| import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; |
| import org.apache.hadoop.hive.ql.plan.ScriptDesc; |
| import org.apache.hadoop.hive.ql.plan.SelectDesc; |
| import org.apache.hadoop.hive.ql.plan.TableDesc; |
| import org.apache.hadoop.hive.ql.plan.TableScanDesc; |
| import org.apache.hadoop.hive.ql.plan.UDTFDesc; |
| import org.apache.hadoop.hive.ql.plan.UnionDesc; |
| import org.apache.hadoop.hive.ql.plan.mapper.AuxOpTreeSignature; |
| import org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef; |
| import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef; |
| import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef; |
| import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; |
| import org.apache.hadoop.hive.ql.session.SessionState; |
| import org.apache.hadoop.hive.ql.session.SessionState.ResourceType; |
| import org.apache.hadoop.hive.ql.session.SessionStateUtil; |
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; |
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; |
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; |
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray; |
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCardinalityViolation; |
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash; |
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMurmurHash; |
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; |
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSurrogateKey; |
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; |
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFInline; |
| import org.apache.hadoop.hive.ql.util.DirectionUtils; |
| import org.apache.hadoop.hive.ql.util.NullOrdering; |
| import org.apache.hadoop.hive.ql.util.ResourceDownloader; |
| import org.apache.hadoop.hive.serde.serdeConstants; |
| import org.apache.hadoop.hive.serde2.AbstractSerDe; |
| import org.apache.hadoop.hive.serde2.DelimitedJSONSerDe; |
| import org.apache.hadoop.hive.serde2.Deserializer; |
| import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; |
| import org.apache.hadoop.hive.serde2.NoOpFetchFormatter; |
| import org.apache.hadoop.hive.serde2.NullStructSerDe; |
| import org.apache.hadoop.hive.serde2.SerDeException; |
| import org.apache.hadoop.hive.serde2.SerDeUtils; |
| import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; |
| import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe2; |
| import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; |
| import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; |
| import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; |
| import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; |
| import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; |
| import org.apache.hadoop.hive.serde2.objectinspector.StructField; |
| import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; |
| import org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe; |
| import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; |
| import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; |
| import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; |
| import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; |
| import org.apache.hadoop.hive.shims.HadoopShims; |
| import org.apache.hadoop.hive.shims.Utils; |
| import org.apache.hadoop.io.IOUtils; |
| import org.apache.hadoop.mapred.InputFormat; |
| import org.apache.hadoop.mapred.OutputFormat; |
| import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; |
| import org.apache.hadoop.security.UserGroupInformation; |
| import org.apache.hadoop.util.ReflectionUtils; |
| |
| import com.google.common.base.Splitter; |
| import com.google.common.base.Strings; |
| import com.google.common.collect.ArrayListMultimap; |
| import com.google.common.collect.ImmutableMap; |
| import com.google.common.collect.Lists; |
| import com.google.common.collect.Multimap; |
| import com.google.common.collect.Sets; |
| import com.google.common.math.IntMath; |
| import com.google.common.math.LongMath; |
| |
| /** |
| * Implementation of the semantic analyzer. It generates the query plan. |
| * Other, more specific semantic analyzers exist for certain Hive operations, |
| * such as the various analyzers for DDL commands. |
| */ |
| |
| public class SemanticAnalyzer extends BaseSemanticAnalyzer { |
| |
| |
| public static final String DUMMY_DATABASE = "_dummy_database"; |
| public static final String DUMMY_TABLE = "_dummy_table"; |
| public static final String SUBQUERY_TAG_1 = "-subquery1"; |
| public static final String SUBQUERY_TAG_2 = "-subquery2"; |
| |
| // Maximum number of characters used when auto-generating a column alias from the function name |
| private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20; |
| |
| public static final String VALUES_TMP_TABLE_NAME_PREFIX = "Values__Tmp__Table__"; |
| |
| /** Marks the temporary table created for a materialized CTE. The table is scoped to the query. */ |
| static final String MATERIALIZATION_MARKER = "$MATERIALIZATION"; |
| private static final String RESULTS_CACHE_KEY_TOKEN_REWRITE_PROGRAM = "RESULTS_CACHE_KEY_PROGRAM"; |
| |
| private Map<TableScanOperator, ExprNodeDesc> opToPartPruner; |
| private Map<TableScanOperator, PrunedPartitionList> opToPartList; |
| protected Map<String, TableScanOperator> topOps; |
| protected Map<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx; |
| private List<LoadTableDesc> loadTableWork; |
| private List<LoadFileDesc> loadFileWork; |
| private final List<ColumnStatsAutoGatherContext> columnStatsAutoGatherContexts; |
| private final Map<JoinOperator, QBJoinTree> joinContext; |
| private final Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext; |
| private final List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting; |
| private QB qb; |
| protected ASTNode ast; |
| private int destTableId; |
| private UnionProcContext uCtx; |
| private List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer; |
| private Map<TableScanOperator, SampleDesc> opToSamplePruner; |
| private final Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner; |
| private Map<SelectOperator, Table> viewProjectToTableSchema; |
| private Operator<? extends OperatorDesc> sinkOp; |
| private final CacheTableHelper cacheTableHelper = new CacheTableHelper(); |
| |
| /** |
| * A map for split sampling, from table alias to the SplitSample instance |
| * that describes the sampling percentage, row count, or total length. |
| */ |
| private final Map<String, SplitSample> nameToSplitSample; |
| private final Map<GroupByOperator, Set<String>> groupOpToInputTables; |
| protected Map<String, PrunedPartitionList> prunedPartitions; |
| protected List<FieldSchema> resultSchema; |
| protected List<FieldSchema> originalResultSchema; |
| protected CreateMaterializedViewDesc createVwDesc; |
| private MaterializedViewUpdateDesc materializedViewUpdateDesc; |
| private List<String> viewsExpanded; |
| protected ASTNode viewSelect; |
| protected final UnparseTranslator unparseTranslator; |
| private final GlobalLimitCtx globalLimitCtx; |
| |
| // Prefix for column names auto-generated by Hive |
| protected final String autogenColAliasPrfxLbl; |
| private final boolean autogenColAliasPrfxIncludeFuncName; |
| |
| // Keeps track of the view alias to the ReadEntity corresponding to the view. |
| // For example, for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T, |
| // it keeps track of the aliases V3, V3:V2 and V3:V2:V1. |
| // This is used when T is added as an input for the query; the parents of T are |
| // derived from the alias V3:V2:V1:T. |
| private final Map<String, ReadEntity> viewAliasToInput; |
| |
| // Need to merge the isDirect flag into the input even if the new input does not have a parent. |
| private boolean mergeIsDirect; |
| |
| // Flag for NOSCAN during ANALYZE ... COMPUTE STATISTICS |
| private boolean noscan; |
| |
| // Flag indicating that analysis should only proceed until the resultSchema is ready |
| protected boolean forViewCreation; |
| private String fqViewName; |
| |
| // Whether this is a materialized view rebuild rewritten expression |
| protected MaterializationRebuildMode mvRebuildMode = MaterializationRebuildMode.NONE; |
| |
| protected volatile boolean disableJoinMerge = false; |
| protected final boolean defaultJoinMerge; |
| |
| /** |
| * This is required by prepare/execute statements. |
| * The shape of the original operator tree ({@link #topOps}) changes as it goes through |
| * transformations and task generation; as a result, the original operator tree cannot be |
| * used later to e.g. regenerate tasks or re-run physical transformations. |
| * Therefore we make a copy and cache it after the operator tree is generated. |
| */ |
| protected Map<String, TableScanOperator> topOpsCopy = null; |
| |
| /* |
| * Capture the CTE definitions in a Query. |
| */ |
| protected final Map<String, CTEClause> aliasToCTEs; |
| |
| /* |
| * Used to check recursive CTE invocations. Similar to viewsExpanded |
| */ |
| private List<String> ctesExpanded; |
| |
| /* |
| * Whether root tasks after materialized CTE linkage have been resolved |
| */ |
| private boolean rootTasksResolved; |
| |
| private TableMask tableMask; |
| |
| CreateTableDesc tableDesc; |
| |
| protected AnalyzeRewriteContext analyzeRewrite; |
| |
| private WriteEntity acidAnalyzeTable; |
| |
| // A mapping from a tableName to a table object in metastore. |
| QueryTables tabNameToTabObject; |
| |
| // The tokens we should ignore when we are trying to do table masking. |
| private static final Set<Integer> IGNORED_TOKENS = Sets.newHashSet(HiveParser.TOK_GROUPBY, |
| HiveParser.TOK_ORDERBY, HiveParser.TOK_WINDOWSPEC, HiveParser.TOK_CLUSTERBY, |
| HiveParser.TOK_DISTRIBUTEBY, HiveParser.TOK_SORTBY); |
| |
| private String invalidResultCacheReason; |
| private String invalidAutomaticRewritingMaterializationReason; |
| |
| private final NullOrdering defaultNullOrder; |
| |
| private static final CommonToken SELECTDI_TOKEN = |
| new ImmutableCommonToken(HiveParser.TOK_SELECTDI, "TOK_SELECTDI"); |
| private static final CommonToken SELEXPR_TOKEN = |
| new ImmutableCommonToken(HiveParser.TOK_SELEXPR, "TOK_SELEXPR"); |
| private static final CommonToken TABLEORCOL_TOKEN = |
| new ImmutableCommonToken(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL"); |
| private static final CommonToken DOT_TOKEN = |
| new ImmutableCommonToken(HiveParser.DOT, "."); |
| |
| private static final String[] UPDATED_TBL_PROPS = { |
| hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, |
| hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, |
| hive_metastoreConstants.TABLE_BUCKETING_VERSION |
| }; |
| |
| private int subQueryExpressionAliasCounter = 0; |
| |
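| /** Phase 1 state: the current destination clause name and a counter used to derive subsequent destination names. */ |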
| static class Phase1Ctx { |
| String dest; |
| int nextNum; |
| } |
| |
| public SemanticAnalyzer(QueryState queryState) throws SemanticException { |
| super(queryState); |
| opToPartPruner = new HashMap<TableScanOperator, ExprNodeDesc>(); |
| opToPartList = new HashMap<TableScanOperator, PrunedPartitionList>(); |
| opToSamplePruner = new HashMap<TableScanOperator, SampleDesc>(); |
| nameToSplitSample = new HashMap<String, SplitSample>(); |
| // Must be deterministic order maps - see HIVE-8707 |
| topOps = new LinkedHashMap<String, TableScanOperator>(); |
| loadTableWork = new ArrayList<LoadTableDesc>(); |
| loadFileWork = new ArrayList<LoadFileDesc>(); |
| columnStatsAutoGatherContexts = new ArrayList<ColumnStatsAutoGatherContext>(); |
| opParseCtx = new LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext>(); |
| joinContext = new HashMap<JoinOperator, QBJoinTree>(); |
| smbMapJoinContext = new HashMap<SMBMapJoinOperator, QBJoinTree>(); |
| // Must be a deterministic order list for consistent q-test output across Java versions |
| reduceSinkOperatorsAddedByEnforceBucketingSorting = new ArrayList<ReduceSinkOperator>(); |
| destTableId = 1; |
| uCtx = null; |
| listMapJoinOpsNoReducer = new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>(); |
| groupOpToInputTables = new HashMap<GroupByOperator, Set<String>>(); |
| prunedPartitions = new HashMap<String, PrunedPartitionList>(); |
| unparseTranslator = new UnparseTranslator(conf); |
| autogenColAliasPrfxLbl = HiveConf.getVar(conf, |
| HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL); |
| autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf, |
| HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME); |
| queryProperties = new QueryProperties(); |
| opToPartToSkewedPruner = new HashMap<TableScanOperator, Map<String, ExprNodeDesc>>(); |
| aliasToCTEs = new HashMap<String, CTEClause>(); |
| globalLimitCtx = new GlobalLimitCtx(); |
| viewAliasToInput = new HashMap<String, ReadEntity>(); |
| mergeIsDirect = true; |
| noscan = false; |
| tabNameToTabObject = new QueryTables(); |
| defaultJoinMerge = !HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MERGE_NWAY_JOINS); |
| disableJoinMerge = defaultJoinMerge; |
| defaultNullOrder = NullOrdering.defaultNullOrder(conf); |
| } |
| |
| @Override |
| protected void reset(boolean clearCache) { |
| super.reset(true); |
| if(clearCache) { |
| prunedPartitions.clear(); |
| if (ctx != null) { |
| ctx.getOpContext().getColStatsCache().clear(); |
| } |
| |
| // When init(true) is combined with genResolvedParseTree, it generates a resolved parse tree from the syntax tree. |
| // ReadEntities created under these conditions should all be relevant to the syntax tree, even the ones without parents, |
| // so set mergeIsDirect to true here. |
| mergeIsDirect = true; |
| } else { |
| mergeIsDirect = false; |
| } |
| loadTableWork.clear(); |
| loadFileWork.clear(); |
| columnStatsAutoGatherContexts.clear(); |
| topOps.clear(); |
| destTableId = 1; |
| idToTableNameMap.clear(); |
| qb = null; |
| ast = null; |
| uCtx = null; |
| joinContext.clear(); |
| smbMapJoinContext.clear(); |
| opParseCtx.clear(); |
| groupOpToInputTables.clear(); |
| disableJoinMerge = defaultJoinMerge; |
| aliasToCTEs.clear(); |
| opToPartPruner.clear(); |
| opToPartList.clear(); |
| opToPartToSkewedPruner.clear(); |
| opToSamplePruner.clear(); |
| nameToSplitSample.clear(); |
| resultSchema = null; |
| createVwDesc = null; |
| materializedViewUpdateDesc = null; |
| viewsExpanded = null; |
| viewSelect = null; |
| ctesExpanded = null; |
| globalLimitCtx.disableOpt(); |
| viewAliasToInput.clear(); |
| reduceSinkOperatorsAddedByEnforceBucketingSorting.clear(); |
| listMapJoinOpsNoReducer.clear(); |
| unparseTranslator.clear(); |
| queryProperties.clear(); |
| outputs.clear(); |
| |
| if (ctx != null && ctx.enableUnparse()) { |
| unparseTranslator.enable(); |
| } |
| } |
| |
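| /** Re-initializes this analyzer's state from an existing ParseContext. */ |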
| void initParseCtx(ParseContext pctx) { |
| opToPartPruner = pctx.getOpToPartPruner(); |
| opToPartList = pctx.getOpToPartList(); |
| opToSamplePruner = pctx.getOpToSamplePruner(); |
| topOps = pctx.getTopOps(); |
| loadTableWork = pctx.getLoadTableWork(); |
| loadFileWork = pctx.getLoadFileWork(); |
| ctx = pctx.getContext(); |
| destTableId = pctx.getDestTableId(); |
| idToTableNameMap = pctx.getIdToTableNameMap(); |
| uCtx = pctx.getUCtx(); |
| listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer(); |
| prunedPartitions = pctx.getPrunedPartitions(); |
| tabNameToTabObject = pctx.getTabNameToTabObject(); |
| fetchTask = pctx.getFetchTask(); |
| setLineageInfo(pctx.getLineageInfo()); |
| } |
| |
| public ParseContext getParseContext() { |
| // Make sure the basic query properties are initialized |
| copyInfoToQueryProperties(queryProperties); |
| return new ParseContext(queryState, opToPartPruner, opToPartList, topOps, |
| new HashSet<JoinOperator>(joinContext.keySet()), |
| new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), |
| loadTableWork, loadFileWork, columnStatsAutoGatherContexts, |
| ctx, idToTableNameMap, destTableId, uCtx, |
| listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject, |
| opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, |
| opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, |
| analyzeRewrite, tableDesc, createVwDesc, materializedViewUpdateDesc, |
| queryProperties, viewProjectToTableSchema); |
| } |
| |
| public CompilationOpContext getOpContext() { |
| return ctx.getOpContext(); |
| } |
| |
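| /** |
| * Renders a partition value as a SQL literal of the given partition column type. |
| * For example: ("tinyint", "5") becomes "5Y", ("bigint", "5") becomes "5L", |
| * ("date", "2020-01-01") becomes "date '2020-01-01'"; string-like and other |
| * types are returned as single-quoted, SQL-escaped strings. |
| */ |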
| static String genPartValueString(String partColType, String partVal) { |
| String returnVal = partVal; |
| if (partColType.equals(serdeConstants.STRING_TYPE_NAME) || |
| partColType.contains(serdeConstants.VARCHAR_TYPE_NAME) || |
| partColType.contains(serdeConstants.CHAR_TYPE_NAME)) { |
| returnVal = "'" + escapeSQLString(partVal) + "'"; |
| } else if (partColType.equals(serdeConstants.TINYINT_TYPE_NAME)) { |
| returnVal = partVal + "Y"; |
| } else if (partColType.equals(serdeConstants.SMALLINT_TYPE_NAME)) { |
| returnVal = partVal + "S"; |
| } else if (partColType.equals(serdeConstants.INT_TYPE_NAME)) { |
| returnVal = partVal; |
| } else if (partColType.equals(serdeConstants.BIGINT_TYPE_NAME)) { |
| returnVal = partVal + "L"; |
| } else if (partColType.contains(serdeConstants.DECIMAL_TYPE_NAME)) { |
| returnVal = partVal + "BD"; |
| } else if (partColType.equals(serdeConstants.DATE_TYPE_NAME) || |
| partColType.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) { |
| returnVal = partColType + " '" + escapeSQLString(partVal) + "'"; |
| } else { |
| // For other, rarely used types, just quote the value |
| returnVal = "'" + escapeSQLString(partVal) + "'"; |
| } |
| |
| return returnVal; |
| } |
| |
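| /** Convenience overload of {@link #doPhase1QBExpr(ASTNode, QBExpr, String, String, boolean, ASTNode)} with insideView = false. */ |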
| private void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias, ASTNode tabColNames) |
| throws SemanticException { |
| doPhase1QBExpr(ast, qbexpr, id, alias, false, tabColNames); |
| } |
| |
| @SuppressWarnings("nls") |
| void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias, boolean insideView, ASTNode tabColNames) |
| throws SemanticException { |
| |
| assert (ast.getToken() != null); |
| if (ast.getToken().getType() == HiveParser.TOK_QUERY) { |
| QB qb = new QB(id, alias, true); |
| qb.setInsideView(insideView); |
| Phase1Ctx ctx_1 = initPhase1Ctx(); |
| qb.getParseInfo().setColAliases(tabColNames); |
| doPhase1(ast, qb, ctx_1, null); |
| |
| qbexpr.setOpcode(QBExpr.Opcode.NULLOP); |
| qbexpr.setQB(qb); |
| } |
| // setop |
| else { |
| switch (ast.getToken().getType()) { |
| case HiveParser.TOK_UNIONALL: |
| qbexpr.setOpcode(QBExpr.Opcode.UNION); |
| break; |
| case HiveParser.TOK_INTERSECTALL: |
| qbexpr.setOpcode(QBExpr.Opcode.INTERSECTALL); |
| break; |
| case HiveParser.TOK_INTERSECTDISTINCT: |
| qbexpr.setOpcode(QBExpr.Opcode.INTERSECT); |
| break; |
| case HiveParser.TOK_EXCEPTALL: |
| qbexpr.setOpcode(QBExpr.Opcode.EXCEPTALL); |
| break; |
| case HiveParser.TOK_EXCEPTDISTINCT: |
| qbexpr.setOpcode(QBExpr.Opcode.EXCEPT); |
| break; |
| default: |
| throw new SemanticException(ErrorMsg.UNSUPPORTED_SET_OPERATOR.getMsg("Type " |
| + ast.getToken().getType())); |
| } |
| // query 1 |
| assert (ast.getChild(0) != null); |
| QBExpr qbexpr1 = new QBExpr(alias + SUBQUERY_TAG_1); |
| doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id, |
| alias + SUBQUERY_TAG_1, insideView, tabColNames); |
| qbexpr.setQBExpr1(qbexpr1); |
| |
| // query 2 |
| assert (ast.getChild(1) != null); |
| QBExpr qbexpr2 = new QBExpr(alias + SUBQUERY_TAG_2); |
| doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id, |
| alias + SUBQUERY_TAG_2, insideView, tabColNames); |
| qbexpr.setQBExpr2(qbexpr2); |
| } |
| } |
| |
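| /** |
| * Collects the aggregation subtrees appearing in the SELECT expressions of the given |
| * destination. Window function invocations are registered in the destination's |
| * WindowingSpec instead; only the non-windowing aggregation trees are returned, |
| * keyed by their toStringTree() form to eliminate duplicates. |
| */ |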
| private Map<String, ASTNode> doPhase1GetAggregationsFromSelect( |
| ASTNode selExpr, QB qb, String dest) throws SemanticException { |
| |
| // Iterate over the select expressions, searching for aggregation trees. |
| // Use String keys to eliminate duplicate trees. |
| Map<String, ASTNode> aggregationTrees = new LinkedHashMap<String, ASTNode>(); |
| List<ASTNode> wdwFns = new ArrayList<ASTNode>(); |
| for (int i = 0; i < selExpr.getChildCount(); ++i) { |
| ASTNode function = (ASTNode) selExpr.getChild(i); |
| if (function.getType() == HiveParser.TOK_SELEXPR || |
| function.getType() == HiveParser.TOK_SUBQUERY_EXPR) { |
| function = (ASTNode)function.getChild(0); |
| } |
| doPhase1GetAllAggregations(function, qb, aggregationTrees, wdwFns, null); |
| } |
| |
| // window based aggregations are handled differently |
| for (ASTNode wdwFn : wdwFns) { |
| WindowingSpec spec = qb.getWindowingSpec(dest); |
| if(spec == null) { |
| queryProperties.setHasWindowing(true); |
| spec = new WindowingSpec(); |
| qb.addDestToWindowingSpec(dest, spec); |
| } |
| Map<String, ASTNode> wExprsInDest = qb.getParseInfo().getWindowingExprsForClause(dest); |
| int wColIdx = spec.getWindowExpressions() == null ? 0 : spec.getWindowExpressions().size(); |
| WindowFunctionSpec wFnSpec = processWindowFunction(wdwFn, |
| (ASTNode)wdwFn.getChild(wdwFn.getChildCount()-1)); |
| // If this is a duplicate invocation of a function, don't add it to the WindowingSpec. |
| if ( wExprsInDest != null && |
| wExprsInDest.containsKey(wFnSpec.getExpression().toStringTree())) { |
| continue; |
| } |
| wFnSpec.setAlias(wFnSpec.getName() + "_window_" + wColIdx); |
| spec.addWindowFunction(wFnSpec); |
| qb.getParseInfo().addWindowingExprToClause(dest, wFnSpec.getExpression()); |
| } |
| |
| return aggregationTrees; |
| } |
| |
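| /** |
| * Recursively walks the WHERE clause AST and registers an alias in the QB for every |
| * subquery expression encountered. |
| */ |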
| private void doPhase1WhereClause(ASTNode expressionTree, QB qb) throws SemanticException { |
| int exprTokenType = expressionTree.getToken().getType(); |
| if(exprTokenType == HiveParser.TOK_SUBQUERY_EXPR) { |
| qb.addSubqExprAlias(expressionTree, this); |
| return; |
| } |
| |
| for (int i = 0; i < expressionTree.getChildCount(); i++) { |
| doPhase1WhereClause((ASTNode) expressionTree.getChild(i), qb); |
| } |
| } |
| |
| /** |
| * Figures out whether the current AST is an INSERT INTO, i.e. whether the destination |
| * for the given clause is a table. |
| * @param qbp the QB parse info |
| * @param dest the destination clause |
| * @return true if the destination is a table, false otherwise |
| */ |
| protected boolean isInsertInto(QBParseInfo qbp, String dest) { |
| // get the destination and check if it is TABLE |
| if(qbp == null || dest == null ) { |
| return false; |
| } |
| ASTNode destNode = qbp.getDestForClause(dest); |
| return destNode != null && destNode.getType() == HiveParser.TOK_TAB; |
| } |
| |
| /** |
| * Given an AST, figures out whether it is a VALUES clause, |
| * e.g. VALUES(1, 3, ...). |
| */ |
| private boolean isValueClause(ASTNode select) { |
| if(select == null) { |
| return false; |
| } |
| if(select.getChildCount() == 1) { |
| ASTNode selectExpr = (ASTNode)select.getChild(0); |
| if(selectExpr.getChildCount() == 1 ) { |
| ASTNode selectChildExpr = (ASTNode)selectExpr.getChild(0); |
| if(selectChildExpr.getType() == HiveParser.TOK_FUNCTION) { |
| ASTNode inline = (ASTNode)selectChildExpr.getChild(0); |
| ASTNode func = (ASTNode)selectChildExpr.getChild(1); |
| if(inline.getText().equals(GenericUDTFInline.class.getAnnotation(Description.class).name()) |
| && func.getType() == HiveParser.TOK_FUNCTION) { |
| ASTNode arrayNode = (ASTNode)func.getChild(0); |
| ASTNode funcNode= (ASTNode)func.getChild(1); |
| if(arrayNode.getText().equals(GenericUDFArray.class.getAnnotation(Description.class).name() ) |
| && funcNode.getType() == HiveParser.TOK_FUNCTION) { |
| return true; |
| } |
| } |
| } |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Creates a list of default constraint values corresponding to the given schema |
| * (targetSchema), or to the target table's column schema if targetSchema is null. |
| * @param tbl the target table |
| * @param targetSchema the target column schema, or null to use the table's columns |
| * @return list of default values (containing NULL where a column has no default) |
| * @throws SemanticException |
| */ |
| protected List<String> getDefaultConstraints(Table tbl, List<String> targetSchema) throws SemanticException{ |
| Map<String, String> colNameToDefaultVal = getColNameToDefaultValueMap(tbl); |
| List<String> defaultConstraints = new ArrayList<>(); |
| if(targetSchema != null && !targetSchema.isEmpty()) { |
| for (String colName : targetSchema) { |
| defaultConstraints.add(colNameToDefaultVal.get(colName)); |
| } |
| } |
| else { |
| for(FieldSchema fs:tbl.getCols()) { |
| defaultConstraints.add(colNameToDefaultVal.get(fs.getName())); |
| } |
| } |
| return defaultConstraints; |
| } |
| |
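| /** |
| * Fetches the enabled DEFAULT constraints for the given table from the metastore and |
| * returns a map from column name to default value expression. |
| */ |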
| protected Map<String, String> getColNameToDefaultValueMap(Table tbl) throws SemanticException { |
| Map<String, String> colNameToDefaultVal = null; |
| try { |
| DefaultConstraint dc = Hive.get().getEnabledDefaultConstraints(tbl.getDbName(), tbl.getTableName()); |
| colNameToDefaultVal = dc.getColNameToDefaultValueMap(); |
| } catch (Exception e) { |
| if (e instanceof SemanticException) { |
| throw (SemanticException) e; |
| } else { |
| throw (new RuntimeException(e)); |
| } |
| } |
| return colNameToDefaultVal; |
| } |
| |
| /** |
| * Constructs an AST for the given DEFAULT value string, or a TOK_NULL node if the value is null. |
| * @param newValue the default value expression to parse, may be null |
| * @throws SemanticException if the default value cannot be parsed |
| */ |
| private ASTNode getNodeReplacementforDefault(String newValue) throws SemanticException { |
| ASTNode newNode = null; |
| if(newValue== null) { |
| newNode = ASTBuilder.construct(HiveParser.TOK_NULL, "TOK_NULL").node(); |
| } |
| else { |
| try { |
| newNode = new ParseDriver().parseExpression(newValue); |
| } catch(Exception e) { |
| throw new SemanticException("Error while parsing default value for DEFAULT keyword: " + newValue |
| + ". Error message: " + e.getMessage()); |
| } |
| } |
| return newNode; |
| } |
| |
| /** |
| * Replaces each ASTNode corresponding to the DEFAULT keyword with the column's DEFAULT |
| * constraint expression if one exists, or with NULL otherwise. Used for UPDATE statements. |
| * @param selectExprs the SELECT expressions to scan |
| * @param targetTable the target table |
| * @throws SemanticException |
| */ |
| private void replaceDefaultKeywordForUpdate(ASTNode selectExprs, Table targetTable) throws SemanticException { |
| List<String> defaultConstraints = null; |
| for (int i = 0; i < selectExprs.getChildCount(); i++) { |
| ASTNode selectExpr = (ASTNode) selectExprs.getChild(i); |
| if (selectExpr.getChildCount() == 1 && selectExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) { |
| //first child should be rowid |
| if (i != 0 || selectExpr.getChild(0).getChild(0).getText().equals("ROW__ID")) { |
| if (selectExpr.getChild(0).getChild(0).getType() == HiveParser.TOK_DEFAULT_VALUE) { |
| if (defaultConstraints == null) { |
| defaultConstraints = getDefaultConstraints(targetTable, null); |
| } |
| ASTNode newNode = getNodeReplacementforDefault(defaultConstraints.get(i - 1)); |
| // replace the node in place |
| selectExpr.replaceChildren(0, 0, newNode); |
| if (LOG.isDebugEnabled()) { |
| LOG.debug("DEFAULT keyword replacement - Inserted {} for table: {}", newNode.getText(), |
| targetTable.getTableName()); |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| /** |
| * Replaces DEFAULT AST nodes in a VALUES clause with the corresponding DEFAULT constraint expressions. |
| * @param valueArrClause the AST for the VALUES clause |
| * @param targetTable the target table |
| * @param targetSchema the target schema/column schema, if specified in the query |
| */ |
| private void replaceDefaultKeyword(ASTNode valueArrClause, Table targetTable, List<String> targetSchema) throws SemanticException { |
| List<String> defaultConstraints = null; |
| for (int i = 1; i < valueArrClause.getChildCount(); i++) { |
| ASTNode valueClause = (ASTNode) valueArrClause.getChild(i); |
| //skip first child since it is struct |
| for (int j = 1; j < valueClause.getChildCount(); j++) { |
| if (valueClause.getChild(j).getType() == HiveParser.TOK_TABLE_OR_COL |
| && valueClause.getChild(j).getChild(0).getType() == HiveParser.TOK_DEFAULT_VALUE) { |
| if (defaultConstraints == null) { |
| defaultConstraints = getDefaultConstraints(targetTable, targetSchema); |
| } |
| ASTNode newNode = getNodeReplacementforDefault(defaultConstraints.get(j - 1)); |
| // replace the node in place |
| valueClause.replaceChildren(j, j, newNode); |
| LOG.debug("DEFAULT keyword replacement - Inserted {} for table: {}", newNode.getText(), |
| targetTable.getTableName()); |
| } |
| } |
| } |
| } |
| |
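| /** |
| * Records explicit column aliases from the SELECT expressions in the parse info. |
| * For INSERT INTO ... VALUES and UPDATE destinations, DEFAULT keywords are first |
| * replaced with the corresponding default constraint expressions (or NULL). |
| */ |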
| private void doPhase1GetColumnAliasesFromSelect( |
| ASTNode selectExpr, QBParseInfo qbp, String dest) throws SemanticException { |
| if (isInsertInto(qbp, dest)) { |
| ASTNode tblAst = qbp.getDestForClause(dest); |
| String tableName = getUnescapedName((ASTNode) tblAst.getChild(0)); |
| Table targetTable; |
| try { |
| if (isValueClause(selectExpr)) { |
| targetTable = getTableObjectByName(tableName); |
| replaceDefaultKeyword((ASTNode) selectExpr.getChild(0).getChild(0).getChild(1), targetTable, qbp.getDestSchemaForClause(dest)); |
| } else if (updating(dest)) { |
| targetTable = getTableObjectByName(tableName); |
| replaceDefaultKeywordForUpdate(selectExpr, targetTable); |
| } |
| } catch (Exception e) { |
| if (e instanceof SemanticException) { |
| throw (SemanticException) e; |
| } else { |
| throw (new RuntimeException(e)); |
| } |
| } |
| } |
| for (int i = 0; i < selectExpr.getChildCount(); ++i) { |
| ASTNode selExpr = (ASTNode) selectExpr.getChild(i); |
| if ((selExpr.getToken().getType() == HiveParser.TOK_SELEXPR) |
| && (selExpr.getChildCount() == 2)) { |
| String columnAlias = unescapeIdentifier(selExpr.getChild(1).getText()); |
| qbp.setExprToColumnAlias((ASTNode) selExpr.getChild(0), columnAlias); |
| } |
| } |
| } |
| |
| /** |
| * DFS-scans the expressionTree to find all aggregation subtrees and puts them |
| * in aggregations. |
| * |
| * @param expressionTree the expression AST to scan |
| * @param aggregations |
| *          map to populate; the key is the toStringTree() representation of |
| *          the aggregation subtree. |
| * @throws SemanticException |
| */ |
| private void doPhase1GetAllAggregations(ASTNode expressionTree, QB qb, |
| Map<String, ASTNode> aggregations, List<ASTNode> wdwFns, |
| ASTNode wndParent) throws SemanticException { |
| int exprTokenType = expressionTree.getToken().getType(); |
| if(exprTokenType == HiveParser.TOK_SUBQUERY_EXPR) { |
| // Since we now support scalar subqueries, a subquery expression can appear in HAVING; |
| // we don't want to include aggregates from within the subquery. |
| qb.addSubqExprAlias(expressionTree, this); |
| return; |
| } |
| |
| boolean parentIsWindowSpec = wndParent != null; |
| |
| if (exprTokenType == HiveParser.TOK_FUNCTION |
| || exprTokenType == HiveParser.TOK_FUNCTIONDI |
| || exprTokenType == HiveParser.TOK_FUNCTIONSTAR) { |
| assert (expressionTree.getChildCount() != 0); |
| Tree lastChild = expressionTree.getChild(expressionTree.getChildCount() - 1); |
| if (lastChild.getType() == HiveParser.TOK_WINDOWSPEC) { |
| // If it is a windowing spec, we include it in the list |
| // Further, we will examine its children AST nodes to check whether |
| // there are aggregation functions within |
| wdwFns.add(expressionTree); |
| for(Node child : expressionTree.getChildren()) { |
| doPhase1GetAllAggregations((ASTNode) child, qb, aggregations, wdwFns, expressionTree); |
| } |
| return; |
| } else if (lastChild.getType() == HiveParser.TOK_WITHIN_GROUP) { |
| transformWithinGroup(expressionTree, lastChild); |
| } |
| if (expressionTree.getChild(0).getType() == HiveParser.Identifier) { |
| String functionName = unescapeIdentifier(expressionTree.getChild(0) |
| .getText()); |
| // Validate the function name |
| if (FunctionRegistry.getFunctionInfo(functionName) == null) { |
| throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(functionName)); |
| } |
| if(FunctionRegistry.impliesOrder(functionName) && !parentIsWindowSpec) { |
| throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName)); |
| } |
| if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) { |
| if(containsLeadLagUDF(expressionTree) && !parentIsWindowSpec) { |
| throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName)); |
| } |
| aggregations.put(expressionTree.toStringTree(), expressionTree); |
| FunctionInfo fi = FunctionRegistry.getFunctionInfo(functionName); |
| if (!fi.isNative()) { |
| unparseTranslator.addIdentifierTranslation((ASTNode) expressionTree |
| .getChild(0)); |
| } |
| return; |
| } |
| } |
| } |
| for (int i = 0; i < expressionTree.getChildCount(); i++) { |
| doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i), qb, |
| aggregations, wdwFns, wndParent); |
| } |
| } |
| |
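| /** |
| * Rewrites an ordered-set aggregate invocation "func(p1..pn) WITHIN GROUP (ORDER BY s1..sn)" |
| * into the flattened internal form func(p1, s1, dir1, nullOrder1, ..., pn, sn, dirn, nullOrdern), |
| * where sort direction and null ordering are encoded as numeric literals. |
| * Only applied when the query is not compiled through CBO. |
| */ |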
| private void transformWithinGroup(ASTNode expressionTree, Tree withinGroupNode) throws SemanticException { |
| if (isCBOExecuted()) { |
| return; |
| } |
| |
| Tree functionNameNode = expressionTree.getChild(0); |
| if (!FunctionRegistry.isOrderedAggregate(functionNameNode.getText())) { |
| throw new SemanticException(ErrorMsg.WITHIN_GROUP_NOT_ALLOWED, functionNameNode.getText()); |
| } |
| |
| List<Tree> parameters = new ArrayList<>(expressionTree.getChildCount() - 2); |
| for (int i = 1; i < expressionTree.getChildCount() - 1; ++i) { |
| parameters.add(expressionTree.getChild(i)); |
| } |
| while (expressionTree.getChildCount() > 1) { |
| expressionTree.deleteChild(1); |
| } |
| |
| Tree orderByNode = withinGroupNode.getChild(0); |
| if (parameters.size() != orderByNode.getChildCount()) { |
| throw new SemanticException(ErrorMsg.WITHIN_GROUP_PARAMETER_MISMATCH, |
| Integer.toString(parameters.size()), Integer.toString(orderByNode.getChildCount())); |
| } |
| |
| for (int i = 0; i < orderByNode.getChildCount(); ++i) { |
| expressionTree.addChild(parameters.get(i)); |
| Tree tabSortColNameNode = orderByNode.getChild(i); |
| Tree nullsNode = tabSortColNameNode.getChild(0); |
| ASTNode sortKey = (ASTNode) tabSortColNameNode.getChild(0).getChild(0); |
| expressionTree.addChild(sortKey); |
| expressionTree.addChild(ASTBuilder.createAST(HiveParser.NumberLiteral, |
| Integer.toString(DirectionUtils.tokenToCode(tabSortColNameNode.getType())))); |
| expressionTree.addChild(ASTBuilder.createAST(HiveParser.NumberLiteral, |
| Integer.toString(NullOrdering.fromToken(nullsNode.getType()).getCode()))); |
| } |
| } |
| |
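| /** Returns the aggregation trees that are DISTINCT invocations (TOK_FUNCTIONDI). */ |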
| private List<ASTNode> doPhase1GetDistinctFuncExprs(Map<String, ASTNode> aggregationTrees) { |
| List<ASTNode> exprs = new ArrayList<ASTNode>(); |
| for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) { |
| ASTNode value = entry.getValue(); |
| assert (value != null); |
| if (value.getToken().getType() == HiveParser.TOK_FUNCTIONDI) { |
| exprs.add(value); |
| } |
| } |
| return exprs; |
| } |
| |
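| /** |
| * Builds an error message of the form "line:col message. Error encountered near token 'text'", |
| * falling back to a plain message when the AST is null. |
| */ |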
| public static String generateErrorMessage(ASTNode ast, String message) { |
| StringBuilder sb = new StringBuilder(); |
| if (ast == null) { |
| sb.append(message).append(". Cannot tell the position of null AST."); |
| return sb.toString(); |
| } |
| sb.append(ast.getLine()); |
| sb.append(":"); |
| sb.append(ast.getCharPositionInLine()); |
| sb.append(" "); |
| sb.append(message); |
| sb.append(". Error encountered near token '"); |
| sb.append(ASTErrorUtils.getText(ast)); |
| sb.append("'"); |
| return sb.toString(); |
| } |
| |
| ASTNode getAST() { |
| return this.ast; |
| } |
| |
| protected void setAST(ASTNode newAST) { |
| this.ast = newAST; |
| } |
| |
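| /** |
| * Returns the explicit alias of a table reference if one is present, otherwise the |
| * unescaped, unqualified table name. |
| */ |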
| private String findSimpleTableName(ASTNode tabref, int aliasIndex) throws SemanticException { |
| assert tabref.getType() == HiveParser.TOK_TABREF; |
| ASTNode tableTree = (ASTNode) (tabref.getChild(0)); |
| |
| String alias; |
| if (aliasIndex != 0) { |
| alias = unescapeIdentifier(tabref.getChild(aliasIndex).getText()); |
| } |
| else { |
| alias = getUnescapedUnqualifiedTableName(tableTree); |
| } |
| return alias; |
| } |
| /** |
| * Goes through the tabref tree and finds the alias for the table. Once found, |
| * it records the table name -> alias association in aliasToTabs. It also makes |
| * an association from the alias to the table AST in the parse info. |
| * |
| * @return the alias of the table |
| */ |
| private String processTable(QB qb, ASTNode tabref) throws SemanticException { |
| // For each table reference get the table name |
| // and the alias (if alias is not present, the table name |
| // is used as an alias) |
| int[] indexes = findTabRefIdxs(tabref); |
| int aliasIndex = indexes[0]; |
| int propsIndex = indexes[1]; |
| int tsampleIndex = indexes[2]; |
| int ssampleIndex = indexes[3]; |
| int asOfTimeIndex = indexes[4]; |
| int asOfVersionIndex = indexes[5]; |
| int asOfVersionFromIndex = indexes[6]; |
| |
| ASTNode tableTree = (ASTNode) (tabref.getChild(0)); |
| |
| String tabIdName = getUnescapedName(tableTree).toLowerCase(); |
| |
| String alias = findSimpleTableName(tabref, aliasIndex); |
| |
| if (propsIndex >= 0) { |
| Tree propsAST = tabref.getChild(propsIndex); |
| Map<String, String> props = getProps((ASTNode) propsAST.getChild(0)); |
| // We get the information from Calcite. |
| if ("TRUE".equals(props.get("insideView"))) { |
| qb.getAliasInsideView().add(alias.toLowerCase()); |
| } |
| qb.setTabProps(alias, props); |
| } |
| |
| if (asOfTimeIndex != -1 || asOfVersionIndex != -1 || asOfVersionFromIndex != -1) { |
| String asOfVersion = asOfVersionIndex == -1 ? null : getAsOfValue(tabref, asOfVersionIndex); |
| String asOfVersionFrom = |
| asOfVersionFromIndex == -1 ? null : tabref.getChild(asOfVersionFromIndex).getChild(0).getText(); |
| String asOfTime = asOfTimeIndex == -1 ? null : getAsOfValue(tabref, asOfTimeIndex); |
| qb.setSystemVersion(alias, new QBSystemVersion(asOfVersion, asOfVersionFrom, asOfTime)); |
| } |
| |
| // If the alias is already there then we have a conflict |
| if (qb.exists(alias)) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(), |
| tabref.getChild(aliasIndex))); |
| } |
| if (tsampleIndex >= 0) { |
| ASTNode sampleClause = (ASTNode) tabref.getChild(tsampleIndex); |
| List<ASTNode> sampleCols = new ArrayList<ASTNode>(); |
| if (sampleClause.getChildCount() > 2) { |
| for (int i = 2; i < sampleClause.getChildCount(); i++) { |
| sampleCols.add((ASTNode) sampleClause.getChild(i)); |
| } |
| } |
| // TODO: For now only support sampling on up to two columns |
| // Need to change it to list of columns |
| if (sampleCols.size() > 2) { |
| throw new SemanticException(generateErrorMessage( |
| (ASTNode) tabref.getChild(0), |
| ErrorMsg.SAMPLE_RESTRICTION.getMsg())); |
| } |
| TableSample tabSample = new TableSample( |
| unescapeIdentifier(sampleClause.getChild(0).getText()), |
| unescapeIdentifier(sampleClause.getChild(1).getText()), |
| sampleCols); |
| qb.getParseInfo().setTabSample(alias, tabSample); |
| if (unparseTranslator.isEnabled()) { |
| for (ASTNode sampleCol : sampleCols) { |
| unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol |
| .getChild(0)); |
| } |
| } |
| } else if (ssampleIndex >= 0) { |
| ASTNode sampleClause = (ASTNode) tabref.getChild(ssampleIndex); |
| |
| Tree type = sampleClause.getChild(0); |
| Tree numerator = sampleClause.getChild(1); |
| String value = unescapeIdentifier(numerator.getText()); |
| |
| |
| SplitSample sample; |
| if (type.getType() == HiveParser.TOK_PERCENT) { |
| assertCombineInputFormat(numerator, "Percentage"); |
| double percent = Double.valueOf(value); |
| if (percent < 0 || percent > 100) { |
| throw new SemanticException(generateErrorMessage((ASTNode) numerator, |
| "Sampling percentage should be between 0 and 100")); |
| } |
| int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM); |
| sample = new SplitSample(percent, seedNum); |
| } else if (type.getType() == HiveParser.TOK_ROWCOUNT) { |
| sample = new SplitSample(Integer.parseInt(value)); |
| } else { |
| assert type.getType() == HiveParser.TOK_LENGTH; |
| assertCombineInputFormat(numerator, "Total Length"); |
| long length = Integer.parseInt(value.substring(0, value.length() - 1)); |
| char last = value.charAt(value.length() - 1); |
| if (last == 'k' || last == 'K') { |
| length <<= 10; |
| } else if (last == 'm' || last == 'M') { |
| length <<= 20; |
| } else if (last == 'g' || last == 'G') { |
| length <<= 30; |
| } |
| int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM); |
| sample = new SplitSample(length, seedNum); |
| } |
| String alias_id = getAliasId(alias, qb); |
| nameToSplitSample.put(alias_id, sample); |
| } |
| // Record the alias -> table name mapping in the QB |
| qb.setTabAlias(alias, tabIdName); |
| if (qb.isInsideView()) { |
| qb.getAliasInsideView().add(alias.toLowerCase()); |
| } |
| qb.addAlias(alias); |
| |
| qb.getParseInfo().setSrcForAlias(alias, tableTree); |
| |
| // If aliasToCTEs contains the table name, we do not do the translation because |
| // the CTE is actually a subquery. |
| if (!this.aliasToCTEs.containsKey(tabIdName)) { |
| unparseTranslator.addTableNameTranslation(tableTree, SessionState.get().getCurrentDatabase()); |
| if (aliasIndex != 0) { |
| unparseTranslator.addIdentifierTranslation((ASTNode) tabref.getChild(aliasIndex)); |
| } |
| } |
| |
| return alias; |
| } |
| |
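| /** |
| * Extracts the AS OF (version or time) value from a table reference: constant expressions |
| * are evaluated to their value, plain literals are returned with quotes stripped. |
| */ |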
| private String getAsOfValue(ASTNode tabref, int asOfIndex) throws SemanticException { |
| String asOfValue = null; |
| if (asOfIndex != -1) { |
| ASTNode expr = (ASTNode) tabref.getChild(asOfIndex).getChild(0); |
| if (expr.getChildCount() > 0) { |
| ExprNodeDesc desc = genExprNodeDesc(expr, new RowResolver(), false, true); |
| ExprNodeConstantDesc c = (ExprNodeConstantDesc) desc; |
| asOfValue = String.valueOf(c.getValue()); |
| } else { |
| asOfValue = stripQuotes(expr.getText()); |
| } |
| } |
| return asOfValue; |
| } |
| |
| Map<String, SplitSample> getNameToSplitSampleMap() { |
| return this.nameToSplitSample; |
| } |
| |
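| /** |
| * Percentage and total-length based split sampling require CombineHiveInputFormat; |
| * throws a SemanticException if a different input format is configured. |
| */ |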
| private void assertCombineInputFormat(Tree numerator, String message) throws SemanticException { |
| String inputFormat = conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ? |
| HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT): |
| HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT); |
| if (!inputFormat.equals(CombineHiveInputFormat.class.getName())) { |
| throw new SemanticException(generateErrorMessage((ASTNode) numerator, |
| message + " sampling is not supported in " + inputFormat)); |
| } |
| } |
| |
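| /** |
| * Processes a subquery in the FROM clause: validates that it has an alias, runs phase 1 |
| * on the subquery, registers the alias in the QB, and returns the alias. |
| */ |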
| private String processSubQuery(QB qb, ASTNode subq) throws SemanticException { |
| |
| // This is a subquery and must have an alias |
| if (subq.getChildCount() != 2) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.NO_SUBQUERY_ALIAS.getMsg(), subq)); |
| } |
| ASTNode subqref = (ASTNode) subq.getChild(0); |
| String alias = unescapeIdentifier(subq.getChild(1).getText()); |
| |
| // Recursively do the first phase of semantic analysis for the subquery |
| QBExpr qbexpr = new QBExpr(alias, subqref); |
| |
| doPhase1QBExpr(subqref, qbexpr, qb.getId(), alias, qb.isInsideView(), null); |
| |
| // If the alias is already there then we have a conflict |
| if (qb.exists(alias)) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(), |
| subq.getChild(1))); |
| } |
// Record the alias -> subquery mapping in the query block
| qb.setSubqAlias(alias, qbexpr); |
| qb.addAlias(alias); |
| |
| unparseTranslator.addIdentifierTranslation((ASTNode) subq.getChild(1)); |
| |
| return alias; |
| } |
| |
| /* |
* Phase 1: hold onto any CTE definitions in aliasToCTEs.
| * CTE definitions are global to the Query. |
| */ |
| private void processCTE(QB qb, ASTNode ctes) throws SemanticException { |
| |
| int numCTEs = ctes.getChildCount(); |
| |
for (int i = 0; i < numCTEs; i++) {
| ASTNode cte = (ASTNode) ctes.getChild(i); |
| ASTNode cteQry = (ASTNode) cte.getChild(0); |
| String alias = unescapeIdentifier(cte.getChild(1).getText()); |
| ASTNode withColList = cte.getChildCount() == 3 ? (ASTNode) cte.getChild(2) : null; |
| |
| String qName = qb.getId() == null ? "" : qb.getId() + ":"; |
| qName += alias.toLowerCase(); |
| |
if (aliasToCTEs.containsKey(qName)) {
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(), |
| cte.getChild(1))); |
| } |
| aliasToCTEs.put(qName, new CTEClause(qName, cteQry, withColList)); |
| } |
| } |
| |
| /* |
| * We allow CTE definitions in views. So we can end up with a hierarchy of CTE definitions: |
| * - at the top level of a query statement |
| * - where a view is referenced. |
| * - views may refer to other views. |
| * |
| * The scoping rules we use are: to search for a CTE from the current QB outwards. In order to |
| * disambiguate between CTES are different levels we qualify(prefix) them with the id of the QB |
| * they appear in when adding them to the <code>aliasToCTEs</code> map. |
| * |
| */ |
| private CTEClause findCTEFromName(QB qb, String cteName) { |
| StringBuilder qId = new StringBuilder(); |
| if (qb.getId() != null) { |
| qId.append(qb.getId()); |
| } |
| |
| while (qId.length() > 0) { |
| String nm = qId + ":" + cteName; |
| CTEClause cte = aliasToCTEs.get(nm); |
| if (cte != null) { |
| return cte; |
| } |
| int lastIndex = qId.lastIndexOf(":"); |
| lastIndex = lastIndex < 0 ? 0 : lastIndex; |
| qId.setLength(lastIndex); |
| } |
| return aliasToCTEs.get(cteName); |
| } |
| |
| /* |
| * If a CTE is referenced in a QueryBlock: |
| * - add it as a SubQuery for now. |
| * - SQ.alias is the alias used in QB. (if no alias is specified, |
| * it used the CTE name. Works just like table references) |
| * - Adding SQ done by: |
| * - copying AST of CTE |
| * - setting ASTOrigin on cloned AST. |
| * - trigger phase 1 on new QBExpr. |
| * - update QB data structs: remove this as a table reference, move it to a SQ invocation. |
| */ |
| private void addCTEAsSubQuery(QB qb, String cteName, String cteAlias) |
| throws SemanticException { |
| cteAlias = cteAlias == null ? cteName : cteAlias; |
| CTEClause cte = findCTEFromName(qb, cteName); |
| ASTNode cteQryNode = cte.cteNode; |
| QBExpr cteQBExpr = new QBExpr(cteAlias); |
| doPhase1QBExpr(cteQryNode, cteQBExpr, qb.getId(), cteAlias, cte.withColList); |
| qb.rewriteCTEToSubq(cteAlias, cteName, cteQBExpr); |
| } |
| |
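// Sentinel root of the CTE dependency graph: CTEs referenced directly by the main query become
// its parents, so asExecutionOrder() yields materialized CTEs before the query that uses them.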
| private final CTEClause rootClause = new CTEClause(null, null, null); |
| |
| @Override |
| public List<Task<?>> getAllRootTasks() { |
| if (!rootTasksResolved) { |
| rootTasks = toRealRootTasks(rootClause.asExecutionOrder()); |
| rootTasksResolved = true; |
| } |
| return rootTasks; |
| } |
| |
| @Override |
| public Set<ReadEntity> getAllInputs() { |
| Set<ReadEntity> readEntities = new HashSet<ReadEntity>(getInputs()); |
| for (CTEClause cte : rootClause.asExecutionOrder()) { |
| if (cte.source != null) { |
| readEntities.addAll(cte.source.getInputs()); |
| } |
| } |
| return readEntities; |
| } |
| |
| @Override |
| public Set<WriteEntity> getAllOutputs() { |
| Set<WriteEntity> writeEntities = new HashSet<WriteEntity>(getOutputs()); |
| for (CTEClause cte : rootClause.asExecutionOrder()) { |
| if (cte.source != null) { |
| writeEntities.addAll(cte.source.getOutputs()); |
| } |
| } |
| return writeEntities; |
| } |
| |
| class CTEClause { |
| CTEClause(String alias, ASTNode cteNode, ASTNode withColList) { |
| this.alias = alias; |
| this.cteNode = cteNode; |
| this.withColList = withColList; |
| } |
| String alias; |
| ASTNode cteNode; |
| ASTNode withColList; |
| boolean materialize; |
| int reference; |
| QBExpr qbExpr; |
| List<CTEClause> parents = new ArrayList<CTEClause>(); |
| |
| // materialized |
| SemanticAnalyzer source; |
| |
| List<Task<?>> getTasks() { |
| return source == null ? null : source.rootTasks; |
| } |
| |
| List<CTEClause> asExecutionOrder() { |
| List<CTEClause> execution = new ArrayList<CTEClause>(); |
| asExecutionOrder(new HashSet<CTEClause>(), execution); |
| return execution; |
| } |
| |
| void asExecutionOrder(Set<CTEClause> visited, List<CTEClause> execution) { |
| for (CTEClause parent : parents) { |
| if (visited.add(parent)) { |
| parent.asExecutionOrder(visited, execution); |
| } |
| } |
| execution.add(this); |
| } |
| |
| @Override |
| public String toString() { |
| return alias == null ? "<root>" : alias; |
| } |
| } |
| |
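/**
 * Rewires the task DAG so that materialized CTEs run before the main query. Tasks of the CTEs
 * are processed in execution order: every leaf task of the previous group is made a predecessor
 * of the next group's root tasks (independent CTE chains are kept as separate roots), and the
 * main query's root tasks are attached after the last CTE's leaves. Returns the CTE root tasks
 * as the new plan roots, or the original rootTasks when no CTE was materialized.
 */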
| private List<Task<?>> toRealRootTasks(List<CTEClause> execution) { |
| List<Task<?>> cteRoots = new ArrayList<>(); |
| List<Task<?>> cteLeafs = new ArrayList<>(); |
| List<Task<?>> curTopRoots = null; |
| List<Task<?>> curBottomLeafs = null; |
| for (CTEClause current : execution) { |
| if (current.parents.isEmpty() && curTopRoots != null) { |
| cteRoots.addAll(curTopRoots); |
| cteLeafs.addAll(curBottomLeafs); |
| curTopRoots = curBottomLeafs = null; |
| } |
| List<Task<?>> curTasks = current.getTasks(); |
| if (curTasks == null) { |
| continue; |
| } |
| if (curTopRoots == null) { |
| curTopRoots = curTasks; |
| } |
| if (curBottomLeafs != null) { |
| for (Task<?> topLeafTask : curBottomLeafs) { |
| for (Task<?> currentRootTask : curTasks) { |
| topLeafTask.addDependentTask(currentRootTask); |
| } |
| } |
| } |
| curBottomLeafs = Task.findLeafs(curTasks); |
| } |
| if (curTopRoots != null) { |
| cteRoots.addAll(curTopRoots); |
| cteLeafs.addAll(curBottomLeafs); |
| } |
| |
| if (cteRoots.isEmpty()) { |
| return rootTasks; |
| } |
| for (Task<?> cteLeafTask : cteLeafs) { |
| for (Task<?> mainRootTask : rootTasks) { |
| cteLeafTask.addDependentTask(mainRootTask); |
| } |
| } |
| return cteRoots; |
| } |
| |
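/**
 * Materializes a CTE by building a synthetic CREATE TEMPORARY TABLE AST around the CTE's query
 * and analyzing it with a nested SemanticAnalyzer that shares this analyzer's CTE definitions.
 * The resulting table is marked as a materialized table, its location is created, and it is
 * registered with the context so later references to the CTE resolve to it.
 */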
| Table materializeCTE(String cteName, CTEClause cte) throws HiveException { |
| |
| ASTNode createTable = new ASTNode(new ClassicToken(HiveParser.TOK_CREATETABLE)); |
| |
| ASTNode tableName = new ASTNode(new ClassicToken(HiveParser.TOK_TABNAME)); |
| tableName.addChild(new ASTNode(new ClassicToken(HiveParser.Identifier, cteName))); |
| |
| ASTNode temporary = new ASTNode(new ClassicToken(HiveParser.KW_TEMPORARY, MATERIALIZATION_MARKER)); |
| |
| createTable.addChild(tableName); |
| createTable.addChild(temporary); |
| createTable.addChild(cte.cteNode); |
| |
| SemanticAnalyzer analyzer = new SemanticAnalyzer(queryState); |
| analyzer.initCtx(ctx); |
| analyzer.init(false); |
| |
| // should share cte contexts |
| analyzer.aliasToCTEs.putAll(aliasToCTEs); |
| |
| HiveOperation operation = queryState.getHiveOperation(); |
| try { |
| analyzer.analyzeInternal(createTable); |
| } finally { |
| queryState.setCommandType(operation); |
| } |
| |
| Table table = analyzer.tableDesc.toTable(conf); |
| Path location = table.getDataLocation(); |
| try { |
| location.getFileSystem(conf).mkdirs(location); |
| } catch (IOException e) { |
| throw new HiveException(e); |
| } |
| table.setMaterializedTable(true); |
| |
| LOG.info("{} will be materialized into {}", cteName, location); |
| cte.source = analyzer; |
| |
| ctx.addMaterializedTable(cteName, table); |
| |
| return table; |
| } |
| |
| |
| static boolean isJoinToken(ASTNode node) { |
| return (node.getToken().getType() == HiveParser.TOK_JOIN) |
| || (node.getToken().getType() == HiveParser.TOK_CROSSJOIN) |
| || isOuterJoinToken(node) |
| || (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN) |
| || (node.getToken().getType() == HiveParser.TOK_LEFTANTISEMIJOIN) |
| || (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN); |
| } |
| |
private static boolean isOuterJoinToken(ASTNode node) {
| return (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN) |
| || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN) |
| || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN); |
| } |
| |
| /** |
| * Given the AST with TOK_JOIN as the root, get all the aliases for the tables |
| * or subqueries in the join. |
| * |
* @param qb the query block being populated
* @param join the join AST node
| * @throws SemanticException |
| */ |
| @SuppressWarnings("nls") |
| private void processJoin(QB qb, ASTNode join) throws SemanticException { |
| int numChildren = join.getChildCount(); |
| if ((numChildren != 2) && (numChildren != 3) && (numChildren != 4) |
| && join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) { |
| throw new SemanticException(generateErrorMessage(join, |
| "Join with multiple children")); |
| } |
| |
| queryProperties.incrementJoinCount(isOuterJoinToken(join)); |
| for (int num = 0; num < numChildren; num++) { |
| ASTNode child = (ASTNode) join.getChild(num); |
| if (child.getToken().getType() == HiveParser.TOK_TABREF) { |
| processTable(qb, child); |
| } else if (child.getToken().getType() == HiveParser.TOK_SUBQUERY) { |
| processSubQuery(qb, child); |
| } else if (child.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) { |
| queryProperties.setHasPTF(true); |
| processPTF(qb, child); |
| PTFInvocationSpec ptfInvocationSpec = qb.getPTFInvocationSpec(child); |
String inputAlias = ptfInvocationSpec == null ? null :
ptfInvocationSpec.getFunction().getAlias();
if (inputAlias == null) {
| throw new SemanticException(generateErrorMessage(child, |
| "PTF invocation in a Join must have an alias")); |
| } |
| |
| } else if (child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW || |
| child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) { |
| // SELECT * FROM src1 LATERAL VIEW udtf() AS myTable JOIN src2 ... |
| // is not supported. Instead, the lateral view must be in a subquery |
| // SELECT * FROM (SELECT * FROM src1 LATERAL VIEW udtf() AS myTable) a |
| // JOIN src2 ... |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.LATERAL_VIEW_WITH_JOIN.getMsg(), join)); |
| } else if (isJoinToken(child)) { |
| processJoin(qb, child); |
| } |
| } |
| } |
| |
| /** |
| * Given the AST with TOK_LATERAL_VIEW as the root, get the alias for the |
| * table or subquery in the lateral view and also make a mapping from the |
| * alias to all the lateral view AST's. |
| * |
* @param qb the query block being populated
* @param lateralView the lateral view AST node
* @return the alias for the table/subquery
* @throws SemanticException
*/
| private String processLateralView(QB qb, ASTNode lateralView) |
| throws SemanticException { |
| int numChildren = lateralView.getChildCount(); |
| assert (numChildren == 2); |
| |
| if (!isCBOSupportedLateralView(lateralView)) { |
| queryProperties.setCBOSupportedLateralViews(false); |
| } |
| |
| ASTNode next = (ASTNode) lateralView.getChild(1); |
| String alias = null; |
| switch (next.getToken().getType()) { |
| case HiveParser.TOK_TABREF: |
| alias = processTable(qb, next); |
| break; |
| case HiveParser.TOK_SUBQUERY: |
| alias = processSubQuery(qb, next); |
| break; |
| case HiveParser.TOK_LATERAL_VIEW: |
| case HiveParser.TOK_LATERAL_VIEW_OUTER: |
| alias = processLateralView(qb, next); |
| break; |
| default: |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.LATERAL_VIEW_INVALID_CHILD.getMsg(), lateralView)); |
| } |
| alias = alias.toLowerCase(); |
| qb.getParseInfo().addLateralViewForAlias(alias, lateralView); |
| qb.addAlias(alias); |
| return alias; |
| } |
| |
| /** |
| * Phase 1: (including, but not limited to): |
| * |
| * 1. Gets all the aliases for all the tables / subqueries and makes the |
| * appropriate mapping in aliasToTabs, aliasToSubq 2. Gets the location of the |
| * destination and names the clause "inclause" + i 3. Creates a map from a |
| * string representation of an aggregation tree to the actual aggregation AST |
| * 4. Creates a mapping from the clause name to the select expression AST in |
| * destToSelExpr 5. Creates a mapping from a table alias to the lateral view |
| * AST's in aliasToLateralViews |
| * |
| * @param ast |
| * @param qb |
| * @param ctx_1 |
| * @throws SemanticException |
| */ |
| @SuppressWarnings({"fallthrough", "nls"}) |
| boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) |
| throws SemanticException { |
| |
| boolean phase1Result = true; |
| QBParseInfo qbp = qb.getParseInfo(); |
| boolean skipRecursion = false; |
| |
| if (ast.getToken() != null) { |
| skipRecursion = true; |
| switch (ast.getToken().getType()) { |
| case HiveParser.TOK_SELECTDI: |
| qb.countSelDi(); |
| // fall through |
| case HiveParser.TOK_SELECT: |
| qb.countSel(); |
| qbp.setSelExprForClause(ctx_1.dest, ast); |
| |
| int posn = 0; |
| if (((ASTNode) ast.getChild(0)).getType() == HiveParser.QUERY_HINT) { |
| posn = processQueryHint((ASTNode)ast.getChild(0), qbp, posn); |
| } |
| |
| if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM)) { |
| queryProperties.setUsesScript(true); |
| } |
| |
| Map<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest); |
| doPhase1GetColumnAliasesFromSelect(ast, qbp, ctx_1.dest); |
| qbp.setAggregationExprsForClause(ctx_1.dest, aggregations); |
| qbp.setDistinctFuncExprsForClause(ctx_1.dest, |
| doPhase1GetDistinctFuncExprs(aggregations)); |
| break; |
| |
| case HiveParser.TOK_WHERE: |
| qbp.setWhrExprForClause(ctx_1.dest, ast); |
| if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty()) { |
| queryProperties.setFilterWithSubQuery(true); |
| } |
| doPhase1WhereClause(ast, qb); |
| break; |
| |
| case HiveParser.TOK_INSERT_INTO: |
| String currentDatabase = SessionState.get().getCurrentDatabase(); |
| String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase); |
| qbp.addInsertIntoTable(tab_name, ast); |
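// fall through: an INSERT INTO also needs the destination handling below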
| |
| case HiveParser.TOK_DESTINATION: |
| ctx_1.dest = this.ctx.getDestNamePrefix(ast, qb).toString() + ctx_1.nextNum; |
| ctx_1.nextNum++; |
| boolean isTmpFileDest = false; |
| if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) { |
| ASTNode ch = (ASTNode) ast.getChild(0); |
| if (ch.getToken().getType() == HiveParser.TOK_DIR && ch.getChildCount() > 0 |
| && ch.getChild(0) instanceof ASTNode) { |
| ch = (ASTNode) ch.getChild(0); |
| isTmpFileDest = ch.getToken().getType() == HiveParser.TOK_TMP_FILE; |
| if (ch.getToken().getType() == HiveParser.StringLiteral) { |
| qbp.setInsertOverwriteDirectory(true); |
| } |
| } else { |
| if (ast.getToken().getType() == HiveParser.TOK_DESTINATION |
| && ast.getChild(0).getType() == HiveParser.TOK_TAB) { |
| String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), |
| SessionState.get().getCurrentDatabase()); |
| qbp.getInsertOverwriteTables().put(fullTableName.toLowerCase(), ast); |
| qbp.setDestToOpType(ctx_1.dest, true); |
| } |
| } |
| } |
| |
// is there an insert in the subquery?
| if (qbp.getIsSubQ() && !isTmpFileDest) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(), ast)); |
| } |
| |
| qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0)); |
| handleInsertStatementSpecPhase1(ast, qbp, ctx_1); |
| |
if (qbp.getClauseNamesForDest().size() == 2) {
// As soon as we see a second destination clause, we know this is a
// multi-insert query, so set the property accordingly.
// Using qbp.getClauseNamesForDest().size() >= 2 would be equivalent,
// but == avoids setting the property multiple times.
queryProperties.setMultiDestQuery(true);
}
| |
| if (plannerCtx != null && !queryProperties.hasMultiDestQuery()) { |
| plannerCtx.setInsertToken(ast, isTmpFileDest); |
| } else if (plannerCtx != null && qbp.getClauseNamesForDest().size() == 2) { |
// For a multi-insert query, currently we only optimize the FROM clause,
// so introduce the multi-insert token on top of it.
// However, first we need to reset the existing (insert) token.
// Using qbp.getClauseNamesForDest().size() >= 2 would be equivalent,
// but == avoids resetting and re-introducing the token multiple times.
| plannerCtx.resetToken(); |
| plannerCtx.setMultiInsertToken((ASTNode) qbp.getQueryFrom().getChild(0)); |
| } |
| break; |
| |
| case HiveParser.TOK_FROM: |
| int child_count = ast.getChildCount(); |
| if (child_count != 1) { |
| throw new SemanticException(generateErrorMessage(ast, |
| "Multiple Children " + child_count)); |
| } |
| |
| if (!qbp.getIsSubQ()) { |
| qbp.setQueryFromExpr(ast); |
| } |
| |
| // Check if this is a subquery / lateral view |
| ASTNode frm = (ASTNode) ast.getChild(0); |
| if (frm.getToken().getType() == HiveParser.TOK_TABREF) { |
| processTable(qb, frm); |
| } else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) { |
| processSubQuery(qb, frm); |
| } else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW || |
| frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) { |
| queryProperties.setHasLateralViews(true); |
| processLateralView(qb, frm); |
| } else if (isJoinToken(frm)) { |
| processJoin(qb, frm); |
| qbp.setJoinExpr(frm); |
} else if (frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
| queryProperties.setHasPTF(true); |
| processPTF(qb, frm); |
| } |
| break; |
| |
| case HiveParser.TOK_CLUSTERBY: |
| // Get the clusterby aliases - these are aliased to the entries in the |
| // select list |
| queryProperties.setHasClusterBy(true); |
| qbp.setClusterByExprForClause(ctx_1.dest, ast); |
| break; |
| |
| case HiveParser.TOK_DISTRIBUTEBY: |
| // Get the distribute by aliases - these are aliased to the entries in |
| // the select list |
| queryProperties.setHasDistributeBy(true); |
| qbp.setDistributeByExprForClause(ctx_1.dest, ast); |
| if (qbp.getClusterByForClause(ctx_1.dest) != null) { |
| throw new SemanticException(generateErrorMessage(ast, |
| ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg())); |
| } else if (qbp.getOrderByForClause(ctx_1.dest) != null) { |
| throw new SemanticException(generateErrorMessage(ast, |
| ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg())); |
| } |
| break; |
| |
| case HiveParser.TOK_SORTBY: |
| // Get the sort by aliases - these are aliased to the entries in the |
| // select list |
| queryProperties.setHasSortBy(true); |
| qbp.setSortByExprForClause(ctx_1.dest, ast); |
| if (qbp.getClusterByForClause(ctx_1.dest) != null) { |
| throw new SemanticException(generateErrorMessage(ast, |
| ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg())); |
| } else if (qbp.getOrderByForClause(ctx_1.dest) != null) { |
| throw new SemanticException(generateErrorMessage(ast, |
| ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg())); |
| } |
| |
| break; |
| |
| case HiveParser.TOK_ORDERBY: |
| // Get the order by aliases - these are aliased to the entries in the |
| // select list |
| queryProperties.setHasOrderBy(true); |
| qbp.setOrderByExprForClause(ctx_1.dest, ast); |
| if (qbp.getClusterByForClause(ctx_1.dest) != null) { |
| throw new SemanticException(generateErrorMessage(ast, |
| ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg())); |
| } |
| // If there are aggregations in order by, we need to remember them in qb. |
| qbp.addAggregationExprsForClause(ctx_1.dest, |
| doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest)); |
| break; |
| |
| case HiveParser.TOK_GROUPBY: |
| case HiveParser.TOK_ROLLUP_GROUPBY: |
| case HiveParser.TOK_CUBE_GROUPBY: |
| case HiveParser.TOK_GROUPING_SETS: |
| // Get the groupby aliases - these are aliased to the entries in the |
| // select list |
| queryProperties.setHasGroupBy(true); |
| if (qbp.getJoinExpr() != null) { |
| queryProperties.setHasJoinFollowedByGroupBy(true); |
| } |
| qbp.setGroupByExprForClause(ctx_1.dest, ast); |
| skipRecursion = true; |
| |
| // Rollup and Cubes are syntactic sugar on top of grouping sets |
| if (ast.getToken().getType() == HiveParser.TOK_ROLLUP_GROUPBY) { |
| qbp.getDestRollups().add(ctx_1.dest); |
| } else if (ast.getToken().getType() == HiveParser.TOK_CUBE_GROUPBY) { |
| qbp.getDestCubes().add(ctx_1.dest); |
| } else if (ast.getToken().getType() == HiveParser.TOK_GROUPING_SETS) { |
| qbp.getDestGroupingSets().add(ctx_1.dest); |
| } |
| break; |
| |
| case HiveParser.TOK_HAVING: |
| qbp.setHavingExprForClause(ctx_1.dest, ast); |
| qbp.addAggregationExprsForClause(ctx_1.dest, |
| doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest)); |
| // Clause might also refer to aggregations with distinct |
| qbp.setDistinctFuncExprsForClause(ctx_1.dest, |
| doPhase1GetDistinctFuncExprs(qbp.getAggregationExprsForClause(ctx_1.dest))); |
| break; |
| |
| case HiveParser.TOK_QUALIFY: |
| qbp.setQualifyExprForClause(ctx_1.dest, ast); |
| qbp.addAggregationExprsForClause(ctx_1.dest, |
| doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest)); |
| break; |
| |
| case HiveParser.KW_WINDOW: |
| if (!qb.hasWindowingSpec(ctx_1.dest) ) { |
| throw new SemanticException(generateErrorMessage(ast, |
| "Query has no Cluster/Distribute By; but has a Window definition")); |
| } |
| handleQueryWindowClauses(qb, ctx_1, ast); |
| break; |
| |
| case HiveParser.TOK_LIMIT: |
| queryProperties.setHasLimit(true); |
| if (ast.getChildCount() == 2) { |
| qbp.setDestLimit(ctx_1.dest, |
| Integer.valueOf(ast.getChild(0).getText()), Integer.valueOf(ast.getChild(1).getText())); |
| } else { |
| qbp.setDestLimit(ctx_1.dest, Integer.valueOf(0), Integer.valueOf(ast.getChild(0).getText())); |
| } |
| break; |
| |
| case HiveParser.TOK_ANALYZE: |
| // Case of analyze command |
| |
| String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0)).toLowerCase(); |
| |
| |
| qb.setTabAlias(table_name, table_name); |
| qb.addAlias(table_name); |
| qb.getParseInfo().setIsAnalyzeCommand(true); |
| qb.getParseInfo().setNoScanAnalyzeCommand(this.noscan); |
| // Allow analyze the whole table and dynamic partitions |
| HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict"); |
| HiveConf.setVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict"); |
| |
| break; |
| |
| case HiveParser.TOK_UNIONALL: |
| if (!qbp.getIsSubQ()) { |
| // this shouldn't happen. The parser should have converted the union to be |
| // contained in a subquery. Just in case, we keep the error as a fallback. |
| throw new SemanticException(generateErrorMessage(ast, |
| ErrorMsg.UNION_NOTIN_SUBQ.getMsg())); |
| } |
| skipRecursion = false; |
| break; |
| |
| case HiveParser.TOK_INSERT: |
| ASTNode destination = (ASTNode) ast.getChild(0); |
| Tree tab = destination.getChild(0); |
| |
// Proceed only if the AST contains a partition spec and IF NOT EXISTS
| if (destination.getChildCount() == 2 && |
| tab.getChildCount() == 2 && |
| destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) { |
| final String tableName = getUnescapedName((ASTNode) tab.getChild(0), SessionState.get().getCurrentDatabase()); |
| |
| Tree partitions = tab.getChild(1); |
| int childCount = partitions.getChildCount(); |
| Map<String, String> partition = new HashMap<String, String>(); |
| for (int i = 0; i < childCount; i++) { |
| String partitionName = partitions.getChild(i).getChild(0).getText(); |
| // Convert to lowercase for the comparison |
| partitionName = partitionName.toLowerCase(); |
| Tree pvalue = partitions.getChild(i).getChild(1); |
| if (pvalue == null) { |
| break; |
| } |
| String partitionVal = stripQuotes(pvalue.getText()); |
| partition.put(partitionName, partitionVal); |
| } |
// if any partition column is dynamic (has no value), IF NOT EXISTS is not supported
| if (childCount != partition.size()) { |
| throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS |
| .getMsg(partition.toString())); |
| } |
| Table table = null; |
| try { |
| table = getTableObjectByName(tableName); |
| } catch (HiveException ex) { |
| throw new SemanticException(ex); |
| } |
| try { |
| Partition parMetaData = db.getPartition(table, partition, false); |
// Check whether the partition exists; if it does, skip the overwrite
| if (parMetaData != null) { |
| phase1Result = false; |
| skipRecursion = true; |
LOG.info("Partition already exists, so insert overwrite is " +
"skipped for partition: {}", parMetaData);
| break; |
| } |
| } catch (HiveException e) { |
| LOG.info("Error while getting metadata : ", e); |
| } |
| validatePartSpec(table, partition, (ASTNode)tab, conf, false); |
| } |
| skipRecursion = false; |
| break; |
| case HiveParser.TOK_LATERAL_VIEW: |
| case HiveParser.TOK_LATERAL_VIEW_OUTER: |
| // todo: nested LV |
| assert ast.getChildCount() == 1; |
| qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast); |
| break; |
| case HiveParser.TOK_CTE: |
| processCTE(qb, ast); |
| break; |
| case HiveParser.QUERY_HINT: |
| processQueryHint(ast, qbp, 0); |
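// fall through to the default case (there is intentionally no break), which resets skipRecursion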
| default: |
| skipRecursion = false; |
| break; |
| } |
| } |
| |
| if (!skipRecursion) { |
| // Iterate over the rest of the children |
| int child_count = ast.getChildCount(); |
| for (int child_pos = 0; child_pos < child_count && phase1Result; ++child_pos) { |
| // Recurse |
| phase1Result = doPhase1((ASTNode) ast.getChild(child_pos), qb, ctx_1, plannerCtx); |
| } |
| } |
| return phase1Result; |
| } |
| |
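/**
 * Parses the text of a QUERY_HINT node (e.g. a MAPJOIN hint; example for illustration) with a
 * fresh ParseDriver and records the resulting hint AST on the QBParseInfo. Returns the next
 * child position to process.
 */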
private int processQueryHint(ASTNode ast, QBParseInfo qbp, int posn) throws SemanticException {
| ParseDriver pd = new ParseDriver(); |
| String queryHintStr = ast.getText(); |
| LOG.debug("QUERY HINT: {} ", queryHintStr); |
| try { |
| ASTNode hintNode = pd.parseHint(queryHintStr); |
| qbp.setHints(hintNode); |
| } catch (ParseException e) { |
| throw new SemanticException("failed to parse query hint: "+e.getMessage(), e); |
| } |
| return posn + 1; |
| } |
| |
| /** |
* This is phase 1 of supporting a target column list in an insert statement, e.g.
* insert into foo(z,y) select a,b from bar;
| * @see #handleInsertStatementSpec(java.util.List, String, RowResolver, QB, ASTNode) |
| * @throws SemanticException |
| */ |
| private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase1Ctx ctx_1) throws SemanticException { |
| ASTNode tabColName = (ASTNode)ast.getChild(1); |
| if(ast.getType() == HiveParser.TOK_INSERT_INTO && tabColName != null && tabColName.getType() == HiveParser.TOK_TABCOLNAME) { |
| //we have "insert into foo(a,b)..."; parser will enforce that 1+ columns are listed if TOK_TABCOLNAME is present |
| String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), |
| SessionState.get().getCurrentDatabase()); |
| List<String> targetColumnNames = processTableColumnNames(tabColName, fullTableName); |
| qbp.setDestSchemaForClause(ctx_1.dest, targetColumnNames); |
| Table targetTable; |
| try { |
| targetTable = getTableObjectByName(fullTableName); |
| } catch (HiveException ex) { |
| LOG.error("Error processing HiveParser.TOK_DESTINATION: " + ex.getMessage(), ex); |
| throw new SemanticException(ex); |
| } |
| if(targetTable == null) { |
| throw new SemanticException(generateErrorMessage(ast, |
| "Unable to access metadata for table " + fullTableName)); |
| } |
| Set<String> targetColumns = new HashSet<>(targetColumnNames); |
| for(FieldSchema f : targetTable.getCols()) { |
| //parser only allows foo(a,b), not foo(foo.a, foo.b) |
| targetColumns.remove(f.getName()); |
| } |
| if(!targetColumns.isEmpty()) {//here we need to see if remaining columns are dynamic partition columns |
/* We just checked the user-specified schema columns against the regular table columns and found some
that are not 'regular'. Now check whether they are dynamic partition columns.
For dynamic partitioning,
given "create table multipart(a int, b int) partitioned by (c int, d int);",
for "insert into multipart partition(c='1',d)(d,a) values(2,3);" we expect the parse tree to look like this
| (TOK_INSERT_INTO |
| (TOK_TAB |
| (TOK_TABNAME multipart) |
| (TOK_PARTSPEC |
| (TOK_PARTVAL c '1') |
| (TOK_PARTVAL d) |
| ) |
| ) |
| (TOK_TABCOLNAME d a) |
| )*/ |
| List<String> dynamicPartitionColumns = new ArrayList<String>(); |
| if(ast.getChild(0) != null && ast.getChild(0).getType() == HiveParser.TOK_TAB) { |
| ASTNode tokTab = (ASTNode)ast.getChild(0); |
| ASTNode tokPartSpec = (ASTNode)tokTab.getFirstChildWithType(HiveParser.TOK_PARTSPEC); |
| if(tokPartSpec != null) { |
| for(Node n : tokPartSpec.getChildren()) { |
| ASTNode tokPartVal = null; |
| if(n instanceof ASTNode) { |
| tokPartVal = (ASTNode)n; |
| } |
| if(tokPartVal != null && tokPartVal.getType() == HiveParser.TOK_PARTVAL && tokPartVal.getChildCount() == 1) { |
| assert tokPartVal.getChild(0).getType() == HiveParser.Identifier : |
| "Expected column name; found tokType=" + tokPartVal.getType(); |
| dynamicPartitionColumns.add(tokPartVal.getChild(0).getText()); |
| } |
| } |
| for(String colName : dynamicPartitionColumns) { |
| targetColumns.remove(colName); |
| } |
| } else { |
// no partition spec is specified, but the column schema may still list partition columns
| for(FieldSchema f : targetTable.getPartCols()) { |
| //parser only allows foo(a,b), not foo(foo.a, foo.b) |
| targetColumns.remove(f.getName()); |
| } |
| } |
| } |
| |
| if(!targetColumns.isEmpty()) { |
// Found some columns in the user-specified schema which are neither regular nor dynamic partition columns
| throw new SemanticException(generateErrorMessage(tabColName, |
| "'" + (targetColumns.size() == 1 ? targetColumns.iterator().next() : targetColumns) + |
| "' in insert schema specification " + (targetColumns.size() == 1 ? "is" : "are") + |
| " not found among regular columns of " + |
| fullTableName + " nor dynamic partition columns.")); |
| } |
| } |
| } |
| } |
| |
| protected List<String> processTableColumnNames(ASTNode tabColName, String tableName) throws SemanticException { |
| if (tabColName == null) { |
| return Collections.emptyList(); |
| } |
| List<String> targetColNames = new ArrayList<>(tabColName.getChildren().size()); |
| for(Node col : tabColName.getChildren()) { |
| assert ((ASTNode)col).getType() == HiveParser.Identifier : |
| "expected token " + HiveParser.Identifier + " found " + ((ASTNode)col).getType(); |
| targetColNames.add(((ASTNode)col).getText().toLowerCase()); |
| } |
| Set<String> targetColumns = new HashSet<>(targetColNames); |
| if(targetColNames.size() != targetColumns.size()) { |
| throw new SemanticException(generateErrorMessage(tabColName, |
| "Duplicate column name detected in " + tableName + " table schema specification")); |
| } |
| return targetColNames; |
| } |
| |
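/**
 * Walks the CTE references of the query (skipped for CTAS) and marks a CTE for materialization
 * once its reference count reaches HIVE_CTE_MATERIALIZE_THRESHOLD; when
 * HIVE_CTE_MATERIALIZE_FULL_AGGREGATE_ONLY is set, only fully aggregating CTE queries qualify.
 */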
| private void getMaterializationMetadata(QB qb) throws SemanticException { |
| if (qb.isCTAS()) { |
| return; |
| } |
| try { |
| gatherCTEReferences(qb, rootClause); |
| int threshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_THRESHOLD); |
| for (CTEClause cte : Sets.newHashSet(aliasToCTEs.values())) { |
| if (threshold >= 0 && cte.reference >= threshold) { |
| cte.materialize = !HiveConf.getBoolVar(conf, ConfVars.HIVE_CTE_MATERIALIZE_FULL_AGGREGATE_ONLY) |
| || cte.qbExpr.getQB().getParseInfo().isFullyAggregate(); |
| } |
| } |
| } catch (HiveException e) { |
| LOG.error("Failed to get Materialization Metadata", e); |
| if (e instanceof SemanticException) { |
| throw (SemanticException)e; |
| } |
| throw new SemanticException(e.getMessage(), e); |
| } |
| } |
| |
| private void gatherCTEReferences(QBExpr qbexpr, CTEClause parent) throws HiveException { |
| if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { |
| gatherCTEReferences(qbexpr.getQB(), parent); |
| } else { |
| gatherCTEReferences(qbexpr.getQBExpr1(), parent); |
| gatherCTEReferences(qbexpr.getQBExpr2(), parent); |
| } |
| } |
| |
| // TODO: check view references, too |
| private void gatherCTEReferences(QB qb, CTEClause current) throws HiveException { |
| for (String alias : qb.getTabAliases()) { |
| String tabName = qb.getTabNameForAlias(alias); |
| String cteName = tabName.toLowerCase(); |
| |
| CTEClause cte = findCTEFromName(qb, cteName); |
| if (cte != null) { |
| if (ctesExpanded.contains(cteName)) { |
| throw new SemanticException("Recursive cte " + cteName + |
| " detected (cycle: " + StringUtils.join(ctesExpanded, " -> ") + |
| " -> " + cteName + ")."); |
| } |
| cte.reference++; |
| current.parents.add(cte); |
| if (cte.qbExpr != null) { |
| continue; |
| } |
| cte.qbExpr = new QBExpr(cteName); |
| doPhase1QBExpr(cte.cteNode, cte.qbExpr, qb.getId(), cteName, cte.withColList); |
| |
| ctesExpanded.add(cteName); |
| gatherCTEReferences(cte.qbExpr, cte); |
| ctesExpanded.remove(ctesExpanded.size() - 1); |
| } |
| } |
| for (String alias : qb.getSubqAliases()) { |
| gatherCTEReferences(qb.getSubqForAlias(alias), current); |
| } |
| for (String alias : qb.getSubqExprAliases()) { |
| gatherCTEReferences(qb.getSubqExprForAlias(alias), current); |
| } |
| } |
| |
| void getMetaData(QB qb) throws SemanticException { |
| getMetaData(qb, false); |
| } |
| |
| private void getMetaData(QB qb, boolean enableMaterialization) throws SemanticException { |
| try { |
| if (enableMaterialization) { |
| getMaterializationMetadata(qb); |
| } |
| getMetaData(qb, null); |
| } catch (HiveException e) { |
| if (e instanceof SemanticException) { |
| throw (SemanticException)e; |
| } |
| throw new SemanticException(e.getMessage(), e); |
| } |
| } |
| |
| private void getMetaData(QBExpr qbexpr, ReadEntity parentInput) |
| throws HiveException { |
| if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { |
| getMetaData(qbexpr.getQB(), parentInput); |
| } else { |
| getMetaData(qbexpr.getQBExpr1(), parentInput); |
| getMetaData(qbexpr.getQBExpr2(), parentInput); |
| } |
| } |
| |
| @SuppressWarnings("nls") |
| private void getMetaData(QB qb, ReadEntity parentInput) |
| throws HiveException { |
| LOG.info("Get metadata for source tables"); |
| |
| // Go over the tables and populate the related structures. |
| // We have to materialize the table alias list since we might |
| // modify it in the middle for view rewrite. |
| List<String> tabAliases = new ArrayList<String>(qb.getTabAliases()); |
| |
| // Keep track of view alias to view name and read entity |
// For example, for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T,
// this keeps track of the full view name and read entity corresponding to aliases V3, V3:V2, V3:V2:V1.
| // This is needed for tracking the dependencies for inputs, along with their parents. |
| Map<String, Pair<String, ReadEntity>> aliasToViewInfo = |
| new HashMap<String, Pair<String, ReadEntity>>(); |
| |
| /* |
| * used to capture view to SQ conversions. This is used to check for |
| * recursive CTE invocations. |
| */ |
| Map<String, String> sqAliasToCTEName = new HashMap<String, String>(); |
| |
| for (String alias : tabAliases) { |
| String tabName = qb.getTabNameForAlias(alias); |
| String cteName = tabName.toLowerCase(); |
| |
| // Get table details from tabNameToTabObject cache |
Table tab = aliasToCTEs.containsKey(tabName) ? null : getTableObjectByName(tabName, false);
| if (tab != null) { |
| Table newTab = tab.makeCopy(); |
| tab = newTab; |
| } |
| if (tab == null || |
| tab.getDbName().equals(SessionState.get().getCurrentDatabase())) { |
| Table materializedTab = ctx.getMaterializedTable(cteName); |
| if (materializedTab == null) { |
| // we first look for this alias from CTE, and then from catalog. |
| CTEClause cte = findCTEFromName(qb, cteName); |
| if (cte != null) { |
| if (!cte.materialize) { |
| addCTEAsSubQuery(qb, cteName, alias); |
| sqAliasToCTEName.put(alias, cteName); |
| continue; |
| } |
| tab = materializeCTE(cteName, cte); |
| } |
| } else { |
| tab = materializedTab; |
| } |
| } |
| |
| if (tab == null) { |
| if(tabName.equals(DUMMY_DATABASE + "." + DUMMY_TABLE)) { |
| continue; |
| } |
| ASTNode src = qb.getParseInfo().getSrcForAlias(alias); |
| if (null != src) { |
| if (src.getChildCount() == 3) { |
| throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg() + " '" + src.getChild(2).getText() + "'"); |
| } |
| throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.INVALID_TABLE.getMsg(), src)); |
| } else { |
| throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(alias)); |
| } |
| } |
| |
| QBSystemVersion asOf = qb.getSystemVersionForAlias(alias); |
| if (asOf != null) { |
| if (!Optional.ofNullable(tab.getStorageHandler()).map(HiveStorageHandler::isTimeTravelAllowed).orElse(false)) { |
| throw new SemanticException(ErrorMsg.TIME_TRAVEL_NOT_ALLOWED, alias); |
| } |
| tab.setAsOfVersion(asOf.getAsOfVersion()); |
| tab.setVersionIntervalFrom(asOf.getFromVersion()); |
| tab.setAsOfTimestamp(asOf.getAsOfTime()); |
| } |
| |
| if (tab.isView()) { |
| if (qb.getParseInfo().isAnalyzeCommand()) { |
| throw new SemanticException(ErrorMsg.ANALYZE_VIEW.getMsg()); |
| } |
| String fullViewName = tab.getFullyQualifiedName(); |
| // Prevent view cycles |
| if (viewsExpanded.contains(fullViewName)) { |
| throw new SemanticException("Recursive view " + fullViewName + |
| " detected (cycle: " + StringUtils.join(viewsExpanded, " -> ") + |
| " -> " + fullViewName + ")."); |
| } |
| replaceViewReferenceWithDefinition(qb, tab, tabName, alias); |
| // This is the last time we'll see the Table objects for views, so add it to the inputs |
| // now. isInsideView will tell if this view is embedded in another view. |
// If the view is inside another view, it should have at least one parent
| if (qb.isInsideView() && parentInput == null) { |
| parentInput = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput); |
| } |
| ReadEntity viewInput = new ReadEntity(tab, parentInput, !qb.isInsideView()); |
| viewInput = PlanUtils.addInput(inputs, viewInput); |
| aliasToViewInfo.put(alias, Pair.of(fullViewName, viewInput)); |
| String aliasId = getAliasId(alias, qb); |
| if (aliasId != null) { |
| aliasId = aliasId.replace(SemanticAnalyzer.SUBQUERY_TAG_1, "") |
| .replace(SemanticAnalyzer.SUBQUERY_TAG_2, ""); |
| } |
| viewAliasToInput.put(aliasId, viewInput); |
| continue; |
| } |
| |
| if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) { |
| throw new SemanticException(generateErrorMessage( |
| qb.getParseInfo().getSrcForAlias(alias), |
| ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg())); |
| } |
| |
| qb.getMetaData().setSrcForAlias(alias, tab); |
| |
| if (qb.getParseInfo().isAnalyzeCommand()) { |
// allow partial partition specification for noscan since noscan is fast.
| TableSpec ts = new TableSpec(db, conf, (ASTNode) ast.getChild(0), true, this.noscan); |
| if (ts.specType == SpecType.DYNAMIC_PARTITION) { // dynamic partitions |
| try { |
| ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec); |
| } catch (HiveException e) { |
| throw new SemanticException(generateErrorMessage( |
| qb.getParseInfo().getSrcForAlias(alias), |
| "Cannot get partitions for " + ts.partSpec), e); |
| } |
| } |
| |
| tab.setTableSpec(ts); |
| qb.getParseInfo().addTableSpec(alias, ts); |
| } |
| |
| ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput); |
| // Temporary tables created during the execution are not the input sources |
| if (!PlanUtils.isValuesTempTable(alias)) { |
| PlanUtils.addInput(inputs, |
| new ReadEntity(tab, parentViewInfo, parentViewInfo == null), mergeIsDirect); |
| } |
| } |
| |
| LOG.info("Get metadata for subqueries"); |
| // Go over the subqueries and getMetaData for these |
| for (String alias : qb.getSubqAliases()) { |
| boolean wasView = aliasToViewInfo.containsKey(alias); |
| boolean wasCTE = sqAliasToCTEName.containsKey(alias); |
| ReadEntity newParentInput = null; |
| if (wasView) { |
| viewsExpanded.add(aliasToViewInfo.get(alias).getLeft()); |
| newParentInput = aliasToViewInfo.get(alias).getRight(); |
| } else if (wasCTE) { |
| ctesExpanded.add(sqAliasToCTEName.get(alias)); |
| } |
| QBExpr qbexpr = qb.getSubqForAlias(alias); |
| if (qbexpr.getQB() != null && (wasView || qb.isInsideView())) { |
| qbexpr.getQB().setInsideView(true); |
| } |
| getMetaData(qbexpr, newParentInput); |
| if (wasView) { |
| viewsExpanded.remove(viewsExpanded.size() - 1); |
| } else if (wasCTE) { |
| ctesExpanded.remove(ctesExpanded.size() - 1); |
| } |
| } |
| |
| RowFormatParams rowFormatParams = new RowFormatParams(); |
| StorageFormat storageFormat = new StorageFormat(conf); |
| |
| LOG.info("Get metadata for destination tables"); |
| // Go over all the destination structures and populate the related |
| // metadata |
| QBParseInfo qbp = qb.getParseInfo(); |
| |
| for (String name : qbp.getClauseNamesForDest()) { |
| ASTNode ast = qbp.getDestForClause(name); |
| switch (ast.getToken().getType()) { |
| case HiveParser.TOK_TAB: { |
| TableSpec ts = new TableSpec(db, conf, ast); |
| if (ts.tableHandle.isView() || |
| (mvRebuildMode == MaterializationRebuildMode.NONE && ts.tableHandle.isMaterializedView())) { |
| throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg()); |
| } |
| |
| Class<?> outputFormatClass = ts.tableHandle.getOutputFormatClass(); |
| if (!ts.tableHandle.isNonNative() && |
| !HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg(), |
| ast, "The class is " + outputFormatClass.toString())); |
| } |
| |
| boolean isTableWrittenTo = qb.getParseInfo().isInsertIntoTable(ts.tableHandle.getDbName(), |
| ts.tableHandle.getTableName(), ts.tableHandle.getBranchName()); |
| isTableWrittenTo |= (qb.getParseInfo().getInsertOverwriteTables(). |
| get(getUnescapedName((ASTNode) ast.getChild(0), ts.tableHandle.getDbName()).toLowerCase()) != null); |
| assert isTableWrittenTo : |
| "Inconsistent data structure detected: we are writing to " + ts.tableHandle + " in " + |
| name + " but it's not in isInsertIntoTable() or getInsertOverwriteTables()"; |
| // Disallow update and delete on non-acid tables |
| final boolean isWriteOperation = updating(name) || deleting(name); |
| boolean isFullAcid = AcidUtils.isFullAcidTable(ts.tableHandle) || |
| AcidUtils.isNonNativeAcidTable(ts.tableHandle, isWriteOperation); |
| if (isWriteOperation && !isFullAcid) { |
| if (!AcidUtils.isInsertOnlyTable(ts.tableHandle)) { |
| // Whether we are using an acid compliant transaction manager has already been caught in |
| // UpdateDeleteSemanticAnalyzer, so if we are updating or deleting and getting nonAcid |
| // here, it means the table itself doesn't support it. |
| throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, ts.getTableName().getTable()); |
| } else { |
| throw new SemanticException(ErrorMsg.ACID_OP_ON_INSERTONLYTRAN_TABLE, ts.getTableName().getTable()); |
| } |
| } |
// TableSpec ts is obtained from the query (user specified),
// which means the user didn't specify partitions in their query,
// but whether the table itself is partitioned is not known.
| if (ts.specType != SpecType.STATIC_PARTITION) { |
| // This is a table or dynamic partition |
| qb.getMetaData().setDestForAlias(name, ts.tableHandle); |
| // has dynamic as well as static partitions |
| if (ts.partSpec != null && ts.partSpec.size() > 0) { |
| qb.getMetaData().setPartSpecForAlias(name, ts.partSpec); |
| } |
| } else { |
| // This is a partition |
| qb.getMetaData().setDestForAlias(name, ts.partHandle); |
| } |
| if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { |
| // Add the table spec for the destination table. |
| qb.getParseInfo().addTableSpec(ts.getTableName().getTable().toLowerCase(), ts); |
| } |
| break; |
| } |
| |
| case HiveParser.TOK_DIR: { |
| // This is a dfs file |
| String fname = stripQuotes(ast.getChild(0).getText()); |
| if ((!qb.getParseInfo().getIsSubQ()) && (((ASTNode) ast.getChild(0)).getToken().getType() |
| == HiveParser.TOK_TMP_FILE)) { |
| |
| if (qb.isCTAS() || qb.isMaterializedView()) { |
| qb.setIsQuery(false); |
| ctx.setResDir(null); |
| ctx.setResFile(null); |
| |
| Path location; |
| // If the CTAS query does specify a location, use the table location, else use the db location |
| if (qb.isMaterializedView() && qb.getViewDesc() != null && qb.getViewDesc().getLocation() != null) { |
| location = new Path(qb.getViewDesc().getLocation()); |
| } else if (qb.isCTAS() && qb.getTableDesc() != null && qb.getTableDesc().getLocation() != null) { |
| location = new Path(qb.getTableDesc().getLocation()); |
| } else { |
| // allocate a temporary output dir on the location of the table |
| String tableName = getUnescapedName((ASTNode) ast.getChild(0)); |
| String[] names = Utilities.getDbTableName(tableName); |
| try { |
| Warehouse wh = new Warehouse(conf); |
| //Use destination table's db location. |
| String destTableDb = qb.getTableDesc() != null ? qb.getTableDesc().getDatabaseName() : null; |
| if (destTableDb == null) { |
| destTableDb = names[0]; |
| } |
| boolean useExternal = false; |
| if (qb.isMaterializedView()) { |
| useExternal = !AcidUtils.isTransactionalView(qb.getViewDesc()) && !makeAcid(); |
| } else { |
| useExternal = (qb.getTableDesc() == null || qb.getTableDesc().isTemporary() |
| || qb.getTableDesc().isExternal() || !makeAcid()); |
| } |
| if (useExternal) { |
| location = wh.getDatabaseExternalPath(db.getDatabase(destTableDb)); |
| } else { |
| location = wh.getDatabaseManagedPath(db.getDatabase(destTableDb)); |
| } |
| } catch (MetaException e) { |
| throw new SemanticException(e); |
| } |
| } |
| try { |
| CreateTableDesc tblDesc = qb.getTableDesc(); |
| if (tblDesc != null && tblDesc.isTemporary() && AcidUtils.isInsertOnlyTable(tblDesc.getTblProps())) { |
| fname = FileUtils.makeQualified(location, conf).toString(); |
| } else { |
| fname = ctx.getExtTmpPathRelTo(FileUtils.makeQualified(location, conf)).toString(); |
| } |
| } catch (Exception e) { |
| throw new SemanticException( |
| generateErrorMessage(ast, "Error creating temporary folder on: " + location.toString()), e); |
| } |
| if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { |
| TableSpec ts = new TableSpec(db, conf, this.ast); |
| // Add the table spec for the destination table. |
| qb.getParseInfo().addTableSpec(ts.getTableName().getTable().toLowerCase(), ts); |
| } |
| } else { |
| // This is the only place where isQuery is set to true; it defaults to false. |
| qb.setIsQuery(true); |
| Path stagingPath = getStagingDirectoryPathname(qb); |
| fname = stagingPath.toString(); |
| ctx.setResDir(stagingPath); |
| } |
| } |
| |
| boolean isDfsFile = true; |
| if (ast.getChildCount() >= 2 && ast.getChild(1).getText().toLowerCase().equals("local")) { |
| isDfsFile = false; |
| } |
| // Set the destination for the SELECT query inside the CTAS |
| qb.getMetaData().setDestForAlias(name, fname, isDfsFile); |
| |
| CreateTableDesc directoryDesc = new CreateTableDesc(); |
| boolean directoryDescIsSet = false; |
| int numCh = ast.getChildCount(); |
| for (int num = 1; num < numCh ; num++){ |
| ASTNode child = (ASTNode) ast.getChild(num); |
| if (child != null) { |
| if (storageFormat.fillStorageFormat(child)) { |
| directoryDesc.setInputFormat(storageFormat.getInputFormat()); |
| directoryDesc.setOutputFormat(storageFormat.getOutputFormat()); |
| directoryDesc.setSerName(storageFormat.getSerde()); |
| directoryDescIsSet = true; |
| continue; |
| } |
| switch (child.getToken().getType()) { |
| case HiveParser.TOK_TABLEROWFORMAT: |
| rowFormatParams.analyzeRowFormat(child); |
| directoryDesc.setFieldDelim(rowFormatParams.fieldDelim); |
| directoryDesc.setLineDelim(rowFormatParams.lineDelim); |
| directoryDesc.setCollItemDelim(rowFormatParams.collItemDelim); |
| directoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim); |
| directoryDesc.setFieldEscape(rowFormatParams.fieldEscape); |
| directoryDesc.setNullFormat(rowFormatParams.nullFormat); |
| directoryDescIsSet=true; |
| break; |
| case HiveParser.TOK_TABLESERIALIZER: |
| ASTNode serdeChild = (ASTNode) child.getChild(0); |
| storageFormat.setSerde(unescapeSQLString(serdeChild.getChild(0).getText())); |
| directoryDesc.setSerName(storageFormat.getSerde()); |
| if (serdeChild.getChildCount() > 1) { |
| directoryDesc.setSerdeProps(new HashMap<String, String>()); |
| readProps((ASTNode) serdeChild.getChild(1).getChild(0), directoryDesc.getSerdeProps()); |
| } |
| directoryDescIsSet = true; |
| break; |
| } |
| } |
| } |
| if (directoryDescIsSet){ |
| qb.setDirectoryDesc(directoryDesc); |
| } |
| break; |
| } |
| default: |
| throw new SemanticException(generateErrorMessage(ast, |
| "Unknown Token Type " + ast.getToken().getType())); |
| } |
| } |
| } |
| |
| /** |
| * Checks if a given path is encrypted (valid only for HDFS files) |
| * @param path The path to check for encryption |
| * @return True if the path is encrypted; False if it is not encrypted |
| * @throws HiveException If an error occurs while checking for encryption |
| */ |
| private boolean isPathEncrypted(Path path) throws HiveException { |
| |
| try { |
| HadoopShims.HdfsEncryptionShim hdfsEncryptionShim = |
| SessionState.get().getHdfsEncryptionShim(path.getFileSystem(conf), conf); |
| if (hdfsEncryptionShim != null) { |
| if (hdfsEncryptionShim.isPathEncrypted(path)) { |
| return true; |
| } |
| } |
| } catch (Exception e) { |
| throw new HiveException("Unable to determine if " + path + " is encrypted: " + e, e); |
| } |
| |
| return false; |
| } |
| |
| /** |
* Compares the encryption key strengths of two paths.
*
* @param p1 Path to an HDFS file system
* @param p2 Path to an HDFS file system
* @return -1 if p1's key is weaker than p2's; 0 if they are equal (or neither path is encrypted); 1 if p1's key is stronger
| * @throws HiveException If an error occurs while comparing key strengths. |
| */ |
| private int comparePathKeyStrength(Path p1, Path p2) throws HiveException { |
| try { |
| HadoopShims.HdfsEncryptionShim hdfsEncryptionShim1 = SessionState.get().getHdfsEncryptionShim(p1.getFileSystem(conf), conf); |
| HadoopShims.HdfsEncryptionShim hdfsEncryptionShim2 = SessionState.get().getHdfsEncryptionShim(p2.getFileSystem(conf), conf); |
| |
| if (hdfsEncryptionShim1 != null && hdfsEncryptionShim2 != null) { |
| return hdfsEncryptionShim1.comparePathKeyStrength(p1, p2, hdfsEncryptionShim2); |
| } |
| } catch (Exception e) { |
| throw new HiveException("Unable to compare key strength for " + p1 + " and " + p2 + " : " + e, e); |
| } |
| |
| return 0; // Non-encrypted path (or equals strength) |
| } |
| |
| /** |
| * Checks if a given path has read-only access permissions. |
| * |
| * @param path The path to check for read-only permissions. |
| * @return True if the path is read-only; False otherwise. |
| * @throws HiveException If an error occurs while checking file permissions. |
| */ |
| private boolean isPathReadOnly(Path path) throws HiveException { |
| HiveConf conf = SessionState.get().getConf(); |
| try { |
| FileSystem fs = path.getFileSystem(conf); |
| UserGroupInformation ugi = Utils.getUGI(); |
| FileStatus status = fs.getFileStatus(path); |
| |
// We just check for write permissions. If the check fails with an AccessControlException, then
// the location may be read-only.
| FileUtils.checkFileAccessWithImpersonation(fs, status, FsAction.WRITE, ugi.getUserName()); |
| |
| // Path has writing permissions |
| return false; |
| } catch (AccessControlException e) { |
// An AccessControlException may be caused by several different errors,
// but we treat it as if the path is read-only
| return true; |
| } catch (Exception e) { |
| throw new HiveException("Unable to determine if " + path + " is read only: " + e, e); |
| } |
| } |
| |
| /** |
| * Gets the strongest encrypted table path. |
| * |
| * @param qb The QB object that contains a list of all table locations. |
* @return The most strongly encrypted table path, or NULL if no tables are encrypted or none are HDFS tables.
| * @throws HiveException if an error occurred attempting to compare the encryption strength |
| */ |
| private Path getStrongestEncryptedTablePath(QB qb) throws HiveException { |
| List<String> tabAliases = new ArrayList<String>(qb.getTabAliases()); |
| Path strongestPath = null; |
| |
/* Walk through all table locations to find the most strongly encrypted table */
| for (String alias : tabAliases) { |
| Table tab = qb.getMetaData().getTableForAlias(alias); |
| if (tab != null) { |
| Path tablePath = tab.getDataLocation(); |
| if (tablePath != null) { |
| if ("hdfs".equalsIgnoreCase(tablePath.toUri().getScheme())) { |
| if (isPathEncrypted(tablePath)) { |
| if (strongestPath == null) { |
| strongestPath = tablePath; |
| } else if (comparePathKeyStrength(tablePath, strongestPath) > 0) { |
| strongestPath = tablePath; |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| return strongestPath; |
| } |
| |
| /** |
* Gets the staging directory where MR files will be stored temporarily.
* It walks through the QB plan to find the correct location to save temporary files. This
* temporary location (or staging directory) may be created inside encrypted table locations for
* security reasons. If the QB has read-only tables, then the old scratch directory will be used,
* or a permission error will be thrown if the queried table is encrypted and the old scratch
* directory is not.
| * |
| * @param qb The QB object that contains a list of all table locations. |
| * @return The path to the staging directory. |
| * @throws HiveException If an error occurs while identifying the correct staging location. |
| */ |
| private Path getStagingDirectoryPathname(QB qb) throws HiveException { |
| Path stagingPath = null, tablePath = null; |
| |
| if (DFSUtilClient.isHDFSEncryptionEnabled(conf)) { |
| // Looks for the most encrypted table location |
// It may return null if no tables are encrypted, or none are on HDFS
| tablePath = getStrongestEncryptedTablePath(qb); |
| } |
| if (tablePath != null) { |
| // At this point, tablePath is part of HDFS and it is encrypted |
| if (isPathReadOnly(tablePath)) { |
| Path tmpPath = ctx.getMRTmpPath(); |
| if (comparePathKeyStrength(tablePath, tmpPath) < 0) { |
| throw new HiveException("Read-only encrypted tables cannot be read " + |
| "if the scratch directory is not encrypted (or encryption is weak)"); |
| } else { |
| stagingPath = tmpPath; |
| } |
| } |
| |
| if (stagingPath == null) { |
| stagingPath = ctx.getMRTmpPath(tablePath.toUri()); |
| } |
| } else { |
| stagingPath = ctx.getMRTmpPath(false); |
| } |
| |
| return stagingPath; |
| } |
| |
| private void replaceViewReferenceWithDefinition(QB qb, Table tab, |
| String tab_name, String alias) throws SemanticException { |
| |
| ASTNode viewTree; |
| final ASTNodeOrigin viewOrigin = new ASTNodeOrigin("VIEW", tab.getTableName(), |
| tab.getViewExpandedText(), alias, qb.getParseInfo().getSrcForAlias( |
| alias)); |
| try { |
| // Reparse text, passing null for context to avoid clobbering |
| // the top-level token stream. |
| String viewFullyQualifiedName = tab.getCompleteName(); |
| String viewText = tab.getViewExpandedText(); |
| TableMask viewMask = new TableMask(this, conf, false); |
| viewTree = ParseUtils.parse(viewText, ctx, tab.getCompleteName()); |
| cacheTableHelper.populateCacheForView(ctx.getParsedTables(), conf, |
| getTxnMgr(), tab.getDbName(), tab.getTableName()); |
| if (viewMask.isEnabled() && analyzeRewrite == null) { |
| ParseResult parseResult = rewriteASTWithMaskAndFilter(viewMask, viewTree, |
| ctx.getViewTokenRewriteStream(viewFullyQualifiedName), |
| ctx, db); |
| viewTree = parseResult.getTree(); |
| } |
| SemanticDispatcher nodeOriginDispatcher = new SemanticDispatcher() { |
| @Override |
| public Object dispatch(Node nd, java.util.Stack<Node> stack, |
| Object... nodeOutputs) { |
| ((ASTNode) nd).setOrigin(viewOrigin); |
| return null; |
| } |
| }; |
| SemanticGraphWalker nodeOriginTagger = new DefaultGraphWalker( |
| nodeOriginDispatcher); |
| nodeOriginTagger.startWalking(java.util.Collections |
| .<Node> singleton(viewTree), null); |
| } catch (ParseException e) { |
| // A user could encounter this if a stored view definition contains |
| // an old SQL construct which has been eliminated in a later Hive |
| // version, so we need to provide full debugging info to help |
| // with fixing the view definition. |
| LOG.error("Failed to replaceViewReferenceWithDefinition", e); |
| StringBuilder sb = new StringBuilder(); |
| sb.append(e.getMessage()); |
| ASTErrorUtils.renderOrigin(sb, viewOrigin); |
| throw new SemanticException(sb.toString(), e); |
| } |
| QBExpr qbexpr = new QBExpr(alias); |
| doPhase1QBExpr(viewTree, qbexpr, qb.getId(), alias, true, null); |
| // if skip authorization, skip checking; |
| // if it is inside a view, skip checking; |
| // if HIVE_STATS_COLLECT_SCANCOLS is enabled, check. |
| if ((!this.skipAuthorization() && !qb.isInsideView()) |
| || HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) { |
| qb.rewriteViewToSubq(alias, tab_name, qbexpr, tab); |
| } else { |
| qb.rewriteViewToSubq(alias, tab_name, qbexpr, null); |
| } |
| } |
| |
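| // Case-insensitive membership check; callers pass 'elem' already lower-cased. |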
| private boolean isPresent(String[] list, String elem) { |
| for (String s : list) { |
| if (s.toLowerCase().equals(elem)) { |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| /* |
| * This method is invoked for unqualified column references in join conditions. |
| * It is passed the Alias-to-Operator mapping built for the QueryBlock so far. |
| * We try to resolve the unqualified column against each of the Operators' RowResolvers: |
| * - if the column is present in only one RowResolver, we treat this as a reference to |
| * that Operator. |
| * - if the column resolves with more than one RowResolver, we treat it as an Ambiguous |
| * reference. |
| * - if the column doesn't resolve with any RowResolver, we treat this as an Invalid |
| * reference. |
| */ |
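| // Illustrative example (not from the original comment): for |
| // SELECT ... FROM t1 JOIN t2 ON (key = t2.key) |
| // the unqualified "key" is resolved against t1's and t2's RowResolvers and must match |
| // exactly one of them; otherwise an ambiguous/invalid table alias error is raised. |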
| @SuppressWarnings("rawtypes") |
| private String findAlias(ASTNode columnRef, |
| Map<String, Operator> aliasToOpInfo) throws SemanticException { |
| String colName = unescapeIdentifier(columnRef.getChild(0).getText() |
| .toLowerCase()); |
| String tabAlias = null; |
| if ( aliasToOpInfo != null ) { |
| for (Map.Entry<String, Operator> opEntry : aliasToOpInfo.entrySet()) { |
| Operator op = opEntry.getValue(); |
| RowResolver rr = opParseCtx.get(op).getRowResolver(); |
| ColumnInfo colInfo = rr.get(null, colName); |
| if (colInfo != null) { |
| if (tabAlias == null) { |
| tabAlias = opEntry.getKey(); |
| } else { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(), columnRef.getChild(0))); |
| } |
| } |
| } |
| } |
| if ( tabAlias == null ) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.INVALID_TABLE_ALIAS.getMsg(), columnRef.getChild(0))); |
| } |
| return tabAlias; |
| } |
| |
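| /* |
| * Summary comment (added for readability): walks a join-condition AST fragment and |
| * records, for each referenced alias, whether it belongs to the left or the right side |
| * of the join. Literals contribute nothing; functions and other operators are handled |
| * by recursing into their children, with extra bookkeeping for semijoin columns. |
| */ |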
| @SuppressWarnings("nls") |
| void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn, |
| List<String> leftAliases, List<String> rightAliases, |
| List<String> fields, |
| Map<String, Operator> aliasToOpInfo) throws SemanticException { |
| switch (condn.getToken().getType()) { |
| case HiveParser.TOK_TABLE_OR_COL: |
| String tableOrCol = unescapeIdentifier(condn.getChild(0).getText() |
| .toLowerCase()); |
| unparseTranslator.addIdentifierTranslation((ASTNode) condn.getChild(0)); |
| if (isPresent(joinTree.getLeftAliases(), tableOrCol)) { |
| if (!leftAliases.contains(tableOrCol)) { |
| leftAliases.add(tableOrCol); |
| } |
| } else if (isPresent(joinTree.getRightAliases(), tableOrCol)) { |
| if (!rightAliases.contains(tableOrCol)) { |
| rightAliases.add(tableOrCol); |
| } |
| } else { |
| tableOrCol = findAlias(condn, aliasToOpInfo); |
| if (isPresent(joinTree.getLeftAliases(), tableOrCol)) { |
| if (!leftAliases.contains(tableOrCol)) { |
| leftAliases.add(tableOrCol); |
| } |
| } else { |
| if (!rightAliases.contains(tableOrCol)) { |
| rightAliases.add(tableOrCol); |
| } |
| if (!joinTree.getNoSemiJoin()) { |
| // if this is a semijoin, we need to add the condition |
| joinTree.addRHSSemijoinColumns(tableOrCol, condn); |
| } |
| } |
| } |
| break; |
| |
| case HiveParser.Identifier: |
| // it may be a field name; return the identifier and let the caller decide |
| // whether it is one or not |
| if (fields != null) { |
| fields |
| .add(unescapeIdentifier(condn.getToken().getText().toLowerCase())); |
| } |
| unparseTranslator.addIdentifierTranslation(condn); |
| break; |
| case HiveParser.TOK_NULL: |
| case HiveParser.Number: |
| case HiveParser.StringLiteral: |
| case HiveParser.IntegralLiteral: |
| case HiveParser.NumberLiteral: |
| case HiveParser.TOK_STRINGLITERALSEQUENCE: |
| case HiveParser.TOK_CHARSETLITERAL: |
| case HiveParser.KW_TRUE: |
| case HiveParser.KW_FALSE: |
| case HiveParser.TOK_DATELITERAL: |
| case HiveParser.TOK_TIMESTAMPLITERAL: |
| case HiveParser.TOK_TIMESTAMPLOCALTZLITERAL: |
| case HiveParser.TOK_INTERVAL_DAY_LITERAL: |
| case HiveParser.TOK_INTERVAL_DAY_TIME: |
| case HiveParser.TOK_INTERVAL_DAY_TIME_LITERAL: |
| case HiveParser.TOK_INTERVAL_HOUR_LITERAL: |
| case HiveParser.TOK_INTERVAL_MINUTE_LITERAL: |
| case HiveParser.TOK_INTERVAL_MONTH_LITERAL: |
| case HiveParser.TOK_INTERVAL_SECOND_LITERAL: |
| case HiveParser.TOK_INTERVAL_YEAR_LITERAL: |
| case HiveParser.TOK_INTERVAL_YEAR_MONTH: |
| case HiveParser.TOK_INTERVAL_YEAR_MONTH_LITERAL: |
| break; |
| |
| case HiveParser.TOK_FUNCTION: |
| // check all the arguments |
| for (int i = 1; i < condn.getChildCount(); i++) { |
| parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(i), |
| leftAliases, rightAliases, null, aliasToOpInfo); |
| } |
| break; |
| |
| default: |
| // This is an operator, so check whether it is a unary or a binary operator |
| if (condn.getChildCount() == 1) { |
| parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0), |
| leftAliases, rightAliases, null, aliasToOpInfo); |
| } else if (condn.getChildCount() == 2) { |
| |
| List<String> fields1 = null; |
| // if it is a dot operator, remember the field name of the rhs of the |
| // left semijoin |
| if (!joinTree.getNoSemiJoin() |
| && condn.getToken().getType() == HiveParser.DOT) { |
| // get the semijoin rhs table name and field name |
| fields1 = new ArrayList<String>(); |
| int rhssize = rightAliases.size(); |
| parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0), |
| leftAliases, rightAliases, null, aliasToOpInfo); |
| String rhsAlias = null; |
| |
| if (rightAliases.size() > rhssize) { // the new table is rhs table |
| rhsAlias = rightAliases.get(rightAliases.size() - 1); |
| } |
| |
| parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1), |
| leftAliases, rightAliases, fields1, aliasToOpInfo); |
| if (rhsAlias != null && fields1.size() > 0) { |
| joinTree.addRHSSemijoinColumns(rhsAlias, condn); |
| } |
| } else { |
| parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0), |
| leftAliases, rightAliases, null, aliasToOpInfo); |
| parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1), |
| leftAliases, rightAliases, fields1, aliasToOpInfo); |
| } |
| } else { |
| throw new SemanticException(condn.toStringTree() + " encountered with " |
| + condn.getChildCount() + " children"); |
| } |
| break; |
| } |
| } |
| |
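| // Summary comment (added for readability): a condition referencing both sides of the |
| // join becomes a post-join filter; one referencing only the right side is added to the |
| // right join expressions; one referencing only the left side is added to the left join |
| // expressions and its aliases are recorded in leftSrc; one referencing neither side |
| // also becomes a post-join filter. |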
| private void populateAliases(List<String> leftAliases, |
| List<String> rightAliases, ASTNode condn, QBJoinTree joinTree, |
| List<String> leftSrc) { |
| if ((leftAliases.size() != 0) && (rightAliases.size() != 0)) { |
| joinTree.addPostJoinFilter(condn); |
| return; |
| } |
| |
| if (rightAliases.size() != 0) { |
| assert rightAliases.size() == 1; |
| joinTree.getExpressions().get(1).add(condn); |
| } else if (leftAliases.size() != 0) { |
| joinTree.getExpressions().get(0).add(condn); |
| for (String s : leftAliases) { |
| if (!leftSrc.contains(s)) { |
| leftSrc.add(s); |
| } |
| } |
| } else { |
| joinTree.addPostJoinFilter(condn); |
| } |
| } |
| |
| /* |
| * refactored out of the Equality case of parseJoinCondition |
| * so that this can be recursively called on its left tree in the case when |
| * only left sources are referenced in a Predicate |
| */ |
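| // Parameter sketch (added for readability): leftCondAl1/rightCondAl1 hold join-left-side |
| // aliases referenced by the left/right operand of the equality, leftCondAl2/rightCondAl2 |
| // the join-right-side ones. E.g. for "a JOIN b ON a.x = b.y", leftCondAl1 = [a] and |
| // rightCondAl2 = [b], so a.x is added to the left join expressions and b.y to the right. |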
| void applyEqualityPredicateToQBJoinTree(QBJoinTree joinTree, |
| JoinType type, |
| List<String> leftSrc, |
| ASTNode joinCond, |
| ASTNode leftCondn, |
| ASTNode rightCondn, |
| List<String> leftCondAl1, |
| List<String> leftCondAl2, |
| List<String> rightCondAl1, |
| List<String> rightCondAl2) { |
| if (leftCondAl1.size() != 0) { |
| if ((rightCondAl1.size() != 0) |
| || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) { |
| if (type.equals(JoinType.LEFTOUTER) || |
| type.equals(JoinType.FULLOUTER)) { |
| joinTree.getFilters().get(0).add(joinCond); |
| } else { |
| /* |
| * If the rhs references table sources and this QBJoinTree has a leftTree; |
| * hand it to the leftTree and let it recursively handle it. |
| * There are 5 cases of passing a condition down: |
| * 1. The leftSide && rightSide don't contain references to the leftTree's rightAlias |
| * => pass the lists down as is. |
| * 2. The leftSide contains refs to the leftTree's rightAlias, the rightSide doesn't |
| * => switch the leftCondAl1 and leftCondAl2 lists and pass down. |
| * 3. The rightSide contains refs to the leftTree's rightAlias, the leftSide doesn't |
| * => switch the rightCondAl1 and rightCondAl2 lists and pass down. |
| * 4. In case both contain references to the leftTree's rightAlias |
| * => we cannot push the condition down. |
| * 5. If either side contains references to both left & right |
| * => we cannot push the condition down. |
| */ |
| if (rightCondAl1.size() != 0) { |
| QBJoinTree leftTree = joinTree.getJoinSrc(); |
| List<String> leftTreeLeftSrc = new ArrayList<String>(); |
| if (leftTree != null && leftTree.getNoOuterJoin()) { |
| String leftTreeRightSource = leftTree.getRightAliases() != null && |
| leftTree.getRightAliases().length > 0 ? |
| leftTree.getRightAliases()[0] : null; |
| |
| boolean leftHasRightReference = false; |
| for (String r : leftCondAl1) { |
| if (r.equals(leftTreeRightSource)) { |
| leftHasRightReference = true; |
| break; |
| } |
| } |
| boolean rightHasRightReference = false; |
| for (String r : rightCondAl1) { |
| if (r.equals(leftTreeRightSource)) { |
| rightHasRightReference = true; |
| break; |
| } |
| } |
| |
| boolean pushedDown = false; |
| if ( !leftHasRightReference && !rightHasRightReference ) { |
| applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc, |
| joinCond, leftCondn, rightCondn, |
| leftCondAl1, leftCondAl2, |
| rightCondAl1, rightCondAl2); |
| pushedDown = true; |
| } else if ( !leftHasRightReference && rightHasRightReference && rightCondAl1.size() == 1 ) { |
| applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc, |
| joinCond, leftCondn, rightCondn, |
| leftCondAl1, leftCondAl2, |
| rightCondAl2, rightCondAl1); |
| pushedDown = true; |
| } else if (leftHasRightReference && !rightHasRightReference && leftCondAl1.size() == 1 ) { |
| applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc, |
| joinCond, leftCondn, rightCondn, |
| leftCondAl2, leftCondAl1, |
| rightCondAl1, rightCondAl2); |
| pushedDown = true; |
| } |
| |
| if (leftTreeLeftSrc.size() == 1) { |
| leftTree.setLeftAlias(leftTreeLeftSrc.get(0)); |
| } |
| if ( pushedDown) { |
| return; |
| } |
| } // leftTree != null |
| } |
| joinTree.getFiltersForPushing().get(0).add(joinCond); |
| } |
| } else if (rightCondAl2.size() != 0) { |
| populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree, |
| leftSrc); |
| populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree, |
| leftSrc); |
| boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS; |
| joinTree.getNullSafes().add(nullsafe); |
| } |
| } else if (leftCondAl2.size() != 0) { |
| if ((rightCondAl2.size() != 0) |
| || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) { |
| if (type.equals(JoinType.RIGHTOUTER) |
| || type.equals(JoinType.FULLOUTER)) { |
| joinTree.getFilters().get(1).add(joinCond); |
| } else { |
| joinTree.getFiltersForPushing().get(1).add(joinCond); |
| } |
| } else if (rightCondAl1.size() != 0) { |
| populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree, |
| leftSrc); |
| populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree, |
| leftSrc); |
| boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS; |
| joinTree.getNullSafes().add(nullsafe); |
| } |
| } else if (rightCondAl1.size() != 0) { |
| if (type.equals(JoinType.LEFTOUTER) |
| || type.equals(JoinType.FULLOUTER)) { |
| joinTree.getFilters().get(0).add(joinCond); |
| } else { |
| joinTree.getFiltersForPushing().get(0).add(joinCond); |
| } |
| } else { |
| if (type.equals(JoinType.RIGHTOUTER) |
| || type.equals(JoinType.FULLOUTER)) { |
| joinTree.getFilters().get(1).add(joinCond); |
| } else if (type.equals(JoinType.LEFTSEMI)) { |
| joinTree.getExpressions().get(0).add(leftCondn); |
| joinTree.getExpressions().get(1).add(rightCondn); |
| boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS; |
| joinTree.getNullSafes().add(nullsafe); |
| joinTree.getFiltersForPushing().get(1).add(joinCond); |
| } else { |
| joinTree.getFiltersForPushing().get(1).add(joinCond); |
| } |
| } |
| |
| } |
| |
| @SuppressWarnings("rawtypes") |
| private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, List<String> leftSrc, |
| Map<String, Operator> aliasToOpInfo) |
| throws SemanticException { |
| if (joinCond == null) { |
| return; |
| } |
| JoinCond cond = joinTree.getJoinCond()[0]; |
| |
| JoinType type = cond.getJoinType(); |
| parseJoinCondition(joinTree, joinCond, leftSrc, type, aliasToOpInfo); |
| |
| List<List<ASTNode>> filters = joinTree.getFilters(); |
| if (type == JoinType.LEFTOUTER || type == JoinType.FULLOUTER) { |
| joinTree.addFilterMapping(cond.getLeft(), cond.getRight(), filters.get(0).size()); |
| } |
| if (type == JoinType.RIGHTOUTER || type == JoinType.FULLOUTER) { |
| joinTree.addFilterMapping(cond.getRight(), cond.getLeft(), filters.get(1).size()); |
| } |
| } |
| |
| /** |
| * Parse the join condition. For equality conjuncts, break them into left and |
| * right expressions and store them in the join tree. For other conditions: add |
| * them to the post-join conditions if they apply to more than one input; add |
| * them to the filter conditions of a given input if they apply to only one of |
| * them and should not be pushed, e.g., a left outer join with a condition |
| * that applies only to the left input; or push them below the join if they |
| * apply to only one input and can be pushed, e.g., a left outer join with a |
| * condition that applies only to the right input. |
| * |
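| * Illustrative example (added for clarity): in |
| * {@code a LEFT OUTER JOIN b ON a.k = b.k AND b.v > 10}, the equality becomes the |
| * left/right join key expressions, while {@code b.v > 10} references only the right |
| * input and is pushed below the join; a condition referencing only {@code a} would |
| * instead be kept as a join-time filter on the left input rather than pushed. |
| * |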
| * @param joinTree |
| * jointree to be populated |
| * @param joinCond |
| * join condition |
| * @param leftSrc |
| * left sources |
| * @throws SemanticException |
| */ |
| @SuppressWarnings("rawtypes") |
| private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, |
| List<String> leftSrc, JoinType type, |
| Map<String, Operator> aliasToOpInfo) throws SemanticException { |
| if (joinCond == null) { |
| return; |
| } |
| |
| switch (joinCond.getToken().getType()) { |
| case HiveParser.KW_OR: |
| parseJoinCondPopulateAlias(joinTree, (ASTNode) joinCond.getChild(0), |
| new ArrayList<String>(), new ArrayList<String>(), |
| null, aliasToOpInfo); |
| parseJoinCondPopulateAlias(joinTree, (ASTNode) joinCond.getChild(1), |
| new ArrayList<String>(), new ArrayList<String>(), |
| null, aliasToOpInfo); |
| joinTree.addPostJoinFilter(joinCond); |
| break; |
| |
| case HiveParser.KW_AND: |
| parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(0), leftSrc, type, aliasToOpInfo); |
| parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(1), leftSrc, type, aliasToOpInfo); |
| break; |
| |
| case HiveParser.EQUAL_NS: |
| case HiveParser.EQUAL: |
| ASTNode leftCondn = (ASTNode) joinCond.getChild(0); |
| List<String> leftCondAl1 = new ArrayList<String>(); |
| List<String> leftCondAl2 = new ArrayList<String>(); |
| parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2, |
| null, aliasToOpInfo); |
| |
| ASTNode rightCondn = (ASTNode) joinCond.getChild(1); |
| List<String> rightCondAl1 = new ArrayList<String>(); |
| List<String> rightCondAl2 = new ArrayList<String>(); |
| parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1, |
| rightCondAl2, null, aliasToOpInfo); |
| |
| // is it a filter or a join condition |
| // if it is filter see if it can be pushed above the join |
| // filter cannot be pushed if |
| // * join is full outer or |
| // * join is left outer and filter is on left alias or |
| // * join is right outer and filter is on right alias |
| if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0)) |
| || ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) { |
| joinTree.addPostJoinFilter(joinCond); |
| } else { |
| applyEqualityPredicateToQBJoinTree(joinTree, type, leftSrc, |
| joinCond, leftCondn, rightCondn, |
| leftCondAl1, leftCondAl2, |
| rightCondAl1, rightCondAl2); |
| } |
| break; |
| |
| default: |
| boolean isFunction = (joinCond.getType() == HiveParser.TOK_FUNCTION); |
| |
| // Create all children |
| int childrenBegin = (isFunction ? 1 : 0); |
| List<List<String>> leftAlias = new ArrayList<List<String>>(joinCond.getChildCount() - childrenBegin); |
| List<List<String>> rightAlias = new ArrayList<List<String>>(joinCond.getChildCount() - childrenBegin); |
| for (int ci = 0; ci < joinCond.getChildCount() - childrenBegin; ci++) { |
| List<String> left = new ArrayList<String>(); |
| List<String> right = new ArrayList<String>(); |
| leftAlias.add(left); |
| rightAlias.add(right); |
| } |
| |
| for (int ci = childrenBegin; ci < joinCond.getChildCount(); ci++) { |
| parseJoinCondPopulateAlias(joinTree, (ASTNode) joinCond.getChild(ci), |
| leftAlias.get(ci - childrenBegin), rightAlias.get(ci |
| - childrenBegin), null, aliasToOpInfo); |
| } |
| |
| boolean leftAliasNull = true; |
| for (List<String> left : leftAlias) { |
| if (left.size() != 0) { |
| leftAliasNull = false; |
| break; |
| } |
| } |
| |
| boolean rightAliasNull = true; |
| for (List<String> right : rightAlias) { |
| if (right.size() != 0) { |
| rightAliasNull = false; |
| break; |
| } |
| } |
| |
| if (!leftAliasNull && !rightAliasNull) { |
| joinTree.addPostJoinFilter(joinCond); |
| } else { |
| if (!leftAliasNull) { |
| if (type.equals(JoinType.LEFTOUTER) |
| || type.equals(JoinType.FULLOUTER)) { |
| joinTree.getFilters().get(0).add(joinCond); |
| } else { |
| joinTree.getFiltersForPushing().get(0).add(joinCond); |
| } |
| } else { |
| if (type.equals(JoinType.RIGHTOUTER) |
| || type.equals(JoinType.FULLOUTER)) { |
| joinTree.getFilters().get(1).add(joinCond); |
| } else { |
| joinTree.getFiltersForPushing().get(1).add(joinCond); |
| } |
| } |
| } |
| |
| break; |
| } |
| } |
| |
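| /* |
| * Summary comment (added for readability): best-effort extraction of equi-join |
| * conditions written in the WHERE clause (implicit join syntax). AND-ed equality |
| * predicates whose two operands each reference exactly one side of the join are turned |
| * into join conditions; everything else is left to normal WHERE-clause handling and |
| * predicate pushdown. |
| */ |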
| @SuppressWarnings("rawtypes") |
| private void extractJoinCondsFromWhereClause(QBJoinTree joinTree, ASTNode predicate, |
| Map<String, Operator> aliasToOpInfo) { |
| |
| switch (predicate.getType()) { |
| case HiveParser.KW_AND: |
| extractJoinCondsFromWhereClause(joinTree, |
| (ASTNode) predicate.getChild(0), aliasToOpInfo); |
| extractJoinCondsFromWhereClause(joinTree, |
| (ASTNode) predicate.getChild(1), aliasToOpInfo); |
| break; |
| case HiveParser.EQUAL_NS: |
| case HiveParser.EQUAL: |
| |
| ASTNode leftCondn = (ASTNode) predicate.getChild(0); |
| List<String> leftCondAl1 = new ArrayList<String>(); |
| List<String> leftCondAl2 = new ArrayList<String>(); |
| try { |
| parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2, null, aliasToOpInfo); |
| } catch(SemanticException se) { |
| // suppress here; if it is a real issue will get caught in where clause handling. |
| return; |
| } |
| |
| ASTNode rightCondn = (ASTNode) predicate.getChild(1); |
| List<String> rightCondAl1 = new ArrayList<String>(); |
| List<String> rightCondAl2 = new ArrayList<String>(); |
| try { |
| parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1, |
| rightCondAl2, null, aliasToOpInfo); |
| } catch(SemanticException se) { |
| // suppress here; if it is a real issue will get caught in where clause handling. |
| return; |
| } |
| |
| if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0)) |
| || ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) { |
| // this is not a join condition. |
| return; |
| } |
| |
| if (((leftCondAl1.size() == 0) && (leftCondAl2.size() == 0)) |
| || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) { |
| // this is not a join condition. Will get handled by predicate pushdown. |
| return; |
| } |
| |
| List<String> leftSrc = new ArrayList<String>(); |
| JoinCond cond = joinTree.getJoinCond()[0]; |
| JoinType type = cond.getJoinType(); |
| applyEqualityPredicateToQBJoinTree(joinTree, type, leftSrc, |
| predicate, leftCondn, rightCondn, |
| leftCondAl1, leftCondAl2, |
| rightCondAl1, rightCondAl2); |
| if (leftSrc.size() == 1) { |
| joinTree.setLeftAlias(leftSrc.get(0)); |
| } |
| |
| // todo: hold onto this predicate, so that we don't add it to the Filter Operator. |
| |
| break; |
| default: |
| return; |
| } |
| } |
| |
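| // Registers the operator's RowResolver in opParseCtx so later phases can resolve |
| // columns against this operator's output; returns the same operator. |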
| @SuppressWarnings("nls") |
| <T extends OperatorDesc> Operator<T> putOpInsertMap(Operator<T> op, |
| RowResolver rr) { |
| OpParseContext ctx = new OpParseContext(rr); |
| opParseCtx.put(op, ctx); |
| op.augmentPlan(); |
| return op; |
| } |
| |
| @SuppressWarnings("nls") |
| private Operator genHavingPlan(String dest, QB qb, Operator input, |
| Map<String, Operator> aliasToOpInfo) |
| throws SemanticException { |
| |
| ASTNode havingExpr = qb.getParseInfo().getHavingForClause(dest); |
| |
| OpParseContext inputCtx = opParseCtx.get(input); |
| RowResolver inputRR = inputCtx.getRowResolver(); |
| Map<ASTNode, String> exprToColumnAlias = qb.getParseInfo().getAllExprToColumnAlias(); |
| for (ASTNode astNode : exprToColumnAlias.keySet()) { |
| if (inputRR.getExpression(astNode) != null) { |
| inputRR.put("", exprToColumnAlias.get(astNode), inputRR.getExpression(astNode)); |
| } |
| } |
| ASTNode condn = (ASTNode) havingExpr.getChild(0); |
| |
| if (!isCBOExecuted() && !qb.getParseInfo().getDestToGroupBy().isEmpty()) { |
| // If CBO did not optimize the query, we might need to replace grouping function |
| final String destClauseName = qb.getParseInfo().getClauseNames().iterator().next(); |
| final boolean cubeRollupGrpSetPresent = (!qb.getParseInfo().getDestRollups().isEmpty() |
| || !qb.getParseInfo().getDestGroupingSets().isEmpty() |
| || !qb.getParseInfo().getDestCubes().isEmpty()); |
| // Special handling of grouping function |
| condn = rewriteGroupingFunctionAST(getGroupByForClause(qb.getParseInfo(), destClauseName), condn, |
| !cubeRollupGrpSetPresent); |
| } |
| |
| /* |
| * Now a having clause can contain a SubQuery predicate; |
| * so we invoke genFilterPlan to handle SubQuery algebraic transformation, |
| * just as is done for SubQuery predicates appearing in the Where Clause. |
| */ |
| Operator output = genFilterPlan(condn, qb, input, aliasToOpInfo, true, false); |
| output = putOpInsertMap(output, inputRR); |
| return output; |
| } |
| |
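| /* |
| * Summary comment (added for readability): rewrites calls to grouping(<expr>, ...) so |
| * they can be evaluated at runtime. Each call becomes, roughly, grouping(<grouping-id |
| * argument>, <index>, ...), where the grouping-id argument is either the literal 0L |
| * (when no CUBE/ROLLUP/GROUPING SETS is present) or a reference to the grouping-id |
| * virtual column, and each <index> is the position of the matching GROUP BY expression |
| * counted from the end of the GROUP BY list. Illustrative: for GROUP BY a, b WITH |
| * ROLLUP, grouping(a) is rewritten roughly as grouping(<grouping-id column>, 1L). |
| */ |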
| protected ASTNode rewriteGroupingFunctionAST(final List<ASTNode> grpByAstExprs, ASTNode targetNode, |
| final boolean noneSet) { |
| |
| TreeVisitorAction action = new TreeVisitorAction() { |
| |
| @Override |
| public Object pre(Object t) { |
| return t; |
| } |
| |
| @Override |
| public Object post(Object t) { |
| ASTNode root = (ASTNode) t; |
| if (root.getType() == HiveParser.TOK_FUNCTION) { |
| ASTNode func = (ASTNode) ParseDriver.adaptor.getChild(root, 0); |
| if ("grouping".equalsIgnoreCase(func.getText()) && func.getChildCount() == 0) { |
| int numberOperands = ParseDriver.adaptor.getChildCount(root); |
| // We implement this logic using replaceChildren instead of replacing |
| // the root node itself because windowing logic stores multiple |
| // pointers to the AST, and replacing the root might leave some of those |
| // pointers referencing the non-rewritten version |
| ASTNode newRoot = new ASTNode(); |
| // Rewritten grouping function |
| ASTNode groupingFunc = (ASTNode) ParseDriver.adaptor.create( |
| HiveParser.Identifier, "grouping"); |
| ParseDriver.adaptor.addChild(groupingFunc, ParseDriver.adaptor.create( |
| HiveParser.Identifier, "rewritten")); |
| newRoot.addChild(groupingFunc); |
| // Grouping ID reference |
| ASTNode childGroupingID; |
| if (noneSet) { |
| // Query does not contain CUBE, ROLLUP, or GROUPING SETS, and thus, |
| // grouping should return 0 |
| childGroupingID = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral, |
| "0L"); |
| } else { |
| // We refer to grouping_id column |
| childGroupingID = (ASTNode) ParseDriver.adaptor.create( |
| HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL"); |
| ParseDriver.adaptor.addChild(childGroupingID, ParseDriver.adaptor.create( |
| HiveParser.Identifier, VirtualColumn.GROUPINGID.getName())); |
| } |
| newRoot.addChild(childGroupingID); |
| // Indices |
| for (int i = 1; i < numberOperands; i++) { |
| ASTNode c = (ASTNode) ParseDriver.adaptor.getChild(root, i); |
| for (int j = 0; j < grpByAstExprs.size(); j++) { |
| ASTNode grpByExpr = grpByAstExprs.get(j); |
| if (grpByExpr.toStringTree().equals(c.toStringTree())) { |
| // Create and add AST node with position of grouping function input |
| // in group by clause |
| ASTNode childN = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral, |
| String.valueOf(IntMath.mod(-j-1, grpByAstExprs.size())) + "L"); |
| newRoot.addChild(childN); |
| break; |
| } |
| } |
| } |
| if (numberOperands + 1 != ParseDriver.adaptor.getChildCount(newRoot)) { |
| throw new RuntimeException(ErrorMsg.HIVE_GROUPING_FUNCTION_EXPR_NOT_IN_GROUPBY.getMsg()); |
| } |
| // Replace expression |
| root.replaceChildren(0, numberOperands - 1, newRoot); |
| } |
| } |
| return t; |
| } |
| }; |
| return (ASTNode) new TreeVisitor(ParseDriver.adaptor).visit(targetNode, action); |
| } |
| |
| private Operator genPlanForSubQueryPredicate( |
| QB qbSQ, |
| ISubQueryJoinInfo subQueryPredicate) throws SemanticException { |
| qbSQ.setSubQueryDef(subQueryPredicate.getSubQuery()); |
| Phase1Ctx ctx_1 = initPhase1Ctx(); |
| doPhase1(subQueryPredicate.getSubQueryAST(), qbSQ, ctx_1, null); |
| getMetaData(qbSQ); |
| return genPlan(qbSQ); |
| } |
| |
| @SuppressWarnings("nls") |
| private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, |
| Map<String, Operator> aliasToOpInfo, |
| boolean forHavingClause, boolean forGroupByClause) |
| throws SemanticException { |
| |
| OpParseContext inputCtx = opParseCtx.get(input); |
| RowResolver inputRR = inputCtx.getRowResolver(); |
| |
| /* |
| * Handling of SubQuery Expressions: |
| * if "Where clause contains no SubQuery expressions" then |
| * -->[true] ===CONTINUE_FILTER_PROCESSING=== |
| * else |
| * -->[false] "extract SubQuery expressions\n from Where clause" |
| * if "this is a nested SubQuery or \nthere are more than 1 SubQuery expressions" then |
| * -->[yes] "throw Unsupported Error" |
| * else |
| * --> "Rewrite Search condition to \nremove SubQuery predicate" |
| * --> "build QBSubQuery" |
| * --> "extract correlated predicates \nfrom Where Clause" |
| * --> "add correlated Items to \nSelect List and Group By" |
| * --> "construct Join Predicate \nfrom correlation predicates" |
| * --> "Generate Plan for\n modified SubQuery" |
| * --> "Build the Join Condition\n for Parent Query to SubQuery join" |
| * --> "Build the QBJoinTree from the Join condition" |
| * --> "Update Parent Query Filter\n with any Post Join conditions" |
| * --> ===CONTINUE_FILTER_PROCESSING=== |
| * endif |
| * endif |
| * |
| * Support for Sub Queries in Having Clause: |
| * - By and large this works the same way as SubQueries in the Where Clause. |
| * - The one addendum is the handling of aggregation expressions from the Outer Query |
| * appearing in correlation clauses. |
| * - So such correlating predicates are allowed: |
| * min(OuterQuery.x) = SubQuery.y |
| * - this requires special handling when converting to joins. See the QBSubQuery.rewrite |
| * method for detailed comments. |
| */ |
| List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); |
| |
| if ( subQueriesInOriginalTree.size() > 0 ) { |
| |
| /* |
| * Restriction.9.m :: disallow nested SubQuery expressions. |
| */ |
| if (qb.getSubQueryPredicateDef() != null ) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(), |
| subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported.")); |
| } |
| |
| /* |
| * Restriction.8.m :: We allow only 1 SubQuery expression per Query. |
| */ |
| if (subQueriesInOriginalTree.size() > 1 ) { |
| |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(), |
| subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported.")); |
| } |
| |
| /* |
| * Clone the Search AST; apply all rewrites on the clone. |
| */ |
| ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); |
| List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); |
| |
| for(int i=0; i < subQueries.size(); i++) { |
| ASTNode subQueryAST = subQueries.get(i); |
| ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); |
| |
| int sqIdx = qb.incrNumSubQueryPredicates(); |
| clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST); |
| |
| QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), |
| sqIdx, subQueryAST, originalSubQueryAST, ctx); |
| |
| if ( !forHavingClause ) { |
| qb.setWhereClauseSubQueryPredicate(subQuery); |
| } else { |
| qb.setHavingClauseSubQueryPredicate(subQuery); |
| } |
| String havingInputAlias = null; |
| |
| if ( forHavingClause ) { |
| havingInputAlias = "gby_sq" + sqIdx; |
| aliasToOpInfo.put(havingInputAlias, input); |
| } |
| |
| subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias, aliasToOpInfo.keySet()); |
| |
| QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true); |
| qbSQ.setInsideView(qb.isInsideView()); |
| Operator sqPlanTopOp = genPlanForSubQueryPredicate(qbSQ, subQuery); |
| aliasToOpInfo.put(subQuery.getAlias(), sqPlanTopOp); |
| RowResolver sqRR = opParseCtx.get(sqPlanTopOp).getRowResolver(); |
| |
| /* |
| * Check.5.h :: For In and Not In the SubQuery must implicitly or |
| * explicitly only contain one select item. |
| */ |
| if ( subQuery.getOperator().getType() != SubQueryType.EXISTS && |
| subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS && |
| sqRR.getColumnInfos().size() - |
| subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1 ) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(), |
| subQueryAST, "SubQuery can contain only 1 item in Select List.")); |
| } |
| |
| /* |
| * If this is a Not In SubQuery Predicate then Join in the Null Check SubQuery. |
| * See QBSubQuery.NotInCheck for details on why and how this is constructed. |
| */ |
| if ( subQuery.getNotInCheck() != null ) { |
| QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck(); |
| notInCheck.setSQRR(sqRR); |
| QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true); |
| Operator sqnicPlanTopOp = genPlanForSubQueryPredicate(qbSQ_nic, notInCheck); |
| aliasToOpInfo.put(notInCheck.getAlias(), sqnicPlanTopOp); |
| QBJoinTree joinTree_nic = genSQJoinTree(qb, notInCheck, |
| input, |
| aliasToOpInfo); |
| pushJoinFilters(qb, joinTree_nic, aliasToOpInfo, false); |
| input = genJoinOperator(qbSQ_nic, joinTree_nic, aliasToOpInfo, input); |
| inputRR = opParseCtx.get(input).getRowResolver(); |
| if ( forHavingClause ) { |
| aliasToOpInfo.put(havingInputAlias, input); |
| } |
| } |
| |
| /* |
| * Gen Join between outer Operator and SQ op |
| */ |
| subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias); |
| QBJoinTree joinTree = genSQJoinTree(qb, subQuery, |
| input, |
| aliasToOpInfo); |
| /* |
| * push filters only for this QBJoinTree. Child QBJoinTrees have already been handled. |
| */ |
| pushJoinFilters(qb, joinTree, aliasToOpInfo, false); |
| input = genJoinOperator(qbSQ, joinTree, aliasToOpInfo, input); |
| searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond); |
| } |
| } |
| |
| return genFilterPlan(qb, searchCond, input, forHavingClause || forGroupByClause); |
| } |
| |
| /** |
| * create a filter plan. The condition and the inputs are specified. |
| * |
| * @param qb |
| * current query block |
| * @param condn |
| * The condition to be resolved |
| * @param input |
| * the input operator |
| */ |
| @SuppressWarnings("nls") |
| private Operator genFilterPlan(QB qb, ASTNode condn, Operator input, boolean useCaching) |
| throws SemanticException { |
| |
| OpParseContext inputCtx = opParseCtx.get(input); |
| RowResolver inputRR = inputCtx.getRowResolver(); |
| |
| ExprNodeDesc filterCond = genExprNodeDesc(condn, inputRR, useCaching, isCBOExecuted()); |
| if (filterCond instanceof ExprNodeConstantDesc) { |
| ExprNodeConstantDesc c = (ExprNodeConstantDesc) filterCond; |
| if (Boolean.TRUE.equals(c.getValue())) { |
| // If filter condition is TRUE, we ignore it |
| return input; |
| } |
| if (ExprNodeDescUtils.isNullConstant(c)) { |
| // If filter condition is NULL, transform to FALSE |
| filterCond = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, false); |
| } |
| } |
| |
| if (!filterCond.getTypeInfo().accept(TypeInfoFactory.booleanTypeInfo)) { |
| // If the returning type of the filter condition is not boolean, try to implicitly |
| // convert the result of the condition to a boolean value. |
| if (filterCond.getTypeInfo().getCategory() == ObjectInspector.Category.PRIMITIVE) { |
| // For primitive types like string/double/timestamp, try to cast the result of |
| // the child expression to a boolean. |
| filterCond = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() |
| .createConversionCast(filterCond, TypeInfoFactory.booleanTypeInfo); |
| } else { |
| // For complex types like map/list/struct, create a isnotnull function on the child expression. |
| filterCond = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() |
| .getFuncExprNodeDesc("isnotnull", filterCond); |
| } |
| } |
| |
| Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( |
| new FilterDesc(filterCond, false), new RowSchema( |
| inputRR.getColumnInfos()), input), inputRR); |
| |
| ctx.getPlanMapper().link(condn, output); |
| |
| LOG.debug("Created Filter Plan for {} row schema: {}", qb.getId(), inputRR.toString()); |
| return output; |
| } |
| |
| /* |
| * For inner joins, push an 'is not null' predicate to the join sources for |
| * every non-null-safe join key. |
| */ |
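| // Illustrative example (added for clarity): called once per join source; for |
| // "a JOIN b ON a.k = b.k", the call for a's branch adds a filter "a.k is not null" |
| // unless the key is null-safe (<=>), a partition/virtual column, or already |
| // referenced by an existing filter predicate on that branch. |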
| private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input, |
| QBJoinTree joinTree, ExprNodeDesc[] joinKeys) throws SemanticException { |
| |
| if (qb == null || joinTree == null) { |
| return input; |
| } |
| |
| if (!joinTree.getNoOuterJoin()) { |
| return input; |
| } |
| |
| if (joinKeys == null || joinKeys.length == 0) { |
| return input; |
| } |
| Multimap<Integer, ExprNodeColumnDesc> hashes = ArrayListMultimap.create(); |
| if (input instanceof FilterOperator) { |
| ExprNodeDescUtils.getExprNodeColumnDesc(Arrays.asList(((FilterDesc)input.getConf()).getPredicate()), hashes); |
| } |
| ExprNodeDesc filterPred = null; |
| List<Boolean> nullSafes = joinTree.getNullSafes(); |
| for (int i = 0; i < joinKeys.length; i++) { |
| if (nullSafes.get(i) || (joinKeys[i] instanceof ExprNodeColumnDesc && |
| ((ExprNodeColumnDesc)joinKeys[i]).getIsPartitionColOrVirtualCol())) { |
| // no need to generate is not null predicate for partitioning or |
| // virtual column, since those columns can never be null. |
| continue; |
| } |
| boolean skip = false; |
| for (ExprNodeColumnDesc node : hashes.get(joinKeys[i].hashCode())) { |
| if (node.isSame(joinKeys[i])) { |
| skip = true; |
| break; |
| } |
| } |
| if (skip) { |
| // there is already a predicate on this src. |
| continue; |
| } |
| List<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>(); |
| args.add(joinKeys[i]); |
| ExprNodeDesc nextExpr = ExprNodeGenericFuncDesc.newInstance( |
| FunctionRegistry.getFunctionInfo("isnotnull").getGenericUDF(), args); |
| filterPred = filterPred == null ? nextExpr : ExprNodeDescUtils |
| .mergePredicates(filterPred, nextExpr); |
| } |
| |
| if (filterPred == null) { |
| return input; |
| } |
| |
| OpParseContext inputCtx = opParseCtx.get(input); |
| RowResolver inputRR = inputCtx.getRowResolver(); |
| |
| if (input instanceof FilterOperator) { |
| FilterOperator f = (FilterOperator) input; |
| List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>(); |
| preds.add(f.getConf().getPredicate()); |
| preds.add(filterPred); |
| f.getConf().setPredicate(ExprNodeDescUtils.mergePredicates(preds)); |
| |
| return input; |
| } |
| |
| FilterDesc filterDesc = new FilterDesc(filterPred, false); |
| filterDesc.setGenerated(true); |
| Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(filterDesc, |
| new RowSchema(inputRR.getColumnInfos()), input), inputRR); |
| |
| LOG.debug("Created Filter Plan for {} row schema: {}", qb.getId(), inputRR); |
| return output; |
| } |
| |
| Integer genExprNodeDescRegex(String colRegex, String tabAlias, ASTNode sel, |
| List<ExprNodeDesc> exprList, Set<ColumnInfo> excludeCols, RowResolver input, |
| RowResolver colSrcRR, Integer pos, RowResolver output, List<String> aliases, |
| boolean ensureUniqueCols) throws SemanticException { |
| List<Pair<ColumnInfo, RowResolver>> colList = new ArrayList<>(); |
| Integer i = genColListRegex(colRegex, tabAlias, sel, |
| colList, excludeCols, input, colSrcRR, pos, output, aliases, ensureUniqueCols); |
| for (Pair<ColumnInfo, RowResolver> p : colList) { |
| exprList.add(ExprNodeTypeCheck.toExprNode(p.getLeft(), p.getRight())); |
| } |
| return i; |
| } |
| |
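| /* |
| * Summary comment (added for readability): expands a column regex (e.g. the "*" in |
| * "SELECT src.*") against the source RowResolver. Every visible, non-hidden column of a |
| * matching alias whose name matches the pattern is appended to colList and to the output |
| * RowResolver, and the replacement text used for unparsing/masking is accumulated; an |
| * INVALID_COLUMN error is raised if nothing matches. |
| */ |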
| @SuppressWarnings("nls") |
| // TODO: make aliases unique, otherwise needless rewriting takes place |
| Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, |
| List<Pair<ColumnInfo, RowResolver>> colList, Set<ColumnInfo> excludeCols, RowResolver input, |
| RowResolver colSrcRR, Integer pos, RowResolver output, List<String> aliases, |
| boolean ensureUniqueCols) throws SemanticException { |
| |
| if (colSrcRR == null) { |
| colSrcRR = input; |
| } |
| // The table alias should exist |
| if (tabAlias != null && !colSrcRR.hasTableAlias(tabAlias)) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.INVALID_TABLE_ALIAS.getMsg(), sel)); |
| } |
| |
| // TODO: Have to put in the support for AS clause |
| Pattern regex = null; |
| try { |
| regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE); |
| } catch (PatternSyntaxException e) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.INVALID_COLUMN.getMsg(), sel, e.getMessage())); |
| } |
| |
| StringBuilder replacementText = new StringBuilder(); |
| int matched = 0; |
| // add empty string to the list of aliases. Some operators (ex. GroupBy) add |
| // ColumnInfos for table alias "". |
| if (!aliases.contains("")) { |
| aliases.add(""); |
| } |
| /* |
| * track the input ColumnInfos that are added to the output. |
| * if a columnInfo has multiple mappings; then add the column only once, |
| * but carry the mappings forward. |
| */ |
| Map<ColumnInfo, ColumnInfo> inputColsProcessed = new HashMap<ColumnInfo, ColumnInfo>(); |
| // For expr "*", aliases should be iterated in the order they are specified |
| // in the query. |
| |
| if (colSrcRR.getNamedJoinInfo() != null) { |
| // We got a using() clause in the previous join. Need to generate the select list as |
| // per the standard: for * the join columns come first (non-repeated), |
| // followed by the other columns. |
| Map<String, ColumnInfo> leftMap = colSrcRR.getFieldMap(colSrcRR.getNamedJoinInfo().getAliases().get(0)); |
| Map<String, ColumnInfo> rightMap = colSrcRR.getFieldMap(colSrcRR.getNamedJoinInfo().getAliases().get(1)); |
| Map<String, ColumnInfo> chosenMap = null; |
| if (colSrcRR.getNamedJoinInfo().getHiveJoinType() != JoinType.RIGHTOUTER) { |
| chosenMap = leftMap; |
| } else { |
| chosenMap = rightMap; |
| } |
| // first get the columns in named columns |
| for (String columnName : colSrcRR.getNamedJoinInfo().getNamedColumns()) { |
| for (Map.Entry<String, ColumnInfo> entry : chosenMap.entrySet()) { |
| ColumnInfo colInfo = entry.getValue(); |
| if (!columnName.equals(colInfo.getAlias())) { |
| continue; |
| } |
| String name = colInfo.getInternalName(); |
| String[] tmp = colSrcRR.reverseLookup(name); |
| |
| // Skip the colinfos which are not for this particular alias |
| if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) { |
| continue; |
| } |
| |
| if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) { |
| continue; |
| } |
| ColumnInfo oColInfo = inputColsProcessed.get(colInfo); |
| if (oColInfo == null) { |
| colList.add(Pair.of(colInfo, colSrcRR)); |
| oColInfo = new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), |
| colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()); |
| inputColsProcessed.put(colInfo, oColInfo); |
| } |
| if (ensureUniqueCols) { |
| if (!output.putWithCheck(tmp[0], tmp[1], null, oColInfo)) { |
| throw new CalciteSemanticException("Cannot add column to RR: " + tmp[0] + "." |
| + tmp[1] + " => " + oColInfo + " due to duplication, see previous warnings", |
| UnsupportedFeature.Duplicates_in_RR); |
| } |
| } else { |
| output.put(tmp[0], tmp[1], oColInfo); |
| } |
| pos++; |
| matched++; |
| |
| if (unparseTranslator.isEnabled() || (tableMask.isEnabled() && analyzeRewrite == null)) { |
| if (replacementText.length() > 0) { |
| replacementText.append(", "); |
| } |
| replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf)); |
| replacementText.append("."); |
| replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf)); |
| } |
| } |
| } |
| } |
| for (String alias : aliases) { |
| Map<String, ColumnInfo> fMap = colSrcRR.getFieldMap(alias); |
| if (fMap == null) { |
| continue; |
| } |
| // For the tab.* case, add all the columns to the fieldList |
| // from the input schema |
| for (Map.Entry<String, ColumnInfo> entry : fMap.entrySet()) { |
| ColumnInfo colInfo = entry.getValue(); |
| if (colSrcRR.getNamedJoinInfo() != null && colSrcRR.getNamedJoinInfo().getNamedColumns().contains(colInfo.getAlias())) { |
| // we already added this column in select list. |
| continue; |
| } |
| if (excludeCols != null && excludeCols.contains(colInfo)) { |
| continue; // This was added during plan generation. |
| } |
| // First, look up the column from the source against which * is to be |
| // resolved. |
| // We'd later translate this into the column from the proper input, if |
| // it's valid. |
| // TODO: excludeCols may be possible to remove using the same |
| // technique. |
| String name = colInfo.getInternalName(); |
| String[] tmp = colSrcRR.reverseLookup(name); |
| |
| // Skip the colinfos which are not for this particular alias |
| if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) { |
| continue; |
| } |
| |
| if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) { |
| continue; |
| } |
| |
| // Not matching the regex? |
| if (!regex.matcher(tmp[1]).matches()) { |
| continue; |
| } |
| |
| // If input (GBY) is different than the source of columns, find the |
| // same column in input. |
| // TODO: This is fraught with peril. |
| if (input != colSrcRR) { |
| colInfo = input.get(tabAlias, tmp[1]); |
| if (colInfo == null) { |
| LOG.error("Cannot find colInfo for {}.{}, derived from [{}], in [{}]", tabAlias, tmp[1], colSrcRR, input); |
| throw new SemanticException(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY, tmp[1]); |
| } |
| // Capture the old mapping before re-resolving, so the debug log shows the actual translation. |
| String oldCol = null; |
| if (LOG.isDebugEnabled()) { |
| oldCol = name + " => " + (tmp == null ? "null" : (tmp[0] + "." + tmp[1])); |
| } |
| name = colInfo.getInternalName(); |
| tmp = input.reverseLookup(name); |
| if (LOG.isDebugEnabled()) { |
| String newCol = name + " => " + (tmp == null ? "null" : (tmp[0] + "." + tmp[1])); |
| LOG.debug("Translated [" + oldCol + "] to [" + newCol + "]"); |
| } |
| } |
| |
| ColumnInfo oColInfo = inputColsProcessed.get(colInfo); |
| if (oColInfo == null) { |
| colList.add(Pair.of(colInfo, input)); |
| oColInfo = new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), |
| colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()); |
| inputColsProcessed.put(colInfo, oColInfo); |
| } |
| assert nonNull(tmp); |
| if (ensureUniqueCols) { |
| if (!output.putWithCheck(tmp[0], tmp[1], null, oColInfo)) { |
| throw new CalciteSemanticException("Cannot add column to RR: " + tmp[0] + "." + tmp[1] |
| + " => " + oColInfo + " due to duplication, see previous warnings", |
| UnsupportedFeature.Duplicates_in_RR); |
| } |
| } else { |
| output.put(tmp[0], tmp[1], oColInfo); |
| } |
| pos++; |
| matched++; |
| |
| if (unparseTranslator.isEnabled() || tableMask.isEnabled()) { |
| if (replacementText.length() > 0) { |
| replacementText.append(", "); |
| } |
| replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf)); |
| replacementText.append("."); |
| replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf)); |
| } |
| } |
| } |
| |
| if (matched == 0) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.INVALID_COLUMN.getMsg(), sel)); |
| } |
| |
| unparseTranslator.addTranslation(sel, replacementText.toString()); |
| if (tableMask.isEnabled()) { |
| tableMask.addTranslation(sel, replacementText.toString()); |
| } |
| return pos; |
| } |
| |
| public static String getColumnInternalName(int pos) { |
| return HiveConf.getColumnInternalName(pos); |
| } |
| |
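| // Illustrative example (hypothetical script name): for a TRANSFORM command string |
| // "my_script.py arg1 arg2", getScriptProgName returns "my_script.py" and |
| // getScriptArgs returns " arg1 arg2" (note the leading space). |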
| private String getScriptProgName(String cmd) { |
| int end = cmd.indexOf(" "); |
| return (end == -1) ? cmd : cmd.substring(0, end); |
| } |
| |
| private String getScriptArgs(String cmd) { |
| int end = cmd.indexOf(" "); |
| return (end == -1) ? "" : cmd.substring(end); |
| } |
| |
| private String fetchFilesNotInLocalFilesystem(String cmd) { |
| SessionState ss = SessionState.get(); |
| String progName = getScriptProgName(cmd); |
| |
| if (!ResourceDownloader.isFileUri(progName)) { |
| String filePath = ss.add_resource(ResourceType.FILE, progName, true); |
| Path p = new Path(filePath); |
| String fileName = p.getName(); |
| String scriptArgs = getScriptArgs(cmd); |
| return fileName + scriptArgs; |
| } |
| |
| return cmd; |
| } |
| |
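| /* |
| * Summary comment (added for readability): builds the TableDesc for a TRANSFORM row |
| * format clause, either from an explicit SERDE name with optional properties |
| * (TOK_SERDENAME) or from a DELIMITED specification (TOK_SERDEPROPS) whose delimiters |
| * are translated into the default table descriptor's SerDe properties. |
| */ |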
| private TableDesc getTableDescFromSerDe(ASTNode child, String cols, |
| String colTypes) throws SemanticException { |
| if (child.getType() == HiveParser.TOK_SERDENAME) { |
| String serdeName = unescapeSQLString(child.getChild(0).getText()); |
| Class<? extends Deserializer> serdeClass = null; |
| |
| try { |
| serdeClass = (Class<? extends Deserializer>) Class.forName(serdeName, |
| true, Utilities.getSessionSpecifiedClassLoader()); |
| } catch (ClassNotFoundException e) { |
| throw new SemanticException(e); |
| } |
| |
| TableDesc tblDesc = PlanUtils.getTableDesc(serdeClass, Integer |
| .toString(Utilities.tabCode), cols, colTypes, null, false); |
| // copy all the properties |
| if (child.getChildCount() == 2) { |
| ASTNode prop = (ASTNode) ((ASTNode) child.getChild(1)).getChild(0); |
| for (int propChild = 0; propChild < prop.getChildCount(); propChild++) { |
| String key = unescapeSQLString(prop.getChild(propChild).getChild(0) |
| .getText()); |
| String value = unescapeSQLString(prop.getChild(propChild).getChild(1) |
| .getText()); |
| tblDesc.getProperties().setProperty(key, value); |
| } |
| } |
| return tblDesc; |
| } else if (child.getType() == HiveParser.TOK_SERDEPROPS) { |
| TableDesc tblDesc = PlanUtils.getDefaultTableDesc(Integer |
| .toString(Utilities.ctrlaCode), cols, colTypes, false); |
| int numChildRowFormat = child.getChildCount(); |
| for (int numC = 0; numC < numChildRowFormat; numC++) { |
| ASTNode rowChild = (ASTNode) child.getChild(numC); |
| switch (rowChild.getToken().getType()) { |
| case HiveParser.TOK_TABLEROWFORMATFIELD: |
| String fieldDelim = unescapeSQLString(rowChild.getChild(0).getText()); |
| tblDesc.getProperties() |
| .setProperty(serdeConstants.FIELD_DELIM, fieldDelim); |
| tblDesc.getProperties().setProperty(serdeConstants.SERIALIZATION_FORMAT, |
| fieldDelim); |
| |
| if (rowChild.getChildCount() >= 2) { |
| String fieldEscape = unescapeSQLString(rowChild.getChild(1) |
| .getText()); |
| tblDesc.getProperties().setProperty(serdeConstants.ESCAPE_CHAR, |
| fieldEscape); |
| } |
| break; |
| case HiveParser.TOK_TABLEROWFORMATCOLLITEMS: |
| tblDesc.getProperties().setProperty(serdeConstants.COLLECTION_DELIM, |
| unescapeSQLString(rowChild.getChild(0).getText())); |
| break; |
| case HiveParser.TOK_TABLEROWFORMATMAPKEYS: |
| tblDesc.getProperties().setProperty(serdeConstants.MAPKEY_DELIM, |
| unescapeSQLString(rowChild.getChild(0).getText())); |
| break; |
| case HiveParser.TOK_TABLEROWFORMATLINES: |
| String lineDelim = unescapeSQLString(rowChild.getChild(0).getText()); |
| tblDesc.getProperties().setProperty(serdeConstants.LINE_DELIM, lineDelim); |
| if (!lineDelim.equals("\n") && !lineDelim.equals("10")) { |
| throw new SemanticException(generateErrorMessage(rowChild, |
| ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg())); |
| } |
| break; |
| case HiveParser.TOK_TABLEROWFORMATNULL: |
| String nullFormat = unescapeSQLString(rowChild.getChild(0).getText()); |
| tblDesc.getProperties().setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, |
| nullFormat); |
| break; |
| default: |
| assert false; |
| } |
| } |
| |
| return tblDesc; |
| } |
| |
| // should never come here |
| return null; |
| } |
| |
| private void failIfColAliasExists(Set<String> nameSet, String name) |
| throws SemanticException { |
| if (nameSet.contains(name)) { |
| throw new SemanticException(ErrorMsg.COLUMN_ALIAS_ALREADY_EXISTS |
| .getMsg(name)); |
| } |
| nameSet.add(name); |
| } |
| |
| @SuppressWarnings("nls") |
| private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) |
| throws SemanticException { |
| // If there is no "AS" clause, the output schema will be "key,value" |
| List<ColumnInfo> outputCols = new ArrayList<ColumnInfo>(); |
| int inputSerDeNum = 1, inputRecordWriterNum = 2; |
| int outputSerDeNum = 4, outputRecordReaderNum = 5; |
| int outputColsNum = 6; |
| boolean outputColNames = false, outputColSchemas = false; |
| int execPos = 3; |
| boolean defaultOutputCols = false; |
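| // Child positions of the TOK_TRANSFORM node referenced below (per the constants above): |
| // 1 = input SerDe, 2 = input RecordWriter, 3 = script command, 4 = output SerDe, |
| // 5 = output RecordReader, 6 = output column names/schema. |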
| |
| // Go over all the children |
| if (trfm.getChildCount() > outputColsNum) { |
| ASTNode outCols = (ASTNode) trfm.getChild(outputColsNum); |
| if (outCols.getType() == HiveParser.TOK_ALIASLIST) { |
| outputColNames = true; |
| } else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) { |
| outputColSchemas = true; |
| } |
| } |
| |
| // If column type is not specified, use a string |
| if (!outputColNames && !outputColSchemas) { |
| String intName = getColumnInternalName(0); |
| ColumnInfo colInfo = new ColumnInfo(intName, |
| TypeInfoFactory.stringTypeInfo, null, false); |
| colInfo.setAlias("key"); |
| outputCols.add(colInfo); |
| intName = getColumnInternalName(1); |
| colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, |
| false); |
| colInfo.setAlias("value"); |
| outputCols.add(colInfo); |
| defaultOutputCols = true; |
| } else { |
| ASTNode collist = (ASTNode) trfm.getChild(outputColsNum); |
| int ccount = collist.getChildCount(); |
| |
| Set<String> colAliasNamesDuplicateCheck = new HashSet<String>(); |
| if (outputColNames) { |
| for (int i = 0; i < ccount; ++i) { |
| String colAlias = unescapeIdentifier(((ASTNode) collist.getChild(i)) |
| .getText()).toLowerCase(); |
| failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias); |
| String intName = getColumnInternalName(i); |
| ColumnInfo colInfo = new ColumnInfo(intName, |
| TypeInfoFactory.stringTypeInfo, null, false); |
| colInfo.setAlias(colAlias); |
| outputCols.add(colInfo); |
| } |
| } else { |
| for (int i = 0; i < ccount; ++i) { |
| ASTNode child = (ASTNode) collist.getChild(i); |
| assert child.getType() == HiveParser.TOK_TABCOL; |
| String colAlias = unescapeIdentifier(((ASTNode) child.getChild(0)) |
| .getText()).toLowerCase(); |
| failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias); |
| String intName = getColumnInternalName(i); |
| ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils |
| .getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode) child |
| .getChild(1))), null, false); |
| colInfo.setAlias(colAlias); |
| outputCols.add(colInfo); |
| } |
| } |
| } |
| |
| RowResolver out_rwsch = new RowResolver(); |
| StringBuilder columns = new StringBuilder(); |
| StringBuilder columnTypes = new StringBuilder(); |
| |
| for (int i = 0; i < outputCols.size(); ++i) { |
| if (i != 0) { |
| columns.append(","); |
| columnTypes.append(","); |
| } |
| |
| columns.append(outputCols.get(i).getInternalName()); |
| columnTypes.append(outputCols.get(i).getType().getTypeName()); |
| |
| out_rwsch.put(qb.getParseInfo().getAlias(), outputCols.get(i).getAlias(), |
| outputCols.get(i)); |
| } |
| |
| StringBuilder inpColumns = new StringBuilder(); |
| StringBuilder inpColumnTypes = new StringBuilder(); |
| List<ColumnInfo> inputSchema = opParseCtx.get(input).getRowResolver().getColumnInfos(); |
| for (int i = 0; i < inputSchema.size(); ++i) { |
| if (i != 0) { |
| inpColumns.append(","); |
| inpColumnTypes.append(","); |
| } |
| |
| inpColumns.append(inputSchema.get(i).getInternalName()); |
| inpColumnTypes.append(inputSchema.get(i).getType().getTypeName()); |
| } |
| |
| TableDesc outInfo; |
| TableDesc errInfo; |
| TableDesc inInfo; |
| String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE); |
| Class<? extends Deserializer> serde; |
| |
| try { |
| serde = (Class<? extends Deserializer>) Class.forName(defaultSerdeName, |
| true, Utilities.getSessionSpecifiedClassLoader()); |
| } catch (ClassNotFoundException e) { |
| throw new SemanticException(e); |
| } |
| |
| int fieldSeparator = Utilities.tabCode; |
| if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESCRIPTESCAPE)) { |
| fieldSeparator = Utilities.ctrlaCode; |
| } |
| |
| // Input and Output Serdes |
| if (trfm.getChild(inputSerDeNum).getChildCount() > 0) { |
| inInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm |
| .getChild(inputSerDeNum))).getChild(0), inpColumns.toString(), |
| inpColumnTypes.toString()); |
| } else { |
| // This is not a very clean approach and should be revisited later: for |
| // compatibility reasons, the user sees the results as JSON for custom |
| // scripts and has no way of specifying otherwise. Right now, it is |
| // hard-coded to DelimitedJSONSerDe. |
| inInfo = PlanUtils.getTableDesc(DelimitedJSONSerDe.class, Integer |
| .toString(fieldSeparator), inpColumns.toString(), inpColumnTypes |
| .toString(), null, false); |
| } |
| |
| if (trfm.getChild(outputSerDeNum).getChildCount() > 0) { |
| outInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm |
| .getChild(outputSerDeNum))).getChild(0), columns.toString(), |
| columnTypes.toString()); |
| // This is for backward compatibility. If the user did not specify the |
| // output column list, we assume that there are 2 columns: key and value. |
| // However, if the script outputs: col1, col2, col3 separated by TAB, the |
| // requirement is: key is col1 and value is (col2 TAB col3) |
| } else { |
| outInfo = PlanUtils.getTableDesc(serde, Integer |
| .toString(fieldSeparator), columns.toString(), columnTypes |
| .toString(), null, defaultOutputCols); |
| } |
| |
| // Error stream always uses the default serde with a single column |
| errInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), "KEY"); |
| |
| // Output record readers |
| Class<? extends RecordReader> outRecordReader = getRecordReader((ASTNode) trfm |
| .getChild(outputRecordReaderNum)); |
| Class<? extends RecordWriter> inRecordWriter = getRecordWriter((ASTNode) trfm |
| .getChild(inputRecordWriterNum)); |
| Class<? extends RecordReader> errRecordReader = getDefaultRecordReader(); |
| |
| Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new ScriptDesc( |
| fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())), |
| inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo), |
| new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch); |
| output.setColumnExprMap(new HashMap<String, ExprNodeDesc>()); // disable backtracking |
| |
| // Add URI entity for the transform script. The script is assumed to be local unless downloadable |
| if (conf.getBoolVar(ConfVars.HIVE_CAPTURE_TRANSFORM_ENTITY)) { |
| String scriptCmd = getScriptProgName(stripQuotes(trfm.getChild(execPos).getText())); |
| getInputs().add(new ReadEntity(new Path(scriptCmd), |
| ResourceDownloader.isFileUri(scriptCmd))); |
| } |
| |
| return output; |
| } |
| |
| private Class<? extends RecordReader> getRecordReader(ASTNode node) |
| throws SemanticException { |
| String name; |
| |
| if (node.getChildCount() == 0) { |
| name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER); |
| } else { |
| name = unescapeSQLString(node.getChild(0).getText()); |
| } |
| |
| try { |
| return (Class<? extends RecordReader>) Class.forName(name, true, |
| Utilities.getSessionSpecifiedClassLoader()); |
| } catch (ClassNotFoundException e) { |
| throw new SemanticException(e); |
| } |
| } |
| |
| private Class<? extends RecordReader> getDefaultRecordReader() |
| throws SemanticException { |
| String name; |
| |
| name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER); |
| |
| try { |
| return (Class<? extends RecordReader>) Class.forName(name, true, |
| Utilities.getSessionSpecifiedClassLoader()); |
| } catch (ClassNotFoundException e) { |
| throw new SemanticException(e); |
| } |
| } |
| |
| private Class<? extends RecordWriter> getRecordWriter(ASTNode node) |
| throws SemanticException { |
| String name; |
| |
| if (node.getChildCount() == 0) { |
| name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDWRITER); |
| } else { |
| name = unescapeSQLString(node.getChild(0).getText()); |
| } |
| |
| try { |
| return (Class<? extends RecordWriter>) Class.forName(name, true, |
| Utilities.getSessionSpecifiedClassLoader()); |
| } catch (ClassNotFoundException e) { |
| throw new SemanticException(e); |
| } |
| } |
| |
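| // Illustrative note (derived from the encoding below): ROLLUP over n group by keys is |
| // encoded as the n+1 bitmaps (1L << i) - 1 for i = 0..n, e.g. {0, 1, 3} for GROUP BY a, b WITH ROLLUP. |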
| private List<Long> getGroupingSetsForRollup(int size) { |
| List<Long> groupingSetKeys = new ArrayList<Long>(); |
| for (int i = 0; i <= size; i++) { |
| groupingSetKeys.add((1L << i) - 1); |
| } |
| return groupingSetKeys; |
| } |
| |
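| // Illustrative note: CUBE over n group by keys is encoded as all 2^n bitmaps, |
| // e.g. {0, 1, 2, 3} for GROUP BY a, b WITH CUBE. |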
| private List<Long> getGroupingSetsForCube(int size) { |
| long count = 1L << size; |
| List<Long> results = new ArrayList<Long>(); |
| for (long i = 0; i < count; ++i) { |
| results.add(i); |
| } |
| return results; |
| } |
| |
| // This function returns the grouping sets along with the grouping expressions |
| // Even if rollups and cubes are present in the query, they are converted to |
| // grouping sets at this point |
| Pair<List<ASTNode>, List<Long>> getGroupByGroupingSetsForClause( |
| QBParseInfo parseInfo, String dest) throws SemanticException { |
| List<Long> groupingSets = new ArrayList<Long>(); |
| List<ASTNode> groupByExprs = getGroupByForClause(parseInfo, dest); |
| |
| if (parseInfo.getDestRollups().contains(dest)) { |
| groupingSets = getGroupingSetsForRollup(groupByExprs.size()); |
| } else if (parseInfo.getDestCubes().contains(dest)) { |
| groupingSets = getGroupingSetsForCube(groupByExprs.size()); |
| } else if (parseInfo.getDestGroupingSets().contains(dest)) { |
| groupingSets = getGroupingSets(groupByExprs, parseInfo, dest); |
| } |
| |
| if (!groupingSets.isEmpty() && groupByExprs.size() > Long.SIZE) { |
| throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_SIZE_LIMIT.getMsg()); |
| } |
| |
| return Pair.of(groupByExprs, groupingSets); |
| } |
| |
| private List<Long> getGroupingSets(List<ASTNode> groupByExpr, QBParseInfo parseInfo, |
| String dest) throws SemanticException { |
| Map<String, Integer> exprPos = new HashMap<String, Integer>(); |
| for (int i = 0; i < groupByExpr.size(); ++i) { |
| ASTNode node = groupByExpr.get(i); |
| exprPos.put(node.toStringTree(), i); |
| } |
| |
| ASTNode root = parseInfo.getGroupByForClause(dest); |
| List<Long> result = new ArrayList<Long>(root == null ? 0 : root.getChildCount()); |
| if (root != null) { |
| for (int i = 0; i < root.getChildCount(); ++i) { |
| ASTNode child = (ASTNode) root.getChild(i); |
| if (child.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) { |
| continue; |
| } |
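| // Start from the all-ones bitmap and clear the bit of every expression that appears in |
| // this grouping set; the all-ones bitmap therefore denotes the empty (grand total) grouping set. |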
| long bitmap = LongMath.pow(2, groupByExpr.size()) - 1; |
| for (int j = 0; j < child.getChildCount(); ++j) { |
| String treeAsString = child.getChild(j).toStringTree(); |
| Integer pos = exprPos.get(treeAsString); |
| if (pos == null) { |
| throw new SemanticException( |
| generateErrorMessage((ASTNode) child.getChild(j), |
| ErrorMsg.HIVE_GROUPING_SETS_EXPR_NOT_IN_GROUPBY.getErrorCodedMsg())); |
| } |
| bitmap = unsetBit(bitmap, groupByExpr.size() - pos - 1); |
| |
| // Add the copy translation for grouping set keys. This makes sure that the same translation |
| // applied to a group by key is also applied to the corresponding grouping set key. For example, |
| // if a translation is added to the group by key to prefix the table name to the column |
| // name (tbl.key), the same is done for the grouping set keys. |
| unparseTranslator.addCopyTranslation((ASTNode)child.getChild(j), groupByExpr.get(pos)); |
| } |
| result.add(bitmap); |
| } |
| } |
| |
| if (checkForEmptyGroupingSets(result, LongMath.pow(2, groupByExpr.size()) - 1)) { |
| throw new SemanticException( |
| ErrorMsg.HIVE_GROUPING_SETS_EMPTY.getMsg()); |
| } |
| return result; |
| } |
| |
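| // Returns true only if every grouping set is the all-ones bitmap, i.e. every set is the |
| // empty (grand total) grouping set; such a GROUPING SETS clause is rejected as empty. |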
| private boolean checkForEmptyGroupingSets(List<Long> bitmaps, long groupingIdAllSet) { |
| boolean ret = true; |
| for (long mask : bitmaps) { |
| ret &= mask == groupingIdAllSet; |
| } |
| return ret; |
| } |
| |
| public static long setBit(long bitmap, int bitIdx) { |
| return bitmap | (1L << bitIdx); |
| } |
| |
| private long unsetBit(long bitmap, int bitIdx) { |
| return bitmap & ~(1L << bitIdx); |
| } |
| |
| /** |
| * Returns the GROUP BY expressions, if present; |
| * DISTINCT, if present, will be handled when generating the SELECT. |
| */ |
| List<ASTNode> getGroupByForClause(QBParseInfo parseInfo, String dest) throws SemanticException { |
| ASTNode selectExpr = parseInfo.getSelForClause(dest); |
| Collection<ASTNode> aggregateFunction = parseInfo.getDestToAggregationExprs().get(dest).values(); |
| if (!(this instanceof CalcitePlanner) && isSelectDistinct(selectExpr) && hasGroupBySibling(selectExpr)) { |
| throw new SemanticException("SELECT DISTINCT with GROUP BY is only supported with CBO"); |
| } |
| |
| if (isSelectDistinct(selectExpr) && !hasGroupBySibling(selectExpr) && |
| !isAggregateInSelect(selectExpr, aggregateFunction)) { |
| List<ASTNode> result = new ArrayList<ASTNode>(selectExpr.getChildCount()); |
| for (int i = 0; i < selectExpr.getChildCount(); ++i) { |
| if (((ASTNode) selectExpr.getChild(i)).getToken().getType() == HiveParser.QUERY_HINT) { |
| continue; |
| } |
| // table.column AS alias |
| ASTNode grpbyExpr = (ASTNode) selectExpr.getChild(i).getChild(0); |
| result.add(grpbyExpr); |
| } |
| return result; |
| } else { |
| // look for a true GBY |
| ASTNode grpByExprs = parseInfo.getGroupByForClause(dest); |
| List<ASTNode> result = new ArrayList<ASTNode>(grpByExprs == null ? 0 : grpByExprs.getChildCount()); |
| if (grpByExprs != null) { |
| for (int i = 0; i < grpByExprs.getChildCount(); ++i) { |
| ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i); |
| if (grpbyExpr.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) { |
| result.add(grpbyExpr); |
| } |
| } |
| } |
| return result; |
| } |
| } |
| |
| protected boolean hasGroupBySibling(ASTNode selectExpr) { |
| boolean isGroupBy = false; |
| if (selectExpr.getParent() != null && selectExpr.getParent() instanceof Node) { |
| for (Node sibling : ((Node)selectExpr.getParent()).getChildren()) { |
| isGroupBy |= sibling instanceof ASTNode && ((ASTNode)sibling).getType() == HiveParser.TOK_GROUPBY; |
| } |
| } |
| |
| return isGroupBy; |
| } |
| |
| protected boolean isSelectDistinct(ASTNode expr) { |
| return expr.getType() == HiveParser.TOK_SELECTDI; |
| } |
| |
| private boolean isAggregateInSelect(Node node, Collection<ASTNode> aggregateFunction) { |
| if (node.getChildren() == null) { |
| return false; |
| } |
| |
| for (Node child : node.getChildren()) { |
| if (aggregateFunction.contains(child) || isAggregateInSelect(child, aggregateFunction)) { |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| static String[] getColAlias(ASTNode selExpr, String defaultName, |
| RowResolver inputRR, boolean includeFuncName, int colNum) { |
| String colAlias = null; |
| String tabAlias = null; |
| String[] colRef = new String[2]; |
| |
| // for queries with windowing expressions, the selexpr may have a third child |
| if (selExpr.getChildCount() == 2 || |
| (selExpr.getChildCount() == 3 && |
| selExpr.getChild(2).getType() == HiveParser.TOK_WINDOWSPEC)) { |
| // return zz for "xx + yy AS zz" |
| colAlias = unescapeIdentifier(selExpr.getChild(1).getText().toLowerCase()); |
| colRef[0] = tabAlias; |
| colRef[1] = colAlias; |
| return colRef; |
| } |
| |
| ASTNode root = (ASTNode) selExpr.getChild(0); |
| if (root.getType() == HiveParser.TOK_TABLE_OR_COL) { |
| colAlias = |
| BaseSemanticAnalyzer.unescapeIdentifier(root.getChild(0).getText().toLowerCase()); |
| colRef[0] = tabAlias; |
| colRef[1] = colAlias; |
| return colRef; |
| } |
| |
| if (root.getType() == HiveParser.DOT) { |
| ASTNode tab = (ASTNode) root.getChild(0); |
| if (tab.getType() == HiveParser.TOK_TABLE_OR_COL) { |
| String t = unescapeIdentifier(tab.getChild(0).getText()); |
| if (inputRR.hasTableAlias(t)) { |
| tabAlias = t; |
| } |
| } |
| |
| // Return zz for "xx.zz" and "xx.yy.zz" |
| ASTNode col = (ASTNode) root.getChild(1); |
| if (col.getType() == HiveParser.Identifier) { |
| colAlias = unescapeIdentifier(col.getText().toLowerCase()); |
| } |
| } |
| |
| // if requested, generate the alias using the function name |
| if (includeFuncName && (root.getType() == HiveParser.TOK_FUNCTION)) { |
| |
| String expr_flattened = root.toStringTree(); |
| |
| // remove all TOK tokens |
| String expr_no_tok = expr_flattened.replaceAll("tok_\\S+", ""); |
| |
| // remove all non-alphanumeric characters and replace whitespace spans with underscores |
| String expr_formatted = expr_no_tok.replaceAll("\\W", " ").trim().replaceAll("\\s+", "_"); |
| |
| // limit length to AUTOGEN_COLALIAS_PRFX_MAXLENGTH chars |
| if (expr_formatted.length() > AUTOGEN_COLALIAS_PRFX_MAXLENGTH) { |
| expr_formatted = expr_formatted.substring(0, AUTOGEN_COLALIAS_PRFX_MAXLENGTH); |
| } |
| |
| // append colnum to make it unique |
| colAlias = expr_formatted.concat("_" + colNum); |
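| // For example (illustrative): concat(a, b) flattens to something like |
| // "(tok_function concat (tok_table_or_col a) (tok_table_or_col b))", which becomes |
| // "concat_a_b" after stripping tokens, and "concat_a_b_<colNum>" as the final alias. |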
| } |
| |
| if (colAlias == null) { |
| // Return defaultName if selExpr is not a simple xx.yy.zz |
| colAlias = defaultName + colNum; |
| } |
| |
| colRef[0] = tabAlias; |
| colRef[1] = colAlias; |
| return colRef; |
| } |
| |
| /** |
| * Returns whether the pattern is a regex expression (instead of a normal |
| * string). A normal string contains only letters, digits and "_". |
| */ |
| static boolean isRegex(String pattern, HiveConf conf) { |
| String qIdSupport = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT); |
| if (!"none".equals(qIdSupport)) { |
| return false; |
| } |
| for (int i = 0; i < pattern.length(); i++) { |
| if (!Character.isLetterOrDigit(pattern.charAt(i)) |
| && pattern.charAt(i) != '_') { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| |
| private Operator<?> genSelectPlan(String dest, QB qb, Operator<?> input, |
| Operator<?> inputForSelectStar) throws SemanticException { |
| ASTNode selExprList = qb.getParseInfo().getSelForClause(dest); |
| Operator<?> op = genSelectPlan(dest, selExprList, qb, input, inputForSelectStar, false); |
| |
| LOG.debug("Created Select Plan for clause: {}", dest); |
| |
| return op; |
| } |
| |
| @SuppressWarnings("nls") |
| private Operator<?> genSelectPlan(String dest, ASTNode selExprList, QB qb, Operator<?> input, |
| Operator<?> inputForSelectStar, boolean outerLV) throws SemanticException { |
| |
| LOG.debug("tree: {}", selExprList.toStringTree()); |
| |
| List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>(); |
| RowResolver out_rwsch = new RowResolver(); |
| ASTNode trfm = null; |
| Integer pos = 0; |
| RowResolver inputRR = opParseCtx.get(input).getRowResolver(); |
| RowResolver starRR = null; |
| if (inputForSelectStar != null && inputForSelectStar != input) { |
| starRR = opParseCtx.get(inputForSelectStar).getRowResolver(); |
| } |
| // SELECT * or SELECT TRANSFORM(*) |
| boolean selectStar = false; |
| int posn = 0; |
| boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.QUERY_HINT); |
| if (hintPresent) { |
| posn++; |
| } |
| |
| boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == |
| HiveParser.TOK_TRANSFORM); |
| if (isInTransform) { |
| queryProperties.setUsesScript(true); |
| globalLimitCtx.setHasTransformOrUDTF(true); |
| trfm = (ASTNode) selExprList.getChild(posn).getChild(0); |
| } |
| |
| // Detect queries of the form SELECT udtf(col) AS ... |
| // by looking for a function as the first child, and then checking to see |
| // if the function is a Generic UDTF. It's not as clean as TRANSFORM due to |
| // the lack of a special token. |
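| // For example (illustrative): SELECT explode(col) AS x FROM t, or |
| // SELECT explode(map_col) AS (k, v) FROM t, are detected here as UDTF selects. |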
| boolean isUDTF = false; |
| String udtfTableAlias = null; |
| List<String> udtfColAliases = new ArrayList<String>(); |
| ASTNode udtfExpr = (ASTNode) selExprList.getChild(posn).getChild(0); |
| GenericUDTF genericUDTF = null; |
| |
| int udtfExprType = udtfExpr.getType(); |
| if (udtfExprType == HiveParser.TOK_FUNCTION |
| || udtfExprType == HiveParser.TOK_FUNCTIONSTAR) { |
| String funcName = TypeCheckProcFactory.getFunctionText(udtfExpr, true); |
| FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName); |
| if (fi != null) { |
| genericUDTF = fi.getGenericUDTF(); |
| } |
| isUDTF = (genericUDTF != null); |
| if (isUDTF) { |
| globalLimitCtx.setHasTransformOrUDTF(true); |
| } |
| if (isUDTF && !fi.isNative()) { |
| unparseTranslator.addIdentifierTranslation((ASTNode) udtfExpr |
| .getChild(0)); |
| } |
| if (isUDTF && (selectStar = udtfExprType == HiveParser.TOK_FUNCTIONSTAR)) { |
| genExprNodeDescRegex(".*", null, (ASTNode) udtfExpr.getChild(0), |
| colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false); |
| } |
| } |
| |
| if (isUDTF) { |
| // Only support a single expression when it's a UDTF |
| if (selExprList.getChildCount() > 1) { |
| throw new SemanticException(generateErrorMessage( |
| (ASTNode) selExprList.getChild(1), |
| ErrorMsg.UDTF_MULTIPLE_EXPR.getMsg())); |
| } |
| |
| ASTNode selExpr = (ASTNode) selExprList.getChild(posn); |
| |
| // Get the column / table aliases from the expression. Start from 1 as |
| // 0 is the TOK_FUNCTION |
| // Column names can also be inferred from the result of the UDTF |
| for (int i = 1; i < selExpr.getChildCount(); i++) { |
| ASTNode selExprChild = (ASTNode) selExpr.getChild(i); |
| switch (selExprChild.getType()) { |
| case HiveParser.Identifier: |
| udtfColAliases.add(unescapeIdentifier(selExprChild.getText().toLowerCase())); |
| unparseTranslator.addIdentifierTranslation(selExprChild); |
| break; |
| case HiveParser.TOK_TABALIAS: |
| assert (selExprChild.getChildCount() == 1); |
| udtfTableAlias = unescapeIdentifier(selExprChild.getChild(0) |
| .getText()); |
| qb.addAlias(udtfTableAlias); |
| unparseTranslator.addIdentifierTranslation((ASTNode) selExprChild |
| .getChild(0)); |
| break; |
| default: |
| assert (false); |
| } |
| } |
| LOG.debug("UDTF table alias is {}", udtfTableAlias); |
| LOG.debug("UDTF col aliases are {}", udtfColAliases); |
| } |
| |
| // The list of expressions after SELECT or SELECT TRANSFORM. |
| ASTNode exprList; |
| if (isInTransform) { |
| exprList = (ASTNode) trfm.getChild(0); |
| } else if (isUDTF) { |
| exprList = udtfExpr; |
| } else { |
| exprList = selExprList; |
| } |
| |
| LOG.debug("genSelectPlan: input = {} starRr = {}", inputRR, starRR); |
| |
| // For UDTF's, skip the function name to get the expressions |
| int startPosn = isUDTF ? posn + 1 : posn; |
| if (isInTransform) { |
| startPosn = 0; |
| } |
| |
| final boolean cubeRollupGrpSetPresent = (!qb.getParseInfo().getDestRollups().isEmpty() |
| || !qb.getParseInfo().getDestGroupingSets().isEmpty() |
| || !qb.getParseInfo().getDestCubes().isEmpty()); |
| Set<String> colAliases = new HashSet<String>(); |
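| // offset keeps the auto-generated alias numbering aligned with the output position when a |
| // star expansion produces multiple columns and the unparse translator is enabled (see below) |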
| int offset = 0; |
| // Iterate over all expressions (either after SELECT, or in SELECT TRANSFORM) |
| for (int i = startPosn; i < exprList.getChildCount(); ++i) { |
| |
| // child can be EXPR AS ALIAS, or EXPR. |
| ASTNode child = (ASTNode) exprList.getChild(i); |
| boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2); |
| boolean isWindowSpec = child.getChildCount() == 3 && |
| child.getChild(2).getType() == HiveParser.TOK_WINDOWSPEC; |
| |
| // EXPR AS (ALIAS,...) parses, but is only allowed for UDTFs. |
| // This check is not needed (and would be invalid) when there is a transform, because the |
| // ASTs are slightly different. |
| if (!isWindowSpec && !isInTransform && !isUDTF && child.getChildCount() > 2) { |
| throw new SemanticException(generateErrorMessage( |
| (ASTNode) child.getChild(2), |
| ErrorMsg.INVALID_AS.getMsg())); |
| } |
| |
| // The real expression |
| ASTNode expr; |
| String tabAlias; |
| String colAlias; |
| |
| if (isInTransform || isUDTF) { |
| tabAlias = null; |
| colAlias = autogenColAliasPrfxLbl + i; |
| expr = child; |
| } else { |
| // Get rid of TOK_SELEXPR |
| expr = (ASTNode) child.getChild(0); |
| String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR, |
| autogenColAliasPrfxIncludeFuncName, i + offset); |
| tabAlias = colRef[0]; |
| colAlias = colRef[1]; |
| if (hasAsClause) { |
| unparseTranslator.addIdentifierTranslation((ASTNode) child |
| .getChild(1)); |
| } |
| } |
| colAliases.add(colAlias); |
| |
| // The real expression |
| if (expr.getType() == HiveParser.TOK_ALLCOLREF) { |
| int initPos = pos; |
| pos = genExprNodeDescRegex(".*", expr.getChildCount() == 0 ? null |
| : getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), |
| expr, colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false); |
| if (unparseTranslator.isEnabled()) { |
| offset += pos - initPos - 1; |
| } |
| selectStar = true; |
| } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause |
| && !inputRR.getIsExprResolver() |
| && isRegex(unescapeIdentifier(expr.getChild(0).getText()), conf)) { |
| // In case the expression is a regex COL. |
| // This can only happen without AS clause |
| // We don't allow this for ExprResolver - the Group By case |
| pos = genExprNodeDescRegex(unescapeIdentifier(expr.getChild(0).getText()), |
| null, expr, colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false); |
| } else if (expr.getType() == HiveParser.DOT |
| && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL |
| && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0) |
| .getChild(0).getText().toLowerCase())) && !hasAsClause |
| && !inputRR.getIsExprResolver() |
| && isRegex(unescapeIdentifier(expr.getChild(1).getText()), conf)) { |
| // In case the expression is TABLE.COL (col can be regex). |
| // This can only happen without AS clause |
| // We don't allow this for ExprResolver - the Group By case |
| pos = genExprNodeDescRegex(unescapeIdentifier(expr.getChild(1).getText()), |
| unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), |
| expr, colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false); |
| } else { |
| // Case when this is an expression |
| TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, true, isCBOExecuted()); |
| // We allow stateful functions in the SELECT list (but nowhere else) |
| tcCtx.setAllowStatefulFunctions(true); |
| tcCtx.setAllowDistinctFunctions(false); |
| if (!isCBOExecuted() && !qb.getParseInfo().getDestToGroupBy().isEmpty()) { |
| // If CBO did not optimize the query, we might need to rewrite the grouping function. |
| // Special handling of the grouping function: |
| expr = rewriteGroupingFunctionAST(getGroupByForClause(qb.getParseInfo(), dest), expr, |
| !cubeRollupGrpSetPresent); |
| } |
| ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx); |
| String recommended = recommendName(exp, colAlias); |
| if (recommended != null && !colAliases.contains(recommended) && |
| out_rwsch.get(null, recommended) == null) { |
| colAlias = recommended; |
| } |
| colList.add(exp); |
| |
| ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos), |
| exp.getWritableObjectInspector(), tabAlias, false); |
| colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) && ((ExprNodeColumnDesc) exp) |
| .isSkewedCol()); |
| out_rwsch.put(tabAlias, colAlias, colInfo); |
| |
| if ( exp instanceof ExprNodeColumnDesc ) { |
| ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp; |
| String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn()); |
| if ( altMapping != null ) { |
| out_rwsch.put(altMapping[0], altMapping[1], colInfo); |
| } |
| } |
| |
| pos++; |
| } |
| } |
| selectStar = selectStar && exprList.getChildCount() == posn + 1; |
| |
| out_rwsch = handleInsertStatementSpec(colList, dest, out_rwsch, qb, selExprList); |
| |
| List<String> columnNames = new ArrayList<String>(); |
| Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); |
| for (int i = 0; i < colList.size(); i++) { |
| String outputCol = getColumnInternalName(i); |
| colExprMap.put(outputCol, colList.get(i)); |
| columnNames.add(outputCol); |
| } |
| |
| Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( |
| new SelectDesc(colList, columnNames, selectStar), new RowSchema( |
| out_rwsch.getColumnInfos()), input), out_rwsch); |
| |
| output.setColumnExprMap(colExprMap); |
| if (isInTransform) { |
| output = genScriptPlan(trfm, qb, output); |
| } |
| |
| if (isUDTF) { |
| output = genUDTFPlan(genericUDTF, udtfTableAlias, udtfColAliases, qb, output, outerLV); |
| } |
| |
| LOG.debug("Created Select Plan row schema: {}", out_rwsch); |
| return output; |
| } |
| |
| private RowResolver getColForInsertStmtSpec(Map<String, ExprNodeDesc> targetCol2Projection, final Table target, |
| Map<String, ColumnInfo> targetCol2ColumnInfo, int colListPos, |
| List<TypeInfo> targetTableColTypes, List<ExprNodeDesc> newColList, |
| List<String> targetTableColNames) |
| throws SemanticException { |
| RowResolver newOutputRR = new RowResolver(); |
| Map<String, String> colNameToDefaultVal = null; |
| |
| // see if we need to fetch default constraints from metastore |
| if(targetCol2Projection.size() < targetTableColNames.size()) { |
| colNameToDefaultVal = getColNameToDefaultValueMap(target); |
| } |
| for (int i = 0; i < targetTableColNames.size(); i++) { |
| String f = targetTableColNames.get(i); |
| if(targetCol2Projection.containsKey(f)) { |
| //put existing column in new list to make sure it is in the right position |
| newColList.add(targetCol2Projection.get(f)); |
| ColumnInfo ci = targetCol2ColumnInfo.get(f); |
| ci.setInternalName(getColumnInternalName(colListPos)); |
| newOutputRR.put(ci.getTabAlias(), ci.getInternalName(), ci); |
| } |
| else { |
| //add new 'synthetic' columns for projections not provided by Select |
| assert(colNameToDefaultVal != null); |
| ExprNodeDesc exp = null; |
| if(colNameToDefaultVal.containsKey(f)) { |
| // make an expression for default value |
| String defaultValue = colNameToDefaultVal.get(f); |
| ParseDriver parseDriver = new ParseDriver(); |
| try { |
| ASTNode defValAst = parseDriver.parseExpression(defaultValue); |
| |
| exp = ExprNodeTypeCheck.genExprNode(defValAst, new TypeCheckCtx(null)).get(defValAst); |
| } catch(Exception e) { |
| throw new SemanticException("Error while parsing default value: " + defaultValue |
| + ". Error message: " + e.getMessage()); |
| } |
| LOG.debug("Added default value from metastore: {}", exp); |
| } |
| else { |
| exp = new ExprNodeConstantDesc(targetTableColTypes.get(i), null); |
| } |
| newColList.add(exp); |
| final String tableAlias = null;//this column doesn't come from any table |
| ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(colListPos), |
| exp.getWritableObjectInspector(), tableAlias, false); |
| newOutputRR.put(colInfo.getTabAlias(), colInfo.getInternalName(), colInfo); |
| } |
| colListPos++; |
| } |
| return newOutputRR; |
| } |
| |
| /** |
| * This modifies the Select projections when the Select is part of an insert statement and |
| * the insert statement specifies a column list for the target table, e.g. |
| * create table source (a int, b int); |
| * create table target (x int, y int, z int); |
| * insert into target(z,x) select * from source |
| * |
| * Once the * is resolved to 'a,b', this list needs to be rewritten to 'b,null,a' so that it looks |
| * as if the original query was written as |
| * insert into target select b, null, a from source |
| * |
| * If the target schema is not specified, this is a no-op. |
| * |
| * @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx) |
| * @throws SemanticException |
| */ |
| RowResolver handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest, |
| RowResolver outputRR, QB qb, |
| ASTNode selExprList) throws SemanticException { |
| //(z,x) |
| List<String> targetTableSchema = qb.getParseInfo().getDestSchemaForClause(dest);//specified in the query |
| if(targetTableSchema == null) { |
| //no insert schema was specified |
| return outputRR; |
| } |
| if(targetTableSchema.size() != col_list.size()) { |
| Table target = qb.getMetaData().getDestTableForAlias(dest); |
| Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null; |
| throw new SemanticException(generateErrorMessage(selExprList, |
| "Expected " + targetTableSchema.size() + " columns for " + dest + |
| (target != null ? "/" + target.getCompleteName() : (partition != null ? "/" + partition.getCompleteName() : "")) + |
| "; select produces " + col_list.size() + " columns")); |
| } |
| //e.g. map z->expr for a |
| Map<String, ExprNodeDesc> targetCol2Projection = new HashMap<String, ExprNodeDesc>(); |
| //e.g. map z->ColumnInfo for a |
| Map<String, ColumnInfo> targetCol2ColumnInfo = new HashMap<String, ColumnInfo>(); |
| int colListPos = 0; |
| for(String targetCol : targetTableSchema) { |
| targetCol2ColumnInfo.put(targetCol, outputRR.getColumnInfos().get(colListPos)); |
| targetCol2Projection.put(targetCol, col_list.get(colListPos++)); |
| } |
| Table target = qb.getMetaData().getDestTableForAlias(dest); |
| Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null; |
| if(target == null && partition == null) { |
| throw new SemanticException(generateErrorMessage(selExprList, |
| "No table/partition found in QB metadata for dest='" + dest + "'")); |
| } |
| List<ExprNodeDesc> newColList = new ArrayList<ExprNodeDesc>(); |
| colListPos = 0; |
| List<FieldSchema> targetTableCols = target != null ? target.getCols() : partition.getCols(); |
| List<String> targetTableColNames = new ArrayList<String>(); |
| List<TypeInfo> targetTableColTypes = new ArrayList<TypeInfo>(); |
| for(FieldSchema fs : targetTableCols) { |
| targetTableColNames.add(fs.getName()); |
| targetTableColTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType())); |
| } |
| Map<String, String> partSpec = qb.getMetaData().getPartSpecForAlias(dest); |
| if(partSpec != null) { |
| //find dynamic partition columns |
| //relies on consistent order via LinkedHashMap |
| for(Map.Entry<String, String> partKeyVal : partSpec.entrySet()) { |
| if (partKeyVal.getValue() == null) { |
| targetTableColNames.add(partKeyVal.getKey());//these must be after non-partition cols |
| targetTableColTypes.add(TypeInfoFactory.stringTypeInfo); |
| } |
| } |
| } |
| |
| //now make the select produce <regular columns>,<dynamic partition columns>, |
| //where missing columns are NULL-filled |
| Table tbl = target == null? partition.getTable() : target; |
| RowResolver newOutputRR = getColForInsertStmtSpec(targetCol2Projection, tbl, targetCol2ColumnInfo, colListPos, |
| targetTableColTypes, newColList, targetTableColNames); |
| col_list.clear(); |
| col_list.addAll(newColList); |
| return newOutputRR; |
| } |
| |
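| // If the alias was auto-generated (autogenColAliasPrfxLbl + n), prefer the input column name |
| // recommended from the expression, as long as that name is not itself auto-generated. |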
| String recommendName(ExprNodeDesc exp, String colAlias) { |
| if (!colAlias.startsWith(autogenColAliasPrfxLbl)) { |
| return null; |
| } |
| String column = ExprNodeDescUtils.recommendInputName(exp); |
| if (column != null && !column.startsWith(autogenColAliasPrfxLbl)) { |
| return column; |
| } |
| return null; |
| } |
| |
| String getAutogenColAliasPrfxLbl() { |
| return this.autogenColAliasPrfxLbl; |
| } |
| |
| boolean autogenColAliasPrfxIncludeFuncName() { |
| return this.autogenColAliasPrfxIncludeFuncName; |
| } |
| |
| /** |
| * Class to store GenericUDAF related information. |
| */ |
| public static class GenericUDAFInfo { |
| public List<ExprNodeDesc> convertedParameters; |
| public GenericUDAFEvaluator genericUDAFEvaluator; |
| public TypeInfo returnType; |
| } |
| |
| /** |
| * Convert a list of ExprNodeDesc to a list of ObjectInspectors. |
| */ |
| static List<ObjectInspector> getWritableObjectInspector(List<ExprNodeDesc> exprs) { |
| return exprs.stream().map(ExprNodeDesc::getWritableObjectInspector).collect(Collectors.toList()); |
| } |
| |
| /** |
| * Returns the GenericUDAFEvaluator for the aggregation. This is called once |
| * for each GroupBy aggregation. |
| */ |
| public static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName, |
| List<ExprNodeDesc> aggParameters, ASTNode aggTree, |
| boolean isDistinct, boolean isAllColumns) |
| throws SemanticException { |
| return getGenericUDAFEvaluator2(aggName, getWritableObjectInspector(aggParameters), |
| aggTree, isDistinct, isAllColumns); |
| } |
| |
| public static GenericUDAFEvaluator getGenericUDAFEvaluator2(String aggName, |
| List<ObjectInspector> aggParameterOIs, ASTNode aggTree, |
| boolean isDistinct, boolean isAllColumns) |
| throws SemanticException { |
| GenericUDAFEvaluator result = FunctionRegistry.getGenericUDAFEvaluator( |
| aggName, aggParameterOIs, isDistinct, isAllColumns); |
| if (null == result) { |
| String reason = "Looking for UDAF Evaluator\"" + aggName |
| + "\" with parameters " + aggParameterOIs; |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.INVALID_FUNCTION_SIGNATURE.getMsg(), |
| (ASTNode) aggTree.getChild(0), reason)); |
| } |
| return result; |
| } |
| |
| /** |
| * Returns the GenericUDAFInfo struct for the aggregation. |
| * |
| * @param evaluator |
| * @param emode |
| * @param aggParameters |
| * The exprNodeDesc of the original parameters |
| * @return GenericUDAFInfo |
| * @throws SemanticException |
| * when the UDAF is not found or has problems. |
| */ |
| public static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, |
| GenericUDAFEvaluator.Mode emode, List<ExprNodeDesc> aggParameters) |
| throws SemanticException { |
| GenericUDAFInfo udafInfo = getGenericUDAFInfo2( |
| evaluator, emode, getWritableObjectInspector(aggParameters)); |
| udafInfo.convertedParameters = aggParameters; |
| return udafInfo; |
| } |
| |
| public static GenericUDAFInfo getGenericUDAFInfo2(GenericUDAFEvaluator evaluator, |
| GenericUDAFEvaluator.Mode emode, List<ObjectInspector> aggOIs) |
| throws SemanticException { |
| |
| GenericUDAFInfo r = new GenericUDAFInfo(); |
| |
| // set r.genericUDAFEvaluator |
| r.genericUDAFEvaluator = evaluator; |
| |
| // set r.returnType |
| ObjectInspector returnOI = null; |
| try { |
| ObjectInspector[] aggOIArray = aggOIs.toArray(new ObjectInspector[0]); |
| returnOI = r.genericUDAFEvaluator.init(emode, aggOIArray); |
| r.returnType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI); |
| } catch (HiveException e) { |
| throw new SemanticException(e); |
| } |
| |
| return r; |
| } |
| |
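| // Maps the GroupByDesc mode to the UDAF evaluator mode. For distinct aggregates no map-side |
| // partial aggregation is performed, so PARTIALS and MERGEPARTIAL fall back to PARTIAL1 and |
| // COMPLETE respectively (the evaluator still starts from the original rows). |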
| public static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode( |
| GroupByDesc.Mode mode, boolean isDistinct) { |
| switch (mode) { |
| case COMPLETE: |
| return GenericUDAFEvaluator.Mode.COMPLETE; |
| case HASH: |
| case PARTIAL1: |
| return GenericUDAFEvaluator.Mode.PARTIAL1; |
| case PARTIAL2: |
| return GenericUDAFEvaluator.Mode.PARTIAL2; |
| case PARTIALS: |
| return isDistinct ? GenericUDAFEvaluator.Mode.PARTIAL1 |
| : GenericUDAFEvaluator.Mode.PARTIAL2; |
| case FINAL: |
| return GenericUDAFEvaluator.Mode.FINAL; |
| case MERGEPARTIAL: |
| return isDistinct ? GenericUDAFEvaluator.Mode.COMPLETE |
| : GenericUDAFEvaluator.Mode.FINAL; |
| default: |
| throw new RuntimeException("internal error in groupByDescModeToUDAFMode"); |
| } |
| } |
| |
| /** |
| * Check if the given internalName represents a constant parameter in aggregation parameters |
| * of an aggregation tree. |
| * This method is only invoked when map-side aggregation is not involved. In this case, |
| * every parameter in every aggregation tree should already have a corresponding ColumnInfo, |
| * which is generated when the ReduceSinkOperator feeding the GroupByOperator being |
| * generated was created. If we find that this parameter is a constant parameter, |
| * we will return the corresponding ExprNodeDesc from reduceValues, so that we do not need to |
| * use a new ExprNodeColumnDesc, which cannot be treated as a constant parameter |
| * (since the writableObjectInspector of an ExprNodeColumnDesc will not be |
| * an instance of ConstantObjectInspector). |
| * |
| * @param reduceValues |
| * value columns of the corresponding ReduceSinkOperator |
| * @param internalName |
| * the internal name of this parameter |
| * @return the ExprNodeDesc of the constant parameter if the given internalName represents |
| * a constant parameter; otherwise, return null |
| */ |
| public static ExprNodeDesc isConstantParameterInAggregationParameters(String internalName, |
| List<ExprNodeDesc> reduceValues) { |
| // only the pattern of "VALUE._col([0-9]+)" should be handled. |
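| // e.g. internalName "VALUE._col2" refers to reduceValues.get(2); the expression is returned |
| // only if its writable ObjectInspector is a ConstantObjectInspector |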
| |
| String[] terms = internalName.split("\\."); |
| if (terms.length != 2 || reduceValues == null) { |
| return null; |
| } |
| |
| if (Utilities.ReduceField.VALUE.toString().equals(terms[0])) { |
| int pos = HiveConf.getPositionFromInternalName(terms[1]); |
| if (pos >= 0 && pos < reduceValues.size()) { |
| ExprNodeDesc reduceValue = reduceValues.get(pos); |
| if (reduceValue != null) { |
| if (reduceValue.getWritableObjectInspector() instanceof ConstantObjectInspector) { |
| // this internalName represents a constant parameter in aggregation parameters |
| return reduceValue; |
| } |
| } |
| } |
| } |
| |
| return null; |
| } |
| |
| /** |
| * Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)). |
| * The new GroupByOperator will be a child of the reduceSinkOperatorInfo. |
| * |
| * @param mode |
| * The mode of the aggregation (PARTIAL1 or COMPLETE) |
| * @param genericUDAFEvaluators |
| * If not null, this function will store the mapping from Aggregation |
| * StringTree to the genericUDAFEvaluator in this parameter, so it |
| * can be used in the next-stage GroupBy aggregations. |
| * @return the new GroupByOperator |
| */ |
| @SuppressWarnings("nls") |
| private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo, |
| String dest, Operator input, ReduceSinkOperator rs, GroupByDesc.Mode mode, |
| Map<String, GenericUDAFEvaluator> genericUDAFEvaluators) |
| throws SemanticException { |
| RowResolver groupByInputRowResolver = opParseCtx |
| .get(input).getRowResolver(); |
| RowResolver groupByOutputRowResolver = new RowResolver(); |
| groupByOutputRowResolver.setIsExprResolver(true); |
| List<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>(); |
| List<AggregationDesc> aggregations = new ArrayList<AggregationDesc>(); |
| List<String> outputColumnNames = new ArrayList<String>(); |
| Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); |
| List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest); |
| for (int i = 0; i < grpByExprs.size(); ++i) { |
| ASTNode grpbyExpr = grpByExprs.get(i); |
| ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr); |
| |
| if (exprInfo == null) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.INVALID_COLUMN.getMsg(), grpbyExpr)); |
| } |
| |
| groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), exprInfo |
| .getInternalName(), "", false)); |
| String field = getColumnInternalName(i); |
| outputColumnNames.add(field); |
| ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), null, false); |
| groupByOutputRowResolver.putExpression(grpbyExpr, |
| oColInfo); |
| addAlternateGByKeyMappings(grpbyExpr, oColInfo, input, groupByOutputRowResolver); |
| colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); |
| } |
| // For each aggregation |
| Map<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest); |
| assert (aggregationTrees != null); |
| // get the last colName for the reduce KEY |
| // it represents the column name corresponding to distinct aggr, if any |
| String lastKeyColName = null; |
| List<String> inputKeyCols = rs.getConf().getOutputKeyColumnNames(); |
| if (inputKeyCols.size() > 0) { |
| lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1); |
| } |
| List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols(); |
| int numDistinctUDFs = 0; |
| for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) { |
| ASTNode value = entry.getValue(); |
| |
| // This is the GenericUDAF name |
| String aggName = unescapeIdentifier(value.getChild(0).getText()); |
| boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; |
| boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; |
| |
| // Convert children to aggParameters |
| List<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>(); |
| // 0 is the function name |
| for (int i = 1; i < value.getChildCount(); i++) { |
| ASTNode paraExpr = (ASTNode) value.getChild(i); |
| ColumnInfo paraExprInfo = |
| groupByInputRowResolver.getExpression(paraExpr); |
| if (paraExprInfo == null) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.INVALID_COLUMN.getMsg(), paraExpr)); |
| } |
| |
| String paraExpression = paraExprInfo.getInternalName(); |
| assert (paraExpression != null); |
| if (isDistinct && lastKeyColName != null) { |
| // if aggr is distinct, the parameter name is constructed as |
| // KEY.lastKeyColName:<tag>._colx |
| paraExpression = Utilities.ReduceField.KEY.name() + "." + |
| lastKeyColName + ":" + numDistinctUDFs + "." + |
| getColumnInternalName(i - 1); |
| |
| } |
| |
| ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(), |
| paraExpression, paraExprInfo.getTabAlias(), |
| paraExprInfo.getIsVirtualCol()); |
| ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters( |
| paraExprInfo.getInternalName(), reduceValues); |
| |
| if (reduceValue != null) { |
| // this parameter is a constant |
| expr = reduceValue; |
| } |
| |
| aggParameters.add(expr); |
| } |
| |
| if (isDistinct) { |
| numDistinctUDFs++; |
| } |
| Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); |
| GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator( |
| aggName, aggParameters, value, isDistinct, isAllColumns); |
| assert (genericUDAFEvaluator != null); |
| GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); |
| aggregations.add(new AggregationDesc(aggName.toLowerCase(), |
| udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct, |
| amode)); |
| String field = getColumnInternalName(groupByKeys.size() |
| + aggregations.size() - 1); |
| outputColumnNames.add(field); |
| groupByOutputRowResolver.putExpression(value, new ColumnInfo( |
| field, udaf.returnType, "", false)); |
| // Save the evaluator so that it can be used by the next-stage |
| // GroupByOperators |
| if (genericUDAFEvaluators != null) { |
| genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator); |
| } |
| } |
| float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY); |
| float memoryThreshold = HiveConf |
| .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); |
| float minReductionHashAggr = HiveConf |
| .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION); |
| float minReductionHashAggrLowerBound = HiveConf |
| .getFloatVar(conf, ConfVars.HIVEMAPAGGRHASHMINREDUCTIONLOWERBOUND); |
| |
| Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( |
| new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, |
| false, groupByMemoryUsage, memoryThreshold, minReductionHashAggr, minReductionHashAggrLowerBound, |
| null, false, -1, numDistinctUDFs > 0), |
| new RowSchema(groupByOutputRowResolver.getColumnInfos()), |
| input), groupByOutputRowResolver); |
| op.setColumnExprMap(colExprMap); |
| return op; |
| } |
| |
| // Add the grouping set key to the group by operator. |
| // This is not the first group by operator, but it is a subsequent group by operator |
| // which is forwarding the grouping keys introduced by the grouping sets. |
| // For eg: consider: select key, value, count(1) from T group by key, value with rollup. |
| // Assuming map-side aggregation and no skew, the plan would look like: |
| // |
| // TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink |
| // |
| // This function is called for GroupBy2 to pass the additional grouping keys introduced by |
| // GroupBy1 for the grouping set (corresponding to the rollup). |
| private void addGroupingSetKey(List<ExprNodeDesc> groupByKeys, |
| RowResolver groupByInputRowResolver, |
| RowResolver groupByOutputRowResolver, |
| List<String> outputColumnNames, |
| Map<String, ExprNodeDesc> colExprMap) throws SemanticException { |
| // For grouping sets, add a dummy grouping key |
| String groupingSetColumnName = |
| groupByInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName(); |
| ExprNodeDesc inputExpr = new ExprNodeColumnDesc(VirtualColumn.GROUPINGID.getTypeInfo(), |
| groupingSetColumnName, null, false); |
| groupByKeys.add(inputExpr); |
| |
| String field = getColumnInternalName(groupByKeys.size() - 1); |
| outputColumnNames.add(field); |
| groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), |
| new ColumnInfo( |
| field, |
| VirtualColumn.GROUPINGID.getTypeInfo(), |
| null, |
| true)); |
| colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); |
| } |
| |
| // Process grouping set for the reduce sink operator |
| // For eg: consider: select key, value, count(1) from T group by key, value with rollup. |
| // Assuming map-side aggregation and no skew, the plan would look like: |
| // |
| // TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink |
| // |
| // This function is called for ReduceSink to add the additional grouping keys introduced by |
| // GroupBy1 into the reduce keys. |
| private void processGroupingSetReduceSinkOperator(RowResolver reduceSinkInputRowResolver, |
| RowResolver reduceSinkOutputRowResolver, |
| List<ExprNodeDesc> reduceKeys, |
| List<String> outputKeyColumnNames, |
| Map<String, ExprNodeDesc> colExprMap) throws SemanticException { |
| // add a key for reduce sink |
| String groupingSetColumnName = |
| reduceSinkInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName(); |
| ExprNodeDesc inputExpr = new ExprNodeColumnDesc(VirtualColumn.GROUPINGID.getTypeInfo(), |
| groupingSetColumnName, null, false); |
| reduceKeys.add(inputExpr); |
| |
| outputKeyColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); |
| String field = Utilities.ReduceField.KEY.toString() + "." |
| + getColumnInternalName(reduceKeys.size() - 1); |
| ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get( |
| reduceKeys.size() - 1).getTypeInfo(), null, true); |
| reduceSinkOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), colInfo); |
| colExprMap.put(colInfo.getInternalName(), inputExpr); |
| } |
| |
| |
| /** |
| * Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)). |
| * The new GroupByOperator will be a child of the reduceSinkOperatorInfo. |
| * |
| * @param parseInfo |
| * @param dest |
| * @param reduceSinkOperatorInfo |
| * @param mode |
| * The mode of the aggregation (MERGEPARTIAL, PARTIAL2) |
| * @param genericUDAFEvaluators |
| * The mapping from Aggregation StringTree to the |
| * genericUDAFEvaluator. |
| * @param groupingSets |
| * list of grouping sets |
| * @param groupingSetsPresent |
| * whether grouping sets are present in this query |
| * @param groupingSetsNeedAdditionalMRJob |
| * whether an additional MR job is needed to process the grouping sets |
| * @return the new GroupByOperator |
| */ |
| @SuppressWarnings("nls") |
| private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo, |
| String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode, |
| Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, |
| List<Long> groupingSets, |
| boolean groupingSetsPresent, |
| boolean groupingSetsNeedAdditionalMRJob) throws SemanticException { |
| List<String> outputColumnNames = new ArrayList<String>(); |
| RowResolver groupByInputRowResolver = opParseCtx |
| .get(reduceSinkOperatorInfo).getRowResolver(); |
| RowResolver groupByOutputRowResolver = new RowResolver(); |
| groupByOutputRowResolver.setIsExprResolver(true); |
| List<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>(); |
| List<AggregationDesc> aggregations = new ArrayList<AggregationDesc>(); |
| List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest); |
| Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); |
| for (int i = 0; i < grpByExprs.size(); ++i) { |
| ASTNode grpbyExpr = grpByExprs.get(i); |
| ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr); |
| |
| if (exprInfo == null) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.INVALID_COLUMN.getMsg(), grpbyExpr)); |
| } |
| |
| groupByKeys.add(new ExprNodeColumnDesc(exprInfo)); |
| String field = getColumnInternalName(i); |
| outputColumnNames.add(field); |
| ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false); |
| groupByOutputRowResolver.putExpression(grpbyExpr, |
| oColInfo); |
| addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo, groupByOutputRowResolver); |
| colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); |
| } |
| |
| // This is only needed if a new grouping set key is being created |
| int groupingSetsPosition = -1; |
| |
| // For grouping sets, add a dummy grouping key |
| if (groupingSetsPresent) { |
| groupingSetsPosition = groupByKeys.size(); |
| // Consider the query: select a,b, count(1) from T group by a,b with cube; |
| // where it is being executed in a single map-reduce job |
| // The plan is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink |
| // GroupBy1 already added the grouping id as part of the row |
| // This function is called for GroupBy2 to add grouping id as part of the groupby keys |
| if (!groupingSetsNeedAdditionalMRJob) { |
| addGroupingSetKey( |
| groupByKeys, |
| groupByInputRowResolver, |
| groupByOutputRowResolver, |
| outputColumnNames, |
| colExprMap); |
| } |
| else { |
| // The grouping set has not yet been processed. Create a new grouping key |
| // Consider the query: select a,b, count(1) from T group by a,b with cube; |
| // where it is being executed in 2 map-reduce jobs |
| // The plan for 1st MR is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink |
| // GroupBy1/ReduceSink worked as if grouping sets were not present |
| // This function is called for GroupBy2 to create new rows for grouping sets |
| // For each input row (a,b), 4 rows are created for the example above: |
| // (a,b), (a,null), (null, b), (null, null) |
| createNewGroupingKey(groupByKeys, |
| outputColumnNames, |
| groupByOutputRowResolver, |
| colExprMap); |
| } |
| } |
| |
| Map<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest); |
| // get the last colName for the reduce KEY |
| // it represents the column name corresponding to distinct aggr, if any |
| String lastKeyColName = null; |
| List<ExprNodeDesc> reduceValues = null; |
| if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) { |
| List<String> inputKeyCols = ((ReduceSinkDesc) |
| reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames(); |
| if (inputKeyCols.size() > 0) { |
| lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1); |
| } |
| reduceValues = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getValueCols(); |
| } |
| int numDistinctUDFs = 0; |
| boolean containsDistinctAggr = false; |
| for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) { |
| ASTNode value = entry.getValue(); |
| String aggName = unescapeIdentifier(value.getChild(0).getText()); |
| List<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>(); |
| boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI); |
| containsDistinctAggr = containsDistinctAggr || isDistinct; |
| |
| // If the function is distinct, partial aggregation has not been done on |
| // the client side. |
| // If distPartAgg is set, the client is letting us know that partial |
| // aggregation has not been done. |
| // For eg: select a, count(b+c), count(distinct d+e) group by a |
| // For count(b+c), if partial aggregation has been performed, then we |
| // directly look for count(b+c). |
| // Otherwise, we look for b+c. |
| // For distincts, partial aggregation is never performed on the client |
| // side, so always look for the parameters: d+e |
| if (isDistinct) { |
| // 0 is the function name |
| for (int i = 1; i < value.getChildCount(); i++) { |
| ASTNode paraExpr = (ASTNode) value.getChild(i); |
| ColumnInfo paraExprInfo = |
| groupByInputRowResolver.getExpression(paraExpr); |
| if (paraExprInfo == null) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.INVALID_COLUMN.getMsg(), |
| paraExpr)); |
| } |
| |
| String paraExpression = paraExprInfo.getInternalName(); |
| assert (paraExpression != null); |
| if (lastKeyColName != null) { |
| // if aggr is distinct, the parameter name is constructed as |
| // KEY.lastKeyColName:<tag>._colx |
| paraExpression = Utilities.ReduceField.KEY.name() + "." + |
| lastKeyColName + ":" + numDistinctUDFs + "." |
| + getColumnInternalName(i - 1); |
| } |
| |
| ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(), |
| paraExpression, paraExprInfo.getTabAlias(), |
| paraExprInfo.getIsVirtualCol()); |
| ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters( |
| paraExprInfo.getInternalName(), reduceValues); |
| |
| if (reduceValue != null) { |
| // this parameter is a constant |
| expr = reduceValue; |
| } |
| aggParameters.add(expr); |
| } |
| } else { |
| ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(value); |
| if (paraExprInfo == null) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.INVALID_COLUMN.getMsg(), value)); |
| } |
| String paraExpression = paraExprInfo.getInternalName(); |
| assert (paraExpression != null); |
| aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), |
| paraExpression, paraExprInfo.getTabAlias(), paraExprInfo |
| .getIsVirtualCol())); |
| } |
| if (isDistinct) { |
| numDistinctUDFs++; |
| } |
| |
| Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); |
| GenericUDAFEvaluator genericUDAFEvaluator = null; |
| genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey()); |
| assert (genericUDAFEvaluator != null); |
| |
| GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, |
| aggParameters); |
| aggregations.add(new AggregationDesc(aggName.toLowerCase(), |
| udaf.genericUDAFEvaluator, udaf.convertedParameters, |
| (mode != GroupByDesc.Mode.FINAL && isDistinct), amode)); |
| String field = getColumnInternalName(groupByKeys.size() |
| + aggregations.size() - 1); |
| outputColumnNames.add(field); |
| groupByOutputRowResolver.putExpression(value, new ColumnInfo( |
| field, udaf.returnType, "", false)); |
| } |
| float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY); |
| float memoryThreshold = HiveConf |
| .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); |
| float minReductionHashAggr = HiveConf |
| .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION); |
| float minReductionHashAggrLowerBound = HiveConf |
| .getFloatVar(conf, ConfVars.HIVEMAPAGGRHASHMINREDUCTIONLOWERBOUND); |
| |
| // Nothing special needs to be done for grouping sets if |
| // this is the final group by operator, and multiple rows corresponding to the |
| // grouping sets have been generated upstream. |
| // However, if an additional MR job has been created to handle grouping sets, |
| // additional rows corresponding to grouping sets need to be created here. |
| Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( |
| new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, |
| groupByMemoryUsage, memoryThreshold, minReductionHashAggr, minReductionHashAggrLowerBound, |
| groupingSets, |
| groupingSetsPresent && groupingSetsNeedAdditionalMRJob, |
| groupingSetsPosition, containsDistinctAggr), |
| new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo), |
| groupByOutputRowResolver); |
| op.setColumnExprMap(colExprMap); |
| return op; |
| } |
| |
| /* |
| * Create a new grouping key for grouping id. |
| * A dummy grouping id is added. At runtime, the group by operator |
| * creates 'n' rows per input row, where 'n' is the number of grouping sets. |
| */ |
| private void createNewGroupingKey(List<ExprNodeDesc> groupByKeys, |
| List<String> outputColumnNames, |
| RowResolver groupByOutputRowResolver, |
| Map<String, ExprNodeDesc> colExprMap) { |
| // The value of the constant does not matter; it is replaced by the actual |
| // grouping set value at runtime |
| ExprNodeConstantDesc constant = new ExprNodeConstantDesc(VirtualColumn.GROUPINGID.getTypeInfo(), 0L); |
| groupByKeys.add(constant); |
| String field = getColumnInternalName(groupByKeys.size() - 1); |
| outputColumnNames.add(field); |
| groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), |
| new ColumnInfo( |
| field, |
| VirtualColumn.GROUPINGID.getTypeInfo(), |
| null, |
| true)); |
| colExprMap.put(field, constant); |
| } |
| |
| /** |
| * Generate the map-side GroupByOperator for the Query Block |
| * (qb.getParseInfo().getXXX(dest)). The new GroupByOperator will be a child |
| * of the inputOperatorInfo. |
| * |
| * @param genericUDAFEvaluators |
| * If not null, this function will store the mapping from Aggregation |
| * StringTree to the genericUDAFEvaluator in this parameter, so it |
| * can be used in the next-stage GroupBy aggregations. |
| * @return the new GroupByOperator |
| */ |
| @SuppressWarnings("nls") |
| private Operator genGroupByPlanMapGroupByOperator(QB qb, |
| String dest, |
| List<ASTNode> grpByExprs, |
| Operator inputOperatorInfo, |
| Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, |
| List<Long> groupingSetKeys, |
| boolean groupingSetsPresent) throws SemanticException { |
| |
| RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo) |
| .getRowResolver(); |
| QBParseInfo parseInfo = qb.getParseInfo(); |
| RowResolver groupByOutputRowResolver = new RowResolver(); |
| groupByOutputRowResolver.setIsExprResolver(true); |
| List<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>(); |
| List<String> outputColumnNames = new ArrayList<String>(); |
| List<AggregationDesc> aggregations = new ArrayList<AggregationDesc>(); |
| Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); |
| for (int i = 0; i < grpByExprs.size(); ++i) { |
| ASTNode grpbyExpr = grpByExprs.get(i); |
| ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, |
| groupByInputRowResolver); |
| |
| if ((grpByExprNode instanceof ExprNodeColumnDesc) && ExprNodeDescUtils.indexOf(grpByExprNode, groupByKeys) >= 0) { |
| // Skip duplicated grouping keys; this happens when a column alias is defined. |
| grpByExprs.remove(i--); |
| continue; |
| } |
| groupByKeys.add(grpByExprNode); |
| String field = getColumnInternalName(i); |
| outputColumnNames.add(field); |
| groupByOutputRowResolver.putExpression(grpbyExpr, |
| new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false)); |
| colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); |
| } |
| |
| // The grouping set key is present after the grouping keys, before the distinct keys |
| int groupingSetsPosition = -1; |
| |
| // For grouping sets, add a dummy grouping key |
| // This dummy key needs to be added as a reduce key |
| // For eg: consider: select key, value, count(1) from T group by key, value with rollup. |
| // Assuming map-side aggregation and no skew, the plan would look like: |
| // |
| // TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink |
| // |
| // This function is called for GroupBy1 to create an additional grouping key |
| // for the grouping set (corresponding to the rollup). |
| if (groupingSetsPresent) { |
| groupingSetsPosition = groupByKeys.size(); |
| createNewGroupingKey(groupByKeys, |
| outputColumnNames, |
| groupByOutputRowResolver, |
| colExprMap); |
| } |
| |
| // If there is a distinctFuncExp, add all parameters to the reduceKeys. |
| if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) { |
| List<ASTNode> list = parseInfo.getDistinctFuncExprsForClause(dest); |
| for (ASTNode value : list) { |
| // 0 is function name |
| for (int i = 1; i < value.getChildCount(); i++) { |
| ASTNode parameter = (ASTNode) value.getChild(i); |
| if (groupByOutputRowResolver.getExpression(parameter) == null) { |
| ExprNodeDesc distExprNode = genExprNodeDesc(parameter, |
| groupByInputRowResolver); |
| groupByKeys.add(distExprNode); |
| String field = getColumnInternalName(groupByKeys.size() - 1); |
| outputColumnNames.add(field); |
| groupByOutputRowResolver.putExpression(parameter, new ColumnInfo( |
| field, distExprNode.getTypeInfo(), "", false)); |
| colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); |
| } |
| } |
| } |
| } |
| |
| // For each aggregation |
| Map<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest); |
| assert (aggregationTrees != null); |
| |
| boolean containsDistinctAggr = false; |
| for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) { |
| ASTNode value = entry.getValue(); |
| String aggName = unescapeIdentifier(value.getChild(0).getText()); |
| List<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>(); |
| // 0 is the function name |
| for (int i = 1; i < value.getChildCount(); i++) { |
| ASTNode paraExpr = (ASTNode) value.getChild(i); |
| ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, |
| groupByInputRowResolver); |
| |
| aggParameters.add(paraExprNode); |
| } |
| |
| boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; |
| containsDistinctAggr = containsDistinctAggr || isDistinct; |
| boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; |
| Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.HASH, isDistinct); |
| |
| GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator( |
| aggName, aggParameters, value, isDistinct, isAllColumns); |
| assert (genericUDAFEvaluator != null); |
| GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, |
| aggParameters); |
| aggregations.add(new AggregationDesc(aggName.toLowerCase(), |
| udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct, |
| amode)); |
| String field = getColumnInternalName(groupByKeys.size() |
| + aggregations.size() - 1); |
| outputColumnNames.add(field); |
| if (groupByOutputRowResolver.getExpression(value) == null) { |
| groupByOutputRowResolver.putExpression(value, new ColumnInfo( |
| field, udaf.returnType, "", false)); |
| } |
| // Save the evaluator so that it can be used by the next-stage |
| // GroupByOperators |
| if (genericUDAFEvaluators != null) { |
| genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator); |
| } |
| } |
| float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY); |
| float memoryThreshold = HiveConf |
| .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); |
| float minReductionHashAggr = HiveConf |
| .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION); |
| float minReductionHashAggrLowerBound = HiveConf |
| .getFloatVar(conf, ConfVars.HIVEMAPAGGRHASHMINREDUCTIONLOWERBOUND); |
| Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( |
| new GroupByDesc(GroupByDesc.Mode.HASH, outputColumnNames, groupByKeys, aggregations, |
| false, groupByMemoryUsage, memoryThreshold, minReductionHashAggr, minReductionHashAggrLowerBound, |
| groupingSetKeys, groupingSetsPresent, groupingSetsPosition, containsDistinctAggr), |
| new RowSchema(groupByOutputRowResolver.getColumnInfos()), |
| inputOperatorInfo), groupByOutputRowResolver); |
| op.setColumnExprMap(colExprMap); |
| return op; |
| } |
| |
| /** |
| * Generate the ReduceSinkOperator for the Group By Query Block |
   * (qb.getParseInfo().getXXX(dest)). The new ReduceSinkOperator will be a child
| * of inputOperatorInfo. |
| * |
| * It will put all Group By keys and the distinct field (if any) in the |
| * map-reduce sort key, and all other fields in the map-reduce value. |
| * |
| * @param numPartitionFields |
| * the number of fields for map-reduce partitioning. This is usually |
| * the number of fields in the Group By keys. |
| * @return the new ReduceSinkOperator. |
| * @throws SemanticException |
| */ |
| @SuppressWarnings("nls") |
| private ReduceSinkOperator genGroupByPlanReduceSinkOperator(QB qb, |
| String dest, |
| Operator inputOperatorInfo, |
| List<ASTNode> grpByExprs, |
| int numPartitionFields, |
| boolean changeNumPartitionFields, |
| int numReducers, |
| boolean mapAggrDone, |
| boolean groupingSetsPresent) throws SemanticException { |
| |
| RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo) |
| .getRowResolver(); |
| QBParseInfo parseInfo = qb.getParseInfo(); |
| RowResolver reduceSinkOutputRowResolver = new RowResolver(); |
| reduceSinkOutputRowResolver.setIsExprResolver(true); |
| Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); |
| // Pre-compute group-by keys and store in reduceKeys |
| |
| List<String> outputKeyColumnNames = new ArrayList<String>(); |
| List<String> outputValueColumnNames = new ArrayList<String>(); |
| |
| List<ExprNodeDesc> reduceKeys = getReduceKeysForReduceSink(grpByExprs, |
| reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames, |
| colExprMap); |
| |
| int keyLength = reduceKeys.size(); |
| int numOfColsRmedFromkey = grpByExprs.size() - keyLength; |
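    // numOfColsRmedFromkey is the number of group-by expressions that were
    // de-duplicated out of the reduce key; it is used below to locate the map-side
    // partial aggregation columns in the input row when mapAggrDone is true.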
| |
| // add a key for reduce sink |
| if (groupingSetsPresent) { |
| // Process grouping set for the reduce sink operator |
| processGroupingSetReduceSinkOperator( |
| reduceSinkInputRowResolver, |
| reduceSinkOutputRowResolver, |
| reduceKeys, |
| outputKeyColumnNames, |
| colExprMap); |
| |
| if (changeNumPartitionFields) { |
| numPartitionFields++; |
| } |
| } |
| |
| List<List<Integer>> distinctColIndices = getDistinctColIndicesForReduceSink(parseInfo, dest, |
| reduceKeys, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames, |
| colExprMap); |
| |
| List<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>(); |
| Map<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest); |
| |
| if (!mapAggrDone) { |
| getReduceValuesForReduceSinkNoMapAgg(parseInfo, dest, reduceSinkInputRowResolver, |
| reduceSinkOutputRowResolver, outputValueColumnNames, reduceValues, colExprMap); |
| } else { |
| // Put partial aggregation results in reduceValues |
| int inputField = reduceKeys.size() + numOfColsRmedFromkey; |
| |
| for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) { |
| |
| TypeInfo type = reduceSinkInputRowResolver.getColumnInfos().get( |
| inputField).getType(); |
| ExprNodeColumnDesc exprDesc = new ExprNodeColumnDesc(type, |
| getColumnInternalName(inputField), "", false); |
| reduceValues.add(exprDesc); |
| inputField++; |
| String outputColName = getColumnInternalName(reduceValues.size() - 1); |
| outputValueColumnNames.add(outputColName); |
| String internalName = Utilities.ReduceField.VALUE.toString() + "." |
| + outputColName; |
| reduceSinkOutputRowResolver.putExpression(entry.getValue(), |
| new ColumnInfo(internalName, type, null, false)); |
| colExprMap.put(internalName, exprDesc); |
| } |
| } |
| |
| ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( |
| OperatorFactory.getAndMakeChild( |
| PlanUtils.getReduceSinkDesc(reduceKeys, |
| groupingSetsPresent ? keyLength + 1 : keyLength, |
| reduceValues, distinctColIndices, |
| outputKeyColumnNames, outputValueColumnNames, true, -1, numPartitionFields, |
| numReducers, AcidUtils.Operation.NOT_ACID, defaultNullOrder), |
| new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), inputOperatorInfo), |
| reduceSinkOutputRowResolver); |
| rsOp.setColumnExprMap(colExprMap); |
| return rsOp; |
| } |
| |
| private List<ExprNodeDesc> getReduceKeysForReduceSink(List<ASTNode> grpByExprs, |
| RowResolver reduceSinkInputRowResolver, RowResolver reduceSinkOutputRowResolver, |
| List<String> outputKeyColumnNames, Map<String, ExprNodeDesc> colExprMap) |
| throws SemanticException { |
| |
| List<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>(); |
| |
| for (ASTNode grpbyExpr : grpByExprs) { |
| ExprNodeDesc inputExpr = genExprNodeDesc(grpbyExpr, |
| reduceSinkInputRowResolver); |
| ColumnInfo prev = reduceSinkOutputRowResolver.getExpression(grpbyExpr); |
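      // If this group-by expression was already added as a reduce key and the
      // expression is deterministic within the query, reuse the existing key column
      // instead of adding a duplicate.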
| if (prev != null && isConsistentWithinQuery(inputExpr)) { |
| colExprMap.put(prev.getInternalName(), inputExpr); |
| continue; |
| } |
| reduceKeys.add(inputExpr); |
| outputKeyColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); |
| String field = ReduceField.KEY.toString() + "." |
| + getColumnInternalName(reduceKeys.size() - 1); |
| ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get( |
| reduceKeys.size() - 1).getTypeInfo(), null, false); |
| reduceSinkOutputRowResolver.putExpression(grpbyExpr, colInfo); |
| colExprMap.put(colInfo.getInternalName(), inputExpr); |
| } |
| |
| return reduceKeys; |
| } |
| |
| private boolean isConsistentWithinQuery(ExprNodeDesc expr) throws SemanticException { |
| try { |
| return ExprNodeEvaluatorFactory.get(expr).isConsistentWithinQuery(); |
| } catch (Exception e) { |
| throw new SemanticException(e); |
| } |
| } |
| |
| private List<List<Integer>> getDistinctColIndicesForReduceSink(QBParseInfo parseInfo, |
| String dest, |
| List<ExprNodeDesc> reduceKeys, RowResolver reduceSinkInputRowResolver, |
| RowResolver reduceSinkOutputRowResolver, List<String> outputKeyColumnNames, |
| Map<String, ExprNodeDesc> colExprMap) |
| throws SemanticException { |
| |
| List<List<Integer>> distinctColIndices = new ArrayList<List<Integer>>(); |
| |
| // If there is a distinctFuncExp, add all parameters to the reduceKeys. |
| if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) { |
| List<ASTNode> distFuncs = parseInfo.getDistinctFuncExprsForClause(dest); |
| String colName = getColumnInternalName(reduceKeys.size()); |
| outputKeyColumnNames.add(colName); |
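      // All distinct parameters share a single union-style key column; e.g. for
      // count(DISTINCT a) and count(DISTINCT b) the fields are named roughly
      // KEY.<colName>:0._col0 and KEY.<colName>:1._col0, and distinctColIndices
      // records, per distinct expression, the reduce-key index of each parameter.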
| for (int i = 0; i < distFuncs.size(); i++) { |
| ASTNode value = distFuncs.get(i); |
| int numExprs = 0; |
| List<Integer> distinctIndices = new ArrayList<Integer>(); |
| // 0 is function name |
| for (int j = 1; j < value.getChildCount(); j++) { |
| ASTNode parameter = (ASTNode) value.getChild(j); |
| ExprNodeDesc expr = genExprNodeDesc(parameter, reduceSinkInputRowResolver); |
| // see if expr is already present in reduceKeys. |
| // get index of expr in reduceKeys |
| int ri; |
| for (ri = 0; ri < reduceKeys.size(); ri++) { |
| if (reduceKeys.get(ri).getExprString().equals(expr.getExprString())) { |
| break; |
| } |
| } |
| // add the expr to reduceKeys if it is not present |
| if (ri == reduceKeys.size()) { |
| String name = getColumnInternalName(numExprs); |
| String field = Utilities.ReduceField.KEY.toString() + "." + colName |
| + ":" + i |
| + "." + name; |
| ColumnInfo colInfo = new ColumnInfo(field, expr.getTypeInfo(), null, false); |
| reduceSinkOutputRowResolver.putExpression(parameter, colInfo); |
| colExprMap.put(field, expr); |
| reduceKeys.add(expr); |
| } |
| // add the index of expr in reduceKeys to distinctIndices |
| distinctIndices.add(ri); |
| numExprs++; |
| } |
| distinctColIndices.add(distinctIndices); |
| } |
| } |
| |
| return distinctColIndices; |
| } |
| |
| private void getReduceValuesForReduceSinkNoMapAgg(QBParseInfo parseInfo, String dest, |
| RowResolver reduceSinkInputRowResolver, |
| RowResolver reduceSinkOutputRowResolver, |
| List<String> outputValueColumnNames, |
| List<ExprNodeDesc> reduceValues, |
| Map<String, ExprNodeDesc> colExprMap) throws SemanticException { |
| Map<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest); |
| |
| // Put parameters to aggregations in reduceValues |
| for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) { |
| ASTNode value = entry.getValue(); |
| // 0 is function name |
| for (int i = 1; i < value.getChildCount(); i++) { |
| ASTNode parameter = (ASTNode) value.getChild(i); |
| if (reduceSinkOutputRowResolver.getExpression(parameter) == null) { |
| ExprNodeDesc exprDesc = genExprNodeDesc(parameter, reduceSinkInputRowResolver); |
| reduceValues.add(exprDesc); |
| outputValueColumnNames |
| .add(getColumnInternalName(reduceValues.size() - 1)); |
| String field = Utilities.ReduceField.VALUE.toString() + "." |
| + getColumnInternalName(reduceValues.size() - 1); |
| reduceSinkOutputRowResolver.putExpression(parameter, new ColumnInfo(field, |
| reduceValues.get(reduceValues.size() - 1).getTypeInfo(), null, |
| false)); |
| colExprMap.put(field, exprDesc); |
| } |
| } |
| } |
| } |
| |
| @SuppressWarnings("nls") |
| private ReduceSinkOperator genCommonGroupByPlanReduceSinkOperator(QB qb, List<String> dests, |
| Operator inputOperatorInfo) throws SemanticException { |
| |
| RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo) |
| .getRowResolver(); |
| QBParseInfo parseInfo = qb.getParseInfo(); |
| RowResolver reduceSinkOutputRowResolver = new RowResolver(); |
| reduceSinkOutputRowResolver.setIsExprResolver(true); |
| Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); |
| |
| // The group by keys and distinct keys should be the same for all dests, so using the first |
| // one to produce these will be the same as using any other. |
| String dest = dests.get(0); |
| |
| // Pre-compute group-by keys and store in reduceKeys |
| List<String> outputKeyColumnNames = new ArrayList<String>(); |
| List<String> outputValueColumnNames = new ArrayList<String>(); |
| List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest); |
| |
| List<ExprNodeDesc> reduceKeys = getReduceKeysForReduceSink(grpByExprs, |
| reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames, |
| colExprMap); |
| |
| int keyLength = reduceKeys.size(); |
| |
| List<List<Integer>> distinctColIndices = getDistinctColIndicesForReduceSink(parseInfo, dest, |
| reduceKeys, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames, |
| colExprMap); |
| |
| List<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>(); |
| |
| // The dests can have different non-distinct aggregations, so we have to iterate over all of |
| // them |
| for (String destination : dests) { |
| |
| getReduceValuesForReduceSinkNoMapAgg(parseInfo, destination, reduceSinkInputRowResolver, |
| reduceSinkOutputRowResolver, outputValueColumnNames, reduceValues, colExprMap); |
| |
| // Need to pass all of the columns used in the where clauses as reduce values |
| ASTNode whereClause = parseInfo.getWhrForClause(destination); |
| if (whereClause != null) { |
| assert whereClause.getChildCount() == 1; |
| ASTNode predicates = (ASTNode) whereClause.getChild(0); |
| |
| Map<ASTNode, ExprNodeDesc> nodeOutputs = |
| genAllExprNodeDesc(predicates, reduceSinkInputRowResolver); |
| removeMappingForKeys(predicates, nodeOutputs, reduceKeys); |
| |
| // extract columns missing in current RS key/value |
| for (Map.Entry<ASTNode, ExprNodeDesc> entry : nodeOutputs.entrySet()) { |
| ASTNode parameter = entry.getKey(); |
| ExprNodeDesc expression = entry.getValue(); |
| if (!(expression instanceof ExprNodeColumnDesc)) { |
| continue; |
| } |
| if (ExprNodeDescUtils.indexOf(expression, reduceValues) >= 0) { |
| continue; |
| } |
| String internalName = getColumnInternalName(reduceValues.size()); |
| String field = Utilities.ReduceField.VALUE.toString() + "." + internalName; |
| |
| reduceValues.add(expression); |
| outputValueColumnNames.add(internalName); |
| reduceSinkOutputRowResolver.putExpression(parameter, |
| new ColumnInfo(field, expression.getTypeInfo(), null, false)); |
| colExprMap.put(field, expression); |
| } |
| } |
| } |
| |
| // Optimize the scenario when there are no grouping keys - only 1 reducer is needed |
| int numReducers = -1; |
| if (grpByExprs.isEmpty()) { |
| numReducers = 1; |
| } |
| ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, keyLength, reduceValues, |
| distinctColIndices, outputKeyColumnNames, outputValueColumnNames, |
| true, -1, keyLength, numReducers, AcidUtils.Operation.NOT_ACID, defaultNullOrder); |
| |
| ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( |
| OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(reduceSinkOutputRowResolver |
| .getColumnInfos()), inputOperatorInfo), reduceSinkOutputRowResolver); |
| rsOp.setColumnExprMap(colExprMap); |
| return rsOp; |
| } |
| |
| // Remove expression node descriptor and children of it for a given predicate |
| // from mapping if it's already on RS keys. |
| // Remaining column expressions would be a candidate for an RS value |
| private void removeMappingForKeys(ASTNode predicate, Map<ASTNode, ExprNodeDesc> mapping, |
| List<ExprNodeDesc> keys) { |
| ExprNodeDesc expr = mapping.get(predicate); |
| if (expr != null && ExprNodeDescUtils.indexOf(expr, keys) >= 0) { |
| removeRecursively(predicate, mapping); |
| } else { |
| for (int i = 0; i < predicate.getChildCount(); i++) { |
| removeMappingForKeys((ASTNode) predicate.getChild(i), mapping, keys); |
| } |
| } |
| } |
| |
| // Remove expression node desc and all children of it from mapping |
| private void removeRecursively(ASTNode current, Map<ASTNode, ExprNodeDesc> mapping) { |
| mapping.remove(current); |
| for (int i = 0; i < current.getChildCount(); i++) { |
| removeRecursively((ASTNode) current.getChild(i), mapping); |
| } |
| } |
| |
| /** |
| * Generate the second ReduceSinkOperator for the Group By Plan |
| * (parseInfo.getXXX(dest)). The new ReduceSinkOperator will be a child of |
| * groupByOperatorInfo. |
| * |
| * The second ReduceSinkOperator will put the group by keys in the map-reduce |
| * sort key, and put the partial aggregation results in the map-reduce value. |
| * |
| * @param numPartitionFields |
| * the number of fields in the map-reduce partition key. This should |
| * always be the same as the number of Group By keys. We should be |
| * able to remove this parameter since in this phase there is no |
| * distinct any more. |
| * @return the new ReduceSinkOperator. |
| * @throws SemanticException |
| */ |
| @SuppressWarnings("nls") |
| private Operator genGroupByPlanReduceSinkOperator2MR(QBParseInfo parseInfo, |
| String dest, |
| Operator groupByOperatorInfo, |
| int numPartitionFields, |
| int numReducers, |
| boolean groupingSetsPresent) throws SemanticException { |
| RowResolver reduceSinkInputRowResolver2 = opParseCtx.get( |
| groupByOperatorInfo).getRowResolver(); |
| RowResolver reduceSinkOutputRowResolver2 = new RowResolver(); |
| reduceSinkOutputRowResolver2.setIsExprResolver(true); |
| Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); |
| List<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>(); |
| List<String> outputColumnNames = new ArrayList<String>(); |
| // Get group-by keys and store in reduceKeys |
| List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest); |
| for (int i = 0; i < grpByExprs.size(); ++i) { |
| ASTNode grpbyExpr = grpByExprs.get(i); |
| String field = getColumnInternalName(i); |
| outputColumnNames.add(field); |
| TypeInfo typeInfo = reduceSinkInputRowResolver2.getExpression( |
| grpbyExpr).getType(); |
| ExprNodeColumnDesc inputExpr = new ExprNodeColumnDesc(typeInfo, field, |
| "", false); |
| reduceKeys.add(inputExpr); |
| ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() |
| + "." + field, typeInfo, "", false); |
| reduceSinkOutputRowResolver2.putExpression(grpbyExpr, colInfo); |
| colExprMap.put(colInfo.getInternalName(), inputExpr); |
| } |
| |
| // add a key for reduce sink |
| if (groupingSetsPresent) { |
      // Note that the partitioning fields don't need to change, since the data is
      // either partitioned randomly, or by all grouping keys + distinct keys.
| processGroupingSetReduceSinkOperator( |
| reduceSinkInputRowResolver2, |
| reduceSinkOutputRowResolver2, |
| reduceKeys, |
| outputColumnNames, |
| colExprMap); |
| } |
| |
| // Get partial aggregation results and store in reduceValues |
| List<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>(); |
| int inputField = reduceKeys.size(); |
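    // Partial aggregation results produced by the first-stage GroupBy appear right
    // after the group-by keys (and the grouping-set key, if present) in the input
    // row, so start reading at reduceKeys.size().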
| Map<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest); |
| for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) { |
| String field = getColumnInternalName(inputField); |
| ASTNode t = entry.getValue(); |
| TypeInfo typeInfo = reduceSinkInputRowResolver2.getExpression(t) |
| .getType(); |
| ExprNodeColumnDesc exprDesc = new ExprNodeColumnDesc(typeInfo, field, "", false); |
| reduceValues.add(exprDesc); |
| inputField++; |
| String col = getColumnInternalName(reduceValues.size() - 1); |
| outputColumnNames.add(col); |
| ColumnInfo colInfo = new ColumnInfo( |
| Utilities.ReduceField.VALUE.toString() + "." + col, typeInfo, "", |
| false); |
| reduceSinkOutputRowResolver2.putExpression(t, colInfo); |
| colExprMap.put(colInfo.getInternalName(), exprDesc); |
| } |
| |
| ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( |
| OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, |
| reduceValues, outputColumnNames, true, -1, numPartitionFields, |
| numReducers, AcidUtils.Operation.NOT_ACID, defaultNullOrder), |
| new RowSchema(reduceSinkOutputRowResolver2.getColumnInfos()), groupByOperatorInfo), |
| reduceSinkOutputRowResolver2); |
| |
| rsOp.setColumnExprMap(colExprMap); |
| return rsOp; |
| } |
| |
| /** |
| * Generate the second GroupByOperator for the Group By Plan |
| * (parseInfo.getXXX(dest)). The new GroupByOperator will do the second |
| * aggregation based on the partial aggregation results. |
| * |
| * @param genericUDAFEvaluators |
| * The mapping from Aggregation StringTree to the |
| * genericUDAFEvaluator. |
| * @return the new GroupByOperator |
| * @throws SemanticException |
| */ |
| @SuppressWarnings("nls") |
| private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo, |
| String dest, |
| Operator reduceSinkOperatorInfo2, |
| Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, |
| boolean groupingSetsPresent) throws SemanticException { |
| |
| RowResolver groupByInputRowResolver2 = opParseCtx.get( |
| reduceSinkOperatorInfo2).getRowResolver(); |
| RowResolver groupByOutputRowResolver2 = new RowResolver(); |
| groupByOutputRowResolver2.setIsExprResolver(true); |
| List<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>(); |
| List<AggregationDesc> aggregations = new ArrayList<AggregationDesc>(); |
| Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); |
| List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest); |
| List<String> outputColumnNames = new ArrayList<String>(); |
| for (int i = 0; i < grpByExprs.size(); ++i) { |
| ASTNode grpbyExpr = grpByExprs.get(i); |
| ColumnInfo exprInfo = groupByInputRowResolver2.getExpression(grpbyExpr); |
| if (exprInfo == null) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.INVALID_COLUMN.getMsg(), grpbyExpr)); |
| } |
| |
| String expression = exprInfo.getInternalName(); |
| groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), expression, |
| exprInfo.getTabAlias(), exprInfo.getIsVirtualCol())); |
| String field = getColumnInternalName(i); |
| outputColumnNames.add(field); |
| ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false); |
| groupByOutputRowResolver2.putExpression(grpbyExpr, |
| oColInfo); |
| addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo2, groupByOutputRowResolver2); |
| colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); |
| } |
| |
| int groupingSetsPosition = -1; |
| // For grouping sets, add a dummy grouping key |
| if (groupingSetsPresent) { |
| groupingSetsPosition = groupByKeys.size(); |
| addGroupingSetKey( |
| groupByKeys, |
| groupByInputRowResolver2, |
| groupByOutputRowResolver2, |
| outputColumnNames, |
| colExprMap); |
| } |
| |
| Map<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest); |
| boolean containsDistinctAggr = false; |
| for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) { |
| List<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>(); |
| ASTNode value = entry.getValue(); |
| ColumnInfo paraExprInfo = groupByInputRowResolver2.getExpression(value); |
| if (paraExprInfo == null) { |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.INVALID_COLUMN.getMsg(), value)); |
| } |
| String paraExpression = paraExprInfo.getInternalName(); |
| assert (paraExpression != null); |
| aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), |
| paraExpression, paraExprInfo.getTabAlias(), paraExprInfo |
| .getIsVirtualCol())); |
| |
| String aggName = unescapeIdentifier(value.getChild(0).getText()); |
| |
| boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; |
| containsDistinctAggr = containsDistinctAggr || isDistinct; |
| Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.FINAL, isDistinct); |
| GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators |
| .get(entry.getKey()); |
| assert (genericUDAFEvaluator != null); |
| GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, |
| aggParameters); |
| aggregations |
| .add(new AggregationDesc( |
| aggName.toLowerCase(), |
| udaf.genericUDAFEvaluator, |
| udaf.convertedParameters, |
| false, |
| amode)); |
| String field = getColumnInternalName(groupByKeys.size() |
| + aggregations.size() - 1); |
| outputColumnNames.add(field); |
| groupByOutputRowResolver2.putExpression(value, new ColumnInfo( |
| field, udaf.returnType, "", false)); |
| } |
| float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY); |
| float memoryThreshold = HiveConf |
| .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); |
| float minReductionHashAggr = HiveConf |
| .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION); |
| float minReductionHashAggrLowerBound = HiveConf |
| .getFloatVar(conf, ConfVars.HIVEMAPAGGRHASHMINREDUCTIONLOWERBOUND); |
| |
| Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( |
| new GroupByDesc(GroupByDesc.Mode.FINAL, outputColumnNames, groupByKeys, aggregations, |
| false, groupByMemoryUsage, memoryThreshold, minReductionHashAggr, minReductionHashAggrLowerBound, |
| null, false, |
| groupingSetsPosition, containsDistinctAggr), |
| new RowSchema(groupByOutputRowResolver2.getColumnInfos()), |
| reduceSinkOperatorInfo2), groupByOutputRowResolver2); |
| op.setColumnExprMap(colExprMap); |
| return op; |
| } |
| |
| /** |
| * Generate a Group-By plan using a single map-reduce job (3 operators will be |
| * inserted): |
| * |
   * ReduceSink ( keys = (K1_EXP, K2_EXP, DISTINCT_EXP), values = (A1_EXP, A2_EXP) )
   * SortGroupBy ( keys = (KEY.0, KEY.1),
   *               aggregations = (count_distinct(KEY.2), sum(VALUE.0), count(VALUE.1)) )
   * Select (final selects)
   *
   * @param dest
   * @param qb
   * @param input
   * @return
   * @throws SemanticException
   *
   * Generate a Group-By plan using 1 map-reduce job. Spray by the
   * group by key, sort by the distinct key (if any), and compute the
   * aggregates. The aggregation evaluation functions are as follows:
   *
   * Partitioning Key: grouping key
   *
   * Sorting Key: grouping key if no DISTINCT; grouping + distinct key
   * if DISTINCT
   *
   * Reducer: iterate/merge (mode = COMPLETE)
| **/ |
| @SuppressWarnings({"nls"}) |
| private Operator genGroupByPlan1MR(String dest, QB qb, Operator input) |
| throws SemanticException { |
| |
| QBParseInfo parseInfo = qb.getParseInfo(); |
| |
| int numReducers = -1; |
| Pair<List<ASTNode>, List<Long>> grpByExprsGroupingSets = getGroupByGroupingSetsForClause(parseInfo, dest); |
| |
| List<ASTNode> grpByExprs = grpByExprsGroupingSets.getLeft(); |
| List<Long> groupingSets = grpByExprsGroupingSets.getRight(); |
| |
| if (grpByExprs.isEmpty()) { |
| numReducers = 1; |
| } |
| |
| // Grouping sets are not allowed |
| if (!groupingSets.isEmpty()) { |
| throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOMAPAGGR.getMsg()); |
| } |
| |
| // ////// 1. Generate ReduceSinkOperator |
| ReduceSinkOperator reduceSinkOperatorInfo = |
| genGroupByPlanReduceSinkOperator(qb, |
| dest, |
| input, |
| grpByExprs, |
| grpByExprs.size(), |
| false, |
| numReducers, |
| false, |
| false); |
| |
| // ////// 2. Generate GroupbyOperator |
| Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo, |
| dest, reduceSinkOperatorInfo, reduceSinkOperatorInfo, GroupByDesc.Mode.COMPLETE, null); |
| |
| return groupByOperatorInfo; |
| } |
| |
| @SuppressWarnings({"nls"}) |
| private Operator genGroupByPlan1ReduceMultiGBY(List<String> dests, QB qb, Operator input, |
| Map<String, Operator> aliasToOpInfo) |
| throws SemanticException { |
| |
| QBParseInfo parseInfo = qb.getParseInfo(); |
| |
| ExprNodeDesc previous = null; |
| Operator selectInput = input; |
| |
    // To facilitate partition pruning, OR the where clauses together and put the
    // combined filter at the top of the operator tree; this can also reduce the
    // amount of data going to the reducer.
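    // For example (illustrative): for
    //   FROM src
    //   INSERT OVERWRITE TABLE t1 SELECT key, count(1) WHERE key < 10 GROUP BY key
    //   INSERT OVERWRITE TABLE t2 SELECT key, count(1) WHERE key > 100 GROUP BY key
    // a generated Filter (key < 10 OR key > 100) is inserted before the common ReduceSink.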
| List<ExprNodeDesc.ExprNodeDescEqualityWrapper> whereExpressions = |
| new ArrayList<ExprNodeDesc.ExprNodeDescEqualityWrapper>(); |
| for (String dest : dests) { |
| Pair<List<ASTNode>, List<Long>> grpByExprsGroupingSets = |
| getGroupByGroupingSetsForClause(parseInfo, dest); |
| |
| List<Long> groupingSets = grpByExprsGroupingSets.getRight(); |
| if (!groupingSets.isEmpty()) { |
| throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOMAPAGGR_MULTIGBY.getMsg()); |
| } |
| |
| ASTNode whereExpr = parseInfo.getWhrForClause(dest); |
| |
| if (whereExpr != null) { |
| OpParseContext inputCtx = opParseCtx.get(input); |
| RowResolver inputRR = inputCtx.getRowResolver(); |
| ExprNodeDesc current = genExprNodeDesc((ASTNode) whereExpr.getChild(0), inputRR); |
| |
| // Check the list of where expressions already added so they aren't duplicated |
| ExprNodeDesc.ExprNodeDescEqualityWrapper currentWrapped = |
| new ExprNodeDesc.ExprNodeDescEqualityWrapper(current); |
| if (!whereExpressions.contains(currentWrapped)) { |
| whereExpressions.add(currentWrapped); |
| } else { |
| continue; |
| } |
| |
| if (previous == null) { |
| // If this is the first expression |
| previous = current; |
| continue; |
| } |
| |
| GenericUDFOPOr or = new GenericUDFOPOr(); |
| List<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(2); |
| expressions.add(current); |
| expressions.add(previous); |
| previous = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, or, expressions); |
| } else { |
        // If a destination does not have a where clause, there can be no common filter
| previous = null; |
| break; |
| } |
| } |
| |
| if (previous != null) { |
| OpParseContext inputCtx = opParseCtx.get(input); |
| RowResolver inputRR = inputCtx.getRowResolver(); |
| FilterDesc orFilterDesc = new FilterDesc(previous, false); |
| orFilterDesc.setGenerated(true); |
| |
| selectInput = putOpInsertMap(OperatorFactory.getAndMakeChild(orFilterDesc, new RowSchema( |
| inputRR.getColumnInfos()), input), inputRR); |
| } |
| |
| // insert a select operator here used by the ColumnPruner to reduce |
| // the data to shuffle |
| Operator select = genSelectAllDesc(selectInput); |
| |
| // Generate ReduceSinkOperator |
| ReduceSinkOperator reduceSinkOperatorInfo = |
| genCommonGroupByPlanReduceSinkOperator(qb, dests, select); |
| |
    // It is assumed throughout the code that a reducer has a single child; add a
    // ForwardOperator so that multiple filter/group-by operators can be added as children.
| RowResolver reduceSinkOperatorInfoRR = opParseCtx.get(reduceSinkOperatorInfo).getRowResolver(); |
| Operator forwardOp = putOpInsertMap(OperatorFactory.getAndMakeChild(new ForwardDesc(), |
| new RowSchema(reduceSinkOperatorInfoRR.getColumnInfos()), reduceSinkOperatorInfo), |
| reduceSinkOperatorInfoRR); |
| |
| Operator curr = forwardOp; |
| |
| for (String dest : dests) { |
| curr = forwardOp; |
| |
| if (parseInfo.getWhrForClause(dest) != null) { |
| ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest); |
| curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, forwardOp, aliasToOpInfo, false, true); |
| } |
| |
| // Generate GroupbyOperator |
| Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo, |
| dest, curr, reduceSinkOperatorInfo, GroupByDesc.Mode.COMPLETE, null); |
| |
| // TODO: should we pass curr instead of null? |
| curr = genPostGroupByBodyPlan(groupByOperatorInfo, dest, qb, aliasToOpInfo, null); |
| } |
| |
| return curr; |
| } |
| |
| /** |
   * Generate a Group-By plan using 2 map-reduce jobs (5 operators will be
| * inserted): |
| * |
   * ReduceSink ( keys = (K1_EXP, K2_EXP, DISTINCT_EXP), values = (A1_EXP, A2_EXP) )
   *   NOTE: If DISTINCT_EXP is null, partition by rand()
   * SortGroupBy ( keys = (KEY.0, KEY.1),
   *               aggregations = (count_distinct(KEY.2), sum(VALUE.0), count(VALUE.1)) )
   * ReduceSink ( keys = (0, 1), values = (2, 3, 4) )
   * SortGroupBy ( keys = (KEY.0, KEY.1),
   *               aggregations = (sum(VALUE.0), sum(VALUE.1), sum(VALUE.2)) )
   * Select (final selects)
| * |
| * @param dest |
| * @param qb |
| * @param input |
| * @return |
| * @throws SemanticException |
| * |
   * Generate a Group-By plan using 2 map-reduce jobs. Spray by the
   * grouping key and distinct key (or a random number, if no distinct
   * is present) in the hope of getting a uniform distribution, and
   * compute partial aggregates grouped by the reduction key (grouping
   * key + distinct key). Evaluate partial aggregates first, and spray
   * by the grouping key to compute the actual aggregates in the second
   * phase. The aggregation evaluation functions are as follows:
   *
   * STAGE 1
   *
   * Partitioning Key: random() if no DISTINCT; grouping + distinct key
   * if DISTINCT
   *
   * Sorting Key: grouping key if no DISTINCT; grouping + distinct key
   * if DISTINCT
| * |
| * Reducer: iterate/terminatePartial (mode = PARTIAL1) |
| * |
| * STAGE 2 |
| * |
| * Partitioning Key: grouping key |
| * |
   * Sorting Key: grouping key if no DISTINCT; grouping + distinct key
   * if DISTINCT
| * |
| * Reducer: merge/terminate (mode = FINAL) |
| */ |
| @SuppressWarnings("nls") |
| private Operator genGroupByPlan2MR(String dest, QB qb, Operator input) |
| throws SemanticException { |
| |
| QBParseInfo parseInfo = qb.getParseInfo(); |
| |
| Pair<List<ASTNode>, List<Long>> grpByExprsGroupingSets = getGroupByGroupingSetsForClause(parseInfo, dest); |
| |
| List<ASTNode> grpByExprs = grpByExprsGroupingSets.getLeft(); |
| List<Long> groupingSets = grpByExprsGroupingSets.getRight(); |
| |
    // Grouping sets are not allowed here.
    // This restriction can be lifted in the future;
    // HIVE-3508 has been filed for this.
| if (!groupingSets.isEmpty()) { |
| throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOMAPAGGR.getMsg()); |
| } |
| |
| // ////// 1. Generate ReduceSinkOperator |
    // There is a special case where we want the rows to be randomly distributed to
    // reducers for load balancing. That happens when there is no DISTINCT operator.
    // We set the numPartitionColumns to -1 for this purpose; this is captured by
    // the WritableComparableHiveObject.hashCode() function.
| ReduceSinkOperator reduceSinkOperatorInfo = |
| genGroupByPlanReduceSinkOperator(qb, |
| dest, |
| input, |
| grpByExprs, |
| (parseInfo.getDistinctFuncExprsForClause(dest).isEmpty() ? -1 : Integer.MAX_VALUE), |
| false, |
| -1, |
| false, |
| false); |
| |
| // ////// 2. Generate GroupbyOperator |
| Map<String, GenericUDAFEvaluator> genericUDAFEvaluators = |
| new LinkedHashMap<String, GenericUDAFEvaluator>(); |
| GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanGroupByOperator( |
| parseInfo, dest, reduceSinkOperatorInfo, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIAL1, |
| genericUDAFEvaluators); |
| |
| int numReducers = -1; |
| if (grpByExprs.isEmpty()) { |
| numReducers = 1; |
| } |
| |
| // ////// 3. Generate ReduceSinkOperator2 |
| Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR( |
| parseInfo, dest, groupByOperatorInfo, grpByExprs.size(), numReducers, false); |
| |
| // ////// 4. Generate GroupbyOperator2 |
| Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator2MR(parseInfo, |
| dest, reduceSinkOperatorInfo2, |
| genericUDAFEvaluators, false); |
| |
| return groupByOperatorInfo2; |
| } |
| |
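  // Returns true when the two-MR-job group-by plan can be skipped: there are no
  // group-by keys and no DISTINCT aggregates for this destination, i.e. a global
  // aggregate such as "select count(1) from T".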
| private boolean optimizeMapAggrGroupBy(String dest, QB qb) throws SemanticException { |
| List<ASTNode> grpByExprs = getGroupByForClause(qb.getParseInfo(), dest); |
| if ((grpByExprs != null) && !grpByExprs.isEmpty()) { |
| return false; |
| } |
| |
| return qb.getParseInfo().getDistinctFuncExprsForClause(dest).isEmpty(); |
| } |
| |
| /** |
   * Generate a Group-By plan using 1 map-reduce job. First perform a map-side
   * partial aggregation (to reduce the amount of data); map-side partial
   * aggregation may be turned off at runtime based on its observed performance. Then
   * spray by the group by key, sort by the distinct key (if any), and compute the
   * final aggregates from the partial aggregates.
| * |
| * The aggregation evaluation functions are as follows: |
| * |
| * No grouping sets: |
| * Group By Operator: |
| * grouping keys: group by expressions if no DISTINCT |
| * grouping keys: group by expressions + distinct keys if DISTINCT |
| * Mapper: iterate/terminatePartial (mode = HASH) |
| * Partitioning Key: grouping key |
| * Sorting Key: grouping key if no DISTINCT |
| * grouping + distinct key if DISTINCT |
| * Reducer: iterate/terminate if DISTINCT |
| * merge/terminate if NO DISTINCT (mode MERGEPARTIAL) |
| * |
| * Grouping Sets: |
| * Group By Operator: |
| * grouping keys: group by expressions + grouping id. if no DISTINCT |
| * grouping keys: group by expressions + grouping id. + distinct keys if DISTINCT |
| * Mapper: iterate/terminatePartial (mode = HASH) |
| * Partitioning Key: grouping key + grouping id. |
| * Sorting Key: grouping key + grouping id. if no DISTINCT |
| * grouping + grouping id. + distinct key if DISTINCT |
| * Reducer: iterate/terminate if DISTINCT |
| * merge/terminate if NO DISTINCT (mode MERGEPARTIAL) |
| * |
| * Grouping Sets with an additional MR job introduced (distincts are not allowed): |
| * Group By Operator: |
| * grouping keys: group by expressions |
| * Mapper: iterate/terminatePartial (mode = HASH) |
| * Partitioning Key: grouping key |
| * Sorting Key: grouping key |
| * Reducer: merge/terminate (mode MERGEPARTIAL) |
| * Group by Operator: |
| * grouping keys: group by expressions + add a new grouping id. key |
| * |
| * STAGE 2 |
| * Partitioning Key: grouping key + grouping id. |
| * Sorting Key: grouping key + grouping id. |
| * Reducer: merge/terminate (mode = FINAL) |
| * Group by Operator: |
| * grouping keys: group by expressions + grouping id. |
| */ |
| @SuppressWarnings("nls") |
| private Operator genGroupByPlanMapAggrNoSkew(String dest, QB qb, |
| Operator inputOperatorInfo) throws SemanticException { |
| |
| QBParseInfo parseInfo = qb.getParseInfo(); |
| Pair<List<ASTNode>, List<Long>> grpByExprsGroupingSets = getGroupByGroupingSetsForClause(parseInfo, dest); |
| |
| List<ASTNode> grpByExprs = grpByExprsGroupingSets.getLeft(); |
| List<Long> groupingSets = grpByExprsGroupingSets.getRight(); |
| boolean groupingSetsPresent = !groupingSets.isEmpty(); |
| |
| int newMRJobGroupingSetsThreshold = |
| conf.getIntVar(HiveConf.ConfVars.HIVE_NEW_JOB_GROUPING_SET_CARDINALITY); |
| |
| // ////// Generate GroupbyOperator for a map-side partial aggregation |
| Map<String, GenericUDAFEvaluator> genericUDAFEvaluators = |
| new LinkedHashMap<String, GenericUDAFEvaluator>(); |
| |
    // Is the grouping sets data consumed in the current MR job, or
    // does it need an additional MR job?
| boolean groupingSetsNeedAdditionalMRJob = groupingSetsPresent && |
| groupingSets.size() > newMRJobGroupingSetsThreshold; |
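    // Illustrative example: with the HIVE_NEW_JOB_GROUPING_SET_CARDINALITY threshold
    // at, say, 30, a cube over 5 columns produces 2^5 = 32 grouping sets and would
    // therefore take the additional-MR-job path.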
| |
| GroupByOperator groupByOperatorInfo = |
| (GroupByOperator) genGroupByPlanMapGroupByOperator( |
| qb, |
| dest, |
| grpByExprs, |
| inputOperatorInfo, |
| genericUDAFEvaluators, |
| groupingSets, |
| groupingSetsPresent && !groupingSetsNeedAdditionalMRJob); |
| |
| groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get( |
| inputOperatorInfo).getRowResolver().getTableNames()); |
| int numReducers = -1; |
| |
| // Optimize the scenario when there are no grouping keys - only 1 reducer is |
| // needed |
| if (grpByExprs.isEmpty()) { |
| numReducers = 1; |
| } |
| |
| // ////// Generate ReduceSink Operator |
| boolean isDistinct = !qb.getParseInfo().getDistinctFuncExprsForClause(dest).isEmpty(); |
| |
| // Distincts are not allowed with an additional mr job |
| if (groupingSetsNeedAdditionalMRJob && isDistinct) { |
| String errorMsg = "The number of rows per input row due to grouping sets is " |
| + groupingSets.size(); |
| throw new SemanticException( |
| ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_DISTINCTS.getMsg(errorMsg)); |
| } |
| |
| Operator reduceSinkOperatorInfo = |
| genGroupByPlanReduceSinkOperator(qb, |
| dest, |
| groupByOperatorInfo, |
| grpByExprs, |
| grpByExprs.size(), |
| true, |
| numReducers, |
| true, |
| groupingSetsPresent && !groupingSetsNeedAdditionalMRJob); |
| |
| // Does it require a new MR job for grouping sets |
| if (!groupingSetsPresent || !groupingSetsNeedAdditionalMRJob) { |
      // This is a 1-stage map-reduce processing of the group by. The map-side
      // aggregation was just used to reduce the output data. In the case of
      // distincts, partial results are not used, so iterate is invoked again on
      // the reducer. In the case of non-distincts, partial results are used, and
      // merge is invoked on the reducer.
| return genGroupByPlanGroupByOperator1(parseInfo, dest, |
| reduceSinkOperatorInfo, GroupByDesc.Mode.MERGEPARTIAL, |
| genericUDAFEvaluators, |
| groupingSets, groupingSetsPresent, groupingSetsNeedAdditionalMRJob); |
| } |
| else |
| { |
| // Add 'n' rows corresponding to the grouping sets. For each row, create 'n' rows, |
| // one for each grouping set key. Since map-side aggregation has already been performed, |
| // the number of rows would have been reduced. Moreover, the rows corresponding to the |
| // grouping keys come together, so there is a higher chance of finding the rows in the hash |
| // table. |
| Operator groupByOperatorInfo2 = |
| genGroupByPlanGroupByOperator1(parseInfo, dest, |
| reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIALS, |
| genericUDAFEvaluators, |
| groupingSets, groupingSetsPresent, groupingSetsNeedAdditionalMRJob); |
| |
| // ////// Generate ReduceSinkOperator2 |
| Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR( |
| parseInfo, dest, groupByOperatorInfo2, grpByExprs.size() + 1, numReducers, |
| groupingSetsPresent); |
| |
| // ////// Generate GroupbyOperator3 |
| return genGroupByPlanGroupByOperator2MR(parseInfo, dest, |
| reduceSinkOperatorInfo2, |
| genericUDAFEvaluators, groupingSetsPresent); |
| } |
| } |
| |
| /** |
   * Generate a Group-By plan using 2 map-reduce jobs. However, only 1
   * group-by plan is generated if the query involves no grouping key and no
   * distincts. In that case, the plan is the same as that generated by
   * genGroupByPlanMapAggr1MR. Otherwise, the following plan is generated: first
   * perform a map-side partial aggregation (to reduce the amount of data). Then
   * spray by the grouping key and distinct key (or a random number, if no
   * distinct is present) in the hope of getting a uniform distribution, and compute
   * partial aggregates grouped by the reduction key (grouping key + distinct
   * key). Evaluate partial aggregates first, and spray by the grouping key to
   * compute the actual aggregates in the second phase.
| * |
| * The aggregation evaluation functions are as follows: |
| * |
| * No grouping sets: |
| * STAGE 1 |
| * Group by Operator: |
| * grouping keys: group by expressions if no DISTINCT |
| * grouping keys: group by expressions + distinct keys if DISTINCT |
| * Mapper: iterate/terminatePartial (mode = HASH) |
| * Partitioning Key: random() if no DISTINCT |
| * grouping + distinct key if DISTINCT |
| * Sorting Key: grouping key if no DISTINCT |
| * grouping + distinct key if DISTINCT |
| * Reducer: iterate/terminatePartial if DISTINCT |
| * merge/terminatePartial if NO DISTINCT (mode = MERGEPARTIAL) |
| * Group by Operator: |
| * grouping keys: group by expressions |
| * |
| * STAGE 2 |
| * Partitioning Key: grouping key |
| * Sorting Key: grouping key |
| * Reducer: merge/terminate (mode = FINAL) |
| * |
| * In the presence of grouping sets, the aggregation evaluation functions are as follows: |
| * STAGE 1 |
| * Group by Operator: |
| * grouping keys: group by expressions + grouping id. if no DISTINCT |
   * grouping keys: group by expressions + grouping id. + distinct keys if DISTINCT
| * Mapper: iterate/terminatePartial (mode = HASH) |
| * Partitioning Key: random() if no DISTINCT |
| * grouping + grouping id. + distinct key if DISTINCT |
| * Sorting Key: grouping key + grouping id. if no DISTINCT |
| * grouping + grouping id. + distinct key if DISTINCT |
| * Reducer: iterate/terminatePartial if DISTINCT |
| * merge/terminatePartial if NO DISTINCT (mode = MERGEPARTIAL) |
| * Group by Operator: |
| * grouping keys: group by expressions + grouping id. |
| * |
| * STAGE 2 |
| * Partitioning Key: grouping key |
| * Sorting Key: grouping key + grouping id. |
| * Reducer: merge/terminate (mode = FINAL) |
| */ |
| @SuppressWarnings("nls") |
| private Operator genGroupByPlanMapAggr2MR(String dest, QB qb, |
| Operator inputOperatorInfo) throws SemanticException { |
| |
| QBParseInfo parseInfo = qb.getParseInfo(); |
| |
| Pair<List<ASTNode>, List<Long>> grpByExprsGroupingSets = getGroupByGroupingSetsForClause(parseInfo, dest); |
| |
| List<ASTNode> grpByExprs = grpByExprsGroupingSets.getLeft(); |
| List<Long> groupingSets = grpByExprsGroupingSets.getRight(); |
| boolean groupingSetsPresent = !groupingSets.isEmpty(); |
| |
| if (groupingSetsPresent) { |
| |
| int newMRJobGroupingSetsThreshold = |
| conf.getIntVar(HiveConf.ConfVars.HIVE_NEW_JOB_GROUPING_SET_CARDINALITY); |
| |
      // An additional MR job for grouping sets is not supported together with skew
      // handling, so fail with an explicit error.
| if (groupingSets.size() > newMRJobGroupingSetsThreshold) { |
| String errorMsg = "The number of rows per input row due to grouping sets is " |
| + groupingSets.size(); |
| throw new SemanticException( |
| ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_SKEW.getMsg(errorMsg)); |
| } |
| } |
| |
| // ////// Generate GroupbyOperator for a map-side partial aggregation |
| Map<String, GenericUDAFEvaluator> genericUDAFEvaluators = |
| new LinkedHashMap<String, GenericUDAFEvaluator>(); |
| GroupByOperator groupByOperatorInfo = |
| (GroupByOperator) genGroupByPlanMapGroupByOperator( |
| qb, dest, grpByExprs, inputOperatorInfo, |
| genericUDAFEvaluators, groupingSets, groupingSetsPresent); |
| |
| groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get( |
| inputOperatorInfo).getRowResolver().getTableNames()); |
| // Optimize the scenario when there are no grouping keys and no distinct - 2 |
| // map-reduce jobs are not needed |
    // For example: select count(1) from T where t.ds = ....
| if (!optimizeMapAggrGroupBy(dest, qb)) { |
| List<ASTNode> distinctFuncExprs = parseInfo.getDistinctFuncExprsForClause(dest); |
| |
| // ////// Generate ReduceSink Operator |
| Operator reduceSinkOperatorInfo = |
| genGroupByPlanReduceSinkOperator(qb, |
| dest, |
| groupByOperatorInfo, |
| grpByExprs, |
| distinctFuncExprs.isEmpty() ? -1 : Integer.MAX_VALUE, |
| false, |
| -1, |
| true, |
| groupingSetsPresent); |
| |
| // ////// Generate GroupbyOperator for a partial aggregation |
| Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator1(parseInfo, |
| dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIALS, |
| genericUDAFEvaluators, |
| groupingSets, groupingSetsPresent, false); |
| |
| int numReducers = -1; |
| if (grpByExprs.isEmpty()) { |
| numReducers = 1; |
| } |
| |
| // ////// Generate ReduceSinkOperator2 |
| Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR( |
| parseInfo, dest, groupByOperatorInfo2, grpByExprs.size(), numReducers, |
| groupingSetsPresent); |
| |
| // ////// Generate GroupbyOperator3 |
| return genGroupByPlanGroupByOperator2MR(parseInfo, dest, |
| reduceSinkOperatorInfo2, |
| genericUDAFEvaluators, groupingSetsPresent); |
| } else { |
| // If there are no grouping keys, grouping sets cannot be present |
| assert !groupingSetsPresent; |
| |
| // ////// Generate ReduceSink Operator |
| Operator reduceSinkOperatorInfo = |
| genGroupByPlanReduceSinkOperator(qb, |
| dest, |
| groupByOperatorInfo, |
| grpByExprs, |
| grpByExprs.size(), |
| false, |
| 1, |
| true, |
| groupingSetsPresent); |
| |
| return genGroupByPlanGroupByOperator2MR(parseInfo, dest, |
| reduceSinkOperatorInfo, genericUDAFEvaluators, false); |
| } |
| } |
| |
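  // Returns the largest number of reducers not exceeding maxReducers for which each
  // reducer writes the same number of bucket files; e.g. totalFiles = 12 and
  // maxReducers = 5 yields 4 reducers, each writing 3 files.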
| private int getReducersBucketing(int totalFiles, int maxReducers) { |
| int numFiles = (int)Math.ceil((double)totalFiles / (double)maxReducers); |
| while (true) { |
| if (totalFiles % numFiles == 0) { |
| return totalFiles / numFiles; |
| } |
| numFiles++; |
| } |
| } |
| |
| private static class SortBucketRSCtx { |
| List<ExprNodeDesc> partnCols; |
| boolean multiFileSpray; |
| int numFiles; |
| int totalFiles; |
| |
| public SortBucketRSCtx() { |
| partnCols = null; |
| multiFileSpray = false; |
| numFiles = 1; |
| totalFiles = 1; |
| } |
| |
| /** |
| * @return the partnCols |
| */ |
| public List<ExprNodeDesc> getPartnCols() { |
| return partnCols; |
| } |
| |
| /** |
| * @param partnCols |
| * the partnCols to set |
| */ |
| public void setPartnCols(List<ExprNodeDesc> partnCols) { |
| this.partnCols = partnCols; |
| } |
| |
| /** |
| * @return the multiFileSpray |
| */ |
| public boolean isMultiFileSpray() { |
| return multiFileSpray; |
| } |
| |
| /** |
| * @param multiFileSpray |
| * the multiFileSpray to set |
| */ |
| public void setMultiFileSpray(boolean multiFileSpray) { |
| this.multiFileSpray = multiFileSpray; |
| } |
| |
| /** |
| * @return the numFiles |
| */ |
| public int getNumFiles() { |
| return numFiles; |
| } |
| |
| /** |
| * @param numFiles |
| * the numFiles to set |
| */ |
| public void setNumFiles(int numFiles) { |
| this.numFiles = numFiles; |
| } |
| |
| /** |
| * @return the totalFiles |
| */ |
| public int getTotalFiles() { |
| return totalFiles; |
| } |
| |
| /** |
| * @param totalFiles |
| * the totalFiles to set |
| */ |
| public void setTotalFiles(int totalFiles) { |
| this.totalFiles = totalFiles; |
| } |
| } |
| |
| @SuppressWarnings("nls") |
| private Operator genBucketingSortingDest(String dest, Operator input, QB qb, |
| TableDesc table_desc, Table dest_tab, SortBucketRSCtx ctx) throws SemanticException { |
| |
| // If the table is bucketed, and bucketing is enforced, do the following: |
| // If the number of buckets is smaller than the number of maximum reducers, |
| // create those many reducers. |
| // If not, create a multiFileSink instead of FileSink - the multiFileSink will |
| // spray the data into multiple buckets. That way, we can support a very large |
| // number of buckets without needing a very large number of reducers. |
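    // For example (illustrative numbers): a table with 256 buckets and a limit of
    // 100 reducers ends up with 64 reducers, each spraying its output into 4 bucket
    // files (computed via getReducersBucketing above).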
| boolean enforceBucketing = false; |
| List<ExprNodeDesc> partnCols = new ArrayList<>(); |
| List<ExprNodeDesc> sortCols = new ArrayList<>(); |
| List<Integer> sortOrders = new ArrayList<>(); |
| boolean multiFileSpray = false; |
| int numFiles = 1; |
| int totalFiles = 1; |
| boolean isCompaction = false; |
| if (dest_tab != null && dest_tab.getParameters() != null) { |
| isCompaction = AcidUtils.isCompactionTable(dest_tab.getParameters()); |
| } |
| |
| StringBuilder order = new StringBuilder(); |
| StringBuilder nullOrder = new StringBuilder(); |
| if (dest_tab.getNumBuckets() > 0 && !dest_tab.getBucketCols().isEmpty()) { |
| enforceBucketing = true; |
| if (updating(dest) || deleting(dest)) { |
| partnCols = getPartitionColsFromBucketColsForUpdateDelete(input, true); |
| sortCols = getPartitionColsFromBucketColsForUpdateDelete(input, false); |
| createSortOrderForUpdateDelete(sortCols, order, nullOrder); |
| } else { |
| partnCols = getPartitionColsFromBucketCols(dest, qb, dest_tab, table_desc, input, false); |
| } |
| } else { |
| // Non-native acid tables should handle their own bucketing for updates/deletes |
| if ((updating(dest) || deleting(dest)) && !AcidUtils.isNonNativeAcidTable(dest_tab, true)) { |
| partnCols = getPartitionColsFromBucketColsForUpdateDelete(input, true); |
| enforceBucketing = true; |
| } |
| } |
| |
| if ((dest_tab.getSortCols() != null) && |
| (dest_tab.getSortCols().size() > 0)) { |
| sortCols = getSortCols(dest, qb, dest_tab, table_desc, input); |
| getSortOrders(dest_tab, order, nullOrder); |
| if (!enforceBucketing) { |
| throw new SemanticException(ErrorMsg.TBL_SORTED_NOT_BUCKETED.getErrorCodedMsg(dest_tab.getCompleteName())); |
| } |
| } else if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SORT_WHEN_BUCKETING) && |
| enforceBucketing && !updating(dest) && !deleting(dest)) { |
| sortCols = new ArrayList<>(); |
| for (ExprNodeDesc expr : partnCols) { |
| sortCols.add(expr.clone()); |
| order.append(DirectionUtils.codeToSign(DirectionUtils.ASCENDING_CODE)); |
| nullOrder.append(NullOrdering.NULLS_FIRST.getSign()); |
| } |
| } |
| |
| if (enforceBucketing) { |
| Operation acidOp = AcidUtils.isFullAcidTable(dest_tab) ? getAcidType(table_desc.getOutputFileFormatClass(), |
| dest, AcidUtils.isInsertOnlyTable(dest_tab)) : Operation.NOT_ACID; |
| int maxReducers = conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS); |
| if (conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS) > 0) { |
| maxReducers = conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS); |
| } |
| int numBuckets = dest_tab.getNumBuckets(); |
| if (numBuckets > maxReducers) { |
| LOG.debug("numBuckets is {} and maxReducers is {}", numBuckets, maxReducers); |
| multiFileSpray = true; |
| totalFiles = numBuckets; |
| if (totalFiles % maxReducers == 0) { |
| numFiles = totalFiles / maxReducers; |
| } |
| else { |
| // find the number of reducers such that it is a divisor of totalFiles |
| maxReducers = getReducersBucketing(totalFiles, maxReducers); |
| numFiles = totalFiles / maxReducers; |
| } |
| } |
| else { |
| maxReducers = numBuckets; |
| } |
| |
| input = genReduceSinkPlan(input, partnCols, sortCols, order.toString(), nullOrder.toString(), |
| maxReducers, acidOp, isCompaction); |
| reduceSinkOperatorsAddedByEnforceBucketingSorting.add((ReduceSinkOperator)input.getParentOperators().get(0)); |
| ctx.setMultiFileSpray(multiFileSpray); |
| ctx.setNumFiles(numFiles); |
| ctx.setTotalFiles(totalFiles); |
| } |
| return input; |
| } |
| |
| // SORT BY ROW__ID ASC |
| private void createSortOrderForUpdateDelete(List<ExprNodeDesc> sortCols, |
| StringBuilder sortOrder, StringBuilder nullSortOrder) { |
| NullOrdering defaultNullOrder = NullOrdering.defaultNullOrder(conf); |
| for (int i = 0; i < sortCols.size(); i++) { |
| sortOrder.append(DirectionUtils.codeToSign(DirectionUtils.ASCENDING_CODE)); |
| nullSortOrder.append(defaultNullOrder.getSign()); |
| } |
| } |
| |
| private void genPartnCols(String dest, Operator input, QB qb, |
| TableDesc table_desc, Table dest_tab, SortBucketRSCtx ctx) throws SemanticException { |
| boolean enforceBucketing = false; |
| List<ExprNodeDesc> partnColsNoConvert = new ArrayList<ExprNodeDesc>(); |
| |
| if ((dest_tab.getNumBuckets() > 0)) { |
| enforceBucketing = true; |
| if (updating(dest) || deleting(dest)) { |
| partnColsNoConvert = getPartitionColsFromBucketColsForUpdateDelete(input, false); |
| } else { |
| partnColsNoConvert = getPartitionColsFromBucketCols(dest, qb, dest_tab, table_desc, input, |
| false); |
| } |
| } |
| |
| if ((dest_tab.getSortCols() != null) && |
| (dest_tab.getSortCols().size() > 0)) { |
| if (!enforceBucketing) { |
| throw new SemanticException(ErrorMsg.TBL_SORTED_NOT_BUCKETED.getErrorCodedMsg(dest_tab.getCompleteName())); |
| } |
| enforceBucketing = true; |
| } |
| |
| if (enforceBucketing) { |
| ctx.setPartnCols(partnColsNoConvert); |
| } |
| } |
| |
| private Operator genMaterializedViewDataOrgPlan(Table destinationTable, String sortColsStr, String distributeColsStr, |
| RowResolver inputRR, Operator input) throws SemanticException { |
| Map<String, Integer> colNameToIdx = new HashMap<>(); |
| for (int i = 0; i < destinationTable.getCols().size(); i++) { |
| colNameToIdx.put(destinationTable.getCols().get(i).getName(), i); |
| } |
| List<ColumnInfo> colInfos = inputRR.getColumnInfos(); |
| List<ColumnInfo> sortColInfos = new ArrayList<>(); |
| if (sortColsStr != null) { |
| Utilities.decodeColumnNames(sortColsStr) |
| .forEach(s -> sortColInfos.add(colInfos.get(colNameToIdx.get(s)))); |
| } |
| List<ColumnInfo> distributeColInfos = new ArrayList<>(); |
| if (distributeColsStr != null) { |
| Utilities.decodeColumnNames(distributeColsStr) |
| .forEach(s -> distributeColInfos.add(colInfos.get(colNameToIdx.get(s)))); |
| } |
| return genMaterializedViewDataOrgPlan(sortColInfos, distributeColInfos, inputRR, input); |
| } |
| |
| private Operator genMaterializedViewDataOrgPlan(List<ColumnInfo> sortColInfos, List<ColumnInfo> distributeColInfos, |
| RowResolver inputRR, Operator input) { |
|     // In this case, we introduce an RS and, immediately after it, a SEL that restores |
|     // the row schema to what follow-up operations expect. |
| Set<String> keys = sortColInfos.stream() |
| .map(ColumnInfo::getInternalName) |
| .collect(Collectors.toSet()); |
| Set<String> distributeKeys = distributeColInfos.stream() |
| .map(ColumnInfo::getInternalName) |
| .collect(Collectors.toSet()); |
| List<ExprNodeDesc> keyCols = new ArrayList<>(); |
| List<String> keyColNames = new ArrayList<>(); |
| StringBuilder order = new StringBuilder(); |
| StringBuilder nullOrder = new StringBuilder(); |
| List<ExprNodeDesc> valCols = new ArrayList<>(); |
| List<String> valColNames = new ArrayList<>(); |
| List<ExprNodeDesc> partCols = new ArrayList<>(); |
| Map<String, ExprNodeDesc> colExprMap = new HashMap<>(); |
| Map<String, String> nameMapping = new HashMap<>(); |
| // map _col0 to KEY._col0, etc |
| for (ColumnInfo ci : inputRR.getRowSchema().getSignature()) { |
| ExprNodeColumnDesc e = new ExprNodeColumnDesc(ci); |
| String columnName = ci.getInternalName(); |
| if (keys.contains(columnName)) { |
| // key (sort column) |
| keyColNames.add(columnName); |
| keyCols.add(e); |
| colExprMap.put(Utilities.ReduceField.KEY + "." + columnName, e); |
| nameMapping.put(columnName, Utilities.ReduceField.KEY + "." + columnName); |
| order.append("+"); |
| nullOrder.append("a"); |
| } else { |
| // value |
| valColNames.add(columnName); |
| valCols.add(e); |
| colExprMap.put(Utilities.ReduceField.VALUE + "." + columnName, e); |
| nameMapping.put(columnName, Utilities.ReduceField.VALUE + "." + columnName); |
| } |
| if (distributeKeys.contains(columnName)) { |
| // distribute column |
| partCols.add(e.clone()); |
| } |
| } |
|     // Create the Key/Value TableDescs. When the operator plan is split into MR tasks, |
|     // the reduce operator will initialize the Extract operator with information |
|     // from the Key and Value TableDescs. |
| List<FieldSchema> fields = PlanUtils.getFieldSchemasFromColumnList(keyCols, |
| keyColNames, 0, ""); |
| TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, order.toString(), nullOrder.toString()); |
| List<FieldSchema> valFields = PlanUtils.getFieldSchemasFromColumnList(valCols, |
| valColNames, 0, ""); |
| TableDesc valueTable = PlanUtils.getReduceValueTableDesc(valFields); |
| List<List<Integer>> distinctColumnIndices = new ArrayList<>(); |
| // Number of reducers is set to default (-1) |
| ReduceSinkDesc rsConf = new ReduceSinkDesc(keyCols, keyCols.size(), valCols, |
| keyColNames, distinctColumnIndices, valColNames, -1, partCols, -1, keyTable, |
| valueTable, Operation.NOT_ACID); |
| RowResolver rsRR = new RowResolver(); |
| List<ColumnInfo> rsSignature = new ArrayList<>(); |
| for (int index = 0; index < input.getSchema().getSignature().size(); index++) { |
| ColumnInfo colInfo = new ColumnInfo(input.getSchema().getSignature().get(index)); |
| String[] nm = inputRR.reverseLookup(colInfo.getInternalName()); |
| String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName()); |
| colInfo.setInternalName(nameMapping.get(colInfo.getInternalName())); |
| rsSignature.add(colInfo); |
| rsRR.put(nm[0], nm[1], colInfo); |
| if (nm2 != null) { |
| rsRR.addMappingOnly(nm2[0], nm2[1], colInfo); |
| } |
| } |
| Operator<?> result = putOpInsertMap(OperatorFactory.getAndMakeChild( |
| rsConf, new RowSchema(rsSignature), input), rsRR); |
| result.setColumnExprMap(colExprMap); |
| |
| // Create SEL operator |
| RowResolver selRR = new RowResolver(); |
| List<ColumnInfo> selSignature = new ArrayList<>(); |
| List<ExprNodeDesc> columnExprs = new ArrayList<>(); |
| List<String> colNames = new ArrayList<>(); |
| Map<String, ExprNodeDesc> selColExprMap = new HashMap<>(); |
| for (int index = 0; index < input.getSchema().getSignature().size(); index++) { |
| ColumnInfo colInfo = new ColumnInfo(input.getSchema().getSignature().get(index)); |
| String[] nm = inputRR.reverseLookup(colInfo.getInternalName()); |
| String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName()); |
| selSignature.add(colInfo); |
| selRR.put(nm[0], nm[1], colInfo); |
| if (nm2 != null) { |
| selRR.addMappingOnly(nm2[0], nm2[1], colInfo); |
| } |
| String colName = colInfo.getInternalName(); |
| ExprNodeDesc exprNodeDesc; |
| if (keys.contains(colName)) { |
| exprNodeDesc = new ExprNodeColumnDesc(colInfo.getType(), ReduceField.KEY.toString() + "." + colName, null, false); |
| columnExprs.add(exprNodeDesc); |
| } else { |
| exprNodeDesc = new ExprNodeColumnDesc(colInfo.getType(), ReduceField.VALUE.toString() + "." + colName, null, false); |
| columnExprs.add(exprNodeDesc); |
| } |
| colNames.add(colName); |
| selColExprMap.put(colName, exprNodeDesc); |
| } |
| SelectDesc selConf = new SelectDesc(columnExprs, colNames); |
| result = putOpInsertMap(OperatorFactory.getAndMakeChild(selConf, new RowSchema(selSignature), result), selRR); |
| result.setColumnExprMap(selColExprMap); |
| |
| return result; |
| } |
| |
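|   // Marks a non-native destination's stats as inaccurate by scheduling a DDL task that unsets |
|   // the COLUMN_STATS_ACCURATE table property. |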
| private void setStatsForNonNativeTable(String dbName, String tableName) throws SemanticException { |
| TableName qTableName = HiveTableName.ofNullable(tableName, dbName); |
| Map<String, String> mapProp = new HashMap<>(); |
| mapProp.put(StatsSetupConst.COLUMN_STATS_ACCURATE, null); |
| AlterTableUnsetPropertiesDesc alterTblDesc = new AlterTableUnsetPropertiesDesc(qTableName, null, null, false, |
| mapProp, false, null); |
| this.rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc))); |
| } |
| |
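|   // Returns true if the given operator is the SELECT branch that a MERGE statement adds solely to |
|   // raise a cardinality_violation error; constraint checks must not be generated on that branch. |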
| private boolean mergeCardinalityViolationBranch(final Operator input) { |
| if(input instanceof SelectOperator) { |
| SelectOperator selectOp = (SelectOperator)input; |
| if(selectOp.getConf().getColList().size() == 1) { |
| ExprNodeDesc colExpr = selectOp.getConf().getColList().get(0); |
| if(colExpr instanceof ExprNodeGenericFuncDesc) { |
| ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc)colExpr ; |
| return func.getGenericUDF() instanceof GenericUDFCardinalityViolation; |
| } |
| } |
| } |
| return false; |
| } |
| |
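|   // Adds a FilterOperator that enforces the target table's enforced constraints (e.g. NOT NULL) on |
|   // the rows being written, unless the statement type makes the check unnecessary (see below). |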
| private Operator genConstraintsPlan(String dest, QB qb, Operator input) throws SemanticException { |
| if (deleting(dest)) { |
|       // for DELETE statements the NOT NULL constraint need not be checked |
| return input; |
| } |
| |
| if (updating(dest) && isCBOExecuted() && this.ctx.getOperation() != Context.Operation.MERGE) { |
| // for UPDATE statements CBO already added and pushed down the constraints |
| return input; |
| } |
| |
|     // MERGE statements may have inserted a cardinality violation branch; constraint checks must not be added to it |
| if (mergeCardinalityViolationBranch(input)) { |
| return input; |
| } |
| |
|     // if this is an INSERT INTO statement we might need to add a constraint check |
| assert (input.getParentOperators().size() == 1); |
| RowResolver inputRR = opParseCtx.get(input).getRowResolver(); |
| Table targetTable = getTargetTable(qb, dest); |
| ExprNodeDesc combinedConstraintExpr = |
| ExprNodeTypeCheck.genConstraintsExpr(conf, targetTable, updating(dest), inputRR); |
| |
| if (combinedConstraintExpr != null) { |
| return putOpInsertMap(OperatorFactory.getAndMakeChild( |
| new FilterDesc(combinedConstraintExpr, false), new RowSchema( |
| inputRR.getColumnInfos()), input), inputRR); |
| } |
| return input; |
| } |
| |
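|   // Resolves the destination alias to its target Table, following a destination partition back to |
|   // its owning table when the INSERT targets a specific partition. |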
| protected Table getTargetTable(QB qb, String dest) throws SemanticException { |
| Integer dest_type = qb.getMetaData().getDestTypeForAlias(dest); |
| if (dest_type == QBMetaData.DEST_TABLE) { |
| return qb.getMetaData().getDestTableForAlias(dest); |
| |
| } else if (dest_type == QBMetaData.DEST_PARTITION) { |
| Partition dest_part = qb.getMetaData().getDestPartitionForAlias(dest); |
| return dest_part.getTable(); |
| |
| } else { |
| throw new SemanticException("Generating constraint check plan: Invalid target type: " + dest); |
| } |
| } |
| |
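|   // For queries eligible for the results cache, redirect the output to a fresh UUID-named directory |
|   // under the cache root; otherwise use the requested destination file. |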
| private Path getDestinationFilePath(final String destinationFile, boolean isMmTable) { |
| if (this.isResultsCacheEnabled() && this.queryTypeCanUseCache()) { |
| assert (!isMmTable); |
| QueryResultsCache instance = QueryResultsCache.getInstance(); |
| // QueryResultsCache should have been initialized by now |
| if (instance != null) { |
| Path resultCacheTopDir = instance.getCacheDirPath(); |
| String dirName = UUID.randomUUID().toString(); |
| Path resultDir = new Path(resultCacheTopDir, dirName); |
| this.ctx.setFsResultCacheDirs(resultDir); |
| return resultDir; |
| } |
| } |
| return new Path(destinationFile); |
| } |
| |
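|   // Generates the FileSinkOperator for one destination clause. The destination can be a table |
|   // (e.g. INSERT INTO TABLE t SELECT ...), a static partition (e.g. INSERT OVERWRITE TABLE t |
|   // PARTITION (ds='2024-01-01') SELECT ...), or a local/DFS directory, which also covers CTAS and |
|   // CREATE MATERIALIZED VIEW since those carry a CreateTableDesc/CreateMaterializedViewDesc on the QB. |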
| @SuppressWarnings("nls") |
| protected Operator genFileSinkPlan(String dest, QB qb, Operator input) |
| throws SemanticException { |
| |
| RowResolver inputRR = opParseCtx.get(input).getRowResolver(); |
| QBMetaData qbm = qb.getMetaData(); |
| Integer destType = qbm.getDestTypeForAlias(dest); |
| |
| Table destinationTable = null; // destination table if any |
| boolean destTableIsTransactional; // true for full ACID table and MM table |
| boolean destTableIsFullAcid; // should the destination table be written to using ACID |
| boolean isDirectInsert = false; // should we add files directly to the final path |
| AcidUtils.Operation acidOperation = null; |
| boolean destTableIsTemporary = false; |
| boolean destTableIsMaterialization = false; |
| Partition destinationPartition = null;// destination partition if any |
| Path queryTmpdir = null; // the intermediate destination directory |
| String moveTaskId = null; |
| Path destinationPath = null; // the final destination directory |
| TableDesc tableDescriptor = null; |
| StructObjectInspector specificRowObjectInspector = null; |
| int currentTableId = 0; |
| boolean isLocal = false; |
| SortBucketRSCtx rsCtx = new SortBucketRSCtx(); |
| DynamicPartitionCtx dpCtx = null; |
| LoadTableDesc ltd = null; |
| ListBucketingCtx lbCtx = null; |
| Map<String, String> partSpec = null; |
| boolean isMmTable = false, isMmCreate = false, isNonNativeTable = false; |
| Long writeId = null; |
| HiveTxnManager txnMgr = getTxnMgr(); |
| |
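|     // Dispatch on the destination type resolved during metadata analysis: a concrete table, a |
|     // pre-resolved static partition, or a local/DFS directory (which includes CTAS and CMV). |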
| switch (destType.intValue()) { |
| case QBMetaData.DEST_TABLE: { |
| |
| destinationTable = qbm.getDestTableForAlias(dest); |
| destTableIsTransactional = AcidUtils.isTransactionalTable(destinationTable); |
| destTableIsFullAcid = AcidUtils.isFullAcidTable(destinationTable); |
| destTableIsTemporary = destinationTable.isTemporary(); |
| |
|       // Is the user trying to insert into an external table? |
| checkExternalTable(destinationTable); |
| |
| partSpec = qbm.getPartSpecForAlias(dest); |
| destinationPath = destinationTable.getPath(); |
| |
| checkImmutableTable(qb, destinationTable, destinationPath, false); |
| |
| // Check for dynamic partitions. |
| dpCtx = checkDynPart(qb, qbm, destinationTable, partSpec, dest); |
| |
| isNonNativeTable = destinationTable.isNonNative(); |
| isMmTable = AcidUtils.isInsertOnlyTable(destinationTable.getParameters()); |
| AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID; |
| // this table_desc does not contain the partitioning columns |
| tableDescriptor = Utilities.getTableDesc(destinationTable); |
| |
| if (!isNonNativeTable) { |
| if (destTableIsTransactional) { |
| acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable); |
| } |
| } |
| isDirectInsert = isDirectInsert(destTableIsFullAcid, acidOp); |
| acidOperation = acidOp; |
| queryTmpdir = getTmpDir(isNonNativeTable, isMmTable, isDirectInsert, destinationPath, dpCtx); |
| moveTaskId = getMoveTaskId(); |
| if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { |
| Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_TABLE specifying " + queryTmpdir |
| + " from " + destinationPath); |
| } |
| if (dpCtx != null) { |
|         // set the root of the temporary path under which the dynamic partition columns will be populated |
| dpCtx.setRootPath(queryTmpdir); |
| } |
| |
| // Add NOT NULL constraint check |
| input = genConstraintsPlan(dest, qb, input); |
| |
| if (!qb.getIsQuery()) { |
| input = genConversionSelectOperator(dest, qb, input, destinationTable.getDeserializer(), |
| dpCtx, destinationTable.getPartitionKeys(), destinationTable); |
| } |
| |
| if (destinationTable.isMaterializedView() && |
| mvRebuildMode == MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD) { |
| // Data organization (DISTRIBUTED, SORTED, CLUSTERED) for materialized view |
| // TODO: We only do this for a full rebuild |
| String sortColsStr = destinationTable.getProperty(Constants.MATERIALIZED_VIEW_SORT_COLUMNS); |
| String distributeColsStr = destinationTable.getProperty(Constants.MATERIALIZED_VIEW_DISTRIBUTE_COLUMNS); |
| if (sortColsStr != null || distributeColsStr != null) { |
| input = genMaterializedViewDataOrgPlan(destinationTable, sortColsStr, distributeColsStr, inputRR, input); |
| } |
| } else { |
| // Add sorting/bucketing if needed |
| input = genBucketingSortingDest(dest, input, qb, tableDescriptor, destinationTable, rsCtx); |
| } |
| |
| idToTableNameMap.put(String.valueOf(destTableId), destinationTable.getTableName()); |
| currentTableId = destTableId; |
| destTableId++; |
| // Create the work for moving the table |
| // NOTE: specify Dynamic partitions in dest_tab for WriteEntity |
| if (!isNonNativeTable || destinationTable.getStorageHandler().commitInMoveTask()) { |
| if (destTableIsTransactional) { |
| acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable); |
| checkAcidConstraints(); |
| } else { |
| lbCtx = constructListBucketingCtx(destinationTable.getSkewedColNames(), |
| destinationTable.getSkewedColValues(), destinationTable.getSkewedColValueLocationMaps(), |
| destinationTable.isStoredAsSubDirectories()); |
| } |
| try { |
| if (ctx.getExplainConfig() != null) { |
| writeId = null; // For explain plan, txn won't be opened and doesn't make sense to allocate write id |
| } else { |
| if (isMmTable) { |
| writeId = txnMgr.getTableWriteId(destinationTable.getDbName(), destinationTable.getTableName()); |
| } else { |
| writeId = acidOp == Operation.NOT_ACID ? null : |
| txnMgr.getTableWriteId(destinationTable.getDbName(), destinationTable.getTableName()); |
| } |
| } |
| } catch (LockException ex) { |
| throw new SemanticException("Failed to allocate write Id", ex); |
| } |
| boolean isReplace = !qb.getParseInfo().isInsertIntoTable( |
| destinationTable.getDbName(), destinationTable.getTableName(), destinationTable.getBranchName()); |
| ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, dpCtx, acidOp, isReplace, writeId); |
| if (writeId != null) { |
| ltd.setStmtId(txnMgr.getCurrentStmtId()); |
| } |
| ltd.setMoveTaskId(moveTaskId); |
|         // For an ACID table, INSERT OVERWRITE shouldn't replace the table content. We keep the old |
|         // deltas and base and leave it to the cleaner to remove them. |
| boolean isInsertInto = qb.getParseInfo().isInsertIntoTable( |
| destinationTable.getDbName(), destinationTable.getTableName(), destinationTable.getBranchName()); |
| LoadFileType loadType; |
| if (isDirectInsert) { |
| loadType = LoadFileType.IGNORE; |
| } else if (!isInsertInto && !destTableIsTransactional) { |
| loadType = LoadFileType.REPLACE_ALL; |
| } else { |
| loadType = LoadFileType.KEEP_EXISTING; |
| } |
| ltd.setLoadFileType(loadType); |
| ltd.setInsertOverwrite(!isInsertInto); |
| ltd.setIsDirectInsert(isDirectInsert); |
| ltd.setLbCtx(lbCtx); |
| loadTableWork.add(ltd); |
| } else { |
| // This is a non-native table. |
| // We need to set stats as inaccurate. |
| setStatsForNonNativeTable(destinationTable.getDbName(), destinationTable.getTableName()); |
| // true if it is insert overwrite. |
| boolean overwrite = !qb.getParseInfo().isInsertIntoTable(destinationTable.getDbName(), destinationTable.getTableName(), |
| destinationTable.getBranchName()); |
| createPreInsertDesc(destinationTable, overwrite); |
| |
| ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, partSpec == null ? ImmutableMap.of() : partSpec); |
| ltd.setInsertOverwrite(overwrite); |
| ltd.setLoadFileType(overwrite ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING); |
| } |
| |
| if (destinationTable.isMaterializedView()) { |
| materializedViewUpdateDesc = new MaterializedViewUpdateDesc( |
| destinationTable.getFullyQualifiedName(), false, false, true); |
| } |
| |
| WriteEntity output = generateTableWriteEntity(dest, destinationTable, partSpec, ltd, dpCtx); |
| ctx.getLoadTableOutputMap().put(ltd, output); |
| break; |
| } |
| case QBMetaData.DEST_PARTITION: { |
| |
| destinationPartition = qbm.getDestPartitionForAlias(dest); |
| destinationTable = destinationPartition.getTable(); |
| destTableIsTransactional = AcidUtils.isTransactionalTable(destinationTable); |
| destTableIsFullAcid = AcidUtils.isFullAcidTable(destinationTable); |
| |
| checkExternalTable(destinationTable); |
| |
| Path partPath = destinationPartition.getDataLocation(); |
| |
| checkImmutableTable(qb, destinationTable, partPath, true); |
| |
|       // Previous behavior (HIVE-1707) replaced the partition's dfs with the table's dfs. |
|       // The changes in HIVE-19891 appear to no longer support that behavior. |
| destinationPath = partPath; |
| |
| if (MetaStoreUtils.isArchived(destinationPartition.getTPartition())) { |
| try { |
| String conflictingArchive = ArchiveUtils.conflictingArchiveNameOrNull( |
| db, destinationTable, destinationPartition.getSpec()); |
| String message = String.format("Insert conflict with existing archive: %s", |
| conflictingArchive); |
| throw new SemanticException(message); |
| } catch (SemanticException err) { |
| throw err; |
| } catch (HiveException err) { |
| throw new SemanticException(err); |
| } |
| } |
| |
| isNonNativeTable = destinationTable.isNonNative(); |
| isMmTable = AcidUtils.isInsertOnlyTable(destinationTable.getParameters()); |
| AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID; |
| // this table_desc does not contain the partitioning columns |
| tableDescriptor = Utilities.getTableDesc(destinationTable); |
| |
| if (!isNonNativeTable) { |
| if (destTableIsTransactional) { |
| acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable); |
| } |
| } |
| isDirectInsert = isDirectInsert(destTableIsFullAcid, acidOp); |
| acidOperation = acidOp; |
| queryTmpdir = getTmpDir(isNonNativeTable, isMmTable, isDirectInsert, destinationPath, null); |
| moveTaskId = getMoveTaskId(); |
| if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { |
| Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_PARTITION specifying " |
| + queryTmpdir + " from " + destinationPath); |
| } |
| |
| // Add NOT NULL constraint check |
| input = genConstraintsPlan(dest, qb, input); |
| |
| if (!qb.getIsQuery()) { |
| input = genConversionSelectOperator(dest, qb, input, destinationTable.getDeserializer(), dpCtx, null, destinationTable); |
| } |
| |
| if (destinationTable.isMaterializedView() && |
| mvRebuildMode == MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD) { |
| // Data organization (DISTRIBUTED, SORTED, CLUSTERED) for materialized view |
| // TODO: We only do this for a full rebuild |
| String sortColsStr = destinationTable.getProperty(Constants.MATERIALIZED_VIEW_SORT_COLUMNS); |
| String distributeColsStr = destinationTable.getProperty(Constants.MATERIALIZED_VIEW_DISTRIBUTE_COLUMNS); |
| if (sortColsStr != null || distributeColsStr != null) { |
| input = genMaterializedViewDataOrgPlan(destinationTable, sortColsStr, distributeColsStr, inputRR, input); |
| } |
| } else { |
| // Add sorting/bucketing if needed |
| input = genBucketingSortingDest(dest, input, qb, tableDescriptor, destinationTable, rsCtx); |
| } |
| |
| idToTableNameMap.put(String.valueOf(destTableId), destinationTable.getTableName()); |
| currentTableId = destTableId; |
| destTableId++; |
| |
| if (destTableIsTransactional) { |
| acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable); |
| checkAcidConstraints(); |
| } else { |
| // Transactional tables can't be list bucketed or have skewed cols |
| lbCtx = constructListBucketingCtx(destinationPartition.getSkewedColNames(), |
| destinationPartition.getSkewedColValues(), destinationPartition.getSkewedColValueLocationMaps(), |
| destinationPartition.isStoredAsSubDirectories()); |
| } |
| try { |
| if (ctx.getExplainConfig() != null) { |
| writeId = null; // For explain plan, txn won't be opened and doesn't make sense to allocate write id |
| } else { |
| if (isMmTable) { |
| writeId = txnMgr.getTableWriteId(destinationTable.getDbName(), destinationTable.getTableName()); |
| } else { |
| writeId = (acidOp == Operation.NOT_ACID) ? null : |
| txnMgr.getTableWriteId(destinationTable.getDbName(), destinationTable.getTableName()); |
| } |
| } |
| } catch (LockException ex) { |
| throw new SemanticException("Failed to allocate write Id", ex); |
| } |
| ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, destinationPartition.getSpec(), acidOp, writeId); |
| if (writeId != null) { |
| ltd.setStmtId(txnMgr.getCurrentStmtId()); |
| } |
|       // In the current context for generating the FileSink operator, the statement is either |
|       // INSERT INTO or INSERT OVERWRITE, so the check on the next line is sufficient. |
| boolean isInsertInto = !qb.getParseInfo().isDestToOpTypeInsertOverwrite(dest); |
|       // For an ACID table, INSERT OVERWRITE shouldn't replace the table content. We keep the old |
|       // deltas and base and leave it to the cleaner to remove them. |
| LoadFileType loadType; |
| if (isDirectInsert) { |
| loadType = LoadFileType.IGNORE; |
| } else if (!isInsertInto && !destTableIsTransactional) { |
| loadType = LoadFileType.REPLACE_ALL; |
| } else { |
| loadType = LoadFileType.KEEP_EXISTING; |
| } |
| ltd.setLoadFileType(loadType); |
| ltd.setInsertOverwrite(!isInsertInto); |
| ltd.setIsDirectInsert(isDirectInsert); |
| ltd.setLbCtx(lbCtx); |
| ltd.setMoveTaskId(moveTaskId); |
| |
| loadTableWork.add(ltd); |
| if (!outputs.add(new WriteEntity(destinationPartition, determineWriteType(ltd, dest)))) { |
| throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES |
| .getMsg(destinationTable.getTableName() + "@" + destinationPartition.getName())); |
| } |
| break; |
| } |
| case QBMetaData.DEST_LOCAL_FILE: |
| isLocal = true; |
| // fall through |
| case QBMetaData.DEST_DFS_FILE: { |
| destinationPath = getDestinationFilePath(qbm.getDestFileForAlias(dest), isMmTable); |
| |
| // CTAS case: the file output format and serde are defined by the create |
| // table command rather than taking the default value |
| List<FieldSchema> fieldSchemas = null; |
| List<FieldSchema> partitionColumns = null; |
| List<String> partitionColumnNames = null; |
| List<FieldSchema> sortColumns = null; |
| List<String> sortColumnNames = null; |
| List<FieldSchema> distributeColumns = null; |
| List<String> distributeColumnNames = null; |
| List<ColumnInfo> fileSinkColInfos = null; |
| List<ColumnInfo> sortColInfos = null; |
| List<ColumnInfo> distributeColInfos = null; |
| TableName tableName = null; |
| Map<String, String> tblProps = null; |
| CreateTableDesc tblDesc = qb.getTableDesc(); |
| CreateMaterializedViewDesc viewDesc = qb.getViewDesc(); |
| boolean createTableUseSuffix = false; |
| if (tblDesc != null) { |
| fieldSchemas = new ArrayList<>(); |
| partitionColumns = new ArrayList<>(); |
| partitionColumnNames = tblDesc.getPartColNames(); |
| fileSinkColInfos = new ArrayList<>(); |
| destTableIsTemporary = tblDesc.isTemporary(); |
| destTableIsMaterialization = tblDesc.isMaterialization(); |
| tableName = TableName.fromString(tblDesc.getDbTableName(), null, tblDesc.getDatabaseName()); |
| tblProps = tblDesc.getTblProps(); |
|         // Add the suffix only when the required confs are present |
|         // and the user has not specified a location for the table. |
| createTableUseSuffix = (HiveConf.getBoolVar(conf, ConfVars.HIVE_ACID_CREATE_TABLE_USE_SUFFIX) |
| || HiveConf.getBoolVar(conf, ConfVars.HIVE_ACID_LOCKLESS_READS_ENABLED)) |
| && tblDesc.getLocation() == null; |
| } else if (viewDesc != null) { |
| fieldSchemas = new ArrayList<>(); |
| partitionColumns = new ArrayList<>(); |
| partitionColumnNames = viewDesc.getPartColNames(); |
| sortColumns = new ArrayList<>(); |
| sortColumnNames = viewDesc.getSortColNames(); |
| distributeColumns = new ArrayList<>(); |
| distributeColumnNames = viewDesc.getDistributeColNames(); |
| fileSinkColInfos = new ArrayList<>(); |
| sortColInfos = new ArrayList<>(); |
| distributeColInfos = new ArrayList<>(); |
| destTableIsTemporary = false; |
| destTableIsMaterialization = false; |
| tableName = HiveTableName.ofNullableWithNoDefault(viewDesc.getViewName()); |
| tblProps = viewDesc.getTblProps(); |
|         // Add the suffix only when the required confs are present |
|         // and the user has not specified a location for the table. |
| createTableUseSuffix = (HiveConf.getBoolVar(conf, ConfVars.HIVE_ACID_CREATE_TABLE_USE_SUFFIX) |
| || HiveConf.getBoolVar(conf, ConfVars.HIVE_ACID_LOCKLESS_READS_ENABLED)) |
| && viewDesc.getLocation() == null; |
| } |
| |
| destTableIsTransactional = tblProps != null && AcidUtils.isTablePropertyTransactional(tblProps); |
| if (destTableIsTransactional) { |
| isNonNativeTable = MetaStoreUtils.isNonNativeTable(tblProps); |
| boolean isCtas = tblDesc != null && tblDesc.isCTAS(); |
| boolean isCMV = viewDesc != null && qb.isMaterializedView(); |
| isMmTable = isMmCreate = AcidUtils.isInsertOnlyTable(tblProps); |
| if (!isNonNativeTable && !destTableIsTemporary && (isCtas || isCMV)) { |
| destTableIsFullAcid = AcidUtils.isFullAcidTable(tblProps); |
| acidOperation = getAcidType(dest); |
| isDirectInsert = isDirectInsert(destTableIsFullAcid, acidOperation); |
| if (isDirectInsert || isMmTable) { |
| destinationPath = getCtasOrCMVLocation(tblDesc, viewDesc, createTableUseSuffix); |
| if (createTableUseSuffix) { |
| if (tblDesc != null) { |
| tblDesc.getTblProps().put(SOFT_DELETE_TABLE, Boolean.TRUE.toString()); |
| } else { |
| viewDesc.getTblProps().put(SOFT_DELETE_TABLE, Boolean.TRUE.toString()); |
| } |
| } |
| // Set the location in context for possible rollback. |
| ctx.setLocation(destinationPath); |
|           // Set the location so that metadata transformers |
|           // do not change it later while creating the table. |
| if (tblDesc != null) { |
| tblDesc.setLocation(destinationPath.toString()); |
| } else { |
| viewDesc.setLocation(destinationPath.toString()); |
| } |
| } else { |
| // Set the location in context for possible rollback. |
| ctx.setLocation(getCtasOrCMVLocation(tblDesc, viewDesc, createTableUseSuffix)); |
| } |
| } |
| try { |
| if (ctx.getExplainConfig() != null) { |
| writeId = 0L; // For explain plan, txn won't be opened and doesn't make sense to allocate write id |
| } else { |
| writeId = txnMgr.getTableWriteId(tableName.getDb(), tableName.getTable()); |
| } |
| } catch (LockException ex) { |
| throw new SemanticException("Failed to allocate write Id", ex); |
| } |
| if (isMmTable || isDirectInsert) { |
| if (tblDesc != null) { |
| tblDesc.setInitialWriteId(writeId); |
| } else { |
| viewDesc.setInitialWriteId(writeId); |
| } |
| } |
| } |
| |
| // Check for dynamic partitions. |
| final String cols, colTypes; |
| final boolean isPartitioned; |
| if (dpCtx != null) { |
| throw new SemanticException("Dynamic partition context has already been created, this should not happen"); |
| } |
| if (!CollectionUtils.isEmpty(partitionColumnNames)) { |
| ColsAndTypes ct = deriveFileSinkColTypes( |
| inputRR, partitionColumnNames, sortColumnNames, distributeColumnNames, fieldSchemas, partitionColumns, |
| sortColumns, distributeColumns, fileSinkColInfos, sortColInfos, distributeColInfos); |
| cols = ct.cols; |
| colTypes = ct.colTypes; |
| dpCtx = new DynamicPartitionCtx(partitionColumnNames, |
| conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), |
| conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE)); |
| qbm.setDPCtx(dest, dpCtx); |
| isPartitioned = true; |
| } else { |
| ColsAndTypes ct = deriveFileSinkColTypes( |
| inputRR, sortColumnNames, distributeColumnNames, fieldSchemas, sortColumns, distributeColumns, |
| sortColInfos, distributeColInfos); |
| cols = ct.cols; |
| colTypes = ct.colTypes; |
| isPartitioned = false; |
| } |
| |
| if (isLocal) { |
| assert !isMmTable; |
|         // for a local directory we always write to the map-reduce intermediate |
|         // store and then copy to the local fs |
| queryTmpdir = ctx.getMRTmpPath(); |
| if (dpCtx != null && dpCtx.getSPPath() != null) { |
| queryTmpdir = new Path(queryTmpdir, dpCtx.getSPPath()); |
| } |
| } else { |
|         // otherwise write to the file system implied by the directory; |
|         // no copy is required. We may want to revisit this policy in the future. |
| try { |
| Path qPath = FileUtils.makeQualified(destinationPath, conf); |
| queryTmpdir = getTmpDir(false, isMmTable, isDirectInsert, qPath, dpCtx); |
| } catch (Exception e) { |
| throw new SemanticException("Error creating " |
| + destinationPath, e); |
| } |
| } |
|       // set the root of the temporary path under which the dynamic partition columns will be populated |
| if (dpCtx != null) { |
| dpCtx.setRootPath(queryTmpdir); |
| } |
| |
| if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { |
| Utilities.FILE_OP_LOGGER.trace("Setting query directory " + queryTmpdir |
| + " from " + destinationPath + " (" + isMmTable + ")"); |
| } |
| |
| // update the create table descriptor with the resulting schema. |
| if (tblDesc != null) { |
| tblDesc.setCols(new ArrayList<>(fieldSchemas)); |
| tblDesc.setPartCols(new ArrayList<>(partitionColumns)); |
| } else if (viewDesc != null) { |
| viewDesc.setSchema(new ArrayList<>(fieldSchemas)); |
| viewDesc.setPartCols(new ArrayList<>(partitionColumns)); |
| if (viewDesc.isOrganized()) { |
| viewDesc.setSortCols(new ArrayList<>(sortColumns)); |
| viewDesc.setDistributeCols(new ArrayList<>(distributeColumns)); |
| } |
| } |
| |
| boolean isDestTempFile = true; |
|       if (!ctx.isMRTmpFileURI(destinationPath.toUri().toString()) |
|           && !ctx.isResultCacheDir(destinationPath)) { |
| // not a temp dir and not a result cache dir |
| idToTableNameMap.put(String.valueOf(destTableId), destinationPath.toUri().toString()); |
| currentTableId = destTableId; |
| destTableId++; |
| isDestTempFile = false; |
| } |
| |
| try { |
| if (tblDesc == null) { |
| if (viewDesc != null) { |
| if (viewDesc.getStorageHandler() != null) { |
| viewDesc.setLocation(getCtasOrCMVLocation(tblDesc, viewDesc, createTableUseSuffix).toString()); |
| } |
| tableDescriptor = PlanUtils.getTableDesc(viewDesc, cols, colTypes); |
| } else if (qb.getIsQuery()) { |
| Class<? extends Deserializer> serdeClass = LazySimpleSerDe.class; |
| String fileFormat = conf.getResultFileFormat().toString(); |
| if (SessionState.get().getIsUsingThriftJDBCBinarySerDe()) { |
| serdeClass = ThriftJDBCBinarySerDe.class; |
| fileFormat = ResultFileFormat.SEQUENCEFILE.toString(); |
| // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll |
| // write out formatted thrift objects to SequenceFile |
| conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName()); |
| } else if (fileFormat.equals(PlanUtils.LLAP_OUTPUT_FORMAT_KEY)) { |
| // If this output format is Llap, check to see if Arrow is requested |
| boolean useArrow = HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_OUTPUT_FORMAT_ARROW); |
| serdeClass = useArrow ? ArrowColumnarBatchSerDe.class : LazyBinarySerDe2.class; |
| } |
| tableDescriptor = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, |
| serdeClass); |
| } else { |
| tableDescriptor = PlanUtils.getDefaultTableDesc(qb.getDirectoryDesc(), cols, colTypes); |
| } |
| } else { |
| if (tblDesc.isCTAS() && tblDesc.getStorageHandler() != null) { |
| tblDesc.setLocation(getCtasOrCMVLocation(tblDesc, viewDesc, createTableUseSuffix).toString()); |
| } |
| tableDescriptor = PlanUtils.getTableDesc(tblDesc, cols, colTypes); |
| } |
| } catch (HiveException e) { |
| throw new SemanticException(e); |
| } |
| |
| // We need a specific rowObjectInspector in this case |
| try { |
| specificRowObjectInspector = |
| (StructObjectInspector) tableDescriptor.getDeserializer(conf).getObjectInspector(); |
| } catch (Exception e) { |
| throw new SemanticException(e.getMessage(), e); |
| } |
| |
| boolean isDfsDir = (destType == QBMetaData.DEST_DFS_FILE); |
| |
| try { |
| if (tblDesc != null) { |
| Table t = tblDesc.toTable(conf); |
| destinationTable = tblDesc.isMaterialization() ? t : db.getTranslateTableDryrun(t.getTTable()); |
| } else { |
| destinationTable = viewDesc != null ? viewDesc.toTable(conf) : null; |
| } |
| } catch (HiveException e) { |
| throw new SemanticException(e); |
| } |
| |
| destTableIsFullAcid = AcidUtils.isFullAcidTable(destinationTable); |
| |
| // Data organization (DISTRIBUTED, SORTED, CLUSTERED) for materialized view |
| if (viewDesc != null && viewDesc.isOrganized()) { |
| input = genMaterializedViewDataOrgPlan(sortColInfos, distributeColInfos, inputRR, input); |
| } |
| |
| moveTaskId = getMoveTaskId(); |
| |
| if (isPartitioned) { |
| // Create a SELECT that may reorder the columns if needed |
| RowResolver rowResolver = new RowResolver(); |
| List<ExprNodeDesc> columnExprs = new ArrayList<>(); |
| List<String> colNames = new ArrayList<>(); |
| Map<String, ExprNodeDesc> colExprMap = new HashMap<>(); |
| for (int i = 0; i < fileSinkColInfos.size(); i++) { |
| ColumnInfo ci = fileSinkColInfos.get(i); |
| ExprNodeDesc columnExpr = new ExprNodeColumnDesc(ci); |
| String name = getColumnInternalName(i); |
| rowResolver.put("", name, new ColumnInfo(name, columnExpr.getTypeInfo(), "", false)); |
| columnExprs.add(columnExpr); |
| colNames.add(name); |
| colExprMap.put(name, columnExpr); |
| } |
| input = putOpInsertMap(OperatorFactory.getAndMakeChild( |
| new SelectDesc(columnExprs, colNames), new RowSchema(rowResolver |
| .getColumnInfos()), input), rowResolver); |
| input.setColumnExprMap(colExprMap); |
| |
|         // If this is a partitioned CTAS or MV statement, we are going to create a LoadTableDesc |
|         // object. Although the table does not exist in the metastore yet, we will swap the |
|         // CreateTableTask and the MoveTask resulting from this LoadTable so that, in this specific |
|         // case, we first create the metastore table and then move and commit the partitions. At |
|         // least for the time being, this order needs to be enforced because the metastore expects |
|         // a table to exist before we can add any partitions to it. |
| isNonNativeTable = tableDescriptor.isNonNative(); |
| if (!isNonNativeTable || destinationTable.getStorageHandler().commitInMoveTask()) { |
| AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID; |
| if (destTableIsTransactional) { |
| acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable); |
| checkAcidConstraints(); |
| } |
|           // isReplace = false in case a concurrent operation is executed |
| ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, dpCtx, acidOp, false, writeId); |
| if (writeId != null) { |
| ltd.setStmtId(txnMgr.getCurrentStmtId()); |
| } |
| ltd.setLoadFileType(LoadFileType.KEEP_EXISTING); |
| ltd.setInsertOverwrite(false); |
| ltd.setIsDirectInsert(isDirectInsert); |
| loadTableWork.add(ltd); |
| } else { |
| // This is a non-native table. |
| // We need to set stats as inaccurate. |
| setStatsForNonNativeTable(tableDescriptor.getDbName(), tableDescriptor.getTableName()); |
| ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, dpCtx.getPartSpec()); |
| ltd.setInsertOverwrite(false); |
| ltd.setLoadFileType(LoadFileType.KEEP_EXISTING); |
| } |
| ltd.setMoveTaskId(moveTaskId); |
| ltd.setMdTable(destinationTable); |
| WriteEntity output = generateTableWriteEntity(dest, destinationTable, dpCtx.getPartSpec(), ltd, dpCtx); |
| ctx.getLoadTableOutputMap().put(ltd, output); |
| } else { |
| // Create LFD even for MM CTAS - it's a no-op move, but it still seems to be used for stats. |
| LoadFileDesc loadFileDesc = new LoadFileDesc(tblDesc, viewDesc, queryTmpdir, destinationPath, isDfsDir, cols, |
| colTypes, |
|             destTableIsFullAcid ? // note: this check has changed over time - a previous version used 'transactional', an earlier one 'acid' |
| Operation.INSERT : Operation.NOT_ACID, |
| isMmCreate); |
| loadFileDesc.setMoveTaskId(moveTaskId); |
| loadFileWork.add(loadFileDesc); |
| try { |
| Path qualifiedPath = conf.getBoolVar(ConfVars.HIVE_RANGER_USE_FULLY_QUALIFIED_URL) ? |
| destinationPath.getFileSystem(conf).makeQualified(destinationPath) : destinationPath; |
| if (!outputs.add(new WriteEntity(qualifiedPath, !isDfsDir, isDestTempFile))) { |
| throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES |
| .getMsg(destinationPath.toUri().toString())); |
| } |
| } catch (IOException ex) { |
| throw new SemanticException("Error while getting the full qualified path for the given directory: " + ex.getMessage()); |
| } |
| } |
| break; |
| } |
| default: |
| throw new SemanticException("Unknown destination type: " + destType); |
| } |
| |
| if (!(destType == QBMetaData.DEST_DFS_FILE && qb.getIsQuery()) |
| && destinationTable != null && destinationTable.getStorageHandler() != null) { |
| try { |
| input = genConversionSelectOperator( |
| dest, qb, input, tableDescriptor.getDeserializer(conf), dpCtx, null, destinationTable); |
| } catch (Exception e) { |
| throw new SemanticException(e); |
| } |
| } |
| |
| inputRR = opParseCtx.get(input).getRowResolver(); |
| |
| List<ColumnInfo> vecCol = new ArrayList<ColumnInfo>(); |
| |
| if (updating(dest) || deleting(dest)) { |
| if (AcidUtils.isNonNativeAcidTable(destinationTable, true)) { |
| destinationTable.getStorageHandler().acidVirtualColumns().stream() |
| .map(col -> new ColumnInfo(col.getName(), col.getTypeInfo(), "", true)) |
| .forEach(vecCol::add); |
| } else { |
| vecCol.add(new ColumnInfo(VirtualColumn.ROWID.getName(), VirtualColumn.ROWID.getTypeInfo(), |
| "", true)); |
| } |
| } else { |
| try { |
| // If we already have a specific inspector (view or directory as a target) use that |
| // Otherwise use the table deserializer to get the inspector |
| StructObjectInspector rowObjectInspector = specificRowObjectInspector != null ? specificRowObjectInspector : |
| (StructObjectInspector) destinationTable.getDeserializer().getObjectInspector(); |
| List<? extends StructField> fields = rowObjectInspector |
| .getAllStructFieldRefs(); |
| for (StructField field : fields) { |
| vecCol.add(new ColumnInfo(field.getFieldName(), TypeInfoUtils |
| .getTypeInfoFromObjectInspector(field |
| .getFieldObjectInspector()), "", false)); |
| } |
| } catch (Exception e) { |
| throw new SemanticException(e.getMessage(), e); |
| } |
| } |
| |
| RowSchema fsRS = new RowSchema(vecCol); |
| |
|     // The output files of a FileSink can be merged if they are either not being written to a table, |
|     // or are being written to a table which is not bucketed |
|     // and is not sorted. |
| boolean canBeMerged = (destinationTable == null || !((destinationTable.getNumBuckets() > 0) || |
| (destinationTable.getSortCols() != null && destinationTable.getSortCols().size() > 0))); |
| |
| // If this table is working with ACID semantics, turn off merging |
| canBeMerged &= !destTableIsFullAcid; |
| |
| // Generate the partition columns from the parent input |
| if (destType == QBMetaData.DEST_TABLE || destType == QBMetaData.DEST_PARTITION) { |
| genPartnCols(dest, input, qb, tableDescriptor, destinationTable, rsCtx); |
| } |
| |
| FileSinkDesc fileSinkDesc = createFileSinkDesc(dest, tableDescriptor, destinationPartition, |
| destinationPath, currentTableId, destTableIsFullAcid, destTableIsTemporary,//this was 1/4 acid |
| destTableIsMaterialization, queryTmpdir, rsCtx, dpCtx, lbCtx, fsRS, |
| canBeMerged, destinationTable, writeId, isMmCreate, destType, qb, isDirectInsert, acidOperation, moveTaskId); |
| if (isMmCreate || (qb.isCTAS() || qb.isMaterializedView()) && isDirectInsert) { |
|       // Add the FSD so that the LoadTask compilation can fix up its path to avoid the move. |
| if (tableDesc != null) { |
| tableDesc.setWriter(fileSinkDesc); |
| } else { |
| createVwDesc.setWriter(fileSinkDesc); |
| } |
| } |
| |
| if (fileSinkDesc.getInsertOverwrite()) { |
| if (ltd != null) { |
| ltd.setInsertOverwrite(true); |
| } |
| } |
| if (null != tableDescriptor && useBatchingSerializer(tableDescriptor.getSerdeClassName())) { |
| fileSinkDesc.setIsUsingBatchingSerDe(true); |
| } else { |
| fileSinkDesc.setIsUsingBatchingSerDe(false); |
| } |
| |
| Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( |
| fileSinkDesc, fsRS, input), inputRR); |
| |
|     // In the MoveTask the lineage information is not set for delete and update, as the |
|     // columns do not match. So we only need the lineage information for insert. |
|     // If directInsert=false, adding the lineage info here for the other operations is |
|     // fine, as the paths are different. But if directInsert=true, the path for every |
|     // operation is the same (the table location), which can lead to invalid lineage information |
|     // in the case of a merge statement. |
| if (!isDirectInsert || acidOperation == AcidUtils.Operation.INSERT) { |
| handleLineage(destinationTable, ltd, output); |
| } |
| setWriteIdForSurrogateKeys(ltd, input); |
| |
| LOG.debug("Created FileSink Plan for clause: {}dest_path: {} row schema: {}", dest, destinationPath, inputRR); |
| |
| FileSinkOperator fso = (FileSinkOperator) output; |
| fso.getConf().setTable(destinationTable); |
|     // the following code collects column stats when |
|     // hive.stats.autogather=true |
|     // and the statement is an INSERT OVERWRITE or INSERT INTO a table |
| if (conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) |
| && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER) |
| && enableColumnStatsCollecting() |
| && destinationTable != null |
| && (!destinationTable.isNonNative() || destinationTable.getStorageHandler().commitInMoveTask()) |
| && !destTableIsTemporary && !destTableIsMaterialization |
| && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) { |
| if (destType == QBMetaData.DEST_TABLE) { |
| genAutoColumnStatsGatheringPipeline(destinationTable, partSpec, input, |
| qb.getParseInfo().isInsertIntoTable(destinationTable.getDbName(), destinationTable.getTableName(), |
| destinationTable.getBranchName()), false); |
| } else if (destType == QBMetaData.DEST_PARTITION) { |
| genAutoColumnStatsGatheringPipeline(destinationTable, destinationPartition.getSpec(), input, |
| qb.getParseInfo().isInsertIntoTable(destinationTable.getDbName(), destinationTable.getTableName(), |
| destinationTable.getBranchName()), false); |
| } else if (destType == QBMetaData.DEST_LOCAL_FILE || destType == QBMetaData.DEST_DFS_FILE) { |
| // CTAS or CMV statement |
| genAutoColumnStatsGatheringPipeline(destinationTable, null, input, |
| false, true); |
| } |
| } |
| return output; |
| } |
| |
| protected boolean enableColumnStatsCollecting() { |
| return true; |
| } |
| |
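|   // Computes the target location for a CTAS / CREATE MATERIALIZED VIEW: runs the metastore table |
|   // translation dry-run (for CTAS), falls back to the default warehouse path when no location is |
|   // set, and appends the soft-delete suffix when suffixed table creation is requested. |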
| private Path getCtasOrCMVLocation(CreateTableDesc tblDesc, CreateMaterializedViewDesc viewDesc, |
| boolean createTableWithSuffix) throws SemanticException { |
| Path location; |
| String[] names; |
| String protoName = null; |
| Table tbl; |
| try { |
| if (tblDesc != null) { |
| protoName = tblDesc.getDbTableName(); |
| |
|         // Handle table translation first and, if no location is present, |
|         // use the default table path. |
|         // Property modifications of the table are handled later. |
|         // We are interested in the location only if it has changed |
|         // due to table translation. |
| tbl = tblDesc.toTable(conf); |
| tbl = db.getTranslateTableDryrun(tbl.getTTable()); |
| } else { |
| protoName = viewDesc.getViewName(); |
| tbl = viewDesc.toTable(conf); |
| } |
| names = Utilities.getDbTableName(protoName); |
| |
| Warehouse wh = new Warehouse(conf); |
| if (tbl.getSd() == null |
| || tbl.getSd().getLocation() == null) { |
| location = wh.getDefaultTablePath(db.getDatabase(names[0]), names[1], false); |
| } else { |
| location = wh.getDnsPath(new Path(tbl.getSd().getLocation())); |
| } |
| |
| if (createTableWithSuffix) { |
| location = new Path(location.toString() + |
| Utilities.getTableOrMVSuffix(ctx, createTableWithSuffix)); |
| } |
| |
| return location; |
| } catch (HiveException | MetaException e) { |
| throw new SemanticException(e); |
| } |
| } |
| |
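|   // Direct insert writes the final files straight to the table/partition location instead of a |
|   // staging directory plus move; it applies only to full ACID targets with a real ACID write, is |
|   // gated by HIVE_ACID_DIRECT_INSERT_ENABLED, and is disabled for EXPLAIN ANALYZE. |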
| private boolean isDirectInsert(boolean destTableIsFullAcid, AcidUtils.Operation acidOp) { |
| // In case of an EXPLAIN ANALYZE query, the direct insert has to be turned off. HIVE-24336 |
| if (ctx.getExplainAnalyze() == AnalyzeState.RUNNING) { |
| return false; |
| } |
| boolean directInsertEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_ACID_DIRECT_INSERT_ENABLED); |
| boolean directInsert = directInsertEnabled && destTableIsFullAcid && acidOp != AcidUtils.Operation.NOT_ACID; |
| if (LOG.isDebugEnabled() && directInsert) { |
| LOG.debug("Direct insert for ACID tables is enabled."); |
| } |
| return directInsert; |
| } |
| |
| private Path getTmpDir(boolean isNonNativeTable, boolean isMmTable, boolean isDirectInsert, |
| Path destinationPath, DynamicPartitionCtx dpCtx) { |
|     /** |
|      * We insert directly into the final destination in the following cases: |
|      * 1. Non-native table |
|      * 2. Micro-managed (insert-only) table |
|      * 3. Full ACID table when the operation type is INSERT |
|      */ |
| Path destPath = null; |
| if (isNonNativeTable || isMmTable || isDirectInsert) { |
| destPath = destinationPath; |
| } else if (HiveConf.getBoolVar(conf, ConfVars.HIVE_USE_SCRATCHDIR_FOR_STAGING)) { |
| destPath = ctx.getTempDirForInterimJobPath(destinationPath); |
| } else { |
| destPath = ctx.getTempDirForFinalJobPath(destinationPath); |
| } |
| if (dpCtx != null && dpCtx.getSPPath() != null) { |
| return new Path(destPath, dpCtx.getSPPath()); |
| } |
| return destPath; |
| } |
| |
| private String getMoveTaskId() { |
| return ctx.getMoveTaskId(); |
| } |
| |
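|   // The FileSink is flagged for batching only for HiveServer2 queries whose result serde writes |
|   // whole row batches at once (ThriftJDBCBinarySerDe with in-task result serialization, or the |
|   // Arrow columnar batch serde). |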
| private boolean useBatchingSerializer(String serdeClassName) { |
| return SessionState.get().isHiveServerQuery() && |
| hasSetBatchSerializer(serdeClassName); |
| } |
| |
| private boolean hasSetBatchSerializer(String serdeClassName) { |
| return (serdeClassName.equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && |
| HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) || |
| serdeClassName.equalsIgnoreCase(ArrowColumnarBatchSerDe.class.getName()); |
| } |
| |
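|   // Convenience overload for the non-partitioned case: delegates to the full variant with empty |
|   // partition-column lists. |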
| private ColsAndTypes deriveFileSinkColTypes(RowResolver inputRR, List<String> sortColumnNames, List<String> distributeColumnNames, |
| List<FieldSchema> fieldSchemas, List<FieldSchema> sortColumns, List<FieldSchema> distributeColumns, |
| List<ColumnInfo> sortColInfos, List<ColumnInfo> distributeColInfos) throws SemanticException { |
| return deriveFileSinkColTypes(inputRR, new ArrayList<>(), sortColumnNames, distributeColumnNames, |
| fieldSchemas, new ArrayList<>(), sortColumns, distributeColumns, new ArrayList<>(), |
| sortColInfos, distributeColInfos); |
| } |
| |
| private ColsAndTypes deriveFileSinkColTypes( |
| RowResolver inputRR, List<String> partitionColumnNames, List<String> sortColumnNames, List<String> distributeColumnNames, |
| List<FieldSchema> columns, List<FieldSchema> partitionColumns, List<FieldSchema> sortColumns, List<FieldSchema> distributeColumns, |
| List<ColumnInfo> fileSinkColInfos, List<ColumnInfo> sortColInfos, List<ColumnInfo> distributeColInfos) throws SemanticException { |
| ColsAndTypes result = new ColsAndTypes("", ""); |
| List<String> allColumns = new ArrayList<>(); |
| List<ColumnInfo> colInfos = inputRR.getColumnInfos(); |
| List<ColumnInfo> nonPartColInfos = new ArrayList<>(); |
| SortedMap<Integer, Pair<FieldSchema, ColumnInfo>> partColInfos = new TreeMap<>(); |
| SortedMap<Integer, Pair<FieldSchema, ColumnInfo>> sColInfos = new TreeMap<>(); |
| SortedMap<Integer, Pair<FieldSchema, ColumnInfo>> dColInfos = new TreeMap<>(); |
| boolean first = true; |
| int numNonPartitionedCols = colInfos.size() - partitionColumnNames.size(); |
| if (numNonPartitionedCols <= 0) { |
| throw new SemanticException("Too many partition columns declared"); |
| } |
| for (ColumnInfo colInfo : colInfos) { |
| String[] nm = inputRR.reverseLookup(colInfo.getInternalName()); |
| |
| if (nm[1] != null) { // non-null column alias |
| colInfo.setAlias(nm[1]); |
| } |
| |
| boolean isPartitionCol = false; |
| String colName = colInfo.getInternalName(); //default column name |
| if (columns != null) { |
| FieldSchema col = new FieldSchema(); |
| if (!("".equals(nm[0])) && nm[1] != null) { |
| colName = unescapeIdentifier(colInfo.getAlias()).toLowerCase(); // remove `` |
| } |
| colName = fixCtasColumnName(colName); |
| col.setName(colName); |
| allColumns.add(colName); |
| String typeName = colInfo.getType().getTypeName(); |
| // CTAS should NOT create a VOID type |
| if (typeName.equals(serdeConstants.VOID_TYPE_NAME)) { |
| throw new SemanticException(ErrorMsg.CTAS_CREATES_VOID_TYPE.getMsg(colName)); |
| } |
| col.setType(typeName); |
| int idx = partitionColumnNames.indexOf(colName); |
| if (idx >= 0) { |
| partColInfos.put(idx, Pair.of(col, colInfo)); |
| isPartitionCol = true; |
| } else { |
| if (sortColumnNames != null) { |
| idx = sortColumnNames.indexOf(colName); |
| if (idx >= 0) { |
| sColInfos.put(idx, Pair.of(col, colInfo)); |
| } |
| } |
| if (distributeColumnNames != null) { |
| idx = distributeColumnNames.indexOf(colName); |
| if (idx >= 0) { |
| dColInfos.put(idx, Pair.of(col, colInfo)); |
| } |
| } |
| columns.add(col); |
| nonPartColInfos.add(colInfo); |
| } |
| } |
| |
| if (!isPartitionCol) { |
| if (!first) { |
| result.cols = result.cols.concat(","); |
| result.colTypes = result.colTypes.concat(":"); |
| } |
| |
| first = false; |
| result.cols = result.cols.concat(colName); |
| |
| // Replace VOID type with string when the output is a temp table or |
| // local files. |
| // A VOID type can be generated under the query: |
| // |
| // select NULL from tt; |
| // or |
| // insert overwrite local directory "abc" select NULL from tt; |
| // |
| // where there is no column type to which the NULL value should be |
| // converted. |
| // |
| String tName = colInfo.getType().getTypeName(); |
| if (tName.equals(serdeConstants.VOID_TYPE_NAME)) { |
| result.colTypes = result.colTypes.concat(serdeConstants.STRING_TYPE_NAME); |
| } else { |
| result.colTypes = result.colTypes.concat(tName); |
| } |
| } |
| |
| } |
| |
| if (partColInfos.size() != partitionColumnNames.size()) { |
| throw new SemanticException("Table declaration contains partition columns that are not present " + |
| "in query result schema. " + |
| "Query columns: " + allColumns + ". " + |
| "Partition columns: " + partitionColumnNames); |
| } |
| |
| if (sortColumnNames != null && sColInfos.size() != sortColumnNames.size()) { |
| throw new SemanticException("Table declaration contains cluster/sort columns that are not present " + |
| "in query result schema. " + |
| "Query columns: " + allColumns + ". " + |
| "Organization columns: " + sortColumnNames); |
| } |
| |
| if (distributeColumnNames != null && dColInfos.size() != distributeColumnNames.size()) { |
| throw new SemanticException("Table declaration contains cluster/distribute columns that are not present " + |
| "in query result schema. " + |
| "Query columns: " + allColumns + ". " + |
| "Organization columns: " + distributeColumnNames); |
| } |
| |
| // FileSinkColInfos comprise nonPartCols followed by partCols |
| fileSinkColInfos.addAll(nonPartColInfos); |
| partitionColumns.addAll(partColInfos.values().stream().map(Pair::getLeft).collect(Collectors.toList())); |
| fileSinkColInfos.addAll(partColInfos.values().stream().map(Pair::getRight).collect(Collectors.toList())); |
| // data org columns |
| if (sortColumnNames != null) { |
| sortColumns.addAll(sColInfos.values().stream().map(Pair::getLeft).collect(Collectors.toList())); |
| sortColInfos.addAll(sColInfos.values().stream().map(Pair::getRight).collect(Collectors.toList())); |
| } |
| if (distributeColumnNames != null) { |
| distributeColumns.addAll(dColInfos.values().stream().map(Pair::getLeft).collect(Collectors.toList())); |
| distributeColInfos.addAll(dColInfos.values().stream().map(Pair::getRight).collect(Collectors.toList())); |
| } |
| |
| return result; |
| } |
| |
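|   // Builds the FileSinkDesc for the destination: detects INSERT OVERWRITE, lets an always-unpartitioned |
|   // storage handler supply its own DynamicPartitionCtx, marks ACID/insert-only write types for the |
|   // FileSinkOperator, wires up list bucketing and the stats aggregation prefix, and records the |
|   // static partition spec if one applies. |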
| private FileSinkDesc createFileSinkDesc(String dest, TableDesc table_desc, |
| Partition dest_part, Path dest_path, int currentTableId, |
| boolean destTableIsAcid, boolean destTableIsTemporary, |
| boolean destTableIsMaterialization, Path queryTmpdir, |
| SortBucketRSCtx rsCtx, DynamicPartitionCtx dpCtx, ListBucketingCtx lbCtx, |
| RowSchema fsRS, boolean canBeMerged, Table dest_tab, Long mmWriteId, boolean isMmCtas, |
| Integer dest_type, QB qb, boolean isDirectInsert, AcidUtils.Operation acidOperation, String moveTaskId) throws SemanticException { |
| boolean isInsertOverwrite = false; |
| Context.Operation writeOperation = getWriteOperation(dest); |
| switch (dest_type) { |
| case QBMetaData.DEST_PARTITION: |
| //fall through |
| case QBMetaData.DEST_TABLE: |
| //INSERT [OVERWRITE] path |
| String destTableFullName = dest_tab.getCompleteName().replace('@', '.'); |
| Map<String, ASTNode> iowMap = qb.getParseInfo().getInsertOverwriteTables(); |
| if (iowMap.containsKey(destTableFullName) && |
| qb.getParseInfo().isDestToOpTypeInsertOverwrite(dest)) { |
| isInsertOverwrite = true; |
| } |
| |
| // Some non-native tables might be partitioned without partition spec information being present in the Table object |
| HiveStorageHandler storageHandler = dest_tab.getStorageHandler(); |
| if (storageHandler != null && storageHandler.alwaysUnpartitioned()) { |
| DynamicPartitionCtx nonNativeDpCtx = storageHandler.createDPContext(conf, dest_tab, writeOperation); |
| if (dpCtx == null && nonNativeDpCtx != null) { |
| dpCtx = nonNativeDpCtx; |
| } |
| } |
| |
| break; |
| case QBMetaData.DEST_LOCAL_FILE: |
| case QBMetaData.DEST_DFS_FILE: |
| //CTAS path or insert into file/directory |
| break; |
| default: |
| throw new IllegalStateException("Unexpected dest_type=" + dest_tab); |
| } |
| FileSinkDesc fileSinkDesc = new FileSinkDesc(queryTmpdir, table_desc, |
| conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT), currentTableId, rsCtx.isMultiFileSpray(), |
| canBeMerged, rsCtx.getNumFiles(), rsCtx.getTotalFiles(), rsCtx.getPartnCols(), dpCtx, |
| dest_path, mmWriteId, isMmCtas, isInsertOverwrite, qb.getIsQuery(), |
| qb.isCTAS() || qb.isMaterializedView(), isDirectInsert, acidOperation, |
| ctx.isDeleteBranchOfUpdate(dest)); |
| |
| fileSinkDesc.setMoveTaskId(moveTaskId); |
| boolean isHiveServerQuery = SessionState.get().isHiveServerQuery(); |
| fileSinkDesc.setHiveServerQuery(isHiveServerQuery); |
| // If this is an insert, update, or delete on an ACID table then mark that so the |
| // FileSinkOperator knows how to properly write to it. |
| boolean isDestInsertOnly = (dest_part != null && dest_part.getTable() != null && |
| AcidUtils.isInsertOnlyTable(dest_part.getTable().getParameters())) |
| || (table_desc != null && AcidUtils.isInsertOnlyTable(table_desc.getProperties())); |
| |
| if (isDestInsertOnly) { |
| fileSinkDesc.setWriteType(Operation.INSERT); |
| acidFileSinks.add(fileSinkDesc); |
| } |
| |
| if (destTableIsAcid) { |
| AcidUtils.Operation wt = updating(dest) ? AcidUtils.Operation.UPDATE : |
| (deleting(dest) ? AcidUtils.Operation.DELETE : AcidUtils.Operation.INSERT); |
| fileSinkDesc.setWriteType(wt); |
| acidFileSinks.add(fileSinkDesc); |
| } |
| |
| fileSinkDesc.setWriteOperation(writeOperation); |
| |
| fileSinkDesc.setTemporary(destTableIsTemporary); |
| fileSinkDesc.setMaterialization(destTableIsMaterialization); |
| |
| /* Set List Bucketing context. */ |
| if (lbCtx != null) { |
| lbCtx.processRowSkewedIndex(fsRS); |
| lbCtx.calculateSkewedValueSubDirList(); |
| } |
| fileSinkDesc.setLbCtx(lbCtx); |
| |
|     // Set the stats publishing/aggregating key prefix |
|     // to the directory name. The directory name |
|     // can be changed in the optimizer, but the key should not be changed; |
|     // it should stay the same as the MoveWork's sourceDir. |
| fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName().toString()); |
| if (!destTableIsMaterialization && |
| HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) { |
| String statsTmpLoc = ctx.getTempDirForInterimJobPath(dest_path).toString(); |
| fileSinkDesc.setStatsTmpDir(statsTmpLoc); |
| LOG.debug("Set stats collection dir : " + statsTmpLoc); |
| } |
| |
| if (dest_part != null) { |
| try { |
| String staticSpec = Warehouse.makePartPath(dest_part.getSpec()); |
| fileSinkDesc.setStaticSpec(staticSpec); |
| } catch (MetaException e) { |
| throw new SemanticException(e); |
| } |
| } else if (dpCtx != null) { |
| fileSinkDesc.setStaticSpec(dpCtx.getSPPath()); |
| } |
| return fileSinkDesc; |
| } |
| |
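| /** |
| * Records lineage information for the write: maps the load source directory to the given |
| * operator and, for CTAS and CREATE MATERIALIZED VIEW statements, also maps the target table |
| * location (the table's explicit location when it already has one, otherwise the default |
| * warehouse path) to that operator. |
| */ |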
| private void handleLineage(Table destinationTable, LoadTableDesc ltd, Operator output) |
| throws SemanticException { |
| if (ltd != null) { |
| queryState.getLineageState().mapDirToOp(ltd.getSourcePath(), output); |
| } |
| if (queryState.getCommandType().equals(HiveOperation.CREATETABLE_AS_SELECT.getOperationName())) { |
| |
| Path tlocation = null; |
| String tName = Utilities.getDbTableName(tableDesc.getDbTableName())[1]; |
| try { |
| String suffix = Utilities.getTableOrMVSuffix(ctx, |
| AcidUtils.isTableSoftDeleteEnabled(destinationTable, conf)); |
| Warehouse wh = new Warehouse(conf); |
| tlocation = wh.getDefaultTablePath(db.getDatabase(tableDesc.getDatabaseName()), |
| tName + suffix, tableDesc.isExternal()); |
| |
| if (destinationTable != null && destinationTable.getSd() != null |
| && destinationTable.getPath() != null) { |
| tlocation = destinationTable.getPath(); |
| } |
| } catch (MetaException|HiveException e) { |
| throw new SemanticException(e); |
| } |
| |
| queryState.getLineageState() |
| .mapDirToOp(tlocation, output); |
| } else if (queryState.getCommandType().equals(HiveOperation.CREATE_MATERIALIZED_VIEW.getOperationName())) { |
| Path tlocation; |
| String [] dbTable = Utilities.getDbTableName(createVwDesc.getViewName()); |
| try { |
| Warehouse wh = new Warehouse(conf); |
| Map<String, String> tblProps = createVwDesc.getTblProps(); |
| tlocation = wh.getDefaultTablePath(db.getDatabase(dbTable[0]), dbTable[1], |
| tblProps == null || !AcidUtils.isTablePropertyTransactional(tblProps)); |
| } catch (MetaException|HiveException e) { |
| throw new SemanticException(e); |
| } |
| |
| queryState.getLineageState() |
| .mapDirToOp(tlocation, output); |
| } |
| } |
| |
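| /** |
| * Walks the given operator and its parent operators and pushes the write id of the |
| * LoadTableDesc into every GenericUDFSurrogateKey found in a column expression map, so the |
| * UDF can use it when generating keys. |
| */ |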
| private void setWriteIdForSurrogateKeys(LoadTableDesc ltd, Operator input) { |
| if (ltd == null) { |
| return; |
| } |
| |
| Map<String, ExprNodeDesc> columnExprMap = input.getConf().getColumnExprMap(); |
| if (columnExprMap != null) { |
| for (ExprNodeDesc desc : columnExprMap.values()) { |
| if (desc instanceof ExprNodeGenericFuncDesc) { |
| GenericUDF genericUDF = ((ExprNodeGenericFuncDesc)desc).getGenericUDF(); |
| if (genericUDF instanceof GenericUDFSurrogateKey) { |
| ((GenericUDFSurrogateKey)genericUDF).setWriteId(ltd.getWriteId()); |
| } |
| } |
| } |
| } |
| |
| for (Operator<? extends OperatorDesc> parent : (List<Operator<? extends OperatorDesc>>)input.getParentOperators()) { |
| setWriteIdForSurrogateKeys(ltd, parent); |
| } |
| } |
| |
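| /** |
| * Creates and registers the WriteEntity for the destination table: the whole table when no |
| * dynamic partitioning is involved, a dynamic-partition table write when all partition columns |
| * are dynamic, or a DummyPartition when only part of the partition spec is static. |
| */ |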
| private WriteEntity generateTableWriteEntity(String dest, Table dest_tab, |
| Map<String, String> partSpec, LoadTableDesc ltd, |
| DynamicPartitionCtx dpCtx) |
| throws SemanticException { |
| WriteEntity output = null; |
| |
| // Here we only register the whole table as a WriteEntity for the post-exec hook when no DP is |
| // present. In the case of DP, we will register the WriteEntity in MoveTask once the list of |
| // dynamically created partitions is known. |
| if ((dpCtx == null || dpCtx.getNumDPCols() == 0)) { |
| output = new WriteEntity(dest_tab, determineWriteType(ltd, dest)); |
| if (!outputs.add(output)) { |
| if(!allowOutputMultipleTimes()) { |
| /* |
| * A MERGE statement with early split update may create several (2) writes to the same |
| * table with the same {@link WriteType}, e.g. if the original MERGE statement has both UPDATE |
| * and DELETE clauses, and the UPDATE is split into INSERT + DELETE, in which case it's not an |
| * error. |
| */ |
| throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES |
| .getMsg(dest_tab.getTableName())); |
| } |
| } |
| } |
| |
| if ((dpCtx != null) && (dpCtx.getNumDPCols() >= 0)) { |
| // No static partition specified |
| if (dpCtx.getNumSPCols() == 0) { |
| output = new WriteEntity(dest_tab, determineWriteType(ltd, dest), true); |
| outputs.add(output); |
| output.setDynamicPartitionWrite(true); |
| } |
| // Only part of the partition spec is static. |
| // Create a DummyPartition in this case. Since the metastore does not currently store partial |
| // partitions, we need to store dummy partitions. |
| else { |
| try { |
| String ppath = dpCtx.getSPPath(); |
| ppath = ppath.substring(0, ppath.length() - 1); |
| DummyPartition p = |
| new DummyPartition(dest_tab, dest_tab.getDbName() |
| + "@" + dest_tab.getTableName() + "@" + ppath, |
| partSpec); |
| output = new WriteEntity(p, getWriteType(dest), false); |
| output.setDynamicPartitionWrite(true); |
| outputs.add(output); |
| } catch (HiveException e) { |
| throw new SemanticException(e.getMessage(), e); |
| } |
| } |
| } |
| return output; |
| } |
| |
| protected boolean allowOutputMultipleTimes() { |
| return false; |
| } |
| |
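| /** |
| * Rejects INSERTs into external tables unless HIVE_INSERT_INTO_EXTERNAL_TABLES is enabled. |
| */ |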
| private void checkExternalTable(Table dest_tab) throws SemanticException { |
| if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) && |
| (dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE))) { |
| throw new SemanticException( |
| ErrorMsg.INSERT_EXTERNAL_TABLE.getMsg(dest_tab.getTableName())); |
| } |
| } |
| |
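| /** |
| * For an INSERT INTO against an immutable table (or partition), verifies that the destination |
| * directory is empty before allowing the write to proceed. |
| */ |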
| private void checkImmutableTable(QB qb, Table dest_tab, Path dest_path, boolean isPart) |
| throws SemanticException { |
| // If the query here is an INSERT_INTO and the target is an immutable table, |
| // verify that our destination is empty before proceeding |
| if (!dest_tab.isImmutable() || !qb.getParseInfo().isInsertIntoTable( |
| dest_tab.getDbName(), dest_tab.getTableName(), dest_tab.getBranchName())) { |
| return; |
| } |
| try { |
| FileSystem fs = dest_path.getFileSystem(conf); |
| if (!org.apache.hadoop.hive.metastore.utils.FileUtils.isDirEmpty(fs, dest_path)) { |
| LOG.warn("Attempted write into an immutable table : " |
| + dest_tab.getTableName() + " : " + dest_path); |
| throw new SemanticException( |
| ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(dest_tab.getTableName())); |
| } |
| } catch (IOException ioe) { |
| LOG.warn("Error while trying to determine if immutable table " |
| + (isPart ? "partition " : "") + "has any data : " + dest_tab.getTableName() |
| + " : " + dest_path); |
| throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(ioe.getMessage())); |
| } |
| } |
| |
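| /** |
| * Returns the DynamicPartitionCtx for the given destination, creating and caching one from the |
| * partition spec if it does not exist yet. Returns null for unpartitioned tables and fails if a |
| * partitioned table is written without a partition spec or with dynamic partitioning disabled. |
| */ |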
| private DynamicPartitionCtx checkDynPart(QB qb, QBMetaData qbm, Table dest_tab, |
| Map<String, String> partSpec, String dest) throws SemanticException { |
| List<FieldSchema> parts = dest_tab.getPartitionKeys(); |
| if (parts == null || parts.isEmpty()) { |
| return null; // table is not partitioned |
| } |
| if (partSpec == null || partSpec.size() == 0) { // user did NOT specify partition |
| throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), |
| ErrorMsg.NEED_PARTITION_ERROR.getMsg())); |
| } |
| DynamicPartitionCtx dpCtx = qbm.getDPCtx(dest); |
| if (dpCtx == null) { |
| dest_tab.validatePartColumnNames(partSpec, false); |
| dpCtx = new DynamicPartitionCtx(partSpec, |
| conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), |
| conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE)); |
| qbm.setDPCtx(dest, dpCtx); |
| } |
| |
| verifyDynamicPartitionEnabled(conf, qb, dest); |
| |
| if ((dest_tab.getNumBuckets() > 0)) { |
| dpCtx.setNumBuckets(dest_tab.getNumBuckets()); |
| } |
| return dpCtx; |
| } |
| |
| private static void verifyDynamicPartitionEnabled(HiveConf conf, QB qb, String dest) throws SemanticException { |
| if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) { // dynamic partitioning must be enabled |
| throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), |
| ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg())); |
| } |
| } |
| |
| private void createPreInsertDesc(Table table, boolean overwrite) { |
| PreInsertTableDesc preInsertTableDesc = new PreInsertTableDesc(table, overwrite); |
| this.rootTasks |
| .add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), preInsertTableDesc))); |
| } |
| |
| |
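| /** |
| * Builds the operator pipeline that automatically gathers column statistics for the written |
| * table: an analyze pipeline over the existing table, or one based on a table value constructor |
| * when the table does not exist yet (e.g. CTAS). |
| */ |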
| private void genAutoColumnStatsGatheringPipeline(Table table, Map<String, String> partSpec, Operator curr, |
| boolean isInsertInto, boolean useTableValueConstructor) |
| throws SemanticException { |
| LOG.info("Generate an operator pipeline to autogather column stats for table " + table.getTableName() |
| + " in query " + ctx.getCmd()); |
| ColumnStatsAutoGatherContext columnStatsAutoGatherContext = |
| new ColumnStatsAutoGatherContext(this, conf, curr, table, partSpec, isInsertInto, ctx); |
| if (useTableValueConstructor) { |
| // Table does not exist, use table value constructor to simulate |
| columnStatsAutoGatherContext.insertTableValuesAnalyzePipeline(); |
| } else { |
| // Table already exists |
| columnStatsAutoGatherContext.insertAnalyzePipeline(); |
| } |
| columnStatsAutoGatherContexts.add(columnStatsAutoGatherContext); |
| } |
| |
| String fixCtasColumnName(String colName) { |
| return colName; |
| } |
| |
| private void checkAcidConstraints() { |
| /* |
| LOG.info("Modifying config values for ACID write"); |
| conf.setBoolVar(ConfVars.HIVEOPTREDUCEDEDUPLICATION, true); |
| conf.setIntVar(ConfVars.HIVEOPTREDUCEDEDUPLICATIONMINREDUCER, 1); |
| These props are now enabled elsewhere (see commit diffs). It would be better instead to throw |
| if they are not set. For example, if the user has set hive.optimize.reducededuplication=false for |
| some reason, we'll run a query contrary to what they wanted... But throwing now would be |
| backwards incompatible. |
| */ |
| conf.set(AcidUtils.CONF_ACID_KEY, "true"); |
| SessionState.get().getConf().set(AcidUtils.CONF_ACID_KEY, "true"); |
| } |
| |
| /** |
| * Generate the conversion SelectOperator that converts the columns into the |
| * types that are expected by the table_desc. |
| */ |
| private Operator genConversionSelectOperator(String dest, QB qb, Operator input, |
| Deserializer deserializer, DynamicPartitionCtx dpCtx, List<FieldSchema> parts, Table table) |
| throws SemanticException { |
| StructObjectInspector oi = null; |
| try { |
| oi = (StructObjectInspector) deserializer.getObjectInspector(); |
| } catch (Exception e) { |
| throw new SemanticException(e); |
| } |
| |
| // Check column number |
| List<? extends StructField> tableFields = oi.getAllStructFieldRefs(); |
| boolean dynPart = HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING); |
| List<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver().getColumnInfos(); |
| int inColumnCnt = rowFields.size(); |
| int outColumnCnt = tableFields.size(); |
| |
| // if target table is always unpartitioned, then the output object inspector will already contain the partition cols |
| // too, therefore we shouldn't add the partition col num to the output col num |
| boolean alreadyContainsPartCols = Optional.ofNullable(table) |
| .map(Table::getStorageHandler) |
| .map(HiveStorageHandler::alwaysUnpartitioned) |
| .orElse(Boolean.FALSE); |
| |
| if (dynPart && dpCtx != null && !alreadyContainsPartCols) { |
| outColumnCnt += dpCtx.getNumDPCols(); |
| } |
| |
| // The numbers of input columns and output columns should match for a regular query |
| if (!updating(dest) && !deleting(dest) && inColumnCnt != outColumnCnt) { |
| String reason = "Table " + dest + " has " + outColumnCnt |
| + " columns, but query has " + inColumnCnt + " columns."; |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(), |
| qb.getParseInfo().getDestForClause(dest), reason)); |
| } |
| |
| // Check column types |
| AtomicBoolean converted = new AtomicBoolean(false); |
| int columnNumber = tableFields.size(); |
| List<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(columnNumber); |
| |
| // MetadataTypedColumnsetSerDe does not need type conversions because it |
| // does the conversion to String by itself. |
| if (!(deserializer instanceof MetadataTypedColumnsetSerDe) && !deleting(dest)) { |
| |
| // If we're updating, add the required virtual columns. |
| int virtualColumnSize = updating(dest) ? AcidUtils.getAcidVirtualColumns(table).size() : 0; |
| for (int i = 0; i < virtualColumnSize; i++) { |
| expressions.add(new ExprNodeColumnDesc(rowFields.get(i).getType(), |
| rowFields.get(i).getInternalName(), "", true)); |
| } |
| |
| // This part only deals with non-partition columns; partition columns are handled next |
| int rowFieldsOffset = expressions.size(); |
| for (int i = 0; i < columnNumber; i++) { |
| ExprNodeDesc column = handleConversion(tableFields.get(i), rowFields.get(rowFieldsOffset + i), converted, dest, i); |
| expressions.add(column); |
| } |
| |
| // For Non-Native ACID tables we should convert the new values as well |
| rowFieldsOffset = expressions.size(); |
| if (updating(dest) && AcidUtils.isNonNativeAcidTable(table, true)) { |
| for (int i = 0; i < columnNumber; i++) { |
| ExprNodeDesc column = handleConversion(tableFields.get(i), rowFields.get(rowFieldsOffset + i), converted, dest, i); |
| expressions.add(column); |
| } |
| } |
| |
| // deal with dynamic partition columns |
| rowFieldsOffset = expressions.size(); |
| if (dynPart && dpCtx != null && dpCtx.getNumDPCols() > 0) { |
| // rowFields contains non-partitioned columns (tableFields) followed by DP columns |
| for (int dpColIdx = 0; dpColIdx < rowFields.size() - rowFieldsOffset; ++dpColIdx) { |
| |
| // create ExprNodeDesc |
| ColumnInfo inputColumn = rowFields.get(dpColIdx + rowFieldsOffset); |
| TypeInfo inputTypeInfo = inputColumn.getType(); |
| ExprNodeDesc column = |
| new ExprNodeColumnDesc(inputTypeInfo, inputColumn.getInternalName(), "", true); |
| |
| // Cast input column to destination column type if necessary. |
| if (conf.getBoolVar(DYNAMICPARTITIONCONVERT)) { |
| if (parts != null && !parts.isEmpty()) { |
| String destPartitionName = dpCtx.getDPColNames().get(dpColIdx); |
| FieldSchema destPartitionFieldSchema = parts.stream() |
| .filter(dynamicPartition -> dynamicPartition.getName().equals(destPartitionName)) |
| .findFirst().orElse(null); |
| if (destPartitionFieldSchema == null) { |
| throw new IllegalStateException("Partition schema for dynamic partition " + |
| destPartitionName + " not found in DynamicPartitionCtx."); |
| } |
| String partitionType = destPartitionFieldSchema.getType(); |
| if (partitionType == null) { |
| throw new IllegalStateException("Couldn't get FieldSchema for partition" + |
| destPartitionFieldSchema.getName()); |
| } |
| PrimitiveTypeInfo partitionTypeInfo = |
| TypeInfoFactory.getPrimitiveTypeInfo(partitionType); |
| if (!partitionTypeInfo.equals(inputTypeInfo)) { |
| column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() |
| .createConversionCast(column, partitionTypeInfo); |
| converted.set(true); |
| } |
| } else { |
| LOG.warn("Partition schema for dynamic partition " + inputColumn.getAlias() + " (" |
| + inputColumn.getInternalName() + ") not found in DynamicPartitionCtx. " |
| + "This is expected with a CTAS."); |
| } |
| } |
| expressions.add(column); |
| } |
| } |
| } |
| |
| if (converted.get()) { |
| // add the select operator |
| RowResolver rowResolver = new RowResolver(); |
| List<String> colNames = new ArrayList<String>(); |
| Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); |
| for (int i = 0; i < expressions.size(); i++) { |
| String name = getColumnInternalName(i); |
| rowResolver.put("", name, new ColumnInfo(name, expressions.get(i) |
| .getTypeInfo(), "", false)); |
| colNames.add(name); |
| colExprMap.put(name, expressions.get(i)); |
| } |
| input = putOpInsertMap(OperatorFactory.getAndMakeChild( |
| new SelectDesc(expressions, colNames), new RowSchema(rowResolver |
| .getColumnInfos()), input), rowResolver); |
| input.setColumnExprMap(colExprMap); |
| } |
| return input; |
| } |
| |
| /** |
| * Creates an expression for converting from a table column to a row column. For example: |
| * The table column is int but the query provides a string in the row, then we need to cast automatically. |
| * @param tableField The target table column |
| * @param rowField The source row column |
| * @param conversion Set to true if we detect that a conversion is needed. This is effectively a |
| * second return value, used to notify the caller that a cast was needed. |
| * @param dest The destination table for the error message |
| * @param columnNum The destination column id for the error message |
| * @return The expression describing the selected column. Note that {@code conversion} can be |
| * considered a return value as well |
| * @throws SemanticException If a conversion was needed but no automatic conversion is available |
| */ |
| private ExprNodeDesc handleConversion(StructField tableField, ColumnInfo rowField, AtomicBoolean conversion, String dest, int columnNum) |
| throws SemanticException { |
| ObjectInspector tableFieldOI = tableField |
| .getFieldObjectInspector(); |
| TypeInfo tableFieldTypeInfo = TypeInfoUtils |
| .getTypeInfoFromObjectInspector(tableFieldOI); |
| TypeInfo rowFieldTypeInfo = rowField.getType(); |
| ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, |
| rowField.getInternalName(), "", false, |
| rowField.isSkewedCol()); |
| // LazySimpleSerDe can convert any type to String type using |
| // JSON format. However, we may add more operators. |
| // Thus, we still keep the conversion. |
| if (!tableFieldTypeInfo.equals(rowFieldTypeInfo)) { |
| // need to do some conversions here |
| conversion.set(true); |
| if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) { |
| // cannot convert to complex types |
| column = null; |
| } else { |
| column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() |
| .createConversionCast(column, (PrimitiveTypeInfo)tableFieldTypeInfo); |
| } |
| if (column == null) { |
| String reason = "Cannot convert column " + columnNum + " from " |
| + rowFieldTypeInfo + " to " + tableFieldTypeInfo + "."; |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(), |
| qb.getParseInfo().getDestForClause(dest), reason)); |
| } |
| } |
| return column; |
| } |
| |
| @SuppressWarnings("nls") |
| private Operator genLimitPlan(String dest, Operator input, int offset, int limit) { |
| // A map-only job can be optimized - instead of converting it to a |
| // map-reduce job, we can have another map |
| // job to do the same to avoid the cost of sorting in the map-reduce phase. |
| // A better approach would be to |
| // write into a local file and then have a map-only job. |
| // Add the limit operator to get the value fields |
| |
| RowResolver inputRR = opParseCtx.get(input).getRowResolver(); |
| |
| LimitDesc limitDesc = new LimitDesc(offset, limit); |
| globalLimitCtx.setLastReduceLimitDesc(limitDesc); |
| |
| Operator limitMap = putOpInsertMap(OperatorFactory.getAndMakeChild( |
| limitDesc, new RowSchema(inputRR.getColumnInfos()), input), |
| inputRR); |
| |
| LOG.debug("Created LimitOperator Plan for clause: {} row schema: {}", dest, inputRR); |
| |
| return limitMap; |
| } |
| |
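| /** |
| * Generates the plan for a UDTF: verifies that no GROUP BY / DISTRIBUTE BY / SORT BY / |
| * CLUSTER BY clauses or lateral views are present, initializes the UDTF with an object |
| * inspector built from the input columns, checks the column aliases against the UDTF output, |
| * and adds the UDTFOperator to the operator DAG. |
| */ |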
| private Operator genUDTFPlan(GenericUDTF genericUDTF, String outputTableAlias, List<String> colAliases, QB qb, |
| Operator input, boolean outerLV) throws SemanticException { |
| |
| // No GROUP BY / DISTRIBUTE BY / SORT BY / CLUSTER BY |
| QBParseInfo qbp = qb.getParseInfo(); |
| if (!qbp.getDestToGroupBy().isEmpty()) { |
| throw new SemanticException(ErrorMsg.UDTF_NO_GROUP_BY.getMsg()); |
| } |
| if (!qbp.getDestToDistributeBy().isEmpty()) { |
| throw new SemanticException(ErrorMsg.UDTF_NO_DISTRIBUTE_BY.getMsg()); |
| } |
| if (!qbp.getDestToSortBy().isEmpty()) { |
| throw new SemanticException(ErrorMsg.UDTF_NO_SORT_BY.getMsg()); |
| } |
| if (!qbp.getDestToClusterBy().isEmpty()) { |
| throw new SemanticException(ErrorMsg.UDTF_NO_CLUSTER_BY.getMsg()); |
| } |
| if (!qbp.getAliasToLateralViews().isEmpty()) { |
| throw new SemanticException(ErrorMsg.UDTF_LATERAL_VIEW.getMsg()); |
| } |
| |
| LOG.debug("Table alias: {} Col aliases: {}", outputTableAlias, colAliases); |
| |
| // Use the RowResolver from the input operator to generate an input |
| // ObjectInspector that can be used to initialize the UDTF. Then, the |
| // resulting output object inspector can be used to make the RowResolver |
| // for the UDTF operator. |
| RowResolver selectRR = opParseCtx.get(input).getRowResolver(); |
| List<ColumnInfo> inputCols = selectRR.getColumnInfos(); |
| |
| // Create the object inspector for the input columns and initialize the UDTF |
| List<String> colNames = new ArrayList<String>(); |
| ObjectInspector[] colOIs = new ObjectInspector[inputCols.size()]; |
| for (int i = 0; i < inputCols.size(); i++) { |
| colNames.add(inputCols.get(i).getInternalName()); |
| colOIs[i] = inputCols.get(i).getObjectInspector(); |
| } |
| StandardStructObjectInspector rowOI = |
| ObjectInspectorFactory.getStandardStructObjectInspector(colNames, Arrays.asList(colOIs)); |
| StructObjectInspector outputOI = genericUDTF.initialize(rowOI); |
| |
| int numUdtfCols = outputOI.getAllStructFieldRefs().size(); |
| if (colAliases.isEmpty()) { |
| // user did not specify alias names, infer them from outputOI |
| for (StructField field : outputOI.getAllStructFieldRefs()) { |
| colAliases.add(field.getFieldName()); |
| } |
| } |
| // Make sure that the number of column aliases in the AS clause matches |
| // the number of columns output by the UDTF |
| int numSuppliedAliases = colAliases.size(); |
| if (numUdtfCols != numSuppliedAliases) { |
| throw new SemanticException(ErrorMsg.UDTF_ALIAS_MISMATCH |
| .getMsg("expected " + numUdtfCols + " aliases " + "but got " |
| + numSuppliedAliases)); |
| } |
| |
| // Generate the output column infos / row resolver using internal names. |
| List<ColumnInfo> udtfCols = new ArrayList<ColumnInfo>(); |
| |
| Iterator<String> colAliasesIter = colAliases.iterator(); |
| for (StructField sf : outputOI.getAllStructFieldRefs()) { |
| |
| String colAlias = colAliasesIter.next(); |
| assert (colAlias != null); |
| |
| // Since the UDTF operator feeds into a LVJ operator that will rename |
| // all the internal names, we can just use field name from the UDTF's OI |
| // as the internal name |
| ColumnInfo col = new ColumnInfo(sf.getFieldName(), TypeInfoUtils |
| .getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()), |
| outputTableAlias, false); |
| udtfCols.add(col); |
| } |
| |
| // Create the row resolver for this operator from the output columns |
| RowResolver out_rwsch = new RowResolver(); |
| for (int i = 0; i < udtfCols.size(); i++) { |
| out_rwsch.put(outputTableAlias, colAliases.get(i), udtfCols.get(i)); |
| } |
| |
| // Add the UDTFOperator to the operator DAG |
| return putOpInsertMap(OperatorFactory.getAndMakeChild( |
| new UDTFDesc(genericUDTF, outerLV), new RowSchema(out_rwsch.getColumnInfos()), |
| input), out_rwsch); |
| } |
| |
| @SuppressWarnings("nls") |
| private Operator genLimitMapRedPlan(String dest, QB qb, Operator input, |
| int offset, int limit, boolean extraMRStep) throws SemanticException { |
| // A map-only job can be optimized - instead of converting it to a |
| // map-reduce job, we can have another map |
| // job to do the same to avoid the cost of sorting in the map-reduce phase. |
| // A better approach would be to |
| // write into a local file and then have a map-only job. |
| // Add the limit operator to get the value fields |
| Operator curr = genLimitPlan(dest, input, offset, limit); |
| |
| // the client requested that an extra map-reduce step be performed |
| if (!extraMRStep || !HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_GROUPBY_LIMIT_EXTRASTEP)){ |
| return curr; |
| } |
| |
| // Create a reduceSink operator followed by another limit |
| curr = genReduceSinkPlan(dest, qb, curr, 1, false); |
| return genLimitPlan(dest, curr, offset, limit); |
| } |
| |
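| /** |
| * Looks up the positions of the table's bucketing columns in the table schema and returns the |
| * corresponding column expressions, optionally cast to the types expected by the table. |
| */ |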
| private List<ExprNodeDesc> getPartitionColsFromBucketCols(String dest, QB qb, Table tab, TableDesc table_desc, |
| Operator input, boolean convert) |
| throws SemanticException { |
| List<String> tabBucketCols = tab.getBucketCols(); |
| List<FieldSchema> tabCols = tab.getCols(); |
| |
| // Partition by the bucketing column |
| List<Integer> posns = new ArrayList<Integer>(); |
| |
| for (String bucketCol : tabBucketCols) { |
| int pos = 0; |
| for (FieldSchema tabCol : tabCols) { |
| if (bucketCol.equals(tabCol.getName())) { |
| posns.add(pos); |
| break; |
| } |
| pos++; |
| } |
| } |
| |
| return genConvertCol(dest, qb, table_desc, input, posns, convert); |
| } |
| |
| // We have to set up the bucketing columns differently for updates and deletes, |
| // as they always use the ROW__ID column. |
| private List<ExprNodeDesc> getPartitionColsFromBucketColsForUpdateDelete( |
| Operator input, boolean convert) throws SemanticException { |
| //return genConvertCol(dest, qb, tab, table_desc, input, Arrays.asList(0), convert); |
| // In the case of update and delete the bucketing column is always the first column, |
| // and it isn't in the table info. So rather than asking the table for it, |
| // we'll construct it ourselves and send it back. This is based on the work done in |
| // genConvertCol below. |
| ColumnInfo rowField = opParseCtx.get(input).getRowResolver().getColumnInfos().get(0); |
| TypeInfo rowFieldTypeInfo = rowField.getType(); |
| ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowField.getInternalName(), |
| rowField.getTabAlias(), true); |
| if (convert) { |
| column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() |
| .createConversionCast(column, TypeInfoFactory.intTypeInfo); |
| } |
| return Collections.singletonList(column); |
| } |
| |
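| /** |
| * Builds column expressions for the given column positions of the input, casting each one to |
| * the corresponding table column type when convert is set. Throws if a required conversion |
| * targets a non-primitive type. |
| */ |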
| private List<ExprNodeDesc> genConvertCol(String dest, QB qb, TableDesc tableDesc, Operator input, |
| List<Integer> posns, boolean convert) |
| throws SemanticException { |
| StructObjectInspector oi = null; |
| try { |
| AbstractSerDe deserializer = tableDesc.getSerDeClass() |
| .newInstance(); |
| deserializer.initialize(conf, tableDesc.getProperties(), null); |
| oi = (StructObjectInspector) deserializer.getObjectInspector(); |
| } catch (Exception e) { |
| throw new SemanticException(e); |
| } |
| |
| List<? extends StructField> tableFields = oi.getAllStructFieldRefs(); |
| List<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver().getColumnInfos(); |
| |
| // Check column type |
| int columnNumber = posns.size(); |
| List<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(columnNumber); |
| for (Integer posn : posns) { |
| ObjectInspector tableFieldOI = tableFields.get(posn).getFieldObjectInspector(); |
| TypeInfo tableFieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(tableFieldOI); |
| TypeInfo rowFieldTypeInfo = rowFields.get(posn).getType(); |
| ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, |
| rowFields.get(posn).getInternalName(), rowFields.get(posn).getTabAlias(), |
| rowFields.get(posn).getIsVirtualCol()); |
| |
| if (convert && !tableFieldTypeInfo.equals(rowFieldTypeInfo)) { |
| // need to do some conversions here |
| if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) { |
| // cannot convert to complex types |
| column = null; |
| } else { |
| column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() |
| .createConversionCast(column, (PrimitiveTypeInfo)tableFieldTypeInfo); |
| } |
| if (column == null) { |
| String reason = "Cannot convert column " + posn + " from " |
| + rowFieldTypeInfo + " to " + tableFieldTypeInfo + "."; |
| throw new SemanticException(ASTErrorUtils.getMsg( |
| ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(), |
| qb.getParseInfo().getDestForClause(dest), reason)); |
| } |
| } |
| expressions.add(column); |
| } |
| |
| return expressions; |
| } |
| |
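| /** |
| * Looks up the positions of the table's sort columns in the table schema and returns the |
| * corresponding column expressions (without type conversion). |
| */ |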
| private List<ExprNodeDesc> getSortCols(String dest, QB qb, Table tab, TableDesc tableDesc, Operator input) |
| throws SemanticException { |
| List<Order> tabSortCols = tab.getSortCols(); |
| List<FieldSchema> tabCols = tab.getCols(); |
| |
| // Find the positions of the sort columns in the table schema |
| List<Integer> posns = new ArrayList<Integer>(); |
| for (Order sortCol : tabSortCols) { |
| int pos = 0; |
| for (FieldSchema tabCol : tabCols) { |
| if (sortCol.getCol().equals(tabCol.getName())) { |
| posns.add(pos); |
| break; |
| } |
| pos++; |
| } |
| } |
| |
| return genConvertCol(dest, qb, tableDesc, input, posns, false); |
| } |
| |
| private void getSortOrders(Table tab, StringBuilder order, StringBuilder nullOrder) { |
| List<Order> tabSortCols = tab.getSortCols(); |
| List<FieldSchema> tabCols = tab.getCols(); |
| |
| for (Order sortCol : tabSortCols) { |
| for (FieldSchema tabCol : tabCols) { |
| if (sortCol.getCol().equals(tabCol.getName())) { |
| order.append(DirectionUtils.codeToSign(sortCol.getOrder())); |
| nullOrder.append(sortCol.getOrder() == DirectionUtils.ASCENDING_CODE ? 'a' : 'z'); |
| break; |
| } |
| } |
| } |
| } |
| |
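| /** |
| * Generates the ReduceSink plan for the CLUSTER BY / DISTRIBUTE BY / SORT BY / ORDER BY clauses |
| * of the given destination, deriving the partition and sort expressions, sort directions and |
| * null ordering from the parse info, and enforcing the strict-mode LIMIT requirement for |
| * ORDER BY. |
| */ |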
| private Operator genReduceSinkPlan(String dest, QB qb, Operator<?> input, |
| int numReducers, boolean hasOrderBy) throws SemanticException { |
| |
| RowResolver inputRR = opParseCtx.get(input).getRowResolver(); |
| |
| // First generate the expressions for the partition and sort keys. |
| // The CLUSTER BY / DISTRIBUTE BY clause provides the expressions used |
| // for partitioning. |
| ASTNode partitionExprs = qb.getParseInfo().getClusterByForClause(dest); |
| if (partitionExprs == null) { |
| partitionExprs = qb.getParseInfo().getDistributeByForClause(dest); |
| } |
| List<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>(); |
| if (partitionExprs != null) { |
| int ccount = partitionExprs.getChildCount(); |
| for (int i = 0; i < ccount; ++i) { |
| ASTNode cl = (ASTNode) partitionExprs.getChild(i); |
| partCols.add(genExprNodeDesc(cl, inputRR)); |
| } |
| } |
| ASTNode sortExprs = qb.getParseInfo().getClusterByForClause(dest); |
| if (sortExprs == null) { |
| sortExprs = qb.getParseInfo().getSortByForClause(dest); |
| } |
| |
| if (sortExprs == null) { |
| sortExprs = qb.getParseInfo().getOrderByForClause(dest); |
| if (sortExprs != null) { |
| assert numReducers == 1; |
| // in strict mode, in the presence of order by, limit must be specified |
| if (qb.getParseInfo().getDestLimit(dest) == null) { |
| String error = StrictChecks.checkNoLimit(conf); |
| if (error != null) { |
| throw new SemanticException(generateErrorMessage(sortExprs, error)); |
| } |
| } |
| } |
| } |
| List<ExprNodeDesc> sortCols = new ArrayList<ExprNodeDesc>(); |
| StringBuilder order = new StringBuilder(); |
| StringBuilder nullOrder = new StringBuilder(); |
| if (sortExprs != null) { |
| int ccount = sortExprs.getChildCount(); |
| for (int i = 0; i < ccount; ++i) { |
| ASTNode cl = (ASTNode) sortExprs.getChild(i); |
| |
| if (cl.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { |
| // SortBy ASC |
| order.append("+"); |
| cl = (ASTNode) cl.getChild(0); |
| if (cl.getType() == HiveParser.TOK_NULLS_FIRST) { |
| nullOrder.append("a"); |
| } else if (cl.getType() == HiveParser.TOK_NULLS_LAST) { |
| nullOrder.append("z"); |
| } else { |
| throw new SemanticException( |
| "Unexpected null ordering option: " + cl.getType()); |
| } |
| cl = (ASTNode) cl.getChild(0); |
| } else if (cl.getType() == HiveParser.TOK_TABSORTCOLNAMEDESC) { |
| // SortBy DESC |
| order.append("-"); |
| cl = (ASTNode) cl.getChild(0); |
| if (cl.getType() == HiveParser.TOK_NULLS_FIRST) { |
| nullOrder.append("a"); |
| } else if (cl.getType() == HiveParser.TOK_NULLS_LAST) { |
| nullOrder.append("z"); |
| } else { |
| throw new SemanticException( |
| "Unexpected null ordering option: " + cl.getType()); |
| } |
| cl = (ASTNode) cl.getChild(0); |
| } else { |
| // ClusterBy |
| order.append("+"); |
| nullOrder.append("a"); |
| } |
| ExprNodeDesc exprNode = genExprNodeDesc(cl, inputRR); |
| sortCols.add(exprNode); |
| } |
| } |
| |
| Table dest_tab = qb.getMetaData().getDestTableForAlias(dest); |
| AcidUtils.Operation acidOp = Operation.NOT_ACID; |
| if (AcidUtils.isTransactionalTable(dest_tab)) { |
| acidOp = getAcidType(Utilities.getTableDesc(dest_tab).getOutputFileFormatClass(), dest, |
| AcidUtils.isInsertOnlyTable(dest_tab)); |
| } |
| boolean isCompaction = false; |
| if (dest_tab != null && dest_tab.getParameters() != null) { |
| isCompaction = AcidUtils.isCompactionTable(dest_tab.getParameters()); |
| } |
| Operator result = genReduceSinkPlan( |
| input, partCols, sortCols, order.toString(), nullOrder.toString(), |
| numReducers, acidOp, true, isCompaction); |
| if (result.getParentOperators().size() == 1 && |
| result.getParentOperators().get(0) instanceof ReduceSinkOperator) { |
| ((ReduceSinkOperator) result.getParentOperators().get(0)) |
| .getConf().setHasOrderBy(hasOrderBy); |
| } |
| return result; |
| } |
| |
| private Operator genReduceSinkPlan(Operator<?> input, |
| List<ExprNodeDesc> partitionCols, List<ExprNodeDesc> sortCols, |
| String sortOrder, String nullOrder, int numReducers, AcidUtils.Operation acidOp, boolean isCompaction) |
| throws SemanticException { |
| return genReduceSinkPlan(input, partitionCols, sortCols, sortOrder, nullOrder, numReducers, |
| acidOp, false, isCompaction); |
| } |
| |
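| /** |
| * Core ReduceSink generation: backtracks the sort and value expressions to the input, |
| * optionally pulls constant sort keys out (to be re-added by the following Select), builds the |
| * ReduceSink descriptor and a Select operator on top of it that restores the original column |
| * layout. |
| */ |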
| @SuppressWarnings("nls") |
| private Operator genReduceSinkPlan(Operator<?> input, List<ExprNodeDesc> partitionCols, List<ExprNodeDesc> sortCols, |
| String sortOrder, String nullOrder, int numReducers, AcidUtils.Operation acidOp, |
| boolean pullConstants, boolean isCompaction) throws SemanticException { |
| |
| RowResolver inputRR = opParseCtx.get(input).getRowResolver(); |
| |
| Operator dummy = Operator.createDummy(); |
| dummy.setParentOperators(Arrays.asList(input)); |
| |
| List<ExprNodeDesc> newSortCols = new ArrayList<ExprNodeDesc>(); |
| StringBuilder newSortOrder = new StringBuilder(); |
| StringBuilder newNullOrder = new StringBuilder(); |
| List<ExprNodeDesc> sortColsBack = new ArrayList<ExprNodeDesc>(); |
| for (int i = 0; i < sortCols.size(); i++) { |
| ExprNodeDesc sortCol = sortCols.get(i); |
| // If we are not pulling constants, OR |
| // we are pulling constants but this is not a constant |
| if (!pullConstants || !(sortCol instanceof ExprNodeConstantDesc)) { |
| newSortCols.add(sortCol); |
| newSortOrder.append(sortOrder.charAt(i)); |
| newNullOrder.append(nullOrder.charAt(i)); |
| sortColsBack.add(ExprNodeDescUtils.backtrack(sortCol, dummy, input)); |
| } |
| } |
| |
| // For the generation of the values expression just get the inputs |
| // signature and generate field expressions for those |
| RowResolver rsRR = new RowResolver(); |
| List<String> outputColumns = new ArrayList<String>(); |
| List<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>(); |
| List<ExprNodeDesc> valueColsBack = new ArrayList<ExprNodeDesc>(); |
| Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); |
| List<ExprNodeDesc> constantCols = new ArrayList<ExprNodeDesc>(); |
| |
| List<ColumnInfo> columnInfos = inputRR.getColumnInfos(); |
| |
| int[] index = new int[columnInfos.size()]; |
| for (int i = 0; i < index.length; i++) { |
| ColumnInfo colInfo = columnInfos.get(i); |
| String[] nm = inputRR.reverseLookup(colInfo.getInternalName()); |
| String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName()); |
| ExprNodeColumnDesc value = new ExprNodeColumnDesc(colInfo); |
| |
| // backtrack can be null when input is script operator |
| ExprNodeDesc valueBack = ExprNodeDescUtils.backtrack(value, dummy, input); |
| if (pullConstants && valueBack instanceof ExprNodeConstantDesc) { |
| // ignore, it will be generated by SEL op |
| index[i] = Integer.MAX_VALUE; |
| constantCols.add(valueBack); |
| continue; |
| } |
| int kindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, sortColsBack); |
| if (kindex >= 0) { |
| index[i] = kindex; |
| ColumnInfo newColInfo = new ColumnInfo(colInfo); |
| newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex); |
| newColInfo.setTabAlias(nm[0]); |
| rsRR.put(nm[0], nm[1], newColInfo); |
| if (nm2 != null) { |
| rsRR.addMappingOnly(nm2[0], nm2[1], newColInfo); |
| } |
| continue; |
| } |
| int vindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, valueColsBack); |
| if (vindex >= 0) { |
| index[i] = -vindex - 1; |
| continue; |
| } |
| index[i] = -valueCols.size() - 1; |
| String outputColName = getColumnInternalName(valueCols.size()); |
| |
| valueCols.add(value); |
| valueColsBack.add(valueBack); |
| |
| ColumnInfo newColInfo = new ColumnInfo(colInfo); |
| newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName); |
| newColInfo.setTabAlias(nm[0]); |
| |
| rsRR.put(nm[0], nm[1], newColInfo); |
| if (nm2 != null) { |
| rsRR.addMappingOnly(nm2[0], nm2[1], newColInfo); |
| } |
| outputColumns.add(outputColName); |
| } |
| |
| dummy.setParentOperators(null); |
| |
| ReduceSinkDesc rsdesc = PlanUtils.getReduceSinkDesc(newSortCols, valueCols, outputColumns, |
| false, -1, partitionCols, newSortOrder.toString(), newNullOrder.toString(), defaultNullOrder, |
| numReducers, acidOp, isCompaction); |
| Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(rsdesc, |
| new RowSchema(rsRR.getColumnInfos()), input), rsRR); |
| |
| List<String> keyColNames = rsdesc.getOutputKeyColumnNames(); |
| for (int i = 0 ; i < keyColNames.size(); i++) { |
| colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), newSortCols.get(i)); |
| } |
| List<String> valueColNames = rsdesc.getOutputValueColumnNames(); |
| for (int i = 0 ; i < valueColNames.size(); i++) { |
| colExprMap.put(Utilities.ReduceField.VALUE + "." + valueColNames.get(i), valueCols.get(i)); |
| } |
| interim.setColumnExprMap(colExprMap); |
| |
| RowResolver selectRR = new RowResolver(); |
| List<ExprNodeDesc> selCols = new ArrayList<ExprNodeDesc>(); |
| List<String> selOutputCols = new ArrayList<String>(); |
| Map<String, ExprNodeDesc> selColExprMap = new HashMap<String, ExprNodeDesc>(); |
| |
| Iterator<ExprNodeDesc> constants = constantCols.iterator(); |
| for (int i = 0; i < index.length; i++) { |
| ColumnInfo prev = columnInfos.get(i); |
| String[] nm = inputRR.reverseLookup(prev.getInternalName()); |
| String[] nm2 = inputRR.getAlternateMappings(prev.getInternalName()); |
| ColumnInfo info = new ColumnInfo(prev); |
| |
| ExprNodeDesc desc; |
| if (index[i] == Integer.MAX_VALUE) { |
| desc = constants.next(); |
| } else { |
| String field; |
| if (index[i] >= 0) { |
| field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]); |
| } else { |
| field = Utilities.ReduceField.VALUE + "." + valueColNames.get(-index[i] - 1); |
| } |
| desc = new ExprNodeColumnDesc(info.getType(), |
| field, info.getTabAlias(), info.getIsVirtualCol()); |
| } |
| selCols.add(desc); |
| |
| String internalName = getColumnInternalName(i); |
| info.setInternalName(internalName); |
| selectRR.put(nm[0], nm[1], info); |
| if (nm2 != null) { |
| selectRR.addMappingOnly(nm2[0], nm2[1], info); |
| } |
| selOutputCols.add(internalName); |
| selColExprMap.put(internalName, desc); |
| } |
| SelectDesc select = new SelectDesc(selCols, selOutputCols); |
| Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(select, |
| new RowSchema(selectRR.getColumnInfos()), interim), selectRR); |
| output.setColumnExprMap(selColExprMap); |
| return output; |
| } |
| |
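| /** |
| * Creates the JoinOperator on top of the ReduceSink children: builds the output row resolver |
| * and the per-input value/filter expressions, handles semijoin inputs whose columns are omitted |
| * from the output, attaches residual (post-join) filter expressions when needed, and may add a |
| * Select on top of the join to project only the correct columns for semijoins with residual |
| * predicates. |
| */ |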
| private Operator genJoinOperatorChildren(QBJoinTree join, Operator left, |
| Operator[] right, Set<Integer> omitOpts, ExprNodeDesc[][] joinKeys) throws SemanticException { |
| |
| RowResolver outputRR = new RowResolver(); |
| List<String> outputColumnNames = new ArrayList<String>(); |
| // all children are base classes |
| Operator<?>[] rightOps = new Operator[right.length]; |
| |
| Map<String, Byte> reversedExprs = new HashMap<String, Byte>(); |
| Map<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>(); |
| Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); |
| Map<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>(); |
| Map<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>(); |
| |
| // Only used for semijoin with residual predicates |
| List<ColumnInfo> topSelectInputColumns = new ArrayList<>(); |
| |
| for (int pos = 0; pos < right.length; ++pos) { |
| Operator<?> input = right[pos] == null ? left : right[pos]; |
| if (input == null) { |
| input = left; |
| } |
| ReduceSinkOperator rs = (ReduceSinkOperator) input; |
| if (rs.getNumParent() != 1) { |
| throw new SemanticException("RS should have single parent"); |
| } |
| Operator<?> parent = rs.getParentOperators().get(0); |
| ReduceSinkDesc rsDesc = (ReduceSinkDesc) (input.getConf()); |
| |
| int[] index = rs.getValueIndex(); |
| |
| List<ExprNodeDesc> valueDesc = new ArrayList<ExprNodeDesc>(); |
| List<ExprNodeDesc> filterDesc = new ArrayList<ExprNodeDesc>(); |
| Byte tag = (byte) rsDesc.getTag(); |
| |
| // check whether this input operator produces output |
| // If it has residual, we do not skip this output, |
| // we will add a Select on top of the join |
| if (omitOpts != null && omitOpts.contains(pos) |
| && join.getPostJoinFilters().size() == 0) { |
| exprMap.put(tag, valueDesc); |
| filterMap.put(tag, filterDesc); |
| rightOps[pos] = input; |
| continue; |
| } |
| |
| List<String> keyColNames = rsDesc.getOutputKeyColumnNames(); |
| List<String> valColNames = rsDesc.getOutputValueColumnNames(); |
| |
| // prepare output descriptors for the input operator |
| RowResolver inputRR = opParseCtx.get(input).getRowResolver(); |
| RowResolver parentRR = opParseCtx.get(parent).getRowResolver(); |
| posToAliasMap.put(pos, new HashSet<String>(inputRR.getTableNames())); |
| |
| List<ColumnInfo> columns = parentRR.getColumnInfos(); |
| for (int i = 0; i < index.length; i++) { |
| ColumnInfo prev = columns.get(i); |
| String[] nm = parentRR.reverseLookup(prev.getInternalName()); |
| String[] nm2 = parentRR.getAlternateMappings(prev.getInternalName()); |
| if (outputRR.get(nm[0], nm[1]) != null) { |
| continue; |
| } |
| ColumnInfo info = new ColumnInfo(prev); |
| String field; |
| if (index[i] >= 0) { |
| field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]); |
| } else { |
| field = Utilities.ReduceField.VALUE + "." + valColNames.get(-index[i] - 1); |
| } |
| String internalName = getColumnInternalName(outputColumnNames.size()); |
| ExprNodeColumnDesc desc = new ExprNodeColumnDesc(info.getType(), |
| field, info.getTabAlias(), info.getIsVirtualCol()); |
| |
| info.setInternalName(internalName); |
| colExprMap.put(internalName, desc); |
| outputRR.put(nm[0], nm[1], info); |
| if (nm2 != null) { |
| outputRR.addMappingOnly(nm2[0], nm2[1], info); |
| } |
| |
| valueDesc.add(desc); |
| outputColumnNames.add(internalName); |
| reversedExprs.put(internalName, tag); |
| |
| // Populate semijoin select if needed |
| if (omitOpts == null || !omitOpts.contains(pos)) { |
| topSelectInputColumns.add(info); |
| } |
| } |
| for (ASTNode cond : join.getFilters().get(tag)) { |
| filterDesc.add(genExprNodeDesc(cond, inputRR)); |
| } |
| exprMap.put(tag, valueDesc); |
| filterMap.put(tag, filterDesc); |
| rightOps[pos] = input; |
| } |
| |
| JoinCondDesc[] joinCondns = new JoinCondDesc[join.getJoinCond().length]; |
| for (int i = 0; i < join.getJoinCond().length; i++) { |
| JoinCond condn = join.getJoinCond()[i]; |
| joinCondns[i] = new JoinCondDesc(condn); |
| } |
| |
| JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, |
| join.getNoOuterJoin(), joinCondns, filterMap, joinKeys, null); |
| desc.setReversedExprs(reversedExprs); |
| desc.setFilterMap(join.getFilterMap()); |
| // Add filters that apply to more than one input |
| if (join.getPostJoinFilters().size() != 0 && |
| (!join.getNoOuterJoin() || !join.getNoSemiJoin() |
| || HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PUSH_RESIDUAL_INNER))) { |
| LOG.debug("Generate JOIN with post-filtering conditions"); |
| List<ExprNodeDesc> residualFilterExprs = new ArrayList<ExprNodeDesc>(); |
| for (ASTNode cond : join.getPostJoinFilters()) { |
| residualFilterExprs.add(genExprNodeDesc(cond, outputRR, false, isCBOExecuted())); |
| } |
| desc.setResidualFilterExprs(residualFilterExprs); |
| // Clean post-conditions |
| join.getPostJoinFilters().clear(); |
| } |
| |
| JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(getOpContext(), desc, |
| new RowSchema(outputRR.getColumnInfos()), rightOps); |
| joinOp.setColumnExprMap(colExprMap); |
| joinOp.setPosToAliasMap(posToAliasMap); |
| |
| if (join.getNullSafes() != null) { |
| boolean[] nullsafes = new boolean[join.getNullSafes().size()]; |
| for (int i = 0; i < nullsafes.length; i++) { |
| nullsafes[i] = join.getNullSafes().get(i); |
| } |
| desc.setNullSafes(nullsafes); |
| } |
| |
| Operator<?> topOp = putOpInsertMap(joinOp, outputRR); |
| if (omitOpts != null && !omitOpts.isEmpty() |
| && desc.getResidualFilterExprs() != null && !desc.getResidualFilterExprs().isEmpty()) { |
| // Adding a select operator to top of semijoin to ensure projection of only correct columns |
| final List<ExprNodeDesc> topSelectExprs = new ArrayList<>(); |
| final List<String> topSelectOutputColNames = new ArrayList<>(); |
| final RowResolver topSelectRR = new RowResolver(); |
| final Map<String, ExprNodeDesc> topSelectColExprMap = new HashMap<String, ExprNodeDesc>(); |
| for (ColumnInfo colInfo : topSelectInputColumns) { |
| ExprNodeColumnDesc columnExpr = new ExprNodeColumnDesc(colInfo); |
| topSelectExprs.add(columnExpr); |
| topSelectOutputColNames.add(colInfo.getInternalName()); |
| topSelectColExprMap.put(colInfo.getInternalName(), columnExpr); |
| String[] nm = outputRR.reverseLookup(columnExpr.getColumn()); |
| String[] nm2 = outputRR.getAlternateMappings(columnExpr.getColumn()); |
| topSelectRR.put(nm[0], nm[1], colInfo); |
| if (nm2 != null) { |
| topSelectRR.addMappingOnly(nm2[0], nm2[1], colInfo); |
| } |
| } |
| final SelectDesc topSelect = new SelectDesc(topSelectExprs, topSelectOutputColNames); |
| topOp = putOpInsertMap(OperatorFactory.getAndMakeChild(topSelect, |
| new RowSchema(topSelectRR.getColumnInfos()), topOp), topSelectRR); |
| topOp.setColumnExprMap(topSelectColExprMap); |
| } |
| |
| return topOp; |
| } |
| |
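| /** |
| * Generates the join key expressions for each join input and performs type checking and |
| * implicit type conversion on them. |
| */ |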
| private ExprNodeDesc[][] genJoinKeys(QBJoinTree joinTree, Operator[] inputs) |
| throws SemanticException { |
| ExprNodeDesc[][] joinKeys = new ExprNodeDesc[inputs.length][]; |
| for (int i = 0; i < inputs.length; i++) { |
| RowResolver inputRR = opParseCtx.get(inputs[i]).getRowResolver(); |
| List<ASTNode> expressions = joinTree.getExpressions().get(i); |
| joinKeys[i] = new ExprNodeDesc[expressions.size()]; |
| for (int j = 0; j < joinKeys[i].length; j++) { |
| joinKeys[i][j] = genExprNodeDesc(expressions.get(j), inputRR, true, isCBOExecuted()); |
| } |
| } |
| // Type checking and implicit type conversion for join keys |
| return genJoinOperatorTypeCheck(joinKeys); |
| } |
| |
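| /** |
| * Creates the ReduceSink operator feeding one side of a join: join key columns are mapped to |
| * KEY.reducesinkkey fields, all remaining columns become VALUE columns, and a single reducer is |
| * forced for cartesian products (no join keys). |
| */ |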
| @SuppressWarnings("nls") |
| private Operator genJoinReduceSinkChild(ExprNodeDesc[] joinKeys, |
| Operator<?> parent, String[] srcs, int tag) throws SemanticException { |
| |
| Operator dummy = Operator.createDummy(); // dummy for backtracking |
| dummy.setParentOperators(Arrays.asList(parent)); |
| |
| RowResolver inputRR = opParseCtx.get(parent).getRowResolver(); |
| RowResolver outputRR = new RowResolver(); |
| List<String> outputColumns = new ArrayList<String>(); |
| List<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>(); |
| List<ExprNodeDesc> reduceKeysBack = new ArrayList<ExprNodeDesc>(); |
| |
| // Compute join keys and store in reduceKeys |
| for (ExprNodeDesc joinKey : joinKeys) { |
| reduceKeys.add(joinKey); |
| reduceKeysBack.add(ExprNodeDescUtils.backtrack(joinKey, dummy, parent)); |
| } |
| |
| // Walk over the input row resolver and copy in the output |
| ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>(); |
| |
| List<ColumnInfo> columns = inputRR.getColumnInfos(); |
| int[] index = new int[columns.size()]; |
| for (int i = 0; i < columns.size(); i++) { |
| ColumnInfo colInfo = columns.get(i); |
| String[] nm = inputRR.reverseLookup(colInfo.getInternalName()); |
| String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName()); |
| ExprNodeDesc expr = new ExprNodeColumnDesc(colInfo); |
| |
| // backtrack can be null when input is script operator |
| ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, parent); |
| if (exprBack != null) { |
| if (ExprNodeDescUtils.isConstant(exprBack)) { |
| int kindex = reduceKeysBack.indexOf(exprBack); |
| if (kindex >= 0) { |
| addJoinKeyToRowSchema(outputRR, index, i, colInfo, nm, nm2, kindex); |
| continue; |
| } |
| } else { |
| int startIdx = 0; |
| int kindex; |
| // a joinKey may be present multiple times; add the duplicates to the schema with different internal names. |
| // example: KEY.reducesinkkey0, KEY.reducesinkkey1 |
| // join LU_CUSTOMER a16 |
| // on (a15.CUSTOMER_ID = a16.CUSTOMER_ID and pa11.CUSTOMER_ID = a16.CUSTOMER_ID) |
| while ((kindex = ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack, startIdx)) >= 0) { |
| addJoinKeyToRowSchema(outputRR, index, i, colInfo, nm, nm2, kindex); |
| startIdx = kindex + 1; |
| } |
| if (startIdx > 0) { |
| // at least one instance found |
| continue; |
| } |
| } |
| } |
| index[i] = -reduceValues.size() - 1; |
| String outputColName = getColumnInternalName(reduceValues.size()); |
| |
| reduceValues.add(expr); |
| |
| ColumnInfo newColInfo = new ColumnInfo(colInfo); |
| String internalColName = Utilities.ReduceField.VALUE + "." + outputColName; |
| newColInfo.setInternalName(internalColName); |
| newColInfo.setTabAlias(nm[0]); |
| |
| outputRR.put(nm[0], nm[1], newColInfo); |
| if (nm2 != null) { |
| outputRR.addMappingOnly(nm2[0], nm2[1], newColInfo); |
| } |
| outputColumns.add(outputColName); |
| } |
| dummy.setParentOperators(null); |
| |
| int numReds = -1; |
| |
| // Use only 1 reducer in case of cartesian product |
| if (reduceKeys.size() == 0) { |
| numReds = 1; |
| String error = StrictChecks.checkCartesian(conf); |
| if (error != null) { |
| throw new SemanticException(error); |
| } |
| } |
| |
| ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, |
| reduceValues, outputColumns, false, tag, |
| reduceKeys.size(), numReds, AcidUtils.Operation.NOT_ACID, defaultNullOrder); |
| |
| Map<String, String> translatorMap = new HashMap<String, String>(); |
| |
| Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); |
| |
| List<String> keyColNames = rsDesc.getOutputKeyColumnNames(); |
| for (int i = 0 ; i < keyColNames.size(); i++) { |
| String oldName = keyColNames.get(i); |
| String newName = Utilities.ReduceField.KEY + "." + oldName; |
| colExprMap.put(newName, reduceKeys.get(i)); |
| translatorMap.put(oldName, newName); |
| } |
| List<String> valColNames = rsDesc.getOutputValueColumnNames(); |
| for (int i = 0 ; i < valColNames.size(); i++) { |
| String oldName = valColNames.get(i); |
| String newName = Utilities.ReduceField.VALUE + "." + oldName; |
| colExprMap.put(newName, reduceValues.get(i)); |
| translatorMap.put(oldName, newName); |
| } |
| |
| RowSchema defaultRs = new RowSchema(outputRR.getColumnInfos()); |
| |
| List<ColumnInfo> newColumnInfos = new ArrayList<ColumnInfo>(); |
| for (ColumnInfo ci : outputRR.getColumnInfos()) { |
| if (translatorMap.containsKey(ci.getInternalName())) { |
| ci = new ColumnInfo(ci); |
| ci.setInternalName(translatorMap.get(ci.getInternalName())); |
| } |
| newColumnInfos.add(ci); |
| } |
| |
| ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( |
| OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(newColumnInfos), parent), outputRR); |
| |
| rsOp.setValueIndex(index); |
| rsOp.setColumnExprMap(colExprMap); |
| rsOp.setInputAliases(srcs); |
| return rsOp; |
| } |
| |
| private void addJoinKeyToRowSchema( |
| RowResolver outputRR, int[] index, int i, ColumnInfo colInfo, String[] nm, String[] nm2, int kindex) { |
| ColumnInfo newColInfo = new ColumnInfo(colInfo); |
| String internalColName = ReduceField.KEY + ".reducesinkkey" + kindex; |
| newColInfo.setInternalName(internalColName); |
| newColInfo.setTabAlias(nm[0]); |
| outputRR.put(nm[0], nm[1], newColInfo); |
| if (nm2 != null) { |
| outputRR.addMappingOnly(nm2[0], nm2[1], newColInfo); |
| } |
| index[i] = kindex; |
| } |
| |
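| /** |
| * Recursively generates the operator tree for a join: builds (or reuses) the left child join, |
| * pushes applicable filters, prepares each base source (adding Select + map-side GroupBy for |
| * the right side of a left semijoin), creates the per-input ReduceSinks and finally the join |
| * operator itself, applying any remaining post-join filters as FilterOperators. |
| */ |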
| private Operator genJoinOperator(QB qb, QBJoinTree joinTree, |
| Map<String, Operator> map, |
| Operator joiningOp) throws SemanticException { |
| QBJoinTree leftChild = joinTree.getJoinSrc(); |
| Operator joinSrcOp = joiningOp instanceof JoinOperator ? joiningOp : null; |
| |
| if (joinSrcOp == null && leftChild != null) { |
| joinSrcOp = genJoinOperator(qb, leftChild, map, null); |
| } |
| |
| if ( joinSrcOp != null ) { |
| List<ASTNode> filter = joinTree.getFiltersForPushing().get(0); |
| for (ASTNode cond : filter) { |
| joinSrcOp = genFilterPlan(qb, cond, joinSrcOp, false); |
| } |
| } |
| |
| String[] baseSrc = joinTree.getBaseSrc(); |
| Operator[] srcOps = new Operator[baseSrc.length]; |
| |
| Set<Integer> omitOpts = null; // set of input to the join that should be |
| // omitted by the output |
| int pos = 0; |
| for (String src : baseSrc) { |
| if (src != null) { |
| Operator srcOp = map.get(src.toLowerCase()); |
| |
| // for left-semi join, generate an additional selection & group-by |
| // operator before ReduceSink |
| List<ASTNode> fields = joinTree.getRHSSemijoinColumns(src); |
| if (fields != null) { |
| // the RHS table columns should not be output from the join |
| if (omitOpts == null) { |
| omitOpts = new HashSet<Integer>(); |
| } |
| omitOpts.add(pos); |
| |
| // generate a selection operator for group-by keys only |
| srcOp = insertSelectForSemijoin(fields, srcOp); |
| |
| // generate a groupby operator (HASH mode) for a map-side partial |
| // aggregation for semijoin |
| srcOps[pos++] = genMapGroupByForSemijoin(fields, srcOp); |
| } else { |
| srcOps[pos++] = srcOp; |
| } |
| } else { |
| assert pos == 0; |
| srcOps[pos++] = joinSrcOp; |
| } |
| } |
| |
| ExprNodeDesc[][] joinKeys = genJoinKeys(joinTree, srcOps); |
| |
| for (int i = 0; i < srcOps.length; i++) { |
| // generate a ReduceSink operator for the join |
| String[] srcs = baseSrc[i] != null ? new String[] {baseSrc[i]} : joinTree.getLeftAliases(); |
| if (!isCBOExecuted()) { |
| srcOps[i] = genNotNullFilterForJoinSourcePlan(qb, srcOps[i], joinTree, joinKeys[i]); |
| } |
| srcOps[i] = genJoinReduceSinkChild(joinKeys[i], srcOps[i], srcs, joinTree.getNextTag()); |
| } |
| |
| Operator<?> topOp = genJoinOperatorChildren(joinTree, joinSrcOp, srcOps, omitOpts, joinKeys); |
| JoinOperator joinOp; |
| if (topOp instanceof JoinOperator) { |
| joinOp = (JoinOperator) topOp; |
| } else { |
| // We might generate a Select operator on top of the join operator for |
| // semijoin |
| joinOp = (JoinOperator) topOp.getParentOperators().get(0); |
| } |
| joinOp.getConf().setQBJoinTreeProps(joinTree); |
| joinContext.put(joinOp, joinTree); |
| |
| if (joinTree.getPostJoinFilters().size() != 0) { |
| assert joinTree.getNoOuterJoin(); |
| if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PUSH_RESIDUAL_INNER)) { |
| // Safety check for postconditions |
| throw new SemanticException("Post-filtering conditions should have been added to the JOIN operator"); |
| } |
| for(ASTNode condn : joinTree.getPostJoinFilters()) { |
| topOp = genFilterPlan(qb, condn, topOp, false); |
| } |
| } |
| |
| return topOp; |
| } |
| |
| /** |
| * Construct a selection operator for semijoin that filters out all fields |
| * other than the group by keys. |
| * |
| * @param fields |
| * list of fields that need to be output |
| * @param input |
| * input operator |
| * @return the selection operator. |
| * @throws SemanticException |
| */ |
| private Operator insertSelectForSemijoin(List<ASTNode> fields, |
| Operator<?> input) throws SemanticException { |
| |
| RowResolver inputRR = opParseCtx.get(input).getRowResolver(); |
| List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>(); |
| List<String> outputColumnNames = new ArrayList<String>(); |
| Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); |
| |
| RowResolver outputRR = new RowResolver(); |
| |
| // construct the list of columns that need to be projected |
| for (int i = 0; i < fields.size(); ++i) { |
| ASTNode field = fields.get(i); |
| String[] nm; |
| String[] nm2; |
| ExprNodeDesc expr = genExprNodeDesc(field, inputRR); |
| if (expr instanceof ExprNodeColumnDesc) { |
| // In most of the cases, this is a column reference |
| ExprNodeColumnDesc columnExpr = (ExprNodeColumnDesc) expr; |
| nm = inputRR.reverseLookup(columnExpr.getColumn()); |
| nm2 = inputRR.getAlternateMappings(columnExpr.getColumn()); |
| } else if (expr instanceof ExprNodeConstantDesc) { |
| // However, it can be a constant too. In that case, we need to track |
| // the column that it originated from in the input operator so we can |
| // propagate the aliases. |
| ExprNodeConstantDesc constantExpr = (ExprNodeConstantDesc) expr; |
| String inputCol = constantExpr.getFoldedFromCol(); |
| nm = inputRR.reverseLookup(inputCol); |
| nm2 = inputRR.getAlternateMappings(inputCol); |
| } else { |
| // We might generate other types that are not recognized, e.g., a field reference |
| // if it is a nested field, but since this is just an additional optimization, |
| // we bail out without introducing the Select + GroupBy below the right input |
| // of the left semijoin |
| return input; |
| } |
| String colName = getColumnInternalName(i); |
| outputColumnNames.add(colName); |
| ColumnInfo colInfo = new ColumnInfo(colName, expr.getTypeInfo(), "", false); |
| outputRR.put(nm[0], nm[1], colInfo); |
| if (nm2 != null) { |
| outputRR.addMappingOnly(nm2[0], nm2[1], colInfo); |
| } |
| colList.add(expr); |
| colExprMap.put(colName, expr); |
| } |
| |
| // create selection operator |
| Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( |
| new SelectDesc(colList, outputColumnNames, false), |
| new RowSchema(outputRR.getColumnInfos()), input), outputRR); |
| |
| output.setColumnExprMap(colExprMap); |
| return output; |
| } |
| |
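| /** |
| * Generate a map-side (HASH mode) group-by operator for semijoin so that the |
| * right-hand side emits only distinct join keys. |
| * |
| * @param fields |
| * the group by key fields |
| * @param input |
| * input operator |
| * @return the group-by operator, or the input operator unchanged if a key |
| * expression is neither a column reference nor a folded constant. |
| * @throws SemanticException |
| */ |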
| private Operator genMapGroupByForSemijoin(List<ASTNode> fields, Operator<?> input) |
| throws SemanticException { |
| |
| RowResolver groupByInputRowResolver = opParseCtx.get(input).getRowResolver(); |
| RowResolver groupByOutputRowResolver = new RowResolver(); |
| List<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>(); |
| List<String> outputColumnNames = new ArrayList<String>(); |
| List<AggregationDesc> aggregations = new ArrayList<AggregationDesc>(); |
| Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); |
| |
| for (int i = 0; i < fields.size(); ++i) { |
| // get the group by keys to ColumnInfo |
| ASTNode colName = fields.get(i); |
| String[] nm; |
| String[] nm2; |
| ExprNodeDesc grpByExprNode = genExprNodeDesc(colName, groupByInputRowResolver); |
| if (grpByExprNode instanceof ExprNodeColumnDesc) { |
| // In most of the cases, this is a column reference |
| ExprNodeColumnDesc columnExpr = (ExprNodeColumnDesc) grpByExprNode; |
| nm = groupByInputRowResolver.reverseLookup(columnExpr.getColumn()); |
| nm2 = groupByInputRowResolver.getAlternateMappings(columnExpr.getColumn()); |
| } else if (grpByExprNode instanceof ExprNodeConstantDesc) { |
| // However, it can be a constant too. In that case, we need to track |
| // the column that it originated from in the input operator so we can |
| // propagate the aliases. |
| ExprNodeConstantDesc constantExpr = (ExprNodeConstantDesc) grpByExprNode; |
| String inputCol = constantExpr.getFoldedFromCol(); |
| nm = groupByInputRowResolver.reverseLookup(inputCol); |
| nm2 = groupByInputRowResolver.getAlternateMappings(inputCol); |
| } else { |
| // We might generate other types that are not recognized, e.g., a field reference |
| // if it is a nested field, but since this is just an additional optimization, |
| // we bail out without introducing the Select + GroupBy below the right input |
| // of the left semijoin |
| return input; |
| } |
| groupByKeys.add(grpByExprNode); |
| // generate output column names |
| String field = getColumnInternalName(i); |
| outputColumnNames.add(field); |
| ColumnInfo colInfo2 = new ColumnInfo(field, grpByExprNode.getTypeInfo(), |
| "", false); |
| groupByOutputRowResolver.put(nm[0], nm[1], colInfo2); |
| if (nm2 != null) { |
| groupByOutputRowResolver.addMappingOnly(nm2[0], nm2[1], colInfo2); |
| } |
| groupByOutputRowResolver.putExpression(colName, colInfo2); |
| // establish mapping from the output column to the input column |
| colExprMap.put(field, grpByExprNode); |
| } |
| |
| // Generate group-by operator |
| float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY); |
| float memoryThreshold = HiveConf |
| .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); |
| float minReductionHashAggr = HiveConf |
| .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION); |
| float minReductionHashAggrLowerBound = HiveConf |
| .getFloatVar(conf, ConfVars.HIVEMAPAGGRHASHMINREDUCTIONLOWERBOUND); |
| Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( |
| new GroupByDesc(GroupByDesc.Mode.HASH, outputColumnNames, groupByKeys, aggregations, |
| false, groupByMemoryUsage, memoryThreshold, minReductionHashAggr, minReductionHashAggrLowerBound, |
| null, false, -1, false), |
| new RowSchema(groupByOutputRowResolver.getColumnInfos()), |
| input), groupByOutputRowResolver); |
| |
| op.setColumnExprMap(colExprMap); |
| return op; |
| } |
| |
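| /** |
| * Make the join key expressions of all inputs type-compatible: find a common |
| * comparison type for each key position and insert conversion casts where needed. |
| */ |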
| private ExprNodeDesc[][] genJoinOperatorTypeCheck(ExprNodeDesc[][] keys) |
| throws SemanticException { |
| // keys[i] -> ArrayList<exprNodeDesc> for the i-th join operator key list |
| int keyLength = 0; |
| for (int i = 0; i < keys.length; i++) { |
| if (i == 0) { |
| keyLength = keys[i].length; |
| } else { |
| assert keyLength == keys[i].length; |
| } |
| } |
| // implicit type conversion hierarchy |
| for (int k = 0; k < keyLength; k++) { |
| // Find the common class for type conversion |
| TypeInfo commonType = keys[0][k].getTypeInfo(); |
| for (int i = 1; i < keys.length; i++) { |
| TypeInfo a = commonType; |
| TypeInfo b = keys[i][k].getTypeInfo(); |
| commonType = FunctionRegistry.getCommonClassForComparison(a, b); |
| if (commonType == null) { |
| throw new SemanticException( |
| "Cannot do equality join on different types: " + a.getTypeName() |
| + " and " + b.getTypeName()); |
| } |
| } |
| // Add implicit type conversion if necessary |
| for (int i = 0; i < keys.length; i++) { |
| if (TypeInfoUtils.isConversionRequiredForComparison( |
| keys[i][k].getTypeInfo(), commonType)) { |
| keys[i][k] = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() |
| .createConversionCast(keys[i][k], (PrimitiveTypeInfo)commonType); |
| } else { |
| // For the case no implicit type conversion, e.g., varchar(5) and varchar(10), |
| // pick the common type for all the keys since during run-time, same key type is assumed. |
| keys[i][k].setTypeInfo(commonType); |
| } |
| } |
| } |
| return keys; |
| } |
| |
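| // Generate the join operator tree for the query block starting from its QBJoinTree. |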
| private Operator genJoinPlan(QB qb, Map<String, Operator> map) |
| throws SemanticException { |
| QBJoinTree joinTree = qb.getQbJoinTree(); |
| return genJoinOperator(qb, joinTree, map, null); |
| } |
| |
| /** |
| * Extract the filters from the join condition and push them on top of the |
| * source operators. This procedure traverses the join tree recursively. |
| */ |
| private void pushJoinFilters(QB qb, QBJoinTree joinTree, |
| Map<String, Operator> map) throws SemanticException { |
| pushJoinFilters(qb, joinTree, map, true); |
| } |
| |
| /** |
| * Extract the filters from the join condition and push them on top of the |
| * source operators. This procedure traverses the join tree recursively. |
| */ |
| private void pushJoinFilters(QB qb, QBJoinTree joinTree, |
| Map<String, Operator> map, |
| boolean recursively) throws SemanticException { |
| if ( recursively ) { |
| if (joinTree.getJoinSrc() != null) { |
| pushJoinFilters(qb, joinTree.getJoinSrc(), map); |
| } |
| } |
| List<List<ASTNode>> filters = joinTree.getFiltersForPushing(); |
| int pos = 0; |
| for (String src : joinTree.getBaseSrc()) { |
| if (src != null) { |
| Operator srcOp = map.get(src); |
| List<ASTNode> filter = filters.get(pos); |
| for (ASTNode cond : filter) { |
| srcOp = genFilterPlan(qb, cond, srcOp, false); |
| } |
| map.put(src, srcOp); |
| } |
| pos++; |
| } |
| } |
| |
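| // Collect the table aliases named in MAPJOIN hints. The hint is honored only when |
| // HIVEIGNOREMAPJOINHINT is false and the execution engine is not Tez; otherwise the |
| // hint is recorded as removed in queryProperties. |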
| private List<String> getMapSideJoinTables(QB qb) { |
| List<String> cols = new ArrayList<String>(); |
| |
| ASTNode hints = qb.getParseInfo().getHints(); |
| for (int pos = 0; pos < hints.getChildCount(); pos++) { |
| ASTNode hint = (ASTNode) hints.getChild(pos); |
| if (((ASTNode) hint.getChild(0)).getToken().getType() == HintParser.TOK_MAPJOIN) { |
| // honor the mapjoin hint only if the user has not asked to ignore it and the engine is not Tez |
| if (!conf.getBoolVar(HiveConf.ConfVars.HIVEIGNOREMAPJOINHINT) |
| && !conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) { |
| ASTNode hintTblNames = (ASTNode) hint.getChild(1); |
| int numCh = hintTblNames.getChildCount(); |
| for (int tblPos = 0; tblPos < numCh; tblPos++) { |
| String tblName = ((ASTNode) hintTblNames.getChild(tblPos)).getText() |
| .toLowerCase(); |
| if (!cols.contains(tblName)) { |
| cols.add(tblName); |
| } |
| } |
| } |
| else { |
| queryProperties.setMapJoinRemoved(true); |
| } |
| } |
| } |
| |
| return cols; |
| } |
| |
| // The join alias is modified before being inserted for consumption by sort-merge |
| // join queries. If the join is part of a sub-query, the alias is modified to include |
| // the sub-query alias. |
| private String getModifiedAlias(QB qb, String alias) { |
| return QB.getAppendedAliasFromId(qb.getId(), alias); |
| } |
| |
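| // Build a QBJoinTree for a UNIQUEJOIN clause: the first table goes into the left |
| // aliases, all remaining tables into the right aliases, and every table into baseSrc. |
| // PRESERVE flags are recorded per table and subqueries are rejected. |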
| private QBJoinTree genUniqueJoinTree(QB qb, ASTNode joinParseTree, |
| Map<String, Operator> aliasToOpInfo) |
| throws SemanticException { |
| QBJoinTree joinTree = new QBJoinTree(); |
| joinTree.setNoOuterJoin(false); |
| |
| joinTree.setExpressions(new ArrayList<List<ASTNode>>()); |
| joinTree.setFilters(new ArrayList<List<ASTNode>>()); |
| joinTree.setFiltersForPushing(new ArrayList<List<ASTNode>>()); |
| |
| // Create joinTree structures to fill them up later |
| List<String> rightAliases = new ArrayList<String>(); |
| List<String> leftAliases = new ArrayList<String>(); |
| List<String> baseSrc = new ArrayList<String>(); |
| List<Boolean> preserved = new ArrayList<Boolean>(); |
| |
| boolean lastPreserved = false; |
| int cols = -1; |
| |
| for (int i = 0; i < joinParseTree.getChildCount(); i++) { |
| ASTNode child = (ASTNode) joinParseTree.getChild(i); |
| |
| switch (child.getToken().getType()) { |
| case HiveParser.TOK_TABREF: |
| // Handle a table - populate aliases appropriately: |
| // leftAliases should contain the first table, rightAliases should |
| // contain all other tables and baseSrc should contain all tables |
| |
| String tableName = getUnescapedUnqualifiedTableName((ASTNode) child.getChild(0)); |
| |
| String alias = child.getChildCount() == 1 ? tableName |
| : unescapeIdentifier(child.getChild(child.getChildCount() - 1) |
| .getText().toLowerCase()); |
| |
| if (i == 0) { |
| leftAliases.add(alias); |
| joinTree.setLeftAlias(alias); |
| } else { |
| rightAliases.add(alias); |
| } |
| joinTree.getAliasToOpInfo().put(getModifiedAlias(qb, alias), aliasToOpInfo.get(alias)); |
| joinTree.setId(qb.getId()); |
| baseSrc.add(alias); |
| |
| preserved.add(lastPreserved); |
| lastPreserved = false; |
| break; |
| |
| case HiveParser.TOK_EXPLIST: |
| if (cols == -1 && child.getChildCount() != 0) { |
| cols = child.getChildCount(); |
| } else if (child.getChildCount() != cols) { |
| throw new SemanticException("Tables with different or invalid " |
| + "number of keys in UNIQUEJOIN"); |
| } |
| |
| List<ASTNode> expressions = new ArrayList<ASTNode>(); |
| List<ASTNode> filt = new ArrayList<ASTNode>(); |
| List<ASTNode> filters = new ArrayList<ASTNode>(); |
| |
| for (Node exp : child.getChildren()) { |
| expressions.add((ASTNode) exp); |
| } |
| |
| joinTree.getExpressions().add(expressions); |
| joinTree.getFilters().add(filt); |
| joinTree.getFiltersForPushing().add(filters); |
| break; |
| |
| case HiveParser.KW_PRESERVE: |
| lastPreserved = true; |
| break; |
| |
| case HiveParser.TOK_SUBQUERY: |
| throw new SemanticException( |
| "Subqueries are not supported in UNIQUEJOIN"); |
| |
| default: |
| throw new SemanticException("Unexpected UNIQUEJOIN structure"); |
| } |
| } |
| |
| joinTree.setBaseSrc(baseSrc.toArray(new String[0])); |
| joinTree.setLeftAliases(leftAliases.toArray(new String[0])); |
| joinTree.setRightAliases(rightAliases.toArray(new String[0])); |
| |
| JoinCond[] condn = new JoinCond[preserved.size()]; |
| for (int i = 0; i < condn.length; i++) { |
| condn[i] = new JoinCond(preserved.get(i)); |
| } |
| joinTree.setJoinCond(condn); |
| |
| if ((qb.getParseInfo().getHints() != null) |
| && !(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez"))) { |
| LOG.info("STREAMTABLE hint honored."); |
| parseStreamTables(joinTree, qb); |
| } |
| return joinTree; |
| } |
| |
| /* |
| * Setup a QBJoinTree between a SubQuery and its Parent Query. The Parent Query |
| * is the lhs of the Join. |
| * |
| * The Parent Query is represented by the last Operator needed to process its From Clause. |
| * In case of a single table Query this will be a TableScan, but it can be a Join Operator |
| * if the Parent Query contains Join clauses, or in case of a single source from clause, |
| * the source could be a SubQuery or a PTF invocation. |
| * |
| * We set up the QBJoinTree with the above constraints in place. So: |
| * - the lhs of the QBJoinTree can be another QBJoinTree if the Parent Query operator |
| * is a JoinOperator. In this case we get its QBJoinTree from the 'joinContext' |
| * - the rhs is always a reference to the SubQuery. Its alias is obtained from the |
| * QBSubQuery object. |
| * |
| * The QBSubQuery also provides the Joining Condition AST. The Joining Condition has been |
| * transformed in QBSubQuery setup, before this call. The Joining Condition contains any correlated |
| * predicates plus a predicate for joining the Parent Query expression with the SubQuery. |
| * |
| * The QBSubQuery also specifies what kind of Join to construct. |
| * |
| * Given this information, once we initialize the QBJoinTree, we call the 'parseJoinCondition' |
| * method to validate and parse Join conditions. |
| */ |
| private QBJoinTree genSQJoinTree(QB qb, ISubQueryJoinInfo subQuery, |
| Operator joiningOp, |
| Map<String, Operator> aliasToOpInfo) |
| throws SemanticException { |
| QBJoinTree joinTree = new QBJoinTree(); |
| JoinCond[] condn = new JoinCond[1]; |
| |
| switch (subQuery.getJoinType()) { |
| case LEFTOUTER: |
| joinTree.setNoOuterJoin(false); |
| condn[0] = new JoinCond(0, 1, JoinType.LEFTOUTER); |
| break; |
| case RIGHTOUTER: |
| joinTree.setNoOuterJoin(false); |
| condn[0] = new JoinCond(0, 1, JoinType.RIGHTOUTER); |
| break; |
| case FULLOUTER: |
| joinTree.setNoOuterJoin(false); |
| condn[0] = new JoinCond(0, 1, JoinType.FULLOUTER); |
| break; |
| case LEFTSEMI: |
| joinTree.setNoSemiJoin(false); |
| condn[0] = new JoinCond(0, 1, JoinType.LEFTSEMI); |
| break; |
| case ANTI: |
| joinTree.setNoSemiJoin(false); |
| condn[0] = new JoinCond(0, 1, JoinType.ANTI); |
| break; |
| default: |
| condn[0] = new JoinCond(0, 1, JoinType.INNER); |
| joinTree.setNoOuterJoin(true); |
| break; |
| } |
| joinTree.setJoinCond(condn); |
| |
| if ( joiningOp instanceof JoinOperator ) { |
| QBJoinTree leftTree = joinContext.get(joiningOp); |
| joinTree.setJoinSrc(leftTree); |
| String[] leftChildAliases = leftTree.getLeftAliases(); |
| String[] leftAliases = new String[leftChildAliases.length + 1]; |
| for (int i = 0; i < leftChildAliases.length; i++) { |
| leftAliases[i] = leftChildAliases[i]; |
| } |
| leftAliases[leftChildAliases.length] = leftTree.getRightAliases()[0]; |
| joinTree.setLeftAliases(leftAliases); |
| } else { |
| String alias = unescapeIdentifier( |
| SubQueryUtils.getAlias(joiningOp, aliasToOpInfo).toLowerCase()); |
| joinTree.setLeftAlias(alias); |
| String[] leftAliases = new String[1]; |
| leftAliases[0] = alias; |
| joinTree.setLeftAliases(leftAliases); |
| String[] children = new String[2]; |
| children[0] = alias; |
| joinTree.setBaseSrc(children); |
| joinTree.setId(qb.getId()); |
| joinTree.getAliasToOpInfo().put( |
| getModifiedAlias(qb, alias), aliasToOpInfo.get(alias)); |
| } |
| |
| String rightalias = unescapeIdentifier(subQuery.getAlias().toLowerCase()); |
| String[] rightAliases = new String[1]; |
| rightAliases[0] = rightalias; |
| joinTree.setRightAliases(rightAliases); |
| String[] children = joinTree.getBaseSrc(); |
| if (children == null) { |
| children = new String[2]; |
| } |
| children[1] = rightalias; |
| joinTree.setBaseSrc(children); |
| joinTree.setId(qb.getId()); |
| joinTree.getAliasToOpInfo().put( |
| getModifiedAlias(qb, rightalias), aliasToOpInfo.get(rightalias)); |
| // remember rhs table for semijoin |
| if (!joinTree.getNoSemiJoin()) { |
| joinTree.addRHSSemijoin(rightalias); |
| } |
| |
| List<List<ASTNode>> expressions = new ArrayList<List<ASTNode>>(); |
| expressions.add(new ArrayList<ASTNode>()); |
| expressions.add(new ArrayList<ASTNode>()); |
| joinTree.setExpressions(expressions); |
| |
| List<Boolean> nullsafes = new ArrayList<Boolean>(); |
| joinTree.setNullSafes(nullsafes); |
| |
| List<List<ASTNode>> filters = new ArrayList<List<ASTNode>>(); |
| filters.add(new ArrayList<ASTNode>()); |
| filters.add(new ArrayList<ASTNode>()); |
| joinTree.setFilters(filters); |
| joinTree.setFilterMap(new int[2][]); |
| |
| List<List<ASTNode>> filtersForPushing = new ArrayList<List<ASTNode>>(); |
| filtersForPushing.add(new ArrayList<ASTNode>()); |
| filtersForPushing.add(new ArrayList<ASTNode>()); |
| joinTree.setFiltersForPushing(filtersForPushing); |
| |
| ASTNode joinCond = subQuery.getJoinConditionAST(); |
| List<String> leftSrc = new ArrayList<String>(); |
| parseJoinCondition(joinTree, joinCond, leftSrc, aliasToOpInfo); |
| if (leftSrc.size() == 1) { |
| joinTree.setLeftAlias(leftSrc.get(0)); |
| } |
| |
| return joinTree; |
| } |
| |
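| /** |
| * Build a QBJoinTree from a JOIN parse tree. The left child may itself be a join, |
| * in which case its QBJoinTree becomes the join source of the new tree; the right |
| * child must be a table reference, a subquery or a PTF invocation. The join |
| * condition is parsed afterwards and MAPJOIN/STREAMTABLE hints are applied. |
| */ |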
| private QBJoinTree genJoinTree(QB qb, ASTNode joinParseTree, |
| Map<String, Operator> aliasToOpInfo) |
| throws SemanticException { |
| QBJoinTree joinTree = new QBJoinTree(); |
| JoinCond[] condn = new JoinCond[1]; |
| |
| switch (joinParseTree.getToken().getType()) { |
| case HiveParser.TOK_LEFTOUTERJOIN: |
| joinTree.setNoOuterJoin(false); |
| condn[0] = new JoinCond(0, 1, JoinType.LEFTOUTER); |
| break; |
| case HiveParser.TOK_RIGHTOUTERJOIN: |
| joinTree.setNoOuterJoin(false); |
| condn[0] = new JoinCond(0, 1, JoinType.RIGHTOUTER); |
| break; |
| case HiveParser.TOK_FULLOUTERJOIN: |
| joinTree.setNoOuterJoin(false); |
| condn[0] = new JoinCond(0, 1, JoinType.FULLOUTER); |
| break; |
| case HiveParser.TOK_LEFTSEMIJOIN: |
| joinTree.setNoSemiJoin(false); |
| condn[0] = new JoinCond(0, 1, JoinType.LEFTSEMI); |
| break; |
| case HiveParser.TOK_LEFTANTISEMIJOIN: |
| joinTree.setNoSemiJoin(false); |
| condn[0] = new JoinCond(0, 1, JoinType.ANTI); |
| break; |
| default: |
| condn[0] = new JoinCond(0, 1, JoinType.INNER); |
| joinTree.setNoOuterJoin(true); |
| break; |
| } |
| |
| joinTree.setJoinCond(condn); |
| |
| ASTNode left = (ASTNode) joinParseTree.getChild(0); |
| ASTNode right = (ASTNode) joinParseTree.getChild(1); |
| if (joinParseTree.getChildren().size() >= 4) { |
| addPkFkInfo(joinTree, (ASTNode) joinParseTree.getChild(3)); |
| } |
| |
| boolean isValidLeftToken = isValidJoinSide(left); |
| boolean isJoinLeftToken = !isValidLeftToken && isJoinToken(left); |
| boolean isValidRightToken = isValidJoinSide(right); |
| boolean isJoinRightToken = !isValidRightToken && isJoinToken(right); |
| // TODO: if we didn't care about the column order, we could switch join sides here |
| // for TOK_JOIN and TOK_FULLOUTERJOIN. |
| if (!isValidLeftToken && !isJoinLeftToken) { |
| throw new SemanticException("Invalid token on the left side of the join: " |
| + left.getToken().getText() + "; please rewrite your query"); |
| } else if (!isValidRightToken) { |
| String advice = ""; |
| if (isJoinRightToken && !isJoinLeftToken) { |
| advice = "; for example, put the nested join on the left side, or nest joins differently"; |
| } else if (isJoinRightToken) { |
| advice = "; for example, nest joins differently"; |
| } |
| throw new SemanticException("Invalid token on the right side of the join: " |
| + right.getToken().getText() + "; please rewrite your query" + advice); |
| } |
| |
| if (isValidLeftToken) { |
| String alias = extractJoinAlias(left); |
| joinTree.setLeftAlias(alias); |
| String[] leftAliases = new String[1]; |
| leftAliases[0] = alias; |
| joinTree.setLeftAliases(leftAliases); |
| String[] children = new String[2]; |
| children[0] = alias; |
| joinTree.setBaseSrc(children); |
| joinTree.setId(qb.getId()); |
| joinTree.getAliasToOpInfo().put( |
| getModifiedAlias(qb, alias), aliasToOpInfo.get(alias)); |
| } else if (isJoinLeftToken) { |
| QBJoinTree leftTree = genJoinTree(qb, left, aliasToOpInfo); |
| joinTree.setJoinSrc(leftTree); |
| String[] leftChildAliases = leftTree.getLeftAliases(); |
| String[] leftAliases = new String[leftChildAliases.length + 1]; |
| for (int i = 0; i < leftChildAliases.length; i++) { |
| leftAliases[i] = leftChildAliases[i]; |
| } |
| leftAliases[leftChildAliases.length] = leftTree.getRightAliases()[0]; |
| joinTree.setLeftAliases(leftAliases); |
| } else { |
| assert (false); |
| } |
| |
| if (isValidRightToken) { |
| String alias = extractJoinAlias(right); |
| String[] rightAliases = new String[1]; |
| rightAliases[0] = alias; |
| joinTree.setRightAliases(rightAliases); |
| String[] children = joinTree.getBaseSrc(); |
| if (children == null) { |
| children = new String[2]; |
| } |
| children[1] = alias; |
| joinTree.setBaseSrc(children); |
| joinTree.setId(qb.getId()); |
| joinTree.getAliasToOpInfo().put( |
| getModifiedAlias(qb, alias), aliasToOpInfo.get(alias)); |
| // remember rhs table for semijoin |
| if (!joinTree.getNoSemiJoin()) { |
| joinTree.addRHSSemijoin(alias); |
| } |
| } else { |
| assert false; |
| } |
| |
| List<List<ASTNode>> expressions = new ArrayList<List<ASTNode>>(); |
| expressions.add(new ArrayList<ASTNode>()); |
| expressions.add(new ArrayList<ASTNode>()); |
| joinTree.setExpressions(expressions); |
| |
| List<Boolean> nullsafes = new ArrayList<Boolean>(); |
| joinTree.setNullSafes(nullsafes); |
| |
| List<List<ASTNode>> filters = new ArrayList<List<ASTNode>>(); |
| filters.add(new ArrayList<ASTNode>()); |
| filters.add(new ArrayList<ASTNode>()); |
| joinTree.setFilters(filters); |
| joinTree.setFilterMap(new int[2][]); |
| |
| List<List<ASTNode>> filtersForPushing = new ArrayList<List<ASTNode>>(); |
| filtersForPushing.add(new ArrayList<ASTNode>()); |
| filtersForPushing.add(new ArrayList<ASTNode>()); |
| joinTree.setFiltersForPushing(filtersForPushing); |
| |
| ASTNode joinCond = (ASTNode) joinParseTree.getChild(2); |
| List<String> leftSrc = new ArrayList<String>(); |
| parseJoinCondition(joinTree, joinCond, leftSrc, aliasToOpInfo); |
| if (leftSrc.size() == 1) { |
| joinTree.setLeftAlias(leftSrc.get(0)); |
| } |
| |
| // check the hints to see if the user has specified a map-side join. This |
| // will be removed later on, once the cost-based |
| // infrastructure is in place |
| if (qb.getParseInfo().getHints() != null) { |
| List<String> mapSideTables = getMapSideJoinTables(qb); |
| List<String> mapAliases = joinTree.getMapAliases(); |
| |
| for (String mapTbl : mapSideTables) { |
| boolean mapTable = false; |
| for (String leftAlias : joinTree.getLeftAliases()) { |
| if (mapTbl.equalsIgnoreCase(leftAlias)) { |
| mapTable = true; |
| } |
| } |
| for (String rightAlias : joinTree.getRightAliases()) { |
| if (mapTbl.equalsIgnoreCase(rightAlias)) { |
| mapTable = true; |
| } |
| } |
| |
| if (mapTable) { |
| if (mapAliases == null) { |
| mapAliases = new ArrayList<String>(); |
| } |
| mapAliases.add(mapTbl); |
| joinTree.setMapSideJoin(true); |
| } |
| } |
| |
| joinTree.setMapAliases(mapAliases); |
| |
| if (!(conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez"))) { |
| parseStreamTables(joinTree, qb); |
| } |
| } |
| |
| return joinTree; |
| } |
| |
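| // Record PK-FK join hint information (the FK-side table index and whether the |
| // non-FK side is filtered) on the join tree, if such a hint is present. |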
| private void addPkFkInfo(QBJoinTree joinTree, ASTNode hints) { |
| if (hints.getToken().getType() == HintParser.TOK_HINTLIST) { |
| Tree hint = hints.getChild(0); |
| if (hint.getType() == HintParser.TOK_HINT && hint.getChild(0).getType() == HintParser.TOK_PKFK_JOIN) { |
| Tree args = hint.getChild(1); |
| joinTree.setFkJoinTableIndex(Integer.parseInt(args.getChild(0).getText())); |
| joinTree.setNonFkSideIsFiltered(NON_FK_FILTERED.equals(args.getChild(1).getText())); |
| } |
| } |
| } |
| |
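| // A join input is valid if it is a table reference, a subquery or a PTF invocation. |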
| private boolean isValidJoinSide(ASTNode right) { |
| return (right.getToken().getType() == HiveParser.TOK_TABREF) |
| || (right.getToken().getType() == HiveParser.TOK_SUBQUERY) |
| || (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION); |
| } |
| |
| private String extractJoinAlias(ASTNode node) throws SemanticException { |
| // ptf node form is: |
| // ^(TOK_PTBLFUNCTION $name $alias? partitionTableFunctionSource partitioningSpec? expression*) |
| // guaranteed to have an alias here: check done in processJoin |
| if (node.getType() == HiveParser.TOK_PTBLFUNCTION) { |
| return unescapeIdentifier(node.getChild(1).getText().toLowerCase()); |
| } |
| if (node.getChildCount() == 1) { |
| return getUnescapedUnqualifiedTableName((ASTNode) node.getChild(0)).toLowerCase(); |
| } |
| for (int i = node.getChildCount() - 1; i >= 1; i--) { |
| if (node.getChild(i).getType() == HiveParser.Identifier) { |
| return unescapeIdentifier(node.getChild(i).getText().toLowerCase()); |
| } |
| } |
| throw new SemanticException("Unable to get join alias."); |
| } |
| |
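| // Collect the aliases listed in STREAMTABLE hints and record them on the join tree. |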
| private void parseStreamTables(QBJoinTree joinTree, QB qb) { |
| List<String> streamAliases = joinTree.getStreamAliases(); |
| |
| for (Node hintNode : qb.getParseInfo().getHints().getChildren()) { |
| ASTNode hint = (ASTNode) hintNode; |
| if (hint.getChild(0).getType() == HintParser.TOK_STREAMTABLE) { |
| for (int i = 0; i < hint.getChild(1).getChildCount(); i++) { |
| if (streamAliases == null) { |
| streamAliases = new ArrayList<String>(); |
| } |
| streamAliases.add(hint.getChild(1).getChild(i).getText()); |
| } |
| } |
| } |
| |
| joinTree.setStreamAliases(streamAliases); |
| } |
| |
| /** Parses semijoin hints in the query and returns the source table names mapped to their hints. |
| * Hints can be in 2 formats |
| * 1. TableName, ColumnName, Target-TableName, bloom filter entries |
| * 2. TableName, ColumnName, Target-TableName |
| * */ |
| private Map<String, List<SemiJoinHint>> parseSemiJoinHint(List<ASTNode> hints) throws SemanticException { |
| if (hints == null || hints.size() == 0) { |
| return null; |
| } |
| Map<String, List<SemiJoinHint>> result = null; |
| for (ASTNode hintNode : hints) { |
| for (Node node : hintNode.getChildren()) { |
| ASTNode hint = (ASTNode) node; |
| if (hint.getChild(0).getType() != HintParser.TOK_LEFTSEMIJOIN && |
| hint.getChild(0).getType() != HintParser.TOK_LEFTANTISEMIJOIN) { |
| continue; |
| } |
| if (result == null) { |
| result = new HashMap<>(); |
| } |
| Tree args = hint.getChild(1); |
| if (args.getChildCount() == 1) { |
| String text = args.getChild(0).getText(); |
| if (text.equalsIgnoreCase("None")) { |
| // Hint to disable runtime filtering. |
| return result; |
| } |
| } |
| int curIdx = 0; |
| while(curIdx < args.getChildCount()) { |
| curIdx = parseSingleSemiJoinHint(args, curIdx, result); |
| } |
| } |
| } |
| if (LOG.isDebugEnabled()) { |
| LOG.debug("Semijoin hint parsed: " + result); |
| } |
| return result; |
| } |
| |
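| // Parse one semijoin hint entry starting at curIdx: source alias, column name, |
| // target alias and an optional bloom filter entry count. Returns the index of the |
| // next unparsed child. |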
| private int parseSingleSemiJoinHint(Tree args, int curIdx, Map<String, List<SemiJoinHint>> result) |
| throws SemanticException { |
| // Check if there are enough entries in the tree to constitute a hint. |
| int numEntriesLeft = args.getChildCount() - curIdx; |
| if (numEntriesLeft < 3) { |
| throw new SemanticException("User provided fewer than 3 entries for the hint with alias " |
| + args.getChild(curIdx).getText()); |
| } |
| |
| String source = args.getChild(curIdx++).getText(); |
| // validate |
| if (StringUtils.isNumeric(source)) { |
| throw new SemanticException("User provided bloom filter entries when source alias is " |
| + "expected. source:" + source); |
| } |
| |
| String colName = args.getChild(curIdx++).getText(); |
| // validate |
| if (StringUtils.isNumeric(colName)) { |
| throw new SemanticException("User provided bloom filter entries when column name is " |
| + "expected. colName:" + colName); |
| } |
| |
| String target = args.getChild(curIdx++).getText(); |
| // validate |
| if (StringUtils.isNumeric(target)) { |
| throw new SemanticException("User provided bloom filter entries when target alias is " |
| + "expected. target: " + target); |
| } |
| |
| Integer number = null; |
| if (numEntriesLeft > 3) { |
| // Check if there exists bloom filter size entry |
| try { |
| number = Integer.parseInt(args.getChild(curIdx).getText()); |
| curIdx++; |
| } catch (NumberFormatException e) { // Ignore |
| LOG.warn("Number format exception when parsing " + args.getChild(curIdx).getText(), e); |
| } |
| } |
| result.computeIfAbsent(source, value -> new ArrayList<>()).add(new SemiJoinHint(colName, target, number)); |
| return curIdx; |
| } |
| |
| /** |
| * disableMapJoinWithHint |
| * @param hints |
| * @return true if a hint to disable mapjoin is provided, else false |
| */ |
| private boolean disableMapJoinWithHint(List<ASTNode> hints) { |
| if (hints == null || hints.size() == 0) { |
| return false; |
| } |
| for (ASTNode hintNode : hints) { |
| for (Node node : hintNode.getChildren()) { |
| ASTNode hint = (ASTNode) node; |
| if (hint.getChild(0).getType() != HintParser.TOK_MAPJOIN) { |
| continue; |
| } |
| Tree args = hint.getChild(1); |
| if (args.getChildCount() == 1) { |
| String text = args.getChild(0).getText(); |
| if (text.equalsIgnoreCase("None")) { |
| // Hint to disable mapjoin. |
| return true; |
| } |
| } |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Merges node into target. |
| */ |
| private void mergeJoins(QBJoinTree node, QBJoinTree target, int pos, int[] tgtToNodeExprMap) { |
| String[] nodeRightAliases = node.getRightAliases(); |
| String[] trgtRightAliases = target.getRightAliases(); |
| String[] rightAliases = new String[nodeRightAliases.length + trgtRightAliases.length]; |
| |
| for (int i = 0; i < trgtRightAliases.length; i++) { |
| rightAliases[i] = trgtRightAliases[i]; |
| } |
| for (int i = 0; i < nodeRightAliases.length; i++) { |
| rightAliases[i + trgtRightAliases.length] = nodeRightAliases[i]; |
| } |
| target.setRightAliases(rightAliases); |
| target.getAliasToOpInfo().putAll(node.getAliasToOpInfo()); |
| |
| String[] nodeBaseSrc = node.getBaseSrc(); |
| String[] trgtBaseSrc = target.getBaseSrc(); |
| String[] baseSrc = new String[nodeBaseSrc.length + trgtBaseSrc.length - 1]; |
| |
| for (int i = 0; i < trgtBaseSrc.length; i++) { |
| baseSrc[i] = trgtBaseSrc[i]; |
| } |
| for (int i = 1; i < nodeBaseSrc.length; i++) { |
| baseSrc[i + trgtBaseSrc.length - 1] = nodeBaseSrc[i]; |
| } |
| target.setBaseSrc(baseSrc); |
| |
| List<List<ASTNode>> expr = target.getExpressions(); |
| for (int i = 0; i < nodeRightAliases.length; i++) { |
| List<ASTNode> nodeConds = node.getExpressions().get(i + 1); |
| List<ASTNode> reorderedNodeConds = new ArrayList<ASTNode>(); |
| for (int k = 0; k < tgtToNodeExprMap.length; k++) { |
| reorderedNodeConds.add(nodeConds.get(tgtToNodeExprMap[k])); |
| } |
| expr.add(reorderedNodeConds); |
| } |
| |
| List<Boolean> nns = node.getNullSafes(); |
| List<Boolean> tns = target.getNullSafes(); |
| for (int i = 0; i < tns.size(); i++) { |
| tns.set(i, tns.get(i) & nns.get(i)); // if any merged condition is not null-safe, the result is not null-safe |
| } |
| |
| List<List<ASTNode>> filters = target.getFilters(); |
| for (int i = 0; i < nodeRightAliases.length; i++) { |
| filters.add(node.getFilters().get(i + 1)); |
| } |
| |
| if (node.getFilters().get(0).size() != 0) { |
| List<ASTNode> filterPos = filters.get(pos); |
| filterPos.addAll(node.getFilters().get(0)); |
| } |
| |
| int[][] nmap = node.getFilterMap(); |
| int[][] tmap = target.getFilterMap(); |
| int[][] newmap = new int[tmap.length + nmap.length - 1][]; |
| |
| for (int[] mapping : nmap) { |
| if (mapping != null) { |
| for (int i = 0; i < mapping.length; i += 2) { |
| if (pos > 0 || mapping[i] > 0) { |
| mapping[i] += trgtRightAliases.length; |
| } |
| } |
| } |
| } |
| if (nmap[0] != null) { |
| if (tmap[pos] == null) { |
| tmap[pos] = nmap[0]; |
| } else { |
| int[] appended = new int[tmap[pos].length + nmap[0].length]; |
| System.arraycopy(tmap[pos], 0, appended, 0, tmap[pos].length); |
| System.arraycopy(nmap[0], 0, appended, tmap[pos].length, nmap[0].length); |
| tmap[pos] = appended; |
| } |
| } |
| System.arraycopy(tmap, 0, newmap, 0, tmap.length); |
| System.arraycopy(nmap, 1, newmap, tmap.length, nmap.length - 1); |
| target.setFilterMap(newmap); |
| |
| List<List<ASTNode>> filter = target.getFiltersForPushing(); |
| for (int i = 0; i < nodeRightAliases.length; i++) { |
| filter.add(node.getFiltersForPushing().get(i + 1)); |
| } |
| |
| if (node.getFiltersForPushing().get(0).size() != 0) { |
| /* |
| * for each predicate: |
| * - does it refer to one or many aliases |
| * - if one: add it to the filterForPushing list of that alias |
| * - if many: add as a filter from merging trees. |
| */ |
| |
| for(ASTNode nodeFilter : node.getFiltersForPushing().get(0) ) { |
| int fPos = ParseUtils.checkJoinFilterRefersOneAlias(target.getBaseSrc(), nodeFilter); |
| |
| if (fPos != -1) { |
| filter.get(fPos).add(nodeFilter); |
| } else { |
| target.addPostJoinFilter(nodeFilter); |
| } |
| } |
| } |
| |
| target.setNoOuterJoin(node.getNoOuterJoin() && target.getNoOuterJoin()); |
| target.setNoSemiJoin(node.getNoSemiJoin() && target.getNoSemiJoin()); |
| |
| target.mergeRHSSemijoin(node); |
| |
| JoinCond[] nodeCondns = node.getJoinCond(); |
| int nodeCondnsSize = nodeCondns.length; |
| JoinCond[] targetCondns = target.getJoinCond(); |
| int targetCondnsSize = targetCondns.length; |
| JoinCond[] newCondns = new JoinCond[nodeCondnsSize + targetCondnsSize]; |
| for (int i = 0; i < targetCondnsSize; i++) { |
| newCondns[i] = targetCondns[i]; |
| } |
| |
| for (int i = 0; i < nodeCondnsSize; i++) { |
| JoinCond nodeCondn = nodeCondns[i]; |
| if (nodeCondn.getLeft() == 0) { |
| nodeCondn.setLeft(pos); |
| } else { |
| nodeCondn.setLeft(nodeCondn.getLeft() + targetCondnsSize); |
| } |
| nodeCondn.setRight(nodeCondn.getRight() + targetCondnsSize); |
| newCondns[targetCondnsSize + i] = nodeCondn; |
| } |
| |
| target.setJoinCond(newCondns); |
| if (target.isMapSideJoin()) { |
| assert node.isMapSideJoin(); |
| List<String> mapAliases = target.getMapAliases(); |
| for (String mapTbl : node.getMapAliases()) { |
| if (!mapAliases.contains(mapTbl)) { |
| mapAliases.add(mapTbl); |
| } |
| } |
| target.setMapAliases(mapAliases); |
| } |
| |
| if (node.getPostJoinFilters().size() != 0) { |
| // Safety check: if we are merging join operators and there are post-filtering |
| // conditions, they cannot be outer joins |
| assert node.getNoOuterJoin(); |
| assert target.getPostJoinFilters().size() == 0 || target.getNoOuterJoin(); |
| for (ASTNode exprPostFilter : node.getPostJoinFilters()) { |
| target.addPostJoinFilter(exprPostFilter); |
| } |
| } |
| } |
| |
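| /** |
| * Find the position in the target join tree at which the node join tree can be merged: |
| * the node's left alias must correspond to the target's left alias or to one of its |
| * right aliases, and the join key expressions on both sides must form the same set. |
| * Returns the position together with the mapping from target key positions to node |
| * key positions, or (-1, null) if no merge is possible. |
| */ |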
| private Pair<Integer, int[]> findMergePos(QBJoinTree node, QBJoinTree target) { |
| int res = -1; |
| String leftAlias = node.getLeftAlias(); |
| if (leftAlias == null && (!node.getNoOuterJoin() || !target.getNoOuterJoin())) { |
| // Cross with outer join: currently we do not merge |
| return Pair.of(-1, null); |
| } |
| |
| List<ASTNode> nodeCondn = node.getExpressions().get(0); |
| List<ASTNode> targetCondn = null; |
| |
| if (leftAlias == null || leftAlias.equals(target.getLeftAlias())) { |
| targetCondn = target.getExpressions().get(0); |
| res = 0; |
| } else { |
| for (int i = 0; i < target.getRightAliases().length; i++) { |
| if (leftAlias.equals(target.getRightAliases()[i])) { |
| targetCondn = target.getExpressions().get(i + 1); |
| res = i + 1; |
| break; |
| } |
| } |
| } |
| |
| if ( targetCondn == null || (nodeCondn.size() != targetCondn.size())) { |
| return Pair.of(-1, null); |
| } |
| |
| /* |
| * The order of the join condition expressions doesn't matter. |
| * A merge can happen: |
| * - if every target condition is present in some position of the node condition list, and |
| * - if there is no node condition that is not equal to some target condition. |
| */ |
| |
| int[] tgtToNodeExprMap = new int[targetCondn.size()]; |
| boolean[] nodeFiltersMapped = new boolean[nodeCondn.size()]; |
| int i, j; |
| for(i=0; i<targetCondn.size(); i++) { |
| String tgtExprTree = targetCondn.get(i).toStringTree(); |
| tgtToNodeExprMap[i] = -1; |
| for(j=0; j < nodeCondn.size(); j++) { |
| if ( nodeCondn.get(j).toStringTree().equals(tgtExprTree)) { |
| tgtToNodeExprMap[i] = j; |
| nodeFiltersMapped[j] = true; |
| } |
| } |
| if ( tgtToNodeExprMap[i] == -1) { |
| return Pair.of(-1, null); |
| } |
| } |
| |
| for(j=0; j < nodeCondn.size(); j++) { |
| if ( !nodeFiltersMapped[j]) { |
| return Pair.of(-1, null); |
| } |
| } |
| |
| return Pair.of(res, tgtToNodeExprMap); |
| } |
| |
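| // Defaults used when the Calcite-based (CBO) planner is not in play; subclasses that |
| // run CBO override these. |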
| boolean isCBOExecuted() { |
| return false; |
| } |
| |
| boolean isCBOSupportedLateralView(ASTNode lateralView) { |
| return false; |
| } |
| |
| boolean continueJoinMerge() { |
| return true; |
| } |
| |
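| // Two join trees should not be merged when either has post-join filters and either |
| // is an outer or semi join. |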
| private boolean shouldMerge(final QBJoinTree node, final QBJoinTree target) { |
| boolean isNodeOuterJoin=false, isNodeSemiJoin=false, hasNodePostJoinFilters=false; |
| boolean isTargetOuterJoin=false, isTargetSemiJoin=false, hasTargetPostJoinFilters=false; |
| |
| isNodeOuterJoin = !node.getNoOuterJoin(); |
| isNodeSemiJoin= !node.getNoSemiJoin(); |
| hasNodePostJoinFilters = node.getPostJoinFilters().size() !=0; |
| |
| isTargetOuterJoin = !target.getNoOuterJoin(); |
| isTargetSemiJoin= !target.getNoSemiJoin(); |
| hasTargetPostJoinFilters = target.getPostJoinFilters().size() !=0; |
| |
| if (hasNodePostJoinFilters || hasTargetPostJoinFilters) { |
| if (isNodeOuterJoin || isNodeSemiJoin || isTargetOuterJoin || isTargetSemiJoin) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| // Try to merge the join tree starting from the innermost source |
| // (it was merged from outermost to innermost, which could be invalid). |
| // |
| // In a join tree ((A-B)-C)-D where C is not mergeable with A-B, |
| // D can be merged with A-B into a single join if and only if C and D have the same join type. |
| // In this case, the A-B-D join will be executed first and the ABD-C join will be executed next. |
| private void mergeJoinTree(QB qb) { |
| QBJoinTree tree = qb.getQbJoinTree(); |
| if (tree.getJoinSrc() == null) { |
| return; |
| } |
| |
| // make array with QBJoinTree : outer most(0) --> inner most(n) |
| List<QBJoinTree> trees = new ArrayList<QBJoinTree>(); |
| for (;tree != null; tree = tree.getJoinSrc()) { |
| trees.add(tree); |
| } |
| |
| // merging from 'target'(inner) to 'node'(outer) |
| boolean mergedQBJTree = false; |
| for (int i = trees.size() - 1; i >= 0; i--) { |
| QBJoinTree target = trees.get(i); |
| if (target == null) { |
| continue; |
| } |
| JoinType prevType = null; // save join type |
| boolean continueScanning = true; |
| for (int j = i - 1; j >= 0 && continueScanning; j--) { |
| QBJoinTree node = trees.get(j); |
| if (node == null) { |
| continue; |
| } |
| JoinType currType = getType(node.getJoinCond()); |
| if (prevType != null && prevType != currType) { |
| break; |
| } |
| if(!shouldMerge(node, target)) { |
| // Outer joins or outer and not outer with post-filtering conditions cannot be merged |
| break; |
| } |
| Pair<Integer, int[]> mergeDetails = findMergePos(node, target); |
| int pos = mergeDetails.getLeft(); |
| if (pos >= 0) { |
| // for outer joins, it should not exceed 16 aliases (short type) |
| if (!node.getNoOuterJoin() || !target.getNoOuterJoin()) { |
| if (node.getRightAliases().length + target.getRightAliases().length + 1 > 16) { |
| LOG.info(ErrorMsg.JOINNODE_OUTERJOIN_MORETHAN_16.getErrorCodedMsg()); |
| continueScanning = continueJoinMerge(); |
| continue; |
| } |
| } |
| mergeJoins(node, target, pos, mergeDetails.getRight()); |
| trees.set(j, null); |
| mergedQBJTree = true; |
| continue; // continue merging with next alias |
| } |
| /* |
| * for CBO provided orderings, don't attempt to reorder joins. |
| * only convert consecutive joins into n-way joins. |
| */ |
| continueScanning = continueJoinMerge(); |
| if (prevType == null) { |
| prevType = currType; |
| } |
| } |
| } |
| |
| // Now that we reordered QBJoinTrees, update leftaliases of all |
| // QBJoinTree from innermost to outer |
| if ((trees.size() > 1) && mergedQBJTree) { |
| QBJoinTree curQBJTree = null; |
| QBJoinTree prevQBJTree = null; |
| for (int i = trees.size() - 1; i >= 0; i--) { |
| curQBJTree = trees.get(i); |
| if (curQBJTree != null) { |
| if (prevQBJTree != null) { |
| List<String> newCurLeftAliases = new ArrayList<String>(); |
| newCurLeftAliases.addAll(Arrays.asList(prevQBJTree.getLeftAliases())); |
| newCurLeftAliases.addAll(Arrays.asList(prevQBJTree.getRightAliases())); |
| curQBJTree |
| .setLeftAliases(newCurLeftAliases.toArray(new String[newCurLeftAliases.size()])); |
| } |
| prevQBJTree = curQBJTree; |
| } |
| } |
| } |
| |
| // reconstruct join tree |
| QBJoinTree current = null; |
| for (QBJoinTree target : trees) { |
| if (target == null) { |
| continue; |
| } |
| if (current == null) { |
| qb.setQbJoinTree(current = target); |
| } else { |
| current.setJoinSrc(target); |
| current = target; |
| } |
| } |
| } |
| |
| // Join types should be all the same for merging (or returns null) |
| private JoinType getType(JoinCond[] conds) { |
| JoinType type = conds[0].getJoinType(); |
| return Arrays.stream(conds).allMatch(cond -> cond.getJoinType() == type) ? type : null; |
| } |
| |
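| // Generate an identity Select operator (SELECT *) on top of the input, preserving |
| // the input row resolver. |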
| private Operator genSelectAllDesc(Operator input) { |
| OpParseContext inputCtx = opParseCtx.get(input); |
| RowResolver inputRR = inputCtx.getRowResolver(); |
| List<ColumnInfo> columns = inputRR.getColumnInfos(); |
| List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>(); |
| List<String> columnNames = new ArrayList<String>(); |
| Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>(); |
| for (ColumnInfo col : columns) { |
| colList.add(new ExprNodeColumnDesc(col, true)); |
| columnNames.add(col.getInternalName()); |
| columnExprMap.put(col.getInternalName(), new ExprNodeColumnDesc(col, true)); |
| } |
| RowResolver outputRR = inputRR.duplicate(); |
| Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( |
| new SelectDesc(colList, columnNames, true), |
| outputRR.getRowSchema(), input), outputRR); |
| output.setColumnExprMap(columnExprMap); |
| return output; |
| } |
| |
| // Groups the clause names into lists so that any two clauses in the same list have the same |
| // group by and distinct keys and no clause appears in more than one list. Returns a list of the |
| // lists of clauses. |
| private List<List<String>> getCommonGroupByDestGroups(QB qb, |
| Map<String, Operator<? extends OperatorDesc>> inputs) throws SemanticException { |
| |
| QBParseInfo qbp = qb.getParseInfo(); |
| |
| Set<String> ks = new TreeSet<>(qbp.getClauseNames()); |
| List<List<String>> commonGroupByDestGroups = new ArrayList<>(); |
| |
| // If this is a trivial query block return |
| if (ks.isEmpty()) { |
| commonGroupByDestGroups.add(Collections.emptyList()); |
| return commonGroupByDestGroups; |
| } |
| if (ks.size() == 1) { |
| commonGroupByDestGroups.add(Collections.singletonList(ks.iterator().next())); |
| return commonGroupByDestGroups; |
| } |
| |
| List<Operator<? extends OperatorDesc>> inputOperators = |
| new ArrayList<Operator<? extends OperatorDesc>>(ks.size()); |
| // We will try to combine multiple clauses into a smaller number with compatible keys. |
| List<List<ExprNodeDesc>> newSprayKeyLists = new ArrayList<List<ExprNodeDesc>>(ks.size()); |
| List<List<ExprNodeDesc>> newDistinctKeyLists = new ArrayList<List<ExprNodeDesc>>(ks.size()); |
| |
| // Iterate over each clause |
| for (String dest : ks) { |
| Operator input = inputs.get(dest); |
| RowResolver inputRR = opParseCtx.get(input).getRowResolver(); |
| |
| // Determine the keys for the current clause. |
| List<ExprNodeDesc> currentDistinctKeys = getDistinctExprs(qbp, dest, inputRR); |
| List<ExprNodeDesc> currentSprayKeys = determineSprayKeys(qbp, dest, inputRR); |
| |
| // Loop through each of the lists of exprs, looking for a match. |
| boolean found = false; |
| for (int i = 0; i < newSprayKeyLists.size(); i++) { |
| if (!input.equals(inputOperators.get(i))) { |
| continue; |
| } |
| // We will try to merge this clause into one of the previously added ones. |
| List<ExprNodeDesc> targetSprayKeys = newSprayKeyLists.get(i); |
| List<ExprNodeDesc> targetDistinctKeys = newDistinctKeyLists.get(i); |
| if (currentDistinctKeys.isEmpty() != targetDistinctKeys.isEmpty()) { |
| // GBY without distinct keys is not prepared to process distinct key structured rows. |
| continue; |
| } |
| |
| if (currentDistinctKeys.isEmpty()) { |
| // current dest has no distinct keys. |
| List<ExprNodeDesc> combinedList = combineExprNodeLists(targetSprayKeys, targetDistinctKeys); |
| if (!matchExprLists(combinedList, currentSprayKeys)) { |
| continue; |
| } // else do the common code at the end. |
| } else { |
| if (targetDistinctKeys.isEmpty()) { |
| List<ExprNodeDesc> combinedList = combineExprNodeLists(currentSprayKeys, currentDistinctKeys); |
| if (!matchExprLists(combinedList, targetSprayKeys)) { |
| continue; |
| } else { |
| // we have found a match. insert this distinct clause to head. |
| newDistinctKeyLists.remove(i); |
| newSprayKeyLists.remove(i); |
| newDistinctKeyLists.add(i, currentDistinctKeys); |
| newSprayKeyLists.add(i, currentSprayKeys); |
| commonGroupByDestGroups.get(i).add(0, dest); |
| found = true; |
| break; |
| } |
| } else { |
| if (!matchExprLists(targetDistinctKeys, currentDistinctKeys)) { |
| continue; |
| } |
| |
| if (!matchExprLists(targetSprayKeys, currentSprayKeys)) { |
| continue; |
| } |
| // else do common code |
| } |
| } |
| |
| // common code |
| // A match was found, so add the clause to the corresponding list |
| commonGroupByDestGroups.get(i).add(dest); |
| found = true; |
| break; |
| } |
| |
| // No match was found, so create new entries |
| if (!found) { |
| inputOperators.add(input); |
| newSprayKeyLists.add(currentSprayKeys); |
| newDistinctKeyLists.add(currentDistinctKeys); |
| List<String> destGroup = new ArrayList<String>(); |
| destGroup.add(dest); |
| commonGroupByDestGroups.add(destGroup); |
| } |
| } |
| |
| return commonGroupByDestGroups; |
| } |
| |
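| // Determine the spray (shuffle) keys for a clause: the group by expressions, deduplicated. |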
| private List<ExprNodeDesc> determineSprayKeys(QBParseInfo qbp, String dest, |
| RowResolver inputRR) throws SemanticException { |
| List<ExprNodeDesc> sprayKeys = new ArrayList<ExprNodeDesc>(); |
| |
| // Add the group by expressions |
| List<ASTNode> grpByExprs = getGroupByForClause(qbp, dest); |
| for (ASTNode grpByExpr : grpByExprs) { |
| ExprNodeDesc exprDesc = genExprNodeDesc(grpByExpr, inputRR); |
| if (ExprNodeDescUtils.indexOf(exprDesc, sprayKeys) < 0) { |
| sprayKeys.add(exprDesc); |
| } |
| } |
| return sprayKeys; |
| } |
| |
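| // Combine two expression lists, keeping the order of the first and appending the |
| // elements of the second that are not already present. |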
| private List<ExprNodeDesc> combineExprNodeLists(List<ExprNodeDesc> list, List<ExprNodeDesc> list2) { |
| ArrayList<ExprNodeDesc> result = new ArrayList<>(list); |
| for (ExprNodeDesc elem : list2) { |
| if (!result.contains(elem)) { |
| result.add(elem); |
| } |
| } |
| return result; |
| } |
| |
| // Returns whether or not two lists contain the same elements independent of order |
| private boolean matchExprLists(List<ExprNodeDesc> list1, List<ExprNodeDesc> list2) { |
| |
| if (list1.size() != list2.size()) { |
| return false; |
| } |
| for (ExprNodeDesc exprNodeDesc : list1) { |
| if (ExprNodeDescUtils.indexOf(exprNodeDesc, list2) < 0) { |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| // Returns a list of the distinct exprs without duplicates for a given clause name |
| private List<ExprNodeDesc> getDistinctExprs(QBParseInfo qbp, String dest, RowResolver inputRR) |
| throws SemanticException { |
| |
| List<ASTNode> distinctAggExprs = qbp.getDistinctFuncExprsForClause(dest); |
| List<ExprNodeDesc> distinctExprs = new ArrayList<ExprNodeDesc>(); |
| |
| for (ASTNode distinctAggExpr : distinctAggExprs) { |
| // 0 is function name |
| for (int i = 1; i < distinctAggExpr.getChildCount(); i++) { |
| ASTNode parameter = (ASTNode) distinctAggExpr.getChild(i); |
| ExprNodeDesc expr = genExprNodeDesc(parameter, inputRR); |
| if (ExprNodeDescUtils.indexOf(expr, distinctExprs) < 0) { |
| distinctExprs.add(expr); |
| } |
| } |
| } |
| |
| return distinctExprs; |
| } |
| |
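| /** |
| * Generate the body plan for every destination clause of the query block: WHERE |
| * filters, group bys, SELECT, windowing, cluster/distribute/order/sort by, LIMIT |
| * and file sinks. Clauses that share the same spray and distinct keys may be |
| * combined into a single reducer when HIVEMULTIGROUPBYSINGLEREDUCER is enabled. |
| */ |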
| @SuppressWarnings("nls") |
| private Operator genBodyPlan(QB qb, Operator input, Map<String, Operator> aliasToOpInfo) |
| throws SemanticException { |
| QBParseInfo qbp = qb.getParseInfo(); |
| |
| SortedSet<String> ks = new TreeSet<String>(qbp.getClauseNames()); |
| Map<String, Operator<? extends OperatorDesc>> inputs = createInputForDests(qb, input, ks); |
| |
| Operator curr = input; |
| |
| List<List<String>> commonGroupByDestGroups = null; |
| |
| // If we can put multiple group bys in a single reducer, determine suitable groups of |
| // expressions, otherwise treat all the expressions as a single group |
| if (conf.getBoolVar(HiveConf.ConfVars.HIVEMULTIGROUPBYSINGLEREDUCER)) { |
| try { |
| commonGroupByDestGroups = getCommonGroupByDestGroups(qb, inputs); |
| } catch (SemanticException e) { |
| LOG.error("Failed to group clauses by common spray keys.", e); |
| } |
| } |
| |
| if (commonGroupByDestGroups == null) { |
| commonGroupByDestGroups = Collections.singletonList(new ArrayList<>(ks)); |
| } |
| |
| if (!commonGroupByDestGroups.isEmpty()) { |
| |
| // Iterate over each group of subqueries with the same group by/distinct keys |
| for (List<String> commonGroupByDestGroup : commonGroupByDestGroups) { |
| if (commonGroupByDestGroup.isEmpty()) { |
| continue; |
| } |
| |
| String firstDest = commonGroupByDestGroup.get(0); |
| input = inputs.get(firstDest); |
| |
| // Constructs a standard group by plan if: |
| // There is no other subquery with the same group by/distinct keys or |
| // (There are no aggregations in a representative query for the group and |
| // There is no group by in that representative query) or |
| // The data is skewed or |
| // The conf variable used to control combining group bys into a single reducer is false |
| if (commonGroupByDestGroup.size() == 1 || |
| (qbp.getAggregationExprsForClause(firstDest).size() == 0 && |
| getGroupByForClause(qbp, firstDest).size() == 0) || |
| conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) || |
| !conf.getBoolVar(HiveConf.ConfVars.HIVEMULTIGROUPBYSINGLEREDUCER)) { |
| |
| // Go over all the destination tables |
| for (String dest : commonGroupByDestGroup) { |
| curr = inputs.get(dest); |
| |
| if (qbp.getWhrForClause(dest) != null) { |
| ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest); |
| curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, curr, aliasToOpInfo, false, false); |
| } |
| // Preserve operator before the GBY - we'll use it to resolve '*' |
| Operator<?> gbySource = curr; |
| |
| if ((qbp.getAggregationExprsForClause(dest).size() != 0 |
| || getGroupByForClause(qbp, dest).size() > 0) |
| && (qbp.getSelForClause(dest).getToken().getType() != HiveParser.TOK_SELECTDI |
| || qbp.getWindowingExprsForClause(dest) == null)) { |
| // multiple distincts is not supported with skew in data |
| if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) && |
| qbp.getDistinctFuncExprsForClause(dest).size() > 1) { |
| throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS. |
| getMsg()); |
| } |
| // insert a select operator here used by the ColumnPruner to reduce |
| // the data to shuffle |
| curr = genSelectAllDesc(curr); |
| // Check and transform group by *. This will only happen for select distinct *. |
| // Here the "genSelectPlan" is being leveraged. |
| // The main benefits are (1) remove virtual columns that should |
| // not be included in the group by; (2) add the fully qualified column names to unParseTranslator |
| // so that view is supported. The drawback is that an additional SEL op is added. If it is |
| // not necessary, it will be removed by NonBlockingOpDeDupProc Optimizer because it will match |
| // SEL%SEL% rule. |
| ASTNode selExprList = qbp.getSelForClause(dest); |
| if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI |
| && selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount() == 1) { |
| ASTNode node = (ASTNode) selExprList.getChild(0).getChild(0); |
| if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) { |
| curr = genSelectPlan(dest, qb, curr, curr); |
| RowResolver rr = opParseCtx.get(curr).getRowResolver(); |
| qbp.setSelExprForClause(dest, genSelectDIAST(rr)); |
| } |
| } |
| if (conf.getBoolVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) { |
| if (!conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) { |
| curr = genGroupByPlanMapAggrNoSkew(dest, qb, curr); |
| } else { |
| curr = genGroupByPlanMapAggr2MR(dest, qb, curr); |
| } |
| } else if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) { |
| curr = genGroupByPlan2MR(dest, qb, curr); |
| } else { |
| curr = genGroupByPlan1MR(dest, qb, curr); |
| } |
| } |
| if (LOG.isDebugEnabled()) { |
| LOG.debug("RR before GB " + opParseCtx.get(gbySource).getRowResolver() |
| + " after GB " + opParseCtx.get(curr).getRowResolver()); |
| } |
| |
| curr = genPostGroupByBodyPlan(curr, dest, qb, aliasToOpInfo, gbySource); |
| } |
| } else { |
| curr = genGroupByPlan1ReduceMultiGBY(commonGroupByDestGroup, qb, input, aliasToOpInfo); |
| } |
| } |
| } |
| |
| LOG.debug("Created Body Plan for Query Block {}", qb.getId()); |
| |
| return curr; |
| } |
| |
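| // For each destination clause, generate the lateral view plan (if any) on top of |
| // the common input and map the clause name to the resulting operator. |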
| private Map<String, Operator<? extends OperatorDesc>> createInputForDests(QB qb, |
| Operator<? extends OperatorDesc> input, Set<String> dests) throws SemanticException { |
| Map<String, Operator<? extends OperatorDesc>> inputs = |
| new HashMap<String, Operator<? extends OperatorDesc>>(); |
| for (String dest : dests) { |
| inputs.put(dest, genLateralViewPlanForDest(dest, qb, input)); |
| } |
| return inputs; |
| } |
| |
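| /** |
| * Generate the part of the plan that follows the group by for a destination: |
| * HAVING, windowing (plus a group by for DISTINCT after windowing), SELECT, |
| * cluster/distribute/order/sort by, LIMIT and the file sink. |
| */ |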
| private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb, |
| Map<String, Operator> aliasToOpInfo, Operator gbySource) |
| throws SemanticException { |
| |
| QBParseInfo qbp = qb.getParseInfo(); |
| |
| // Insert HAVING plan here |
| if (qbp.getHavingForClause(dest) != null) { |
| if (getGroupByForClause(qbp, dest).size() == 0) { |
| throw new SemanticException("HAVING specified without GROUP BY"); |
| } |
| curr = genHavingPlan(dest, qb, curr, aliasToOpInfo); |
| } |
| |
| if(queryProperties.hasWindowing() && qb.getWindowingSpec(dest) != null) { |
| curr = genWindowingPlan(qb, qb.getWindowingSpec(dest), curr); |
| // GBy for DISTINCT after windowing |
| if ((qbp.getAggregationExprsForClause(dest).size() != 0 |
| || getGroupByForClause(qbp, dest).size() > 0) |
| && qbp.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI |
| && qbp.getWindowingExprsForClause(dest) != null) { |
| if (conf.getBoolVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) { |
| if (!conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) { |
| curr = genGroupByPlanMapAggrNoSkew(dest, qb, curr); |
| } else { |
| curr = genGroupByPlanMapAggr2MR(dest, qb, curr); |
| } |
| } else if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) { |
| curr = genGroupByPlan2MR(dest, qb, curr); |
| } else { |
| curr = genGroupByPlan1MR(dest, qb, curr); |
| } |
| } |
| } |
| |
| curr = genSelectPlan(dest, qb, curr, gbySource); |
| |
| Integer limit = qbp.getDestLimit(dest); |
| Integer offset = (qbp.getDestLimitOffset(dest) == null) ? 0 : qbp.getDestLimitOffset(dest); |
| |
| // Expressions are currently not supported without an alias. |
| |
| // Reduce sink is needed if the query contains a cluster by, distribute by, |
| // order by or a sort by clause. |
| boolean genReduceSink = false; |
| boolean hasOrderBy = false; |
| |
| // Currently, expressions are not allowed in cluster by, distribute by, |
| // order by or a sort by clause. For each of the above clause types, check |
| // if the clause contains any expression. |
| if (qbp.getClusterByForClause(dest) != null) { |
| genReduceSink = true; |
| } |
| |
| if (qbp.getDistributeByForClause(dest) != null) { |
| genReduceSink = true; |
| } |
| |
| if (qbp.getOrderByForClause(dest) != null) { |
| genReduceSink = true; |
| hasOrderBy = true; |
| } |
| |
| if (qbp.getSortByForClause(dest) != null) { |
| genReduceSink = true; |
| } |
| |
| if (genReduceSink) { |
| int numReducers = -1; |
| |
| // Use only 1 reducer if order by is present |
| if (hasOrderBy) { |
| numReducers = 1; |
| } |
| |
| curr = genReduceSinkPlan(dest, qb, curr, numReducers, hasOrderBy); |
| } |
| |
| |
| if (qbp.getIsSubQ()) { |
| if (limit != null) { |
| // In case of order by, only 1 reducer is used, so no need of |
| // another shuffle |
| curr = genLimitMapRedPlan(dest, qb, curr, offset, |
| limit, limit != 0 && !hasOrderBy); |
| } |
| } else { |
| // exact limit can be taken care of by the fetch operator |
| if (limit != null) { |
| boolean extraMRStep = true; |
| |
| if (limit == 0 || hasOrderBy || |
| qb.getIsQuery() && qbp.getClusterByForClause(dest) == null && |
| qbp.getSortByForClause(dest) == null) { |
| extraMRStep = false; |
| } |
| |
| curr = genLimitMapRedPlan(dest, qb, curr, offset, |
| limit, extraMRStep); |
| qb.getParseInfo().setOuterQueryLimit(limit); |
| } |
| if (!queryState.getHiveOperation().equals(HiveOperation.CREATEVIEW)) { |
| curr = genFileSinkPlan(dest, qb, curr); |
| } |
| } |
| |
| return curr; |
| } |
| |
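| /**
| * Generates the plan for a UNION of two child plans. Both sides must have the same
| * number of columns and pairwise type-compatible columns; select operators are
| * inserted to cast inputs to the common type where needed. If either child is
| * already a union (or an identity projection on top of one), the other input is
| * merged into that union operator instead of creating a new one.
| */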
| @SuppressWarnings("nls") |
| private Operator genUnionPlan(String unionalias, String leftalias, |
| Operator leftOp, String rightalias, Operator rightOp) |
| throws SemanticException { |
| |
| // Currently, the unions are not merged - each union has only 2 parents. So,
| // an n-way union will lead to (n-1) union operators.
| // These could easily be merged into a single union operator.
| RowResolver leftRR = opParseCtx.get(leftOp).getRowResolver(); |
| RowResolver rightRR = opParseCtx.get(rightOp).getRowResolver(); |
| Map<String, ColumnInfo> leftmap = leftRR.getFieldMap(leftalias); |
| Map<String, ColumnInfo> rightmap = rightRR.getFieldMap(rightalias); |
| // make sure the schemas of both sides are the same |
| ASTNode tabref = qb.getAliases().isEmpty() ? null : |
| qb.getParseInfo().getSrcForAlias(qb.getAliases().get(0)); |
| if (leftmap.size() != rightmap.size()) { |
| throw new SemanticException("Schema of both sides of union should match."); |
| } |
| |
| RowResolver unionoutRR = new RowResolver(); |
| |
| Iterator<Map.Entry<String, ColumnInfo>> lIter = leftmap.entrySet().iterator(); |
| Iterator<Map.Entry<String, ColumnInfo>> rIter = rightmap.entrySet().iterator(); |
| while (lIter.hasNext()) { |
| Map.Entry<String, ColumnInfo> lEntry = lIter.next(); |
| Map.Entry<String, ColumnInfo> rEntry = rIter.next(); |
| ColumnInfo lInfo = lEntry.getValue(); |
| ColumnInfo rInfo = rEntry.getValue(); |
| |
| String field = lEntry.getKey(); // use the left alias (matching MySQL/PostgreSQL behavior)
| // try widening conversion, otherwise fail union |
| TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), |
| rInfo.getType()); |
| if (commonTypeInfo == null) { |
| throw new SemanticException(generateErrorMessage(tabref, |
| "Schema of both sides of union should match: Column " + field |
| + " is of type " + lInfo.getType().getTypeName() |
| + " on first table and type " + rInfo.getType().getTypeName() |
| + " on second table")); |
| } |
| ColumnInfo unionColInfo = new ColumnInfo(lInfo); |
| unionColInfo.setType(commonTypeInfo); |
| unionoutRR.put(unionalias, field, unionColInfo); |
| } |
| |
| // For TEZ we rely on the generated SelectOperator to do the type casting. |
| // Consider: |
| // SEL_1 (int) SEL_2 (int) SEL_3 (double) |
| // If we first merge SEL_1 and SEL_2 into a UNION_1, and then merge UNION_1 |
| // with SEL_3 to get UNION_2, then no SelectOperator will be inserted. Hence an
| // error will happen afterwards. The solution here is to insert one after UNION_1,
| // which casts int to double.
| boolean isMR = HiveConf.getVar(conf, |
| HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr"); |
| |
| if (!isMR || !(leftOp instanceof UnionOperator)) { |
| leftOp = genInputSelectForUnion(leftOp, leftmap, leftalias, unionoutRR, unionalias); |
| } |
| |
| if (!isMR || !(rightOp instanceof UnionOperator)) { |
| rightOp = genInputSelectForUnion(rightOp, rightmap, rightalias, unionoutRR, unionalias); |
| } |
| |
| // If one of the children (left or right) is: |
| // (i) a union, or |
| // (ii) an identity projection followed by a union, |
| // merge with it |
| // else create a new one |
| if (leftOp instanceof UnionOperator || |
| (leftOp instanceof SelectOperator && |
| leftOp.getParentOperators() != null && |
| !leftOp.getParentOperators().isEmpty() && |
| leftOp.getParentOperators().get(0) instanceof UnionOperator && |
| ((SelectOperator)leftOp).isIdentitySelect()) ) { |
| |
| if(!(leftOp instanceof UnionOperator)) { |
| Operator oldChild = leftOp; |
| leftOp = (Operator) leftOp.getParentOperators().get(0); |
| leftOp.removeChildAndAdoptItsChildren(oldChild); |
| } |
| |
| // make left a child of right |
| List<Operator<? extends OperatorDesc>> child = |
| new ArrayList<Operator<? extends OperatorDesc>>(); |
| child.add(leftOp); |
| rightOp.setChildOperators(child); |
| |
| List<Operator<? extends OperatorDesc>> parent = leftOp |
| .getParentOperators(); |
| parent.add(rightOp); |
| |
| UnionDesc uDesc = ((UnionOperator) leftOp).getConf(); |
| uDesc.setNumInputs(uDesc.getNumInputs() + 1); |
| return putOpInsertMap(leftOp, unionoutRR); |
| } |
| |
| if (rightOp instanceof UnionOperator || |
| (rightOp instanceof SelectOperator && |
| rightOp.getParentOperators() != null && |
| !rightOp.getParentOperators().isEmpty() && |
| rightOp.getParentOperators().get(0) instanceof UnionOperator && |
| ((SelectOperator)rightOp).isIdentitySelect()) ) { |
| |
| if(!(rightOp instanceof UnionOperator)) { |
| Operator oldChild = rightOp; |
| rightOp = (Operator) rightOp.getParentOperators().get(0); |
| rightOp.removeChildAndAdoptItsChildren(oldChild); |
| } |
| |
| // make right a child of left |
| List<Operator<? extends OperatorDesc>> child = |
| new ArrayList<Operator<? extends OperatorDesc>>(); |
| child.add(rightOp); |
| leftOp.setChildOperators(child); |
| |
| List<Operator<? extends OperatorDesc>> parent = rightOp |
| .getParentOperators(); |
| parent.add(leftOp); |
| UnionDesc uDesc = ((UnionOperator) rightOp).getConf(); |
| uDesc.setNumInputs(uDesc.getNumInputs() + 1); |
| |
| return putOpInsertMap(rightOp, unionoutRR); |
| } |
| |
| // Create a new union operator |
| Operator<? extends OperatorDesc> unionforward = OperatorFactory |
| .getAndMakeChild(getOpContext(), new UnionDesc(), new RowSchema(unionoutRR |
| .getColumnInfos())); |
| |
| // set union operator as child of each of leftOp and rightOp |
| rightOp.setChildOperators(Lists.newArrayList(unionforward)); |
| leftOp.setChildOperators(Lists.newArrayList(unionforward)); |
| |
| unionforward.setParentOperators(Lists.newArrayList(leftOp, rightOp)); |
| |
| // create operator info list to return |
| return putOpInsertMap(unionforward, unionoutRR); |
| } |
| |
| /** |
| * Generates a select operator which can go between the original input operator and the union
| * operator. This select casts columns to match the type of the associated column in the union;
| * other columns pass through unchanged. The new operator's only parent is the original input
| * operator to the union, and its only child is the union. If the input does not need to be
| * cast, the original operator is returned, and no new select operator is added.
| * |
| * @param origInputOp |
| * The original input operator to the union. |
| * @param origInputFieldMap |
| * A map from field name to ColumnInfo for the original input operator. |
| * @param origInputAlias |
| * The alias associated with the original input operator. |
| * @param unionoutRR |
| * The union's output row resolver. |
| * @param unionalias |
| * The alias of the union. |
| * @return the original input operator if no cast is needed, otherwise the new select operator
| * @throws SemanticException |
| */ |
| private Operator<? extends OperatorDesc> genInputSelectForUnion( |
| Operator<? extends OperatorDesc> origInputOp, Map<String, ColumnInfo> origInputFieldMap, |
| String origInputAlias, RowResolver unionoutRR, String unionalias) |
| throws SemanticException { |
| |
| Map<String, ColumnInfo> fieldMap = unionoutRR.getFieldMap(unionalias); |
| |
| Iterator<ColumnInfo> oIter = origInputFieldMap.values().iterator(); |
| Iterator<ColumnInfo> uIter = fieldMap.values().iterator(); |
| |
| List<ExprNodeDesc> columns = new ArrayList<>(); |
| boolean needsCast = false; |
| while (oIter.hasNext()) { |
| ColumnInfo oInfo = oIter.next(); |
| ColumnInfo uInfo = uIter.next(); |
| ExprNodeDesc column = new ExprNodeColumnDesc(oInfo.getType(), oInfo.getInternalName(), |
| oInfo.getTabAlias(), oInfo.getIsVirtualCol(), oInfo.isSkewedCol()); |
| if (!oInfo.getType().equals(uInfo.getType())) { |
| needsCast = true; |
| column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() |
| .createConversionCast(column, (PrimitiveTypeInfo)uInfo.getType()); |
| } |
| columns.add(column); |
| } |
| |
| // If none of the columns need to be cast there's no need for an additional select operator |
| if (!needsCast) { |
| return origInputOp; |
| } |
| |
| RowResolver rowResolver = new RowResolver(); |
| Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>(); |
| |
| List<String> colName = new ArrayList<String>(); |
| for (int i = 0; i < columns.size(); i++) { |
| String name = getColumnInternalName(i); |
| ColumnInfo col = new ColumnInfo(name, columns.get(i) |
| .getTypeInfo(), "", false); |
| rowResolver.put(origInputAlias, name, col); |
| colName.add(name); |
| columnExprMap.put(name, columns.get(i)); |
| } |
| |
| Operator<SelectDesc> newInputOp = OperatorFactory.getAndMakeChild( |
| new SelectDesc(columns, colName), new RowSchema(rowResolver.getColumnInfos()), |
| columnExprMap, origInputOp); |
| return putOpInsertMap(newInputOp, rowResolver); |
| } |
| |
| /** |
| * Generates the sampling predicate from the TABLESAMPLE clause information. |
| * This function uses the bucket column list to decide the expression inputs |
| * to the predicate hash function in case useBucketCols is set to true, |
| * otherwise the expression list stored in the TableSample is used. The bucket |
| * columns of the table are used to generate this predicate in case no |
| * expressions are provided on the TABLESAMPLE clause and the table has |
| * clustering columns defined in its metadata. The predicate created has the
| * following structure: |
| * |
| * ((hash(expressions) & Integer.MAX_VALUE) % denominator) == numerator |
| * |
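| * For example (illustrative only), a hypothetical clause such as
| * TABLESAMPLE(BUCKET 3 OUT OF 32 ON userid) would yield a predicate equivalent to
| *
| * ((hash(userid) & Integer.MAX_VALUE) % 32) == 2
| *
| * since the numerator is decremented by one before the comparison.
| *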
| * @param ts |
| * TABLESAMPLE clause information |
| * @param bucketCols |
| * The clustering columns of the table |
| * @param useBucketCols |
| * Flag to indicate whether the bucketCols should be used as input to |
| * the hash function |
| * @param alias |
| * The alias used for the table in the row resolver |
| * @param rwsch |
| * The row resolver used to resolve column references |
| * @param planExpr |
| * The plan tree for the expression. If the user specified this, the |
| * parse expressions are not used |
| * @return exprNodeDesc |
| * @exception SemanticException |
| */ |
| private ExprNodeDesc genSamplePredicate(TableSample ts, |
| List<String> bucketCols, boolean useBucketCols, String alias, |
| RowResolver rwsch, ExprNodeDesc planExpr, int bucketingVersion) |
| throws SemanticException { |
| |
| ExprNodeDesc numeratorExpr = new ExprNodeConstantDesc( |
| TypeInfoFactory.intTypeInfo, Integer.valueOf(ts.getNumerator() - 1)); |
| |
| ExprNodeDesc denominatorExpr = new ExprNodeConstantDesc( |
| TypeInfoFactory.intTypeInfo, Integer.valueOf(ts.getDenominator())); |
| |
| ExprNodeDesc intMaxExpr = new ExprNodeConstantDesc( |
| TypeInfoFactory.intTypeInfo, Integer.valueOf(Integer.MAX_VALUE)); |
| |
| List<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>(); |
| if (planExpr != null) { |
| args.add(planExpr); |
| } else if (useBucketCols) { |
| for (String col : bucketCols) { |
| ColumnInfo ci = rwsch.get(alias, col); |
| // TODO: change type to the one in the table schema |
| args.add(new ExprNodeColumnDesc(ci)); |
| } |
| } else { |
| for (ASTNode expr : ts.getExprs()) { |
| args.add(genExprNodeDesc(expr, rwsch)); |
| } |
| } |
| |
| ExprNodeDesc equalsExpr = null; |
| { |
| ExprNodeDesc hashfnExpr = new ExprNodeGenericFuncDesc( |
| TypeInfoFactory.intTypeInfo, |
| bucketingVersion == 2 ? new GenericUDFMurmurHash() : new GenericUDFHash(), args); |
| LOG.info("hashfnExpr = " + hashfnExpr); |
| ExprNodeDesc andExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() |
| .getFuncExprNodeDesc("&", hashfnExpr, intMaxExpr); |
| LOG.info("andExpr = " + andExpr); |
| ExprNodeDesc modExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() |
| .getFuncExprNodeDesc("%", andExpr, denominatorExpr); |
| LOG.info("modExpr = " + modExpr); |
| LOG.info("numeratorExpr = " + numeratorExpr); |
| equalsExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() |
| .getFuncExprNodeDesc("==", modExpr, numeratorExpr); |
| LOG.info("equalsExpr = " + equalsExpr); |
| } |
| return equalsExpr; |
| } |
| |
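| // Returns the lower-cased alias qualified by the query block id (for example,
| // alias "t1" in query block "qb1" becomes "qb1:t1"); used as the key into topOps
| // and other per-alias maps.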
| protected String getAliasId(String alias, QB qb) { |
| return (qb.getId() == null ? alias : qb.getId() + ":" + alias).toLowerCase(); |
| } |
| |
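| /**
| * Generates the table scan plan for a single table alias: builds the row resolver
| * from the SerDe schema, partition columns and virtual columns, reuses an existing
| * TableScanOperator for the alias if one was already created, and adds a
| * FilterOperator on top for TABLESAMPLE clauses or test-mode sampling.
| */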
| @SuppressWarnings("nls") |
| private Operator genTablePlan(String alias, QB qb) throws SemanticException { |
| |
| String alias_id = getAliasId(alias, qb); |
| Table tab = qb.getMetaData().getSrcForAlias(alias); |
| RowResolver rwsch; |
| |
| // is a TableScanOperator for this alias already present?
| TableScanOperator top = topOps.get(alias_id); |
| |
| // Obtain table props in query |
| Map<String, String> properties = qb.getTabPropsForAlias(alias); |
| |
| if (top == null) { |
| // Determine row schema for TSOP. |
| // Include column names from SerDe, the partition and virtual columns. |
| rwsch = new RowResolver(); |
| try { |
| // Including parameters passed in the query |
| if (properties != null) { |
| for (Entry<String, String> prop : properties.entrySet()) { |
| if (tab.getSerdeParam(prop.getKey()) != null) { |
| LOG.warn("SerDe property in input query overrides stored SerDe property"); |
| } |
| tab.setSerdeParam(prop.getKey(), prop.getValue()); |
| } |
| } |
| // Obtain inspector for schema |
| final Deserializer deserializer = tab.getDeserializer(); |
| StructObjectInspector rowObjectInspector = (StructObjectInspector) deserializer.getObjectInspector(); |
| |
| deserializer.handleJobLevelConfiguration(conf); |
| List<? extends StructField> fields = rowObjectInspector |
| .getAllStructFieldRefs(); |
| for (int i = 0; i < fields.size(); i++) { |
| // if the column is a skewed column, mark it on the ColumnInfo accordingly
| ColumnInfo colInfo = new ColumnInfo(fields.get(i).getFieldName(), |
| TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i) |
| .getFieldObjectInspector()), alias, false); |
| colInfo.setSkewedCol(isSkewedCol(alias, qb, fields.get(i).getFieldName())); |
| rwsch.put(alias, fields.get(i).getFieldName(), colInfo); |
| } |
| } catch (SerDeException e) { |
| throw new RuntimeException(e); |
| } |
| // Hack!! - refactor once the metadata APIs with types are ready |
| // Finally add the partitioning columns |
| for (FieldSchema part_col : tab.getPartCols()) { |
| LOG.trace("Adding partition col: " + part_col); |
| rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(), |
| TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), alias, true)); |
| } |
| |
| // put virtual columns into RowResolver. |
| List<VirtualColumn> vcList = new ArrayList<>(); |
| if (!tab.isNonNative()) { |
| vcList.addAll(VirtualColumn.getRegistry(conf)); |
| } |
| if (tab.isNonNative() && AcidUtils.isNonNativeAcidTable(tab, false)) { |
| vcList.addAll(tab.getStorageHandler().acidVirtualColumns()); |
| } |
| |
| vcList.forEach(vc -> rwsch.put(alias, vc.getName().toLowerCase(), new ColumnInfo(vc.getName(), |
| vc.getTypeInfo(), alias, true, vc.getIsHidden() |
| ))); |
| |
| // Create the root of the operator tree |
| TableScanDesc tsDesc = new TableScanDesc(alias, vcList, tab); |
| setupStats(tsDesc, qb.getParseInfo(), tab, alias, rwsch); |
| |
| Map<String, String> tblProperties = tab.getParameters(); |
| Map<String, String> tblPropertiesFromQuery = qb.getTabPropsForAlias(alias); |
| |
| AcidUtils.AcidOperationalProperties acidOperationalProperties = tsDesc.getAcidOperationalProperties(); |
| if (acidOperationalProperties != null) { |
| tsDesc.getAcidOperationalProperties().setInsertOnlyFetchBucketId( |
| (tblProperties != null && Boolean.parseBoolean(tblProperties.get(Constants.INSERT_ONLY_FETCH_BUCKET_ID))) || |
| (tblPropertiesFromQuery != null && |
| Boolean.parseBoolean(tblPropertiesFromQuery.get(Constants.INSERT_ONLY_FETCH_BUCKET_ID)))); |
| |
| tsDesc.getAcidOperationalProperties().setFetchDeletedRows( |
| (tblProperties != null && Boolean.parseBoolean(tblProperties.get(Constants.ACID_FETCH_DELETED_ROWS))) || |
| (tblPropertiesFromQuery != null && |
| Boolean.parseBoolean(tblPropertiesFromQuery.get(Constants.ACID_FETCH_DELETED_ROWS)))); |
| } |
| |
| SplitSample sample = nameToSplitSample.get(alias_id); |
| if (sample != null && sample.getRowCount() != null) { |
| tsDesc.setRowLimit(sample.getRowCount()); |
| nameToSplitSample.remove(alias_id); |
| } |
| |
| top = (TableScanOperator) putOpInsertMap(OperatorFactory.get(getOpContext(), tsDesc, |
| new RowSchema(rwsch.getColumnInfos())), rwsch); |
| |
| // Set insiderView so that we can skip the column authorization for this. |
| top.setInsideView(qb.isInsideView() || qb.getAliasInsideView().contains(alias.toLowerCase())); |
| |
| // Add this to the list of top operators - we always start from a table |
| // scan |
| topOps.put(alias_id, top); |
| |
| if (properties != null) { |
| tsDesc.setOpProps(properties); |
| } |
| } else { |
| rwsch = opParseCtx.get(top).getRowResolver(); |
| top.setChildOperators(null); |
| } |
| |
| // check if this table is sampled and needs more than input pruning |
| Operator<? extends OperatorDesc> op = top; |
| TableSample ts = qb.getParseInfo().getTabSample(alias); |
| if (ts != null) { |
| TableScanOperator tableScanOp = top; |
| tableScanOp.getConf().setTableSample(ts); |
| int num = ts.getNumerator(); |
| int den = ts.getDenominator(); |
| List<ASTNode> sampleExprs = ts.getExprs(); |
| |
| // TODO: Do the type checking of the expressions |
| List<String> tabBucketCols = tab.getBucketCols(); |
| int numBuckets = tab.getNumBuckets(); |
| |
| // If there are no sample cols and no bucket cols then throw an error |
| if (tabBucketCols.size() == 0 && sampleExprs.size() == 0) { |
| throw new SemanticException(ErrorMsg.NON_BUCKETED_TABLE.getMsg() + " " |
| + tab.getTableName()); |
| } |
| |
| if (num > den) { |
| throw new SemanticException( |
| ErrorMsg.BUCKETED_NUMERATOR_BIGGER_DENOMINATOR.getMsg() + " " |
| + tab.getTableName()); |
| } |
| |
| // check if a predicate is needed |
| // predicate is needed if either input pruning is not enough |
| // or if input pruning is not possible |
| |
| // check if the sample columns are the same as the table bucket columns |
| boolean colsEqual = true; |
| if ((sampleExprs.size() != tabBucketCols.size()) |
| && (sampleExprs.size() != 0)) { |
| colsEqual = false; |
| } |
| |
| for (int i = 0; i < sampleExprs.size() && colsEqual; i++) { |
| boolean colFound = false; |
| for (int j = 0; j < tabBucketCols.size() && !colFound; j++) { |
| if (sampleExprs.get(i).getToken().getType() != HiveParser.TOK_TABLE_OR_COL) { |
| break; |
| } |
| |
| if ((sampleExprs.get(i).getChild(0)).getText().equalsIgnoreCase(tabBucketCols.get(j))) { |
| colFound = true; |
| } |
| } |
| colsEqual = colFound; |
| } |
| |
| // Check if input can be pruned |
| ts.setInputPruning((sampleExprs.size() == 0 || colsEqual)); |
| |
| // check if input pruning is enough |
| if ((sampleExprs.size() == 0 || colsEqual) |
| && (num == den || (den % numBuckets == 0 || numBuckets % den == 0))) { |
| |
| // input pruning is enough; add the filter for the optimizer to use it |
| // later |
| LOG.info("No need for sample filter"); |
| ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, |
| colsEqual, alias, rwsch, null, |
| tab.getBucketingVersion()); |
| FilterDesc filterDesc = new FilterDesc( |
| samplePredicate, true, new SampleDesc(ts.getNumerator(), |
| ts.getDenominator(), tabBucketCols, true)); |
| filterDesc.setGenerated(true); |
| op = OperatorFactory.getAndMakeChild(filterDesc, |
| new RowSchema(rwsch.getColumnInfos()), top); |
| } else { |
| // need to add a filter:
| // create a FilterOperator from filterDesc and set it as a child of 'top'
| LOG.info("Need sample filter"); |
| ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, |
| colsEqual, alias, rwsch, null, |
| tab.getBucketingVersion()); |
| FilterDesc filterDesc = new FilterDesc(samplePredicate, true); |
| filterDesc.setGenerated(true); |
| op = OperatorFactory.getAndMakeChild(filterDesc, |
| new RowSchema(rwsch.getColumnInfos()), top); |
| } |
| } else { |
| boolean testMode = conf.getBoolVar(ConfVars.HIVETESTMODE); |
| if (testMode) { |
| String tabName = tab.getTableName(); |
| |
| // has the user explicitly asked not to sample this table?
| String unSampleTblList = conf |
| .getVar(ConfVars.HIVETESTMODENOSAMPLE); |
| String[] unSampleTbls = unSampleTblList.split(","); |
| boolean unsample = false; |
| for (String unSampleTbl : unSampleTbls) { |
| if (tabName.equalsIgnoreCase(unSampleTbl)) { |
| unsample = true; |
| } |
| } |
| |
| if (!unsample) { |
| int numBuckets = tab.getNumBuckets(); |
| |
| // If the input table is bucketed, choose the first bucket |
| if (numBuckets > 0) { |
| TableSample tsSample = new TableSample(1, numBuckets); |
| tsSample.setInputPruning(true); |
| qb.getParseInfo().setTabSample(alias, tsSample); |
| ExprNodeDesc samplePred = genSamplePredicate(tsSample, tab |
| .getBucketCols(), true, alias, rwsch, null, |
| tab.getBucketingVersion()); |
| FilterDesc filterDesc = new FilterDesc(samplePred, true, |
| new SampleDesc(tsSample.getNumerator(), tsSample |
| .getDenominator(), tab.getBucketCols(), true)); |
| filterDesc.setGenerated(true); |
| op = OperatorFactory.getAndMakeChild(filterDesc, |
| new RowSchema(rwsch.getColumnInfos()), top); |
| LOG.info("No need for sample filter"); |
| } else { |
| // The table is not bucketed; add a dummy filter on rand()
| int freq = conf.getIntVar(ConfVars.HIVETESTMODESAMPLEFREQ); |
| TableSample tsSample = new TableSample(1, freq); |
| tsSample.setInputPruning(false); |
| qb.getParseInfo().setTabSample(alias, tsSample); |
| LOG.info("Need sample filter"); |
| ExprNodeDesc randFunc = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() |
| .getFuncExprNodeDesc("rand", |
| new ExprNodeConstantDesc(Integer.valueOf(460476415))); |
| ExprNodeDesc samplePred = genSamplePredicate(tsSample, null, false, |
| alias, rwsch, randFunc, tab.getBucketingVersion()); |
| FilterDesc filterDesc = new FilterDesc(samplePred, true); |
| filterDesc.setGenerated(true); |
| op = OperatorFactory.getAndMakeChild(filterDesc, |
| new RowSchema(rwsch.getColumnInfos()), top); |
| } |
| } |
| } |
| } |
| |
| Operator output = putOpInsertMap(op, rwsch); |
| |
| LOG.debug("Created Table Plan for {} {}", alias, op); |
| |
| return output; |
| } |
| |
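| // Returns true if the given column is declared as a skewed column for this alias.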
| boolean isSkewedCol(String alias, QB qb, String colName) { |
| return qb.getSkewedColumnNames(alias).stream() |
| .anyMatch(skewedCol -> skewedCol.equalsIgnoreCase(colName)); |
| } |
| |
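| /**
| * Configures stats gathering on the table scan for ANALYZE / column stats commands:
| * sets the stats tmp dir and aggregation key prefix, appends the stats virtual
| * columns to the row resolver, and registers WriteEntity outputs for the table and
| * its matching partitions.
| */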
| private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String alias, |
| RowResolver rwsch) |
| throws SemanticException { |
| |
| // if it is not an analyze command and not column stats, then do not gather stats
| if (!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null) { |
| tsDesc.setGatherStats(false); |
| return; |
| } |
| |
| if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) { |
| String statsTmpLoc = ctx.getTempDirForInterimJobPath(tab.getPath()).toString(); |
| LOG.debug("Set stats collection dir : " + statsTmpLoc); |
| tsDesc.setTmpStatsDir(statsTmpLoc); |
| } |
| tsDesc.setGatherStats(true); |
| tsDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); |
| |
| // append additional virtual columns for storing statistics |
| Iterator<VirtualColumn> vcs = VirtualColumn.getStatsRegistry(conf).iterator(); |
| List<VirtualColumn> vcList = new ArrayList<VirtualColumn>(); |
| while (vcs.hasNext()) { |
| VirtualColumn vc = vcs.next(); |
| rwsch.put(alias, vc.getName(), new ColumnInfo(vc.getName(), |
| vc.getTypeInfo(), alias, true, vc.getIsHidden())); |
| vcList.add(vc); |
| } |
| tsDesc.addVirtualCols(vcList); |
| |
| String tblName = tab.getTableName(); |
| // Theoretically the key prefix could be any unique string shared |
| // between TableScanOperator (when publishing) and StatsTask (when aggregating). |
| // Here we use |
| // db_name.table_name + partitionSpec
| // as the prefix for ease of reading during explain and debugging.
| // Currently, the partition spec can only be a static partition.
| String k = FileUtils.escapePathName(tblName).toLowerCase() + Path.SEPARATOR; |
| tsDesc.setStatsAggPrefix(FileUtils.escapePathName(tab.getDbName()).toLowerCase() + "." + k); |
| |
| // set up WriteEntity for replication and txn stats |
| WriteEntity we = new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED); |
| we.setTxnAnalyze(true); |
| outputs.add(we); |
| if (AcidUtils.isTransactionalTable(tab)) { |
| if (acidAnalyzeTable != null) { |
| throw new IllegalStateException("Multiple ACID tables in analyze: " |
| + we + ", " + acidAnalyzeTable); |
| } |
| acidAnalyzeTable = we; |
| } |
| |
| // add WriteEntity for each matching partition |
| if (tab.isPartitioned()) { |
| List<String> cols = new ArrayList<String>(); |
| if (qbp.getAnalyzeRewrite() != null) { |
| List<FieldSchema> partitionCols = tab.getPartCols(); |
| for (FieldSchema fs : partitionCols) { |
| cols.add(fs.getName()); |
| } |
| tsDesc.setPartColumns(cols); |
| return; |
| } |
| TableSpec tblSpec = qbp.getTableSpec(alias); |
| Map<String, String> partSpec = tblSpec.getPartSpec(); |
| if (partSpec != null) { |
| cols.addAll(partSpec.keySet()); |
| tsDesc.setPartColumns(cols); |
| } else { |
| throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg()); |
| } |
| List<Partition> partitions = qbp.getTableSpec().partitions; |
| if (partitions != null) { |
| for (Partition partn : partitions) { |
| WriteEntity pwe = new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK); |
| pwe.setTxnAnalyze(true); |
| outputs.add(pwe); |
| } |
| } |
| } |
| } |
| |
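| // Generates the plan for a QBExpr: delegates to genPlan(QB) for a plain query
| // block, recursively plans and unions both children for UNION, and rejects
| // EXCEPT/INTERSECT since those are only supported with CBO enabled.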
| private Operator genPlan(QB parent, QBExpr qbexpr) throws SemanticException { |
| if (qbexpr.getOpcode() == QBExpr.Opcode.EXCEPT || qbexpr.getOpcode() == QBExpr.Opcode.EXCEPTALL |
| || qbexpr.getOpcode() == QBExpr.Opcode.INTERSECT || qbexpr.getOpcode() == QBExpr.Opcode.INTERSECTALL) { |
| throw new SemanticException( |
| "EXCEPT and INTERSECT operations are only supported with Cost Based Optimizations enabled. Please set 'hive.cbo.enable' to true!"); |
| } |
| if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { |
| boolean skipAmbiguityCheck = viewSelect == null && parent.isTopLevelSelectStarQuery(); |
| return genPlan(qbexpr.getQB(), skipAmbiguityCheck); |
| } |
| if (qbexpr.getOpcode() == QBExpr.Opcode.UNION) { |
| Operator qbexpr1Ops = genPlan(parent, qbexpr.getQBExpr1()); |
| Operator qbexpr2Ops = genPlan(parent, qbexpr.getQBExpr2()); |
| |
| return genUnionPlan(qbexpr.getAlias(), qbexpr.getQBExpr1().getAlias(), |
| qbexpr1Ops, qbexpr.getQBExpr2().getAlias(), qbexpr2Ops); |
| } |
| return null; |
| } |
| |
| Operator genPlan(QB qb) throws SemanticException { |
| return genPlan(qb, false); |
| } |
| |
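| /**
| * Generates the operator plan for a query block: plans subqueries and source tables
| * (adding a single-row dummy table when there is no FROM source), applies PTF
| * invocations and lateral views, builds the join plan if one is present, and then
| * generates the query body on top of the resulting source operator.
| */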
| @SuppressWarnings("nls") |
| private Operator genPlan(QB qb, boolean skipAmbiguityCheck) |
| throws SemanticException { |
| |
| if (!ctx.isCboSucceeded() && qb.getParseInfo().hasQualifyClause()) { |
| throw new SemanticException(ErrorMsg.CBO_IS_REQUIRED.getErrorCodedMsg("Qualify clause")); |
| } |
| |
| // First generate all the opInfos for the elements in the from clause |
| // Must be deterministic order map - see HIVE-8707 |
| Map<String, Operator> aliasToOpInfo = new LinkedHashMap<String, Operator>(); |
| |
| // Recurse over the subqueries to fill the subquery part of the plan |
| for (String alias : qb.getSubqAliases()) { |
| QBExpr qbexpr = qb.getSubqForAlias(alias); |
| Operator<?> operator = genPlan(qb, qbexpr); |
| aliasToOpInfo.put(alias, operator); |
| if (qb.getViewToTabSchema().containsKey(alias)) { |
| // we set viewProjectToTableSchema so that we can leverage ColumnPruner. |
| if (operator instanceof LimitOperator) { |
| // If create view has LIMIT operator, this can happen |
| // Fetch parent operator |
| operator = operator.getParentOperators().get(0); |
| } |
| if (operator instanceof SelectOperator) { |
| if (this.viewProjectToTableSchema == null) { |
| this.viewProjectToTableSchema = new LinkedHashMap<>(); |
| } |
| viewProjectToTableSchema.put((SelectOperator) operator, qb.getViewToTabSchema() |
| .get(alias)); |
| } else { |
| throw new SemanticException("View " + alias + " is corresponding to " |
| + operator.getType().name() + ", rather than a SelectOperator."); |
| } |
| } |
| } |
| |
| // Recurse over all the source tables |
| for (String alias : qb.getTabAliases()) { |
| if(alias.equals(DUMMY_TABLE)) { |
| continue; |
| } |
| Operator op = genTablePlan(alias, qb); |
| aliasToOpInfo.put(alias, op); |
| } |
| |
| if (aliasToOpInfo.isEmpty()) { |
| qb.getMetaData().setSrcForAlias(DUMMY_TABLE, getDummyTable()); |
| TableScanOperator op = (TableScanOperator) genTablePlan(DUMMY_TABLE, qb); |
| op.getConf().setRowLimit(1); |
| qb.addAlias(DUMMY_TABLE); |
| qb.setTabAlias(DUMMY_TABLE, DUMMY_TABLE); |
| aliasToOpInfo.put(DUMMY_TABLE, op); |
| } |
| |
| Operator srcOpInfo = null; |
| Operator lastPTFOp = null; |
| |
| if(queryProperties.hasPTF()){ |
| //After processing subqueries and source tables, process |
| // partitioned table functions |
| |
| Map<ASTNode, PTFInvocationSpec> ptfNodeToSpec = qb.getPTFNodeToSpec(); |
| if ( ptfNodeToSpec != null ) { |
| for(Entry<ASTNode, PTFInvocationSpec> entry : ptfNodeToSpec.entrySet()) { |
| ASTNode ast = entry.getKey(); |
| PTFInvocationSpec spec = entry.getValue(); |
| String inputAlias = spec.getQueryInputName(); |
| Operator inOp = aliasToOpInfo.get(inputAlias); |
| if ( inOp == null ) { |
| throw new SemanticException(generateErrorMessage(ast, |
| "Cannot resolve input Operator for PTF invocation")); |
| } |
| lastPTFOp = genPTFPlan(spec, inOp); |
| String ptfAlias = spec.getFunction().getAlias(); |
| if ( ptfAlias != null ) { |
| aliasToOpInfo.put(ptfAlias, lastPTFOp); |
| } |
| } |
| } |
| |
| } |
| |
| // For all the source tables that have a lateral view, attach the |
| // appropriate operators to the TS |
| genLateralViewPlans(aliasToOpInfo, qb); |
| |
| |
| // process join |
| if (qb.getParseInfo().getJoinExpr() != null) { |
| ASTNode joinExpr = qb.getParseInfo().getJoinExpr(); |
| |
| if (joinExpr.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) { |
| QBJoinTree joinTree = genUniqueJoinTree(qb, joinExpr, aliasToOpInfo); |
| qb.setQbJoinTree(joinTree); |
| } else { |
| QBJoinTree joinTree = genJoinTree(qb, joinExpr, aliasToOpInfo); |
| qb.setQbJoinTree(joinTree); |
| /* |
| * if there is only one destination in Query try to push where predicates |
| * as Join conditions |
| */ |
| Set<String> dests = qb.getParseInfo().getClauseNames(); |
| if ( dests.size() == 1 && joinTree.getNoOuterJoin()) { |
| String dest = dests.iterator().next(); |
| ASTNode whereClause = qb.getParseInfo().getWhrForClause(dest); |
| if ( whereClause != null ) { |
| extractJoinCondsFromWhereClause(joinTree, |
| (ASTNode) whereClause.getChild(0), |
| aliasToOpInfo ); |
| } |
| } |
| |
| if (!disableJoinMerge) { |
| mergeJoinTree(qb); |
| } |
| } |
| |
| // if any filters are present in the join tree, push them on top of the |
| // table |
| pushJoinFilters(qb, qb.getQbJoinTree(), aliasToOpInfo); |
| srcOpInfo = genJoinPlan(qb, aliasToOpInfo); |
| } else { |
| // Now if there are more than one source then we have a join case;
| // later we can extend this to the union all case as well
| srcOpInfo = aliasToOpInfo.values().iterator().next(); |
| // with PTFs, there may be more (note for PTF chains:
| // 1 PTF invocation may entail multiple PTF operators)
| srcOpInfo = lastPTFOp != null ? lastPTFOp : srcOpInfo; |
| } |
| |
| Operator bodyOpInfo = genBodyPlan(qb, srcOpInfo, aliasToOpInfo); |
| |
| if (LOG.isDebugEnabled()) { |
| LOG.debug("Created Plan for Query Block " + qb.getId()); |
| } |
| |
| if (qb.getAlias() != null) { |
| rewriteRRForSubQ(qb.getAlias(), bodyOpInfo, skipAmbiguityCheck); |
| } |
| |
| setQB(qb); |
| return bodyOpInfo; |
| } |
| |
| // change the current operator's row resolver table aliases to the subquery alias
| private void rewriteRRForSubQ(String alias, Operator operator, boolean skipAmbiguityCheck) |
| throws SemanticException { |
| RowResolver rr = opParseCtx.get(operator).getRowResolver(); |
| RowResolver newRR = new RowResolver(); |
| for (ColumnInfo colInfo : rr.getColumnInfos()) { |
| String name = colInfo.getInternalName(); |
| String[] tmp = rr.reverseLookup(name); |
| if ("".equals(tmp[0]) || tmp[1] == null) { |
| // ast expression is not a valid column name for table |
| tmp[1] = colInfo.getInternalName(); |
| } else if (newRR.get(alias, tmp[1]) != null) { |
| // enforce uniqueness of column names |
| if (!skipAmbiguityCheck) { |
| throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(tmp[1] + " in " + alias)); |
| } |
| // if it's wrapped by top-level select star query, skip ambiguity check (for backward compatibility) |
| tmp[1] = colInfo.getInternalName(); |
| } |
| newRR.put(alias, tmp[1], colInfo); |
| } |
| opParseCtx.get(operator).setRowResolver(newRR); |
| } |
| |
| Path dummyPath; |
| public Table getDummyTable() throws SemanticException { |
| if (dummyPath == null) { |
| dummyPath = createDummyFile(); |
| } |
| |
| Table desc = new Table(DUMMY_DATABASE, DUMMY_TABLE); |
| desc.getTTable().getSd().setLocation(dummyPath.toString()); |
| desc.getTTable().getSd().getSerdeInfo().setSerializationLib(NullStructSerDe.class.getName()); |
| desc.setInputFormatClass(NullRowsInputFormat.class); |
| desc.setOutputFormatClass(HiveIgnoreKeyTextOutputFormat.class); |
| return desc; |
| } |
| |
| // add dummy data so the path is not removed by CombineHiveInputFormat, etc.
| private Path createDummyFile() throws SemanticException { |
| Path dummyPath = new Path(ctx.getMRScratchDir(), "dummy_path"); |
| Path dummyFile = new Path(dummyPath, "dummy_file"); |
| FSDataOutputStream fout = null; |
| try { |
| FileSystem fs = dummyFile.getFileSystem(conf); |
| if (fs.exists(dummyFile)) { |
| return dummyPath; |
| } |
| fout = fs.create(dummyFile); |
| fout.write(1); |
| fout.close(); |
| } catch (IOException e) { |
| throw new SemanticException(e); |
| } finally { |
| IOUtils.closeStream(fout); |
| } |
| return dummyPath; |
| } |
| |
| /** |
| * Generates the operator DAG needed to implement lateral views and attaches |
| * it to the TS operator. |
| * |
| * @param aliasToOpInfo |
| * A mapping from a table alias to the TS operator. This function |
| * replaces the operator mapping as necessary |
| * @param qb |
| * @throws SemanticException |
| */ |
| private void genLateralViewPlans(Map<String, Operator> aliasToOpInfo, QB qb) |
| throws SemanticException { |
| Map<String, List<ASTNode>> aliasToLateralViews = qb.getParseInfo().getAliasToLateralViews(); |
| for (Entry<String, Operator> e : aliasToOpInfo.entrySet()) { |
| String alias = e.getKey(); |
| // See if the alias has a lateral view. If so, chain the lateral view |
| // operator on |
| List<ASTNode> lateralViews = aliasToLateralViews.get(alias); |
| if (lateralViews != null) { |
| Operator op = e.getValue(); |
| |
| for (ASTNode lateralViewTree : aliasToLateralViews.get(alias)) { |
| // There are 2 paths from the TS operator (or a previous LVJ operator) |
| // to the same LateralViewJoinOperator. |
| // TS -> SelectOperator(*) -> LateralViewJoinOperator |
| // TS -> SelectOperator (gets cols for UDTF) -> UDTFOperator0 |
| // -> LateralViewJoinOperator |
| // |
| |
| op = genLateralViewPlan(qb, op, lateralViewTree); |
| } |
| e.setValue(op); |
| } |
| } |
| } |
| |
| private Operator genLateralViewPlanForDest(String dest, QB qb, Operator op) |
| throws SemanticException { |
| ASTNode lateralViewTree = qb.getParseInfo().getDestToLateralView().get(dest); |
| if (lateralViewTree != null) { |
| return genLateralViewPlan(qb, op, lateralViewTree); |
| } |
| return op; |
| } |
| |
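| /**
| * Generates the operator subtree for a single lateral view: a LateralViewForward
| * operator feeding two branches (a select(*) "all" path and a UDTF path) that are
| * merged back together by a LateralViewJoinOperator.
| */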
| private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree) |
| throws SemanticException { |
| RowResolver lvForwardRR = new RowResolver(); |
| RowResolver source = opParseCtx.get(op).getRowResolver(); |
| Map<String, ExprNodeDesc> lvfColExprMap = new HashMap<String, ExprNodeDesc>(); |
| Map<String, ExprNodeDesc> selColExprMap = new HashMap<String, ExprNodeDesc>(); |
| List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>(); |
| List<String> colNames = new ArrayList<String>(); |
| for (ColumnInfo col : source.getColumnInfos()) { |
| String[] tabCol = source.reverseLookup(col.getInternalName()); |
| lvForwardRR.put(tabCol[0], tabCol[1], col); |
| ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(col); |
| colList.add(colExpr); |
| colNames.add(colExpr.getColumn()); |
| lvfColExprMap.put(col.getInternalName(), colExpr); |
| selColExprMap.put(col.getInternalName(), colExpr.clone()); |
| } |
| |
| Operator lvForward = putOpInsertMap(OperatorFactory.getAndMakeChild( |
| new LateralViewForwardDesc(), new RowSchema(lvForwardRR.getColumnInfos()), |
| op), lvForwardRR); |
| lvForward.setColumnExprMap(lvfColExprMap); |
| |
| // The order in which the two paths are added is important. The |
| // lateral view join operator depends on having the select operator |
| // give it the row first. |
| |
| // Get the all path by making a select(*). |
| RowResolver allPathRR = opParseCtx.get(lvForward).getRowResolver(); |
| // Operator allPath = op; |
| SelectDesc sDesc = new SelectDesc(colList, colNames, false); |
| sDesc.setSelStarNoCompute(true); |
| Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild( |
| sDesc, new RowSchema(allPathRR.getColumnInfos()), |
| lvForward), allPathRR); |
| allPath.setColumnExprMap(selColExprMap); |
| int allColumns = allPathRR.getColumnInfos().size(); |
| // Get the UDTF Path |
| QB blankQb = new QB(null, null, false); |
| Operator udtfPath = genSelectPlan(null, (ASTNode) lateralViewTree |
| .getChild(0), blankQb, lvForward, null, |
| lateralViewTree.getType() == HiveParser.TOK_LATERAL_VIEW_OUTER); |
| // add udtf aliases to QB |
| for (String udtfAlias : blankQb.getAliases()) { |
| qb.addAlias(udtfAlias); |
| } |
| RowResolver udtfPathRR = opParseCtx.get(udtfPath).getRowResolver(); |
| |
| // Merge the two into the lateral view join |
| // The cols of the merged result will be the combination of both the |
| // cols of the UDTF path and the cols of the all path. The internal |
| // names have to be changed to avoid conflicts |
| |
| RowResolver lateralViewRR = new RowResolver(); |
| List<String> outputInternalColNames = new ArrayList<String>(); |
| |
| |
| // For PPD, we need a column to expression map so that during the walk, |
| // the processor knows how to transform the internal col names. |
| // The following steps are dependent on the fact that we call
| // LVmergeRowResolvers in the above order
| Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); |
| |
| LVmergeRowResolvers(allPathRR, lateralViewRR, colExprMap, outputInternalColNames); |
| LVmergeRowResolvers(udtfPathRR, lateralViewRR, colExprMap, outputInternalColNames); |
| |
| Operator lateralViewJoin = putOpInsertMap(OperatorFactory |
| .getAndMakeChild(new LateralViewJoinDesc(allColumns, outputInternalColNames), |
| new RowSchema(lateralViewRR.getColumnInfos()), allPath, |
| udtfPath), lateralViewRR); |
| lateralViewJoin.setColumnExprMap(colExprMap); |
| return lateralViewJoin; |
| } |
| |
| /** |
| * A helper function that gets all the columns and respective aliases in the |
| * source and puts them into dest. It renames the internal names of the |
| * columns based on getColumnInternalName(position). |
| * |
| * Note that this helper method relies on RowResolver.getColumnInfos() |
| * returning the columns in the same order as they will be passed in the |
| * operator DAG. |
| * |
| * @param source |
| * @param dest |
| * @param colExprMap |
| * @param outputInternalColNames |
| * - a list to which the new internal column names will be added, in |
| * the same order as in the dest row resolver |
| */ |
| private void LVmergeRowResolvers(RowResolver source, RowResolver dest, |
| Map<String, ExprNodeDesc> colExprMap, List<String> outputInternalColNames) { |
| for (ColumnInfo c : source.getColumnInfos()) { |
| String internalName = getColumnInternalName(outputInternalColNames.size()); |
| outputInternalColNames.add(internalName); |
| ColumnInfo newCol = new ColumnInfo(internalName, c.getType(), c |
| .getTabAlias(), c.getIsVirtualCol(), c.isHiddenVirtualCol()); |
| String[] tableCol = source.reverseLookup(c.getInternalName()); |
| String tableAlias = tableCol[0]; |
| String colAlias = tableCol[1]; |
| dest.put(tableAlias, colAlias, newCol); |
| colExprMap.put(internalName, new ExprNodeColumnDesc(c)); |
| } |
| } |
| |
| @SuppressWarnings("nls") |
| Phase1Ctx initPhase1Ctx() { |
| Phase1Ctx ctx_1 = new Phase1Ctx(); |
| ctx_1.nextNum = 0; |
| ctx_1.dest = "reduce"; |
| |
| return ctx_1; |
| } |
| |
| @Override |
| public void init(boolean clearPartsCache) { |
| // clear most members |
| reset(clearPartsCache); |
| |
| // init |
| this.qb = new QB(null, null, false); |
| } |
| |
| @Override |
| @SuppressWarnings("nls") |
| public void analyzeInternal(ASTNode ast) throws SemanticException { |
| analyzeInternal(ast, PlannerContext::new); |
| } |
| |
| /** |
| * Planner-specific hooks go in here.
| */ |
| static class PlannerContext { |
| |
| void setCTASToken(ASTNode child) { |
| } |
| |
| void setViewToken(ASTNode child) { |
| } |
| |
| void setInsertToken(ASTNode ast, boolean isTmpFileDest) { |
| } |
| |
| void setMultiInsertToken(ASTNode child) { |
| } |
| |
| void resetToken() { |
| } |
| } |
| |
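| // Looks up a table by name, caching the result in tabNameToTabObject to avoid
| // repeated metastore lookups.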
| protected Table getTableObjectByName(String tableName, boolean throwException) throws HiveException { |
| if (!tabNameToTabObject.containsKey(tableName)) { |
| Table table = db.getTable(tableName, throwException); |
| if (table != null) { |
| tabNameToTabObject.put(tableName, table); |
| } |
| return table; |
| } else { |
| return tabNameToTabObject.get(tableName); |
| } |
| } |
| |
| public Table getTableObjectByName(String tableName) throws HiveException { |
| return getTableObjectByName(tableName, true); |
| } |
| |
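| /**
| * Walks the AST collecting every TOK_TABREF (skipping CTE references), asks the
| * authorization layer which of the referenced tables require row filtering or
| * column masking, and records the corresponding rewrite for each such table
| * reference in the TableMask.
| */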
| private void walkASTMarkTABREF(TableMask tableMask, ASTNode ast, Set<String> cteAlias, Context ctx) |
| throws SemanticException { |
| Queue<Node> queue = new LinkedList<>(); |
| queue.add(ast); |
| Map<HivePrivilegeObject, MaskAndFilterInfo> basicInfos = new LinkedHashMap<>(); |
| while (!queue.isEmpty()) { |
| ASTNode astNode = (ASTNode) queue.poll(); |
| if (astNode.getToken().getType() == HiveParser.TOK_TABREF) { |
| int aliasIndex = 0; |
| StringBuilder additionalTabInfo = new StringBuilder(); |
| for (int index = 1; index < astNode.getChildCount(); index++) { |
| ASTNode ct = (ASTNode) astNode.getChild(index); |
| if (ct.getToken().getType() == HiveParser.TOK_TABLEBUCKETSAMPLE |
| || ct.getToken().getType() == HiveParser.TOK_TABLESPLITSAMPLE |
| || ct.getToken().getType() == HiveParser.TOK_TABLEPROPERTIES) { |
| additionalTabInfo.append(ctx.getTokenRewriteStream().toString(ct.getTokenStartIndex(), |
| ct.getTokenStopIndex())); |
| } else { |
| aliasIndex = index; |
| } |
| } |
| |
| ASTNode tableTree = (ASTNode) (astNode.getChild(0)); |
| |
| String tabIdName = getUnescapedName(tableTree); |
| |
| String alias; |
| if (aliasIndex != 0) { |
| alias = unescapeIdentifier(astNode.getChild(aliasIndex).getText()); |
| } else { |
| alias = getUnescapedUnqualifiedTableName(tableTree); |
| } |
| |
| // We need to know if it is a CTE or not.
| // A CTE may have the same name as a table.
| // For example,
| // with TAB2 as (select ... from TAB1) [masking applies to TAB1]
| // select * from TAB2 [no masking applies to the CTE reference]
| if (cteAlias.contains(tabIdName)) { |
| continue; |
| } |
| |
| Table table = null; |
| try { |
| table = getTableObjectByName(tabIdName, false); |
| } catch (HiveException e) { |
| // This should not happen. |
| throw new SemanticException("Got exception though getTableObjectByName method should ignore it"); |
| } |
| if (table == null) { |
| // Table may not be found when materialization of CTE is on. |
| STATIC_LOG.debug("Table " + tabIdName + " is not found in walkASTMarkTABREF."); |
| continue; |
| } |
| |
| if (table.isMaterializedView()) { |
| // When we are querying a materialized view directly, we check whether the source tables |
| // do not apply any policies. |
| for (SourceTable sourceTable : table.getMVMetadata().getSourceTables()) { |
| String qualifiedTableName = TableName.getDbTable( |
| sourceTable.getTable().getDbName(), sourceTable.getTable().getTableName()); |
| try { |
| table = getTableObjectByName(qualifiedTableName, true); |
| } catch (HiveException e) { |
| // This should not happen. |
| throw new SemanticException("Table " + qualifiedTableName + |
| " not found when trying to obtain it to check masking/filtering policies"); |
| } |
| |
| List<String> colNames = new ArrayList<>(); |
| extractColumnInfos(table, colNames, new ArrayList<>()); |
| |
| basicInfos.put(new HivePrivilegeObject(table.getDbName(), table.getTableName(), colNames), null); |
| } |
| } else { |
| List<String> colNames; |
| List<String> colTypes; |
| if (this.ctx.isCboSucceeded() && this.columnAccessInfo != null && |
| (colNames = this.columnAccessInfo.getTableToColumnAllAccessMap().get(table.getCompleteName())) != null) { |
| Map<String, String> colNameToType = table.getAllCols().stream() |
| .collect(Collectors.toMap(FieldSchema::getName, FieldSchema::getType)); |
| colTypes = colNames.stream().map(colNameToType::get).collect(Collectors.toList()); |
| } else { |
| colNames = new ArrayList<>(); |
| colTypes = new ArrayList<>(); |
| extractColumnInfos(table, colNames, colTypes); |
| } |
| |
| basicInfos.put(new HivePrivilegeObject(table.getDbName(), table.getTableName(), colNames), |
| new MaskAndFilterInfo(colTypes, additionalTabInfo.toString(), alias, astNode, table.isView(), table.isNonNative())); |
| } |
| } |
| if (astNode.getChildCount() > 0 && !IGNORED_TOKENS.contains(astNode.getToken().getType())) { |
| for (Node child : astNode.getChildren()) { |
| queue.offer(child); |
| } |
| } |
| } |
| List<HivePrivilegeObject> basicPrivObjs = new ArrayList<>(basicInfos.keySet()); |
| List<HivePrivilegeObject> needRewritePrivObjs = tableMask.applyRowFilterAndColumnMasking(basicPrivObjs); |
| if (needRewritePrivObjs != null && !needRewritePrivObjs.isEmpty()) { |
| for (HivePrivilegeObject privObj : needRewritePrivObjs) { |
| MaskAndFilterInfo info = basicInfos.get(privObj); |
| // First we check whether entity actually needs masking or filtering |
| if (tableMask.needsMaskingOrFiltering(privObj)) { |
| if (info == null) { |
| // This is a table used by a materialized view.
| // Currently we do not support querying a materialized view directly
| // when masking/filtering should be applied on its source tables.
| throw new SemanticException(ErrorMsg.MASKING_FILTERING_ON_MATERIALIZED_VIEWS_SOURCES, |
| privObj.getDbname(), privObj.getObjectName()); |
| } else { |
| String replacementText = tableMask.create(privObj, info); |
| // We don't support masking/filtering against ACID query at the moment |
| if (ctx.getIsUpdateDeleteMerge()) { |
| throw new SemanticException(ErrorMsg.MASKING_FILTERING_ON_ACID_NOT_SUPPORTED, |
| privObj.getDbname(), privObj.getObjectName()); |
| } |
| tableMask.setNeedsRewrite(true); |
| tableMask.addTranslation(info.astNode, replacementText); |
| } |
| } |
| } |
| } |
| } |
| |
| private void extractColumnInfos(Table table, List<String> colNames, List<String> colTypes) { |
| for (FieldSchema col : table.getAllCols()) { |
| colNames.add(col.getName()); |
| colTypes.add(col.getType()); |
| } |
| } |
| |
| // We walk through the AST.
| // We replace every TOK_TABREF, adding additional masking and filtering if
| // the table needs to be masked or filtered.
| // For the replacement, we leverage the methods that are used for the
| // unparseTranslator.
| private ParseResult rewriteASTWithMaskAndFilter(TableMask tableMask, ASTNode ast, TokenRewriteStream tokenRewriteStream, |
| Context ctx, Hive db) |
| throws SemanticException { |
| // 1. collect information about CTE if there is any. |
| // The base table of CTE should be masked. |
| // The CTE itself should not be masked in the references in the following main query. |
| Set<String> cteAlias = new HashSet<>(); |
| if (ast.getChildCount() > 0 |
| && HiveParser.TOK_CTE == ((ASTNode) ast.getChild(0)).getToken().getType()) { |
| // the structure inside CTE is like this |
| // TOK_CTE |
| // TOK_SUBQUERY |
| // sq1 (may refer to sq2) |
| // ... |
| // TOK_SUBQUERY |
| // sq2 |
| ASTNode cte = (ASTNode) ast.getChild(0); |
| // we start from sq2, end up with sq1. |
| for (int index = cte.getChildCount() - 1; index >= 0; index--) { |
| ASTNode subq = (ASTNode) cte.getChild(index); |
| String alias = unescapeIdentifier(subq.getChild(1).getText()); |
| if (cteAlias.contains(alias)) { |
| throw new SemanticException("Duplicate definition of " + alias); |
| } else { |
| cteAlias.add(alias); |
| walkASTMarkTABREF(tableMask, subq, cteAlias, ctx); |
| } |
| } |
| // walk the other part of ast |
| for (int index = 1; index < ast.getChildCount(); index++) { |
| walkASTMarkTABREF(tableMask, (ASTNode) ast.getChild(index), cteAlias, ctx); |
| } |
| } |
| // there is no CTE, walk the whole AST |
| else { |
| walkASTMarkTABREF(tableMask, ast, cteAlias, ctx); |
| } |
| // 2. rewrite the AST, replace TABREF with masking/filtering |
| if (tableMask.needsRewrite()) { |
| quoteIdentifierTokens(tokenRewriteStream); |
| tableMask.applyTranslations(tokenRewriteStream); |
| String rewrittenQuery = tokenRewriteStream.toString( |
| ast.getTokenStartIndex(), ast.getTokenStopIndex()); |
| ASTNode rewrittenTree; |
| try { |
| // We pass a new empty context with our HiveConf so the lexer can |
| // detect if allowQuotedId is enabled. |
| Context rewriteCtx = new Context(conf); |
| ctx.addSubContext(rewriteCtx); |
| rewrittenTree = ParseUtils.parse(rewrittenQuery, rewriteCtx); |
| return new ParseResult(rewrittenTree, rewriteCtx.getTokenRewriteStream(), |
| rewriteCtx.getParsedTables()); |
| } catch (ParseException e) { |
| throw new SemanticException(e); |
| } |
| } else { |
| return new ParseResult(ast, ctx.getTokenRewriteStream(), ctx.getParsedTables()); |
| } |
| } |
| |
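| // Recursively collects the qualified names of all functions referenced in the
| // query AST into userSuppliedFunctions.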
| void gatherUserSuppliedFunctions(ASTNode ast) throws SemanticException { |
| int tokenType = ast.getToken().getType(); |
| if (tokenType == HiveParser.TOK_FUNCTION || |
| tokenType == HiveParser.TOK_FUNCTIONDI || |
| tokenType == HiveParser.TOK_FUNCTIONSTAR) { |
| if (ast.getChild(0).getType() == HiveParser.Identifier) { |
| try { |
| String functionName = unescapeIdentifier(ast.getChild(0).getText()).toLowerCase(); |
| String[] qualifiedFunctionName = FunctionUtils.getQualifiedFunctionNameParts(functionName); |
| this.userSuppliedFunctions.add(qualifiedFunctionName[0]+"."+qualifiedFunctionName[1]); |
| } catch (HiveException ex) { |
| throw new SemanticException(ex.getMessage(), ex); |
| } |
| } |
| } |
| for (int i = 0; i < ast.getChildCount();i++) { |
| gatherUserSuppliedFunctions((ASTNode) ast.getChild(i)); |
| } |
| } |
| |
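| /**
| * Performs the first stage of semantic analysis on the AST: handles CREATE TABLE
| * (CTAS) and CREATE MATERIALIZED VIEW statements, returns early for
| * transaction-control tokens, sets up table masking, gathers user-supplied
| * functions, runs phase 1 over the query and resolves metadata. Returns false if
| * analysis should stop here.
| */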
| boolean genResolvedParseTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException { |
| ASTNode child = ast; |
| this.ast = ast; |
| viewsExpanded = new ArrayList<String>(); |
| ctesExpanded = new ArrayList<String>(); |
| |
| // 1. analyze and process the position alias
| // (position alias processing has been moved out of genResolvedParseTree into analyzeInternal)
| |
| // 2. analyze create table command |
| if (ast.getToken().getType() == HiveParser.TOK_CREATETABLE) { |
| // if it is not CTAS, we don't need to go further and just return |
| if ((child = analyzeCreateTable(ast, qb, plannerCtx)) == null) { |
| return false; |
| } |
| } else { |
| queryState.setCommandType(HiveOperation.QUERY); |
| } |
| |
| // 3. analyze create view command |
| if (ast.getToken().getType() == HiveParser.TOK_CREATE_MATERIALIZED_VIEW) { |
| child = analyzeCreateView(ast, qb, plannerCtx); |
| if (child == null) { |
| return false; |
| } |
| viewSelect = child; |
| // prevent view from referencing itself |
| viewsExpanded.add(createVwDesc.getViewName()); |
| } |
| |
| if (forViewCreation) { |
| viewsExpanded.add(fqViewName); |
| } |
| |
| switch(ast.getToken().getType()) { |
| case HiveParser.TOK_SET_AUTOCOMMIT: |
| assert ast.getChildCount() == 1; |
| if(ast.getChild(0).getType() == HiveParser.TOK_TRUE) { |
| setAutoCommitValue(true); |
| } |
| else if(ast.getChild(0).getType() == HiveParser.TOK_FALSE) { |
| setAutoCommitValue(false); |
| } |
| else { |
| assert false : "Unexpected child of TOK_SET_AUTOCOMMIT: " + ast.getChild(0).getType(); |
| } |
| //fall through |
| case HiveParser.TOK_START_TRANSACTION: |
| case HiveParser.TOK_COMMIT: |
| case HiveParser.TOK_ROLLBACK: |
| if(!(conf.getBoolVar(ConfVars.HIVE_IN_TEST) || conf.getBoolVar(ConfVars.HIVE_IN_TEZ_TEST))) { |
| throw new IllegalStateException(HiveOperation.operationForToken(ast.getToken().getType()) + |
| " is not supported yet."); |
| } |
| queryState.setCommandType(HiveOperation.operationForToken(ast.getToken().getType())); |
| return false; |
| } |
| |
| // masking and filtering should be created here |
| // the basic idea is similar to unparseTranslator. |
| tableMask = new TableMask(this, conf, ctx.isSkipTableMasking()); |
| |
| // Gather UDFs referenced in query before VIEW expansion. This is used to |
| // determine if authorization checks need to occur on the UDFs. |
| gatherUserSuppliedFunctions(child); |
| |
| // 4. continue analyzing from the child ASTNode. |
| Phase1Ctx ctx_1 = initPhase1Ctx(); |
| if (!doPhase1(child, qb, ctx_1, plannerCtx)) { |
| // if phase1Result false return |
| return false; |
| } |
| LOG.info("Completed phase 1 of Semantic Analysis"); |
| |
| // 5. Resolve Parse Tree |
| // Materialization is allowed if it is not a view definition |
| getMetaData(qb, createVwDesc == null && !forViewCreation); |
| LOG.info("Completed getting MetaData in Semantic Analysis"); |
| |
| return true; |
| } |
| |
| void getHintsFromQB(QB qb, List<ASTNode> hints) { |
| if (qb.getParseInfo().getHints() != null) { |
| hints.add(qb.getParseInfo().getHints()); |
| } |
| |
| Set<String> aliases = qb.getSubqAliases(); |
| |
| for (String alias : aliases) { |
| getHintsFromQB(qb.getSubqForAlias(alias), hints); |
| } |
| } |
| |
| private void getHintsFromQB(QBExpr qbExpr, List<ASTNode> hints) { |
| QBExpr qbExpr1 = qbExpr.getQBExpr1(); |
| QBExpr qbExpr2 = qbExpr.getQBExpr2(); |
| QB qb = qbExpr.getQB(); |
| |
| if (qbExpr1 != null) { |
| getHintsFromQB(qbExpr1, hints); |
| } |
| if (qbExpr2 != null) { |
| getHintsFromQB(qbExpr2, hints); |
| } |
| if (qb != null) { |
| getHintsFromQB(qb, hints); |
| } |
| } |
| |
| Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException { |
| // fetch all the hints in qb |
| List<ASTNode> hintsList = new ArrayList<>(); |
| getHintsFromQB(qb, hintsList); |
| getQB().getParseInfo().setHintList(hintsList); |
| return genPlan(qb); |
| } |
| |
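| // Removes ORDER BY / SORT BY clauses that appear without a LIMIT in subqueries or
| // views, since they do not affect the final result and hurt performance.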
| private void removeOBInSubQuery(QBExpr qbExpr) { |
| if (qbExpr == null) { |
| return; |
| } |
| |
| if (qbExpr.getOpcode() == QBExpr.Opcode.NULLOP) { |
| QB subQB = qbExpr.getQB(); |
| QBParseInfo parseInfo = subQB.getParseInfo(); |
| String alias = qbExpr.getAlias(); |
| Map<String, ASTNode> destToOrderBy = parseInfo.getDestToOrderBy(); |
| Map<String, ASTNode> destToSortBy = parseInfo.getDestToSortBy(); |
| final String warning = "WARNING: Order/Sort by without limit in sub query or view [" + |
| alias + "] is removed, as it's pointless and bad for performance."; |
| if (destToOrderBy != null) {
| Iterator<Map.Entry<String, ASTNode>> orderByIter = destToOrderBy.entrySet().iterator();
| while (orderByIter.hasNext()) {
| Map.Entry<String, ASTNode> entry = orderByIter.next();
| if (parseInfo.getDestLimit(entry.getKey()) == null) {
| removeASTChild(entry.getValue());
| // remove through the iterator to avoid a ConcurrentModificationException
| orderByIter.remove();
| console.printInfo(warning);
| }
| }
| }
| if (destToSortBy != null) {
| Iterator<Map.Entry<String, ASTNode>> sortByIter = destToSortBy.entrySet().iterator();
| while (sortByIter.hasNext()) {
| Map.Entry<String, ASTNode> entry = sortByIter.next();
| if (parseInfo.getDestLimit(entry.getKey()) == null) {
| removeASTChild(entry.getValue());
| // remove through the iterator to avoid a ConcurrentModificationException
| sortByIter.remove();
| console.printInfo(warning);
| }
| }
| }
| } else { |
| removeOBInSubQuery(qbExpr.getQBExpr1()); |
| removeOBInSubQuery(qbExpr.getQBExpr2()); |
| } |
| } |
| |
| private void removeASTChild(ASTNode node) { |
| Optional.ofNullable(node.getParent()) |
| .ifPresent(parent -> { |
| parent.deleteChild(node.getChildIndex()); |
| node.setParent(null); |
| }); |
| } |
| |
| protected void compilePlan(ParseContext pCtx) throws SemanticException{ |
| if (!ctx.getExplainLogical()) { |
| TaskCompiler compiler = TaskCompilerFactory.getCompiler(conf, pCtx); |
| compiler.init(queryState, console, db); |
| compiler.compile(pCtx, rootTasks, inputs, outputs); |
| fetchTask = pCtx.getFetchTask(); |
| } |
| } |
| |
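| /**
| * Core of semantic analysis with a planner-specific context: resolves the parse
| * tree, optionally probes the query results cache, generates the operator tree,
| * and, when masking or filtering policies apply, rewrites the AST and re-runs
| * resolution and plan generation on the rewritten tree.
| */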
| @SuppressWarnings("checkstyle:methodlength") |
| void analyzeInternal(ASTNode ast, Supplier<PlannerContext> pcf) throws SemanticException { |
| LOG.info("Starting Semantic Analysis"); |
| // 1. Generate Resolved Parse tree from syntax tree |
| boolean needsTransform = needsTransform(); |
| //change the location of position alias process here |
| processPositionAlias(ast); |
| cacheTableHelper.populateCache(ctx.getParsedTables(), conf, getTxnMgr()); |
| PlannerContext plannerCtx = pcf.get(); |
| if (!genResolvedParseTree(ast, plannerCtx)) { |
| return; |
| } |
| |
| if (HiveConf.getBoolVar(conf, ConfVars.HIVE_REMOVE_ORDERBY_IN_SUBQUERY)) { |
| for (String alias : qb.getSubqAliases()) { |
| removeOBInSubQuery(qb.getSubqForAlias(alias)); |
| } |
| } |
| |
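| // LLAP_IO_ETL_SKIP_FORMAT: for ETL style queries (INSERT / CTAS), "encode" disables only the |
| // LLAP IO encode path, "all" disables LLAP IO entirely, and any other value leaves both enabled. |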
| final String llapIOETLSkipFormat = HiveConf.getVar(conf, ConfVars.LLAP_IO_ETL_SKIP_FORMAT); |
| if (qb.getParseInfo().hasInsertTables() || qb.isCTAS()) { |
| if (llapIOETLSkipFormat.equalsIgnoreCase("encode")) { |
| conf.setBoolean(ConfVars.LLAP_IO_ENCODE_ENABLED.varname, false); |
| LOG.info("Disabling LLAP IO encode as ETL query is detected"); |
| } else if (llapIOETLSkipFormat.equalsIgnoreCase("all")) { |
| conf.setBoolean(ConfVars.LLAP_IO_ENABLED.varname, false); |
| LOG.info("Disabling LLAP IO as ETL query is detected"); |
| } |
| } |
| |
| // Check query results cache. |
| // If no masking/filtering required, then we can check the cache now, before |
| // generating the operator tree and going through CBO. |
| // Otherwise we have to wait until after the masking/filtering step. |
| boolean isCacheEnabled = isResultsCacheEnabled(); |
| QueryResultsCache.LookupInfo lookupInfo = null; |
| if (isCacheEnabled && !needsTransform && queryTypeCanUseCache()) { |
| lookupInfo = createLookupInfoForQuery(ast); |
| if (checkResultsCache(lookupInfo, false)) { |
| return; |
| } |
| } |
| |
| ASTNode astForMasking; |
| if (isCBOExecuted() && needsTransform && |
| (qb.isCTAS() || forViewCreation || qb.isMaterializedView() || qb.isMultiDestQuery())) { |
| // If we use CBO and we may apply masking/filtering policies, we create a copy of the ast. |
| // The reason is that the generation of the operator tree may modify the initial ast, |
| // but if we need to parse for a second time, we would like to parse the unmodified ast. |
| astForMasking = (ASTNode) ParseDriver.adaptor.dupTree(ast); |
| } else { |
| astForMasking = ast; |
| } |
| |
| // 2. Gen OP Tree from resolved Parse Tree |
| sinkOp = genOPTree(ast, plannerCtx); |
| |
| boolean usesMasking = false; |
| if (!forViewCreation && ast.getToken().getType() != HiveParser.TOK_CREATE_MATERIALIZED_VIEW && |
| (tableMask.isEnabled() && analyzeRewrite == null)) { |
| // Here we rewrite the * expansion and replace masked/filtered tables according to the masking policies |
| ParseResult rewrittenResult = rewriteASTWithMaskAndFilter(tableMask, astForMasking, ctx.getTokenRewriteStream(), |
| ctx, db); |
| ASTNode rewrittenAST = rewrittenResult.getTree(); |
| if (astForMasking != rewrittenAST) { |
| usesMasking = true; |
| plannerCtx = pcf.get(); |
| ctx.setSkipTableMasking(true); |
| ctx.setTokenRewriteStream(rewrittenResult.getTokenRewriteStream()); |
| init(true); |
| //change the location of position alias process here |
| processPositionAlias(rewrittenAST); |
| genResolvedParseTree(rewrittenAST, plannerCtx); |
| if (this instanceof CalcitePlanner) { |
| ((CalcitePlanner) this).resetCalciteConfiguration(); |
| } |
| sinkOp = genOPTree(rewrittenAST, plannerCtx); |
| } |
| } |
| |
| // validate if this sink operation is allowed for non-native tables |
| if (sinkOp instanceof FileSinkOperator) { |
| FileSinkOperator fileSinkOperator = (FileSinkOperator) sinkOp; |
| Optional<HiveStorageHandler> handler = Optional.ofNullable(fileSinkOperator) |
| .map(FileSinkOperator::getConf) |
| .map(FileSinkDesc::getTable) |
| .map(Table::getStorageHandler); |
| if (handler.isPresent()) { |
| handler.get().validateSinkDesc(fileSinkOperator.getConf()); |
| } |
| } |
| |
| // Check query results cache |
| // In the case that row or column masking/filtering was required, we do not support caching. |
| // TODO: Enable caching for queries with masking/filtering |
| if (isCacheEnabled && needsTransform && !usesMasking && queryTypeCanUseCache()) { |
| lookupInfo = createLookupInfoForQuery(ast); |
| if (checkResultsCache(lookupInfo, false)) { |
| return; |
| } |
| } |
| |
| // 3. Deduce Resultset Schema |
| if ((forViewCreation || createVwDesc != null) && !this.ctx.isCboSucceeded()) { |
| resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver()); |
| } else { |
| // resultSchema will be null if |
| // (1) CBO is disabled, or |
| // (2) CBO is enabled with the AST return path (whether it succeeded or not, |
| // resultSchema is re-initialized here). |
| // It is non-null only if CBO is enabled with the new return path and succeeds. |
| if (resultSchema == null) { |
| resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(), |
| HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES)); |
| } |
| } |
| |
| // 4. Generate Parse Context for Optimizer & Physical compiler |
| copyInfoToQueryProperties(queryProperties); |
| ParseContext pCtx = new ParseContext(queryState, opToPartPruner, opToPartList, topOps, |
| new HashSet<JoinOperator>(joinContext.keySet()), |
| new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), |
| loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx, |
| listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject, opToSamplePruner, |
| globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, |
| viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, |
| analyzeRewrite, tableDesc, createVwDesc, materializedViewUpdateDesc, |
| queryProperties, viewProjectToTableSchema); |
| |
| // Set the semijoin hints in parse context |
| pCtx.setSemiJoinHints(parseSemiJoinHint(getQB().getParseInfo().getHintList())); |
| // Set the mapjoin hint if it needs to be disabled. |
| pCtx.setDisableMapJoin(disableMapJoinWithHint(getQB().getParseInfo().getHintList())); |
| |
| if (forViewCreation) { |
| // Generate lineage info if LineageLogger hook is configured. |
| // Add the transformation that computes the lineage information. |
| Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults() |
| .omitEmptyStrings() |
| .split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POSTEXECHOOKS)))); |
| if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter") |
| || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger") |
| || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) { |
| List<Transform> transformations = new ArrayList<Transform>(); |
| transformations.add(new HiveOpConverterPostProc()); |
| transformations.add(new Generator(postExecHooks)); |
| for (Transform t : transformations) { |
| pCtx = t.transform(pCtx); |
| } |
| } |
| } |
| |
| // 5. Take care of view creation |
| if (createVwDesc != null) { |
| if (ctx.getExplainAnalyze() == AnalyzeState.RUNNING) { |
| return; |
| } |
| |
| if (!ctx.isCboSucceeded()) { |
| saveViewDefinition(); |
| } |
| |
| // validate the create view statement at this point; the createVwDesc now has |
| // all the information needed for the semantic check |
| validateCreateView(); |
| |
| createVwDesc.setTablesUsed(pCtx.getTablesUsed()); |
| } |
| |
| // If we are creating a view and the ColumnAccessInfo was already created in step 2, |
| // skip these steps so the view column access information is not overwritten. |
| if (!forViewCreation || getColumnAccessInfo() == null) { |
| // 6. Generate table access stats if required |
| if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS)) { |
| TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx); |
| setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess()); |
| } |
| AuxOpTreeSignature.linkAuxSignatures(pCtx); |
| // 7. Perform Logical optimization |
| if (LOG.isDebugEnabled()) { |
| LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values())); |
| } |
| Optimizer optm = new Optimizer(); |
| optm.setPctx(pCtx); |
| optm.initialize(conf); |
| pCtx = optm.optimize(); |
| if (pCtx.getColumnAccessInfo() != null) { |
| // set ColumnAccessInfo for view column authorization |
| setColumnAccessInfo(pCtx.getColumnAccessInfo()); |
| } |
| if (LOG.isDebugEnabled()) { |
| LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values())); |
| } |
| |
| // 8. Generate column access stats if required - wait until column pruning |
| // takes place during optimization |
| boolean isColumnInfoNeedForAuth = SessionState.get().isAuthorizationModeV2() |
| && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED); |
| if (isColumnInfoNeedForAuth |
| || HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) { |
| ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx); |
| // view column access info is carried by this.getColumnAccessInfo(). |
| setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess(this.getColumnAccessInfo())); |
| } |
| } |
| |
| if (forViewCreation) { |
| return; |
| } |
| |
| // 9. Optimize Physical op tree & Translate to target execution engine (MR, |
| // TEZ..) |
| compilePlan(pCtx); |
| |
| //find all Acid FileSinkOperatorS |
| new QueryPlanPostProcessor(rootTasks, acidFileSinks, ctx.getExecutionId()); |
| |
| // 10. Attach CTAS/Insert-Commit-hooks for Storage Handlers |
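| // For tables backed by a storage handler, a PreInsertTableDesc DDL task was added earlier; |
| // each one is turned into an InsertCommitHookDesc task chained after the TezTask so the |
| // handler's commit hook runs once the query has finished. |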
| final Optional<TezTask> optionalTezTask = |
| rootTasks.stream().filter(task -> task instanceof TezTask).map(task -> (TezTask) task) |
| .findFirst(); |
| if (optionalTezTask.isPresent()) { |
| final TezTask tezTask = optionalTezTask.get(); |
| rootTasks.stream() |
| .filter(task -> task.getWork() instanceof DDLWork) |
| .map(task -> (DDLWork) task.getWork()) |
| .filter(ddlWork -> ddlWork.getDDLDesc() instanceof PreInsertTableDesc) |
| .map(ddlWork -> (PreInsertTableDesc)ddlWork.getDDLDesc()) |
| .map(desc -> new InsertCommitHookDesc(desc.getTable(), desc.isOverwrite())) |
| .forEach(insertCommitHookDesc -> tezTask.addDependentTask( |
| TaskFactory.get(new DDLWork(getInputs(), getOutputs(), insertCommitHookDesc), conf))); |
| } |
| |
| LOG.info("Completed plan generation"); |
| |
| // 11. put accessed columns to readEntity |
| if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) { |
| putAccessedColumnsToReadEntity(inputs, columnAccessInfo); |
| } |
| |
| if (isCacheEnabled && lookupInfo != null) { |
| if (queryCanBeCached()) { |
| // Last chance - check if the query is available in the cache. |
| // Since we have already generated a query plan, using a cached query result at this point |
| // requires SemanticAnalyzer state to be reset. |
| if (checkResultsCache(lookupInfo, true)) { |
| LOG.info("Cached result found on second lookup"); |
| } else { |
| QueryResultsCache.QueryInfo queryInfo = createCacheQueryInfoForQuery(lookupInfo); |
| |
| // Specify that the results of this query can be cached. |
| setCacheUsage(new CacheUsage( |
| CacheUsage.CacheStatus.CAN_CACHE_QUERY_RESULTS, queryInfo)); |
| } |
| } |
| } |
| } |
| |
| private void putAccessedColumnsToReadEntity(Set<ReadEntity> inputs, ColumnAccessInfo columnAccessInfo) { |
| Map<String, List<String>> tableToColumnAccessMap = columnAccessInfo.getTableToColumnAccessMap(); |
| if (tableToColumnAccessMap != null && !tableToColumnAccessMap.isEmpty()) { |
| for(ReadEntity entity: inputs) { |
| List<String> cols; |
| switch (entity.getType()) { |
| case TABLE: |
| cols = tableToColumnAccessMap.get(entity.getTable().getCompleteName()); |
| if (cols != null && !cols.isEmpty()) { |
| entity.getAccessedColumns().addAll(cols); |
| } |
| break; |
| case PARTITION: |
| cols = tableToColumnAccessMap.get(entity.getPartition().getTable().getCompleteName()); |
| if (cols != null && !cols.isEmpty()) { |
| entity.getAccessedColumns().addAll(cols); |
| } |
| break; |
| default: |
| // no-op |
| } |
| } |
| } |
| } |
| |
| @Override |
| public List<FieldSchema> getResultSchema() { |
| return resultSchema; |
| } |
| |
| public List<FieldSchema> getOriginalResultSchema() { |
| return originalResultSchema; |
| } |
| |
| protected void saveViewDefinition() throws SemanticException { |
| // Make a copy of the statement's result schema, since we may |
| // modify it below as part of imposing view column names. |
| List<FieldSchema> derivedSchema = |
| new ArrayList<FieldSchema>(resultSchema); |
| ParseUtils.validateColumnNameUniqueness(derivedSchema); |
| |
| List<FieldSchema> imposedSchema = createVwDesc.getSchema(); |
| if (imposedSchema != null) { |
| int explicitColCount = imposedSchema.size(); |
| int derivedColCount = derivedSchema.size(); |
| if (explicitColCount != derivedColCount) { |
| throw new SemanticException(generateErrorMessage( |
| viewSelect, |
| ErrorMsg.VIEW_COL_MISMATCH.getMsg())); |
| } |
| } |
| |
| // Preserve the original view definition as specified by the user. |
| if (createVwDesc.getViewOriginalText() == null) { |
| String originalText = ctx.getTokenRewriteStream().toString( |
| viewSelect.getTokenStartIndex(), viewSelect.getTokenStopIndex()); |
| createVwDesc.setViewOriginalText(originalText); |
| } |
| |
| // Now expand the view definition with extras such as explicit column |
| // references; this expanded form is what we'll re-parse when the view is |
| // referenced later. |
| unparseTranslator.applyTranslations(ctx.getTokenRewriteStream()); |
| String expandedText = ctx.getTokenRewriteStream().toString( |
| viewSelect.getTokenStartIndex(), viewSelect.getTokenStopIndex()); |
| |
| if (createVwDesc.getPartColNames() != null) { |
| // If we are creating a materialized view and it has partition columns, |
| // we may need to reorder the column projection in the expanded query. The reason |
| // is that Hive assumes the partition columns are at the end of the MV schema, |
| // and if we do not reorder, there would be a mismatch between the SQL query for |
| // the MV and the MV itself. |
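| // For example (a hypothetical MV "mv1" partitioned on ds, with derived schema (a, b, ds)), |
| // the expanded text is rewritten to roughly: |
| //   SELECT `a`, `b`, `ds` FROM (<original expanded text>) `mv1` |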
| boolean first = true; |
| StringBuilder sb = new StringBuilder(); |
| sb.append("SELECT "); |
| for (FieldSchema fieldSchema : derivedSchema) { |
| if (!createVwDesc.getPartColNames().contains(fieldSchema.getName())) { |
| if (first) { |
| first = false; |
| } else { |
| sb.append(", "); |
| } |
| sb.append(HiveUtils.unparseIdentifier(fieldSchema.getName(), conf)); |
| } |
| } |
| for (String partColName : createVwDesc.getPartColNames()) { |
| sb.append(", "); |
| sb.append(HiveUtils.unparseIdentifier(partColName, conf)); |
| } |
| sb.append(" FROM ("); |
| sb.append(expandedText); |
| sb.append(") "); |
| sb.append(HiveUtils.unparseIdentifier(Utilities.getDbTableName(createVwDesc.getViewName())[1], conf)); |
| expandedText = sb.toString(); |
| } |
| |
| // Set schema and expanded text for the view |
| createVwDesc.setSchema(derivedSchema); |
| createVwDesc.setViewExpandedText(expandedText); |
| } |
| |
| private List<FieldSchema> convertRowSchemaToViewSchema(RowResolver rr) throws SemanticException { |
| List<FieldSchema> fieldSchema = convertRowSchemaToResultSetSchema(rr, false); |
| ParseUtils.validateColumnNameUniqueness(fieldSchema); |
| return fieldSchema; |
| } |
| |
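| // Builds the result set schema from a row resolver. When useTabAliasIfAvailable is true |
| // (e.g. when unique column names are requested), a column resolved as (t, key) is reported |
| // as "t.key"; otherwise just "key". UNION ALL placeholders (aliases starting with __u) never |
| // get a table prefix, and hidden virtual columns are skipped. |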
| List<FieldSchema> convertRowSchemaToResultSetSchema(RowResolver rr, boolean useTabAliasIfAvailable) { |
| List<FieldSchema> fieldSchemas = new ArrayList<FieldSchema>(); |
| String[] qualifiedColName; |
| String colName; |
| |
| for (ColumnInfo colInfo : rr.getColumnInfos()) { |
| if (colInfo.isHiddenVirtualCol()) { |
| continue; |
| } |
| |
| qualifiedColName = rr.reverseLookup(colInfo.getInternalName()); |
| // __u<n> is a UNION ALL placeholder name |
| if (useTabAliasIfAvailable && qualifiedColName[0] != null && (!qualifiedColName[0].isEmpty()) && (!qualifiedColName[0].startsWith("__u"))) { |
| colName = qualifiedColName[0] + "." + qualifiedColName[1]; |
| } else { |
| colName = qualifiedColName[1]; |
| } |
| fieldSchemas.add(new FieldSchema(colName, colInfo.getType().getTypeName(), null)); |
| } |
| return fieldSchemas; |
| } |
| |
| /** |
| * Generates an expression node descriptor for the expression with TypeCheckCtx. |
| */ |
| public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input) |
| throws SemanticException { |
| // Since the user didn't supply a customized type-checking context, |
| // use default settings. |
| return genExprNodeDesc(expr, input, true, false); |
| } |
| |
| ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input, boolean useCaching, |
| boolean foldExpr) throws SemanticException { |
| TypeCheckCtx tcCtx = new TypeCheckCtx(input, useCaching, foldExpr); |
| return genExprNodeDesc(expr, input, tcCtx); |
| } |
| |
| /** |
| * Generates expression node descriptors for the expression and its children |
| * with the default TypeCheckCtx. |
| */ |
| Map<ASTNode, ExprNodeDesc> genAllExprNodeDesc(ASTNode expr, RowResolver input) |
| throws SemanticException { |
| TypeCheckCtx tcCtx = new TypeCheckCtx(input); |
| return genAllExprNodeDesc(expr, input, tcCtx); |
| } |
| |
| /** |
| * Returns expression node descriptor for the expression. |
| * If it's evaluated already in previous operator, it can be retrieved from cache. |
| */ |
| ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input, |
| TypeCheckCtx tcCtx) throws SemanticException { |
| // We recursively create the exprNodeDesc. Base cases: when we encounter a column ref, |
| // we convert it into an exprNodeColumnDesc; when we encounter a constant, we convert it |
| // into an exprNodeConstantDesc. For others we just build the exprNodeFuncDesc with |
| // recursively built children. |
| |
| // If the current subExpression is pre-calculated, as in Group-By etc. |
| ExprNodeDesc cached = null; |
| if (tcCtx.isUseCaching()) { |
| cached = getExprNodeDescCached(expr, input); |
| } |
| if (cached == null) { |
| Map<ASTNode, ExprNodeDesc> allExprs = genAllExprNodeDesc(expr, input, tcCtx); |
| return allExprs.get(expr); |
| } |
| return cached; |
| } |
| |
| /** |
| * Finds the ExprNodeDesc for the expression cached in the RowResolver. Returns null if it does not exist. |
| */ |
| private ExprNodeDesc getExprNodeDescCached(ASTNode expr, RowResolver input) |
| throws SemanticException { |
| ColumnInfo colInfo = input.getExpression(expr); |
| if (colInfo != null) { |
| ASTNode source = input.getExpressionSource(expr); |
| if (source != null) { |
| unparseTranslator.addCopyTranslation(expr, source); |
| } |
| return new ExprNodeColumnDesc(colInfo.getType(), colInfo |
| .getInternalName(), colInfo.getTabAlias(), colInfo |
| .getIsVirtualCol(), colInfo.isSkewedCol()); |
| } |
| return null; |
| } |
| |
| /** |
| * Generates all of the expression node descriptors for the expression and children of it |
| * passed in the arguments. This function uses the row resolver and the metadata information |
| * that are passed as arguments to resolve the column names to internal names. |
| * |
| * @param expr |
| * The expression |
| * @param input |
| * The row resolver |
| * @param tcCtx |
| * Customized type-checking context |
| * @return expression to exprNodeDesc mapping |
| * @throws SemanticException Failed to evaluate expression |
| */ |
| @SuppressWarnings("nls") |
| Map<ASTNode, ExprNodeDesc> genAllExprNodeDesc(ASTNode expr, RowResolver input, |
| TypeCheckCtx tcCtx) throws SemanticException { |
| // Create the walker and the rules dispatcher. |
| tcCtx.setUnparseTranslator(unparseTranslator); |
| |
| Map<ASTNode, ExprNodeDesc> nodeOutputs = |
| ExprNodeTypeCheck.genExprNode(expr, tcCtx); |
| ExprNodeDesc desc = nodeOutputs.get(expr); |
| if (desc == null) { |
| String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr |
| .getChild(0).getText()); |
| ColumnInfo colInfo = input.get(null, tableOrCol); |
| String errMsg; |
| if (colInfo == null && input.getIsExprResolver()){ |
| errMsg = ASTErrorUtils.getMsg( |
| ErrorMsg.NON_KEY_EXPR_IN_GROUPBY.getMsg(), expr); |
| } else { |
| errMsg = tcCtx.getError(); |
| } |
| throw new SemanticException(Optional.ofNullable(errMsg).orElse("Error in parsing ")); |
| } |
| if (desc instanceof ExprNodeColumnListDesc) { |
| throw new SemanticException("TOK_ALLCOLREF is not supported in current context"); |
| } |
| |
| if (!unparseTranslator.isEnabled()) { |
| // Not creating a view, so no need to track view expansions. |
| return nodeOutputs; |
| } |
| |
| List<ASTNode> fieldDescList = new ArrayList<>(); |
| |
| for (Map.Entry<ASTNode, ExprNodeDesc> entry : nodeOutputs.entrySet()) { |
| if (!(entry.getValue() instanceof ExprNodeColumnDesc)) { |
| // we need to translate the ExprNodeFieldDesc too, e.g., identifiers in |
| // struct<>. |
| if (entry.getValue() instanceof ExprNodeFieldDesc) { |
| fieldDescList.add(entry.getKey()); |
| } |
| continue; |
| } |
| ASTNode node = entry.getKey(); |
| ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) entry.getValue(); |
| if ((columnDesc.getTabAlias() == null) |
| || (columnDesc.getTabAlias().length() == 0)) { |
| // These aren't real column refs; instead, they are special |
| // internal expressions used in the representation of aggregation. |
| continue; |
| } |
| String[] tmp = input.reverseLookup(columnDesc.getColumn()); |
| // In the subquery case the column may come from the outer query: if an outer row resolver |
| // is present and tmp is either not found here or refers to a different table alias, look the |
| // column up in the outer row resolver instead. |
| if (tcCtx.getOuterRR() != null && (tmp == null || (tmp[0] != null && columnDesc.getTabAlias() != null |
| && !tmp[0].equals(columnDesc.getTabAlias())))) { |
| tmp = tcCtx.getOuterRR().reverseLookup(columnDesc.getColumn()); |
| } |
| StringBuilder replacementText = new StringBuilder(); |
| replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf)); |
| replacementText.append("."); |
| replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf)); |
| unparseTranslator.addTranslation(node, replacementText.toString()); |
| } |
| |
| for (ASTNode node : fieldDescList) { |
| Map<ASTNode, String> map = translateFieldDesc(node); |
| for (Entry<ASTNode, String> entry : map.entrySet()) { |
| unparseTranslator.addTranslation(entry.getKey(), entry.getValue().toLowerCase()); |
| } |
| } |
| |
| return nodeOutputs; |
| } |
| |
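| // Walks a (possibly nested) DOT expression, such as a hypothetical s.address.zip, and records |
| // an unparse translation for every Identifier node it contains, so struct field names are |
| // re-quoted correctly in the expanded view text. |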
| protected final Map<ASTNode, String> translateFieldDesc(ASTNode node) { |
| Map<ASTNode, String> map = new HashMap<>(); |
| if (node.getType() == HiveParser.DOT) { |
| for (Node child : node.getChildren()) { |
| map.putAll(translateFieldDesc((ASTNode) child)); |
| } |
| } else if (node.getType() == HiveParser.Identifier) { |
| map.put(node, HiveUtils.unparseIdentifier(node.getText(), conf)); |
| } |
| return map; |
| } |
| |
| @Override |
| public void validate() throws SemanticException { |
| boolean wasAcidChecked = false; |
| // Validate inputs and outputs have right protectmode to execute the query |
| for (ReadEntity readEntity : getInputs()) { |
| ReadEntity.Type type = readEntity.getType(); |
| |
| if (type != ReadEntity.Type.TABLE && |
| type != ReadEntity.Type.PARTITION) { |
| // In current implementation it will never happen, but we leave it |
| // here to make the logic complete. |
| continue; |
| } |
| |
| Table tbl = readEntity.getTable(); |
| Partition p = readEntity.getPartition(); |
| |
| if (p != null) { |
| tbl = p.getTable(); |
| } |
| if (tbl != null && AcidUtils.isTransactionalTable(tbl)) { |
| transactionalInQuery = true; |
| if (!wasAcidChecked) { |
| checkAcidTxnManager(tbl); |
| } |
| wasAcidChecked = true; |
| } |
| } |
| |
| for (WriteEntity writeEntity : getOutputs()) { |
| WriteEntity.Type type = writeEntity.getType(); |
| |
| if (type == WriteEntity.Type.PARTITION || type == WriteEntity.Type.DUMMYPARTITION) { |
| String conflictingArchive = null; |
| try { |
| Partition usedp = writeEntity.getPartition(); |
| Table tbl = usedp.getTable(); |
| if (AcidUtils.isTransactionalTable(tbl)) { |
| transactionalInQuery = true; |
| if (!wasAcidChecked) { |
| checkAcidTxnManager(tbl); |
| } |
| wasAcidChecked = true; |
| } |
| |
| LOG.debug("validated " + usedp.getName()); |
| LOG.debug(usedp.getTable().getTableName()); |
| if (!AcidUtils.isTransactionalTable(tbl) && conf.getBoolVar(HIVEARCHIVEENABLED)) { |
| // Do not check for ACID; it does not create new parts and this is expensive as hell. |
| // TODO: add an API to get table name list for archived parts with a single call; |
| // nobody uses this so we could skip the whole thing. |
| conflictingArchive = ArchiveUtils |
| .conflictingArchiveNameOrNull(db, tbl, usedp.getSpec()); |
| } |
| } catch (HiveException e) { |
| throw new SemanticException(e); |
| } |
| if (conflictingArchive != null) { |
| String message = String.format("Insert conflict with existing archive: %s", |
| conflictingArchive); |
| throw new SemanticException(message); |
| } |
| } else if (type == WriteEntity.Type.TABLE) { |
| Table tbl = writeEntity.getTable(); |
| if (AcidUtils.isTransactionalTable(tbl)) { |
| transactionalInQuery = true; |
| if (!wasAcidChecked) { |
| checkAcidTxnManager(tbl); |
| } |
| wasAcidChecked = true; |
| } |
| } |
| |
| if (type != WriteEntity.Type.TABLE && |
| type != WriteEntity.Type.PARTITION) { |
| LOG.debug("not validating writeEntity, because entity is neither table nor partition"); |
| continue; |
| } |
| } |
| |
| boolean reworkMapredWork = HiveConf.getBoolVar(this.conf, |
| HiveConf.ConfVars.HIVE_REWORK_MAPREDWORK); |
| |
| // validate all tasks |
| for (Task<?> rootTask : rootTasks) { |
| validate(rootTask, reworkMapredWork); |
| } |
| } |
| |
| private void validate(Task<?> task, boolean reworkMapredWork) |
| throws SemanticException { |
| Utilities.reworkMapRedWork(task, reworkMapredWork, conf); |
| if (task.getChildTasks() == null) { |
| return; |
| } |
| |
| for (Task<?> childTask : task.getChildTasks()) { |
| validate(childTask, reworkMapredWork); |
| } |
| } |
| |
| /** |
| * Updates the target table properties with values fetched from the source table properties. The property names |
| * that are copied are defined in {@link SemanticAnalyzer#UPDATED_TBL_PROPS}. |
| * @param source properties of the source table; if null, nothing is copied. |
| * @param target properties of the target table; if null, nothing is copied. |
| * @param skipped a list of properties which should not be overwritten; may be null or empty. |
| */ |
| private void updateDefaultTblProps(Map<String, String> source, Map<String, String> target, List<String> skipped) { |
| if (source == null || target == null) { |
| return; |
| } |
| for (String property : UPDATED_TBL_PROPS) { |
| if ((skipped == null || !skipped.contains(property)) && source.containsKey(property)) { |
| target.put(property, source.get(property)); |
| } |
| } |
| } |
| |
| /** |
| * Adds default properties to the table properties map. If a default parameter already |
| * exists in tblProp, the value from tblProp is kept. |
| * |
| * @param tblProp |
| * property map |
| * @return Modified table property map |
| */ |
| private Map<String, String> validateAndAddDefaultProperties( |
| Map<String, String> tblProp, boolean isExt, StorageFormat storageFormat, |
| String qualifiedTableName, List<Order> sortCols, boolean isMaterialization, |
| boolean isTemporaryTable, boolean isTransactional, boolean isManaged, String[] qualifiedTabName, boolean isTableTypeChanged) throws SemanticException { |
| Map<String, String> retValue = Optional.ofNullable(tblProp).orElseGet(HashMap::new); |
| |
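| // NEWTABLEDEFAULTPARA is a comma separated list of key=value pairs (e.g., hypothetically, |
| // "external.table.purge=true,owner=etl"); each pair is added only when the user did not set |
| // that property explicitly. |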
| String paraString = HiveConf.getVar(conf, ConfVars.NEWTABLEDEFAULTPARA); |
| if (paraString != null && !paraString.isEmpty()) { |
| for (String keyValuePair : paraString.split(",")) { |
| String[] keyValue = keyValuePair.split("=", 2); |
| if (keyValue.length != 2) { |
| continue; |
| } |
| if (!retValue.containsKey(keyValue[0])) { |
| retValue.put(keyValue[0], keyValue[1]); |
| } |
| } |
| } |
| if (!retValue.containsKey(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL) |
| && retValue.containsKey(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES)) { |
| throw new SemanticException("Cannot specify " |
| + hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES |
| + " without " + hive_metastoreConstants.TABLE_IS_TRANSACTIONAL); |
| } |
| isExt = isExternalTableChanged(retValue, isTransactional, isExt, isTableTypeChanged); |
| |
| if (isExt && HiveConf.getBoolVar(conf, ConfVars.HIVE_EXTERNALTABLE_PURGE_DEFAULT)) { |
| if (retValue.get(MetaStoreUtils.EXTERNAL_TABLE_PURGE) == null) { |
| retValue.put(MetaStoreUtils.EXTERNAL_TABLE_PURGE, "true"); |
| } |
| } |
| |
| boolean makeInsertOnly = !isTemporaryTable && HiveConf.getBoolVar( |
| conf, ConfVars.HIVE_CREATE_TABLES_AS_INSERT_ONLY); |
| boolean makeAcid = !isTemporaryTable && makeAcid(); |
| // if not specify managed table and create.table.as.external is true |
| // ignore makeInsertOnly and makeAcid. |
| if (!isManaged && HiveConf.getBoolVar(conf, ConfVars.CREATE_TABLE_AS_EXTERNAL)) { |
| makeInsertOnly = false; |
| makeAcid = false; |
| } |
| if ((makeInsertOnly || makeAcid || isTransactional || isManaged) |
| && !isExt && !isMaterialization && StringUtils.isBlank(storageFormat.getStorageHandler()) |
| //don't overwrite user choice if transactional attribute is explicitly set |
| && !retValue.containsKey(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL)) { |
| if (makeInsertOnly || isTransactional) { |
| retValue.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true"); |
| retValue.put(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, |
| TransactionalValidationListener.INSERTONLY_TRANSACTIONAL_PROPERTY); |
| } |
| if (makeAcid || isTransactional || (isManaged && !makeInsertOnly)) { |
| retValue = convertToAcidByDefault(storageFormat, qualifiedTableName, sortCols, retValue); |
| } |
| } |
| if (!isExt) { |
| addDbAndTabToOutputs(qualifiedTabName, |
| TableType.MANAGED_TABLE, isTemporaryTable, retValue, storageFormat); |
| } else { |
| addDbAndTabToOutputs(qualifiedTabName, |
| TableType.EXTERNAL_TABLE, isTemporaryTable, retValue, storageFormat); |
| } |
| return retValue; |
| } |
| |
| /** |
| * Determines whether the table should still be created as an external table. |
| * If the database default table type is external, CREATE TRANSACTIONAL TABLE should still result in an |
| * ACID (managed) table, while a plain CREATE TABLE should result in an external table. |
| */ |
| private boolean isExternalTableChanged(Map<String, String> tblProp, boolean isTransactional, boolean isExt, |
| boolean isTableTypeChanged) { |
| if ((isTableTypeChanged && tblProp != null |
| && tblProp.getOrDefault(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "false").equalsIgnoreCase("true")) |
| || isTransactional) { |
| isExt = false; |
| } |
| return isExt; |
| } |
| |
| private Map<String, String> convertToAcidByDefault( |
| StorageFormat storageFormat, String qualifiedTableName, List<Order> sortCols, |
| Map<String, String> retValue) { |
| /* For CTAS, TransactionalValidationListener.makeAcid() runs too late to make the table ACID, |
| so the initial write would end up running as non-acid... */ |
| try { |
| Class<?> inputFormatClass = storageFormat.getInputFormat() == null ? null : |
| Class.forName(storageFormat.getInputFormat()); |
| Class<?> outputFormatClass = storageFormat.getOutputFormat() == null ? null : |
| Class.forName(storageFormat.getOutputFormat()); |
| if (inputFormatClass == null || outputFormatClass == null || |
| !AcidInputFormat.class.isAssignableFrom(inputFormatClass) || |
| !AcidOutputFormat.class.isAssignableFrom(outputFormatClass)) { |
| return retValue; |
| } |
| } catch (ClassNotFoundException e) { |
| LOG.warn("Could not verify InputFormat=" + storageFormat.getInputFormat() + " or OutputFormat=" + |
| storageFormat.getOutputFormat() + " for " + qualifiedTableName); |
| return retValue; |
| } |
| if (sortCols != null && !sortCols.isEmpty()) { |
| return retValue; |
| } |
| retValue.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true"); |
| retValue.put(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, |
| TransactionalValidationListener.DEFAULT_TRANSACTIONAL_PROPERTY); |
| LOG.info("Automatically chose to make " + qualifiedTableName + " acid."); |
| return retValue; |
| } |
| |
| /** |
| * Checks whether any of the given partition columns has a DEFAULT or CHECK constraint (whether ENABLED |
| * or DISABLED), or a NOT NULL constraint that is ENABLED. |
| * @param partCols partition columns |
| * @param defConstraints default constraints |
| * @param notNullConstraints not null constraints |
| * @param checkConstraints CHECK constraints |
| * @return true if any partition column carries such a constraint, false otherwise |
| */ |
| private boolean hasConstraints(final List<FieldSchema> partCols, final List<SQLDefaultConstraint> defConstraints, |
| final List<SQLNotNullConstraint> notNullConstraints, |
| final List<SQLCheckConstraint> checkConstraints) { |
| for(FieldSchema partFS: partCols) { |
| for(SQLDefaultConstraint dc:defConstraints) { |
| if(dc.getColumn_name().equals(partFS.getName())) { |
| return true; |
| } |
| } |
| for(SQLCheckConstraint cc:checkConstraints) { |
| if(cc.getColumn_name().equals(partFS.getName())) { |
| return true; |
| } |
| } |
| for(SQLNotNullConstraint nc:notNullConstraints) { |
| if(nc.getColumn_name().equals(partFS.getName()) && nc.isEnable_cstr()) { |
| return true; |
| } |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Analyzes the create table command. If it is a regular create-table or |
| * create-table-like statement, we create a DDLWork and return null. If it is |
| * a create-table-as-select, we get the necessary info such as the SerDe and |
| * storage format, put it in the QB, and return the select statement so the rest |
| * of the semantic analyzer can process it with respect to the SerDe and |
| * storage format. |
| */ |
| ASTNode analyzeCreateTable( |
| ASTNode ast, QB qb, PlannerContext plannerCtx) throws SemanticException { |
| TableName qualifiedTabName = getQualifiedTableName((ASTNode) ast.getChild(0)); |
| final String dbDotTab = qualifiedTabName.getNotEmptyDbTable(); |
| |
| String likeTableName = null; |
| List<FieldSchema> cols = new ArrayList<FieldSchema>(); |
| List<FieldSchema> partCols = new ArrayList<FieldSchema>(); |
| List<String> partColNames = new ArrayList<>(); |
| List<String> bucketCols = new ArrayList<String>(); |
| List<SQLPrimaryKey> primaryKeys = new ArrayList<SQLPrimaryKey>(); |
| List<SQLForeignKey> foreignKeys = new ArrayList<SQLForeignKey>(); |
| List<SQLUniqueConstraint> uniqueConstraints = new ArrayList<>(); |
| List<SQLNotNullConstraint> notNullConstraints = new ArrayList<>(); |
| List<SQLDefaultConstraint> defaultConstraints= new ArrayList<>(); |
| List<SQLCheckConstraint> checkConstraints= new ArrayList<>(); |
| List<Order> sortCols = new ArrayList<Order>(); |
| int numBuckets = -1; |
| String comment = null; |
| String location = null; |
| Map<String, String> tblProps = null; |
| boolean ifNotExists = false; |
| boolean isExt = false; |
| boolean isTemporary = false; |
| boolean isManaged = false; |
| boolean isMaterialization = false; |
| boolean isTransactional = false; |
| ASTNode selectStmt = null; |
| final int CREATE_TABLE = 0; // regular CREATE TABLE |
| final int CTLT = 1; // CREATE TABLE LIKE ... (CTLT) |
| final int CTAS = 2; // CREATE TABLE AS SELECT ... (CTAS) |
| final int CTT = 3; // CREATE TRANSACTIONAL TABLE |
| final int CTLF = 4; // CREATE TABLE LIKE FILE |
| int command_type = CREATE_TABLE; |
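| // Rough (hypothetical) examples of each command type, not exact grammar: |
| //   CREATE TABLE t (a int)                      -> CREATE_TABLE |
| //   CREATE TABLE t LIKE s                       -> CTLT |
| //   CREATE TABLE t AS SELECT a FROM s           -> CTAS |
| //   CREATE TRANSACTIONAL TABLE t (a int)        -> CTT |
| //   CREATE TABLE t LIKE FILE PARQUET '/path/f'  -> CTLF |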
| List<String> skewedColNames = new ArrayList<String>(); |
| List<List<String>> skewedValues = new ArrayList<List<String>>(); |
| Map<List<String>, String> listBucketColValuesMapping = new HashMap<List<String>, String>(); |
| boolean storedAsDirs = false; |
| boolean isUserStorageFormat = false; |
| boolean partitionTransformSpecExists = false; |
| String likeFile = null; |
| String likeFileFormat = null; |
| |
| RowFormatParams rowFormatParams = new RowFormatParams(); |
| StorageFormat storageFormat = new StorageFormat(conf); |
| |
| LOG.info("Creating table " + dbDotTab + " position=" + ast.getCharPositionInLine()); |
| int numCh = ast.getChildCount(); |
| |
| // set storage handler if default handler is provided in config |
| String defaultStorageHandler = HiveConf.getVar(conf, HIVE_DEFAULT_STORAGE_HANDLER); |
| if (defaultStorageHandler != null && !defaultStorageHandler.isEmpty()) { |
| LOG.info("Default storage handler class detected in config. Using storage handler class if exists: '{}'", |
| defaultStorageHandler); |
| storageFormat.setStorageHandler(defaultStorageHandler); |
| isUserStorageFormat = true; |
| } |
| |
| /* |
| * Check the 1st-level children and do simple semantic checks: 1) CTLT and |
| * CTAS must not coexist. 2) CTLT or CTAS must not coexist with a column |
| * list (target table schema). 3) CTAS does not support partitioning (for |
| * now). |
| */ |
| for (int num = 1; num < numCh; num++) { |
| ASTNode child = (ASTNode) ast.getChild(num); |
| if (storageFormat.fillStorageFormat(child)) { |
| isUserStorageFormat = true; |
| continue; |
| } |
| switch (child.getToken().getType()) { |
| case HiveParser.TOK_IFNOTEXISTS: |
| ifNotExists = true; |
| break; |
| case HiveParser.KW_EXTERNAL: |
| isExt = true; |
| break; |
| case HiveParser.KW_MANAGED: |
| isManaged = true; |
| isTransactional = true; |
| break; |
| case HiveParser.KW_TEMPORARY: |
| isTemporary = true; |
| isMaterialization = MATERIALIZATION_MARKER.equals(child.getText()); |
| break; |
| case HiveParser.KW_TRANSACTIONAL: |
| isTransactional = true; |
| command_type = CTT; |
| break; |
| case HiveParser.TOK_LIKEFILE: |
| if (cols.size() != 0) { |
| throw new SemanticException(ErrorMsg.CTLT_COLLST_COEXISTENCE |
| .getMsg()); |
| } |
| likeFileFormat = getUnescapedName((ASTNode) child.getChild(0)); |
| likeFile = getUnescapedName((ASTNode) child.getChild(1)); |
| command_type = CTLF; |
| break; |
| case HiveParser.TOK_LIKETABLE: |
| if (child.getChildCount() > 0) { |
| likeTableName = getUnescapedName((ASTNode) child.getChild(0)); |
| if (likeTableName != null) { |
| if (command_type == CTAS) { |
| throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE |
| .getMsg()); |
| } |
| if (cols.size() != 0) { |
| throw new SemanticException(ErrorMsg.CTLT_COLLST_COEXISTENCE |
| .getMsg()); |
| } |
| } |
| command_type = CTLT; |
| } |
| break; |
| |
| case HiveParser.TOK_QUERY: // CTAS |
| if (command_type == CTLT) { |
| throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg()); |
| } |
| if (cols.size() != 0) { |
| throw new SemanticException(ErrorMsg.CTAS_COLLST_COEXISTENCE.getMsg()); |
| } |
| if (partCols.size() != 0 || bucketCols.size() != 0) { |
| // TODO: support dynamic partitioning for CTAS; partitioned or bucketed CTAS is currently |
| // rejected regardless of the dynamic partitioning setting. |
| throw new SemanticException(ErrorMsg.CTAS_PARCOL_COEXISTENCE.getMsg()); |
| } |
| if (!conf.getBoolVar(ConfVars.HIVE_CTAS_EXTERNAL_TABLES) && isExt) { |
| throw new SemanticException(ErrorMsg.CTAS_EXTTBL_COEXISTENCE.getMsg()); |
| } |
| command_type = CTAS; |
| if (plannerCtx != null) { |
| plannerCtx.setCTASToken(child); |
| } |
| selectStmt = child; |
| break; |
| case HiveParser.TOK_TABCOLLIST: |
| cols = getColumns(child, true, ctx.getTokenRewriteStream(), primaryKeys, foreignKeys, |
| uniqueConstraints, notNullConstraints, defaultConstraints, checkConstraints, conf); |
| break; |
| case HiveParser.TOK_TABLECOMMENT: |
| comment = unescapeSQLString(child.getChild(0).getText()); |
| break; |
| case HiveParser.TOK_TABLEPARTCOLS: |
| partCols = getColumns(child, false, ctx.getTokenRewriteStream(), primaryKeys, foreignKeys, |
| uniqueConstraints, notNullConstraints, defaultConstraints, checkConstraints, conf); |
| if(hasConstraints(partCols, defaultConstraints, notNullConstraints, checkConstraints)) { |
| //TODO: these constraints should be supported for partition columns |
| throw new SemanticException( |
| ErrorMsg.INVALID_CSTR_SYNTAX.getMsg("NOT NULL,DEFAULT and CHECK Constraints are not allowed with " + |
| "partition columns. ")); |
| } |
| break; |
| case HiveParser.TOK_TABLEPARTCOLSBYSPEC: |
| SessionStateUtil.addResourceOrThrow(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, |
| PartitionTransform.getPartitionTransformSpec(child)); |
| partitionTransformSpecExists = true; |
| break; |
| case HiveParser.TOK_TABLEPARTCOLNAMES: |
| partColNames = getColumnNames(child); |
| break; |
| case HiveParser.TOK_ALTERTABLE_BUCKETS: |
| bucketCols = getColumnNames((ASTNode) child.getChild(0)); |
| if (child.getChildCount() == 2) { |
| numBuckets = Integer.parseInt(child.getChild(1).getText()); |
| } else { |
| sortCols = getColumnNamesOrder((ASTNode) child.getChild(1)); |
| numBuckets = Integer.parseInt(child.getChild(2).getText()); |
| } |
| break; |
| case HiveParser.TOK_TABLEROWFORMAT: |
| rowFormatParams.analyzeRowFormat(child); |
| break; |
| case HiveParser.TOK_TABLELOCATION: |
| location = unescapeSQLString(child.getChild(0).getText()); |
| location = EximUtil.relativeToAbsolutePath(conf, location); |
| inputs.add(toReadEntity(location)); |
| break; |
| case HiveParser.TOK_TABLEPROPERTIES: |
| tblProps = getProps((ASTNode) child.getChild(0)); |
| addPropertyReadEntry(tblProps, inputs); |
| break; |
| case HiveParser.TOK_TABLESERIALIZER: |
| child = (ASTNode) child.getChild(0); |
| storageFormat.setSerde(unescapeSQLString(child.getChild(0).getText())); |
| if (child.getChildCount() == 2) { |
| readProps((ASTNode) (child.getChild(1).getChild(0)), |
| storageFormat.getSerdeProps()); |
| } |
| break; |
| case HiveParser.TOK_TABLESKEWED: |
| // skewed column names |
| skewedColNames = SkewedTableUtils.analyzeSkewedTableDDLColNames(child); |
| // skewed values |
| skewedValues = SkewedTableUtils.analyzeDDLSkewedValues(child); |
| // stored as directories |
| storedAsDirs = analyzeStoredAdDirs(child); |
| |
| break; |
| default: |
| throw new AssertionError("Unknown token: " + child.getToken()); |
| } |
| } |
| |
| validateStorageFormat(storageFormat, tblProps, partitionTransformSpecExists); |
| |
| if (command_type == CREATE_TABLE || command_type == CTLT || command_type == CTT || command_type == CTLF) { |
| queryState.setCommandType(HiveOperation.CREATETABLE); |
| } else if (command_type == CTAS) { |
| queryState.setCommandType(HiveOperation.CREATETABLE_AS_SELECT); |
| } else { |
| throw new SemanticException("Unrecognized command."); |
| } |
| |
| if (isExt && ConstraintsUtils.hasEnabledOrValidatedConstraints(notNullConstraints, defaultConstraints, |
| checkConstraints)) { |
| throw new SemanticException( |
| ErrorMsg.INVALID_CSTR_SYNTAX.getMsg("Constraints are disallowed with External tables. " |
| + "Only RELY is allowed.")); |
| } |
| if (checkConstraints != null && !checkConstraints.isEmpty()) { |
| ConstraintsUtils.validateCheckConstraint(cols, checkConstraints, ctx.getConf()); |
| } |
| |
| storageFormat.fillDefaultStorageFormat(isExt, false); |
| |
| // check for existence of table |
| if (ifNotExists) { |
| try { |
| Table table = getTable(qualifiedTabName, false); |
| if (table != null) { // table exists |
| return null; |
| } |
| } catch (HiveException e) { |
| // should not occur since the throwException parameter to getTable is false |
| throw new IllegalStateException("Unexpected Exception thrown: " + e.getMessage(), e); |
| } |
| } |
| |
| if (isTemporary) { |
| if (location == null) { |
| // for temporary tables we set the location to something in the session's scratch dir |
| // it has the same life cycle as the tmp table |
| try { |
| // Generate a unique ID for temp table path. |
| // This path will be fixed for the life of the temp table. |
| location = SessionState.generateTempTableLocation(conf); |
| } catch (MetaException err) { |
| throw new SemanticException("Error while generating temp table path:", err); |
| } |
| } |
| } |
| |
| // Handle different types of CREATE TABLE command |
| // Note: each branch must call addDbAndTabToOutputs after finalizing table properties. |
| Database database = getDatabase(qualifiedTabName.getDb()); |
| boolean isDefaultTableTypeChanged = false; |
| if(database.getParameters() != null) { |
| String defaultTableType = database.getParameters().getOrDefault(DEFAULT_TABLE_TYPE, null); |
| if (defaultTableType != null && defaultTableType.equalsIgnoreCase("external")) { |
| isExt = true; |
| isDefaultTableTypeChanged = true; |
| } else if (defaultTableType != null && defaultTableType.equalsIgnoreCase("acid")) { |
| isDefaultTableTypeChanged = true; |
| if (isExt) { // create external table on db with default type as acid |
| isTransactional = false; |
| } else { |
| isTransactional = true; |
| } |
| } |
| } |
| switch (command_type) { |
| case CTLF: |
| try { |
| if (!SchemaInferenceUtils.doesSupportSchemaInference(conf, likeFileFormat)) { |
| throw new SemanticException(ErrorMsg.CTLF_UNSUPPORTED_FORMAT.getErrorCodedMsg(likeFileFormat)); |
| } |
| } catch (HiveException e) { |
| throw new SemanticException(e.getMessage(), e); |
| } |
| // fall through |
| case CREATE_TABLE: // REGULAR CREATE TABLE DDL |
| if (!CollectionUtils.isEmpty(partColNames)) { |
| throw new SemanticException( |
| "Partition columns can only declared using their name and types in regular CREATE TABLE statements"); |
| } |
| tblProps = validateAndAddDefaultProperties( |
| tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, isTemporary, |
| isTransactional, isManaged, new String[] {qualifiedTabName.getDb(), qualifiedTabName.getTable()}, isDefaultTableTypeChanged); |
| isExt = isExternalTableChanged(tblProps, isTransactional, isExt, isDefaultTableTypeChanged); |
| addDbAndTabToOutputs(new String[] {qualifiedTabName.getDb(), qualifiedTabName.getTable()}, |
| TableType.MANAGED_TABLE, isTemporary, tblProps, storageFormat); |
| |
| CreateTableDesc crtTblDesc = new CreateTableDesc(qualifiedTabName, |
| isExt, isTemporary, cols, partCols, |
| bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, |
| rowFormatParams.fieldEscape, |
| rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, |
| comment, |
| storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, storageFormat.getSerde(), |
| storageFormat.getStorageHandler(), storageFormat.getSerdeProps(), tblProps, ifNotExists, skewedColNames, |
| skewedValues, primaryKeys, foreignKeys, uniqueConstraints, notNullConstraints, defaultConstraints, |
| checkConstraints); |
| crtTblDesc.setStoredAsSubDirectories(storedAsDirs); |
| crtTblDesc.setNullFormat(rowFormatParams.nullFormat); |
| crtTblDesc.setLikeFile(likeFile); |
| crtTblDesc.setLikeFileFormat(likeFileFormat); |
| |
| crtTblDesc.validate(conf); |
| // outputs is empty, which means this create table happens in the current |
| // database. |
| rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTblDesc))); |
| String tblLocation = null; |
| if (location != null) { |
| tblLocation = location; |
| } else { |
| try { |
| Warehouse wh = new Warehouse(conf); |
| tblLocation = wh.getDefaultTablePath(db.getDatabase(qualifiedTabName.getDb()), qualifiedTabName.getTable(), |
| isExt).toUri().getPath(); |
| } catch (MetaException | HiveException e) { |
| throw new SemanticException(e); |
| } |
| } |
| SessionStateUtil.addResourceOrThrow(conf, META_TABLE_LOCATION, tblLocation); |
| break; |
| case CTT: // CREATE TRANSACTIONAL TABLE |
| if (isExt && !isDefaultTableTypeChanged) { |
| throw new SemanticException( |
| qualifiedTabName.getTable() + " cannot be declared transactional because it's an external table"); |
| } |
| tblProps = validateAndAddDefaultProperties(tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, |
| isTemporary, isTransactional, isManaged, new String[] {qualifiedTabName.getDb(), qualifiedTabName.getTable()}, isDefaultTableTypeChanged); |
| isExt = isExternalTableChanged(tblProps, isTransactional, isExt, isDefaultTableTypeChanged); |
| addDbAndTabToOutputs(new String[] {qualifiedTabName.getDb(), qualifiedTabName.getTable()}, |
| TableType.MANAGED_TABLE, false, tblProps, storageFormat); |
| |
| CreateTableDesc crtTranTblDesc = |
| new CreateTableDesc(qualifiedTabName, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets, |
| rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, rowFormatParams.collItemDelim, |
| rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, comment, storageFormat.getInputFormat(), |
| storageFormat.getOutputFormat(), location, storageFormat.getSerde(), storageFormat.getStorageHandler(), |
| storageFormat.getSerdeProps(), tblProps, ifNotExists, skewedColNames, skewedValues, primaryKeys, |
| foreignKeys, uniqueConstraints, notNullConstraints, defaultConstraints, checkConstraints); |
| crtTranTblDesc.setStoredAsSubDirectories(storedAsDirs); |
| crtTranTblDesc.setNullFormat(rowFormatParams.nullFormat); |
| |
| crtTranTblDesc.validate(conf); |
| // outputs is empty, which means this create table happens in the current |
| // database. |
| rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTranTblDesc))); |
| break; |
| |
| case CTLT: // create table like <tbl_name> |
| |
| tblProps = validateAndAddDefaultProperties( |
| tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, isTemporary, |
| |
| isTransactional, isManaged, new String[]{qualifiedTabName.getDb(), qualifiedTabName.getTable()}, isDefaultTableTypeChanged); |
| tblProps.put(hive_metastoreConstants.TABLE_IS_CTLT, "true"); |
| isExt = isExternalTableChanged(tblProps, isTransactional, isExt, isDefaultTableTypeChanged); |
| addDbAndTabToOutputs(new String[] {qualifiedTabName.getDb(), qualifiedTabName.getTable()}, |
| TableType.MANAGED_TABLE, isTemporary, tblProps, storageFormat); |
| |
| Table likeTable = getTable(likeTableName, false); |
| if (likeTable != null) { |
| if (isTemporary || isExt) { |
| updateDefaultTblProps(likeTable.getParameters(), tblProps, |
| new ArrayList<>(Arrays.asList(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, |
| hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES))); |
| } else { |
| updateDefaultTblProps(likeTable.getParameters(), tblProps, null); |
| } |
| } |
| if (likeTable != null && likeTable.getTableType() == TableType.EXTERNAL_TABLE && |
| HiveConf.getBoolVar(conf, ConfVars.CREATE_TABLE_AS_EXTERNAL)) { |
| isExt = true; |
| } |
| CreateTableLikeDesc crtTblLikeDesc = new CreateTableLikeDesc(dbDotTab, isExt, isTemporary, |
| storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, |
| storageFormat.getSerde(), storageFormat.getSerdeProps(), tblProps, ifNotExists, |
| likeTableName, isUserStorageFormat); |
| rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTblLikeDesc))); |
| break; |
| |
| case CTAS: // create table as select |
| |
| if (isTemporary) { |
| if (!ctx.isExplainSkipExecution() && !isMaterialization) { |
| SessionState ss = SessionState.get(); |
| if (ss == null) { |
| throw new SemanticException("No current SessionState, cannot create temporary table " |
| + qualifiedTabName.getNotEmptyDbTable()); |
| } |
| Map<String, Table> tables = SessionHiveMetaStoreClient. |
| getTempTablesForDatabase(qualifiedTabName.getDb(), qualifiedTabName.getTable()); |
| if (tables != null && tables.containsKey(qualifiedTabName.getTable())) { |
| throw new SemanticException("Temporary table " + qualifiedTabName.getNotEmptyDbTable() |
| + " already exists"); |
| } |
| } |
| } else { |
| // Verify that the table does not already exist |
| // dumpTable is only used to check the conflict for non-temporary tables |
| try { |
| Table dumpTable = db.newTable(dbDotTab); |
| if (null != db.getTable(dumpTable.getDbName(), dumpTable.getTableName(), false) && !ctx.isExplainSkipExecution()) { |
| throw new SemanticException(ErrorMsg.TABLE_ALREADY_EXISTS.getMsg(dbDotTab)); |
| } |
| } catch (HiveException e) { |
| throw new SemanticException(e); |
| } |
| } |
| |
| if (location != null && location.length() != 0) { |
| Path locPath = new Path(location); |
| FileSystem curFs = null; |
| FileStatus locStats = null; |
| try { |
| curFs = locPath.getFileSystem(conf); |
| if(curFs != null) { |
| locStats = curFs.getFileStatus(locPath); |
| } |
| if (locStats != null && locStats.isDir()) { |
| FileStatus[] lStats = curFs.listStatus(locPath); |
| if(lStats != null && lStats.length != 0) { |
| // Don't throw an exception if the target location only contains the staging-dirs |
| for (FileStatus lStat : lStats) { |
| if (!lStat.getPath().getName().startsWith(HiveConf.getVar(conf, HiveConf.ConfVars.STAGINGDIR))) { |
| throw new SemanticException(ErrorMsg.CTAS_LOCATION_NONEMPTY.getMsg(location)); |
| } |
| } |
| } |
| } |
| } catch (FileNotFoundException nfe) { |
| //we will create the folder if it does not exist. |
| } catch (IOException ioE) { |
| LOG.debug("Exception when validate folder", ioE); |
| } |
| } |
| if (!CollectionUtils.isEmpty(partCols)) { |
| throw new SemanticException( |
| "Partition columns can only declared using their names in CTAS statements"); |
| } |
| |
| tblProps = validateAndAddDefaultProperties( |
| tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, isTemporary, |
| isTransactional, isManaged, new String[]{qualifiedTabName.getDb(), qualifiedTabName.getTable()}, isDefaultTableTypeChanged); |
| isExt = isExternalTableChanged(tblProps, isTransactional, isExt, isDefaultTableTypeChanged); |
| tblProps.put(TABLE_IS_CTAS, "true"); |
| addDbAndTabToOutputs(new String[] {qualifiedTabName.getDb(), qualifiedTabName.getTable()}, |
| TableType.MANAGED_TABLE, isTemporary, tblProps, storageFormat); |
| tableDesc = new CreateTableDesc(qualifiedTabName, isExt, isTemporary, cols, |
| partColNames, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, |
| rowFormatParams.fieldEscape, rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, |
| rowFormatParams.lineDelim, comment, storageFormat.getInputFormat(), |
| storageFormat.getOutputFormat(), location, storageFormat.getSerde(), |
| storageFormat.getStorageHandler(), storageFormat.getSerdeProps(), tblProps, ifNotExists, |
| skewedColNames, skewedValues, true, primaryKeys, foreignKeys, |
| uniqueConstraints, notNullConstraints, defaultConstraints, checkConstraints); |
| tableDesc.setMaterialization(isMaterialization); |
| tableDesc.setStoredAsSubDirectories(storedAsDirs); |
| tableDesc.setNullFormat(rowFormatParams.nullFormat); |
| qb.setTableDesc(tableDesc); |
| |
| return selectStmt; |
| |
| default: |
| throw new SemanticException("Unrecognized command."); |
| } |
| return null; |
| } |
| |
| private void validateStorageFormat( |
| StorageFormat storageFormat, Map<String, String> tblProps, boolean partitionTransformSpecExists) |
| throws SemanticException { |
| HiveStorageHandler handler; |
| try { |
| handler = HiveUtils.getStorageHandler(conf, storageFormat.getStorageHandler()); |
| } catch (HiveException e) { |
| throw new SemanticException("Failed to load storage handler: " + e.getMessage()); |
| } |
| |
| if (handler != null) { |
| if (partitionTransformSpecExists && !handler.supportsPartitionTransform()) { |
| throw new SemanticException("Partition transform is not supported for " + handler.getClass().getName()); |
| } |
| |
| String fileFormatPropertyKey = handler.getFileFormatPropertyKey(); |
| if (fileFormatPropertyKey != null) { |
| if (tblProps != null && tblProps.containsKey(fileFormatPropertyKey) && storageFormat.getSerdeProps() != null && |
| storageFormat.getSerdeProps().containsKey(fileFormatPropertyKey)) { |
| String fileFormat = tblProps.get(fileFormatPropertyKey); |
| throw new SemanticException( |
| "Provide only one of the following: STORED BY " + fileFormat + " or WITH SERDEPROPERTIES('" + |
| fileFormatPropertyKey + "'='" + fileFormat + "') or" + " TBLPROPERTIES('" + fileFormatPropertyKey |
| + "'='" + fileFormat + "')"); |
| } |
| } |
| } |
| } |
| |
| /** Adds entities for create table/create view. */ |
| private void addDbAndTabToOutputs(String[] qualifiedTabName, TableType type, |
| boolean isTemporary, Map<String, String> tblProps, StorageFormat storageFormat) throws SemanticException { |
| Database database = getDatabase(qualifiedTabName[0]); |
| outputs.add(new WriteEntity(database, WriteEntity.WriteType.DDL_SHARED)); |
| |
| Table t = new Table(qualifiedTabName[0], qualifiedTabName[1]); |
| t.setParameters(tblProps); |
| t.setTableType(type); |
| t.setTemporary(isTemporary); |
| HiveStorageHandler storageHandler = null; |
| if (storageFormat.getStorageHandler() != null) { |
| try { |
| storageHandler = (HiveStorageHandler) ReflectionUtils.newInstance( |
| conf.getClassByName(storageFormat.getStorageHandler()), SessionState.get().getConf()); |
| t.setProperty(META_TABLE_STORAGE, storageHandler.getClass().getName()); |
| } catch (ClassNotFoundException ex) { |
| LOG.error("Class not found. Storage handler will be set to null: "+ex.getMessage() , ex); |
| } |
| } |
| t.setStorageHandler(storageHandler); |
| for (Map.Entry<String,String> serdeMap : storageFormat.getSerdeProps().entrySet()){ |
| t.setSerdeParam(serdeMap.getKey(), serdeMap.getValue()); |
| } |
| WriteType lockType = tblProps != null && Boolean.parseBoolean(tblProps.get(TABLE_IS_CTAS)) |
| && AcidUtils.isExclusiveCTASEnabled(conf) |
        // Iceberg CTAS has its own locking mechanism, therefore we should exclude it here
| && (t.getStorageHandler() == null || !t.getStorageHandler().directInsert()) ? |
| WriteType.CTAS : WriteType.DDL_NO_LOCK; |
| |
| outputs.add(new WriteEntity(t, lockType)); |
| } |
| |
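  /**
   * Analyzes a CREATE MATERIALIZED VIEW statement: walks the AST children to collect columns,
   * comment, table properties, partition/cluster/sort columns, location and storage format,
   * builds the {@link CreateMaterializedViewDesc}, and returns the defining SELECT statement for
   * further analysis.
   */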
| protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCtx) throws SemanticException { |
| TableName qualTabName = getQualifiedTableName((ASTNode) ast.getChild(0)); |
| final String dbDotTable = qualTabName.getNotEmptyDbTable(); |
| List<FieldSchema> cols = null; |
| boolean ifNotExists = false; |
| boolean rewriteEnabled = true; |
| String comment = null; |
| ASTNode selectStmt = null; |
| Map<String, String> tblProps = null; |
| List<String> partColNames = null; |
| List<String> sortColNames = null; |
| List<String> distributeColNames = null; |
| String location = null; |
| RowFormatParams rowFormatParams = new RowFormatParams(); |
| StorageFormat storageFormat = new StorageFormat(conf); |
| boolean partitionTransformSpecExists = false; |
| |
| LOG.info("Creating view " + dbDotTable + " position=" |
| + ast.getCharPositionInLine()); |
| int numCh = ast.getChildCount(); |
| for (int num = 1; num < numCh; num++) { |
| ASTNode child = (ASTNode) ast.getChild(num); |
| if (storageFormat.fillStorageFormat(child)) { |
| continue; |
| } |
| switch (child.getToken().getType()) { |
| case HiveParser.TOK_IFNOTEXISTS: |
| ifNotExists = true; |
| break; |
| case HiveParser.TOK_REWRITE_DISABLED: |
| rewriteEnabled = false; |
| break; |
| case HiveParser.TOK_QUERY: |
| // For CBO |
| if (plannerCtx != null) { |
| plannerCtx.setViewToken(child); |
| } |
| selectStmt = child; |
| break; |
| case HiveParser.TOK_TABCOLNAME: |
| cols = getColumns(child); |
| break; |
| case HiveParser.TOK_TABLECOMMENT: |
| comment = unescapeSQLString(child.getChild(0).getText()); |
| break; |
| case HiveParser.TOK_TABLEPROPERTIES: |
| tblProps = getProps((ASTNode) child.getChild(0)); |
| break; |
| case HiveParser.TOK_VIEWPARTCOLS: |
| partColNames = getColumnNames((ASTNode) child.getChild(0)); |
| break; |
| case HiveParser.TOK_VIEWCLUSTERCOLS: |
| assert distributeColNames == null && sortColNames == null; |
| distributeColNames = getColumnNames((ASTNode) child.getChild(0)); |
| sortColNames = new ArrayList<>(distributeColNames); |
| break; |
| case HiveParser.TOK_VIEWDISTRIBUTECOLS: |
| assert distributeColNames == null; |
| distributeColNames = getColumnNames((ASTNode) child.getChild(0)); |
| break; |
| case HiveParser.TOK_VIEWSORTCOLS: |
| assert sortColNames == null; |
| sortColNames = getColumnNames((ASTNode) child.getChild(0)); |
| break; |
| case HiveParser.TOK_TABLEROWFORMAT: |
| rowFormatParams.analyzeRowFormat(child); |
| break; |
| case HiveParser.TOK_TABLELOCATION: |
| location = unescapeSQLString(child.getChild(0).getText()); |
| location = EximUtil.relativeToAbsolutePath(conf, location); |
| inputs.add(toReadEntity(location)); |
| break; |
| case HiveParser.TOK_TABLESERIALIZER: |
| child = (ASTNode) child.getChild(0); |
| storageFormat.setSerde(unescapeSQLString(child.getChild(0).getText())); |
| if (child.getChildCount() == 2) { |
| readProps((ASTNode) (child.getChild(1).getChild(0)), |
| storageFormat.getSerdeProps()); |
| } |
| break; |
| case HiveParser.TOK_TABLEPARTCOLSBYSPEC: |
| SessionStateUtil.addResourceOrThrow(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, |
| PartitionTransform.getPartitionTransformSpec(child)); |
| partitionTransformSpecExists = true; |
| break; |
| default: |
| assert false; |
| } |
| } |
| |
| validateStorageFormat(storageFormat, tblProps, partitionTransformSpecExists); |
| |
| storageFormat.fillDefaultStorageFormat(false, true); |
| |
| if (!ifNotExists) { |
| // Verify that the table does not already exist |
      // dumpTable is only used to check for a name conflict with non-temporary tables
| try { |
| Table dumpTable = db.newTable(dbDotTable); |
| if (null != db.getTable(dumpTable.getDbName(), dumpTable.getTableName(), false) && |
| !ctx.isExplainSkipExecution()) { |
| throw new SemanticException(ErrorMsg.TABLE_ALREADY_EXISTS.getMsg(dbDotTable)); |
| } |
| } catch (HiveException e) { |
| throw new SemanticException(e); |
| } |
| } |
| if (partColNames != null && (distributeColNames != null || sortColNames != null)) { |
| // Verify that partition columns and data organization columns are not overlapping |
| Set<String> partColNamesSet = new HashSet<>(partColNames); |
| if (distributeColNames != null) { |
| for (String colName : distributeColNames) { |
| if (partColNamesSet.contains(colName)) { |
| throw new SemanticException("Same column cannot be present in partition and cluster/distribute clause. " |
| + "Column name: " + colName); |
| } |
| } |
| } |
| if (sortColNames != null) { |
| for (String colName : sortColNames) { |
| if (partColNamesSet.contains(colName)) { |
| throw new SemanticException("Same column cannot be present in partition and cluster/sort clause. " |
| + "Column name: " + colName); |
| } |
| } |
| } |
| } |
| |
| unparseTranslator.enable(); |
| |
| if (makeAcid()) { |
| if (tblProps == null) { |
| tblProps = new HashMap<>(); |
| } |
| tblProps = convertToAcidByDefault(storageFormat, dbDotTable, null, tblProps); |
| } |
| if (tblProps == null) { |
| tblProps = new HashMap<>(); |
| } |
| tblProps.put(hive_metastoreConstants.TABLE_IS_CTAS, "true"); |
| |
| createVwDesc = new CreateMaterializedViewDesc( |
| dbDotTable, cols, comment, tblProps, partColNames, sortColNames, distributeColNames, |
| ifNotExists, rewriteEnabled, |
| storageFormat.getInputFormat(), storageFormat.getOutputFormat(), |
| location, storageFormat.getSerde(), storageFormat.getStorageHandler(), |
| storageFormat.getSerdeProps()); |
| addDbAndTabToOutputs(new String[] {qualTabName.getDb(), qualTabName.getTable()}, TableType.MATERIALIZED_VIEW, |
| false, tblProps, storageFormat); |
| queryState.setCommandType(HiveOperation.CREATE_MATERIALIZED_VIEW); |
| qb.setViewDesc(createVwDesc); |
| |
| return selectStmt; |
| } |
| |
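  /**
   * Returns true when tables should be created as ACID by default, i.e. the metastore-side
   * CREATE_TABLES_AS_ACID flag is set, concurrency support is enabled and the DbTxnManager is
   * the configured transaction manager.
   */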
| private boolean makeAcid() { |
| return MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.CREATE_TABLES_AS_ACID) && |
| HiveConf.getBoolVar(conf, ConfVars.HIVE_SUPPORT_CONCURRENCY) && |
| DbTxnManager.class.getCanonicalName().equals(HiveConf.getVar(conf, ConfVars.HIVE_TXN_MANAGER)); |
| } |
| |
| // validate the (materialized) view statement |
| // check semantic conditions |
| private void validateCreateView() |
| throws SemanticException { |
| try { |
| // Do not allow view to be defined on temp table or other materialized view |
| validateTablesUsed(this); |
| if (createVwDesc.isRewriteEnabled()) { |
| int nativeAcidCount = 0; |
| int supportsSnapshotCount = 0; |
| for (TableScanOperator ts : topOps.values()) { |
| Table table = ts.getConf().getTableMetadata(); |
| if (SemanticAnalyzer.DUMMY_TABLE.equals(table.getTableName())) { |
| continue; |
| } |
| if (AcidUtils.isTransactionalTable(table)) { |
| ++nativeAcidCount; |
| } else if (table.isNonNative() && table.getStorageHandler().areSnapshotsSupported()) { |
| ++supportsSnapshotCount; |
| } else { |
| throw new SemanticException("Automatic rewriting for materialized view cannot " |
| + "be enabled if the materialized view uses non-transactional tables"); |
| } |
| if (isNotBlank(ts.getConf().getAsOfTimestamp()) || isNotBlank(ts.getConf().getAsOfVersion())) { |
            throw new SemanticException("Automatic rewriting for materialized view cannot "
                + "be enabled if the materialized view uses a time travel query.");
| } |
| } |
| if (nativeAcidCount > 0 && supportsSnapshotCount > 0) { |
| throw new SemanticException("All materialized view source tables either must be native ACID tables or " + |
| "must support table snapshots."); |
| } |
| } |
| |
| if (!qb.hasTableDefined()) { |
| throw new SemanticException("Materialized view must have a table defined."); |
| } |
| |
| if (createVwDesc.isRewriteEnabled()) { |
| if (!ctx.isCboSucceeded()) { |
| String msg = "Cannot enable automatic rewriting for materialized view."; |
| if (ctx.getCboInfo() != null) { |
| msg += " " + ctx.getCboInfo(); |
| } else { |
| msg += " Check CBO is turned on: set " + ConfVars.HIVE_CBO_ENABLED.varname; |
| } |
| throw new SemanticException(msg); |
| } |
| if (!isValidAutomaticRewritingMaterialization()) { |
| String errorMessage = "Only query text based automatic rewriting is available for materialized view. " + |
| getInvalidAutomaticRewritingMaterializationReason(); |
| console.printError(errorMessage); |
| LOG.warn(errorMessage); |
| } |
| } |
| } catch (HiveException e) { |
| throw new SemanticException(e.getMessage(), e); |
| } |
| } |
| |
| // Process the position alias in GROUPBY and ORDERBY |
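  // e.g. with hive.groupby.position.alias=true,
  //   SELECT key, count(*) FROM src GROUP BY 1
  // is rewritten as if the user had written GROUP BY key.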
| void processPositionAlias(ASTNode ast) throws SemanticException { |
| boolean isBothByPos = HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS); |
| boolean isGbyByPos = isBothByPos |
| || HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_POSITION_ALIAS); |
| boolean isObyByPos = isBothByPos |
| || HiveConf.getBoolVar(conf, ConfVars.HIVE_ORDERBY_POSITION_ALIAS); |
| |
| Deque<ASTNode> stack = new ArrayDeque<ASTNode>(); |
| stack.push(ast); |
| |
| while (!stack.isEmpty()) { |
| ASTNode next = stack.pop(); |
| |
| if (next.getChildCount() == 0) { |
| continue; |
| } |
| |
| boolean isAllCol; |
| ASTNode selectNode = null; |
| ASTNode groupbyNode = null; |
| ASTNode orderbyNode = null; |
| |
| // get node type |
| int child_count = next.getChildCount(); |
| for (int child_pos = 0; child_pos < child_count; ++child_pos) { |
| ASTNode node = (ASTNode) next.getChild(child_pos); |
| int type = node.getToken().getType(); |
| if (type == HiveParser.TOK_SELECT || type == HiveParser.TOK_SELECTDI) { |
| selectNode = node; |
| } else if (type == HiveParser.TOK_GROUPBY) { |
| groupbyNode = node; |
| } else if (type == HiveParser.TOK_ORDERBY) { |
| orderbyNode = node; |
| } |
| } |
| |
| if (selectNode != null) { |
| int selectExpCnt = selectNode.getChildCount(); |
| |
| // replace each of the position alias in GROUPBY with the actual column name |
| if (groupbyNode != null) { |
| for (int child_pos = 0; child_pos < groupbyNode.getChildCount(); ++child_pos) { |
| ASTNode node = (ASTNode) groupbyNode.getChild(child_pos); |
| if (node.getToken().getType() == HiveParser.Number) { |
| if (isGbyByPos) { |
| int pos = Integer.parseInt(node.getText()); |
| if (pos > 0 && pos <= selectExpCnt) { |
| groupbyNode.setChild(child_pos, |
| selectNode.getChild(pos - 1).getChild(0)); |
| } else { |
| throw new SemanticException( |
| ErrorMsg.INVALID_POSITION_ALIAS_IN_GROUPBY.getMsg( |
| "Position alias: " + pos + " does not exist\n" + |
| "The Select List is indexed from 1 to " + selectExpCnt)); |
| } |
| } else { |
              warn("Using constant number " + node.getText() +
                  " in group by. Position alias is ignored because hive.groupby.position.alias is false.");
| } |
| } |
| } |
| } |
| |
| // replace each of the position alias in ORDERBY with the actual column name, |
| // if cbo is enabled, orderby position will be processed in genPlan |
| if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED) |
| && orderbyNode != null) { |
| isAllCol = false; |
| for (int child_pos = 0; child_pos < selectNode.getChildCount(); ++child_pos) { |
| ASTNode node = (ASTNode) selectNode.getChild(child_pos).getChild(0); |
| if (node != null && node.getToken().getType() == HiveParser.TOK_ALLCOLREF) { |
| isAllCol = true; |
| } |
| } |
| for (int child_pos = 0; child_pos < orderbyNode.getChildCount(); ++child_pos) { |
| ASTNode colNode = null; |
| ASTNode node = null; |
| if (orderbyNode.getChildCount() > 0) { |
| colNode = (ASTNode) orderbyNode.getChild(child_pos).getChild(0); |
| if (colNode.getChildCount() > 0) { |
| node = (ASTNode) colNode.getChild(0); |
| } |
| } |
| if (node != null && node.getToken().getType() == HiveParser.Number) { |
| if (isObyByPos) { |
| if (!isAllCol) { |
| int pos = Integer.parseInt(node.getText()); |
| if (pos > 0 && pos <= selectExpCnt && selectNode.getChild(pos - 1).getChildCount() > 0) { |
| colNode.setChild(0, selectNode.getChild(pos - 1).getChild(0)); |
| } else { |
| throw new SemanticException( |
| ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg( |
| "Position alias: " + pos + " does not exist\n" + |
| "The Select List is indexed from 1 to " + selectExpCnt)); |
| } |
| } else { |
| throw new SemanticException( |
| ErrorMsg.NO_SUPPORTED_ORDERBY_ALLCOLREF_POS.getMsg()); |
| } |
| } else { //if not using position alias and it is a number. |
                warn("Using constant number " + node.getText() +
                    " in order by. Position alias is ignored because hive.orderby.position.alias is false.");
| } |
| } |
| } |
| } |
| } |
| |
| List<Node> childrenList = next.getChildren(); |
| for (int i = childrenList.size() - 1; i >= 0; i--) { |
| stack.push((ASTNode)childrenList.get(i)); |
| } |
| } |
| } |
| |
| /** |
| * process analyze ... noscan command |
| * @param tree |
| * @throws SemanticException |
| */ |
| protected void processNoScanCommand (ASTNode tree) throws SemanticException { |
| // check if it is noscan command |
| checkNoScan(tree); |
| |
    // validate noscan
| if (this.noscan) { |
| validateAnalyzeNoscan(tree); |
| } |
| } |
| |
| /** |
| * Validate noscan command |
| * |
| * @param tree |
| * @throws SemanticException |
| */ |
| private void validateAnalyzeNoscan(ASTNode tree) throws SemanticException { |
    // since this is a noscan command, the child node holds the actual table name
| String tableName = getUnescapedName((ASTNode) tree.getChild(0).getChild(0)); |
| Table tbl; |
| try { |
| tbl = getTableObjectByName(tableName); |
| } catch (InvalidTableException e) { |
| throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName), e); |
| } |
| catch (HiveException e) { |
| throw new SemanticException(e.getMessage(), e); |
| } |
| |
    /* noscan uses HDFS APIs to retrieve file-level information from the NameNode, */
    /* but that is specific to HDFS. Through the storage handler mechanism, the */
    /* table's data could live on any storage system: HBase, Cassandra, etc. */
    /* A clear error message should be given to the user in that case. */
| if (tbl.isNonNative()) { |
| throw new SemanticException(ErrorMsg.ANALYZE_TABLE_NOSCAN_NON_NATIVE.getMsg(tbl |
| .getTableName())); |
| } |
| } |
| |
| /** |
| * It will check if this is analyze ... compute statistics noscan |
| * @param tree |
| */ |
| private void checkNoScan(ASTNode tree) { |
| if (tree.getChildCount() > 1) { |
| ASTNode child0 = (ASTNode) tree.getChild(0); |
| ASTNode child1; |
| if (child0.getToken().getType() == HiveParser.TOK_TAB) { |
| child0 = (ASTNode) child0.getChild(0); |
| if (child0.getToken().getType() == HiveParser.TOK_TABNAME) { |
| child1 = (ASTNode) tree.getChild(1); |
| if (child1.getToken().getType() == HiveParser.KW_NOSCAN) { |
| this.noscan = true; |
| } |
| } |
| } |
| } |
| } |
| |
| public QB getQB() { |
| return qb; |
| } |
| |
| void setQB(QB qb) { |
| this.qb = qb; |
| } |
| |
| //--------------------------- PTF handling ----------------------------------- |
| |
| /* |
| * - a partitionTableFunctionSource can be a tableReference, a SubQuery or another |
| * PTF invocation. |
| * - For a TABLEREF: set the source to the alias returned by processTable |
| * - For a SubQuery: set the source to the alias returned by processSubQuery |
| * - For a PTF invocation: recursively call processPTFChain. |
| */ |
| private PTFInputSpec processPTFSource(QB qb, ASTNode inputNode) throws SemanticException{ |
| |
| PTFInputSpec qInSpec = null; |
| int type = inputNode.getType(); |
| String alias; |
| switch(type) |
| { |
| case HiveParser.TOK_TABREF: |
| alias = processTable(qb, inputNode); |
| qInSpec = new PTFQueryInputSpec(); |
| ((PTFQueryInputSpec)qInSpec).setType(PTFQueryInputType.TABLE); |
| ((PTFQueryInputSpec)qInSpec).setSource(alias); |
| break; |
| case HiveParser.TOK_SUBQUERY: |
| alias = processSubQuery(qb, inputNode); |
| qInSpec = new PTFQueryInputSpec(); |
| ((PTFQueryInputSpec)qInSpec).setType(PTFQueryInputType.SUBQUERY); |
| ((PTFQueryInputSpec)qInSpec).setSource(alias); |
| break; |
| case HiveParser.TOK_PTBLFUNCTION: |
| qInSpec = processPTFChain(qb, inputNode); |
| break; |
| default: |
| throw new SemanticException(generateErrorMessage(inputNode, |
| "Unknown input type to PTF")); |
| } |
| |
| qInSpec.setAstNode(inputNode); |
| return qInSpec; |
| |
| } |
| |
| /* |
| * - tree form is |
| * ^(TOK_PTBLFUNCTION name alias? partitionTableFunctionSource partitioningSpec? arguments*) |
| * - a partitionTableFunctionSource can be a tableReference, a SubQuery or another |
| * PTF invocation. |
| */ |
| private PartitionedTableFunctionSpec processPTFChain(QB qb, ASTNode ptf) |
| throws SemanticException{ |
| int child_count = ptf.getChildCount(); |
| if (child_count < 2) { |
| throw new SemanticException(generateErrorMessage(ptf, |
| "Not enough Children " + child_count)); |
| } |
| |
| PartitionedTableFunctionSpec ptfSpec = new PartitionedTableFunctionSpec(); |
| ptfSpec.setAstNode(ptf); |
| |
| /* |
| * name |
| */ |
| ASTNode nameNode = (ASTNode) ptf.getChild(0); |
| ptfSpec.setName(nameNode.getText()); |
| |
| int inputIdx = 1; |
| |
| /* |
| * alias |
| */ |
| ASTNode secondChild = (ASTNode) ptf.getChild(1); |
| if ( secondChild.getType() == HiveParser.Identifier ) { |
| ptfSpec.setAlias(secondChild.getText()); |
| inputIdx++; |
| } |
| |
| /* |
| * input |
| */ |
| ASTNode inputNode = (ASTNode) ptf.getChild(inputIdx); |
| ptfSpec.setInput(processPTFSource(qb, inputNode)); |
| |
| int argStartIdx = inputIdx + 1; |
| |
| /* |
| * partitioning Spec |
| */ |
| int pSpecIdx = inputIdx + 1; |
| ASTNode pSpecNode = ptf.getChildCount() > inputIdx ? |
| (ASTNode) ptf.getChild(pSpecIdx) : null; |
| if (pSpecNode != null && pSpecNode.getType() == HiveParser.TOK_PARTITIONINGSPEC) |
| { |
| PartitioningSpec partitioning = processPTFPartitionSpec(pSpecNode); |
| ptfSpec.setPartitioning(partitioning); |
| argStartIdx++; |
| } |
| |
| /* |
| * arguments |
| */ |
| for(int i=argStartIdx; i < ptf.getChildCount(); i++) |
| { |
| ptfSpec.addArg((ASTNode) ptf.getChild(i)); |
| } |
| return ptfSpec; |
| } |
| |
| /* |
| * - invoked during FROM AST tree processing, on encountering a PTF invocation. |
| * - tree form is |
| * ^(TOK_PTBLFUNCTION name partitionTableFunctionSource partitioningSpec? arguments*) |
| * - setup a PTFInvocationSpec for this top level PTF invocation. |
| */ |
| private void processPTF(QB qb, ASTNode ptf) throws SemanticException{ |
| |
| PartitionedTableFunctionSpec ptfSpec = processPTFChain(qb, ptf); |
| |
| Optional.ofNullable(ptfSpec.getAlias()) |
| .ifPresent(qb::addAlias); |
| |
| PTFInvocationSpec spec = new PTFInvocationSpec(); |
| spec.setFunction(ptfSpec); |
| qb.addPTFNodeToSpec(ptf, spec); |
| } |
| |
| private void handleQueryWindowClauses(QB qb, Phase1Ctx ctx_1, ASTNode node) |
| throws SemanticException { |
| WindowingSpec spec = qb.getWindowingSpec(ctx_1.dest); |
| for(Node child : node.getChildren()) { |
| processQueryWindowClause(spec, (ASTNode) child); |
| } |
| } |
| |
| private PartitionSpec processPartitionSpec(ASTNode node) { |
| PartitionSpec pSpec = new PartitionSpec(); |
| for(Node child : node.getChildren()) { |
| PartitionExpression exprSpec = new PartitionExpression(); |
| exprSpec.setExpression((ASTNode) child); |
| pSpec.addExpression(exprSpec); |
| } |
| return pSpec; |
| } |
| |
| private PartitioningSpec processPTFPartitionSpec(ASTNode pSpecNode) |
| { |
| PartitioningSpec partitioning = new PartitioningSpec(); |
| ASTNode firstChild = (ASTNode) pSpecNode.getChild(0); |
| int type = firstChild.getType(); |
| |
| if ( type == HiveParser.TOK_DISTRIBUTEBY || type == HiveParser.TOK_CLUSTERBY ) |
| { |
| PartitionSpec pSpec = processPartitionSpec(firstChild); |
| partitioning.setPartSpec(pSpec); |
| ASTNode sortNode = pSpecNode.getChildCount() > 1 ? (ASTNode) pSpecNode.getChild(1) : null; |
| if ( sortNode != null ) |
| { |
| OrderSpec oSpec = processOrderSpec(sortNode); |
| partitioning.setOrderSpec(oSpec); |
| } |
| } |
| else if ( type == HiveParser.TOK_SORTBY || type == HiveParser.TOK_ORDERBY ) { |
| OrderSpec oSpec = processOrderSpec(firstChild); |
| partitioning.setOrderSpec(oSpec); |
| } |
| return partitioning; |
| } |
| |
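  /**
   * Builds a {@link WindowFunctionSpec} from a windowing function invocation in the select list:
   * records the function name and arguments, whether it is DISTINCT or a star invocation, and,
   * when a window specification node is present, the RESPECT/IGNORE NULLS treatment and the
   * parsed window spec.
   */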
| private WindowFunctionSpec processWindowFunction(ASTNode node, ASTNode wsNode) |
| throws SemanticException { |
| WindowFunctionSpec wfSpec = new WindowFunctionSpec(); |
| |
| switch(node.getType()) { |
| case HiveParser.TOK_FUNCTIONSTAR: |
| wfSpec.setStar(true); |
| break; |
| case HiveParser.TOK_FUNCTIONDI: |
| wfSpec.setDistinct(true); |
| break; |
| } |
| |
| wfSpec.setExpression(node); |
| |
| ASTNode nameNode = (ASTNode) node.getChild(0); |
| wfSpec.setName(nameNode.getText()); |
| |
| for(int i=1; i < node.getChildCount()-1; i++) { |
| ASTNode child = (ASTNode) node.getChild(i); |
| wfSpec.addArg(child); |
| } |
| |
| if ( wsNode != null ) { |
| WindowFunctionInfo functionInfo = FunctionRegistry.getWindowFunctionInfo(wfSpec.name); |
| if (functionInfo == null) { |
| throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(wfSpec.name)); |
| } |
| wfSpec.setRespectNulls(processRespectIgnoreNulls(functionInfo, wsNode)); |
| wfSpec.setWindowSpec(processWindowSpec(wsNode)); |
| } |
| |
| return wfSpec; |
| } |
| |
| private boolean processRespectIgnoreNulls(WindowFunctionInfo functionInfo, ASTNode node) |
| throws SemanticException { |
| |
| for(int i=0; i < node.getChildCount(); i++) { |
| int type = node.getChild(i).getType(); |
| switch(type) { |
| case HiveParser.TOK_RESPECT_NULLS: |
| if (!functionInfo.isSupportsNullTreatment()) { |
| throw new SemanticException(ErrorMsg.NULL_TREATMENT_NOT_SUPPORTED, functionInfo.getDisplayName()); |
| } |
| return true; |
| case HiveParser.TOK_IGNORE_NULLS: |
| if (!functionInfo.isSupportsNullTreatment()) { |
| throw new SemanticException(ErrorMsg.NULL_TREATMENT_NOT_SUPPORTED, functionInfo.getDisplayName()); |
| } |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| private boolean containsLeadLagUDF(ASTNode expressionTree) { |
| int exprTokenType = expressionTree.getToken().getType(); |
| if (exprTokenType == HiveParser.TOK_FUNCTION) { |
| assert (expressionTree.getChildCount() != 0); |
| if (expressionTree.getChild(0).getType() == HiveParser.Identifier) { |
| String functionName = unescapeIdentifier(expressionTree.getChild(0) |
| .getText()); |
| functionName = functionName.toLowerCase(); |
| if ( FunctionRegistry.LAG_FUNC_NAME.equals(functionName) || |
| FunctionRegistry.LEAD_FUNC_NAME.equals(functionName) |
| ) { |
| return true; |
| } |
| } |
| } |
| for (int i = 0; i < expressionTree.getChildCount(); i++) { |
| if ( containsLeadLagUDF((ASTNode) expressionTree.getChild(i))) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| private void processQueryWindowClause(WindowingSpec spec, ASTNode node) |
| throws SemanticException { |
| ASTNode nameNode = (ASTNode) node.getChild(0); |
| ASTNode wsNode = (ASTNode) node.getChild(1); |
| if(spec.getWindowSpecs() != null && spec.getWindowSpecs().containsKey(nameNode.getText())){ |
| throw new SemanticException(generateErrorMessage(nameNode, |
| "Duplicate definition of window " + nameNode.getText() + |
| " is not allowed")); |
| } |
| WindowSpec ws = processWindowSpec(wsNode); |
| spec.addWindowSpec(nameNode.getText(), ws); |
| } |
| |
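  /**
   * Parses a window specification node into a {@link WindowSpec}: an optional identifier naming a
   * source window, an optional partitioning/ordering spec, and an optional window frame.
   */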
| private WindowSpec processWindowSpec(ASTNode node) throws SemanticException { |
| boolean hasSrcId = false, hasPartSpec = false, hasWF = false; |
| int srcIdIdx = -1, partIdx = -1, wfIdx = -1; |
| |
| for(int i=0; i < node.getChildCount(); i++) |
| { |
| int type = node.getChild(i).getType(); |
| switch(type) |
| { |
| case HiveParser.Identifier: |
| hasSrcId = true; srcIdIdx = i; |
| break; |
| case HiveParser.TOK_PARTITIONINGSPEC: |
| hasPartSpec = true; partIdx = i; |
| break; |
| case HiveParser.TOK_WINDOWRANGE: |
| case HiveParser.TOK_WINDOWVALUES: |
| hasWF = true; wfIdx = i; |
| break; |
| } |
| } |
| |
| WindowSpec ws = new WindowSpec(); |
| |
| if (hasSrcId) { |
| ASTNode nameNode = (ASTNode) node.getChild(srcIdIdx); |
| ws.setSourceId(nameNode.getText()); |
| } |
| |
| if (hasPartSpec) { |
| ASTNode partNode = (ASTNode) node.getChild(partIdx); |
| PartitioningSpec partitioning = processPTFPartitionSpec(partNode); |
| ws.setPartitioning(partitioning); |
| } |
| |
| if (hasWF) { |
| ASTNode wfNode = (ASTNode) node.getChild(wfIdx); |
| WindowFrameSpec wfSpec = processWindowFrame(wfNode); |
| ws.setWindowFrame(wfSpec); |
| } |
| |
| return ws; |
| } |
| |
| private WindowFrameSpec processWindowFrame(ASTNode node) throws SemanticException { |
| int type = node.getType(); |
| BoundarySpec end = null; |
| |
| /* |
| * A WindowFrame may contain just the Start Boundary or in the |
| * between style of expressing a WindowFrame both boundaries |
| * are specified. |
| */ |
| BoundarySpec start = processBoundary((ASTNode) node.getChild(0)); |
| if ( node.getChildCount() > 1 ) { |
| end = processBoundary((ASTNode) node.getChild(1)); |
| } |
| // Note: TOK_WINDOWVALUES means RANGE type, TOK_WINDOWRANGE means ROWS type |
| return new WindowFrameSpec(type == HiveParser.TOK_WINDOWVALUES ? WindowType.RANGE : WindowType.ROWS, start, end); |
| } |
| |
| private BoundarySpec processBoundary(ASTNode node) throws SemanticException { |
| BoundarySpec bs = new BoundarySpec(); |
| int type = node.getType(); |
| boolean hasAmt = true; |
| |
| switch(type) |
| { |
| case HiveParser.KW_PRECEDING: |
| bs.setDirection(Direction.PRECEDING); |
| break; |
| case HiveParser.KW_FOLLOWING: |
| bs.setDirection(Direction.FOLLOWING); |
| break; |
| case HiveParser.KW_CURRENT: |
| bs.setDirection(Direction.CURRENT); |
| hasAmt = false; |
| break; |
| default: |
| // no-op |
| } |
| |
| if ( hasAmt ) |
| { |
| ASTNode amtNode = (ASTNode) node.getChild(0); |
| if ( amtNode.getType() == HiveParser.KW_UNBOUNDED) |
| { |
| bs.setAmt(BoundarySpec.UNBOUNDED_AMOUNT); |
| } |
| else |
| { |
| int amt = Integer.parseInt(amtNode.getText()); |
| if ( amt < 0 ) { |
| throw new SemanticException( |
| "Window Frame Boundary Amount must be a non-negative integer, provided amount is: " + amt); |
| } else if (amt == 0) { |
| // Convert 0 PRECEDING/FOLLOWING to CURRENT ROW |
| LOG.info("Converting 0 {} to CURRENT ROW", bs.getDirection()); |
| bs.setDirection(Direction.CURRENT); |
| hasAmt = false; |
| } else { |
| bs.setAmt(amt); |
| } |
| } |
| } |
| |
| return bs; |
| } |
| |
| //--------------------------- PTF handling: PTFInvocationSpec to PTFDesc -------------------------- |
| |
| private PTFDesc translatePTFInvocationSpec(PTFInvocationSpec ptfQSpec, RowResolver inputRR) |
| throws SemanticException { |
| PTFTranslator translator = new PTFTranslator(); |
| return translator.translate(ptfQSpec, this, conf, inputRR, unparseTranslator); |
| } |
| |
| private Operator genPTFPlan(PTFInvocationSpec ptfQSpec, Operator input) throws SemanticException { |
| List<PTFInvocationSpec> componentQueries = PTFTranslator.componentize(ptfQSpec); |
| for (PTFInvocationSpec ptfSpec : componentQueries) { |
| input = genPTFPlanForComponentQuery(ptfSpec, input); |
| } |
    LOG.debug("Created PTF Plan");
| return input; |
| } |
| |
| |
| /** |
| * Construct the data structures containing ExprNodeDesc for partition |
| * columns and order columns. Use the input definition to construct the list |
| * of output columns for the ReduceSinkOperator |
| */ |
| private void buildPTFReduceSinkDetails(PartitionedTableFunctionDef tabDef, |
| List<ExprNodeDesc> partCols, |
| List<ExprNodeDesc> orderCols, |
| StringBuilder orderString, |
| StringBuilder nullOrderString) { |
| |
| List<PTFExpressionDef> partColList = tabDef.getPartition().getExpressions(); |
| |
| for (PTFExpressionDef colDef : partColList) { |
| ExprNodeDesc exprNode = colDef.getExprNode(); |
| if (ExprNodeDescUtils.indexOf(exprNode, partCols) < 0) { |
| partCols.add(exprNode); |
| orderCols.add(exprNode); |
| orderString.append('+'); |
| nullOrderString.append('a'); |
| } |
| } |
| |
| /* |
| * Order columns are used as key columns for constructing |
| * the ReduceSinkOperator |
| * Since we do not explicitly add these to outputColumnNames, |
| * we need to set includeKeyCols = false while creating the |
| * ReduceSinkDesc |
| */ |
| List<OrderExpressionDef> orderColList = tabDef.getOrder().getExpressions(); |
| for (OrderExpressionDef colDef : orderColList) { |
| char orderChar = colDef.getOrder() == PTFInvocationSpec.Order.ASC ? '+' : '-'; |
| char nullOrderChar = colDef.getNullOrder() == PTFInvocationSpec.NullOrder.NULLS_FIRST ? 'a' : 'z'; |
| int index = ExprNodeDescUtils.indexOf(colDef.getExprNode(), orderCols); |
| if (index >= 0) { |
| orderString.setCharAt(index, orderChar); |
| nullOrderString.setCharAt(index, nullOrderChar); |
| continue; |
| } |
| orderCols.add(colDef.getExprNode()); |
| orderString.append(orderChar); |
| nullOrderString.append(nullOrderChar); |
| } |
| } |
| |
| private Operator genPTFPlanForComponentQuery(PTFInvocationSpec ptfQSpec, Operator input) |
| throws SemanticException { |
| /* |
| * 1. Create the PTFDesc from the Qspec attached to this QB. |
| */ |
| RowResolver rr = opParseCtx.get(input).getRowResolver(); |
| PTFDesc ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr); |
| |
| /* |
| * 2. build Map-side Op Graph. Graph template is either: |
| * Input -> PTF_map -> ReduceSink |
| * or |
| * Input -> ReduceSink |
| * |
| * Here the ExprNodeDescriptors in the QueryDef are based on the Input Operator's RR. |
| */ |
| { |
| PartitionedTableFunctionDef tabDef = ptfDesc.getStartOfChain(); |
| |
| /* |
| * a. add Map-side PTF Operator if needed |
| */ |
| if (tabDef.isTransformsRawInput() ) |
| { |
| RowResolver ptfMapRR = tabDef.getRawInputShape().getRr(); |
| |
| ptfDesc.setMapSide(true); |
| input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, |
| new RowSchema(ptfMapRR.getColumnInfos()), input), ptfMapRR); |
| rr = opParseCtx.get(input).getRowResolver(); |
| } |
| |
| /* |
| * b. Build Reduce Sink Details (keyCols, valueCols, outColNames etc.) for this ptfDesc. |
| */ |
| |
| List<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>(); |
| List<ExprNodeDesc> orderCols = new ArrayList<ExprNodeDesc>(); |
| StringBuilder orderString = new StringBuilder(); |
| StringBuilder nullOrderString = new StringBuilder(); |
| |
| /* |
| * Use the input RR of TableScanOperator in case there is no map-side |
| * reshape of input. |
       * If the parent of the ReduceSinkOperator is a PTFOperator, use its
       * output RR.
| */ |
| buildPTFReduceSinkDetails(tabDef, partCols, orderCols, orderString, nullOrderString); |
| input = genReduceSinkPlan(input, partCols, orderCols, orderString.toString(), |
| nullOrderString.toString(), -1, Operation.NOT_ACID, false); |
| } |
| |
| /* |
| * 3. build Reduce-side Op Graph |
| */ |
| { |
| |
| /* |
       * c. Rebuild the QueryDef.
| * Why? |
| * - so that the ExprNodeDescriptors in the QueryDef are based on the |
| * Select Operator's RowResolver |
| */ |
| rr = opParseCtx.get(input).getRowResolver(); |
| ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr); |
| |
| /* |
| * d. Construct PTF Operator. |
| */ |
| RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr(); |
| input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, |
| new RowSchema(ptfOpRR.getColumnInfos()), |
| input), ptfOpRR); |
| |
| } |
| return input; |
| } |
| |
| //--------------------------- Windowing handling: PTFInvocationSpec to PTFDesc -------------------- |
| |
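  /**
   * Generates the operator plan for the windowing clauses of a query: validates and normalizes
   * the {@link WindowingSpec}, splits it into compatible components, and for each component adds
   * a ReduceSink (for partitioning/ordering), a PTF operator and a select-all on top of the
   * current input.
   */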
| private Operator genWindowingPlan(QB qb, WindowingSpec wSpec, Operator input) throws SemanticException { |
| wSpec.validateAndMakeEffective(); |
| |
| if (!isCBOExecuted() && !qb.getParseInfo().getDestToGroupBy().isEmpty()) { |
| // If CBO did not optimize the query, we might need to replace grouping function |
| final String selClauseName = qb.getParseInfo().getClauseNames().iterator().next(); |
| final boolean cubeRollupGrpSetPresent = (!qb.getParseInfo().getDestRollups().isEmpty() |
| || !qb.getParseInfo().getDestGroupingSets().isEmpty() |
| || !qb.getParseInfo().getDestCubes().isEmpty()); |
| for (WindowExpressionSpec wExprSpec : wSpec.getWindowExpressions()) { |
| // Special handling of grouping function |
| wExprSpec.setExpression(rewriteGroupingFunctionAST( |
| getGroupByForClause(qb.getParseInfo(), selClauseName), wExprSpec.getExpression(), |
| !cubeRollupGrpSetPresent)); |
| } |
| } |
| |
| WindowingComponentizer groups = new WindowingComponentizer(wSpec); |
| RowResolver rr = opParseCtx.get(input).getRowResolver(); |
| |
| while(groups.hasNext() ) { |
| wSpec = groups.next(conf, this, unparseTranslator, rr); |
| input = genReduceSinkPlanForWindowing(wSpec, rr, input); |
| rr = opParseCtx.get(input).getRowResolver(); |
| PTFTranslator translator = new PTFTranslator(); |
| PTFDesc ptfDesc = translator.translate(wSpec, this, conf, rr, unparseTranslator); |
| RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr(); |
| input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, |
| new RowSchema(ptfOpRR.getColumnInfos()), input), ptfOpRR); |
| input = genSelectAllDesc(input); |
| rr = ptfOpRR; |
| } |
| |
| return input; |
| } |
| |
| private Operator genReduceSinkPlanForWindowing(WindowingSpec spec, |
| RowResolver inputRR, Operator input) throws SemanticException{ |
| |
| List<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>(); |
| List<ExprNodeDesc> orderCols = new ArrayList<ExprNodeDesc>(); |
| StringBuilder order = new StringBuilder(); |
| StringBuilder nullOrder = new StringBuilder(); |
| |
| for (PartitionExpression partCol : spec.getQueryPartitionSpec().getExpressions()) { |
| ExprNodeDesc partExpr = genExprNodeDesc(partCol.getExpression(), inputRR); |
| if (ExprNodeDescUtils.indexOf(partExpr, partCols) < 0) { |
| partCols.add(partExpr); |
| orderCols.add(partExpr); |
| order.append('+'); |
| nullOrder.append('a'); |
| } |
| } |
| |
| if (spec.getQueryOrderSpec() != null) { |
| for (OrderExpression orderCol : spec.getQueryOrderSpec().getExpressions()) { |
| ExprNodeDesc orderExpr = genExprNodeDesc(orderCol.getExpression(), inputRR); |
| char orderChar = orderCol.getOrder() == PTFInvocationSpec.Order.ASC ? '+' : '-'; |
| char nullOrderChar = orderCol.getNullOrder() == PTFInvocationSpec.NullOrder.NULLS_FIRST ? 'a' : 'z'; |
| int index = ExprNodeDescUtils.indexOf(orderExpr, orderCols); |
| if (index >= 0) { |
| order.setCharAt(index, orderChar); |
| nullOrder.setCharAt(index, nullOrderChar); |
| continue; |
| } |
| orderCols.add(genExprNodeDesc(orderCol.getExpression(), inputRR)); |
| order.append(orderChar); |
| nullOrder.append(nullOrderChar); |
| } |
| } |
| |
| return genReduceSinkPlan(input, partCols, orderCols, order.toString(), nullOrder.toString(), |
| -1, Operation.NOT_ACID, false); |
| } |
| |
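  /**
   * Parses a select expression list string into a list of {@link WindowExpressionSpec}, one per
   * expression, deriving an alias for each (e.g. "x + 1 as c" would get the alias "c"). Star
   * expressions are rejected.
   */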
| public static List<WindowExpressionSpec> parseSelect(String selectExprStr) |
| throws SemanticException |
| { |
| ASTNode selNode = null; |
| try { |
| ParseDriver pd = new ParseDriver(); |
| selNode = pd.parseSelect(selectExprStr, null).getTree(); |
| } catch (ParseException pe) { |
| throw new SemanticException(pe); |
| } |
| |
| List<WindowExpressionSpec> selSpec = new ArrayList<WindowExpressionSpec>(); |
| int childCount = selNode.getChildCount(); |
| for (int i = 0; i < childCount; i++) { |
| ASTNode selExpr = (ASTNode) selNode.getChild(i); |
| if (selExpr.getType() != HiveParser.TOK_SELEXPR) { |
| throw new SemanticException(String.format( |
| "Only Select expressions supported in dynamic select list: %s", selectExprStr)); |
| } |
| ASTNode expr = (ASTNode) selExpr.getChild(0); |
| if (expr.getType() == HiveParser.TOK_ALLCOLREF) { |
| throw new SemanticException( |
| String.format("'%s' column not allowed in dynamic select list", selectExprStr)); |
| } |
| ASTNode aliasNode = selExpr.getChildCount() > 1 |
| && selExpr.getChild(1).getType() == HiveParser.Identifier ? |
| (ASTNode) selExpr.getChild(1) : null; |
| String alias = null; |
| if ( aliasNode != null ) { |
| alias = aliasNode.getText(); |
| } |
| else { |
| String[] tabColAlias = getColAlias(selExpr, null, null, true, -1); |
| alias = tabColAlias[1]; |
| } |
| WindowExpressionSpec exprSpec = new WindowExpressionSpec(); |
| exprSpec.setAlias(alias); |
| exprSpec.setExpression(expr); |
| selSpec.add(exprSpec); |
| } |
| |
| return selSpec; |
| } |
| |
| private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo, |
| Operator<? extends OperatorDesc> reduceSinkOp, RowResolver gByRR) { |
| if ( gByExpr.getType() == HiveParser.DOT |
| && gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL ) { |
| String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr |
| .getChild(0).getChild(0).getText().toLowerCase()); |
| String col_alias = BaseSemanticAnalyzer.unescapeIdentifier( |
| gByExpr.getChild(1).getText().toLowerCase()); |
| gByRR.put(tab_alias, col_alias, colInfo); |
| } else if ( gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL ) { |
| String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr |
| .getChild(0).getText().toLowerCase()); |
| String tab_alias = null; |
| /* |
| * If the input to the GBy has a tab alias for the column, then add an entry |
| * based on that tab_alias. |
       * For example, this query:
| * select b.x, count(*) from t1 b group by x |
| * needs (tab_alias=b, col_alias=x) in the GBy RR. |
| * tab_alias=b comes from looking at the RowResolver that is the ancestor |
| * before any GBy/ReduceSinks added for the GBY operation. |
| */ |
| Operator<? extends OperatorDesc> parent = reduceSinkOp; |
| while ( parent instanceof ReduceSinkOperator || |
| parent instanceof GroupByOperator ) { |
| parent = parent.getParentOperators().get(0); |
| } |
| RowResolver parentRR = opParseCtx.get(parent).getRowResolver(); |
| try { |
| tab_alias = Optional.ofNullable(parentRR.get(null, col_alias)) |
| .map(ColumnInfo::getTabAlias) |
| .orElse(null); |
      } catch (SemanticException se) {
        // Ignore: if the column cannot be resolved unambiguously in the parent RR,
        // leave tab_alias as null.
      }
| gByRR.put(tab_alias, col_alias, colInfo); |
| } |
| } |
| |
| private WriteEntity.WriteType determineWriteType(LoadTableDesc ltd, String dest) { |
| |
| if (ltd == null) { |
| return WriteEntity.WriteType.INSERT_OVERWRITE; |
| } |
| return ((ltd.getLoadFileType() == LoadFileType.REPLACE_ALL || ltd |
| .isInsertOverwrite()) ? WriteEntity.WriteType.INSERT_OVERWRITE : getWriteType(dest)); |
| |
| } |
| |
| private WriteEntity.WriteType getWriteType(String dest) { |
| return updating(dest) ? WriteEntity.WriteType.UPDATE : |
| (deleting(dest) ? WriteEntity.WriteType.DELETE : WriteEntity.WriteType.INSERT); |
| } |
| private boolean isAcidOutputFormat(Class<? extends OutputFormat> of) { |
| return Arrays.asList(of.getInterfaces()).contains(AcidOutputFormat.class); |
| } |
| |
| // Note that this method assumes you have already decided this is an Acid table. It cannot |
| // figure out if a table is Acid or not. |
| private AcidUtils.Operation getAcidType(String destination) { |
| return deleting(destination) ? AcidUtils.Operation.DELETE : |
| (updating(destination) ? AcidUtils.Operation.UPDATE : |
| AcidUtils.Operation.INSERT); |
| } |
| |
| private Context.Operation getWriteOperation(String destination) { |
| return deleting(destination) ? Context.Operation.DELETE : |
| (updating(destination) ? Context.Operation.UPDATE : |
| Context.Operation.OTHER); |
| } |
| |
| private AcidUtils.Operation getAcidType(Class<? extends OutputFormat> of, String dest, |
| boolean isMM) { |
| |
| // no need for any checks in the case of insert-only |
| if (isMM) { |
| return getAcidType(dest); |
| } |
| |
| if (SessionState.get() == null || !getTxnMgr().supportsAcid()) { |
| return AcidUtils.Operation.NOT_ACID; |
| } else if (isAcidOutputFormat(of)) { |
| return getAcidType(dest); |
| } else { |
| return AcidUtils.Operation.NOT_ACID; |
| } |
| } |
| |
| protected boolean updating(String destination) { |
| return destination.startsWith(Context.DestClausePrefix.UPDATE.toString()); |
| } |
| |
| private boolean deleting(String destination) { |
| return destination.startsWith(Context.DestClausePrefix.DELETE.toString()); |
| } |
| |
| // Make sure the proper transaction manager that supports ACID is being used |
| private void checkAcidTxnManager(Table table) throws SemanticException { |
| if (SessionState.get() != null && !getTxnMgr().supportsAcid() |
| && !HiveConf.getBoolVar(conf, ConfVars.HIVE_IN_TEST_REPL)) { |
| throw new SemanticException(ErrorMsg.TXNMGR_NOT_ACID, table.getDbName(), table.getTableName()); |
| } |
| } |
| |
| ASTNode genSelectDIAST(RowResolver rr) { |
| Map<String, Map<String, ColumnInfo>> map = rr.getRslvMap(); |
| ASTNode selectDI = new ASTNode(SELECTDI_TOKEN); |
| // Note: this will determine the order of columns in the result. For now, the columns for each |
| // table will be together; the order of the tables, as well as the columns within each |
| // table, is deterministic, but undefined - RR stores them in the order of addition. |
| for (String tabAlias : map.keySet()) { |
| for (Entry<String, ColumnInfo> entry : map.get(tabAlias).entrySet()) { |
| selectDI.addChild(buildSelExprSubTree(tabAlias, entry.getKey())); |
| } |
| } |
| return selectDI; |
| } |
| |
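  /**
   * Builds a TOK_SELEXPR subtree of the form (. (TOK_TABLE_OR_COL tableAlias) col), used when
   * synthesizing a SELECT DISTINCT column list from a RowResolver.
   */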
| private ASTNode buildSelExprSubTree(String tableAlias, String col) { |
| tableAlias = StringInternUtils.internIfNotNull(tableAlias); |
| col = StringInternUtils.internIfNotNull(col); |
| ASTNode selexpr = new ASTNode(SELEXPR_TOKEN); |
| ASTNode tableOrCol = new ASTNode(TABLEORCOL_TOKEN); |
| ASTNode dot = new ASTNode(DOT_TOKEN); |
| tableOrCol.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, tableAlias))); |
| dot.addChild(tableOrCol); |
| dot.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, col))); |
| selexpr.addChild(dot); |
| return selexpr; |
| } |
| |
| private void copyInfoToQueryProperties(QueryProperties queryProperties) { |
| if (qb != null) { |
| queryProperties.setQuery(qb.getIsQuery()); |
| queryProperties.setAnalyzeCommand(qb.getParseInfo().isAnalyzeCommand()); |
| queryProperties.setNoScanAnalyzeCommand(qb.getParseInfo().isNoScanAnalyzeCommand()); |
| queryProperties.setAnalyzeRewrite(qb.isAnalyzeRewrite()); |
| queryProperties.setCTAS(qb.getTableDesc() != null); |
| queryProperties.setHasOuterOrderBy(!qb.getParseInfo().getIsSubQ() && |
| !qb.getParseInfo().getDestToOrderBy().isEmpty()); |
| queryProperties.setOuterQueryLimit(qb.getParseInfo().getOuterQueryLimit()); |
| queryProperties.setMaterializedView(qb.isMaterializedView()); |
| } |
| } |
| |
| private void warn(String msg) { |
| SessionState.getConsole().printInfo(String.format("Warning: %s", msg)); |
| } |
| |
| public List<LoadFileDesc> getLoadFileWork() { |
| return loadFileWork; |
| } |
| |
| public List<LoadTableDesc> getLoadTableWork() { |
| return loadTableWork; |
| } |
| |
| public void setLoadFileWork(List<LoadFileDesc> loadFileWork) { |
| this.loadFileWork = loadFileWork; |
| } |
| |
| public void setLoadTableWork(List<LoadTableDesc> tblWork) { |
| this.loadTableWork = tblWork; |
| } |
| |
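  /**
   * Wraps every Identifier token in the rewrite stream in backquotes, escaping any embedded
   * backquote by doubling it (e.g. col`1 becomes `col``1`), so the unparsed query text remains
   * valid when quoted identifier support is enabled.
   */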
| private void quoteIdentifierTokens(TokenRewriteStream tokenRewriteStream) { |
| if (conf.getVar(ConfVars.HIVE_QUOTEDID_SUPPORT).equals("none")) { |
| return; |
| } |
| |
| for (int idx = tokenRewriteStream.MIN_TOKEN_INDEX; idx <= tokenRewriteStream.size()-1; idx++) { |
| Token curTok = tokenRewriteStream.get(idx); |
| if (curTok.getType() == HiveLexer.Identifier) { |
| // The Tokens have no distinction between Identifiers and QuotedIdentifiers. |
| // Ugly solution is just to surround all identifiers with quotes. |
| // Re-escape any backtick (`) characters in the identifier. |
| String escapedTokenText = curTok.getText().replaceAll("`", "``"); |
| tokenRewriteStream.replace(curTok, "`" + escapedTokenText + "`"); |
| } |
| } |
| } |
| |
| /** |
| * Generate the query string for this query (with fully resolved table references). |
| * @return The query string with resolved references. NULL if an error occurred. |
| */ |
| private String getQueryStringForCache(ASTNode ast) { |
| unparseTranslator.applyTranslations(ctx.getTokenRewriteStream(), RESULTS_CACHE_KEY_TOKEN_REWRITE_PROGRAM); |
| return ctx.getTokenRewriteStream() |
| .toString(RESULTS_CACHE_KEY_TOKEN_REWRITE_PROGRAM, ast.getTokenStartIndex(), ast.getTokenStopIndex()); |
| } |
| |
| private ValidTxnWriteIdList getQueryValidTxnWriteIdList() throws SemanticException { |
| // TODO: Once HIVE-18948 is in, should be able to retrieve writeIdList from the conf. |
| //cachedWriteIdList = AcidUtils.getValidTxnWriteIdList(conf); |
| // |
| List<String> transactionalTables = tablesFromReadEntities(inputs) |
| .stream() |
| .filter(AcidUtils::isTransactionalTable) |
| .map(Table::getFullyQualifiedName) |
| .collect(Collectors.toList()); |
| if (transactionalTables.size() > 0) { |
| try { |
| String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY); |
| return getTxnMgr().getValidWriteIds(transactionalTables, txnString); |
| } catch (Exception err) { |
| String msg = "Error while getting the txnWriteIdList for tables " + transactionalTables |
| + " and validTxnList " + conf.get(ValidTxnList.VALID_TXNS_KEY); |
| throw new SemanticException(msg, err); |
| } |
| } |
| |
| // No transactional tables. |
| return null; |
| } |
| |
| private QueryResultsCache.LookupInfo createLookupInfoForQuery(ASTNode astNode) throws SemanticException { |
| QueryResultsCache.LookupInfo lookupInfo = null; |
| String queryString = getQueryStringForCache(astNode); |
| if (queryString != null) { |
| ValidTxnWriteIdList writeIdList = getQueryValidTxnWriteIdList(); |
| lookupInfo = new QueryResultsCache.LookupInfo(queryString, () -> writeIdList); |
| } |
| return lookupInfo; |
| } |
| |
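  /**
   * The query results cache can be used only when it is enabled in the configuration and, for
   * HiveServer2 queries, doAs is not in effect.
   */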
| private boolean isResultsCacheEnabled() { |
| return conf.getBoolVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_ENABLED) && |
| !(SessionState.get().isHiveServerQuery() && conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS)); |
| } |
| |
| /** |
| * Set the query plan to use cache entry passed in to return the query results. |
| * @param cacheEntry The results cache entry that will be used to resolve the query. |
| */ |
| private void useCachedResult(QueryResultsCache.CacheEntry cacheEntry, boolean needsReset) { |
| if (needsReset) { |
| reset(true); |
| inputs.clear(); |
| } |
| |
| // Change query FetchTask to use new location specified in results cache. |
| FetchTask fetchTask = (FetchTask) TaskFactory.get(cacheEntry.getFetchWork()); |
| setFetchTask(fetchTask); |
| |
| queryState.setCommandType(cacheEntry.getQueryInfo().getHiveOperation()); |
| resultSchema = cacheEntry.getQueryInfo().getResultSchema(); |
| setTableAccessInfo(cacheEntry.getQueryInfo().getTableAccessInfo()); |
| setColumnAccessInfo(cacheEntry.getQueryInfo().getColumnAccessInfo()); |
| inputs.addAll(cacheEntry.getQueryInfo().getInputs()); |
| |
| // Set recursive traversal in case the cached query was UNION generated by Tez. |
| conf.setBoolean(FileInputFormat.INPUT_DIR_RECURSIVE, true); |
| |
| // Indicate that the query will use a cached result. |
| setCacheUsage(new CacheUsage( |
| CacheUsage.CacheStatus.QUERY_USING_CACHE, cacheEntry)); |
| } |
| |
| private QueryResultsCache.QueryInfo createCacheQueryInfoForQuery(QueryResultsCache.LookupInfo lookupInfo) { |
| long queryTime = SessionState.get().getQueryCurrentTimestamp().toEpochMilli(); |
| return new QueryResultsCache.QueryInfo(queryTime, lookupInfo, queryState.getHiveOperation(), |
| resultSchema, getTableAccessInfo(), getColumnAccessInfo(), inputs); |
| } |
| |
| /** |
| * Some initial checks for a query to see if we can look this query up in the results cache. |
| */ |
| private boolean queryTypeCanUseCache() { |
| if (this.qb == null || this.qb.getParseInfo() == null) { |
| return false; |
| } |
| if (this instanceof ColumnStatsSemanticAnalyzer) { |
| // Column stats generates "select compute_stats() .." queries. |
| // Disable caching for these. |
| return false; |
| } |
| |
| if (queryState.getHiveOperation() != HiveOperation.QUERY) { |
| return false; |
| } |
| if (qb.getParseInfo().isAnalyzeCommand()) { |
| return false; |
| } |
| if (qb.getParseInfo().hasInsertTables()) { |
| return false; |
| } |
| if (qb.getParseInfo().isInsertOverwriteDirectory()) { |
| return false; |
| } |
| |
| // HIVE-19096 - disable for explain analyze |
| return ctx.getExplainAnalyze() == null; |
| } |
| |
| private boolean needsTransform() { |
| return SessionState.get().getAuthorizerV2() != null && |
| SessionState.get().getAuthorizerV2().needTransform(); |
| } |
| |
| /** |
| * Called after a query plan has been generated, to determine if the results of this query |
| * can be added to the results cache. |
| */ |
| private boolean queryCanBeCached() { |
| if (!queryTypeCanUseCache()) { |
| LOG.info("Not eligible for results caching - wrong query type"); |
| return false; |
| } |
| |
| // Query should have a fetch task. |
| if (getFetchTask() == null) { |
| LOG.info("Not eligible for results caching - no fetch task"); |
| return false; |
| } |
| |
| // At least one mr/tez job |
| if (Utilities.getNumClusterJobs(getRootTasks()) == 0) { |
| LOG.info("Not eligible for results caching - no mr/tez jobs"); |
| return false; |
| } |
| |
| // The query materialization validation check only occurs in CBO. Thus only cache results if CBO was used. |
| if (!ctx.isCboSucceeded()) { |
      LOG.info("Not eligible for results caching - CBO was not run");
| QueryResultsCache.incrementMetric(MetricsConstant.QC_INVALID_FOR_CACHING); |
| return false; |
| } |
| |
| if (!isValidQueryCaching()) { |
| LOG.info("Not eligible for results caching - {}", getInvalidResultCacheReason()); |
| QueryResultsCache.incrementMetric(MetricsConstant.QC_INVALID_FOR_CACHING); |
| return false; |
| } |
| |
| if (!conf.getBoolVar(ConfVars.HIVE_QUERY_RESULTS_CACHE_NONTRANSACTIONAL_TABLES_ENABLED)) { |
| List<Table> nonTransactionalTables = getNonTransactionalTables(); |
| if (nonTransactionalTables.size() > 0) { |
| LOG.info("Not eligible for results caching - query contains non-transactional tables {}", |
| nonTransactionalTables); |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| private Set<Table> tablesFromReadEntities(Set<ReadEntity> readEntities) { |
| return readEntities.stream() |
| .filter(entity -> entity.getType() == Entity.Type.TABLE) |
| .map(Entity::getTable) |
| .collect(Collectors.toSet()); |
| } |
| |
| private List<Table> getNonTransactionalTables() { |
| // views have been expanded by CBO already and can be ignored |
| return tablesFromReadEntities(inputs) |
| .stream() |
| .filter(table -> !table.isView()) |
| .filter(table -> !AcidUtils.isTransactionalTable(table)) |
| .collect(Collectors.toList()); |
| } |
| |
| /** |
| * Check the query results cache to see if the query represented by the lookupInfo can be |
| * answered using the results cache. If the cache contains a suitable entry, the semantic analyzer |
| * will be configured to use the found cache entry to answer the query. |
| */ |
| private boolean checkResultsCache(QueryResultsCache.LookupInfo lookupInfo, boolean needsReset) { |
| if (lookupInfo == null) { |
| return false; |
| } |
| try { |
| // In case this has not been initialized elsewhere. |
| QueryResultsCache.initialize(conf); |
| } catch (Exception err) { |
| throw new IllegalStateException(err); |
| } |
| // Don't increment the reader count for explain queries. |
| boolean isExplainQuery = (ctx.getExplainConfig() != null); |
| do { |
| QueryResultsCache.CacheEntry cacheEntry = QueryResultsCache.getInstance().lookup(lookupInfo); |
| if (cacheEntry != null) { |
| // Potentially wait on the cache entry if entry is in PENDING status |
| // Blocking here can potentially be dangerous - for example if the global compile lock |
| // is used this will block all subsequent queries that try to acquire the compile lock, |
| // so it should not be done unless parallel compilation is enabled. |
| // We might not want to block for explain queries as well. |
| if (cacheEntry.getStatus() == QueryResultsCache.CacheEntryStatus.PENDING) { |
| if (!isExplainQuery && |
| conf.getBoolVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_WAIT_FOR_PENDING_RESULTS) && |
| conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_PARALLEL_COMPILATION)) { |
| if (!cacheEntry.waitForValidStatus()) { |
| LOG.info("Waiting on pending cacheEntry, but it failed to become valid"); |
| // The pending query we were waiting on failed, but there might still be another |
| // pending or completed entry in the cache that can satisfy this query. Lookup again. |
| continue; |
| } |
| } else { |
| LOG.info("Not waiting for pending cacheEntry"); |
| return false; |
| } |
| } |
| |
| if (cacheEntry.getStatus() == QueryResultsCache.CacheEntryStatus.VALID) { |
| if (!isExplainQuery) { |
| if (!cacheEntry.addReader()) { |
| return false; |
| } |
| } |
| // Use the cache rather than full query execution. |
| // At this point the caller should return from semantic analysis. |
| useCachedResult(cacheEntry, needsReset); |
| return true; |
| } |
| } |
| } while (false); |
| return false; |
| } |
| |
| private static final class ColsAndTypes { |
| public ColsAndTypes(String cols, String colTypes) { |
| this.cols = cols; |
| this.colTypes = colTypes; |
| } |
| public String cols; |
| public String colTypes; |
| } |
| |
| public String getInvalidAutomaticRewritingMaterializationReason() { |
| return invalidAutomaticRewritingMaterializationReason; |
| } |
| |
| public void setInvalidAutomaticRewritingMaterializationReason( |
| String invalidAutomaticRewritingMaterializationReason) { |
| this.invalidAutomaticRewritingMaterializationReason = |
| invalidAutomaticRewritingMaterializationReason; |
| } |
| |
| public boolean isValidAutomaticRewritingMaterialization() { |
| return (invalidAutomaticRewritingMaterializationReason == null); |
| } |
| |
| public String getInvalidResultCacheReason() { |
| return invalidResultCacheReason; |
| } |
| |
| public void setInvalidResultCacheReason( |
| String invalidQueryMaterializationReason) { |
| this.invalidResultCacheReason = invalidQueryMaterializationReason; |
| } |
| |
| public boolean isValidQueryCaching() { |
| return (invalidResultCacheReason == null); |
| } |
| |
| public void forViewCreation(String fqViewName) { |
| this.fqViewName = fqViewName; |
| this.forViewCreation = true; |
| } |
| |
| public Map<String, TableScanOperator> getTopOps() { |
| return topOps; |
| } |
| |
| public Map<String, ReadEntity> getViewAliasToInput() { |
| return viewAliasToInput; |
| } |
| |
| public Operator getSinkOp() { |
| return sinkOp; |
| } |
| |
| protected enum MaterializationRebuildMode { |
| NONE, |
| INSERT_OVERWRITE_REBUILD, |
| AGGREGATE_INSERT_REBUILD, |
| AGGREGATE_INSERT_DELETE_REBUILD, |
| JOIN_INSERT_REBUILD, |
| JOIN_INSERT_DELETE_REBUILD |
| } |
| |
| /** |
| * @return table name in db.table form with proper quoting/escaping to be used in a SQL statement |
| */ |
| protected String getFullTableNameForSQL(ASTNode n) throws SemanticException { |
| switch (n.getType()) { |
| case HiveParser.TOK_TABNAME: |
| TableName tableName = getQualifiedTableName(n); |
| return HiveTableName.ofNullable(HiveUtils.unparseIdentifier(tableName.getTable(), this.conf), |
| HiveUtils.unparseIdentifier(tableName.getDb(), this.conf), tableName.getTableMetaRef()).getNotEmptyDbTable(); |
| case HiveParser.TOK_TABREF: |
| return getFullTableNameForSQL((ASTNode) n.getChild(0)); |
| default: |
| throw raiseWrongType("TOK_TABNAME", n); |
| } |
| } |
| |
| /** |
| * Append list of partition columns to Insert statement, i.e. the 1st set of partCol1,partCol2 |
| * INSERT INTO T PARTITION(partCol1,partCol2...) SELECT col1, ... partCol1,partCol2... |
| */ |
| protected void addPartitionColsToInsert(List<FieldSchema> partCols, StringBuilder rewrittenQueryStr) { |
| addPartitionColsToInsert(partCols, null, rewrittenQueryStr); |
| } |
| |
| /** |
| * Append list of partition columns to Insert statement. If user specified partition spec, then |
| * use it to get/set the value for partition column else use dynamic partition mode with no value. |
| * Static partition mode: |
| * INSERT INTO T PARTITION(partCol1=val1,partCol2...) SELECT col1, ... partCol1,partCol2... |
| * Dynamic partition mode: |
| * INSERT INTO T PARTITION(partCol1,partCol2...) SELECT col1, ... partCol1,partCol2... |
| */ |
| protected void addPartitionColsToInsert(List<FieldSchema> partCols, |
| Map<String, String> partSpec, |
| StringBuilder rewrittenQueryStr) { |
| // If the table is partitioned we have to put the partition() clause in |
| if (partCols != null && partCols.size() > 0) { |
| rewrittenQueryStr.append(" partition ("); |
| boolean first = true; |
| for (FieldSchema fschema : partCols) { |
| if (first) { |
| first = false; |
| } else { |
| rewrittenQueryStr.append(", "); |
| } |
| // Would be nice if there was a way to determine if quotes are needed |
| rewrittenQueryStr.append(HiveUtils.unparseIdentifier(fschema.getName(), this.conf)); |
| String partVal = (partSpec != null) ? partSpec.get(fschema.getName()) : null; |
| if (partVal != null) { |
| rewrittenQueryStr.append("=").append(partVal); |
| } |
| } |
| rewrittenQueryStr.append(")"); |
| } |
| } |
| |
| @Override |
| public WriteEntity getAcidAnalyzeTable() { |
| return acidAnalyzeTable; |
| } |
| |
| @Override |
| public void executeUnParseTranslations() { |
| unparseTranslator.applyTranslations(ctx.getTokenRewriteStream()); |
| } |
| |
| @Override |
| public void startAnalysis() { |
| if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_HMS_QUERY_CACHE_ENABLED)) { |
| queryState.createHMSCache(); |
| } |
| } |
| } |