/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.parse;
import com.google.common.base.Function;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableBiMap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.regex.Pattern;
import org.antlr.runtime.ClassicToken;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.tree.Tree;
import org.antlr.runtime.tree.TreeVisitor;
import org.antlr.runtime.tree.TreeVisitorAction;
import org.apache.calcite.adapter.druid.DruidQuery;
import org.apache.calcite.adapter.druid.DruidSchema;
import org.apache.calcite.adapter.druid.DruidTable;
import org.apache.calcite.adapter.java.JavaTypeFactory;
import org.apache.calcite.adapter.jdbc.JdbcConvention;
import org.apache.calcite.adapter.jdbc.JdbcImplementor;
import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcAggregate;
import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcFilter;
import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcJoin;
import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcProject;
import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcSort;
import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcUnion;
import org.apache.calcite.adapter.jdbc.JdbcSchema;
import org.apache.calcite.adapter.jdbc.JdbcTable;
import org.apache.calcite.config.CalciteConnectionConfig;
import org.apache.calcite.config.CalciteConnectionConfigImpl;
import org.apache.calcite.config.CalciteConnectionProperty;
import org.apache.calcite.config.NullCollation;
import org.apache.calcite.interpreter.BindableConvention;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptMaterialization;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptSchema;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.plan.hep.HepMatchOrder;
import org.apache.calcite.plan.hep.HepPlanner;
import org.apache.calcite.plan.hep.HepProgram;
import org.apache.calcite.plan.hep.HepProgramBuilder;
import org.apache.calcite.plan.hep.HepRelVertex;
import org.apache.calcite.plan.volcano.AbstractConverter;
import org.apache.calcite.plan.volcano.RelSubset;
import org.apache.calcite.rel.AbstractRelNode;
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelCollationImpl;
import org.apache.calcite.rel.RelCollations;
import org.apache.calcite.rel.RelDistribution;
import org.apache.calcite.rel.RelDistributions;
import org.apache.calcite.rel.RelFieldCollation;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelVisitor;
import org.apache.calcite.rel.convert.ConverterImpl;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.rel.core.CorrelationId;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.SetOp;
import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rel.metadata.CachingRelMetadataProvider;
import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.rules.JoinToMultiJoinRule;
import org.apache.calcite.rel.rules.LoptOptimizeJoinRule;
import org.apache.calcite.rel.rules.ProjectMergeRule;
import org.apache.calcite.rel.rules.ProjectRemoveRule;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rel.type.RelDataTypeImpl;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexExecutor;
import org.apache.calcite.rex.RexFieldAccess;
import org.apache.calcite.rex.RexFieldCollation;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexShuttle;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.rex.RexWindowBound;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.sql.SqlAggFunction;
import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlDialect;
import org.apache.calcite.sql.SqlDialectFactoryImpl;
import org.apache.calcite.sql.SqlExplainLevel;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlLiteral;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.SqlWindow;
import org.apache.calcite.sql.dialect.HiveSqlDialect;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.sql.validate.SqlValidatorUtil;
import org.apache.calcite.tools.Frameworks;
import org.apache.calcite.util.CompositeList;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.ImmutableNullableList;
import org.apache.calcite.util.Pair;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.conf.HiveConf.StrictChecks;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry;
import org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.metadata.NotNullConstraint;
import org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteViewSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveMaterializedViewASTSubQueryRewriteShuttle;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTezModelRelMetadataProvider;
import org.apache.hadoop.hive.ql.optimizer.calcite.RuleEventLogger;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateSortLimitRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinSwapConstraintsRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSemiJoinProjectTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializationRelMetadataProvider;
import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptMaterializationValidator;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExcept;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRexExprList;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.HiveJdbcConverter;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.JdbcHiveTableScan;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateJoinTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregatePullUpConstantsRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateReduceFunctionsRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateReduceRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateSplitRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveCardinalityPreservingJoinRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveDruidRules;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExceptRewriteRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFieldTrimmerRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterAggregateTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterMergeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTSTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSortPredicates;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSortTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInBetweenExpandRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveIntersectMergeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveIntersectRewriteRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinCommuteRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinConstraintsRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAntiSemiJoinRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePointLookupOptimizerRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectFilterPullUpConstantsRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectJoinTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectOverIntersectRemoveRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortExchangeTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsWithStatsRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelDecorrelator;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRemoveGBYSemiJoinRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRemoveSqCountCheck;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRewriteToDataSketchesRules;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSemiJoinRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortJoinReduceRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortLimitRemoveRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortMergeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortPullUpConstantsRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortUnionReduceRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSubQueryRemoveRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionSimpleSelectsToInlineTableRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionMergeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionPullUpConstantsRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingLastValueRewrite;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCAbstractSplitFilterRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCAggregationPushDownRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCExpandExpressionsRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCExtractJoinFilterRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCFilterJoinRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCFilterPushDownRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCJoinPushDownRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCProjectPushDownRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCSortPushDownRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCUnionPushDownRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewBoxing;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTBuilder;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter;
import org.apache.hadoop.hive.ql.parse.type.FunctionHelper;
import org.apache.hadoop.hive.ql.parse.type.FunctionHelper.AggregateInfo;
import org.apache.hadoop.hive.ql.parse.type.HiveFunctionHelper;
import org.apache.hadoop.hive.ql.parse.type.JoinTypeCheckCtx;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.PlanModifierForReturnPath;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec;
import org.apache.hadoop.hive.ql.parse.QBExpr.Opcode;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType;
import org.apache.hadoop.hive.ql.parse.type.RexNodeTypeCheck;
import org.apache.hadoop.hive.ql.parse.type.TypeCheckCtx;
import org.apache.hadoop.hive.ql.parse.type.TypeCheckProcFactory;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.plan.mapper.EmptyStatsSource;
import org.apache.hadoop.hive.ql.plan.mapper.StatsSource;
import org.apache.hadoop.hive.ql.reexec.ReCompileException;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFInline;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.joda.time.Interval;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.math.BigDecimal;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.Deque;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import javax.sql.DataSource;
import static org.apache.hadoop.hive.ql.optimizer.calcite.HiveMaterializedViewASTSubQueryRewriteShuttle.getMaterializedViewByAST;
import static org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization.RewriteAlgorithm.ANY;
public class CalcitePlanner extends SemanticAnalyzer {
private static final String EXCLUDED_RULES_PREFIX = "Excluded rules: ";
/**
* {@link org.antlr.runtime.TokenRewriteStream} offers the possibility of multiple rewrites of the same
* input text (in our case the SQL query text). These rewrites are called programs and are identified
* by a string. EXPANDED_QUERY_TOKEN_REWRITE_PROGRAM identifies the program which replaces all
* identifiers in the query with fully qualified identifiers.
*/
private static final String EXPANDED_QUERY_TOKEN_REWRITE_PROGRAM = "EXPANDED_QUERY_PROGRAM";
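// A hypothetical illustration of how a rewrite program is applied to the token stream
// (variable names here are assumptions; the actual rewrites happen elsewhere during analysis):
//   tokenRewriteStream.replace(EXPANDED_QUERY_TOKEN_REWRITE_PROGRAM, startIdx, stopIdx, "`db`.`tbl`");
//   String expanded = tokenRewriteStream.toString(EXPANDED_QUERY_TOKEN_REWRITE_PROGRAM, firstIdx, lastIdx);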
private final AtomicInteger noColsMissingStats = new AtomicInteger(0);
private SemanticException semanticException;
private boolean runCBO = true;
private boolean disableSemJoinReordering = true;
private final CBOFallbackStrategy fallbackStrategy;
private EnumSet<ExtendedCBOProfile> profilesCBO;
private static final CommonToken FROM_TOKEN =
new ImmutableCommonToken(HiveParser.TOK_FROM, "TOK_FROM");
private static final CommonToken DEST_TOKEN =
new ImmutableCommonToken(HiveParser.TOK_DESTINATION, "TOK_DESTINATION");
private static final CommonToken DIR_TOKEN =
new ImmutableCommonToken(HiveParser.TOK_DIR, "TOK_DIR");
private static final CommonToken TMPFILE_TOKEN =
new ImmutableCommonToken(HiveParser.TOK_TMP_FILE, "TOK_TMP_FILE");
private static final CommonToken SELECT_TOKEN =
new ImmutableCommonToken(HiveParser.TOK_SELECT, "TOK_SELECT");
private static final CommonToken SELEXPR_TOKEN =
new ImmutableCommonToken(HiveParser.TOK_SELEXPR, "TOK_SELEXPR");
private static final CommonToken TABLEORCOL_TOKEN =
new ImmutableCommonToken(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL");
private static final CommonToken INSERT_TOKEN =
new ImmutableCommonToken(HiveParser.TOK_INSERT, "TOK_INSERT");
private static final CommonToken QUERY_TOKEN =
new ImmutableCommonToken(HiveParser.TOK_QUERY, "TOK_QUERY");
private static final CommonToken SUBQUERY_TOKEN =
new ImmutableCommonToken(HiveParser.TOK_SUBQUERY, "TOK_SUBQUERY");
private static final Pattern PATTERN_VARCHAR =
Pattern.compile("VARCHAR\\(2147483647\\)");
private static final Pattern PATTERN_TIMESTAMP =
Pattern.compile("TIMESTAMP\\(9\\)");
/**
* This is the list of RelNode classes that may appear in plans handled by Hive.
*/
private static final List<Class<? extends RelNode>> HIVE_REL_NODE_CLASSES =
ImmutableList.of(
RelNode.class,
AbstractRelNode.class,
RelSubset.class,
HepRelVertex.class,
ConverterImpl.class,
AbstractConverter.class,
HiveTableScan.class,
HiveAggregate.class,
HiveAntiJoin.class,
HiveExcept.class,
HiveFilter.class,
HiveIntersect.class,
HiveJoin.class,
HiveMultiJoin.class,
HiveProject.class,
HiveRelNode.class,
HiveSemiJoin.class,
HiveSortExchange.class,
HiveSortLimit.class,
HiveTableFunctionScan.class,
HiveUnion.class,
DruidQuery.class,
HiveJdbcConverter.class,
JdbcHiveTableScan.class,
JdbcAggregate.class,
JdbcFilter.class,
JdbcJoin.class,
JdbcProject.class,
JdbcSort.class,
JdbcUnion.class);
public CalcitePlanner(QueryState queryState) throws SemanticException {
super(queryState);
if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED)) {
runCBO = false;
disableSemJoinReordering = false;
}
fallbackStrategy = CBOFallbackStrategy.valueOf(conf.getVar(ConfVars.HIVE_CBO_FALLBACK_STRATEGY));
}
public void resetCalciteConfiguration() {
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED)) {
runCBO = true;
disableSemJoinReordering = true;
}
}
@Override
@SuppressWarnings("nls")
public void analyzeInternal(ASTNode ast) throws SemanticException {
if (runCBO) {
super.analyzeInternal(ast, PreCboCtx::new);
} else {
super.analyzeInternal(ast);
}
}
/**
* This method is useful if we want to obtain the logical plan of the query after it has been parsed and
* optimized by Calcite.
*
* @return the Calcite plan for the query, null if it could not be generated
*/
public RelNode genLogicalPlan(ASTNode ast) throws SemanticException {
LOG.info("Starting generating logical plan");
PreCboCtx cboCtx = new PreCboCtx();
// Process position aliases here, before generating the resolved parse tree
processPositionAlias(ast);
if (!genResolvedParseTree(ast, cboCtx)) {
return null;
}
ASTNode queryForCbo = ast;
if (cboCtx.type == PreCboCtx.Type.CTAS || cboCtx.type == PreCboCtx.Type.VIEW) {
queryForCbo = cboCtx.nodeOfInterest; // nodeOfInterest is the query
}
Pair<Boolean, String> pairCanCBOHandleReason = canCBOHandleAst(queryForCbo, getQB(), cboCtx);
runCBO = pairCanCBOHandleReason.left;
if (!runCBO) {
ctx.setCboInfo("Plan not optimized by CBO because the statement " + pairCanCBOHandleReason.right);
return null;
}
profilesCBO = obtainCBOProfiles(queryProperties);
disableJoinMerge = true;
final RelNode resPlan = logicalPlan();
LOG.info("Finished generating logical plan");
return resPlan;
}
public static RelOptPlanner createPlanner(HiveConf conf) {
return createPlanner(conf, new HashSet<>(), EmptyStatsSource.INSTANCE, false);
}
private static RelOptPlanner createPlanner(
HiveConf conf, Set<RelNode> corrScalarRexSQWithAgg,
StatsSource statsSource, boolean isExplainPlan) {
final Double maxSplitSize = (double) HiveConf.getLongVar(
conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE);
final Double maxMemory = (double) HiveConf.getLongVar(
conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory);
HiveRulesRegistry registry = new HiveRulesRegistry();
Properties calciteConfigProperties = new Properties();
calciteConfigProperties.setProperty(
CalciteConnectionProperty.TIME_ZONE.camelName(),
conf.getLocalTimeZone().getId());
calciteConfigProperties.setProperty(
CalciteConnectionProperty.MATERIALIZATIONS_ENABLED.camelName(),
Boolean.FALSE.toString());
CalciteConnectionConfig calciteConfig = new CalciteConnectionConfigImpl(calciteConfigProperties);
boolean isCorrelatedColumns = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_STATS_CORRELATED_MULTI_KEY_JOINS);
boolean heuristicMaterializationStrategy = HiveConf.getVar(conf,
HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_SELECTION_STRATEGY).equals("heuristic");
HivePlannerContext confContext = new HivePlannerContext(algorithmsConf, registry, calciteConfig,
corrScalarRexSQWithAgg,
new HiveConfPlannerContext(isCorrelatedColumns, heuristicMaterializationStrategy, isExplainPlan),
statsSource);
RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(confContext);
planner.addListener(new RuleEventLogger());
return planner;
}
@Override
@SuppressWarnings("rawtypes")
Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
final Operator sinkOp;
if (!runCBO) {
sinkOp = super.genOPTree(ast, plannerCtx);
} else {
PreCboCtx cboCtx = (PreCboCtx) plannerCtx;
List<ASTNode> oldHints = new ArrayList<>();
// Cache the hints before CBO runs and removes them.
// Use the hints later in top level QB.
getHintsFromQB(getQB(), oldHints);
// Note: for now, we don't actually pass the queryForCbo to CBO, because
// it accepts qb, not AST, and can also access all the private stuff in
// SA. We rely on the fact that CBO ignores the unknown tokens (create
// table, destination), so if the query is otherwise ok, it is as if we
// did remove those and gave CBO the proper AST. That is kinda hacky.
ASTNode queryForCbo = ast;
if (cboCtx.type == PreCboCtx.Type.CTAS || cboCtx.type == PreCboCtx.Type.VIEW) {
queryForCbo = cboCtx.nodeOfInterest; // nodeOfInterest is the query
}
Pair<Boolean, String> canCBOHandleReason = canCBOHandleAst(queryForCbo, getQB(), cboCtx);
runCBO = canCBOHandleReason.left;
if (queryProperties.hasMultiDestQuery()) {
handleMultiDestQuery(ast, cboCtx);
}
if (runCBO) {
profilesCBO = obtainCBOProfiles(queryProperties);
disableJoinMerge = true;
final boolean materializedView = getQB().isMaterializedView();
try {
// 0. Gen Optimized Plan
RelNode newPlan = logicalPlan();
if (this.conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
if (cboCtx.type == PreCboCtx.Type.VIEW && !materializedView) {
throw new SemanticException("Create view is not supported in cbo return path.");
}
sinkOp = getOptimizedHiveOPDag(newPlan);
if (oldHints.size() > 0) {
LOG.debug("Propagating hints to QB: " + oldHints);
getQB().getParseInfo().setHintList(oldHints);
}
LOG.info("CBO Succeeded; optimized logical plan.");
this.ctx.setCboInfo(getOptimizedByCboInfo());
this.ctx.setCboSucceeded(true);
} else {
// 1. Convert Plan to AST
ASTNode newAST = getOptimizedAST(newPlan);
// 1.1. Fix up the query for insert/ctas/materialized views
newAST = fixUpAfterCbo(ast, newAST, cboCtx);
// 2. Regen OP plan from optimized AST
if (forViewCreation) {
// the reset would remove the translations
executeUnparseTranlations();
// save the resultSchema before rewriting it
originalResultSchema = resultSchema;
}
if (cboCtx.type == PreCboCtx.Type.VIEW) {
try {
viewSelect = handleCreateViewDDL(newAST);
} catch (SemanticException e) {
throw new CalciteViewSemanticException(e.getMessage());
}
} else if (cboCtx.type == PreCboCtx.Type.CTAS) {
// CTAS
init(false);
setAST(newAST);
newAST = reAnalyzeCTASAfterCbo(newAST);
} else {
// All others
init(false);
}
if (oldHints.size() > 0) {
if (getQB().getParseInfo().getHints() != null) {
LOG.warn("Hints are not null in the optimized tree; "
+ "after CBO " + getQB().getParseInfo().getHints().dump());
} else {
LOG.debug("Propagating hints to QB: " + oldHints);
getQB().getParseInfo().setHintList(oldHints);
}
}
Phase1Ctx ctx_1 = initPhase1Ctx();
if (!doPhase1(newAST, getQB(), ctx_1, null)) {
throw new RuntimeException("Couldn't do phase1 on CBO optimized query plan");
}
// Unfortunately, making prunedPartitions immutable is not possible here:
// with semijoins, not all tables are costed in CBO, so their
// PartitionList is not evaluated until the run phase.
getMetaData(getQB());
disableJoinMerge = defaultJoinMerge;
sinkOp = genPlan(getQB());
LOG.info("CBO Succeeded; optimized logical plan.");
this.ctx.setCboInfo(getOptimizedByCboInfo());
this.ctx.setCboSucceeded(true);
if (this.ctx.isExplainPlan()) {
// Enrich explain with information derived from CBO
ExplainConfiguration explainConfig = this.ctx.getExplainConfig();
if (explainConfig.isCbo()) {
if (!explainConfig.isCboJoinCost()) {
// Include cost as provided by Calcite
newPlan.getCluster().invalidateMetadataQuery();
RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT);
}
if (explainConfig.isFormatted()) {
this.ctx.setCalcitePlan(HiveRelOptUtil.toJsonString(newPlan));
} else if (explainConfig.isCboCost() || explainConfig.isCboJoinCost()) {
this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan, SqlExplainLevel.ALL_ATTRIBUTES));
} else {
// Do not include join cost
this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan));
}
} else if (explainConfig.isFormatted()) {
this.ctx.setCalcitePlan(HiveRelOptUtil.toJsonString(newPlan));
this.ctx.setOptimizedSql(getOptimizedSql(newPlan));
} else if (explainConfig.isExtended()) {
this.ctx.setOptimizedSql(getOptimizedSql(newPlan));
}
}
if (LOG.isTraceEnabled()) {
LOG.trace(getOptimizedSql(newPlan));
LOG.trace(newAST.dump());
}
}
} catch (Exception e) {
LOG.error("CBO failed, skipping CBO. ", e);
String cboMsg = "Plan not optimized by CBO.";
boolean isMissingStats = noColsMissingStats.get() > 0;
if (isMissingStats) {
LOG.error("CBO failed due to missing column stats (see previous errors), skipping CBO");
cboMsg = "Plan not optimized by CBO due to missing statistics. Please check log for more details.";
} else if (e instanceof CalciteSemanticException) {
CalciteSemanticException cse = (CalciteSemanticException) e;
UnsupportedFeature unsupportedFeature = cse.getUnsupportedFeature();
if (unsupportedFeature != null) {
cboMsg = "Plan not optimized by CBO due to missing feature [" + unsupportedFeature + "].";
}
}
this.ctx.setCboInfo(cboMsg);
// Determine if we should re-throw the exception OR if we try to mark the query to retry as non-CBO.
if (fallbackStrategy.isFatal(e)) {
if (e instanceof RuntimeException || e instanceof SemanticException) {
// These types of exceptions do not need to be wrapped
throw e;
}
// Wrap all other errors (should only happen in tests)
throw new SemanticException(e);
} else {
throw new ReCompileException(this.ctx.getCboInfo());
}
} finally {
runCBO = false;
disableJoinMerge = defaultJoinMerge;
disableSemJoinReordering = false;
}
} else {
String msg;
if (canCBOHandleReason.right != null) {
msg = "Plan not optimized by CBO because the statement " + canCBOHandleReason.right;
} else {
msg = "Plan not optimized by CBO.";
}
this.ctx.setCboInfo(msg);
sinkOp = super.genOPTree(ast, plannerCtx);
}
}
return sinkOp;
}
private String getOptimizedByCboInfo() {
String ruleExclusionRegex = conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, "");
String cboInfo = "Plan optimized by CBO.";
if (!ruleExclusionRegex.isEmpty()) {
cboInfo = cboInfo + (" " + EXCLUDED_RULES_PREFIX + ruleExclusionRegex);
}
return cboInfo;
}
private ASTNode handleCreateViewDDL(ASTNode ast) throws SemanticException {
saveViewDefinition();
String originalText = createVwDesc.getViewOriginalText();
String expandedText = createVwDesc.getViewExpandedText();
List<FieldSchema> schema = createVwDesc.getSchema();
List<FieldSchema> partitionColumns = createVwDesc.getPartCols();
init(false);
setAST(ast);
ASTNode newAST = reAnalyzeViewAfterCbo(ast);
createVwDesc.setViewOriginalText(originalText);
createVwDesc.setViewExpandedText(expandedText);
createVwDesc.setSchema(schema);
createVwDesc.setPartCols(partitionColumns);
return newAST;
}
/*
* Tries to optimize the FROM clause of a multi-insert query. No attempt is made to optimize
* the insert clauses. If the FROM clause cannot be rewritten, CBO is disabled for the query.
*/
private void handleMultiDestQuery(ASTNode ast, PreCboCtx cboCtx) throws SemanticException {
// Not supported by CBO
if (!runCBO) {
return;
}
// Currently, we only optimize the content of the FROM clause
// for multi-insert queries. Thus, nodeOfInterest is the FROM clause
if (isJoinToken(cboCtx.nodeOfInterest)) {
// Join clause: rewriting is needed
ASTNode subq = rewriteASTForMultiInsert(ast, cboCtx.nodeOfInterest);
if (subq != null) {
// We could rewrite into a subquery
cboCtx.nodeOfInterest = (ASTNode) subq.getChild(0);
QB newQB = new QB(null, "", false);
Phase1Ctx ctx_1 = initPhase1Ctx();
doPhase1(cboCtx.nodeOfInterest, newQB, ctx_1, null);
setQB(newQB);
getMetaData(getQB());
} else {
runCBO = false;
}
} else if (cboCtx.nodeOfInterest.getToken().getType() == HiveParser.TOK_SUBQUERY) {
// Subquery: no rewriting needed
ASTNode subq = cboCtx.nodeOfInterest;
// First child is subquery, second child is alias
// We set the node of interest and QB to the subquery
// We do not need to generate the QB again, but rather we use it directly
cboCtx.nodeOfInterest = (ASTNode) subq.getChild(0);
String subQAlias = unescapeIdentifier(subq.getChild(1).getText());
final QB newQB = getQB().getSubqForAlias(subQAlias).getQB();
newQB.getParseInfo().setAlias("");
newQB.getParseInfo().setIsSubQ(false);
setQB(newQB);
} else {
// No need to run CBO (table ref or virtual table) or not supported
runCBO = false;
}
}
private ASTNode rewriteASTForMultiInsert(ASTNode query, ASTNode nodeOfInterest) {
// 1. gather references from original query
// This is a map from aliases to references.
// We keep all references as we will need to modify them after creating
// the subquery
final Multimap<String, Object> aliasNodes = ArrayListMultimap.create();
// To know if we need to bail out
final AtomicBoolean notSupported = new AtomicBoolean(false);
TreeVisitorAction action = new TreeVisitorAction() {
@Override
public Object pre(Object t) {
if (!notSupported.get()) {
if (ParseDriver.adaptor.getType(t) == HiveParser.TOK_ALLCOLREF) {
// TODO: this is a limitation of the AST rewriting approach that we will
// not be able to overcome till proper integration of full multi-insert
// queries with Calcite is implemented.
// The current rewriting gathers references from insert clauses and then
// updates them with the new subquery references. However, if insert
// clauses use * or tab.*, we cannot resolve the columns that we are
// referring to. Thus, we just bail out and those queries will currently
// not be optimized by Calcite.
// An example of such query is:
// FROM T_A a LEFT JOIN T_B b ON a.id = b.id
// INSERT OVERWRITE TABLE join_result_1
// SELECT a.*, b.*
// INSERT OVERWRITE TABLE join_result_3
// SELECT a.*, b.*;
notSupported.set(true);
} else if (ParseDriver.adaptor.getType(t) == HiveParser.DOT) {
Object c = ParseDriver.adaptor.getChild(t, 0);
if (c != null && ParseDriver.adaptor.getType(c) == HiveParser.TOK_TABLE_OR_COL) {
aliasNodes.put(((ASTNode) t).toStringTree(), t);
}
} else if (ParseDriver.adaptor.getType(t) == HiveParser.TOK_TABLE_OR_COL) {
Object p = ParseDriver.adaptor.getParent(t);
if (p == null || ParseDriver.adaptor.getType(p) != HiveParser.DOT) {
aliasNodes.put(((ASTNode) t).toStringTree(), t);
}
}
}
return t;
}
@Override
public Object post(Object t) {
return t;
}
};
TreeVisitor tv = new TreeVisitor(ParseDriver.adaptor);
// We will iterate through the children: if it is an INSERT, we will traverse
// the subtree to gather the references
for (int i = 0; i < query.getChildCount(); i++) {
ASTNode child = (ASTNode) query.getChild(i);
if (ParseDriver.adaptor.getType(child) != HiveParser.TOK_INSERT) {
// If it is not an INSERT, we do not need to do anything
continue;
}
tv.visit(child, action);
}
if (notSupported.get()) {
// Bail out
return null;
}
// 2. rewrite into query
// TOK_QUERY
// TOK_FROM
// join
// TOK_INSERT
// TOK_DESTINATION
// TOK_DIR
// TOK_TMP_FILE
// TOK_SELECT
// refs
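// For example, a FROM clause "T_A a JOIN T_B b ON a.id = b.id" whose insert clauses
// reference a.x and b.y is (roughly) rewritten into the subquery
// "(SELECT a.x AS col0, b.y AS col1 FROM T_A a JOIN T_B b ON a.id = b.id) subq",
// and the references in the insert clauses are replaced with col0/col1.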
ASTNode from = new ASTNode(FROM_TOKEN);
from.addChild((ASTNode) ParseDriver.adaptor.dupTree(nodeOfInterest));
ASTNode destination = new ASTNode(DEST_TOKEN);
ASTNode dir = new ASTNode(DIR_TOKEN);
ASTNode tmpFile = new ASTNode(TMPFILE_TOKEN);
dir.addChild(tmpFile);
destination.addChild(dir);
ASTNode select = new ASTNode(SELECT_TOKEN);
int num = 0;
for (Collection<Object> selectIdentifier : aliasNodes.asMap().values()) {
Iterator<Object> it = selectIdentifier.iterator();
ASTNode node = (ASTNode) it.next();
// Add select expression
ASTNode selectExpr = new ASTNode(SELEXPR_TOKEN);
selectExpr.addChild((ASTNode) ParseDriver.adaptor.dupTree(node)); // Identifier
String colAlias = "col" + num;
selectExpr.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, colAlias))); // Alias
select.addChild(selectExpr);
// Rewrite all INSERT references (all the node values for this key)
ASTNode colExpr = new ASTNode(TABLEORCOL_TOKEN);
colExpr.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, colAlias)));
replaceASTChild(node, colExpr);
while (it.hasNext()) {
// Loop to rewrite rest of INSERT references
node = (ASTNode) it.next();
colExpr = new ASTNode(TABLEORCOL_TOKEN);
colExpr.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, colAlias)));
replaceASTChild(node, colExpr);
}
num++;
}
ASTNode insert = new ASTNode(INSERT_TOKEN);
insert.addChild(destination);
insert.addChild(select);
ASTNode newQuery = new ASTNode(QUERY_TOKEN);
newQuery.addChild(from);
newQuery.addChild(insert);
// 3. create subquery
ASTNode subq = new ASTNode(SUBQUERY_TOKEN);
subq.addChild(newQuery);
subq.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, "subq")));
replaceASTChild(nodeOfInterest, subq);
// 4. return subquery
return subq;
}
/**
* Can CBO handle the given AST?
*
* @param ast
* Top level AST
* @param qb
* top level QB corresponding to the AST
* @param cboCtx
* @return pair of whether CBO can handle the AST and, when it cannot, the reason why
*
* Assumption:<br>
* If the top level QB is a query then everything below it must
* also be a query.
*/
Pair<Boolean, String> canCBOHandleAst(ASTNode ast, QB qb, PreCboCtx cboCtx) {
int root = ast.getToken().getType();
boolean needToLogMessage = STATIC_LOG.isInfoEnabled();
boolean isSupportedRoot = root == HiveParser.TOK_QUERY || root == HiveParser.TOK_EXPLAIN
|| qb.isCTAS() || qb.isMaterializedView();
// Queries without a source table currently are not supported by CBO
boolean isSupportedType = (qb.getIsQuery())
|| qb.isCTAS() || qb.isMaterializedView() || cboCtx.type == PreCboCtx.Type.INSERT
|| cboCtx.type == PreCboCtx.Type.MULTI_INSERT;
boolean noBadTokens = HiveCalciteUtil.validateASTForUnsupportedTokens(ast);
boolean result = isSupportedRoot && isSupportedType && noBadTokens;
String msg = "";
if (!result) {
if (!isSupportedRoot) {
msg += "doesn't have QUERY or EXPLAIN as root and not a CTAS; ";
}
if (!isSupportedType) {
msg += "is not a query with at least one source table "
+ " or there is a subquery without a source table, or CTAS, or insert; ";
}
if (!noBadTokens) {
msg += "has unsupported tokens; ";
}
if (msg.isEmpty()) {
msg += "has some unspecified limitations; ";
}
msg = msg.substring(0, msg.length() - 2);
if (needToLogMessage) {
STATIC_LOG.info("Not invoking CBO because the statement " + msg);
}
return Pair.of(false, msg);
}
// Now check QB in more detail. canHandleQbForCbo returns null if query can
// be handled.
msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true);
if (msg == null) {
return Pair.of(true, msg);
}
if (needToLogMessage) {
STATIC_LOG.info("Not invoking CBO because the statement " + msg);
}
return Pair.of(false, msg);
}
/**
* Checks whether Calcite can handle the query.
*
* @param queryProperties
* @param conf
* @param topLevelQB
* whether the QB corresponds to the topmost query block
* @return null if the query can be handled; non-null reason string if it
* cannot be.
*
* Assumption:<br>
* 1. If the top level QB is a query then everything below it must also be
* a query<br>
* 2. A nested subquery will return false for qbToChk.getIsQuery()
*/
private static String canHandleQbForCbo(QueryProperties queryProperties,
HiveConf conf, boolean topLevelQB) {
List<String> reasons = new ArrayList<>();
// Not ok to run CBO, build error message.
if (queryProperties.hasClusterBy()) {
reasons.add("has cluster by");
}
if (queryProperties.hasDistributeBy()) {
reasons.add("has distribute by");
}
if (queryProperties.hasSortBy() && queryProperties.hasLimit()) {
reasons.add("has sort by with limit");
}
if (queryProperties.hasPTF()) {
reasons.add("has PTF");
}
if (queryProperties.usesScript()) {
reasons.add("uses scripts");
}
if (!queryProperties.isCBOSupportedLateralViews()) {
reasons.add("has lateral views");
}
return reasons.isEmpty() ? null : String.join("; ", reasons);
}
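// e.g. for a query that uses both CLUSTER BY and a PTF, this returns
// "has cluster by; has PTF".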
/* This method selects the extended CBO profiles to apply, depending
* on the query characteristics. */
private static EnumSet<ExtendedCBOProfile> obtainCBOProfiles(QueryProperties queryProperties) {
EnumSet<ExtendedCBOProfile> profilesCBO = EnumSet.noneOf(ExtendedCBOProfile.class);
// If the query contains more than one join
if (queryProperties.getJoinCount() > 1) {
profilesCBO.add(ExtendedCBOProfile.JOIN_REORDERING);
}
// If the query contains windowing processing
if (queryProperties.hasWindowing()) {
profilesCBO.add(ExtendedCBOProfile.WINDOWING_POSTPROCESSING);
}
return profilesCBO;
}
@Override
boolean isCBOExecuted() {
return runCBO;
}
@Override
boolean isCBOSupportedLateralView(ASTNode lateralView) {
// Lateral view AST has the following shape:
// ^(TOK_LATERAL_VIEW
// ^(TOK_SELECT ^(TOK_SELEXPR ^(TOK_FUNCTION Identifier params) identifier* tableAlias)))
if (lateralView.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
// LATERAL VIEW OUTER not supported in CBO
return false;
}
// Only INLINE followed by ARRAY supported in CBO
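// e.g. ... LATERAL VIEW INLINE(ARRAY(STRUCT(1, 'a'), STRUCT(2, 'b'))) t AS c1, c2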
ASTNode lvFunc = (ASTNode) lateralView.getChild(0).getChild(0).getChild(0);
String lvFuncName = lvFunc.getChild(0).getText();
if (lvFuncName.compareToIgnoreCase(
GenericUDTFInline.class.getAnnotation(Description.class).name()) != 0) {
return false;
}
if (lvFunc.getChildCount() != 2) {
return false;
}
ASTNode innerFunc = (ASTNode) lvFunc.getChild(1);
if (innerFunc.getToken().getType() != HiveParser.TOK_FUNCTION ||
innerFunc.getChild(0).getText().compareToIgnoreCase(
GenericUDFArray.class.getAnnotation(Description.class).name()) != 0) {
return false;
}
return true;
}
@Override
boolean continueJoinMerge() {
return !(runCBO && disableSemJoinReordering);
}
@Override
Table materializeCTE(String cteName, CTEClause cte) throws HiveException {
ASTNode createTable = new ASTNode(new ClassicToken(HiveParser.TOK_CREATETABLE));
ASTNode tableName = new ASTNode(new ClassicToken(HiveParser.TOK_TABNAME));
tableName.addChild(new ASTNode(new ClassicToken(HiveParser.Identifier, cteName)));
ASTNode temporary = new ASTNode(new ClassicToken(HiveParser.KW_TEMPORARY, MATERIALIZATION_MARKER));
createTable.addChild(tableName);
createTable.addChild(temporary);
createTable.addChild(cte.cteNode);
CalcitePlanner analyzer = new CalcitePlanner(queryState);
analyzer.initCtx(ctx);
analyzer.init(false);
// should share cte contexts
analyzer.aliasToCTEs.putAll(aliasToCTEs);
HiveOperation operation = queryState.getHiveOperation();
try {
analyzer.analyzeInternal(createTable);
} finally {
queryState.setCommandType(operation);
}
Table table = analyzer.tableDesc.toTable(conf);
Path location = table.getDataLocation();
try {
location.getFileSystem(conf).mkdirs(location);
} catch (IOException e) {
throw new HiveException(e);
}
table.setMaterializedTable(true);
LOG.info(cteName + " will be materialized into " + location);
cte.source = analyzer;
ctx.addMaterializedTable(cteName, table);
// For CalcitePlanner, store qualified name too
ctx.addMaterializedTable(table.getFullyQualifiedName(), table);
return table;
}
@Override
String fixCtasColumnName(String colName) {
if (runCBO) {
int lastDot = colName.lastIndexOf('.');
if (lastDot < 0) {
return colName; // alias is not fully qualified
}
String nqColumnName = colName.substring(lastDot + 1);
STATIC_LOG.debug("Replacing " + colName + " (produced by CBO) by " + nqColumnName);
return nqColumnName;
}
return super.fixCtasColumnName(colName);
}
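// e.g. a CBO-produced CTAS column alias "t.key" is replaced with "key".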
/**
* The context that doPhase1 uses to populate information pertaining to CBO
* (currently, this is used for CTAS and insert-as-select).
*/
protected static class PreCboCtx extends PlannerContext {
enum Type {
NONE, INSERT, MULTI_INSERT, CTAS, VIEW, UNEXPECTED
}
private ASTNode nodeOfInterest;
private Type type = Type.NONE;
private void set(Type type, ASTNode ast) {
if (this.type != Type.NONE) {
STATIC_LOG.warn("Setting " + type + " when already " + this.type + "; node " + ast.dump()
+ " vs old node " + nodeOfInterest.dump());
this.type = Type.UNEXPECTED;
return;
}
this.type = type;
this.nodeOfInterest = ast;
}
@Override
void setCTASToken(ASTNode child) {
set(PreCboCtx.Type.CTAS, child);
}
@Override
void setViewToken(ASTNode child) {
set(PreCboCtx.Type.VIEW, child);
}
@Override
void setInsertToken(ASTNode ast, boolean isTmpFileDest) {
if (!isTmpFileDest) {
set(PreCboCtx.Type.INSERT, ast);
}
}
@Override
void setMultiInsertToken(ASTNode child) {
set(PreCboCtx.Type.MULTI_INSERT, child);
}
@Override
void resetToken() {
this.type = Type.NONE;
this.nodeOfInterest = null;
}
}
protected ASTNode fixUpAfterCbo(ASTNode originalAst, ASTNode newAst, PreCboCtx cboCtx)
throws SemanticException {
switch (cboCtx.type) {
case NONE:
// nothing to do
return newAst;
case CTAS:
case VIEW: {
// Patch the optimized query back into original CTAS AST, replacing the
// original query.
replaceASTChild(cboCtx.nodeOfInterest, newAst);
return originalAst;
}
case INSERT: {
// We need to patch the original destination back into the new query.
// This makes assumptions about the structure of the AST.
ASTNode newDest = new ASTSearcher().simpleBreadthFirstSearch(newAst, HiveParser.TOK_QUERY,
HiveParser.TOK_INSERT, HiveParser.TOK_DESTINATION);
if (newDest == null) {
LOG.error("Cannot find destination after CBO; new ast is " + newAst.dump());
throw new SemanticException("Cannot find destination after CBO");
}
replaceASTChild(newDest, cboCtx.nodeOfInterest);
return newAst;
}
case MULTI_INSERT: {
// Patch the optimized query back into original FROM clause.
replaceASTChild(cboCtx.nodeOfInterest, newAst);
return originalAst;
}
default:
throw new AssertionError("Unexpected type " + cboCtx.type);
}
}
ASTNode reAnalyzeCTASAfterCbo(ASTNode newAst) throws SemanticException {
// analyzeCreateTable uses this.ast, but doPhase1 doesn't, so only reset it
// here.
newAst = analyzeCreateTable(newAst, getQB(), null);
if (newAst == null) {
LOG.error("analyzeCreateTable failed to initialize CTAS after CBO;" + " new ast is "
+ getAST().dump());
throw new SemanticException("analyzeCreateTable failed to initialize CTAS after CBO");
}
return newAst;
}
ASTNode reAnalyzeViewAfterCbo(ASTNode newAst) throws SemanticException {
// analyzeCreateView uses this.ast, but doPhase1 doesn't, so only reset it
// here.
newAst = analyzeCreateView(newAst, getQB(), null);
if (newAst == null) {
LOG.error("analyzeCreateTable failed to initialize materialized view after CBO;" + " new ast is "
+ getAST().dump());
throw new SemanticException("analyzeCreateTable failed to initialize materialized view after CBO");
}
return newAst;
}
public static class ASTSearcher {
private final LinkedList<ASTNode> searchQueue = new LinkedList<ASTNode>();
public ASTNode simpleBreadthFirstSearch(ASTNode ast, Collection<Integer> tokens) {
int[] tokenArray = new int[tokens.size()];
int i = 0;
for (Integer token : tokens) {
tokenArray[i] = token;
++i;
}
return simpleBreadthFirstSearch(ast, tokenArray);
}
/**
* Performs a breadth-first search of the AST for a nested sequence of tokens. Tokens
* don't have to be each other's direct children; they can be separated by
* layers of other tokens. For each token in the list, the first occurrence found is
* matched and there's no backtracking; thus, if the AST has multiple instances of
* some token, of which only one matches, it is not guaranteed to be found. We
* use this for simple things. Not thread-safe - reuses searchQueue.
*/
public ASTNode simpleBreadthFirstSearch(ASTNode ast, int... tokens) {
searchQueue.clear();
searchQueue.add(ast);
for (int i = 0; i < tokens.length; ++i) {
boolean found = false;
int token = tokens[i];
while (!searchQueue.isEmpty() && !found) {
ASTNode next = searchQueue.poll();
found = next.getType() == token;
if (found) {
if (i == tokens.length - 1) {
return next;
}
searchQueue.clear();
}
for (int j = 0; j < next.getChildCount(); ++j) {
searchQueue.add((ASTNode) next.getChild(j));
}
}
if (!found) {
return null;
}
}
return null;
}
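// Example usage (see fixUpAfterCbo): locate the destination of the first INSERT:
//   new ASTSearcher().simpleBreadthFirstSearch(ast,
//       HiveParser.TOK_QUERY, HiveParser.TOK_INSERT, HiveParser.TOK_DESTINATION);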
public ASTNode depthFirstSearch(ASTNode ast, int token) {
searchQueue.clear();
searchQueue.add(ast);
while (!searchQueue.isEmpty()) {
ASTNode next = searchQueue.poll();
if (next.getType() == token) {
return next;
}
for (int j = 0; j < next.getChildCount(); ++j) {
searchQueue.add((ASTNode) next.getChild(j));
}
}
return null;
}
public ASTNode simpleBreadthFirstSearchAny(ASTNode ast, int... tokens) {
searchQueue.clear();
searchQueue.add(ast);
while (!searchQueue.isEmpty()) {
ASTNode next = searchQueue.poll();
for (int i = 0; i < tokens.length; ++i) {
if (next.getType() == tokens[i]) {
return next;
}
}
for (int i = 0; i < next.getChildCount(); ++i) {
searchQueue.add((ASTNode) next.getChild(i));
}
}
return null;
}
public void reset() {
searchQueue.clear();
}
}
private static void replaceASTChild(ASTNode child, ASTNode newChild) {
ASTNode parent = (ASTNode) child.parent;
int childIndex = child.childIndex;
parent.deleteChild(childIndex);
parent.insertChild(childIndex, newChild);
}
/**
* Get optimized logical plan for the given QB tree in the semAnalyzer.
*
* @return the optimized Calcite logical plan
* @throws SemanticException
*/
RelNode logicalPlan() throws SemanticException {
RelNode optimizedOptiqPlan = null;
Frameworks.PlannerAction<RelNode> calcitePlannerAction = null;
if (this.columnAccessInfo == null) {
this.columnAccessInfo = new ColumnAccessInfo();
}
calcitePlannerAction = createPlannerAction(prunedPartitions, ctx.getStatsSource(), this.columnAccessInfo);
try {
optimizedOptiqPlan = Frameworks.withPlanner(calcitePlannerAction, Frameworks
.newConfigBuilder().typeSystem(new HiveTypeSystemImpl()).build());
} catch (Exception e) {
rethrowCalciteException(e);
throw new AssertionError("rethrowCalciteException didn't throw for " + e.getMessage());
}
return optimizedOptiqPlan;
}
protected Frameworks.PlannerAction<RelNode> createPlannerAction(
Map<String, PrunedPartitionList> partitionCache,
StatsSource statsSource,
ColumnAccessInfo columnAccessInfo) {
return new CalcitePlannerAction(partitionCache, statsSource, columnAccessInfo, getQB());
}
/**
* Get SQL rewrite for a Calcite logical plan
*
* @return Optimized SQL text (or null, if failed)
*/
public String getOptimizedSql(RelNode optimizedOptiqPlan) {
boolean nullsLast = HiveConf.getBoolVar(conf, ConfVars.HIVE_DEFAULT_NULLS_LAST);
NullCollation nullCollation = nullsLast ? NullCollation.LAST : NullCollation.LOW;
SqlDialect dialect = new HiveSqlDialect(SqlDialect.EMPTY_CONTEXT
.withDatabaseProduct(SqlDialect.DatabaseProduct.HIVE)
.withDatabaseMajorVersion(4) // TODO: should not be hardcoded
.withDatabaseMinorVersion(0)
.withIdentifierQuoteString("`")
.withDataTypeSystem(new HiveTypeSystemImpl())
.withNullCollation(nullCollation)) {
@Override
protected boolean allowsAs() {
return true;
}
@Override
public boolean supportsCharSet() {
return false;
}
};
try {
final JdbcImplementor jdbcImplementor =
new JdbcImplementor(dialect, (JavaTypeFactory) optimizedOptiqPlan.getCluster()
.getTypeFactory());
final JdbcImplementor.Result result = jdbcImplementor.visitRoot(optimizedOptiqPlan);
String sql = result.asStatement().toSqlString(dialect).getSql();
sql = PATTERN_VARCHAR.matcher(sql).replaceAll("STRING"); // VARCHAR(Integer.MAX_VALUE) -> STRING
sql = PATTERN_TIMESTAMP.matcher(sql).replaceAll("TIMESTAMP"); // TIMESTAMP(9) -> TIMESTAMP
return sql;
} catch (Error | Exception e) {
// We play it safe here. If we get an error or exception,
// we will simply not print the optimized SQL.
LOG.warn("Rel2SQL Rewrite threw error", e);
}
return null;
}
/**
* Get Optimized AST for the given QB tree in the semAnalyzer.
*
* @return Optimized operator tree translated in to Hive AST
* @throws SemanticException
*/
ASTNode getOptimizedAST() throws SemanticException {
return getOptimizedAST(logicalPlan());
}
/**
* Get Optimized AST for the given QB tree in the semAnalyzer.
*
* @return Optimized operator tree translated in to Hive AST
* @throws SemanticException
*/
ASTNode getOptimizedAST(RelNode optimizedOptiqPlan) throws SemanticException {
ASTNode optiqOptimizedAST = ASTConverter.convert(optimizedOptiqPlan, resultSchema,
HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_COLUMN_ALIGNMENT),ctx.getPlanMapper());
return optiqOptimizedAST;
}
/**
* Get Optimized Hive Operator DAG for the given QB tree in the semAnalyzer.
*
* @return Optimized Hive operator tree
* @throws SemanticException
*/
Operator getOptimizedHiveOPDag(RelNode optimizedOptiqPlan) throws SemanticException {
RelNode modifiedOptimizedOptiqPlan = PlanModifierForReturnPath.convertOpTree(
optimizedOptiqPlan, resultSchema, this.getQB().getTableDesc() != null);
LOG.debug("Translating the following plan:\n" + RelOptUtil.toString(modifiedOptimizedOptiqPlan));
Operator<?> hiveRoot = new HiveOpConverter(this, conf, unparseTranslator, topOps)
.convert(modifiedOptimizedOptiqPlan);
RowResolver hiveRootRR = genRowResolver(hiveRoot, getQB());
opParseCtx.put(hiveRoot, new OpParseContext(hiveRootRR));
String dest = getQB().getParseInfo().getClauseNames().iterator().next();
if (isInsertInto(getQB().getParseInfo(), dest)) {
Operator<?> selOp = handleInsertStatement(dest, hiveRoot, hiveRootRR, getQB());
return genFileSinkPlan(dest, getQB(), selOp);
} else {
return genFileSinkPlan(dest, getQB(), hiveRoot);
}
}
// This method is a wrapper around handleInsertStatementSpec in
// SemanticAnalyzer.
Operator<?> handleInsertStatement(String dest, Operator<?> input, RowResolver inputRR, QB qb)
throws SemanticException {
List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
List<ColumnInfo> columns = inputRR.getColumnInfos();
for (int i = 0; i < columns.size(); i++) {
ColumnInfo col = columns.get(i);
colList.add(new ExprNodeColumnDesc(col));
}
ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
RowResolver rowResolver = createRowResolver(columns);
rowResolver = handleInsertStatementSpec(colList, dest, rowResolver, qb, selExprList);
List<String> columnNames = new ArrayList<String>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < colList.size(); i++) {
String outputCol = getColumnInternalName(i);
colExprMap.put(outputCol, colList.get(i));
columnNames.add(outputCol);
}
Operator<?> output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(colList,
columnNames), new RowSchema(rowResolver.getColumnInfos()), input), rowResolver);
output.setColumnExprMap(colExprMap);
return output;
}
private RowResolver createRowResolver(List<ColumnInfo> columnInfos) {
RowResolver rowResolver = new RowResolver();
int pos = 0;
for (ColumnInfo columnInfo : columnInfos) {
ColumnInfo newColumnInfo = new ColumnInfo(columnInfo);
newColumnInfo.setInternalName(HiveConf.getColumnInternalName(pos++));
rowResolver.put(newColumnInfo.getTabAlias(), newColumnInfo.getAlias(), newColumnInfo);
}
return rowResolver;
}
/**
* Unwraps Calcite invocation exceptions coming from the metadata provider
* chain and obtains the real cause.
*
* @param e the exception to unwrap
*/
private void rethrowCalciteException(Exception e) throws SemanticException {
Throwable first = (semanticException != null) ? semanticException : e, current = first, cause = current
.getCause();
while (cause != null) {
Throwable causeOfCause = cause.getCause();
if (current == first && causeOfCause == null && isUselessCause(first)) {
// "cause" is a root cause, and "e"/"first" is a useless
// exception it's wrapped in.
first = cause;
break;
} else if (causeOfCause != null && isUselessCause(cause)
&& ExceptionHelper.resetCause(current, causeOfCause)) {
// "cause" was a useless intermediate cause and was replace it
// with its own cause.
cause = causeOfCause;
continue; // do loop once again with the new cause of "current"
}
current = cause;
cause = current.getCause();
}
if (first instanceof RuntimeException) {
throw (RuntimeException) first;
} else if (first instanceof SemanticException) {
throw (SemanticException) first;
}
throw new RuntimeException(first);
}
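// Example of the unwrapping above (illustrative): a chain such as
//   RuntimeException -> InvocationTargetException -> SemanticException
// collapses to the SemanticException, which is then rethrown as-is rather
// than being wrapped in a new generic RuntimeException.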
private static class ExceptionHelper {
private static final Field CAUSE_FIELD = getField(Throwable.class, "cause"),
TARGET_FIELD = getField(InvocationTargetException.class, "target"),
MESSAGE_FIELD = getField(Throwable.class, "detailMessage");
private static Field getField(Class<?> clazz, String name) {
try {
Field f = clazz.getDeclaredField(name);
f.setAccessible(true);
return f;
} catch (Throwable t) {
return null;
}
}
public static boolean resetCause(Throwable target, Throwable newCause) {
try {
if (MESSAGE_FIELD == null) {
return false;
}
Field field = (target instanceof InvocationTargetException) ? TARGET_FIELD : CAUSE_FIELD;
if (field == null) {
return false;
}
Throwable oldCause = target.getCause();
String oldMsg = target.getMessage();
field.set(target, newCause);
if (oldMsg != null && oldMsg.equals(oldCause.toString())) {
MESSAGE_FIELD.set(target, newCause == null ? null : newCause.toString());
}
} catch (Throwable se) {
return false;
}
return true;
}
}
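// Note: the reflection in ExceptionHelper is best-effort; if the fields of
// Throwable/InvocationTargetException are not accessible, getField returns
// null and resetCause returns false, leaving the original cause chain intact.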
private boolean isUselessCause(Throwable t) {
return t instanceof RuntimeException || t instanceof InvocationTargetException;
}
private RowResolver genRowResolver(Operator op, QB qb) {
RowResolver rr = new RowResolver();
String subqAlias = (qb.getAliases().size() == 1 && qb.getSubqAliases().size() == 1) ? qb
.getAliases().get(0) : null;
for (ColumnInfo ci : op.getSchema().getSignature()) {
try {
rr.putWithCheck((subqAlias != null) ? subqAlias : ci.getTabAlias(),
ci.getAlias() != null ? ci.getAlias() : ci.getInternalName(), ci.getInternalName(),
new ColumnInfo(ci));
} catch (SemanticException e) {
throw new RuntimeException(e);
}
}
return rr;
}
private enum ExtendedCBOProfile {
JOIN_REORDERING,
WINDOWING_POSTPROCESSING,
REFERENTIAL_CONSTRAINTS;
}
/**
* Code responsible for Calcite plan generation and optimization.
*/
public class CalcitePlannerAction implements Frameworks.PlannerAction<RelNode> {
private RelOptCluster cluster;
private RelOptSchema relOptSchema;
private FunctionHelper functionHelper;
private final Map<String, PrunedPartitionList> partitionCache;
private final Map<String, ColumnStatsList> colStatsCache;
private final ColumnAccessInfo columnAccessInfo;
private Map<HiveProject, Table> viewProjectToTableSchema;
private final QB rootQB;
// correlated vars across subqueries within the same query need to have different IDs
private int subqueryId;
// this keeps track of whether a subquery is correlated and contains an
// aggregate, since that case is special-cased when it is rewritten in SubqueryRemoveRule
Set<RelNode> corrScalarRexSQWithAgg = new HashSet<RelNode>();
// TODO: Do we need to keep track of the RR and ColNameToPosMap for every op,
// or just the last one?
LinkedHashMap<RelNode, RowResolver> relToHiveRR = new LinkedHashMap<RelNode, RowResolver>();
LinkedHashMap<RelNode, ImmutableMap<String, Integer>> relToHiveColNameCalcitePosMap = new LinkedHashMap<RelNode, ImmutableMap<String, Integer>>();
private final StatsSource statsSource;
private RelNode dummyTableScan;
Map<List<String>, JdbcConvention> jdbcConventionMap = new HashMap<>();
Map<List<String>, JdbcSchema> schemaMap = new HashMap<>();
Map<RelNode, ASTNode> subQueryMap = new HashMap<>();
protected CalcitePlannerAction(
Map<String, PrunedPartitionList> partitionCache,
StatsSource statsSource,
ColumnAccessInfo columnAccessInfo, QB rootQB) {
this.partitionCache = partitionCache;
this.statsSource = statsSource;
this.rootQB = rootQB;
this.colStatsCache = ctx.getOpContext().getColStatsCache();
this.columnAccessInfo = columnAccessInfo;
}
@Override
public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlus rootSchema) {
RelNode calcitePlan;
subqueryId = -1;
/*
* recreate cluster, so that it picks up the additional traitDef
*/
RelOptPlanner planner = createPlanner(conf, corrScalarRexSQWithAgg, statsSource, ctx.isExplainPlan());
final RexBuilder rexBuilder = cluster.getRexBuilder();
final RelOptCluster optCluster = RelOptCluster.create(planner, rexBuilder);
this.cluster = optCluster;
this.relOptSchema = relOptSchema;
this.functionHelper = new HiveFunctionHelper(rexBuilder);
PerfLogger perfLogger = SessionState.getPerfLogger();
// 1. Gen Calcite Plan
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
try {
calcitePlan = genLogicalPlan(getQB(), true, null, null);
// freeze the names in the hash map for objects that are only interested
// in the parsed tables in the original query.
tabNameToTabObject.markParsingCompleted();
// if it is a view creation, we do not use unique column names
resultSchema = convertRowSchemaToResultSetSchema(relToHiveRR.get(calcitePlan),
(forViewCreation || getQB().isMaterializedView()) ? false : HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
} catch (SemanticException e) {
semanticException = e;
throw new RuntimeException(e);
}
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Plan generation");
if (LOG.isDebugEnabled()) {
LOG.debug("Initial CBO Plan:\n" + RelOptUtil.toString(calcitePlan));
}
// Create executor
RexExecutor executorProvider = new HiveRexExecutorImpl();
calcitePlan.getCluster().getPlanner().setExecutor(executorProvider);
// Create and set MD provider
HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf, HIVE_REL_NODE_CLASSES);
RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider()));
optCluster.invalidateMetadataQuery();
calcitePlan = applyMaterializedViewRewritingByText(
ast, calcitePlan, optCluster, mdProvider.getMetadataProvider());
// We need to get the ColumnAccessInfo and viewToTableSchema for views.
HiveRelFieldTrimmer.get()
.trim(HiveRelFactories.HIVE_BUILDER.create(optCluster, null),
calcitePlan, this.columnAccessInfo, this.viewProjectToTableSchema);
//Remove subquery
if (LOG.isDebugEnabled()) {
LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calcitePlan));
}
calcitePlan = removeSubqueries(calcitePlan, mdProvider.getMetadataProvider());
if (LOG.isDebugEnabled()) {
LOG.debug("Plan after removing subquery:\n" + RelOptUtil.toString(calcitePlan));
}
calcitePlan = HiveRelDecorrelator.decorrelateQuery(calcitePlan);
if (LOG.isDebugEnabled()) {
LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calcitePlan));
}
// Validate query materialization for query results caching. This check needs
// to occur before constant folding, which may remove some function calls
// from the query plan.
// In addition, if it is a materialized view creation and we are enabling it
// for rewriting, it should pass all checks done for query results caching
// and on top of that we should check that it only contains operators that
// are supported by the rewriting algorithm.
HiveRelOptMaterializationValidator materializationValidator = new HiveRelOptMaterializationValidator();
materializationValidator.validate(calcitePlan);
setInvalidResultCacheReason(
materializationValidator.getResultCacheInvalidReason());
setInvalidAutomaticRewritingMaterializationReason(
materializationValidator.getAutomaticRewritingInvalidReason());
// 2. Apply pre-join order optimizations
calcitePlan = applyPreJoinOrderingTransforms(calcitePlan, mdProvider.getMetadataProvider(), executorProvider);
if (LOG.isDebugEnabled()) {
LOG.debug("Plan after pre-join transformations:\n" + RelOptUtil.toString(calcitePlan));
}
// 3. Materialized view based rewriting
// We disable it for CTAS and MV creation queries (trying to avoid any problem
// due to data freshness)
if (conf.getBoolVar(ConfVars.HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING) &&
!getQB().isMaterializedView() && !ctx.isLoadingMaterializedView() && !getQB().isCTAS() &&
getQB().hasTableDefined() &&
!forViewCreation) {
calcitePlan =
applyMaterializedViewRewriting(planner, calcitePlan, mdProvider.getMetadataProvider(), executorProvider);
if (LOG.isDebugEnabled()) {
LOG.debug("Plan after view-based rewriting:\n" + RelOptUtil.toString(calcitePlan));
}
}
// 4. Apply join order optimizations: reordering MST algorithm
// If join optimizations fail because of missing stats, we continue with
// the rest of the optimizations
if (profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING)) {
calcitePlan = applyJoinOrderingTransform(calcitePlan, mdProvider.getMetadataProvider(), executorProvider);
if (LOG.isDebugEnabled()) {
LOG.debug("Plan after join transformations:\n" + RelOptUtil.toString(calcitePlan));
}
} else {
disableSemJoinReordering = false;
}
// 5. Apply post-join order optimizations
calcitePlan = applyPostJoinOrderingTransform(calcitePlan, mdProvider.getMetadataProvider(), executorProvider);
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_SORT_PREDS_WITH_STATS)) {
calcitePlan = calcitePlan.accept(new HiveFilterSortPredicates(noColsMissingStats));
}
if (LOG.isDebugEnabled()) {
LOG.debug("Plan after post-join transformations:\n" + RelOptUtil.toString(calcitePlan));
}
return calcitePlan;
}
/**
* Perform all optimizations before Join Ordering.
*
* @param basePlan
* original plan
* @param mdProvider
* meta data provider
* @param executorProvider
* executor
* @return plan after pre-join-ordering transformations
*/
protected RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider) {
// TODO: Decorrelation of subqueries should be done before attempting
// Partition Pruning; otherwise Expression evaluation may try to execute a
// correlated subquery.
PerfLogger perfLogger = SessionState.getPerfLogger();
final int maxCNFNodeCount = conf.getIntVar(HiveConf.ConfVars.HIVE_CBO_CNF_NODES_LIMIT);
final int minNumORClauses = conf.getIntVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
final boolean allowDisjunctivePredicates = conf.getBoolVar(ConfVars.HIVE_JOIN_DISJ_TRANSITIVE_PREDICATES_PUSHDOWN);
final HepProgramBuilder program = new HepProgramBuilder();
//0. SetOp rewrite
generatePartialProgram(program, true, HepMatchOrder.BOTTOM_UP,
HiveProjectOverIntersectRemoveRule.INSTANCE, HiveIntersectMergeRule.INSTANCE,
HiveUnionMergeRule.INSTANCE);
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveIntersectRewriteRule.INSTANCE);
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveExceptRewriteRule.INSTANCE);
//1. Distinct aggregate rewrite
if (!isMaterializedViewMaintenance() && conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_ENABLED)) {
// Rewrite to datasketches if enabled
if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_COUNTDISTINCT_ENABLED)) {
String sketchType = conf.getVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_COUNT_DISTINCT_SKETCH);
RelOptRule rule = new HiveRewriteToDataSketchesRules.CountDistinctRewrite(sketchType);
generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN, rule);
}
if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_PERCENTILE_DISC_ENABLED)) {
String sketchType = conf.getVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_PERCENTILE_DISC_SKETCH);
RelOptRule rule = new HiveRewriteToDataSketchesRules.PercentileDiscRewrite(sketchType);
generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN, rule);
}
if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_CUME_DIST_ENABLED)) {
String sketchType = conf.getVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_CUME_DIST_SKETCH);
RelOptRule rule = new HiveRewriteToDataSketchesRules.CumeDistRewriteRule(sketchType);
generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN, rule);
}
if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_NTILE_ENABLED)) {
String sketchType = conf.getVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_NTILE_SKETCH);
RelOptRule rule = new HiveRewriteToDataSketchesRules.NTileRewrite(sketchType);
generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN, rule);
}
if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_RANK_ENABLED)) {
String sketchType = conf.getVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_RANK_SKETCH);
RelOptRule rule = new HiveRewriteToDataSketchesRules.RankRewriteRule(sketchType);
generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN, rule);
}
}
// Run this optimization early, since it expands the operator pipeline.
if (!conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") &&
conf.getBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEDISTINCTREWRITE)) {
// It is not clear if this rewrite is always performant on MR, since the extra
// map phase introduced for the 2nd MR job may offset the gains of this
// multi-stage aggregation. We need a cost model for MR to enable this on MR.
generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN,
HiveExpandDistinctAggregatesRule.INSTANCE);
}
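// Sketch of the expansion above (assumed example): an aggregate such as
//   SELECT COUNT(DISTINCT c) FROM t
// can be rewritten into an inner aggregate that groups by c followed by an
// outer count over the distinct keys, trading one heavy aggregation phase
// for two lighter, better-parallelizing stages.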
// 2. Try factoring out common filter elements & separating deterministic
// vs non-deterministic UDFs. This needs to run before PPD so that PPD can
// add ON clauses for old-style join syntax.
// Ex: select * from R1 join R2 where (((R1.x=R2.x) and R1.y<10) or
// ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
new HivePreFilteringRule(maxCNFNodeCount));
// 3. Run exhaustive PPD, add not null filters, transitive inference,
// constant propagation, constant folding
List<RelOptRule> rules = Lists.newArrayList();
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_WINDOWING)) {
rules.add(HiveFilterProjectTransposeRule.DETERMINISTIC_WINDOWING);
} else {
rules.add(HiveFilterProjectTransposeRule.DETERMINISTIC);
}
rules.add(HiveFilterSetOpTransposeRule.INSTANCE);
rules.add(HiveFilterSortTransposeRule.INSTANCE);
rules.add(HiveFilterJoinRule.JOIN);
rules.add(HiveFilterJoinRule.FILTER_ON_JOIN);
rules.add(new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_BUILDER,
Aggregate.class));
rules.add(HiveFilterMergeRule.INSTANCE);
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_REDUCE_WITH_STATS)) {
rules.add(HiveReduceExpressionsWithStatsRule.INSTANCE);
}
rules.add(HiveProjectFilterPullUpConstantsRule.INSTANCE);
rules.add(HiveReduceExpressionsRule.PROJECT_INSTANCE);
rules.add(HiveReduceExpressionsRule.FILTER_INSTANCE);
rules.add(HiveReduceExpressionsRule.JOIN_INSTANCE);
rules.add(HiveReduceExpressionsRule.SEMIJOIN_INSTANCE);
rules.add(HiveAggregateReduceFunctionsRule.INSTANCE);
rules.add(HiveAggregateReduceRule.INSTANCE);
if (conf.getBoolVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
rules.add(new HivePointLookupOptimizerRule.FilterCondition(minNumORClauses));
rules.add(new HivePointLookupOptimizerRule.JoinCondition(minNumORClauses));
rules.add(new HivePointLookupOptimizerRule.ProjectionExpressions(minNumORClauses));
}
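// Example of the point-lookup rewrite (illustrative): with minNumORClauses = 3,
// a predicate like (x = 1) OR (x = 2) OR (x = 3) is collapsed into
// x IN (1, 2, 3), in filter conditions, join conditions and projections alike.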
rules.add(HiveProjectJoinTransposeRule.INSTANCE);
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_CONSTRAINTS_JOIN) &&
profilesCBO.contains(ExtendedCBOProfile.REFERENTIAL_CONSTRAINTS)) {
rules.add(HiveJoinConstraintsRule.INSTANCE);
}
rules.add(HiveJoinAddNotNullRule.INSTANCE_JOIN);
rules.add(HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN);
rules.add(HiveJoinAddNotNullRule.INSTANCE_ANTIJOIN);
rules.add(new HiveJoinPushTransitivePredicatesRule(HiveJoin.class, allowDisjunctivePredicates));
rules.add(new HiveJoinPushTransitivePredicatesRule(HiveSemiJoin.class, allowDisjunctivePredicates));
rules.add(new HiveJoinPushTransitivePredicatesRule(HiveAntiJoin.class, allowDisjunctivePredicates));
rules.add(HiveSortMergeRule.INSTANCE);
rules.add(HiveSortPullUpConstantsRule.SORT_LIMIT_INSTANCE);
rules.add(HiveSortPullUpConstantsRule.SORT_EXCHANGE_INSTANCE);
rules.add(HiveUnionPullUpConstantsRule.INSTANCE);
rules.add(HiveAggregatePullUpConstantsRule.INSTANCE);
generatePartialProgram(program, true, HepMatchOrder.BOTTOM_UP,
rules.toArray(new RelOptRule[0]));
// 4. Push down limit through outer join
// NOTE: We run this after PPD to support old style join syntax.
// Ex: select * from R1 left outer join R2 where (((R1.x=R2.x) and R1.y<10) or
// ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 order by R1.x limit 10
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE)) {
// This should be a cost-based decision, but until we enable the extended cost
// model, we will use the given value for the variable
final float reductionProportion = HiveConf.getFloatVar(conf,
HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_PERCENTAGE);
final long reductionTuples = HiveConf.getLongVar(conf,
HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_TUPLES);
generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN,
HiveSortMergeRule.INSTANCE, HiveSortProjectTransposeRule.INSTANCE,
HiveSortJoinReduceRule.INSTANCE, HiveSortUnionReduceRule.INSTANCE);
generatePartialProgram(program, true, HepMatchOrder.BOTTOM_UP,
new HiveSortRemoveRule(reductionProportion, reductionTuples),
HiveProjectSortTransposeRule.INSTANCE);
}
// Push Down Semi Joins
// TODO: Enable this later
/*perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, SemiJoinJoinTransposeRule.INSTANCE,
SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE);
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
"Calcite: Prejoin ordering transformation, Push Down Semi Joins"); */
// 5. Try to remove limit and order by
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveSortLimitRemoveRule.INSTANCE);
// 6. Apply Partition Pruning
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
new HivePartitionPruneRule(conf));
// 7. Projection Pruning (this introduces select above TS & hence needs to be run last due to PP)
generatePartialProgram(program, false, HepMatchOrder.TOP_DOWN,
new HiveFieldTrimmerRule(true));
// 8. Rerun PPD through Project as column pruning would have introduced
// a DT above scans; by pushing filters just above the TS, Hive can push them
// into storage (in case there are filters on non-partition cols). This only
// matches FIL-PROJ-TS
// Also merge, remove and reduce Project if possible
generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN,
HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID,
HiveProjectFilterPullUpConstantsRule.INSTANCE, HiveProjectMergeRule.INSTANCE,
ProjectRemoveRule.Config.DEFAULT.toRule(), HiveSortMergeRule.INSTANCE);
// 9. Get rid of sq_count_check if group by key is constant
if (conf.getBoolVar(ConfVars.HIVE_REMOVE_SQ_COUNT_CHECK)) {
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveRemoveSqCountCheck.INSTANCE);
}
// 10. Convert left outer join + null filter on right side table column to
// anti join. Add this rule after all the optimizations for which Calcite
// support for anti join is missing.
// It needs to run before ProjectRemoveRule, as it expects a project over
// filter. It also runs before join re-ordering, since join re-ordering may
// convert the left outer join to a right join before converting it back
// to a left outer join.
if (conf.getBoolVar(ConfVars.HIVE_CONVERT_ANTI_JOIN)) {
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveAntiSemiJoinRule.INSTANCE);
}
// Trigger program
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
basePlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider);
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
"Calcite: Prejoin ordering transformation");
return basePlan;
}
/**
* Returns true if a materialized view is being loaded, created, or rebuilt.
*/
private boolean isMaterializedViewMaintenance() {
return mvRebuildMode != MaterializationRebuildMode.NONE
|| ctx.isLoadingMaterializedView()
|| getQB().isMaterializedView();
}
protected RelNode applyMaterializedViewRewriting(RelOptPlanner planner, RelNode basePlan,
RelMetadataProvider mdProvider, RexExecutor executorProvider) {
final RelOptCluster optCluster = basePlan.getCluster();
final PerfLogger perfLogger = SessionState.getPerfLogger();
final boolean useMaterializedViewsRegistry =
!conf.get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname).equals("DUMMY");
final String ruleExclusionRegex = conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, "");
final RelNode calcitePreMVRewritingPlan = basePlan;
final Set<TableName> tablesUsedQuery = getTablesUsed(basePlan);
// Add views to planner
List<HiveRelOptMaterialization> materializations = new ArrayList<>();
try {
// This is not a rebuild, so we retrieve all the materializations.
// Consequently, we do not need to force the materialization contents to be
// up-to-date, and we apply the user parameters
// (HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW) instead.
if (useMaterializedViewsRegistry) {
materializations.addAll(db.getPreprocessedMaterializedViewsFromRegistry(tablesUsedQuery, getTxnMgr()));
} else {
materializations.addAll(db.getPreprocessedMaterializedViews(tablesUsedQuery, getTxnMgr()));
}
// We need to use the current cluster for the scan operator on views,
// otherwise the planner will throw an Exception (different planners)
materializations = materializations.stream().
map(materialization -> materialization.copyToNewCluster(optCluster)).
collect(Collectors.toList());
} catch (HiveException e) {
LOG.warn("Exception loading materialized views", e);
}
if (materializations.isEmpty()) {
// There are no materializations, we can return the original plan
return calcitePreMVRewritingPlan;
}
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
// We need to expand IN/BETWEEN expressions when materialized view rewriting
// is triggered since otherwise this may prevent some rewritings from happening
HepProgramBuilder program = new HepProgramBuilder();
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveInBetweenExpandRule.FILTER_INSTANCE,
HiveInBetweenExpandRule.JOIN_INSTANCE,
HiveInBetweenExpandRule.PROJECT_INSTANCE);
basePlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider);
// Pre-processing to enable triggering additional rewritings
basePlan = HiveMaterializedViewBoxing.boxPlan(basePlan);
// If this is not a rebuild, we use the Volcano planner, since the decision
// on whether to use MVs, and which ones, should be cost-based
optCluster.invalidateMetadataQuery();
RelMetadataQuery.THREAD_PROVIDERS.set(HiveMaterializationRelMetadataProvider.DEFAULT);
// Add materializations to planner
for (RelOptMaterialization materialization : materializations) {
planner.addMaterialization(materialization);
}
// Add rule to split aggregate with grouping sets (if any)
planner.addRule(HiveAggregateSplitRule.INSTANCE);
// Add view-based rewriting rules to planner
for (RelOptRule rule : HiveMaterializedViewRule.MATERIALIZED_VIEW_REWRITING_RULES) {
planner.addRule(rule);
}
// Unboxing rule
planner.addRule(HiveMaterializedViewBoxing.INSTANCE_UNBOXING);
// Partition pruner rule
planner.addRule(HiveFilterProjectTSTransposeRule.INSTANCE);
planner.addRule(new HivePartitionPruneRule(conf));
// Optimize plan
if (!ruleExclusionRegex.isEmpty()) {
LOG.info("The CBO rules matching the following regex are excluded from planning: {}",
ruleExclusionRegex);
planner.setRuleDescExclusionFilter(Pattern.compile(ruleExclusionRegex));
}
planner.setRoot(basePlan);
basePlan = planner.findBestExp();
// Remove view-based rewriting rules from planner
planner.clear();
// Restore default cost model
optCluster.invalidateMetadataQuery();
RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider));
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: View-based rewriting");
List<Table> materializedViewsUsedOriginalPlan = getMaterializedViewsUsed(calcitePreMVRewritingPlan);
List<Table> materializedViewsUsedAfterRewrite = getMaterializedViewsUsed(basePlan);
if (materializedViewsUsedOriginalPlan.size() == materializedViewsUsedAfterRewrite.size()) {
// Materialized view-based rewriting did not happen, we can return the original plan
return calcitePreMVRewritingPlan;
}
try {
if (!HiveMaterializedViewUtils.checkPrivilegeForMaterializedViews(materializedViewsUsedAfterRewrite)) {
// if materialized views do not have appropriate privileges, we shouldn't be using them
return calcitePreMVRewritingPlan;
}
} catch (HiveException e) {
LOG.warn("Exception checking privileges for materialized views", e);
return calcitePreMVRewritingPlan;
}
// A rewriting was produced; we will check whether it was part of an incremental
// rebuild to try to replace INSERT OVERWRITE by INSERT or MERGE
if (useMaterializedViewsRegistry) {
// Before proceeding we need to check whether the materialized views used are
// up-to-date w.r.t. the information in the metastore
try {
if (!db.validateMaterializedViewsFromRegistry(materializedViewsUsedAfterRewrite, tablesUsedQuery, getTxnMgr())) {
return calcitePreMVRewritingPlan;
}
} catch (HiveException e) {
LOG.warn("Exception validating materialized views", e);
return calcitePreMVRewritingPlan;
}
}
// Now we trigger some needed optimization rules again
return applyPreJoinOrderingTransforms(basePlan, mdProvider, executorProvider);
}
private boolean isMaterializedViewRewritingByTextEnabled() {
return conf.getBoolVar(ConfVars.HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING_SQL) &&
!HiveMaterializedViewsRegistry.get().isEmpty() &&
mvRebuildMode == MaterializationRebuildMode.NONE &&
!rootQB.isMaterializedView() && !ctx.isLoadingMaterializedView() && !rootQB.isCTAS() &&
rootQB.getIsQuery() &&
rootQB.hasTableDefined() &&
!forViewCreation;
}
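// Illustrative example of text-based rewriting (assumed): if a materialized
// view was created as
//   CREATE MATERIALIZED VIEW mv1 AS SELECT a, SUM(b) FROM t GROUP BY a
// and the expanded text of the incoming query is identical to the expanded
// definition of mv1, the whole plan is replaced by a simple scan over mv1.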
private RelNode applyMaterializedViewRewritingByText(
ASTNode queryToRewriteAST,
RelNode originalPlan,
RelOptCluster optCluster,
RelMetadataProvider metadataProvider) {
if (!isMaterializedViewRewritingByTextEnabled()) {
return originalPlan;
}
String expandedQueryText = null;
try {
unparseTranslator.applyTranslations(ctx.getTokenRewriteStream(), EXPANDED_QUERY_TOKEN_REWRITE_PROGRAM);
expandedQueryText = ctx.getTokenRewriteStream().toString(
EXPANDED_QUERY_TOKEN_REWRITE_PROGRAM,
queryToRewriteAST.getTokenStartIndex(),
queryToRewriteAST.getTokenStopIndex());
ASTNode expandedAST = ParseUtils.parse(expandedQueryText, new Context(conf));
Set<TableName> tablesUsedByOriginalPlan = getTablesUsed(removeSubqueries(originalPlan, metadataProvider));
RelNode mvScan = getMaterializedViewByAST(
expandedAST, optCluster, ANY, db, tablesUsedByOriginalPlan, getTxnMgr());
if (mvScan != null) {
return mvScan;
}
if (!conf.getBoolVar(ConfVars.HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING_SUBQUERY_SQL)) {
return originalPlan;
}
return new HiveMaterializedViewASTSubQueryRewriteShuttle(subQueryMap, queryToRewriteAST, expandedAST,
HiveRelFactories.HIVE_BUILDER.create(optCluster, null),
db, tablesUsedByOriginalPlan, getTxnMgr()).rewrite(originalPlan);
} catch (Exception e) {
LOG.warn("Automatic materialized view query rewrite failed. expanded query text: {} AST string {} ",
expandedQueryText, queryToRewriteAST.toStringTree(), e);
return originalPlan;
}
}
/**
* Perform join reordering optimization.
*
* @param basePlan
* original plan
* @param mdProvider
* meta data provider
* @param executorProvider
* executor
* @return plan after join reordering
*/
private RelNode applyJoinOrderingTransform(RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider) {
PerfLogger perfLogger = SessionState.getPerfLogger();
final HepProgramBuilder program = new HepProgramBuilder();
// Remove Projects between Joins so that JoinToMultiJoinRule can merge them
// into a MultiJoin. If FK-PK constraints are declared, it tries to pull up
// non-filtering, column-appending join nodes.
List<RelOptRule> rules = Lists.newArrayList();
if (profilesCBO.contains(ExtendedCBOProfile.REFERENTIAL_CONSTRAINTS)) {
rules.add(HiveJoinSwapConstraintsRule.INSTANCE);
}
rules.add(HiveSemiJoinProjectTransposeRule.INSTANCE);
rules.add(HiveJoinProjectTransposeRule.LEFT_PROJECT_BTW_JOIN);
rules.add(HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN);
rules.add(HiveProjectMergeRule.INSTANCE);
if (profilesCBO.contains(ExtendedCBOProfile.REFERENTIAL_CONSTRAINTS)) {
rules.add(conf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_WINDOWING) ?
HiveFilterProjectTransposeRule.DETERMINISTIC_WINDOWING_ON_NON_FILTERING_JOIN :
HiveFilterProjectTransposeRule.DETERMINISTIC_ON_NON_FILTERING_JOIN);
rules.add(HiveFilterJoinRule.FILTER_ON_NON_FILTERING_JOIN);
}
generatePartialProgram(program, true, HepMatchOrder.BOTTOM_UP,
rules.toArray(new RelOptRule[0]));
// Join reordering
generatePartialProgram(program, false, HepMatchOrder.BOTTOM_UP,
new JoinToMultiJoinRule(HiveJoin.class), new LoptOptimizeJoinRule(HiveRelFactories.HIVE_BUILDER));
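// Note (standard Calcite behavior): JoinToMultiJoinRule first flattens
// consecutive joins into a single MultiJoin node; LoptOptimizeJoinRule then
// picks a join order for that MultiJoin based on the cost model.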
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
RelNode calciteOptimizedPlan;
try {
calciteOptimizedPlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider);
} catch (Exception e) {
if (noColsMissingStats.get() > 0) {
LOG.warn("Missing column stats (see previous messages), skipping join reordering in CBO");
noColsMissingStats.set(0);
calciteOptimizedPlan = basePlan;
disableSemJoinReordering = false;
} else {
throw e;
}
}
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Join Reordering");
return calciteOptimizedPlan;
}
/**
* Perform join reordering post-optimization.
*
* @param basePlan
* original plan
* @param mdProvider
* meta data provider
* @param executorProvider
* executor
* @return plan after post-join-ordering transformations
*/
private RelNode applyPostJoinOrderingTransform(RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider) {
PerfLogger perfLogger = SessionState.getPerfLogger();
final HepProgramBuilder program = new HepProgramBuilder();
double factor = conf.getFloatVar(ConfVars.HIVE_CARDINALITY_PRESERVING_JOIN_OPTIMIZATION_FACTOR);
if (factor > 0.0) {
generatePartialProgram(program, false, HepMatchOrder.TOP_DOWN,
new HiveCardinalityPreservingJoinRule(factor));
}
// 1. Run other optimizations that do not need stats
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
ProjectRemoveRule.Config.DEFAULT.toRule(),
HiveUnionMergeRule.INSTANCE,
new HiveUnionSimpleSelectsToInlineTableRule(dummyTableScan),
HiveAggregateProjectMergeRule.INSTANCE,
HiveProjectMergeRule.INSTANCE_NO_FORCE,
HiveJoinCommuteRule.INSTANCE,
new HiveAggregateSortLimitRule(conf.getBoolVar(ConfVars.HIVE_DEFAULT_NULLS_LAST)));
// 2. Run aggregate-join transpose (cost based)
// If it fails because of missing stats, we continue with
// the rest of the optimizations
if (conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE) || conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE_UNIQUE)) {
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
new HiveAggregateJoinTransposeRule(noColsMissingStats,
conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE),
conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE_UNIQUE)));
}
// 3. Convert Join + GBy to semijoin
// Run this rule at a later stage, since many Calcite rules can't deal with semijoins
if (conf.getBoolVar(ConfVars.SEMIJOIN_CONVERSION)) {
generatePartialProgram(program, true, HepMatchOrder.DEPTH_FIRST,
HiveSemiJoinRule.INSTANCE_PROJECT, HiveSemiJoinRule.INSTANCE_PROJECT_SWAPPED,
HiveSemiJoinRule.INSTANCE_AGGREGATE, HiveSemiJoinRule.INSTANCE_AGGREGATE_SWAPPED);
}
// 4. convert SemiJoin + GBy to SemiJoin
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveRemoveGBYSemiJoinRule.INSTANCE);
// 5. Run rule to fix windowing issue when it is done over
// aggregation columns (HIVE-10627)
if (profilesCBO.contains(ExtendedCBOProfile.WINDOWING_POSTPROCESSING)) {
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveWindowingFixRule.INSTANCE);
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveWindowingLastValueRewrite.INSTANCE);
}
// 6. Apply Druid transformation rules
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveDruidRules.FILTER_DATE_RANGE_RULE,
HiveDruidRules.FILTER, HiveDruidRules.PROJECT_FILTER_TRANSPOSE,
HiveDruidRules.AGGREGATE_FILTER_TRANSPOSE,
HiveDruidRules.AGGREGATE_PROJECT,
HiveDruidRules.PROJECT,
HiveDruidRules.EXPAND_SINGLE_DISTINCT_AGGREGATES_DRUID_RULE,
HiveDruidRules.AGGREGATE,
HiveDruidRules.POST_AGGREGATION_PROJECT,
HiveDruidRules.FILTER_AGGREGATE_TRANSPOSE,
HiveDruidRules.FILTER_PROJECT_TRANSPOSE,
HiveDruidRules.HAVING_FILTER_RULE,
HiveDruidRules.SORT_PROJECT_TRANSPOSE,
HiveDruidRules.SORT);
// 7. Apply JDBC transformation rules
if (conf.getBoolVar(ConfVars.HIVE_ENABLE_JDBC_PUSHDOWN)) {
List<RelOptRule> rules = Lists.newArrayList();
rules.add(JDBCExpandExpressionsRule.FILTER_INSTANCE);
rules.add(JDBCExpandExpressionsRule.JOIN_INSTANCE);
rules.add(JDBCExpandExpressionsRule.PROJECT_INSTANCE);
rules.add(JDBCExtractJoinFilterRule.INSTANCE);
rules.add(JDBCAbstractSplitFilterRule.SPLIT_FILTER_ABOVE_JOIN);
rules.add(JDBCAbstractSplitFilterRule.SPLIT_FILTER_ABOVE_CONVERTER);
rules.add(JDBCFilterJoinRule.INSTANCE);
rules.add(JDBCFilterPushDownRule.INSTANCE);
rules.add(JDBCProjectPushDownRule.INSTANCE);
if (!conf.getBoolVar(ConfVars.HIVE_ENABLE_JDBC_SAFE_PUSHDOWN)) {
rules.add(JDBCJoinPushDownRule.INSTANCE);
rules.add(JDBCUnionPushDownRule.INSTANCE);
rules.add(JDBCAggregationPushDownRule.INSTANCE);
rules.add(JDBCSortPushDownRule.INSTANCE);
}
generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN,
rules.toArray(new RelOptRule[rules.size()]));
}
// 8. Run rules to aid in translation from Calcite tree to Hive tree
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
// 8.1. Merge join into multijoin operators (if possible)
generatePartialProgram(program, true, HepMatchOrder.BOTTOM_UP,
HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER,
HiveJoinProjectTransposeRule.LEFT_PROJECT_INCLUDE_OUTER,
HiveJoinProjectTransposeRule.RIGHT_PROJECT_INCLUDE_OUTER,
HiveJoinToMultiJoinRule.INSTANCE, HiveProjectMergeRule.INSTANCE);
// The previous rules can pull up projections through join operators,
// thus we run the field trimmer again to push them back down
generatePartialProgram(program, false, HepMatchOrder.TOP_DOWN,
new HiveFieldTrimmerRule(false));
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
ProjectRemoveRule.Config.DEFAULT.toRule(), new ProjectMergeRule(false, HiveRelFactories.HIVE_BUILDER));
generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN,
HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID,
HiveProjectFilterPullUpConstantsRule.INSTANCE);
// 8.2. Introduce exchange operators below join/multijoin operators
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN);
} else {
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveProjectSortExchangeTransposeRule.INSTANCE, HiveProjectMergeRule.INSTANCE);
}
// 9. We need to expand IN/BETWEEN expressions when loading a materialized view
// since otherwise this may prevent some rewritings from happening
if (ctx.isLoadingMaterializedView()) {
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveInBetweenExpandRule.FILTER_INSTANCE,
HiveInBetweenExpandRule.JOIN_INSTANCE,
HiveInBetweenExpandRule.PROJECT_INSTANCE);
}
// Trigger program
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
basePlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider);
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
"Calcite: Postjoin ordering transformation");
return basePlan;
}
protected Set<TableName> getTablesUsed(RelNode plan) {
Set<TableName> tablesUsed = new HashSet<>();
new RelVisitor() {
@Override
public void visit(RelNode node, int ordinal, RelNode parent) {
if (node instanceof TableScan) {
TableScan ts = (TableScan) node;
Table hiveTableMD = ((RelOptHiveTable) ts.getTable()).getHiveTableMD();
tablesUsed.add(hiveTableMD.getFullTableName());
}
super.visit(node, ordinal, parent);
}
}.go(plan);
return tablesUsed;
}
protected List<Table> getMaterializedViewsUsed(RelNode plan) {
List<Table> materializedViewsUsed = new ArrayList<>();
new RelVisitor() {
@Override
public void visit(RelNode node, int ordinal, RelNode parent) {
if (node instanceof TableScan) {
TableScan ts = (TableScan) node;
Table table = ((RelOptHiveTable) ts.getTable()).getHiveTableMD();
if (table.isMaterializedView()) {
materializedViewsUsed.add(table);
}
} else if (node instanceof DruidQuery) {
DruidQuery dq = (DruidQuery) node;
Table table = ((RelOptHiveTable) dq.getTable()).getHiveTableMD();
if (table.isMaterializedView()) {
materializedViewsUsed.add(table);
}
}
super.visit(node, ordinal, parent);
}
}.go(plan);
return materializedViewsUsed;
}
/**
* Removes sub-queries (if present) from the specified query plan.
* @return a new query plan without subquery expressions.
*/
private RelNode removeSubqueries(RelNode basePlan, RelMetadataProvider mdProvider) {
final HepProgramBuilder builder = new HepProgramBuilder();
builder.addMatchOrder(HepMatchOrder.DEPTH_FIRST);
builder.addRuleCollection(
ImmutableList.of(HiveSubQueryRemoveRule.forFilter(conf), HiveSubQueryRemoveRule.forProject(conf)));
return executeProgram(basePlan, builder.build(), mdProvider, null);
}
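// Illustrative example (assumed): a predicate subquery such as
//   SELECT * FROM t1 WHERE t1.x IN (SELECT t2.x FROM t2)
// is rewritten by the two rules above into a join-based plan, so no
// RexSubQuery expressions remain when decorrelation runs afterwards.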
/**
* Generate a HEP program with the given rule set.
*
* @param isCollection whether the rules should be added as a single collection
* @param order match order to use for this set of rules
* @param rules rules to add to the program
*/
protected void generatePartialProgram(HepProgramBuilder programBuilder, boolean isCollection, HepMatchOrder order,
RelOptRule... rules) {
programBuilder.addMatchOrder(order);
if (isCollection) {
programBuilder.addRuleCollection(ImmutableList.copyOf(rules));
} else {
for (RelOptRule r : rules) {
programBuilder.addRuleInstance(r);
}
}
}
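// Note (HepPlanner semantics): addRuleCollection applies the whole rule set
// as one group until it reaches a fixpoint, whereas addRuleInstance adds one
// program step per rule, so each rule is exhausted before the next one fires.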
protected RelNode executeProgram(RelNode basePlan, HepProgram program,
RelMetadataProvider mdProvider, RexExecutor executorProvider) {
return executeProgram(basePlan, program, mdProvider, executorProvider, null);
}
protected RelNode executeProgram(RelNode basePlan, HepProgram program,
RelMetadataProvider mdProvider, RexExecutor executorProvider,
List<HiveRelOptMaterialization> materializations) {
final String ruleExclusionRegex = conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, "");
// Create planner and copy context
HepPlanner planner = new HepPlanner(program,
basePlan.getCluster().getPlanner().getContext());
planner.addListener(new RuleEventLogger());
List<RelMetadataProvider> list = Lists.newArrayList();
list.add(mdProvider);
planner.registerMetadataProviders(list);
RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list);
cluster.setMetadataProvider(
new CachingRelMetadataProvider(chainedProvider, planner));
if (executorProvider != null) {
// basePlan.getCluster().getPlanner() is the VolcanoPlanner from apply();
// both planners need to use the correct executor
cluster.getPlanner().setExecutor(executorProvider);
planner.setExecutor(executorProvider);
}
if (materializations != null) {
// Add materializations to planner
for (RelOptMaterialization materialization : materializations) {
planner.addMaterialization(materialization);
}
}
if (!ruleExclusionRegex.isEmpty()) {
LOG.info("The CBO rules matching the following regex are excluded from planning: {}",
ruleExclusionRegex);
planner.setRuleDescExclusionFilter(Pattern.compile(ruleExclusionRegex));
}
planner.setRoot(basePlan);
return planner.findBestExp();
}
@SuppressWarnings("nls")
private RelNode genSetOpLogicalPlan(Opcode opcode, String alias, String leftalias, RelNode leftRel,
String rightalias, RelNode rightRel) throws SemanticException {
// 1. Get Row Resolvers, Column map for original left and right input of
// SetOp Rel
RowResolver leftRR = this.relToHiveRR.get(leftRel);
RowResolver rightRR = this.relToHiveRR.get(rightRel);
Map<String, ColumnInfo> leftmap = leftRR.getFieldMap(leftalias);
Map<String, ColumnInfo> rightmap = rightRR.getFieldMap(rightalias);
// 2. Validate that SetOp is feasible according to Hive (by using type
// info from RR)
if (leftmap.size() != rightmap.size()) {
throw new SemanticException("Schema of both sides of union should match.");
}
ASTNode tabref = getQB().getAliases().isEmpty() ? null : getQB().getParseInfo()
.getSrcForAlias(getQB().getAliases().get(0));
// 3. construct SetOp Output RR using original left & right Input
RowResolver setOpOutRR = new RowResolver();
Iterator<Map.Entry<String, ColumnInfo>> lIter = leftmap.entrySet().iterator();
Iterator<Map.Entry<String, ColumnInfo>> rIter = rightmap.entrySet().iterator();
while (lIter.hasNext()) {
Map.Entry<String, ColumnInfo> lEntry = lIter.next();
Map.Entry<String, ColumnInfo> rEntry = rIter.next();
ColumnInfo lInfo = lEntry.getValue();
ColumnInfo rInfo = rEntry.getValue();
String field = lEntry.getKey();
// try widening conversion, otherwise fail union
TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(),
rInfo.getType());
if (commonTypeInfo == null) {
throw new SemanticException(generateErrorMessage(tabref,
"Schema of both sides of setop should match: Column " + field
+ " is of type " + lInfo.getType().getTypeName()
+ " on first table and type " + rInfo.getType().getTypeName()
+ " on second table"));
}
ColumnInfo setOpColInfo = new ColumnInfo(lInfo);
setOpColInfo.setType(commonTypeInfo);
setOpOutRR.put(alias, field, setOpColInfo);
}
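// Example of the widening above (illustrative): INT on one side and BIGINT on
// the other widen to BIGINT; if no common type exists, the SemanticException
// above is thrown.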
// 4. Determine which columns require a cast on the left/right input (Calcite
// requires exact types on both sides of a SetOp)
boolean leftNeedsTypeCast = false;
boolean rightNeedsTypeCast = false;
List<RexNode> leftProjs = new ArrayList<RexNode>();
List<RexNode> rightProjs = new ArrayList<RexNode>();
List<RelDataTypeField> leftRowDT = leftRel.getRowType().getFieldList();
List<RelDataTypeField> rightRowDT = rightRel.getRowType().getFieldList();
RelDataType leftFieldDT;
RelDataType rightFieldDT;
RelDataType unionFieldDT;
for (int i = 0; i < leftRowDT.size(); i++) {
leftFieldDT = leftRowDT.get(i).getType();
rightFieldDT = rightRowDT.get(i).getType();
if (!leftFieldDT.equals(rightFieldDT)) {
unionFieldDT = TypeConverter.convert(setOpOutRR.getColumnInfos().get(i).getType(),
cluster.getTypeFactory());
if (!unionFieldDT.equals(leftFieldDT)) {
leftNeedsTypeCast = true;
}
leftProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT,
cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true));
if (!unionFieldDT.equals(rightFieldDT)) {
rightNeedsTypeCast = true;
}
rightProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT,
cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true));
} else {
leftProjs.add(cluster.getRexBuilder().ensureType(leftFieldDT,
cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true));
rightProjs.add(cluster.getRexBuilder().ensureType(rightFieldDT,
cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true));
}
}
// 5. Introduce Project Rel above original left/right inputs if cast is
// needed for type parity
RelNode setOpLeftInput = leftRel;
RelNode setOpRightInput = rightRel;
if (leftNeedsTypeCast) {
setOpLeftInput = HiveProject.create(leftRel, leftProjs, leftRel.getRowType()
.getFieldNames());
}
if (rightNeedsTypeCast) {
setOpRightInput = HiveProject.create(rightRel, rightProjs, rightRel.getRowType()
.getFieldNames());
}
// 6. Construct SetOp Rel
Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>();
bldr.add(setOpLeftInput);
bldr.add(setOpRightInput);
SetOp setOpRel = null;
switch (opcode) {
case UNION:
setOpRel = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build());
break;
case INTERSECT:
setOpRel = new HiveIntersect(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build(),
false);
break;
case INTERSECTALL:
setOpRel = new HiveIntersect(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build(),
true);
break;
case EXCEPT:
setOpRel = new HiveExcept(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build(),
false);
break;
case EXCEPTALL:
setOpRel = new HiveExcept(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build(),
true);
break;
default:
throw new SemanticException(ErrorMsg.UNSUPPORTED_SET_OPERATOR.getMsg(opcode.toString()));
}
relToHiveRR.put(setOpRel, setOpOutRR);
relToHiveColNameCalcitePosMap.put(setOpRel, buildHiveToCalciteColumnMap(setOpOutRR));
return setOpRel;
}
private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode rightRel, String rightTableAlias, JoinType hiveJoinType,
ASTNode joinCond, ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR) throws SemanticException {
RowResolver leftRR = this.relToHiveRR.get(leftRel);
RowResolver rightRR = this.relToHiveRR.get(rightRel);
// 1. Construct ExpressionNodeDesc representing Join Condition
RexNode calciteJoinCond = null;
List<String> namedColumns = null;
if (joinCond != null) {
JoinTypeCheckCtx jCtx = new JoinTypeCheckCtx(leftRR, rightRR, hiveJoinType);
jCtx.setOuterRR(outerRR);
RowResolver input = jCtx.getInputRR();
// named columns join
// TODO: we can also do the same for semi join but it seems that other
// DBMSs do not support it yet.
if (joinCond.getType() == HiveParser.TOK_TABCOLNAME
&& !hiveJoinType.equals(JoinType.LEFTSEMI)) {
namedColumns = new ArrayList<>();
// We will transform the USING clause to make it look like an ON clause.
// So, let's generate a valid ON-clause AST from the USING clause.
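// Example (illustrative): "t1 JOIN t2 USING (a, b)" is turned into the
// equivalent of "ON t1.a = t2.a AND t1.b = t2.b" by the AST built below.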
ASTNode and = (ASTNode) ParseDriver.adaptor.create(HiveParser.KW_AND, "and");
ASTNode equal = null;
int count = 0;
for (Node child : joinCond.getChildren()) {
String columnName = ((ASTNode) child).getText();
// dealing with views
if (unparseTranslator != null && unparseTranslator.isEnabled()) {
unparseTranslator.addIdentifierTranslation((ASTNode) child);
}
namedColumns.add(columnName);
ASTNode left = ASTBuilder.qualifiedName(leftTableAlias, columnName);
ASTNode right = ASTBuilder.qualifiedName(rightTableAlias, columnName);
equal = (ASTNode) ParseDriver.adaptor.create(HiveParser.EQUAL, "=");
ParseDriver.adaptor.addChild(equal, left);
ParseDriver.adaptor.addChild(equal, right);
ParseDriver.adaptor.addChild(and, equal);
count++;
}
joinCond = count > 1 ? and : equal;
} else if (unparseTranslator != null && unparseTranslator.isEnabled()) {
genAllExprNodeDesc(joinCond, input, jCtx);
}
Map<ASTNode, RexNode> exprNodes = RexNodeTypeCheck.genExprNodeJoinCond(
joinCond, jCtx, cluster.getRexBuilder());
if (jCtx.getError() != null) {
throw new SemanticException(SemanticAnalyzer.generateErrorMessage(jCtx.getErrorSrcNode(),
jCtx.getError()));
}
calciteJoinCond = exprNodes.get(joinCond);
} else {
calciteJoinCond = cluster.getRexBuilder().makeLiteral(true);
}
// 2. Validate that the join condition is legal (i.e., no function referring
// to both sides of the join, only equi-joins)
// TODO: Join filter handling (only supported for OJ by the runtime, or is it
// supported for IJ as well?)
// 3. Construct Join Rel Node and RowResolver for the new Join Node
boolean leftSemiJoin = false;
JoinRelType calciteJoinType;
switch (hiveJoinType) {
case LEFTOUTER:
calciteJoinType = JoinRelType.LEFT;
break;
case RIGHTOUTER:
calciteJoinType = JoinRelType.RIGHT;
break;
case FULLOUTER:
calciteJoinType = JoinRelType.FULL;
break;
case LEFTSEMI:
calciteJoinType = JoinRelType.SEMI;
leftSemiJoin = true;
break;
case ANTI:
calciteJoinType = JoinRelType.ANTI;
leftSemiJoin = true;
break;
case INNER:
default:
calciteJoinType = JoinRelType.INNER;
break;
}
RelNode topRel = null;
RowResolver topRR = null;
if (leftSemiJoin) {
List<RelDataTypeField> sysFieldList = new ArrayList<RelDataTypeField>();
List<RexNode> leftJoinKeys = new ArrayList<RexNode>();
List<RexNode> rightJoinKeys = new ArrayList<RexNode>();
RexNode nonEquiConds = HiveRelOptUtil.splitHiveJoinCondition(sysFieldList, ImmutableList.of(leftRel, rightRel),
calciteJoinCond, ImmutableList.of(leftJoinKeys, rightJoinKeys), null, null);
RelNode[] inputRels = new RelNode[] { leftRel, rightRel };
final List<Integer> leftKeys = new ArrayList<Integer>();
final List<Integer> rightKeys = new ArrayList<Integer>();
RexNode remainingEquiCond = HiveCalciteUtil.projectNonColumnEquiConditions(HiveRelFactories.HIVE_PROJECT_FACTORY,
inputRels, leftJoinKeys, rightJoinKeys, 0, leftKeys, rightKeys);
// Adjust right input fields in nonEquiConds if previous call modified the input
if (inputRels[0] != leftRel) {
nonEquiConds = RexUtil.shift(nonEquiConds, leftRel.getRowType().getFieldCount(),
inputRels[0].getRowType().getFieldCount() - leftRel.getRowType().getFieldCount());
}
calciteJoinCond = remainingEquiCond != null ?
RexUtil.composeConjunction(cluster.getRexBuilder(),
ImmutableList.of(remainingEquiCond, nonEquiConds), false) :
nonEquiConds;
final RelDataType combinedRowType = SqlValidatorUtil.createJoinType(
cluster.getTypeFactory(), inputRels[0].getRowType(), inputRels[1].getRowType(),
null, ImmutableList.of());
if (hiveJoinType == JoinType.LEFTSEMI) {
topRel = HiveSemiJoin.getSemiJoin(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
inputRels[0], inputRels[1],
HiveCalciteUtil.fixNullability(cluster.getRexBuilder(),
calciteJoinCond, RelOptUtil.getFieldTypeList(combinedRowType)));
} else {
topRel = HiveAntiJoin.getAntiJoin(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
inputRels[0], inputRels[1],
HiveCalciteUtil.fixNullability(cluster.getRexBuilder(),
calciteJoinCond, RelOptUtil.getFieldTypeList(combinedRowType)));
}
// Create join RR: we need to check whether we need to update left RR in case
// previous call to projectNonColumnEquiConditions updated it
if (inputRels[0] != leftRel) {
RowResolver newLeftRR = new RowResolver();
if (!RowResolver.add(newLeftRR, leftRR)) {
LOG.warn("Duplicates detected when adding columns to RR: see previous message");
}
for (int i = leftRel.getRowType().getFieldCount();
i < inputRels[0].getRowType().getFieldCount(); i++) {
ColumnInfo oColInfo = new ColumnInfo(
SemanticAnalyzer.getColumnInternalName(i),
TypeConverter.convert(inputRels[0].getRowType().getFieldList().get(i).getType()),
null, false);
newLeftRR.put(oColInfo.getTabAlias(), oColInfo.getInternalName(), oColInfo);
}
RowResolver joinRR = new RowResolver();
if (!RowResolver.add(joinRR, newLeftRR)) {
LOG.warn("Duplicates detected when adding columns to RR: see previous message");
}
relToHiveColNameCalcitePosMap.put(topRel, buildHiveToCalciteColumnMap(joinRR));
relToHiveRR.put(topRel, joinRR);
// Introduce top project operator to remove additional column(s) that have
// been introduced
List<RexNode> topFields = new ArrayList<RexNode>();
List<String> topFieldNames = new ArrayList<String>();
for (int i = 0; i < leftRel.getRowType().getFieldCount(); i++) {
final RelDataTypeField field = leftRel.getRowType().getFieldList().get(i);
topFields.add(leftRel.getCluster().getRexBuilder().makeInputRef(field.getType(), i));
topFieldNames.add(field.getName());
}
topRel = HiveRelFactories.HIVE_PROJECT_FACTORY.createProject(topRel, Collections.emptyList(), topFields, topFieldNames);
}
topRR = new RowResolver();
if (!RowResolver.add(topRR, leftRR)) {
LOG.warn("Duplicates detected when adding columns to RR: see previous message");
}
} else {
final RelDataType combinedRowType = SqlValidatorUtil.createJoinType(
cluster.getTypeFactory(), leftRel.getRowType(), rightRel.getRowType(),
null, ImmutableList.of());
topRR = RowResolver.getCombinedRR(leftRR, rightRR);
final ImmutableMap<String, Integer> hiveColNameCalcitePosMap =
buildHiveToCalciteColumnMap(topRR);
calciteJoinCond = new CorrelationConverter(
new InputContext(combinedRowType, hiveColNameCalcitePosMap, topRR),
outerNameToPosMap, outerRR, subqueryId).apply(calciteJoinCond);
topRel = HiveJoin.getJoin(
cluster, leftRel, rightRel,
HiveCalciteUtil.fixNullability(cluster.getRexBuilder(),
calciteJoinCond, RelOptUtil.getFieldTypeList(combinedRowType)),
calciteJoinType);
if (namedColumns != null) {
List<String> tableAliases = new ArrayList<>();
tableAliases.add(leftTableAlias);
tableAliases.add(rightTableAlias);
topRR.setNamedJoinInfo(new NamedJoinInfo(tableAliases, namedColumns, hiveJoinType));
}
}
// 4. Add new rel & its RR to the maps
relToHiveColNameCalcitePosMap.put(topRel, buildHiveToCalciteColumnMap(topRR));
relToHiveRR.put(topRel, topRR);
return topRel;
}
/**
* Generate the join logical plan RelNode by walking through the join AST.
*
* @param aliasToRel
* Alias (Table/Relation alias) to RelNode; only read and never
* written to by this method
* @return RelNode for the join
* @throws SemanticException
*/
private RelNode genJoinLogicalPlan(ASTNode joinParseTree, Map<String, RelNode> aliasToRel,
ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR)
throws SemanticException {
RelNode leftRel = null;
RelNode rightRel = null;
JoinType hiveJoinType = null;
if (joinParseTree.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) {
String msg = "UNIQUE JOIN is currently not supported in CBO,"
+ " turn off cbo to use UNIQUE JOIN.";
LOG.debug(msg);
throw new CalciteSemanticException(msg, UnsupportedFeature.Unique_join);
}
// 1. Determine Join Type
// TODO: What about TOK_CROSSJOIN, TOK_MAPJOIN
switch (joinParseTree.getToken().getType()) {
case HiveParser.TOK_LEFTOUTERJOIN:
hiveJoinType = JoinType.LEFTOUTER;
break;
case HiveParser.TOK_RIGHTOUTERJOIN:
hiveJoinType = JoinType.RIGHTOUTER;
break;
case HiveParser.TOK_FULLOUTERJOIN:
hiveJoinType = JoinType.FULLOUTER;
break;
case HiveParser.TOK_LEFTSEMIJOIN:
hiveJoinType = JoinType.LEFTSEMI;
break;
case HiveParser.TOK_LEFTANTISEMIJOIN:
hiveJoinType = JoinType.ANTI;
break;
default:
hiveJoinType = JoinType.INNER;
break;
}
// 2. Get Left Table Alias
ASTNode left = (ASTNode) joinParseTree.getChild(0);
String leftTableAlias = null;
if ((left.getToken().getType() == HiveParser.TOK_TABREF)
|| (left.getToken().getType() == HiveParser.TOK_SUBQUERY)
|| (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
leftTableAlias = getTableAlias(left);
leftRel = aliasToRel.get(leftTableAlias);
} else if (SemanticAnalyzer.isJoinToken(left)) {
leftRel = genJoinLogicalPlan(left, aliasToRel, outerNameToPosMap, outerRR);
} else if (left.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) {
leftRel = genLateralViewPlans(left, aliasToRel);
} else {
assert (false);
}
// 3. Get Right Table Alias
ASTNode right = (ASTNode) joinParseTree.getChild(1);
String rightTableAlias = null;
if ((right.getToken().getType() == HiveParser.TOK_TABREF)
|| (right.getToken().getType() == HiveParser.TOK_SUBQUERY)
|| (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
rightTableAlias = getTableAlias(right);
rightRel = aliasToRel.get(rightTableAlias);
} else if (right.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) {
rightRel = genLateralViewPlans(right, aliasToRel);
} else {
assert (false);
}
// 4. Get Join Condition
ASTNode joinCond = (ASTNode) joinParseTree.getChild(2);
// 5. Create Join rel
return genJoinRelNode(leftRel, leftTableAlias, rightRel, rightTableAlias, hiveJoinType, joinCond,
outerNameToPosMap, outerRR);
}
private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticException {
RowResolver rr = new RowResolver();
RelNode tableRel = null;
try {
// 1. If the table has a sample specified, bail from the Calcite path.
// 2. If return path is on and hive test mode is on, bail as well.
if (qb.getParseInfo().getTabSample(tableAlias) != null
|| getNameToSplitSampleMap().containsKey(tableAlias)
|| (conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) && (conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE)) ) {
String msg = String.format("Table Sample specified for %s."
+ " Currently we don't support Table Sample clauses in CBO,"
+ " turn off cbo for queries on tableSamples.", tableAlias);
LOG.debug(msg);
throw new CalciteSemanticException(msg, UnsupportedFeature.Table_sample_clauses);
}
// 2. Get Table Metadata
Table tabMetaData = qb.getMetaData().getSrcForAlias(tableAlias);
// 3. Get Table Logical Schema (Row Type)
// NOTE: Table logical schema = Non Partition Cols + Partition Cols +
// Virtual Cols
// 3.1 Add column info for non-partition cols (Object Inspector fields)
StructObjectInspector rowObjectInspector = (StructObjectInspector) tabMetaData.getDeserializer()
.getObjectInspector();
List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
ColumnInfo colInfo;
String colName;
ArrayList<ColumnInfo> cInfoLst = new ArrayList<>();
final NotNullConstraint nnc = tabMetaData.getNotNullConstraint();
final PrimaryKeyInfo pkc = tabMetaData.getPrimaryKeyInfo();
for (StructField structField : fields) {
colName = structField.getFieldName();
colInfo = new ColumnInfo(
structField.getFieldName(),
TypeInfoUtils.getTypeInfoFromObjectInspector(structField.getFieldObjectInspector()),
isNullable(colName, nnc, pkc), tableAlias, false);
colInfo.setSkewedCol(isSkewedCol(tableAlias, qb, colName));
rr.put(tableAlias, colName, colInfo);
cInfoLst.add(colInfo);
}
// TODO: Fix this
ArrayList<ColumnInfo> nonPartitionColumns = new ArrayList<ColumnInfo>(cInfoLst);
ArrayList<ColumnInfo> partitionColumns = new ArrayList<ColumnInfo>();
// 3.2 Add column info corresponding to partition columns
for (FieldSchema part_col : tabMetaData.getPartCols()) {
colName = part_col.getName();
colInfo = new ColumnInfo(colName,
TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()),
isNullable(colName, nnc, pkc), tableAlias, true);
rr.put(tableAlias, colName, colInfo);
cInfoLst.add(colInfo);
partitionColumns.add(colInfo);
}
final TableType tableType = obtainTableType(tabMetaData);
// 3.3 Add column info corresponding to virtual columns
List<VirtualColumn> virtualCols = new ArrayList<>();
if (tableType == TableType.NATIVE) {
virtualCols = VirtualColumn.getRegistry(conf);
if (AcidUtils.isNonNativeAcidTable(tabMetaData)) {
virtualCols.addAll(tabMetaData.getStorageHandler().acidVirtualColumns());
}
for (VirtualColumn vc : virtualCols) {
colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true,
vc.getIsHidden());
rr.put(tableAlias, vc.getName().toLowerCase(), colInfo);
cInfoLst.add(colInfo);
}
}
// 4. Build operator
Map<String, String> tabPropsFromQuery = qb.getTabPropsForAlias(tableAlias);
HiveTableScan.HiveTableScanTrait tableScanTrait = HiveTableScan.HiveTableScanTrait.from(tabPropsFromQuery);
RelOptHiveTable optTable;
if (tableType == TableType.DRUID ||
(tableType == TableType.JDBC && tabMetaData.getProperty(Constants.JDBC_TABLE) != null)) {
// Create case sensitive columns list
List<String> originalColumnNames =
((StandardStructObjectInspector)rowObjectInspector).getOriginalColumnNames();
List<ColumnInfo> cIList = new ArrayList<ColumnInfo>(originalColumnNames.size());
for (int i = 0; i < rr.getColumnInfos().size(); i++) {
cIList.add(new ColumnInfo(originalColumnNames.get(i), rr.getColumnInfos().get(i).getType(),
tableAlias, false));
}
// Build row type from field <type, name>
RelDataType rowType = TypeConverter.getType(cluster, cIList);
// Build RelOptAbstractTable
List<String> fullyQualifiedTabName = new ArrayList<>();
if (tabMetaData.getDbName() != null && !tabMetaData.getDbName().isEmpty()) {
fullyQualifiedTabName.add(tabMetaData.getDbName());
}
fullyQualifiedTabName.add(tabMetaData.getTableName());
if (tableType == TableType.DRUID) {
// Build Druid query
String address = HiveConf.getVar(conf,
HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
String dataSource = tabMetaData.getParameters().get(Constants.DRUID_DATA_SOURCE);
Set<String> metrics = new HashSet<>();
RexBuilder rexBuilder = cluster.getRexBuilder();
RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
List<RelDataType> druidColTypes = new ArrayList<>();
List<String> druidColNames = new ArrayList<>();
// @TODO FIX this: we actually do not need this anymore; in addition,
// Druid allows numeric dimensions now, so this check is not accurate.
for (RelDataTypeField field : rowType.getFieldList()) {
if (DruidTable.DEFAULT_TIMESTAMP_COLUMN.equals(field.getName())) {
// Druid's time column is never null.
druidColTypes.add(dtFactory.createTypeWithNullability(field.getType(), false));
} else {
druidColTypes.add(field.getType());
}
druidColNames.add(field.getName());
if (field.getName().equals(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
// timestamp
continue;
}
if (field.getType().getSqlTypeName() == SqlTypeName.VARCHAR) {
// dimension
continue;
}
metrics.add(field.getName());
}
List<Interval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
rowType = dtFactory.createStructType(druidColTypes, druidColNames);
DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false),
dataSource, RelDataTypeImpl.proto(rowType), metrics, DruidTable.DEFAULT_TIMESTAMP_COLUMN,
intervals, null, null);
optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName,
rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf,
db, tabNameToTabObject, partitionCache, colStatsCache, noColsMissingStats);
final TableScan scan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
optTable, null == tableAlias ? tabMetaData.getTableName() : tableAlias,
getAliasId(tableAlias, qb), HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP), qb.isInsideView()
|| qb.getAliasInsideView().contains(tableAlias.toLowerCase()), tableScanTrait);
tableRel = DruidQuery.create(cluster, cluster.traitSetOf(BindableConvention.INSTANCE),
optTable, druidTable, ImmutableList.of(scan), DruidSqlOperatorConverter.getDefaultMap());
} else {
optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName,
rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf,
db, tabNameToTabObject, partitionCache, colStatsCache, noColsMissingStats);
final HiveTableScan hts = new HiveTableScan(cluster,
cluster.traitSetOf(HiveRelNode.CONVENTION), optTable,
null == tableAlias ? tabMetaData.getTableName() : tableAlias,
getAliasId(tableAlias, qb),
HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP),
qb.isInsideView() || qb.getAliasInsideView().contains(tableAlias.toLowerCase()), tableScanTrait);
final String dataBaseType = tabMetaData.getProperty(Constants.JDBC_DATABASE_TYPE);
final String url = tabMetaData.getProperty(Constants.JDBC_URL);
final String driver = tabMetaData.getProperty(Constants.JDBC_DRIVER);
final String user = tabMetaData.getProperty(Constants.JDBC_USERNAME);
final String pswd;
if (tabMetaData.getProperty(Constants.JDBC_PASSWORD) != null) {
pswd = tabMetaData.getProperty(Constants.JDBC_PASSWORD);
} else if (tabMetaData.getProperty(Constants.JDBC_KEYSTORE) != null) {
String keystore = tabMetaData.getProperty(Constants.JDBC_KEYSTORE);
String key = tabMetaData.getProperty(Constants.JDBC_KEY);
pswd = Utilities.getPasswdFromKeystore(keystore, key);
} else if (tabMetaData.getProperty(Constants.JDBC_PASSWORD_URI) != null) {
pswd = Utilities.getPasswdFromUri(tabMetaData.getProperty(Constants.JDBC_PASSWORD_URI));
} else {
pswd = null;
LOG.warn("No password found for accessing {} table via JDBC", fullyQualifiedTabName);
}
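// Password resolution order above: explicit JDBC_PASSWORD first, then a
// keystore (JDBC_KEYSTORE + JDBC_KEY), then a password URI; if none is
// present we proceed without a password and only log a warning.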
final String catalogName = tabMetaData.getProperty(Constants.JDBC_CATALOG);
final String schemaName = tabMetaData.getProperty(Constants.JDBC_SCHEMA);
final String tableName = tabMetaData.getProperty(Constants.JDBC_TABLE);
DataSource ds = JdbcSchema.dataSource(url, driver, user, pswd);
SqlDialect jdbcDialect = JdbcSchema.createDialect(SqlDialectFactoryImpl.INSTANCE, ds);
String dialectName = jdbcDialect.getClass().getName();
if (LOG.isDebugEnabled()) {
LOG.debug("Dialect for table {}: {}", tableName, dialectName);
}
List<String> jdbcConventionKey = ImmutableNullableList.of(url, driver, user, pswd, dialectName, dataBaseType);
jdbcConventionMap.putIfAbsent(jdbcConventionKey, JdbcConvention.of(jdbcDialect, null, dataBaseType));
JdbcConvention jc = jdbcConventionMap.get(jdbcConventionKey);
List<String> schemaKey = ImmutableNullableList.of(url, driver, user, pswd, dialectName, dataBaseType,
catalogName, schemaName);
schemaMap.putIfAbsent(schemaKey, new JdbcSchema(ds, jc.dialect, jc, catalogName, schemaName));
JdbcSchema schema = schemaMap.get(schemaKey);
JdbcTable jt = (JdbcTable) schema.getTable(tableName);
if (jt == null) {
throw new SemanticException("Table " + tableName + " was not found in the database");
}
JdbcHiveTableScan jdbcTableRel = new JdbcHiveTableScan(cluster, optTable, jt, jc, hts);
tableRel = new HiveJdbcConverter(cluster, jdbcTableRel.getTraitSet().replace(HiveRelNode.CONVENTION),
jdbcTableRel, jc, url, user);
}
} else {
// Build row type from field <type, name>
RelDataType rowType = TypeConverter.getType(cluster, rr, null);
// Build RelOptAbstractTable
List<String> fullyQualifiedTabName = new ArrayList<>();
if (tabMetaData.getDbName() != null && !tabMetaData.getDbName().isEmpty()) {
fullyQualifiedTabName.add(tabMetaData.getDbName());
}
fullyQualifiedTabName.add(tabMetaData.getTableName());
optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName,
rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf,
db, tabNameToTabObject, partitionCache, colStatsCache, noColsMissingStats);
// Build Hive Table Scan Rel
tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable,
null == tableAlias ? tabMetaData.getTableName() : tableAlias,
getAliasId(tableAlias, qb), HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP), qb.isInsideView()
|| qb.getAliasInsideView().contains(tableAlias.toLowerCase()), tableScanTrait);
}
if (optTable.hasReferentialConstraints()) {
profilesCBO.add(ExtendedCBOProfile.REFERENTIAL_CONSTRAINTS);
}
// 6. Add Schema(RR) to RelNode-Schema map
ImmutableMap<String, Integer> hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr);
relToHiveRR.put(tableRel, rr);
relToHiveColNameCalcitePosMap.put(tableRel, hiveToCalciteColMap);
} catch (Exception e) {
if (e instanceof SemanticException) {
throw (SemanticException) e;
} else {
throw new RuntimeException(e);
}
}
return tableRel;
}
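// A column is non-nullable when it is covered by a NOT NULL constraint or
// belongs to the primary key. For example, given a hypothetical
// "CREATE TABLE t (id INT PRIMARY KEY, name STRING NOT NULL, note STRING)",
// only "note" is nullable.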
private boolean isNullable(String colName, NotNullConstraint notNullConstraints, PrimaryKeyInfo primaryKeyInfo) {
if (notNullConstraints != null && notNullConstraints.getNotNullConstraints().containsValue(colName)) {
return false;
}
if (primaryKeyInfo != null && primaryKeyInfo.getColNames().containsValue(colName)) {
return false;
}
return true;
}
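// Classifies the table by its storage handler: Druid-backed, JDBC-backed,
// or NATIVE otherwise (tables with no storage handler, or any other handler).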
private TableType obtainTableType(Table tabMetaData) {
if (tabMetaData.getStorageHandler() != null) {
final String storageHandlerStr = tabMetaData.getStorageHandler().toString();
if (storageHandlerStr
.equals(Constants.DRUID_HIVE_STORAGE_HANDLER_ID)) {
return TableType.DRUID;
}
if (storageHandlerStr
.equals(Constants.JDBC_HIVE_STORAGE_HANDLER_ID)) {
return TableType.JDBC;
}
}
return TableType.NATIVE;
}
private RelNode genFilterRelNode(ASTNode filterNode, RelNode srcRel,
ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR,
boolean useCaching) throws SemanticException {
RexNode filterExpression = genRexNode(filterNode, relToHiveRR.get(srcRel),
outerRR, null, useCaching, cluster.getRexBuilder());
return genFilterRelNode(filterExpression, srcRel, outerNameToPosMap, outerRR);
}
private RelNode genFilterRelNode(RexNode filterExpression, RelNode srcRel,
ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR) throws SemanticException {
if (RexUtil.isLiteral(filterExpression, false)
&& filterExpression.getType().getSqlTypeName() != SqlTypeName.BOOLEAN) {
// Queries like "select * from t1 where 'foo';" make Calcite's rule
// PushFilterThroughProject choke. Arguably, we could insert a cast to
// boolean in such cases, but since Postgres, Oracle and MS SQL Server
// fail at compile time for such queries, it's an arcane corner case,
// not worth adding that complexity.
throw new CalciteSemanticException("Filter expression with non-boolean return type.",
UnsupportedFeature.Filter_expression_with_non_boolean_return_type);
}
final ImmutableMap<String, Integer> hiveColNameCalcitePosMap =
this.relToHiveColNameCalcitePosMap.get(srcRel);
filterExpression = new CorrelationConverter(
new InputContext(srcRel.getRowType(), hiveColNameCalcitePosMap, relToHiveRR.get(srcRel)),
outerNameToPosMap, outerRR, subqueryId).apply(filterExpression);
RexNode factoredFilterExpression = RexUtil
.pullFactors(cluster.getRexBuilder(), filterExpression);
RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), srcRel,
HiveCalciteUtil.fixNullability(cluster.getRexBuilder(),
factoredFilterExpression, RelOptUtil.getFieldTypeList(srcRel.getRowType())));
this.relToHiveColNameCalcitePosMap.put(filterRel, hiveColNameCalcitePosMap);
relToHiveRR.put(filterRel, relToHiveRR.get(srcRel));
return filterRel;
}
/**
* Shuttle that replaces certain input references with correlation
* variables if needed, e.g. references to columns of the outer query
* from within a correlated subquery.
*/
private class CorrelationConverter extends RexShuttle {
private final InputContext inputContext;
private final ImmutableMap<Integer, String> outerPositionToColumnName;
private final RowResolver outerRowResolver;
private final int correlatedId;
private CorrelationConverter(InputContext inputContext,
ImmutableMap<String, Integer> outerColumnNameToPosition, RowResolver outerRowResolver,
int correlatedId) {
this.inputContext = inputContext;
this.outerPositionToColumnName = outerColumnNameToPosition == null ?
null : ImmutableBiMap.copyOf(outerColumnNameToPosition).inverse();
this.outerRowResolver = outerRowResolver;
this.correlatedId = correlatedId;
}
@Override
public RexNode visitInputRef(RexInputRef col) {
InputContext context = null;
if (inputContext.inputRowResolver == null) {
context = inputContext;
} else {
int index = col.getIndex();
String colName = inputContext.positionToColumnName.get(index);
if (colName != null) {
context = inputContext;
}
}
if (context == null) {
// We have a correlated column; build its data type from the outer RR.
RelDataType rowType;
try {
rowType = TypeConverter.getType(cluster, outerRowResolver, null);
} catch (CalciteSemanticException e) {
throw new RuntimeException("Error converting type", e);
}
int index = col.getIndex() - inputContext.inputRowType.getFieldList().size();
if (outerPositionToColumnName.get(index) == null) {
throw new RuntimeException(ErrorMsg.INVALID_COLUMN_NAME.getMsg());
}
CorrelationId colCorr = new CorrelationId(correlatedId);
RexNode corExpr = cluster.getRexBuilder().makeCorrel(rowType, colCorr);
return cluster.getRexBuilder().makeFieldAccess(corExpr, index);
}
int pos = col.getIndex();
return cluster.getRexBuilder().makeInputRef(
context.inputRowType.getFieldList().get(pos).getType(), pos);
}
}
private RelNode genLateralViewPlans(ASTNode lateralView, Map<String, RelNode> aliasToRel)
throws SemanticException {
final RexBuilder rexBuilder = this.cluster.getRexBuilder();
final RelDataTypeFactory dtFactory = this.cluster.getTypeFactory();
final String inlineFunctionName =
GenericUDTFInline.class.getAnnotation(Description.class).name();
int numChildren = lateralView.getChildCount();
assert (numChildren == 2);
// 1) Obtain input and all related data structures
ASTNode next = (ASTNode) lateralView.getChild(1);
RelNode inputRel = null;
switch (next.getToken().getType()) {
case HiveParser.TOK_TABREF:
case HiveParser.TOK_SUBQUERY:
case HiveParser.TOK_PTBLFUNCTION:
String inputTableAlias = getTableAlias(next);
inputRel = aliasToRel.get(inputTableAlias);
break;
case HiveParser.TOK_LATERAL_VIEW:
inputRel = genLateralViewPlans(next, aliasToRel);
break;
default:
throw new SemanticException(ASTErrorUtils.getMsg(
ErrorMsg.LATERAL_VIEW_INVALID_CHILD.getMsg(), lateralView));
}
// Input row resolver
RowResolver inputRR = this.relToHiveRR.get(inputRel);
// Extract input refs. They will serve as input for the function invocation
List<RexNode> inputRefs = Lists.transform(inputRel.getRowType().getFieldList(),
input -> new RexInputRef(input.getIndex(), input.getType()));
// Extract type for the arguments
List<RelDataType> inputRefsTypes = new ArrayList<>();
for (int i = 0; i < inputRefs.size(); i++) {
inputRefsTypes.add(inputRefs.get(i).getType());
}
// Input name to position map
ImmutableMap<String, Integer> inputPosMap = this.relToHiveColNameCalcitePosMap.get(inputRel);
// 2) Generate HiveTableFunctionScan RelNode for lateral view
// TODO: Support different functions (not only INLINE) with LATERAL VIEW JOIN
// ^(TOK_LATERAL_VIEW ^(TOK_SELECT ^(TOK_SELEXPR ^(TOK_FUNCTION Identifier["inline"] valuesClause) identifier* tableAlias)))
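// e.g. an illustrative query such as
// "... LATERAL VIEW inline(array(struct(1,'a'))) lv AS id, name"
// produces this shape, with "lv" as the table alias and "id"/"name" as the
// column aliases.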
final ASTNode selExprClause =
(ASTNode) lateralView.getChild(0).getChild(0);
final ASTNode functionCall =
(ASTNode) selExprClause.getChild(0);
if (functionCall.getChild(0).getText().compareToIgnoreCase(inlineFunctionName) != 0) {
throw new SemanticException("CBO only supports inline LVJ");
}
final ASTNode valuesClause =
(ASTNode) functionCall.getChild(1);
// Output types. They will be the concatenation of the input refs types and
// the types of the expressions for the lateral view generated rows
// Generate all expressions from lateral view
RexCall valuesExpr = (RexCall) genRexNode(
valuesClause, inputRR, false, false, cluster.getRexBuilder());
RelDataType valuesRowType = valuesExpr.getType().getComponentType();
List<RexNode> newStructExprs = new ArrayList<>();
for (RexNode structExpr : valuesExpr.getOperands()) {
RexCall structCall = (RexCall) structExpr;
List<RexNode> exprs = new ArrayList<>(inputRefs);
exprs.addAll(structCall.getOperands());
newStructExprs.add(rexBuilder.makeCall(structCall.op, exprs));
}
RexNode convertedFinalValuesExpr =
rexBuilder.makeCall(valuesExpr.op, newStructExprs);
// The return type will be the concatenation of input type and original values type
RelDataType retType = SqlValidatorUtil.deriveJoinRowType(inputRel.getRowType(),
valuesRowType, JoinRelType.INNER, dtFactory, null, ImmutableList.of());
// Create inline SQL operator
FunctionInfo inlineFunctionInfo = FunctionRegistry.getFunctionInfo(inlineFunctionName);
SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(
inlineFunctionName, inlineFunctionInfo.getGenericUDTF(),
ImmutableList.copyOf(inputRefsTypes), retType);
RelNode htfsRel = HiveTableFunctionScan.create(cluster, TraitsUtil.getDefaultTraitSet(cluster),
ImmutableList.of(inputRel), rexBuilder.makeCall(calciteOp, convertedFinalValuesExpr),
null, retType, null);
// 3) Keep track of colname-to-posmap && RR for new op
RowResolver outputRR = new RowResolver();
// Add all input columns
if (!RowResolver.add(outputRR, inputRR)) {
LOG.warn("Duplicates detected when adding columns to RR: see previous message");
}
// Add all columns from lateral view
// First we extract the information that the query provides
String tableAlias = null;
List<String> columnAliases = new ArrayList<>();
Set<String> uniqueNames = new HashSet<>();
for (int i = 1; i < selExprClause.getChildren().size(); i++) {
ASTNode child = (ASTNode) selExprClause.getChild(i);
switch (child.getToken().getType()) {
case HiveParser.TOK_TABALIAS:
tableAlias = unescapeIdentifier(child.getChild(0).getText());
break;
default:
String colAlias = unescapeIdentifier(child.getText());
if (uniqueNames.contains(colAlias)) {
// Column aliases defined by query for lateral view output are duplicated
throw new SemanticException(ErrorMsg.COLUMN_ALIAS_ALREADY_EXISTS.getMsg(colAlias));
}
columnAliases.add(colAlias);
uniqueNames.add(colAlias);
}
}
if (tableAlias == null) {
// Parser enforces that a table alias is added, but check again
throw new SemanticException("Alias should be specified for LVJ");
}
if (!columnAliases.isEmpty() &&
columnAliases.size() != valuesRowType.getFieldCount()) {
// The number of column aliases does not match the number of columns
// generated by the lateral view
throw new SemanticException(ErrorMsg.UDTF_ALIAS_MISMATCH.getMsg());
}
if (columnAliases.isEmpty()) {
// Auto-generate column aliases
for (int i = 0; i < valuesRowType.getFieldCount(); i++) {
columnAliases.add(SemanticAnalyzer.getColumnInternalName(i));
}
}
ListTypeInfo listTypeInfo = (ListTypeInfo) TypeConverter.convert(valuesExpr.getType()); // Array should have ListTypeInfo
StructTypeInfo typeInfos = (StructTypeInfo) listTypeInfo.getListElementTypeInfo(); // Within the list, we extract types
for (int i = 0, j = 0; i < columnAliases.size(); i++) {
String internalColName;
do {
internalColName = SemanticAnalyzer.getColumnInternalName(j++);
} while (inputRR.getPosition(internalColName) != -1);
outputRR.put(tableAlias, columnAliases.get(i),
new ColumnInfo(internalColName, typeInfos.getAllStructFieldTypeInfos().get(i),
tableAlias, false));
}
this.relToHiveColNameCalcitePosMap.put(htfsRel, buildHiveToCalciteColumnMap(outputRR));
this.relToHiveRR.put(htfsRel, outputRR);
// 4) Return new operator
return htfsRel;
}
private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean forHavingClause,
Map<ASTNode, QBSubQueryParseInfo> subQueryToRelNode)
throws CalciteSubquerySemanticException {
Set<ASTNode> corrScalarQueriesWithAgg = new HashSet<ASTNode>();
boolean isSubQuery = false;
boolean enableJoinReordering = false;
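// Walk the expression AST looking for subquery expressions. E.g. for
// "where t.x in (select y from s)" the TOK_SUBQUERY_EXPR under the IN
// operator is found and its inner select is planned as its own QB ("sq_1").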
try {
Deque<ASTNode> stack = new ArrayDeque<ASTNode>();
stack.push(node);
while (!stack.isEmpty()) {
ASTNode next = stack.pop();
switch (next.getType()) {
case HiveParser.TOK_SUBQUERY_EXPR:
QBSubQueryParseInfo parseInfo = QBSubQueryParseInfo.parse(next);
if (parseInfo.hasFullAggregate() && (
parseInfo.getOperator().getType() == QBSubQuery.SubQueryType.EXISTS ||
parseInfo.getOperator().getType() == QBSubQuery.SubQueryType.NOT_EXISTS)) {
subQueryToRelNode.put(next, parseInfo);
isSubQuery = true;
break;
}
// Disallow subqueries which Hive doesn't currently support
SubQueryUtils.subqueryRestrictionCheck(qb, next, srcRel, forHavingClause,
corrScalarQueriesWithAgg, ctx, this.relToHiveRR);
String sbQueryAlias = "sq_" + qb.incrNumSubQueryPredicates();
QB qbSQ = new QB(qb.getId(), sbQueryAlias, true);
qbSQ.setInsideView(qb.isInsideView());
Phase1Ctx ctx1 = initPhase1Ctx();
ASTNode subQueryRoot = (ASTNode) next.getChild(1);
doPhase1(subQueryRoot, qbSQ, ctx1, null);
getMetaData(qbSQ);
this.subqueryId++;
RelNode subQueryRelNode =
genLogicalPlan(qbSQ, false, relToHiveColNameCalcitePosMap.get(srcRel), relToHiveRR.get(srcRel));
if (subQueryRelNode instanceof HiveProject) {
subQueryMap.put(subQueryRelNode, subQueryRoot);
}
subQueryToRelNode.put(next, parseInfo.setSubQueryRelNode(subQueryRelNode));
// Keep track of subqueries which are scalar, correlated and contain an
// aggregate expression. These are later special-cased in the subquery
// remove rule: for correlated scalar queries with aggregates we have to
// take care of the case where the inner aggregate happens on an empty result.
if (corrScalarQueriesWithAgg.contains(next)) {
corrScalarRexSQWithAgg.add(subQueryRelNode);
}
isSubQuery = true;
enableJoinReordering = true;
break;
default:
int childCount = next.getChildCount();
for (int i = childCount - 1; i >= 0; i--) {
stack.push((ASTNode) next.getChild(i));
}
}
}
} catch (SemanticException e) {
throw new CalciteSubquerySemanticException(e.getMessage());
}
if (enableJoinReordering) {
// since subqueries will later be rewritten into JOINs we want join reordering logic to trigger
profilesCBO.add(ExtendedCBOProfile.JOIN_REORDERING);
}
return isSubQuery;
}
private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR, boolean forHavingClause)
throws SemanticException {
final Map<ASTNode, QBSubQueryParseInfo> subQueryToRelNode = new HashMap<>();
boolean isSubQuery = genSubQueryRelNode(qb, searchCond, srcRel, forHavingClause, subQueryToRelNode);
if(isSubQuery) {
RexNode filterExpression = genRexNode(searchCond, relToHiveRR.get(srcRel),
outerRR, subQueryToRelNode, forHavingClause, cluster.getRexBuilder());
ImmutableMap<String, Integer> hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap
.get(srcRel);
filterExpression = new CorrelationConverter(
new InputContext(srcRel.getRowType(), hiveColNameCalcitePosMap, relToHiveRR.get(srcRel)),
outerNameToPosMap, outerRR, subqueryId).apply(filterExpression);
RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
srcRel, filterExpression);
relToHiveColNameCalcitePosMap.put(filterRel, relToHiveColNameCalcitePosMap.get(srcRel));
relToHiveRR.put(filterRel, relToHiveRR.get(srcRel));
return filterRel;
} else {
return genFilterRelNode(searchCond, srcRel, outerNameToPosMap, outerRR, forHavingClause);
}
}
private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, ImmutableMap<String, Integer> outerNameToPosMap,
RowResolver outerRR, boolean forHavingClause) throws SemanticException {
RelNode filterRel = null;
Iterator<ASTNode> whereClauseIterator = getQBParseInfo(qb).getDestToWhereExpr().values()
.iterator();
if (whereClauseIterator.hasNext()) {
filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0), srcRel,
outerNameToPosMap, outerRR, forHavingClause);
}
return filterRel;
}
/**
* This method creates a HiveFilter containing a filter expression to enforce constraints.
* Constraints to check: NOT NULL and CHECK.
* The return value is the pair of the constraint HiveFilter and the corresponding RowResolver,
* or null if the target has no constraints defined or all of them are disabled.
*/
private Pair<RelNode, RowResolver> genConstraintFilterLogicalPlan(
QB qb, Pair<RelNode, RowResolver> selPair, ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR)
throws SemanticException {
if (qb.getIsQuery()) {
return null;
}
String dest = qb.getParseInfo().getClauseNames().iterator().next();
if (!updating(dest)) {
return null;
}
RowResolver inputRR = relToHiveRR.get(selPair.left);
RexNode constraintUDF = RexNodeTypeCheck.genConstraintsExpr(
conf, cluster.getRexBuilder(), getTargetTable(qb, dest), updating(dest), inputRR);
if (constraintUDF == null) {
return null;
}
RelNode constraintRel = genFilterRelNode(constraintUDF, selPair.left, outerNameToPosMap, outerRR);
List<RexNode> originalInputRefs = toRexNodeList(selPair.left);
List<RexNode> selectedRefs = originalInputRefs.subList(0, selPair.right.getColumnInfos().size());
return new Pair<>(genSelectRelNode(selectedRefs, selPair.right, constraintRel), selPair.right);
}
private AggregateCall convertGBAgg(AggregateInfo agg, List<RexNode> gbChildProjLst,
HashMap<String, Integer> rexNodeToPosMap, Integer childProjLstIndx) throws SemanticException {
// 1. Get agg fn ret type in Calcite
RelDataType aggFnRetType = TypeConverter.convert(agg.getReturnType(),
this.cluster.getTypeFactory());
// 2. Convert Agg Fn args and type of args to Calcite
List<Integer> argList = new ArrayList<>();
ImmutableList.Builder<RelDataType> aggArgRelDTBldr = ImmutableList.builder();
for (RexNode rexNd : agg.getParameters()) {
Integer inputIndx = rexNodeToPosMap.get(rexNd.toString());
if (inputIndx == null) {
gbChildProjLst.add(rexNd);
rexNodeToPosMap.put(rexNd.toString(), childProjLstIndx);
inputIndx = childProjLstIndx;
childProjLstIndx++;
}
argList.add(inputIndx);
aggArgRelDTBldr.add(rexNd.getType());
}
// 3. Get Aggregation FN from Calcite given name, ret type and input arg
// type
final SqlAggFunction aggregation = SqlFunctionConverter.getCalciteAggFn(agg.getAggregateName(), agg.isDistinct(),
aggArgRelDTBldr.build(), aggFnRetType);
return new AggregateCall(aggregation, agg.isDistinct(), argList, aggFnRetType, null);
}
private RelNode genGBRelNode(List<RexNode> gbExprs, List<AggregateInfo> aggInfoLst,
List<Long> groupSets, RelNode srcRel) throws SemanticException {
final boolean hasGroupSets = groupSets != null && !groupSets.isEmpty();
final List<RexNode> gbChildProjLst = Lists.newArrayList();
final HashMap<String, Integer> rexNodeToPosMap = new HashMap<>();
final List<Integer> groupSetPositions = Lists.newArrayList();
Integer gbIndx = 0;
for (RexNode gbExpr : gbExprs) {
gbChildProjLst.add(gbExpr);
groupSetPositions.add(gbIndx);
rexNodeToPosMap.put(gbExpr.toString(), gbIndx);
gbIndx++;
}
final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
// Grouping sets: we need to transform them into ImmutableBitSet
// objects for Calcite
List<ImmutableBitSet> transformedGroupSets = null;
if(hasGroupSets) {
Set<ImmutableBitSet> setTransformedGroupSets =
new HashSet<>(groupSets.size());
for(long val: groupSets) {
setTransformedGroupSets.add(convert(val, groupSet.cardinality()));
}
// Calcite expects the grouping sets sorted and without duplicates
transformedGroupSets = new ArrayList<>(setTransformedGroupSets);
Collections.sort(transformedGroupSets, ImmutableBitSet.COMPARATOR);
}
List<AggregateCall> aggregateCalls = Lists.newArrayList();
for (AggregateInfo agg : aggInfoLst) {
aggregateCalls.add(
convertGBAgg(agg, gbChildProjLst, rexNodeToPosMap, gbChildProjLst.size()));
}
if (hasGroupSets) {
// Create GroupingID column
AggregateCall aggCall = AggregateCall.create(HiveGroupingID.INSTANCE,
false, new ImmutableList.Builder<Integer>().build(), -1,
this.cluster.getTypeFactory().createSqlType(SqlTypeName.BIGINT),
HiveGroupingID.INSTANCE.getName());
aggregateCalls.add(aggCall);
}
if (gbChildProjLst.isEmpty()) {
// This will happen for count(*); in such cases we arbitrarily pick the
// first column from srcRel.
gbChildProjLst.add(this.cluster.getRexBuilder().makeInputRef(srcRel, 0));
}
// Create input project fixing up nullability of inputs
RelNode gbInputRel = HiveProject.create(
srcRel,
HiveCalciteUtil.fixNullability(cluster.getRexBuilder(), gbChildProjLst, RelOptUtil.getFieldTypeList(srcRel.getRowType())),
null);
HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
gbInputRel, groupSet, transformedGroupSets, aggregateCalls);
return aggregateRel;
}
/* This method returns the flipped big-endian bit representation of value. */
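// Worked example: value=5 (binary 101) with length=3 sets bits {0, 2}
// (big-endian), and the flip below turns that into {1}, i.e. only the
// middle GROUP BY column participates in this grouping set.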
private ImmutableBitSet convert(long value, int length) {
BitSet bits = new BitSet();
for (int index = length - 1; index >= 0; index--) {
if (value % 2 != 0) {
bits.set(index);
}
value = value >>> 1;
}
// We flip the bits because Calcite considers that '1'
// means that the column participates in the GroupBy
// and '0' does not, as opposed to grouping_id.
bits.flip(0, length);
return ImmutableBitSet.FROM_BIT_SET.apply(bits);
}
private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo,
RowResolver gByInputRR, RowResolver gByRR) {
if (gByExpr.getType() == HiveParser.DOT
&& gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) {
String tab_alias = unescapeIdentifier(gByExpr.getChild(0).getChild(0)
.getText().toLowerCase());
String col_alias = unescapeIdentifier(gByExpr.getChild(1).getText().toLowerCase());
gByRR.put(tab_alias, col_alias, colInfo);
} else if (gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL) {
String col_alias = unescapeIdentifier(gByExpr.getChild(0).getText().toLowerCase());
String tab_alias = null;
/*
* If the input to the GBy has a tab alias for the column, then add an
* entry based on that tab_alias. E.g. the query "select b.x,
* count(*) from t1 b group by x" needs (tab_alias=b, col_alias=x) in the
* GBy RR. tab_alias=b comes from looking at the RowResolver that is the
* ancestor before any GBy/ReduceSinks added for the GBY operation.
*/
try {
ColumnInfo pColInfo = gByInputRR.get(tab_alias, col_alias);
tab_alias = pColInfo == null ? null : pColInfo.getTabAlias();
} catch (SemanticException se) {
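// Ignore: the lookup is best-effort; if the column cannot be resolved
// we simply keep tab_alias as null.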
}
gByRR.put(tab_alias, col_alias, colInfo);
}
}
private void addToGBExpr(RowResolver groupByOutputRowResolver,
RowResolver groupByInputRowResolver, ASTNode grpbyExpr, RexNode grpbyExprNDesc,
List<RexNode> gbExprNDescLst, List<String> outputColumnNames) {
int i = gbExprNDescLst.size();
String field = SemanticAnalyzer.getColumnInternalName(i);
outputColumnNames.add(field);
gbExprNDescLst.add(grpbyExprNDesc);
ColumnInfo oColInfo = new ColumnInfo(field, TypeConverter.convert(grpbyExprNDesc.getType()), null, false);
groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo);
addAlternateGByKeyMappings(grpbyExpr, oColInfo, groupByInputRowResolver,
groupByOutputRowResolver);
}
private AggregateInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver inputRR)
throws SemanticException {
List<RexNode> aggParameters = new ArrayList<>();
for (int i = 1; i <= aggFnLstArgIndx; i++) {
RexNode parameterExpr = genRexNode(
(ASTNode) aggAst.getChild(i), inputRR, cluster.getRexBuilder());
aggParameters.add(parameterExpr);
}
boolean isDistinct = aggAst.getType() == HiveParser.TOK_FUNCTIONDI;
boolean isAllColumns = aggAst.getType() == HiveParser.TOK_FUNCTIONSTAR;
String aggName = unescapeIdentifier(aggAst.getChild(0).getText());
AggregateInfo aInfo = functionHelper.getWindowAggregateFunctionInfo(
isDistinct, isAllColumns, aggName, aggParameters);
// If that did not work, try GenericUDF translation
if (aInfo == null) {
TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, cluster.getRexBuilder());
// We allow stateful functions in the SELECT list (but nowhere else)
tcCtx.setAllowStatefulFunctions(true);
tcCtx.setAllowDistinctFunctions(false);
RexNode exp = genRexNode((ASTNode) aggAst.getChild(0), inputRR, tcCtx);
aInfo = new AggregateInfo(
aggParameters, TypeConverter.convert(exp.getType()), aggName, isDistinct);
}
return aInfo;
}
/**
* Generate a group by plan.
*/
private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
RelNode groupByRel = null;
QBParseInfo qbp = getQBParseInfo(qb);
// 1. Gather GB Expressions (AST) (GB + Aggregations)
// NOTE: Multi Insert is not supported
String destClauseName = qbp.getClauseNames().iterator().next();
// Check and transform "group by *". This will only happen for "select distinct *".
// Here "genSelectPlan" is being leveraged.
// The main benefits are (1) removing virtual columns that should
// not be included in the group by; (2) adding the fully qualified column names to
// unParseTranslator so that views are supported. The drawback is that an additional
// SEL op is added. If it is not necessary, it will be removed by the
// NonBlockingOpDeDupProc optimizer because it matches the SEL%SEL% rule.
ASTNode selExprList = qb.getParseInfo().getSelForClause(destClauseName);
SubQueryUtils.checkForTopLevelSubqueries(selExprList);
if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI
&& selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount() == 1) {
ASTNode node = (ASTNode) selExprList.getChild(0).getChild(0);
if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
// As we said before, here we use genSelectLogicalPlan to rewrite AllColRef
srcRel = genSelectLogicalPlan(qb, srcRel, srcRel, null, null, true).getKey();
RowResolver rr = relToHiveRR.get(srcRel);
qbp.setSelExprForClause(destClauseName, genSelectDIAST(rr));
}
}
// Select DISTINCT + windowing; GBy handled by genSelectForWindowing
if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI &&
!qb.getAllWindowingSpecs().isEmpty()) {
return null;
}
List<ASTNode> groupByNodes = getGroupByForClause(qbp, destClauseName);
Map<String, ASTNode> aggregationTrees = qbp.getAggregationExprsForClause(destClauseName);
boolean hasGrpByAstExprs = groupByNodes != null && !groupByNodes.isEmpty();
boolean hasAggregationTrees = aggregationTrees != null && !aggregationTrees.isEmpty();
final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty()
|| !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty());
// 2. Sanity check
if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)
&& qbp.getDistinctFuncExprsForClause(destClauseName).size() > 1) {
throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.getMsg());
}
if (cubeRollupGrpSetPresent) {
if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) {
throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOMAPAGGR.getMsg());
}
if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
if (qbp.getDestGroupingSets().size() > conf
.getIntVar(HiveConf.ConfVars.HIVE_NEW_JOB_GROUPING_SET_CARDINALITY)) {
String errorMsg = "The number of rows per input row due to grouping sets is "
+ qbp.getDestGroupingSets().size();
throw new SemanticException(
ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_SKEW.getMsg(errorMsg));
}
}
}
if (hasGrpByAstExprs || hasAggregationTrees) {
List<RexNode> groupByExpressions = new ArrayList<>();
List<String> outputColumnNames = new ArrayList<>();
// 3. Input, Output Row Resolvers
RowResolver groupByInputRowResolver = this.relToHiveRR.get(srcRel);
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
if (hasGrpByAstExprs) {
// 4. Construct GB Keys (ExprNode)
for (int i = 0; i < groupByNodes.size(); ++i) {
ASTNode groupByNode = groupByNodes.get(i);
Map<ASTNode, RexNode> astToRexNodeMap = genAllRexNode(
groupByNode, groupByInputRowResolver, cluster.getRexBuilder());
RexNode groupByExpression = astToRexNodeMap.get(groupByNode);
if (groupByExpression == null) {
throw new CalciteSemanticException("Invalid Column Reference: " + groupByNode.dump(),
UnsupportedFeature.Invalid_column_reference);
}
addToGBExpr(groupByOutputRowResolver, groupByInputRowResolver, groupByNode,
groupByExpression, groupByExpressions, outputColumnNames);
}
}
// 5. GroupingSets, Cube, Rollup
int groupingColsSize = groupByExpressions.size();
List<Long> groupingSets = null;
if (cubeRollupGrpSetPresent) {
groupingSets = getGroupByGroupingSetsForClause(qbp, destClauseName).getRight();
}
// 6. Construct aggregation function Info
ArrayList<AggregateInfo> aggregations = new ArrayList<AggregateInfo>();
if (hasAggregationTrees) {
assert (aggregationTrees != null);
for (ASTNode value : aggregationTrees.values()) {
// 6.1 Determine type of UDAF
// This is the GenericUDAF name
String aggName = unescapeIdentifier(value.getChild(0).getText());
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
// 6.2 Convert UDAF Params to ExprNodeDesc
List<RexNode> aggParameters = new ArrayList<>();
for (int i = 1; i < value.getChildCount(); i++) {
RexNode parameterExpr = genRexNode(
(ASTNode) value.getChild(i), groupByInputRowResolver, cluster.getRexBuilder());
aggParameters.add(parameterExpr);
}
AggregateInfo aInfo = functionHelper.getAggregateFunctionInfo(
isDistinct, isAllColumns, aggName, aggParameters);
aggregations.add(aInfo);
String field = getColumnInternalName(groupingColsSize + aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(value,
new ColumnInfo(field, aInfo.getReturnType(), "", false));
}
}
// 7. If GroupingSets, Cube, Rollup were used, we account for the grouping__id column
if(groupingSets != null && !groupingSets.isEmpty()) {
String field = getColumnInternalName(groupingColsSize + aggregations.size());
outputColumnNames.add(field);
groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(),
new ColumnInfo(
field,
VirtualColumn.GROUPINGID.getTypeInfo(),
null,
true));
}
// 8. We create the group_by operator
groupByRel = genGBRelNode(groupByExpressions, aggregations, groupingSets, srcRel);
relToHiveColNameCalcitePosMap.put(groupByRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver));
relToHiveRR.put(groupByRel, groupByOutputRowResolver);
}
return groupByRel;
}
/**
* Generate the OB (ORDER BY) RelNode and the input Select RelNode that should
* be used to introduce the top constraining Project. If the input Select
* RelNode is not present, then don't introduce the top constraining Select.
*
* @param qb current query block
* @param selPair pair of the RelNode right before OB and its RowResolver
* @param outermostOB whether this is the outermost ORDER BY of the query
* @return the OB RelNode, or null if there is no ORDER BY clause
* @throws SemanticException
*/
private RelNode genOBLogicalPlan(QB qb, Pair<RelNode, RowResolver> selPair,
boolean outermostOB) throws SemanticException {
QBParseInfo qbp = getQBParseInfo(qb);
String dest = qbp.getClauseNames().iterator().next();
ASTNode obAST = qbp.getOrderByForClause(dest);
if (obAST == null) {
return null;
}
// 1. OB expr sanity test: in strict mode, in the presence of
// ORDER BY, LIMIT must be specified.
Integer limit = qb.getParseInfo().getDestLimit(dest);
if (limit == null) {
String error = StrictChecks.checkNoLimit(conf);
if (error != null) {
throw new SemanticException(SemanticAnalyzer.generateErrorMessage(obAST, error));
}
}
OBLogicalPlanGenState obLogicalPlanGenState = beginGenOBLogicalPlan(obAST, selPair, outermostOB);
// 4. Construct SortRel
RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION);
RelCollation canonizedCollation = traitSet.canonize(
RelCollationImpl.of(obLogicalPlanGenState.getFieldCollation()));
RelNode sortRel;
if (limit != null) {
Integer offset = qb.getParseInfo().getDestLimitOffset(dest);
RexNode offsetRN = (offset == null || offset == 0) ?
null : cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(offset));
RexNode fetchRN = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(limit));
sortRel = new HiveSortLimit(cluster, traitSet, obLogicalPlanGenState.getObInputRel(), canonizedCollation,
offsetRN, fetchRN);
} else {
sortRel = new HiveSortLimit(cluster, traitSet, obLogicalPlanGenState.getObInputRel(), canonizedCollation,
null, null);
}
return endGenOBLogicalPlan(obLogicalPlanGenState, sortRel);
}
private RelNode genSBLogicalPlan(QB qb, Pair<RelNode, RowResolver> selPair,
boolean outermostOB) throws SemanticException {
QBParseInfo qbp = getQBParseInfo(qb);
String dest = qbp.getClauseNames().iterator().next();
ASTNode sbAST = qbp.getSortByForClause(dest);
if (sbAST == null) {
return null;
}
OBLogicalPlanGenState obLogicalPlanGenState = beginGenOBLogicalPlan(sbAST, selPair, outermostOB);
// 4. Construct SortRel
RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION);
RelCollation canonizedCollation =
traitSet.canonize(RelCollationImpl.of(obLogicalPlanGenState.getFieldCollation()));
List<Integer> joinKeyPositions = new ArrayList<>(canonizedCollation.getFieldCollations().size());
ImmutableList.Builder<RexNode> builder = ImmutableList.builder();
for (RelFieldCollation relFieldCollation : canonizedCollation.getFieldCollations()) {
int index = relFieldCollation.getFieldIndex();
joinKeyPositions.add(index);
builder.add(cluster.getRexBuilder().makeInputRef(obLogicalPlanGenState.getObInputRel(), index));
}
RelNode sortRel = HiveSortExchange.create(
obLogicalPlanGenState.getObInputRel(),
// In case of SORT BY we do not need a Distribution,
// but the instance RelDistributions.ANY cannot be used here because
// org.apache.calcite.rel.core.Exchange has
// "assert distribution != RelDistributions.ANY;"
new HiveRelDistribution(RelDistribution.Type.ANY, RelDistributions.ANY.getKeys()),
canonizedCollation,
builder.build());
return endGenOBLogicalPlan(obLogicalPlanGenState, sortRel);
}
// - Walk through OB exprs and extract field collations and additional virtual columns needed
// - Add Child Project Rel if needed,
// - Generate Output RR, input Sel Rel for top constraining Sel
private OBLogicalPlanGenState beginGenOBLogicalPlan(
ASTNode obAST, Pair<RelNode, RowResolver> selPair, boolean outermostOB) throws SemanticException {
// selPair.getKey() is the operator right before OB
// selPair.getValue() is RR which only contains columns needed in result
// set. Extra columns needed by order by will be absent from it.
RelNode srcRel = selPair.getKey();
RowResolver selectOutputRR = selPair.getValue();
// 2. Walk through OB exprs and extract field collations and additional
// virtual columns needed
final List<RexNode> newVCLst = new ArrayList<>();
final List<RelFieldCollation> fieldCollations = Lists.newArrayList();
int fieldIndex = 0;
List<Node> obASTExprLst = obAST.getChildren();
List<Pair<ASTNode, TypeInfo>> vcASTTypePairs = new ArrayList<>();
RowResolver inputRR = relToHiveRR.get(srcRel);
RowResolver outputRR = new RowResolver();
int srcRelRecordSz = srcRel.getRowType().getFieldCount();
for (int i = 0; i < obASTExprLst.size(); i++) {
// 2.1 Convert AST Expr to ExprNode
ASTNode orderByNode = (ASTNode) obASTExprLst.get(i);
ASTNode nullObASTExpr = (ASTNode) orderByNode.getChild(0);
ASTNode ref = (ASTNode) nullObASTExpr.getChild(0);
boolean isBothByPos = HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS);
boolean isObyByPos = isBothByPos
|| HiveConf.getBoolVar(conf, ConfVars.HIVE_ORDERBY_POSITION_ALIAS);
// Replace each position alias in ORDER BY with the actual column.
if (ref != null && ref.getToken().getType() == HiveParser.Number) {
if (isObyByPos) {
fieldIndex = getFieldIndexFromColumnNumber(selectOutputRR, ref);
} else { // if not using position alias and it is a number.
LOG.warn("Using constant number "
+ ref.getText()
+ " in order by. If you try to use position alias when hive.orderby.position.alias is false, " +
"the position alias will be ignored.");
}
} else {
// 2.2 Convert ExprNode to RexNode
RexNode orderByExpression = getOrderByExpression(selectOutputRR, inputRR, orderByNode, ref);
// 2.3 Determine the index of ob expr in child schema
// NOTE: Calcite can not take compound exprs in OB without it being
// present in the child (& hence we add a child Project Rel)
if (orderByExpression instanceof RexInputRef) {
fieldIndex = ((RexInputRef) orderByExpression).getIndex();
} else {
fieldIndex = srcRelRecordSz + newVCLst.size();
newVCLst.add(orderByExpression);
vcASTTypePairs.add(new Pair<>(ref, TypeConverter.convert(orderByExpression.getType())));
}
}
// 2.4 Determine the Direction of order by
RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING;
if (orderByNode.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) {
order = RelFieldCollation.Direction.ASCENDING;
}
RelFieldCollation.NullDirection nullOrder;
if (nullObASTExpr.getType() == HiveParser.TOK_NULLS_FIRST) {
nullOrder = RelFieldCollation.NullDirection.FIRST;
} else if (nullObASTExpr.getType() == HiveParser.TOK_NULLS_LAST) {
nullOrder = RelFieldCollation.NullDirection.LAST;
} else {
throw new SemanticException("Unexpected null ordering option: "
+ nullObASTExpr.getType());
}
// 2.5 Add to field collations
fieldCollations.add(new RelFieldCollation(fieldIndex, order, nullOrder));
}
// 3. Add Child Project Rel if needed, Generate Output RR, input Sel Rel
// for top constraining Sel
RelNode obInputRel = srcRel;
if (!newVCLst.isEmpty()) {
List<RexNode> originalInputRefs = toRexNodeList(srcRel);
RowResolver obSyntheticProjectRR = new RowResolver();
if (!RowResolver.add(obSyntheticProjectRR, inputRR)) {
throw new CalciteSemanticException(
"Duplicates detected when adding columns to RR: see previous message",
UnsupportedFeature.Duplicates_in_RR);
}
int vcolPos = inputRR.getRowSchema().getSignature().size();
for (Pair<ASTNode, TypeInfo> astTypePair : vcASTTypePairs) {
obSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo(
SemanticAnalyzer.getColumnInternalName(vcolPos), astTypePair.getValue(), null,
false));
vcolPos++;
}
obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, newVCLst),
obSyntheticProjectRR, srcRel);
if (outermostOB) {
if (!RowResolver.add(outputRR, inputRR)) {
throw new CalciteSemanticException(
"Duplicates detected when adding columns to RR: see previous message",
UnsupportedFeature.Duplicates_in_RR);
}
} else {
if (!RowResolver.add(outputRR, obSyntheticProjectRR)) {
throw new CalciteSemanticException(
"Duplicates detected when adding columns to RR: see previous message",
UnsupportedFeature.Duplicates_in_RR);
}
}
} else {
if (!RowResolver.add(outputRR, inputRR)) {
throw new CalciteSemanticException(
"Duplicates detected when adding columns to RR: see previous message",
UnsupportedFeature.Duplicates_in_RR);
}
}
return new OBLogicalPlanGenState(obInputRel, fieldCollations, selectOutputRR, outputRR, srcRel);
}
private RexNode getOrderByExpression(
RowResolver selectOutputRR, RowResolver inputRR, ASTNode orderByNode, ASTNode ref)
throws SemanticException {
// first try to get it from select
// in case of udtf, selectOutputRR may be null.
RexNode orderByExpression = null;
if (selectOutputRR != null) {
try {
Map<ASTNode, RexNode> astToExprNDescMap = genAllRexNode(ref, selectOutputRR, cluster.getRexBuilder());
orderByExpression = astToExprNDescMap.get(ref);
} catch (SemanticException ex) {
// We can tolerate this, as it matches the previous behavior.
LOG.debug("Cannot find column in " + ref.getText() + ". The error msg is "
+ ex.getMessage());
}
}
// then try to get it from all
if (orderByExpression == null) {
Map<ASTNode, RexNode> astToExprNDescMap = genAllRexNode(ref, inputRR, cluster.getRexBuilder());
orderByExpression = astToExprNDescMap.get(ref);
}
if (orderByExpression == null) {
throw new SemanticException("Invalid order by expression: " + orderByNode.toString());
}
return orderByExpression;
}
// Resolves a position alias to a field index, e.g. "SELECT a, b FROM t ORDER BY 1"
// orders by the first select-list column, a.
private int getFieldIndexFromColumnNumber(RowResolver selectOutputRR, ASTNode ref) throws SemanticException {
int fieldIndex;
int pos = Integer.parseInt(ref.getText());
if (pos > 0 && pos <= selectOutputRR.getColumnInfos().size()) {
// The mapping is direct; note that pos starts from 1 while fieldIndex starts from 0.
fieldIndex = pos - 1;
} else {
throw new SemanticException(
ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg("Position alias: " + pos
+ " does not exist\n" + "The Select List is indexed from 1 to "
+ selectOutputRR.getColumnInfos().size()));
}
return fieldIndex;
}
private List<RexNode> toRexNodeList(RelNode srcRel) {
return srcRel.getRowType().getFieldList().stream()
.map(input -> new RexInputRef(input.getIndex(), input.getType()))
.collect(Collectors.toList());
}
// 5. Update RR maps
// NOTE: The output RR for the SortRel is considered the same as its input; we
// may end up not using a VC that is present in the sort rel. Also note that
// the row type of the sort rel is the type of its child; if the child happens
// to be the synthetic project that we introduced, then that project rel would
// contain the VC.
public RelNode endGenOBLogicalPlan(OBLogicalPlanGenState obLogicalPlanGenState, RelNode sortRel)
throws CalciteSemanticException {
ImmutableMap<String, Integer> hiveColNameCalcitePosMap =
buildHiveToCalciteColumnMap(obLogicalPlanGenState.getOutputRR());
relToHiveRR.put(sortRel, obLogicalPlanGenState.getOutputRR());
relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
if (obLogicalPlanGenState.getSelectOutputRR() != null) {
List<RexNode> originalInputRefs = toRexNodeList(obLogicalPlanGenState.getSrcRel());
List<RexNode> selectedRefs = originalInputRefs.subList(
0, obLogicalPlanGenState.getSelectOutputRR().getColumnInfos().size());
// We need to add select since order by schema may have more columns than result schema.
return genSelectRelNode(selectedRefs, obLogicalPlanGenState.getSelectOutputRR(), sortRel);
} else {
return sortRel;
}
}
private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
HiveRelNode sortRel = null;
QBParseInfo qbp = getQBParseInfo(qb);
SimpleEntry<Integer,Integer> entry =
qbp.getDestToLimit().get(qbp.getClauseNames().iterator().next());
Integer offset = (entry == null) ? null : entry.getKey();
Integer fetch = (entry == null) ? null : entry.getValue();
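// E.g. "LIMIT 10 OFFSET 5" arrives here as offset=5 and fetch=10; with no
// LIMIT clause fetch stays null and no sort rel is created.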
if (fetch != null) {
RexNode offsetRN = (offset == null || offset == 0) ?
null : cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(offset));
RexNode fetchRN = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(fetch));
RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION);
RelCollation canonizedCollation = traitSet.canonize(RelCollations.EMPTY);
sortRel = new HiveSortLimit(cluster, traitSet, srcRel, canonizedCollation, offsetRN, fetchRN);
RowResolver inputRR = relToHiveRR.get(srcRel);
RowResolver outputRR = inputRR.duplicate();
ImmutableMap<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR);
relToHiveRR.put(sortRel, outputRR);
relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
}
return sortRel;
}
private List<RexNode> getPartitionKeys(PartitionSpec ps,
RowResolver inputRR) throws SemanticException {
List<RexNode> pKeys = new ArrayList<>();
if (ps != null) {
List<PartitionExpression> pExprs = ps.getExpressions();
for (PartitionExpression pExpr : pExprs) {
TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, cluster.getRexBuilder());
tcCtx.setAllowStatefulFunctions(true);
RexNode exp = genRexNode(pExpr.getExpression(), inputRR, tcCtx);
pKeys.add(exp);
}
}
return pKeys;
}
private List<RexFieldCollation> getOrderKeys(OrderSpec os,
RowResolver inputRR) throws SemanticException {
List<RexFieldCollation> oKeys = new ArrayList<>();
if (os != null) {
List<OrderExpression> oExprs = os.getExpressions();
for (OrderExpression oExpr : oExprs) {
TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, cluster.getRexBuilder());
tcCtx.setAllowStatefulFunctions(true);
RexNode ordExp = genRexNode(oExpr.getExpression(), inputRR, tcCtx);
Set<SqlKind> flags = new HashSet<SqlKind>();
if (oExpr.getOrder() == org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order.DESC) {
flags.add(SqlKind.DESCENDING);
}
if (oExpr.getNullOrder() == org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.NullOrder.NULLS_FIRST) {
flags.add(SqlKind.NULLS_FIRST);
} else if (oExpr.getNullOrder() == org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.NullOrder.NULLS_LAST) {
flags.add(SqlKind.NULLS_LAST);
} else {
throw new SemanticException(
"Unexpected null ordering option: " + oExpr.getNullOrder());
}
oKeys.add(new RexFieldCollation(ordExp, flags));
}
}
return oKeys;
}
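// Translates one window frame boundary into a Calcite RexWindowBound. E.g.
// the "2 PRECEDING" in "ROWS BETWEEN 2 PRECEDING AND CURRENT ROW" becomes a
// preceding bound with literal amount 2, while UNBOUNDED PRECEDING/FOLLOWING
// and CURRENT ROW produce bounds without an amount literal.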
private RexWindowBound getBound(BoundarySpec bs) {
RexWindowBound rwb = null;
if (bs != null) {
SqlParserPos pos = new SqlParserPos(1, 1);
SqlNode amt = bs.getAmt() == 0 || bs.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT
? null
: SqlLiteral.createExactNumeric(String.valueOf(bs.getAmt()), new SqlParserPos(2, 2));
RexNode amtLiteral = null;
SqlCall sc = null;
if (amt != null) {
amtLiteral = cluster.getRexBuilder().makeLiteral(Integer.valueOf(bs.getAmt()),
cluster.getTypeFactory().createSqlType(SqlTypeName.INTEGER), true);
}
switch (bs.getDirection()) {
case PRECEDING:
if (amt == null) {
rwb = RexWindowBound.create(SqlWindow.createUnboundedPreceding(pos), null);
} else {
sc = (SqlCall) SqlWindow.createPreceding(amt, pos);
rwb = RexWindowBound.create(sc,
cluster.getRexBuilder().makeCall(sc.getOperator(), amtLiteral));
}
break;
case CURRENT:
rwb = RexWindowBound.create(SqlWindow.createCurrentRow(new SqlParserPos(1, 1)), null);
break;
case FOLLOWING:
if (amt == null) {
rwb = RexWindowBound.create(SqlWindow.createUnboundedFollowing(new SqlParserPos(1, 1)),
null);
} else {
sc = (SqlCall) SqlWindow.createFollowing(amt, pos);
rwb = RexWindowBound.create(sc,
cluster.getRexBuilder().makeCall(sc.getOperator(), amtLiteral));
}
break;
}
}
return rwb;
}
private int getWindowSpecIndx(ASTNode wndAST) {
int wi = wndAST.getChildCount() - 1;
if (wi <= 0 || (wndAST.getChild(wi).getType() != HiveParser.TOK_WINDOWSPEC)) {
wi = -1;
}
return wi;
}
private Pair<RexNode, TypeInfo> genWindowingProj(WindowExpressionSpec wExpSpec, RelNode srcRel)
throws SemanticException {
RexNode w = null;
TypeInfo wHiveRetType = null;
if (wExpSpec instanceof WindowFunctionSpec) {
WindowFunctionSpec wFnSpec = (WindowFunctionSpec) wExpSpec;
ASTNode windowProjAst = wFnSpec.getExpression();
// TODO: do we need to get to child?
int wndSpecASTIndx = getWindowSpecIndx(windowProjAst);
// 2. Get Hive Aggregate Info
AggregateInfo hiveAggInfo = getHiveAggInfo(windowProjAst, wndSpecASTIndx - 1,
this.relToHiveRR.get(srcRel));
// 3. Get Calcite Return type for Agg Fn
wHiveRetType = hiveAggInfo.getReturnType();
RelDataType calciteAggFnRetType = TypeConverter.convert(hiveAggInfo.getReturnType(),
this.cluster.getTypeFactory());
// 4. Convert Agg Fn args to Calcite
List<RexNode> calciteAggFnArgs = hiveAggInfo.getParameters();
Builder<RelDataType> calciteAggFnArgsTypeBldr = ImmutableList.builder();
for (int i = 0; i < hiveAggInfo.getParameters().size(); i++) {
calciteAggFnArgsTypeBldr.add(hiveAggInfo.getParameters().get(i).getType());
}
ImmutableList<RelDataType> calciteAggFnArgsType = calciteAggFnArgsTypeBldr.build();
// 5. Get Calcite Agg Fn
final SqlAggFunction calciteAggFn = SqlFunctionConverter.getCalciteAggFn(
hiveAggInfo.getAggregateName(), hiveAggInfo.isDistinct(), calciteAggFnArgsType, calciteAggFnRetType);
// 6. Translate Window spec
RowResolver inputRR = relToHiveRR.get(srcRel);
WindowFunctionSpec wndFuncSpec = (WindowFunctionSpec) wExpSpec;
WindowSpec wndSpec = wndFuncSpec.getWindowSpec();
List<RexNode> partitionKeys = getPartitionKeys(wndSpec.getPartition(), inputRR);
List<RexFieldCollation> orderKeys = getOrderKeys(wndSpec.getOrder(), inputRR);
RexWindowBound lowerBound = getBound(wndSpec.getWindowFrame().getStart());
RexWindowBound upperBound = getBound(wndSpec.getWindowFrame().getEnd());
boolean isRows = wndSpec.getWindowFrame().getWindowType() == WindowType.ROWS;
w = cluster.getRexBuilder().makeOver(calciteAggFnRetType, calciteAggFn, calciteAggFnArgs,
partitionKeys, ImmutableList.<RexFieldCollation> copyOf(orderKeys), lowerBound,
upperBound, isRows, true, false, hiveAggInfo.isDistinct(), !wndFuncSpec.isRespectNulls());
} else {
// TODO: Convert to Semantic Exception
throw new RuntimeException("Unsupported window Spec");
}
return new Pair<>(w, wHiveRetType);
}
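/**
* Generates a Select RelNode that evaluates all windowing expressions of the query
* block on top of srcRel. Returns null if the query block has no windowing specs.
* The ColumnInfos created for the window outputs are collected in newColumns so
* that they can later be excluded from star expansion.
*/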
private RelNode genSelectForWindowing(QB qb, RelNode srcRel, HashSet<ColumnInfo> newColumns)
throws SemanticException {
getQBParseInfo(qb);
WindowingSpec wSpec = (!qb.getAllWindowingSpecs().isEmpty()) ? qb.getAllWindowingSpecs()
.values().iterator().next() : null;
if (wSpec == null) {
return null;
}
// 1. Get valid Window Function Spec
wSpec.validateAndMakeEffective();
List<WindowExpressionSpec> windowExpressions = wSpec.getWindowExpressions();
if (windowExpressions == null || windowExpressions.isEmpty()) {
return null;
}
RowResolver inputRR = this.relToHiveRR.get(srcRel);
// 2. Get RexNodes for original Projections from below
List<RexNode> projsForWindowSelOp = new ArrayList<RexNode>(
HiveCalciteUtil.getProjsFromBelowAsInputRef(srcRel));
// 3. Construct new Row Resolver with everything from below.
RowResolver out_rwsch = new RowResolver();
if (!RowResolver.add(out_rwsch, inputRR)) {
LOG.warn("Duplicates detected when adding columns to RR: see previous message");
}
// 4. Walk through Window Expressions & Construct RexNodes for those,
// Update out_rwsch
final QBParseInfo qbp = getQBParseInfo(qb);
final String selClauseName = qbp.getClauseNames().iterator().next();
final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty()
|| !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty());
for (WindowExpressionSpec wExprSpec : windowExpressions) {
if (!qbp.getDestToGroupBy().isEmpty()) {
// Special handling of grouping function
wExprSpec.setExpression(rewriteGroupingFunctionAST(
getGroupByForClause(qbp, selClauseName), wExprSpec.getExpression(),
!cubeRollupGrpSetPresent));
}
if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) {
Pair<RexNode, TypeInfo> wtp = genWindowingProj(wExprSpec, srcRel);
projsForWindowSelOp.add(wtp.getKey());
// 4.1 Update Output Row Schema
ColumnInfo oColInfo = new ColumnInfo(
SemanticAnalyzer.getColumnInternalName(projsForWindowSelOp.size()), wtp.getValue(),
null, false);
out_rwsch.putExpression(wExprSpec.getExpression(), oColInfo);
newColumns.add(oColInfo);
}
}
return genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel, windowExpressions);
}
private RelNode genSelectRelNode(List<RexNode> calciteColLst, RowResolver out_rwsch,
RelNode srcRel) throws CalciteSemanticException {
return genSelectRelNode(calciteColLst, out_rwsch, srcRel, null);
}
private RelNode genSelectRelNode(List<RexNode> calciteColLst, RowResolver out_rwsch,
RelNode srcRel, List<WindowExpressionSpec> windowExpressions) throws CalciteSemanticException {
// 1. Build Column Names
Set<String> colNamesSet = new HashSet<>();
List<ColumnInfo> cInfoLst = out_rwsch.getRowSchema().getSignature();
List<String> columnNames = new ArrayList<>();
Map<String,String> windowToAlias = null;
if (windowExpressions != null ) {
windowToAlias = new HashMap<>();
for (WindowExpressionSpec wes : windowExpressions) {
windowToAlias.put(wes.getExpression().toStringTree().toLowerCase(), wes.getAlias());
}
}
String[] qualifiedColNames;
String tmpColAlias;
for (int i = 0; i < calciteColLst.size(); i++) {
ColumnInfo cInfo = cInfoLst.get(i);
qualifiedColNames = out_rwsch.reverseLookup(cInfo.getInternalName());
/*
* if (qualifiedColNames[0] != null && !qualifiedColNames[0].isEmpty())
* tmpColAlias = qualifiedColNames[0] + "." + qualifiedColNames[1]; else
*/
tmpColAlias = qualifiedColNames[1];
if (tmpColAlias.contains(".") || tmpColAlias.contains(":")) {
tmpColAlias = cInfo.getInternalName();
}
// Prepend column names with '_o_' if they start with '_c'
/*
* Hive treats names that start with '_c' as internalNames; so change
* the names so we don't run into this issue when converting back to
* Hive AST.
*/
if (tmpColAlias.startsWith("_c")) {
tmpColAlias = "_o_" + tmpColAlias;
} else if (windowToAlias != null && windowToAlias.containsKey(tmpColAlias)) {
tmpColAlias = windowToAlias.get(tmpColAlias);
}
int suffix = 1;
while (colNamesSet.contains(tmpColAlias)) {
tmpColAlias = qualifiedColNames[1] + suffix;
suffix++;
}
colNamesSet.add(tmpColAlias);
columnNames.add(tmpColAlias);
}
// 2. Build Calcite Rel Node for project using converted projections & col
// names. Fix nullability
HiveRelNode selRel = HiveProject.create(
srcRel,
HiveCalciteUtil.fixNullability(cluster.getRexBuilder(), calciteColLst, RelOptUtil.getFieldTypeList(srcRel.getRowType())),
columnNames);
// 3. Keep track of colname-to-posmap && RR for new select
this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(out_rwsch));
this.relToHiveRR.put(selRel, out_rwsch);
return selRel;
}
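/**
* Parses the query hint attached to the first select expression, if present, and
* stores the resulting AST in the QBParseInfo.
*/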
private void setQueryHints(QB qb) throws SemanticException {
QBParseInfo qbp = getQBParseInfo(qb);
String selClauseName = qbp.getClauseNames().iterator().next();
Tree selExpr0 = qbp.getSelForClause(selClauseName).getChild(0);
if (selExpr0.getType() != HiveParser.QUERY_HINT) {
return;
}
String hint = ctx.getTokenRewriteStream().toString(
selExpr0.getTokenStartIndex(), selExpr0.getTokenStopIndex());
LOG.debug("Handling query hints: " + hint);
ParseDriver pd = new ParseDriver();
try {
ASTNode hintNode = pd.parseHint(hint);
qbp.setHints(hintNode);
} catch (ParseException e) {
throw new SemanticException("failed to parse query hint: "+e.getMessage(), e);
}
}
private Pair<RelNode, RowResolver> genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel,
ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR, boolean isAllColRefRewrite)
throws SemanticException {
Pair<RelNode, RowResolver> retNodeRR = internalGenSelectLogicalPlan(qb, srcRel, starSrcRel, outerNameToPosMap,
outerRR, isAllColRefRewrite);
QBParseInfo qbp = getQBParseInfo(qb);
String selClauseName = qbp.getClauseNames().iterator().next();
ASTNode selExprList = qbp.getSelForClause(selClauseName);
if (isSelectDistinct(selExprList) && hasGroupBySibling(selExprList)) {
retNodeRR = genGBSelectDistinctPlan(retNodeRR);
}
return retNodeRR;
}
/**
* NOTE: there can only be one select clause since we don't handle multi-destination inserts.
* @param isAllColRefRewrite
* when it is true, it means that this method is called from group by *, where we use
* genSelectLogicalPlan to rewrite *
* @return a Pair of the select RelNode and a RowResolver: i.e., originalRR, the RR after
* the select, when there is an order by
*/
private Pair<RelNode, RowResolver> internalGenSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel,
ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR, boolean isAllColRefRewrite)
throws SemanticException {
// 0. Generate a Select Node for Windowing
// Exclude the newly-generated select columns from */etc. resolution.
HashSet<ColumnInfo> excludedColumns = new HashSet<ColumnInfo>();
RelNode selForWindow = genSelectForWindowing(qb, srcRel, excludedColumns);
srcRel = (selForWindow == null) ? srcRel : selForWindow;
List<RexNode> columnList = new ArrayList<>();
// 1. Get Select Expression List
QBParseInfo qbp = getQBParseInfo(qb);
String selClauseName = qbp.getClauseNames().iterator().next();
ASTNode selExprList = qbp.getSelForClause(selClauseName);
// make sure that if there is a subquery it is a top-level expression
SubQueryUtils.checkForTopLevelSubqueries(selExprList);
final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty()
|| !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty());
// 2. Row resolvers for input, output
RowResolver outputRR = new RowResolver();
Integer pos = Integer.valueOf(0);
// TODO: will this also fix windowing? try
RowResolver inputRR = this.relToHiveRR.get(srcRel), starRR = inputRR;
inputRR.setCheckForAmbiguity(true);
if (starSrcRel != null) {
starRR = this.relToHiveRR.get(starSrcRel);
}
// 3. Query Hints
// TODO: Handle Query Hints; currently we ignore them
int posn = 0;
boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.QUERY_HINT);
if (hintPresent) {
posn++;
}
// 4. Bailout if select involves Transform
boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM);
if (isInTransform) {
String msg = "SELECT TRANSFORM is currently not supported in CBO,"
+ " turn off cbo to use TRANSFORM.";
LOG.debug(msg);
throw new CalciteSemanticException(msg, UnsupportedFeature.Select_transform);
}
// 5. Check if select involves UDTF
String udtfTableAlias = null;
GenericUDTF genericUDTF = null;
String genericUDTFName = null;
ArrayList<String> udtfColAliases = new ArrayList<String>();
ASTNode expr = (ASTNode) selExprList.getChild(posn).getChild(0);
int exprType = expr.getType();
if (exprType == HiveParser.TOK_FUNCTION || exprType == HiveParser.TOK_FUNCTIONSTAR) {
String funcName = TypeCheckProcFactory.getFunctionText(expr, true);
FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
if (fi != null && fi.getGenericUDTF() != null) {
LOG.debug("Find UDTF " + funcName);
genericUDTF = fi.getGenericUDTF();
genericUDTFName = funcName;
if (!fi.isNative()) {
unparseTranslator.addIdentifierTranslation((ASTNode) expr.getChild(0));
}
if (genericUDTF != null && exprType == HiveParser.TOK_FUNCTIONSTAR) {
genRexNodeRegex(".*", null, (ASTNode) expr.getChild(0),
columnList, null, inputRR, starRR, pos, outputRR, qb.getAliases(), false);
}
}
}
if (genericUDTF != null) {
// Only support a single expression when it's a UDTF
if (selExprList.getChildCount() > 1) {
throw new SemanticException(generateErrorMessage(
(ASTNode) selExprList.getChild(1),
ErrorMsg.UDTF_MULTIPLE_EXPR.getMsg()));
}
ASTNode selExpr = (ASTNode) selExprList.getChild(posn);
// Get the column / table aliases from the expression. Start from 1 as
// 0 is the TOK_FUNCTION.
// Column names can also be inferred from the result of the UDTF.
for (int i = 1; i < selExpr.getChildCount(); i++) {
ASTNode selExprChild = (ASTNode) selExpr.getChild(i);
switch (selExprChild.getType()) {
case HiveParser.Identifier:
udtfColAliases.add(unescapeIdentifier(selExprChild.getText().toLowerCase()));
unparseTranslator.addIdentifierTranslation(selExprChild);
break;
case HiveParser.TOK_TABALIAS:
assert (selExprChild.getChildCount() == 1);
udtfTableAlias = unescapeIdentifier(selExprChild.getChild(0)
.getText());
qb.addAlias(udtfTableAlias);
unparseTranslator.addIdentifierTranslation((ASTNode) selExprChild
.getChild(0));
break;
default:
throw new SemanticException("Find invalid token type " + selExprChild.getType()
+ " in UDTF.");
}
}
LOG.debug("UDTF table alias is " + udtfTableAlias);
LOG.debug("UDTF col aliases are " + udtfColAliases);
}
// 6. Iterate over all expressions (after SELECT)
ASTNode exprList;
if (genericUDTF != null) {
exprList = expr;
} else {
exprList = selExprList;
}
// For UDTFs, skip the function name to get the expressions
int startPosn = genericUDTF != null ? posn + 1 : posn;
for (int i = startPosn; i < exprList.getChildCount(); ++i) {
// 6.1 child can be EXPR AS ALIAS, or EXPR.
ASTNode child = (ASTNode) exprList.getChild(i);
boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2);
// 6.2 EXPR AS (ALIAS,...) parses, but is only allowed for UDTFs.
// This check is not needed and invalid when there is a transform b/c the
// ASTs are slightly different.
if (genericUDTF == null && child.getChildCount() > 2) {
throw new SemanticException(SemanticAnalyzer.generateErrorMessage(
(ASTNode) child.getChild(2), ErrorMsg.INVALID_AS.getMsg()));
}
String tabAlias;
String colAlias;
if (genericUDTF != null) {
tabAlias = null;
colAlias = getAutogenColAliasPrfxLbl() + i;
expr = child;
} else {
// 6.3 Get rid of TOK_SELEXPR
expr = (ASTNode) child.getChild(0);
String[] colRef = getColAlias(child, getAutogenColAliasPrfxLbl(),
inputRR, autogenColAliasPrfxIncludeFuncName(), i);
tabAlias = colRef[0];
colAlias = colRef[1];
if (hasAsClause) {
unparseTranslator.addIdentifierTranslation((ASTNode) child
.getChild(1));
}
}
Map<ASTNode, QBSubQueryParseInfo> subQueryToRelNode = new HashMap<>();
boolean isSubQuery = genSubQueryRelNode(qb, expr, srcRel, false,
subQueryToRelNode);
if(isSubQuery) {
RexNode subQueryExpr = genRexNode(expr, relToHiveRR.get(srcRel),
outerRR, subQueryToRelNode, true, cluster.getRexBuilder());
columnList.add(subQueryExpr);
ColumnInfo colInfo = new ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos),
TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
TypeConverter.convert(subQueryExpr.getType())),
tabAlias, false);
if (!outputRR.putWithCheck(tabAlias, colAlias, null, colInfo)) {
throw new CalciteSemanticException("Cannot add column to RR: " + tabAlias + "."
+ colAlias + " => " + colInfo + " due to duplication, see previous warnings",
UnsupportedFeature.Duplicates_in_RR);
}
pos = Integer.valueOf(pos.intValue() + 1);
} else {
// 6.4 Build ExprNode corresponding to columns
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
pos = genRexNodeRegex(".*",
expr.getChildCount() == 0 ? null : getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(),
expr, columnList, excludedColumns, inputRR, starRR, pos, outputRR, qb.getAliases(), true);
} else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL
&& !hasAsClause
&& !inputRR.getIsExprResolver()
&& isRegex(
unescapeIdentifier(expr.getChild(0).getText()), conf)) {
// In case the expression is a regex COL.
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genRexNodeRegex(unescapeIdentifier(expr.getChild(0).getText()), null,
expr, columnList, excludedColumns, inputRR, starRR, pos, outputRR, qb.getAliases(), true);
} else if (expr.getType() == HiveParser.DOT
&& expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
&& inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0)
.getChild(0).getText().toLowerCase()))
&& !hasAsClause
&& !inputRR.getIsExprResolver()
&& isRegex(
unescapeIdentifier(expr.getChild(1).getText()), conf)) {
// In case the expression is TABLE.COL (col can be regex).
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genRexNodeRegex(
unescapeIdentifier(expr.getChild(1).getText()),
unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()),
expr, columnList, excludedColumns, inputRR, starRR, pos,
outputRR, qb.getAliases(), true);
} else if (ParseUtils.containsTokenOfType(expr, HiveParser.TOK_FUNCTIONDI) &&
!ParseUtils.containsTokenOfType(expr, HiveParser.TOK_WINDOWSPEC) &&
!(srcRel instanceof HiveAggregate ||
(srcRel.getInputs().size() == 1 && srcRel.getInput(0) instanceof HiveAggregate))) {
// Likely a malformed query, e.g., select hash(distinct c1) from t1;
throw new CalciteSemanticException("Distinct without an aggregation.",
UnsupportedFeature.Distinct_without_an_aggregation);
} else {
// Case when this is an expression
TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, cluster.getRexBuilder());
// We allow stateful functions in the SELECT list (but nowhere else)
tcCtx.setAllowStatefulFunctions(true);
if (!qbp.getDestToGroupBy().isEmpty()) {
// Special handling of grouping function
expr = rewriteGroupingFunctionAST(getGroupByForClause(qbp, selClauseName), expr,
!cubeRollupGrpSetPresent);
}
RexNode expression = genRexNode(expr, inputRR, tcCtx);
String recommended = recommendName(expression, colAlias, inputRR);
if (recommended != null && outputRR.get(null, recommended) == null) {
colAlias = recommended;
}
columnList.add(expression);
TypeInfo typeInfo = expression.isA(SqlKind.LITERAL) ?
TypeConverter.convertLiteralType((RexLiteral) expression) :
TypeConverter.convert(expression.getType());
ColumnInfo colInfo = new ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos),
TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo),
tabAlias, false);
outputRR.put(tabAlias, colAlias, colInfo);
pos = Integer.valueOf(pos.intValue() + 1);
}
}
}
// 7. For correlated queries
ImmutableMap<String, Integer> hiveColNameCalcitePosMap =
buildHiveColNameToInputPosMap(columnList, inputRR);
CorrelationConverter cc = new CorrelationConverter(
new InputContext(srcRel.getRowType(), hiveColNameCalcitePosMap, relToHiveRR.get(srcRel)),
outerNameToPosMap, outerRR, subqueryId);
columnList = columnList.stream()
.map(cc::apply)
.collect(Collectors.toList());
// 8. Build Calcite Rel
RelNode outputRel = null;
if (genericUDTF != null) {
// The basic idea for CBO support of UDTF is to treat UDTF as a special
// project.
// In the AST return path, as we just need to generate a SEL_EXPR, we just
// need to remember the expressions and the alias.
// In the OP return path, we need to generate a SEL and then a UDTF,
// following the old semantic analyzer.
return genUDTFPlan(genericUDTF, genericUDTFName, udtfTableAlias, udtfColAliases, qb,
columnList, outputRR, srcRel);
} else {
String dest = qbp.getClauseNames().iterator().next();
ASTNode obAST = qbp.getOrderByForClause(dest);
ASTNode sbAST = qbp.getSortByForClause(dest);
RowResolver originalRR = null;
// We only provide limited support for ORDER BY on unselected columns.
// TODO: support unselected columns in genericUDTF and windowing functions.
// We examine the ORDER BY in this query block and add the columns it
// needs to the select list.
//
// If DISTINCT is present, it is not possible to ORDER BY unselected
// columns, and in fact adding all columns would change the behavior of
// DISTINCT, so we bypass this logic.
if ((obAST != null || sbAST != null)
&& selExprList.getToken().getType() != HiveParser.TOK_SELECTDI
&& !isAllColRefRewrite) {
// 1. OB Expr sanity test
// in strict mode, in the presence of order by, limit must be
// specified
Integer limit = qb.getParseInfo().getDestLimit(dest);
if (limit == null) {
String error = StrictChecks.checkNoLimit(conf);
if (error != null) {
throw new SemanticException(SemanticAnalyzer.generateErrorMessage(obAST, error));
}
}
originalRR = appendInputColumns(srcRel, columnList, outputRR, inputRR);
outputRel = genSelectRelNode(columnList, outputRR, srcRel);
// outputRel is the generated augmented select with extra unselected
// columns, and originalRR is the original generated select
return new Pair<RelNode, RowResolver>(outputRel, originalRR);
} else {
if (qbp.getQualifyExprForClause(dest) != null) {
int originalColumnListSize = columnList.size();
originalRR = appendInputColumns(srcRel, columnList, outputRR, inputRR);
RelNode combinedProject = genSelectRelNode(columnList, outputRR, srcRel);
RelNode qualifyRel = genQualifyLogicalPlan(qb, combinedProject);
List<RexNode> topProjectColumnList = new ArrayList<>(originalColumnListSize);
for (int i = 0; i < originalColumnListSize; ++i) {
topProjectColumnList.add(qualifyRel.getCluster().getRexBuilder().makeInputRef(
qualifyRel.getRowType().getFieldList().get(i).getType(), i));
}
outputRel = genSelectRelNode(topProjectColumnList, originalRR, qualifyRel);
outputRR = originalRR;
} else {
outputRel = genSelectRelNode(columnList, outputRR, srcRel);
}
}
}
// 9. Handle select distinct as GBY if there exist windowing functions
if (selForWindow != null && selExprList.getToken().getType() == HiveParser.TOK_SELECTDI) {
ImmutableBitSet groupSet = ImmutableBitSet.range(outputRel.getRowType().getFieldList().size());
outputRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
outputRel, groupSet, null, new ArrayList<AggregateCall>());
RowResolver groupByOutputRowResolver = new RowResolver();
for (int i = 0; i < outputRR.getColumnInfos().size(); i++) {
ColumnInfo colInfo = outputRR.getColumnInfos().get(i);
ColumnInfo newColInfo = new ColumnInfo(colInfo.getInternalName(),
colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
groupByOutputRowResolver.put(colInfo.getTabAlias(), colInfo.getAlias(), newColInfo);
}
relToHiveColNameCalcitePosMap.put(outputRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver));
this.relToHiveRR.put(outputRel, groupByOutputRowResolver);
}
inputRR.setCheckForAmbiguity(false);
return new Pair<>(outputRel, outputRR);
}
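/**
* Appends the input columns that are not already projected to columnList and
* outputRR, and returns a duplicate of outputRR taken before the columns were
* appended. Used to make unselected columns available to ORDER BY and QUALIFY
* processing.
*/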
private RowResolver appendInputColumns(RelNode srcRel, List<RexNode> columnList, RowResolver outputRR, RowResolver inputRR) throws SemanticException {
RowResolver originalRR;
List<RexNode> originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(),
new Function<RelDataTypeField, RexNode>() {
@Override
public RexNode apply(RelDataTypeField input) {
return new RexInputRef(input.getIndex(), input.getType());
}
});
originalRR = outputRR.duplicate();
for (int i = 0; i < inputRR.getColumnInfos().size(); i++) {
ColumnInfo colInfo = new ColumnInfo(inputRR.getColumnInfos().get(i));
String internalName = SemanticAnalyzer.getColumnInternalName(outputRR.getColumnInfos()
.size() + i);
colInfo.setInternalName(internalName);
// if there is any conflict, then we do not generate it in the new select;
// otherwise, we add it into columnList and generate the new select
if (!outputRR.putWithCheck(colInfo.getTabAlias(), colInfo.getAlias(), internalName,
colInfo)) {
LOG.trace("Column already present in RR. skipping.");
} else {
columnList.add(originalInputRefs.get(i));
}
}
return originalRR;
}
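/**
* Expands a column name regex against the input RowResolver, appends the matching
* columns to exprList as RexNodes, and returns the next free output position.
*/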
Integer genRexNodeRegex(String colRegex, String tabAlias, ASTNode sel,
List<RexNode> exprList, Set<ColumnInfo> excludeCols, RowResolver input,
RowResolver colSrcRR, Integer pos, RowResolver output, List<String> aliases,
boolean ensureUniqueCols) throws SemanticException {
List<org.apache.commons.lang3.tuple.Pair<ColumnInfo, RowResolver>> colList = new ArrayList<>();
Integer i = genColListRegex(colRegex, tabAlias, sel,
colList, excludeCols, input, colSrcRR, pos, output, aliases, ensureUniqueCols);
for (org.apache.commons.lang3.tuple.Pair<ColumnInfo, RowResolver> p : colList) {
exprList.add(RexNodeTypeCheck.toExprNode(p.getLeft(), p.getRight(), 0, cluster.getRexBuilder()));
}
return i;
}
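/**
* Builds the logical plan for a UDTF select, e.g. select explode(arr) as c from t,
* as a HiveTableFunctionScan on top of the input RelNode.
*/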
private Pair<RelNode, RowResolver> genUDTFPlan(GenericUDTF genericUDTF,
String genericUDTFName,
String outputTableAlias,
ArrayList<String> colAliases,
QB qb,
List<RexNode> selectColLst,
RowResolver selectRR,
RelNode input) throws SemanticException {
// No GROUP BY / DISTRIBUTE BY / SORT BY / CLUSTER BY
QBParseInfo qbp = qb.getParseInfo();
if (!qbp.getDestToGroupBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_GROUP_BY.getMsg());
}
if (!qbp.getDestToDistributeBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_DISTRIBUTE_BY.getMsg());
}
if (!qbp.getDestToSortBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_SORT_BY.getMsg());
}
if (!qbp.getDestToClusterBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_CLUSTER_BY.getMsg());
}
if (!qbp.getAliasToLateralViews().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_LATERAL_VIEW.getMsg());
}
LOG.debug("Table alias: " + outputTableAlias + " Col aliases: " + colAliases);
// Create the return type info for the input columns and initialize the
// UDTF
StructTypeInfo type = (StructTypeInfo) TypeConverter.convert(
functionHelper.getReturnType(
functionHelper.getFunctionInfo(genericUDTFName),
selectColLst));
int numUdtfCols = type.getAllStructFieldNames().size();
if (colAliases.isEmpty()) {
// user did not specify alias names; infer them from the UDTF output type
for (String fieldName : type.getAllStructFieldNames()) {
colAliases.add(fieldName);
}
}
// Make sure that the number of column aliases in the AS clause matches
// the number of columns output by the UDTF
int numSuppliedAliases = colAliases.size();
if (numUdtfCols != numSuppliedAliases) {
throw new SemanticException(ErrorMsg.UDTF_ALIAS_MISMATCH.getMsg("expected " + numUdtfCols
+ " aliases " + "but got " + numSuppliedAliases));
}
// Generate the output column info's / row resolver using internal names.
List<ColumnInfo> udtfCols = new ArrayList<ColumnInfo>();
Iterator<String> colAliasesIter = colAliases.iterator();
for (int i = 0; i < type.getAllStructFieldTypeInfos().size(); i++) {
final String fieldName = type.getAllStructFieldNames().get(i);
final TypeInfo fieldTypeInfo = type.getAllStructFieldTypeInfos().get(i);
String colAlias = colAliasesIter.next();
assert (colAlias != null);
// Since the UDTF operator feeds into an LVJ operator that will rename
// all the internal names, we can just use the field name from the UDTF's OI
// as the internal name
ColumnInfo col = new ColumnInfo(fieldName, fieldTypeInfo, outputTableAlias, false);
udtfCols.add(col);
}
// Create the row resolver for this operator from the output columns
RowResolver outputRR = new RowResolver();
for (int i = 0; i < udtfCols.size(); i++) {
outputRR.put(outputTableAlias, colAliases.get(i), udtfCols.get(i));
}
// Add the UDTFOperator to the operator DAG
RelTraitSet traitSet = TraitsUtil.getDefaultTraitSet(cluster);
// Build row type from field <type, name>
RelDataType retType = TypeConverter.getType(cluster, outputRR, null);
Builder<RelDataType> argTypeBldr = ImmutableList.<RelDataType> builder();
RexBuilder rexBuilder = cluster.getRexBuilder();
RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
RowSchema rs = selectRR.getRowSchema();
for (ColumnInfo ci : rs.getSignature()) {
argTypeBldr.add(TypeConverter.convert(ci.getType(), dtFactory));
}
SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(genericUDTFName, genericUDTF,
argTypeBldr.build(), retType);
// Hive UDTF only has a single input
List<RelNode> list = new ArrayList<>();
list.add(input);
RexNode rexNode = cluster.getRexBuilder().makeCall(calciteOp, selectColLst);
RelNode udtf = HiveTableFunctionScan.create(cluster, traitSet, list, rexNode, null, retType,
null);
// Add new rel & its RR to the maps
relToHiveColNameCalcitePosMap.put(udtf, buildHiveToCalciteColumnMap(outputRR));
relToHiveRR.put(udtf, outputRR);
return new Pair<>(udtf, outputRR);
}
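/**
* Implements SELECT DISTINCT on top of a group by as an aggregate that groups on
* all projected columns of the input RelNode.
*/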
private Pair<RelNode, RowResolver> genGBSelectDistinctPlan(Pair<RelNode, RowResolver> srcNodeRR)
throws SemanticException {
RelNode srcRel = srcNodeRR.left;
RelDataType inputRT = srcRel.getRowType();
List<Integer> groupSetPositions =
IntStream.range(0, inputRT.getFieldCount()).boxed().collect(Collectors.toList());
HiveAggregate distAgg = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), srcRel,
ImmutableBitSet.of(groupSetPositions), null, new ArrayList<AggregateCall>());
// This comes from genSelectLogicalPlan, must be a project
assert srcRel instanceof HiveProject;
RowResolver outputRR = srcNodeRR.right;
if (outputRR == null) {
outputRR = relToHiveRR.get(srcRel);
}
relToHiveRR.put(distAgg, outputRR);
relToHiveColNameCalcitePosMap.put(distAgg, relToHiveColNameCalcitePosMap.get(srcRel));
return new Pair<RelNode, RowResolver>(distAgg, outputRR);
}
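/**
* Generates the logical plan for a QB expression, dispatching on its opcode:
* a plain query block or a set operation over two QB expressions.
*/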
private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException {
switch (qbexpr.getOpcode()) {
case NULLOP:
return genLogicalPlan(qbexpr.getQB(), false, null, null);
case UNION:
case INTERSECT:
case INTERSECTALL:
case EXCEPT:
case EXCEPTALL:
RelNode qbexpr1Ops = genLogicalPlan(qbexpr.getQBExpr1());
RelNode qbexpr2Ops = genLogicalPlan(qbexpr.getQBExpr2());
return genSetOpLogicalPlan(qbexpr.getOpcode(), qbexpr.getAlias(), qbexpr.getQBExpr1()
.getAlias(), qbexpr1Ops, qbexpr.getQBExpr2().getAlias(), qbexpr2Ops);
default:
return null;
}
}
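/**
* Generates the logical plan for a query block: FROM (subqueries, tables, joins,
* lateral views), WHERE, GROUP BY, HAVING, SELECT, and then ORDER BY or
* SORT BY plus LIMIT.
*/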
private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
ImmutableMap<String, Integer> outerNameToPosMap,
RowResolver outerRR) throws SemanticException {
RelNode srcRel = null;
RelNode filterRel = null;
RelNode gbRel = null;
RelNode gbHavingRel = null;
RelNode selectRel = null;
RelNode obRel = null;
RelNode sbRel = null;
RelNode limitRel = null;
// First generate all the opInfos for the elements in the from clause
Map<String, RelNode> aliasToRel = new HashMap<String, RelNode>();
// 0. Check if we can handle the SubQuery;
// canHandleQbForCbo returns null if the query can be handled.
String reason = canHandleQbForCbo(queryProperties, conf, false);
if (reason != null) {
String msg = "CBO can not handle Sub Query";
if (LOG.isDebugEnabled()) {
LOG.debug(msg + " because it: " + reason);
}
throw new CalciteSemanticException(msg, UnsupportedFeature.Subquery);
}
// 1. Build Rel For Src (SubQuery, TS, Join)
// 1.1. Recurse over the subqueries to fill the subquery part of the plan
for (String subqAlias : qb.getSubqAliases()) {
QBExpr qbexpr = qb.getSubqForAlias(subqAlias);
RelNode relNode = genLogicalPlan(qbexpr);
ASTNode subqueryRoot = qbexpr.getSubQueryRoot();
if (subqueryRoot != null &&
conf.getBoolVar(ConfVars.HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING_SUBQUERY_SQL) &&
relNode instanceof HiveProject) {
subQueryMap.put(relNode, subqueryRoot);
}
aliasToRel.put(subqAlias, relNode);
if (qb.getViewToTabSchema().containsKey(subqAlias)) {
if (relNode instanceof HiveProject) {
if (this.viewProjectToTableSchema == null) {
this.viewProjectToTableSchema = new LinkedHashMap<>();
}
viewProjectToTableSchema.put((HiveProject) relNode, qb.getViewToTabSchema().get(subqAlias));
} else {
throw new SemanticException("View " + subqAlias + " is corresponding to "
+ relNode.toString() + ", rather than a HiveProject.");
}
}
}
// 1.2 Recurse over all the source tables
for (String tableAlias : qb.getTabAliases()) {
RelNode op = genTableLogicalPlan(tableAlias, qb);
aliasToRel.put(tableAlias, op);
}
if (aliasToRel.isEmpty()) {
// This may happen for queries like select 1; (no source table)
qb.getMetaData().setSrcForAlias(DUMMY_TABLE, getDummyTable());
qb.addAlias(DUMMY_TABLE);
qb.setTabAlias(DUMMY_TABLE, DUMMY_TABLE);
RelNode op = genTableLogicalPlan(DUMMY_TABLE, qb);
dummyTableScan = op;
aliasToRel.put(DUMMY_TABLE, op);
}
// 1.3 process join
// 1.3.1 process hints
setQueryHints(qb);
// 1.3.2 process the actual join
if (qb.getParseInfo().getJoinExpr() != null) {
srcRel = genJoinLogicalPlan(qb.getParseInfo().getJoinExpr(), aliasToRel, outerNameToPosMap, outerRR);
} else {
// If no join then there should only be either 1 TS or 1 SubQuery
Map.Entry<String, RelNode> uniqueAliasToRel = aliasToRel.entrySet().iterator().next();
srcRel = uniqueAliasToRel.getValue();
// If it contains lateral views
List<ASTNode> lateralViews = getQBParseInfo(qb).getAliasToLateralViews().get(uniqueAliasToRel.getKey());
if (lateralViews != null) {
srcRel = genLateralViewPlans(Iterables.getLast(lateralViews), aliasToRel);
}
}
// 2. Build Rel for where Clause
filterRel = genFilterLogicalPlan(qb, srcRel, outerNameToPosMap, outerRR, false);
srcRel = (filterRel == null) ? srcRel : filterRel;
RelNode starSrcRel = srcRel;
// 3. Build Rel for GB Clause
gbRel = genGBLogicalPlan(qb, srcRel);
srcRel = (gbRel == null) ? srcRel : gbRel;
// 4. Build Rel for GB Having Clause
gbHavingRel = genGBHavingLogicalPlan(qb, srcRel);
srcRel = (gbHavingRel == null) ? srcRel : gbHavingRel;
// 5. Build Rel for Select Clause
Pair<RelNode, RowResolver> selPair = genSelectLogicalPlan(qb, srcRel, starSrcRel, outerNameToPosMap, outerRR, false);
selectRel = selPair.getKey();
srcRel = (selectRel == null) ? srcRel : selectRel;
// Build Rel for Constraint checks
Pair<RelNode, RowResolver> constraintPair =
genConstraintFilterLogicalPlan(qb, selPair, outerNameToPosMap, outerRR);
if (constraintPair != null) {
selPair = constraintPair;
}
// 6. Build Rel for OB Clause
obRel = genOBLogicalPlan(qb, selPair, outerMostQB);
if (obRel != null) {
srcRel = obRel;
} else {
// 7. Build Rel for Sort By Clause
sbRel = genSBLogicalPlan(qb, selPair, outerMostQB);
srcRel = (sbRel == null) ? srcRel : sbRel;
// 8. Build Rel for Limit Clause
limitRel = genLimitLogicalPlan(qb, srcRel);
srcRel = (limitRel == null) ? srcRel : limitRel;
}
// 9. In case this QB corresponds to a subquery, modify its RR to point
// to the subquery alias.
if (qb.getParseInfo().getAlias() != null) {
RowResolver rr = this.relToHiveRR.get(srcRel);
RowResolver newRR = new RowResolver();
String alias = qb.getParseInfo().getAlias();
List<String> targetColNames = processTableColumnNames(qb.getParseInfo().getColAliases(), alias);
if (targetColNames.size() > rr.getColumnInfos().size()) {
throw new SemanticException(ErrorMsg.WITH_COL_LIST_NUM_OVERFLOW, alias,
Integer.toString(rr.getColumnInfos().size()), Integer.toString(targetColNames.size()));
}
for (int i = 0; i < rr.getColumnInfos().size(); ++i) {
ColumnInfo colInfo = rr.getColumnInfos().get(i);
String name = colInfo.getInternalName();
String[] tmp = rr.reverseLookup(name);
ColumnInfo newCi = new ColumnInfo(colInfo);
newCi.setTabAlias(alias);
if (i < targetColNames.size()) {
tmp[1] = targetColNames.get(i);
newCi.setAlias(tmp[1]);
} else if ("".equals(tmp[0]) || tmp[1] == null) {
// ast expression is not a valid column name for table
tmp[1] = colInfo.getInternalName();
}
newRR.putWithCheck(alias, tmp[1], colInfo.getInternalName(), newCi);
}
relToHiveRR.put(srcRel, newRR);
relToHiveColNameCalcitePosMap.put(srcRel, buildHiveToCalciteColumnMap(newRR));
}
if (LOG.isDebugEnabled()) {
LOG.debug("Created Plan for Query Block " + qb.getId());
}
setQB(qb);
return srcRel;
}
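/**
* Generates a Filter RelNode for the HAVING clause of the query block, or returns
* null if there is no HAVING clause.
*/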
private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
RelNode gbFilter = null;
QBParseInfo qbp = getQBParseInfo(qb);
String destClauseName = qbp.getClauseNames().iterator().next();
ASTNode havingClause = qbp.getHavingForClause(qbp.getClauseNames().iterator().next());
if (havingClause != null) {
if (!(srcRel instanceof HiveAggregate)) {
// ill-formed query like select * from t1 having c1 > 0;
throw new CalciteSemanticException("Having clause without any group-by.",
UnsupportedFeature.Having_clause_without_any_groupby);
}
ASTNode targetNode = (ASTNode) havingClause.getChild(0);
validateNoHavingReferenceToAlias(qb, targetNode);
if (!qbp.getDestToGroupBy().isEmpty()) {
final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty()
|| !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty());
// Special handling of grouping function
targetNode = rewriteGroupingFunctionAST(getGroupByForClause(qbp, destClauseName), targetNode,
!cubeRollupGrpSetPresent);
}
gbFilter = genFilterRelNode(qb, targetNode, srcRel, null, null, true);
}
return gbFilter;
}
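/**
* Generates a Filter RelNode for the QUALIFY clause of the query block.
*/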
private RelNode genQualifyLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
QBParseInfo qbp = getQBParseInfo(qb);
String destClauseName = qbp.getClauseNames().iterator().next();
ASTNode qualifyClause = qbp.getQualifyExprForClause(destClauseName);
if (qualifyClause == null) {
throw new SemanticException("Missing expression: qualify.");
}
ASTNode targetNode = (ASTNode) qualifyClause.getChild(0);
return genFilterRelNode(qb, targetNode, srcRel, null, null, true);
}
/*
* Bail if the having clause uses select expression aliases for aggregation
* expressions. We could do what Hive does, but this is non-standard
* behavior. Making sure this doesn't cause issues when translating through
* Calcite is not worth it.
*/
private void validateNoHavingReferenceToAlias(QB qb, ASTNode havingExpr)
throws CalciteSemanticException {
QBParseInfo qbPI = qb.getParseInfo();
Map<ASTNode, String> exprToAlias = qbPI.getAllExprToColumnAlias();
/*
* a mouthful, but safe: - a QB is guaranteed to have at least 1
* destination - we don't support multi-insert, so picking the first dest.
*/
Set<String> aggExprs = qbPI.getDestToAggregationExprs().values().iterator().next().keySet();
for (Map.Entry<ASTNode, String> selExpr : exprToAlias.entrySet()) {
ASTNode selAST = selExpr.getKey();
if (!aggExprs.contains(selAST.toStringTree().toLowerCase())) {
continue;
}
final String aliasToCheck = selExpr.getValue();
final Set<Object> aliasReferences = new HashSet<Object>();
TreeVisitorAction action = new TreeVisitorAction() {
@Override
public Object pre(Object t) {
if (ParseDriver.adaptor.getType(t) == HiveParser.TOK_TABLE_OR_COL) {
Object c = ParseDriver.adaptor.getChild(t, 0);
if (c != null && ParseDriver.adaptor.getType(c) == HiveParser.Identifier
&& ParseDriver.adaptor.getText(c).equals(aliasToCheck)) {
aliasReferences.add(t);
}
}
return t;
}
@Override
public Object post(Object t) {
return t;
}
};
new TreeVisitor(ParseDriver.adaptor).visit(havingExpr, action);
if (aliasReferences.size() > 0) {
String havingClause = ctx.getTokenRewriteStream().toString(
havingExpr.getTokenStartIndex(), havingExpr.getTokenStopIndex());
String msg = String.format("Encountered Select alias '%s' in having clause '%s'"
+ " This non standard behavior is not supported with cbo on."
+ " Turn off cbo for these queries.", aliasToCheck, havingClause);
LOG.debug(msg);
throw new CalciteSemanticException(msg, UnsupportedFeature.Select_alias_in_having_clause);
}
}
}
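/**
* Maps each internal column name of the RowResolver to its position in the
* corresponding Calcite row type.
*/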
private ImmutableMap<String, Integer> buildHiveToCalciteColumnMap(RowResolver rr) {
ImmutableMap.Builder<String, Integer> b = new ImmutableMap.Builder<String, Integer>();
for (ColumnInfo ci : rr.getRowSchema().getSignature()) {
b.put(ci.getInternalName(), rr.getPosition(ci.getInternalName()));
}
return b.build();
}
private ImmutableMap<String, Integer> buildHiveColNameToInputPosMap(
List<RexNode> columnList, RowResolver inputRR) {
final ImmutableBitSet refs =
RelOptUtil.InputFinder.bits(columnList, null);
ImmutableMap.Builder<String, Integer> hiveColNameToInputPosMapBuilder =
new ImmutableMap.Builder<>();
for (int ref : refs) {
hiveColNameToInputPosMapBuilder.put(
inputRR.getColumnInfos().get(ref).getInternalName(), ref);
}
return hiveColNameToInputPosMapBuilder.build();
}
private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException {
return qb.getParseInfo();
}
}
/**
* This class stores the partial results of Order/Sort by clause logical plan generation.
* See {@link CalcitePlannerAction#beginGenOBLogicalPlan}, {@link CalcitePlannerAction#endGenOBLogicalPlan}
*/
private static class OBLogicalPlanGenState {
private final RelNode obInputRel;
private final List<RelFieldCollation> canonizedCollation;
private final RowResolver selectOutputRR;
private final RowResolver outputRR;
private final RelNode srcRel;
OBLogicalPlanGenState(RelNode obInputRel, List<RelFieldCollation> canonizedCollation,
RowResolver selectOutputRR, RowResolver outputRR, RelNode srcRel) {
this.obInputRel = obInputRel;
this.canonizedCollation = canonizedCollation;
this.selectOutputRR = selectOutputRR;
this.outputRR = outputRR;
this.srcRel = srcRel;
}
public RelNode getObInputRel() {
return obInputRel;
}
public List<RelFieldCollation> getFieldCollation() {
return canonizedCollation;
}
public RowResolver getSelectOutputRR() {
return selectOutputRR;
}
public RowResolver getOutputRR() {
return outputRR;
}
public RelNode getSrcRel() {
return srcRel;
}
}
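/**
* Resolves a possibly db-qualified table name to a Table object, caching the
* result in tabNameToTabObject.
*/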
@Override
protected Table getTableObjectByName(String tabName, boolean throwException) throws HiveException {
String[] names = Utilities.getDbTableName(tabName);
final String tableName = names[1];
final String dbName = names[0];
String metaTable = null;
if (names.length == 3) {
metaTable = names[2];
}
String fullyQualName = dbName + "." + tableName;
if (metaTable != null) {
fullyQualName = fullyQualName + "." + metaTable;
}
if (!tabNameToTabObject.containsKey(fullyQualName)) {
Table table = db.getTable(dbName, tableName, metaTable, throwException, true, false);
if (table != null) {
tabNameToTabObject.put(fullyQualName, table);
}
return table;
}
return tabNameToTabObject.get(fullyQualName);
}
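/**
* Generates a Calcite {@link RexNode} for the expression, making the outer row
* resolver and the subquery plans available to type checking.
*/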
RexNode genRexNode(ASTNode expr, RowResolver input,
RowResolver outerRR, Map<ASTNode, QBSubQueryParseInfo> subqueryToRelNode,
boolean useCaching, RexBuilder rexBuilder) throws SemanticException {
TypeCheckCtx tcCtx = new TypeCheckCtx(input, rexBuilder, useCaching, false);
tcCtx.setOuterRR(outerRR);
tcCtx.setSubqueryToRelNode(subqueryToRelNode);
return genRexNode(expr, input, tcCtx);
}
/**
* Generates a Calcite {@link RexNode} for the expression using a default TypeCheckCtx.
*/
RexNode genRexNode(ASTNode expr, RowResolver input, RexBuilder rexBuilder)
throws SemanticException {
// Since the user didn't supply a customized type-checking context,
// use default settings.
return genRexNode(expr, input, true, false, rexBuilder);
}
RexNode genRexNode(ASTNode expr, RowResolver input, boolean useCaching,
boolean foldExpr, RexBuilder rexBuilder) throws SemanticException {
TypeCheckCtx tcCtx = new TypeCheckCtx(input, rexBuilder, useCaching, foldExpr);
return genRexNode(expr, input, tcCtx);
}
/**
* Generates Calcite {@link RexNode}s for the expression and its children
* using a default TypeCheckCtx.
*/
Map<ASTNode, RexNode> genAllRexNode(ASTNode expr, RowResolver input, RexBuilder rexBuilder)
throws SemanticException {
TypeCheckCtx tcCtx = new TypeCheckCtx(input, rexBuilder);
return genAllRexNode(expr, input, tcCtx);
}
/**
* Returns a Calcite {@link RexNode} for the expression.
* If it was already evaluated in a previous operator, it can be retrieved from the cache.
*/
RexNode genRexNode(ASTNode expr, RowResolver input,
TypeCheckCtx tcCtx) throws SemanticException {
RexNode cached = null;
if (tcCtx.isUseCaching()) {
cached = getRexNodeCached(expr, input, tcCtx);
}
if (cached == null) {
Map<ASTNode, RexNode> allExprs = genAllRexNode(expr, input, tcCtx);
return allExprs.get(expr);
}
return cached;
}
/**
* Finds the RexNode for the expression cached in the RowResolver. Returns null if it does not exist.
*/
private RexNode getRexNodeCached(ASTNode node, RowResolver input,
TypeCheckCtx tcCtx) throws SemanticException {
ColumnInfo colInfo = input.getExpression(node);
if (colInfo != null) {
ASTNode source = input.getExpressionSource(node);
if (source != null) {
unparseTranslator.addCopyTranslation(node, source);
}
return RexNodeTypeCheck.toExprNode(colInfo, input, 0, tcCtx.getRexBuilder());
}
return null;
}
/**
* Generates all of the Calcite {@link RexNode}s for the expression and its children
* passed in the arguments. This function uses the row resolver and the metadata information
* that are passed as arguments to resolve the column names to internal names.
*
* @param expr
* The expression
* @param input
* The row resolver
* @param tcCtx
* Customized type-checking context
* @return expression to exprNodeDesc mapping
* @throws SemanticException Failed to evaluate expression
*/
Map<ASTNode, RexNode> genAllRexNode(ASTNode expr, RowResolver input,
TypeCheckCtx tcCtx) throws SemanticException {
// Create the walker and the rules dispatcher.
tcCtx.setUnparseTranslator(unparseTranslator);
Map<ASTNode, RexNode> nodeOutputs =
RexNodeTypeCheck.genExprNode(expr, tcCtx);
RexNode desc = nodeOutputs.get(expr);
if (desc == null) {
String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr
.getChild(0).getText());
ColumnInfo colInfo = input.get(null, tableOrCol);
String errMsg;
if (colInfo == null && input.getIsExprResolver()){
errMsg = ASTErrorUtils.getMsg(
ErrorMsg.NON_KEY_EXPR_IN_GROUPBY.getMsg(), expr);
} else {
errMsg = tcCtx.getError();
}
throw new SemanticException(Optional.ofNullable(errMsg).orElse("Error in parsing"));
}
if (desc instanceof HiveRexExprList) {
throw new SemanticException("TOK_ALLCOLREF is not supported in current context");
}
if (!unparseTranslator.isEnabled()) {
// Not creating a view, so no need to track view expansions.
return nodeOutputs;
}
List<ASTNode> fieldDescList = new ArrayList<>();
for (Map.Entry<ASTNode, RexNode> entry : nodeOutputs.entrySet()) {
if (!(entry.getValue() instanceof RexInputRef)) {
// we need to translate the RexFieldAccess too, e.g., identifiers in
// struct<>.
if (entry.getValue() instanceof RexFieldAccess) {
fieldDescList.add(entry.getKey());
}
continue;
}
ASTNode node = entry.getKey();
RexInputRef columnDesc = (RexInputRef) entry.getValue();
int index = columnDesc.getIndex();
String[] tmp;
if (index < input.getColumnInfos().size()) {
ColumnInfo columnInfo = input.getColumnInfos().get(index);
if (columnInfo.getTabAlias() == null
|| columnInfo.getTabAlias().length() == 0) {
// These aren't real column refs; instead, they are special
// internal expressions used in the representation of aggregation.
continue;
}
tmp = input.reverseLookup(columnInfo.getInternalName());
} else {
// in the subquery case, the column may come from the outer query.
ColumnInfo columnInfo = tcCtx.getOuterRR().getColumnInfos().get(
index - input.getColumnInfos().size());
if (columnInfo.getTabAlias() == null
|| columnInfo.getTabAlias().length() == 0) {
continue;
}
tmp = tcCtx.getOuterRR().reverseLookup(columnInfo.getInternalName());
}
StringBuilder replacementText = new StringBuilder();
replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf));
replacementText.append(".");
replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf));
unparseTranslator.addTranslation(node, replacementText.toString());
}
for (ASTNode node : fieldDescList) {
Map<ASTNode, String> map = translateFieldDesc(node);
for (Entry<ASTNode, String> entry : map.entrySet()) {
unparseTranslator.addTranslation(entry.getKey(), entry.getValue().toLowerCase());
}
}
return nodeOutputs;
}
private String recommendName(RexNode exp, String colAlias, RowResolver rowResolver) {
if (!colAlias.startsWith(autogenColAliasPrfxLbl)) {
return null;
}
String column = recommendInputName(exp, rowResolver);
if (column != null && !column.startsWith(autogenColAliasPrfxLbl)) {
return column;
}
return null;
}
/**
* Recommends a name for the expression.
*/
private static String recommendInputName(RexNode desc, RowResolver rowResolver) {
Integer pos = null;
if (desc instanceof RexInputRef) {
pos = ((RexInputRef) desc).getIndex();
}
if (desc.isA(SqlKind.CAST)) {
RexNode input = ((RexCall) desc).operands.get(0);
if (input instanceof RexInputRef) {
pos = ((RexInputRef) input).getIndex();
}
}
return pos != null ?
rowResolver.getColumnInfos().get(pos).getInternalName() :
null;
}
/**
* Contains information useful to decorrelate queries.
*/
protected static class InputContext {
protected final RelDataType inputRowType;
protected final ImmutableBiMap<Integer, String> positionToColumnName;
protected final RowResolver inputRowResolver;
protected InputContext(RelDataType inputRowType, ImmutableMap<String, Integer> columnNameToPosition,
RowResolver inputRowResolver) {
this.inputRowType = inputRowType;
this.positionToColumnName = ImmutableBiMap.copyOf(columnNameToPosition).inverse();
this.inputRowResolver = inputRowResolver.duplicate();
}
}
/**
* This method can be called at startup time to pre-register all the
* additional Hive classes (compared to Calcite core classes) that may
* be visited during the planning phase in the metadata providers
* and the field trimmer.
*/
public static void warmup() {
JaninoRelMetadataProvider.DEFAULT.register(HIVE_REL_NODE_CLASSES);
HiveDefaultRelMetadataProvider.initializeMetadataProviderClass(HIVE_REL_NODE_CLASSES);
HiveTezModelRelMetadataProvider.DEFAULT.register(HIVE_REL_NODE_CLASSES);
HiveMaterializationRelMetadataProvider.DEFAULT.register(HIVE_REL_NODE_CLASSES);
HiveRelFieldTrimmer.initializeFieldTrimmerClass(HIVE_REL_NODE_CLASSES);
}
private enum TableType {
DRUID,
NATIVE,
JDBC
}
}