blob: 9f521a77ef5ea88406ff576231808dabd2b3958b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.calcite;
import com.google.common.collect.Iterables;
import org.apache.calcite.avatica.util.TimeUnitRange;
import org.apache.calcite.plan.Context;
import org.apache.calcite.plan.Contexts;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptSchema;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelCollations;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.CorrelationId;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCorrelVariable;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexShuttle;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.server.CalciteServerStatement;
import org.apache.calcite.sql.SqlAggFunction;
import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.tools.FrameworkConfig;
import org.apache.calcite.tools.Frameworks;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.tools.RelBuilderFactory;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Litmus;
import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveMergeableAggregate;
import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction;
import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlMinMaxAggFunction;
import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction;
import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumEmptyIsZeroAggFunction;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate;
import com.google.common.collect.ImmutableList;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
/**
 * Builder for relational expressions in Hive.
 *
 * <p>{@code RelBuilder} does not make possible anything that you could not
 * also accomplish by calling the factory methods of the particular relational
 * expression. But it makes common tasks more straightforward and concise.
 *
 * <p>It is not thread-safe.
 */
public class HiveRelBuilder extends RelBuilder {

  private HiveRelBuilder(Context context, RelOptCluster cluster, RelOptSchema relOptSchema) {
    super(context, cluster, relOptSchema);
  }

  /**
   * Creates a RelBuilder.
   *
   * <p>The cluster and schema are the ones handed to the callback passed to
   * {@code Frameworks.withPrepare}; the context is taken from {@code config}.
   *
   * @param config framework configuration used to prepare the cluster and schema
   * @return a new {@code HiveRelBuilder}
   */
  public static RelBuilder create(FrameworkConfig config) {
    // One-element arrays serve as mutable holders that the anonymous class can write to.
    final RelOptCluster[] clusters = {null};
    final RelOptSchema[] relOptSchemas = {null};
    Frameworks.withPrepare(
        new Frameworks.PrepareAction<Void>(config) {
          @Override
          public Void apply(RelOptCluster cluster, RelOptSchema relOptSchema,
              SchemaPlus rootSchema, CalciteServerStatement statement) {
            clusters[0] = cluster;
            relOptSchemas[0] = relOptSchema;
            return null;
          }
        });
    return new HiveRelBuilder(config.getContext(), clusters[0], relOptSchemas[0]);
  }

  /** Creates a {@link RelBuilderFactory}, a partially-created RelBuilder.
   * Just add a {@link RelOptCluster} and a {@link RelOptSchema} */
  public static RelBuilderFactory proto(final Context context) {
    return new RelBuilderFactory() {
      @Override
      public RelBuilder create(RelOptCluster cluster, RelOptSchema schema) {
        return new HiveRelBuilder(context, cluster, schema);
      }
    };
  }

  /** Creates a {@link RelBuilderFactory} that uses a given set of factories. */
  public static RelBuilderFactory proto(Object... factories) {
    return proto(Contexts.of(factories));
  }

  /**
   * Replaces the top relational expression with a filter created by
   * {@code HiveRelFactories.HIVE_FILTER_FACTORY} on the conjunction of {@code predicates}.
   *
   * <p>If the conjunction is always true, nothing is built or pushed and the builder is
   * returned unchanged.
   *
   * @param predicates conditions to be AND-ed together
   * @return this builder
   */
  @Override
  public RelBuilder filter(Iterable<? extends RexNode> predicates) {
    final RexNode x = RexUtil.composeConjunction(
        cluster.getRexBuilder(), predicates, false);
    if (x.isAlwaysTrue()) {
      // A trivially true condition needs no filter operator.
      return this;
    }
    final RelNode input = build();
    final RelNode filter = HiveRelFactories.HIVE_FILTER_FACTORY.createFilter(input, x);
    return this.push(filter);
  }

  /**
   * Empty relationship can be expressed in many different ways, e.g.,
   * filter(cond=false), empty LogicalValues(), etc. Calcite default implementation
   * uses empty LogicalValues(); however, currently there is not an equivalent to
   * this expression in Hive. Thus, we use limit 0, since Hive already includes
   * optimizations that will do early pruning of the result tree when it is found,
   * e.g., GlobalLimitOptimizer.
   */
  @Override
  public RelBuilder empty() {
    final RelNode input = build();
    // No collation, no offset, fetch = 0.
    final RelNode sort = HiveRelFactories.HIVE_SORT_FACTORY.createSort(
        input, RelCollations.of(), null, literal(0));
    return this.push(sort);
  }

  /**
   * Returns the floor function matching the given time unit.
   *
   * @param flag time unit to floor to; must not be {@code null}
   * @return the corresponding {@link HiveFloorDate} function for YEAR, QUARTER, MONTH, DAY,
   *     HOUR, MINUTE and SECOND, or {@link SqlStdOperatorTable#FLOOR} for any other unit
   */
  public static SqlFunction getFloorSqlFunction(TimeUnitRange flag) {
    switch (flag) {
    case YEAR:
      return HiveFloorDate.YEAR;
    case QUARTER:
      return HiveFloorDate.QUARTER;
    case MONTH:
      return HiveFloorDate.MONTH;
    case DAY:
      return HiveFloorDate.DAY;
    case HOUR:
      return HiveFloorDate.HOUR;
    case MINUTE:
      return HiveFloorDate.MINUTE;
    case SECOND:
      return HiveFloorDate.SECOND;
    default:
      return SqlStdOperatorTable.FLOOR;
    }
  }

  /**
   * Returns the aggregate function associated with {@code aggregation} for roll-up.
   *
   * <ul>
   * <li>For a {@link HiveMergeableAggregate}, its merge function.</li>
   * <li>For Hive SUM, MIN/MAX and SUM0 functions, the function itself.</li>
   * <li>For a Hive COUNT function, a new {@link HiveSqlSumEmptyIsZeroAggFunction} built from
   *     the count's distinct flag and type inference/checking strategies.</li>
   * </ul>
   *
   * @param aggregation aggregate function to look up
   * @return the roll-up function, or {@code null} if {@code aggregation} is none of the above
   */
  public static SqlAggFunction getRollup(SqlAggFunction aggregation) {
    if (aggregation instanceof HiveMergeableAggregate) {
      HiveMergeableAggregate mAgg = (HiveMergeableAggregate) aggregation;
      return mAgg.getMergeAggFunction();
    }
    if (aggregation instanceof HiveSqlSumAggFunction
        || aggregation instanceof HiveSqlMinMaxAggFunction
        || aggregation instanceof HiveSqlSumEmptyIsZeroAggFunction) {
      return aggregation;
    }
    if (aggregation instanceof HiveSqlCountAggFunction) {
      HiveSqlCountAggFunction countAgg = (HiveSqlCountAggFunction) aggregation;
      return new HiveSqlSumEmptyIsZeroAggFunction(countAgg.isDistinct(),
          countAgg.getReturnTypeInference(), countAgg.getOperandTypeInference(),
          countAgg.getOperandTypeChecker());
    }
    return null;
  }

  /**
   * Creates a {@link Join} with correlating variables.
   *
   * <p>This override is a workaround that should be removed once CALCITE-4574 is fixed.
   * When {@code variablesSet} holds exactly one correlation id, a correlate is built from the
   * two topmost expressions of the builder; otherwise the call is delegated to the superclass.
   *
   * @param joinType type of join
   * @param condition join condition
   * @param variablesSet correlation variables set by this join
   * @return this builder
   * @throws IllegalStateException if {@code Bug.CALCITE_4574_FIXED} is set
   * @throws IllegalArgumentException if the correlation variable is used by the left input,
   *     or if a correlated join has a type other than LEFT, SEMI, ANTI or INNER
   */
  @Override
  public RelBuilder join(JoinRelType joinType, RexNode condition,
      Set<CorrelationId> variablesSet) {
    if (Bug.CALCITE_4574_FIXED) {
      throw new IllegalStateException("Method overriding should be removed once CALCITE-4574 is fixed");
    }
    RelNode right = this.peek(0);
    RelNode left = this.peek(1);
    final boolean correlate = variablesSet.size() == 1;
    RexNode postCondition = literal(true);
    if (correlate) {
      final CorrelationId id = Iterables.getOnlyElement(variablesSet);
      if (!RelOptUtil.notContainsCorrelation(left, id, Litmus.IGNORE)) {
        throw new IllegalArgumentException("variable " + id
            + " must not be used by left input to correlation");
      }
      // Correlate does not have an ON clause.
      switch (joinType) {
      case LEFT:
      case SEMI:
      case ANTI:
        // For a LEFT/SEMI/ANTI, predicate must be evaluated first.
        filter(condition.accept(new Shifter(left, id, right)));
        // The filter replaced the top of the stack; re-read the right input.
        right = this.peek(0);
        break;
      case INNER:
        // For INNER, we can defer.
        postCondition = condition;
        break;
      default:
        throw new IllegalArgumentException("Correlated " + joinType + " join is not supported");
      }
      final ImmutableBitSet requiredColumns = RelOptUtil.correlationColumns(id, right);
      // Fields of the left input (first of the two inputs on the stack).
      List<RexNode> leftFields = this.fields(2, 0);
      List<RexNode> requiredFields = new ArrayList<>();
      for (int i = 0; i < leftFields.size(); i++) {
        if (requiredColumns.get(i)) {
          requiredFields.add(leftFields.get(i));
        }
      }
      correlate(joinType, id, requiredFields);
      filter(postCondition);
    } else {
      // When there is no correlation use the default logic which works OK for now
      // Cannot copy-paste the respective code here cause we don't have access to stack,
      // Frame etc. and we might lose existing aliases in the builder
      assert variablesSet.isEmpty();
      super.join(joinType, condition, variablesSet);
    }
    return this;
  }

  /** Shuttle that shifts a predicate's inputs to the left, replacing early
   * ones with references to a
   * {@link RexCorrelVariable}.
   *
   * <p>Deliberately an inner (non-static) class: {@link #visitInputRef} relies on the
   * enclosing builder's {@code getRexBuilder()}. */
  private class Shifter extends RexShuttle {
    private final RelNode left;
    private final CorrelationId id;
    private final RelNode right;

    /**
     * @throws IllegalStateException if {@code Bug.CALCITE_4574_FIXED} is set
     */
    Shifter(RelNode left, CorrelationId id, RelNode right) {
      if (Bug.CALCITE_4574_FIXED) {
        throw new IllegalStateException("Class should be redundant once CALCITE-4574 is fixed");
      }
      this.left = left;
      this.id = id;
      this.right = right;
    }

    /**
     * Rewrites an input reference: indexes below the left field count become a field access
     * on the correlation variable; the others become references into the right input, with
     * the index reduced by the left field count.
     */
    @Override
    public RexNode visitInputRef(RexInputRef inputRef) {
      final RelDataType leftRowType = left.getRowType();
      final RexBuilder rexBuilder = getRexBuilder();
      final int leftCount = leftRowType.getFieldCount();
      if (inputRef.getIndex() < leftCount) {
        final RexNode v = rexBuilder.makeCorrel(leftRowType, id);
        return rexBuilder.makeFieldAccess(v, inputRef.getIndex());
      } else {
        return rexBuilder.makeInputRef(right, inputRef.getIndex() - leftCount);
      }
    }
  }

  /**
   * Disables merging of consecutive projects.
   *
   * @return always {@code false}
   */
  @Override
  protected boolean shouldMergeProject() {
    /* CALCITE-2470 added ability to merge Project-s together.
     * The problem with it is that it may merge 2 windowing expressions.
     */
    return false;
  }

  /** Make the method visible: declared public here, it only delegates to the superclass. */
  @Override
  public AggCall aggregateCall(SqlAggFunction aggFunction, boolean distinct, boolean approximate,
      boolean ignoreNulls, RexNode filter, ImmutableList<RexNode> orderKeys, String alias,
      ImmutableList<RexNode> operands) {
    return super.aggregateCall(aggFunction, distinct, approximate, ignoreNulls, filter, orderKeys,
        alias, operands);
  }
}