| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to you under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.calcite.plan; |
| |
| import org.apache.calcite.DataContext; |
| import org.apache.calcite.rel.type.RelDataType; |
| import org.apache.calcite.rex.RexBuilder; |
| import org.apache.calcite.rex.RexCall; |
| import org.apache.calcite.rex.RexExecutable; |
| import org.apache.calcite.rex.RexExecutor; |
| import org.apache.calcite.rex.RexExecutorImpl; |
| import org.apache.calcite.rex.RexInputRef; |
| import org.apache.calcite.rex.RexNode; |
| import org.apache.calcite.rex.RexUtil; |
| import org.apache.calcite.rex.RexVisitorImpl; |
| import org.apache.calcite.sql.SqlKind; |
| import org.apache.calcite.sql.SqlOperator; |
| import org.apache.calcite.util.Pair; |
| import org.apache.calcite.util.trace.CalciteLogger; |
| |
| import com.google.common.collect.ImmutableList; |
| import com.google.common.collect.ImmutableSet; |
| import com.google.common.collect.Sets; |
| |
| import org.checkerframework.checker.nullness.qual.Nullable; |
| import org.slf4j.LoggerFactory; |
| |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| |
| import static java.util.Objects.requireNonNull; |
| |
| /** |
| * Checks whether one condition logically implies another. |
| * |
| * <p>If A ⇒ B, whenever A is true, B will be true also. |
| * |
| * <p>For example: |
| * <ul> |
| * <li>(x > 10) ⇒ (x > 5) |
| * <li>(x = 10) ⇒ (x < 30 OR y > 30) |
| * <li>(x = 10) ⇒ (x IS NOT NULL) |
| * <li>(x > 10 AND y = 20) ⇒ (x > 5) |
| * </ul> |
| */ |
| public class RexImplicationChecker { |
| private static final CalciteLogger LOGGER = |
| new CalciteLogger(LoggerFactory.getLogger(RexImplicationChecker.class)); |
| |
| final RexBuilder builder; |
| final RexExecutor executor; |
| final RelDataType rowType; |
| |
| public RexImplicationChecker( |
| RexBuilder builder, |
| RexExecutor executor, |
| RelDataType rowType) { |
| this.builder = requireNonNull(builder, "builder"); |
| this.executor = requireNonNull(executor, "executor"); |
| this.rowType = requireNonNull(rowType, "rowType"); |
| } |
| |
| /** |
| * Checks if condition first implies (⇒) condition second. |
| * |
| * <p>This reduces to SAT problem which is NP-Complete. |
| * When this method says first implies second then it is definitely true. |
| * But it cannot prove that first does not imply second. |
| * |
| * @param first first condition |
| * @param second second condition |
| * @return true if it can prove first ⇒ second; otherwise false i.e., |
| * it doesn't know if implication holds |
| */ |
| public boolean implies(RexNode first, RexNode second) { |
| // Validation |
| if (!validate(first, second)) { |
| return false; |
| } |
| |
| LOGGER.debug("Checking if {} => {}", first.toString(), second.toString()); |
| |
| // Get DNF |
| RexNode firstDnf = RexUtil.toDnf(builder, first); |
| RexNode secondDnf = RexUtil.toDnf(builder, second); |
| |
| // Check Trivial Cases |
| if (firstDnf.isAlwaysFalse() |
| || secondDnf.isAlwaysTrue()) { |
| return true; |
| } |
| |
| // Decompose DNF into a list of conditions, each of which is a conjunction. |
| // For example, |
| // (x > 10 AND y > 30) OR (z > 90) |
| // is converted to list of 2 conditions: |
| // (x > 10 AND y > 30) |
| // z > 90 |
| // |
| // Similarly, decompose CNF into a list of conditions, each of which is a |
| // disjunction. |
| List<RexNode> firsts = RelOptUtil.disjunctions(firstDnf); |
| List<RexNode> seconds = RelOptUtil.disjunctions(secondDnf); |
| |
| for (RexNode f : firsts) { |
| // Check if f implies at least |
| // one of the conjunctions in list secondDnfs. |
| // If f could not imply even one conjunction in |
| // secondDnfs, then final implication may be false. |
| if (!impliesAny(f, seconds)) { |
| LOGGER.debug("{} does not imply {}", first, second); |
| return false; |
| } |
| } |
| |
| LOGGER.debug("{} implies {}", first, second); |
| return true; |
| } |
| |
| /** Returns whether the predicate {@code first} implies (⇒) |
| * at least one predicate in {@code seconds}. */ |
| private boolean impliesAny(RexNode first, List<RexNode> seconds) { |
| for (RexNode second : seconds) { |
| if (impliesConjunction(first, second)) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** Returns whether the predicate {@code first} implies {@code second} (both |
| * may be conjunctions). */ |
| private boolean impliesConjunction(RexNode first, RexNode second) { |
| if (implies2(first, second)) { |
| return true; |
| } |
| switch (first.getKind()) { |
| case AND: |
| for (RexNode f : RelOptUtil.conjunctions(first)) { |
| if (implies2(f, second)) { |
| return true; |
| } |
| } |
| break; |
| default: |
| break; |
| } |
| return false; |
| } |
| |
| /** Returns whether the predicate {@code first} (not a conjunction) |
| * implies {@code second}. */ |
| private boolean implies2(RexNode first, RexNode second) { |
| if (second.isAlwaysFalse()) { // f cannot imply s |
| return false; |
| } |
| |
| // E.g. "x is null" implies "x is null". |
| if (first.equals(second)) { |
| return true; |
| } |
| |
| // Several things imply "IS NOT NULL" |
| switch (second.getKind()) { |
| case IS_NOT_NULL: |
| // Suppose we know that first is strong in second; that is, |
| // the if second is null, then first will be null. |
| // Then, first being not null implies that second is not null. |
| // |
| // For example, first is "x > y", second is "x". |
| // If we know that "x > y" is not null, we know that "x" is not null. |
| final RexNode operand = ((RexCall) second).getOperands().get(0); |
| final Strong strong = new Strong() { |
| @Override public boolean isNull(RexNode node) { |
| return node.equals(operand) |
| || super.isNull(node); |
| } |
| }; |
| if (strong.isNull(first)) { |
| return true; |
| } |
| break; |
| default: |
| break; |
| } |
| |
| final InputUsageFinder firstUsageFinder = new InputUsageFinder(); |
| final InputUsageFinder secondUsageFinder = new InputUsageFinder(); |
| |
| RexUtil.apply(firstUsageFinder, ImmutableList.of(), first); |
| RexUtil.apply(secondUsageFinder, ImmutableList.of(), second); |
| |
| // Check Support |
| if (!checkSupport(firstUsageFinder, secondUsageFinder)) { |
| LOGGER.warn("Support for checking {} => {} is not there", first, second); |
| return false; |
| } |
| |
| ImmutableList.Builder<Set<Pair<RexInputRef, @Nullable RexNode>>> usagesBuilder = |
| ImmutableList.builder(); |
| for (Map.Entry<RexInputRef, InputRefUsage<SqlOperator, @Nullable RexNode>> entry |
| : firstUsageFinder.usageMap.entrySet()) { |
| ImmutableSet.Builder<Pair<RexInputRef, @Nullable RexNode>> usageBuilder = |
| ImmutableSet.builder(); |
| if (entry.getValue().usageList.size() > 0) { |
| for (final Pair<SqlOperator, @Nullable RexNode> pair |
| : entry.getValue().usageList) { |
| usageBuilder.add(Pair.of(entry.getKey(), pair.getValue())); |
| } |
| usagesBuilder.add(usageBuilder.build()); |
| } |
| } |
| |
| final Set<List<Pair<RexInputRef, @Nullable RexNode>>> usages = |
| Sets.cartesianProduct(usagesBuilder.build()); |
| |
| for (List<Pair<RexInputRef, @Nullable RexNode>> usageList : usages) { |
| // Get the literals from first conjunction and executes second conjunction |
| // using them. |
| // |
| // E.g., for |
| // x > 30 ⇒ x > 10, |
| // we will replace x by 30 in second expression and execute it i.e., |
| // 30 > 10 |
| // |
| // If it's true then we infer implication. |
| final DataContext dataValues = |
| VisitorDataContext.of(rowType, usageList); |
| |
| if (!isSatisfiable(second, dataValues)) { |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| private boolean isSatisfiable(RexNode second, @Nullable DataContext dataValues) { |
| if (dataValues == null) { |
| return false; |
| } |
| |
| ImmutableList<RexNode> constExps = ImmutableList.of(second); |
| final RexExecutable exec = RexExecutorImpl.getExecutable(builder, constExps, rowType); |
| |
| @Nullable Object[] result; |
| exec.setDataContext(dataValues); |
| try { |
| result = exec.execute(); |
| } catch (Exception e) { |
| // TODO: CheckSupport should not allow this exception to be thrown |
| // Need to monitor it and handle all the cases raising them. |
| LOGGER.warn("Exception thrown while checking if => {}: {}", second, e.getMessage()); |
| return false; |
| } |
| return result != null |
| && result.length == 1 |
| && result[0] instanceof Boolean |
| && (Boolean) result[0]; |
| } |
| |
| /** |
| * Looks at the usage of variables in first and second conjunction to decide |
| * whether this kind of expression is currently supported for proving first |
| * implies second. |
| * |
| * <ol> |
| * <li>Variables should be used only once in both the conjunction against |
| * given set of operations only: >, <, ≤, ≥, =; ≠. |
| * |
| * <li>All the variables used in second condition should be used even in the |
| * first. |
| * |
| * <li>If operator used for variable in first is op1 and op2 for second, then |
| * we support these combination for conjunction (op1, op2) then op1, op2 |
| * belongs to one of the following sets: |
| * |
| * <ul> |
| * <li>(<, ≤) X (<, ≤) <i>note: X represents cartesian product</i> |
| * <li>(> / ≥) X (>, ≥) |
| * <li>(=) X (>, ≥, <, ≤, =, ≠) |
| * <li>(≠, =) |
| * </ul> |
| * |
| * <li>We support at most 2 operators to be be used for a variable in first |
| * and second usages. |
| * |
| * </ol> |
| * |
| * @return whether input usage pattern is supported |
| */ |
| private static boolean checkSupport(InputUsageFinder firstUsageFinder, |
| InputUsageFinder secondUsageFinder) { |
| final Map<RexInputRef, InputRefUsage<SqlOperator, @Nullable RexNode>> firstUsageMap = |
| firstUsageFinder.usageMap; |
| final Map<RexInputRef, InputRefUsage<SqlOperator, @Nullable RexNode>> secondUsageMap = |
| secondUsageFinder.usageMap; |
| |
| for (Map.Entry<RexInputRef, InputRefUsage<SqlOperator, @Nullable RexNode>> entry |
| : secondUsageMap.entrySet()) { |
| final InputRefUsage<SqlOperator, @Nullable RexNode> secondUsage = entry.getValue(); |
| final List<Pair<SqlOperator, @Nullable RexNode>> secondUsageList = secondUsage.usageList; |
| final int secondLen = secondUsageList.size(); |
| |
| if (secondUsage.usageCount != secondLen || secondLen > 2) { |
| return false; |
| } |
| |
| final InputRefUsage<SqlOperator, @Nullable RexNode> firstUsage = |
| firstUsageMap.get(entry.getKey()); |
| |
| if (firstUsage == null |
| || firstUsage.usageList.size() != firstUsage.usageCount |
| || firstUsage.usageCount > 2) { |
| return false; |
| } |
| |
| final List<Pair<SqlOperator, @Nullable RexNode>> firstUsageList = firstUsage.usageList; |
| final int firstLen = firstUsageList.size(); |
| |
| final SqlKind fKind = firstUsageList.get(0).getKey().getKind(); |
| final SqlKind sKind = secondUsageList.get(0).getKey().getKind(); |
| final SqlKind fKind2 = |
| firstLen == 2 ? firstUsageList.get(1).getKey().getKind() : null; |
| final SqlKind sKind2 = |
| secondLen == 2 ? secondUsageList.get(1).getKey().getKind() : null; |
| |
| // Note: arguments to isEquivalentOp are never null, however checker-framework's |
| // dataflow is not strong enough, so the first parameter is marked as nullable |
| //noinspection ConstantConditions |
| if (firstLen == 2 && secondLen == 2 |
| && fKind2 != null && sKind2 != null |
| && !(isEquivalentOp(fKind, sKind) && isEquivalentOp(fKind2, sKind2)) |
| && !(isEquivalentOp(fKind, sKind2) && isEquivalentOp(fKind2, sKind))) { |
| return false; |
| } else if (firstLen == 1 && secondLen == 1 |
| && fKind != SqlKind.EQUALS && !isSupportedUnaryOperators(sKind) |
| && !isEquivalentOp(fKind, sKind)) { |
| return false; |
| } else if (firstLen == 1 && secondLen == 2 && fKind != SqlKind.EQUALS) { |
| return false; |
| } else if (firstLen == 2 && secondLen == 1) { |
| // Allow only cases like |
| // x < 30 and x < 40 implies x < 70 |
| // x > 30 and x < 40 implies x < 70 |
| // But disallow cases like |
| // x > 30 and x > 40 implies x < 70 |
| //noinspection ConstantConditions |
| if (fKind2 != null && !isOppositeOp(fKind, fKind2) && !isSupportedUnaryOperators(sKind) |
| && !(isEquivalentOp(fKind, fKind2) && isEquivalentOp(fKind, sKind))) { |
| return false; |
| } |
| } |
| } |
| |
| return true; |
| } |
| |
| private static boolean isSupportedUnaryOperators(SqlKind kind) { |
| switch (kind) { |
| case IS_NOT_NULL: |
| case IS_NULL: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| private static boolean isEquivalentOp(@Nullable SqlKind fKind, SqlKind sKind) { |
| switch (sKind) { |
| case GREATER_THAN: |
| case GREATER_THAN_OR_EQUAL: |
| if (!(fKind == SqlKind.GREATER_THAN) |
| && !(fKind == SqlKind.GREATER_THAN_OR_EQUAL)) { |
| return false; |
| } |
| break; |
| case LESS_THAN: |
| case LESS_THAN_OR_EQUAL: |
| if (!(fKind == SqlKind.LESS_THAN) |
| && !(fKind == SqlKind.LESS_THAN_OR_EQUAL)) { |
| return false; |
| } |
| break; |
| default: |
| return false; |
| } |
| |
| return true; |
| } |
| |
| private static boolean isOppositeOp(SqlKind fKind, SqlKind sKind) { |
| switch (sKind) { |
| case GREATER_THAN: |
| case GREATER_THAN_OR_EQUAL: |
| if (!(fKind == SqlKind.LESS_THAN) |
| && !(fKind == SqlKind.LESS_THAN_OR_EQUAL)) { |
| return false; |
| } |
| break; |
| case LESS_THAN: |
| case LESS_THAN_OR_EQUAL: |
| if (!(fKind == SqlKind.GREATER_THAN) |
| && !(fKind == SqlKind.GREATER_THAN_OR_EQUAL)) { |
| return false; |
| } |
| break; |
| default: |
| return false; |
| } |
| return true; |
| } |
| |
| private static boolean validate(RexNode first, RexNode second) { |
| return first instanceof RexCall && second instanceof RexCall; |
| } |
| |
| /** |
| * Visitor that builds a usage map of inputs used by an expression. |
| * |
| * <p>E.g: for x > 10 AND y < 20 AND x = 40, usage map is as follows: |
| * <ul> |
| * <li>key: x value: {(>, 10),(=, 40), usageCount = 2} |
| * <li>key: y value: {(>, 20), usageCount = 1} |
| * </ul> |
| */ |
| private static class InputUsageFinder extends RexVisitorImpl<Void> { |
| final Map<RexInputRef, InputRefUsage<SqlOperator, @Nullable RexNode>> usageMap = |
| new HashMap<>(); |
| |
| InputUsageFinder() { |
| super(true); |
| } |
| |
| @Override public Void visitInputRef(RexInputRef inputRef) { |
| InputRefUsage<SqlOperator, @Nullable RexNode> inputRefUse = getUsageMap(inputRef); |
| inputRefUse.usageCount++; |
| return null; |
| } |
| |
| @Override public Void visitCall(RexCall call) { |
| switch (call.getOperator().getKind()) { |
| case GREATER_THAN: |
| case GREATER_THAN_OR_EQUAL: |
| case LESS_THAN: |
| case LESS_THAN_OR_EQUAL: |
| case EQUALS: |
| case NOT_EQUALS: |
| updateBinaryOpUsage(call); |
| break; |
| case IS_NULL: |
| case IS_NOT_NULL: |
| updateUnaryOpUsage(call); |
| break; |
| default: |
| } |
| return super.visitCall(call); |
| } |
| |
| private void updateUnaryOpUsage(RexCall call) { |
| final List<RexNode> operands = call.getOperands(); |
| RexNode first = RexUtil.removeCast(operands.get(0)); |
| |
| if (first.isA(SqlKind.INPUT_REF)) { |
| updateUsage(call.getOperator(), (RexInputRef) first, null); |
| } |
| } |
| |
| private void updateBinaryOpUsage(RexCall call) { |
| final List<RexNode> operands = call.getOperands(); |
| RexNode first = RexUtil.removeCast(operands.get(0)); |
| RexNode second = RexUtil.removeCast(operands.get(1)); |
| |
| if (first.isA(SqlKind.INPUT_REF) |
| && second.isA(SqlKind.LITERAL)) { |
| updateUsage(call.getOperator(), (RexInputRef) first, second); |
| } |
| |
| if (first.isA(SqlKind.LITERAL) |
| && second.isA(SqlKind.INPUT_REF)) { |
| updateUsage(requireNonNull(call.getOperator().reverse()), |
| (RexInputRef) second, first); |
| } |
| } |
| |
| private void updateUsage(SqlOperator op, RexInputRef inputRef, |
| @Nullable RexNode literal) { |
| final InputRefUsage<SqlOperator, @Nullable RexNode> inputRefUse = |
| getUsageMap(inputRef); |
| Pair<SqlOperator, @Nullable RexNode> use = Pair.of(op, literal); |
| inputRefUse.usageList.add(use); |
| } |
| |
| private InputRefUsage<SqlOperator, @Nullable RexNode> getUsageMap(RexInputRef rex) { |
| InputRefUsage<SqlOperator, @Nullable RexNode> inputRefUse = usageMap.get(rex); |
| if (inputRefUse == null) { |
| inputRefUse = new InputRefUsage<>(); |
| usageMap.put(rex, inputRefUse); |
| } |
| |
| return inputRefUse; |
| } |
| } |
| |
| /** |
| * Usage of a {@link RexInputRef} in an expression. |
| * |
| * @param <T1> left type |
| * @param <T2> right type */ |
| private static class InputRefUsage<T1, T2> { |
| private final List<Pair<T1, T2>> usageList = new ArrayList<>(); |
| private int usageCount = 0; |
| } |
| } |