blob: 7e851f82ec639ca9f72bb504d64444b57874b9ec [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.plan.volcano;
import org.apache.calcite.plan.RelOptCost;
import org.apache.calcite.plan.RelOptRuleOperand;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelNodes;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.util.Util;
import org.apache.calcite.util.trace.CalciteTrace;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multimap;
import com.google.common.collect.Ordering;
import org.slf4j.Logger;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Deque;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Priority queue of relexps whose rules have not been called, and rule-matches
* which have not yet been acted upon.
*/
class RuleQueue {
//~ Static fields/initializers ---------------------------------------------
private static final Logger LOGGER = CalciteTrace.getPlannerTracer();
private static final Set<String> ALL_RULES = ImmutableSet.of("<ALL RULES>");
/**
* Largest value which is less than one.
*/
private static final double ONE_MINUS_EPSILON = computeOneMinusEpsilon();
//~ Instance fields --------------------------------------------------------
/**
* The importance of each subset.
*/
final Map<RelSubset, Double> subsetImportances = new HashMap<>();
/**
* The set of RelSubsets whose importance is currently in an artificially
* raised state. Typically this only includes RelSubsets which have only
* logical RelNodes.
*/
final Set<RelSubset> boostedSubsets = new HashSet<>();
/**
* Map of {@link VolcanoPlannerPhase} to a list of rule-matches. Initially,
* there is an empty {@link PhaseMatchList} for each planner phase. As the
* planner invokes {@link #addMatch(VolcanoRuleMatch)} the rule-match is
* added to the appropriate PhaseMatchList(s). As the planner completes
* phases, the matching entry is removed from this list to avoid unused
* work.
*/
final Map<VolcanoPlannerPhase, PhaseMatchList> matchListMap =
new EnumMap<>(VolcanoPlannerPhase.class);
/**
* Sorts rule-matches into decreasing order of importance.
*/
private static final Comparator<VolcanoRuleMatch> MATCH_COMPARATOR =
new RuleMatchImportanceComparator();
private final VolcanoPlanner planner;
/**
* Compares relexps according to their cached 'importance'.
*/
private final Ordering<RelSubset> relImportanceOrdering =
Ordering.from(new RelImportanceComparator());
/**
* Maps a {@link VolcanoPlannerPhase} to a set of rule descriptions. Named rules
* may be invoked in their corresponding phase.
*
* <p>See {@link VolcanoPlannerPhaseRuleMappingInitializer} for more
* information regarding the contents of this Map and how it is initialized.
*/
private final Map<VolcanoPlannerPhase, Set<String>> phaseRuleMapping;
//~ Constructors -----------------------------------------------------------
RuleQueue(VolcanoPlanner planner) {
this.planner = planner;
phaseRuleMapping = new EnumMap<>(VolcanoPlannerPhase.class);
// init empty sets for all phases
for (VolcanoPlannerPhase phase : VolcanoPlannerPhase.values()) {
phaseRuleMapping.put(phase, new HashSet<>());
}
// configure phases
planner.getPhaseRuleMappingInitializer().initialize(phaseRuleMapping);
for (VolcanoPlannerPhase phase : VolcanoPlannerPhase.values()) {
// empty phases get converted to "all rules"
if (phaseRuleMapping.get(phase).isEmpty()) {
phaseRuleMapping.put(phase, ALL_RULES);
}
// create a match list data structure for each phase
PhaseMatchList matchList = new PhaseMatchList(phase);
matchListMap.put(phase, matchList);
}
}
//~ Methods ----------------------------------------------------------------
/**
* Clear internal data structure for this rule queue.
*/
public void clear() {
this.subsetImportances.clear();
this.boostedSubsets.clear();
for (PhaseMatchList matchList : matchListMap.values()) {
matchList.clear();
}
}
/**
* Removes the {@link PhaseMatchList rule-match list} for the given planner
* phase.
*/
public void phaseCompleted(VolcanoPlannerPhase phase) {
matchListMap.get(phase).clear();
}
/**
* Computes the importance of a set (which is that of its most important
* subset).
*/
public double getImportance(RelSet set) {
double importance = 0;
for (RelSubset subset : set.subsets) {
importance =
Math.max(
importance,
getImportance(subset));
}
return importance;
}
/**
* Recomputes the importance of the given RelSubset.
*
* @param subset RelSubset whose importance is to be recomputed
* @param force if true, forces an importance update even if the subset has
* not been registered
*/
public void recompute(RelSubset subset, boolean force) {
Double previousImportance = subsetImportances.get(subset);
if (previousImportance == null) {
if (!force) {
// Subset has not been registered yet. Don't worry about it.
return;
}
previousImportance = Double.NEGATIVE_INFINITY;
}
double importance = computeImportance(subset);
if (previousImportance == importance) {
return;
}
updateImportance(subset, importance);
}
/**
* Equivalent to
* {@link #recompute(RelSubset, boolean) recompute(subset, false)}.
*/
public void recompute(RelSubset subset) {
recompute(subset, false);
}
/**
* Artificially boosts the importance of the given {@link RelSubset}s by a
* given factor.
*
* <p>Iterates over the currently boosted RelSubsets and removes their
* importance boost, forcing a recalculation of the RelSubsets' importances
* (see {@link #recompute(RelSubset)}).
*
* <p>Once RelSubsets have been restored to their normal importance, the
* given RelSubsets have their importances boosted. A RelSubset's boosted
* importance is always less than 1.0 (and never equal to 1.0).
*
* @param subsets RelSubsets to boost importance (priority)
* @param factor the amount to boost their importances (e.g., 1.25 increases
* importance by 25%)
*/
public void boostImportance(Collection<RelSubset> subsets, double factor) {
LOGGER.trace("boostImportance({}, {})", factor, subsets);
final List<RelSubset> boostRemovals = new ArrayList<>();
final Iterator<RelSubset> iter = boostedSubsets.iterator();
while (iter.hasNext()) {
RelSubset subset = iter.next();
if (!subsets.contains(subset)) {
iter.remove();
boostRemovals.add(subset);
}
}
boostRemovals.sort(new Comparator<RelSubset>() {
public int compare(RelSubset o1, RelSubset o2) {
int o1children = countChildren(o1);
int o2children = countChildren(o2);
int c = Integer.compare(o1children, o2children);
if (c == 0) {
// for determinism
c = Integer.compare(o1.getId(), o2.getId());
}
return c;
}
private int countChildren(RelSubset subset) {
int count = 0;
for (RelNode rel : subset.getRels()) {
count += rel.getInputs().size();
}
return count;
}
});
for (RelSubset subset : boostRemovals) {
subset.propagateBoostRemoval(planner);
}
for (RelSubset subset : subsets) {
double importance = subsetImportances.get(subset);
updateImportance(
subset,
Math.min(ONE_MINUS_EPSILON, importance * factor));
subset.boosted = true;
boostedSubsets.add(subset);
}
}
void updateImportance(RelSubset subset, Double importance) {
subsetImportances.put(subset, importance);
for (PhaseMatchList matchList : matchListMap.values()) {
Multimap<RelSubset, VolcanoRuleMatch> relMatchMap =
matchList.matchMap;
if (relMatchMap.containsKey(subset)) {
for (VolcanoRuleMatch match : relMatchMap.get(subset)) {
match.clearCachedImportance();
}
}
}
}
/**
* Returns the importance of an equivalence class of relational expressions.
* Subset importances are held in a lookup table, and importance changes
* gradually propagate through that table.
*
* <p>If a subset in the same set but with a different calling convention is
* deemed to be important, then this subset has at least half of its
* importance. (This rule is designed to encourage conversions to take
* place.)</p>
*/
double getImportance(RelSubset rel) {
assert rel != null;
double importance = 0;
final RelSet set = planner.getSet(rel);
assert set != null;
for (RelSubset subset2 : set.subsets) {
final Double d = subsetImportances.get(subset2);
if (d == null) {
continue;
}
double subsetImportance = d;
if (subset2 != rel) {
subsetImportance /= 2;
}
if (subsetImportance > importance) {
importance = subsetImportance;
}
}
return importance;
}
/**
* Adds a rule match. The rule-matches are automatically added to all
* existing {@link PhaseMatchList per-phase rule-match lists} which allow
* the rule referenced by the match.
*/
void addMatch(VolcanoRuleMatch match) {
final String matchName = match.toString();
for (PhaseMatchList matchList : matchListMap.values()) {
Set<String> phaseRuleSet = phaseRuleMapping.get(matchList.phase);
if (phaseRuleSet != ALL_RULES) {
String ruleDescription = match.getRule().toString();
if (!phaseRuleSet.contains(ruleDescription)) {
continue;
}
}
if (!matchList.names.add(matchName)) {
// Identical match has already been added.
continue;
}
LOGGER.trace("{} Rule-match queued: {}", matchList.phase.toString(), matchName);
matchList.list.add(match);
matchList.matchMap.put(
planner.getSubset(match.rels[0]), match);
}
}
/**
* Computes the <dfn>importance</dfn> of a node. Importance is defined as
* follows:
*
* <ul>
* <li>the root {@link RelSubset} has an importance of 1</li>
* <li>the importance of any other subset is the max of its importance to
* its parents</li>
* <li>The importance of children is pro-rated according to the cost of the
* children. Consider a node which has a cost of 3, and children with costs
* of 2 and 5. The total cost is 10. If the node has an importance of .5,
* then the children will have importance of .1 and .25. The retains .15
* importance points, to reflect the fact that work needs to be done on the
* node's algorithm.</li>
* </ul>
*
* <p>The formula for the importance <i>I</i> of node n is:
*
* <blockquote>I<sub>n</sub> = Max<sub>parents p of n</sub>{I<sub>p</sub> .
* W <sub>n, p</sub>}</blockquote>
*
* <p>where W<sub>n, p</sub>, the weight of n within its parent p, is
*
* <blockquote>W<sub>n, p</sub> = Cost<sub>n</sub> / (SelfCost<sub>p</sub> +
* Cost<sub>n0</sub> + ... + Cost<sub>nk</sub>)
* </blockquote>
*/
double computeImportance(RelSubset subset) {
double importance;
if (subset == planner.root) {
// The root always has importance = 1
importance = 1.0;
} else {
final RelMetadataQuery mq = subset.getCluster().getMetadataQuery();
// The importance of a subset is the max of its importance to its
// parents
importance = 0.0;
for (RelSubset parent : subset.getParentSubsets(planner)) {
final double childImportance =
computeImportanceOfChild(mq, subset, parent);
importance = Math.max(importance, childImportance);
}
}
LOGGER.trace("Importance of [{}] is {}", subset, importance);
return importance;
}
private void dump() {
if (LOGGER.isTraceEnabled()) {
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw);
dump(pw);
pw.flush();
LOGGER.trace(sw.toString());
planner.getRoot().getCluster().invalidateMetadataQuery();
}
}
private void dump(PrintWriter pw) {
planner.dump(pw);
pw.print("Importances: {");
for (RelSubset subset
: relImportanceOrdering.sortedCopy(subsetImportances.keySet())) {
pw.print(" " + subset.toString() + "=" + subsetImportances.get(subset));
}
pw.println("}");
}
/**
* Removes the rule match with the highest importance, and returns it.
*
* <p>Returns {@code null} if there are no more matches.</p>
*
* <p>Note that the VolcanoPlanner may still decide to reject rule matches
* which have become invalid, say if one of their operands belongs to an
* obsolete set or has importance=0.
*
* @throws java.lang.AssertionError if this method is called with a phase
* previously marked as completed via
* {@link #phaseCompleted(VolcanoPlannerPhase)}.
*/
VolcanoRuleMatch popMatch(VolcanoPlannerPhase phase) {
dump();
PhaseMatchList phaseMatchList = matchListMap.get(phase);
if (phaseMatchList == null) {
throw new AssertionError("Used match list for phase " + phase
+ " after phase complete");
}
final List<VolcanoRuleMatch> matchList = phaseMatchList.list;
VolcanoRuleMatch match;
for (;;) {
if (matchList.isEmpty()) {
return null;
}
int bestPos;
if (LOGGER.isTraceEnabled()) {
matchList.sort(MATCH_COMPARATOR);
match = matchList.get(0);
bestPos = 0;
StringBuilder b = new StringBuilder();
b.append("Sorted rule queue:");
for (VolcanoRuleMatch match2 : matchList) {
final double importance = match2.computeImportance();
b.append("\n");
b.append(match2);
b.append(" importance ");
b.append(importance);
}
LOGGER.trace(b.toString());
} else {
// If we're not tracing, it's not worth the effort of sorting the
// list to find the minimum.
match = null;
bestPos = -1;
int i = -1;
for (VolcanoRuleMatch match2 : matchList) {
++i;
if (match == null
|| MATCH_COMPARATOR.compare(match2, match) < 0) {
bestPos = i;
match = match2;
}
}
}
// Removal from the middle is not efficient, but the removal from the tail is.
// We remove the very last element, then put it to the bestPos index which
// effectively removes an element from the list.
final VolcanoRuleMatch lastElement = matchList.remove(matchList.size() - 1);
if (bestPos < matchList.size()) {
// Replace the middle element with the last one
matchList.set(bestPos, lastElement);
}
if (skipMatch(match)) {
LOGGER.debug("Skip match: {}", match);
} else {
break;
}
}
// If sets have merged since the rule match was enqueued, the match
// may not be removed from the matchMap because the subset may have
// changed, it is OK to leave it since the matchMap will be cleared
// at the end.
phaseMatchList.matchMap.remove(
planner.getSubset(match.rels[0]), match);
LOGGER.debug("Pop match: {}", match);
return match;
}
/** Returns whether to skip a match. This happens if any of the
* {@link RelNode}s have importance zero. */
private boolean skipMatch(VolcanoRuleMatch match) {
for (RelNode rel : match.rels) {
Double importance = planner.relImportances.get(rel);
if (importance != null && importance == 0d) {
return true;
}
}
// If the same subset appears more than once along any path from root
// operand to a leaf operand, we have matched a cycle. A relational
// expression that consumes its own output can never be implemented, and
// furthermore, if we fire rules on it we may generate lots of garbage.
// For example, if
// Project(A, X = X + 0)
// is in the same subset as A, then we would generate
// Project(A, X = X + 0 + 0)
// Project(A, X = X + 0 + 0 + 0)
// also in the same subset. They are valid but useless.
final Deque<RelSubset> subsets = new ArrayDeque<>();
try {
checkDuplicateSubsets(subsets, match.rule.getOperand(), match.rels);
} catch (Util.FoundOne e) {
return true;
}
return false;
}
/** Recursively checks whether there are any duplicate subsets along any path
* from root of the operand tree to one of the leaves.
*
* <p>It is OK for a match to have duplicate subsets if they are not on the
* same path. For example,
*
* <blockquote><pre>
* Join
* / \
* X X
* </pre></blockquote>
*
* <p>is a valid match.
*
* @throws org.apache.calcite.util.Util.FoundOne on match
*/
private void checkDuplicateSubsets(Deque<RelSubset> subsets,
RelOptRuleOperand operand, RelNode[] rels) {
final RelSubset subset = planner.getSubset(rels[operand.ordinalInRule]);
if (subsets.contains(subset)) {
throw Util.FoundOne.NULL;
}
if (!operand.getChildOperands().isEmpty()) {
subsets.push(subset);
for (RelOptRuleOperand childOperand : operand.getChildOperands()) {
checkDuplicateSubsets(subsets, childOperand, rels);
}
final RelSubset x = subsets.pop();
assert x == subset;
}
}
/**
* Returns the importance of a child to a parent. This is defined by the
* importance of the parent, pro-rated by the cost of the child. For
* example, if the parent has importance = 0.8 and cost 100, then a child
* with cost 50 will have importance 0.4, and a child with cost 25 will have
* importance 0.2.
*/
private double computeImportanceOfChild(RelMetadataQuery mq, RelSubset child,
RelSubset parent) {
final double parentImportance = getImportance(parent);
final double childCost = toDouble(planner.getCost(child, mq));
final double parentCost = toDouble(planner.getCost(parent, mq));
double alpha = childCost / parentCost;
if (alpha >= 1.0) {
// child is always less important than parent
alpha = 0.99;
}
final double importance = parentImportance * alpha;
LOGGER.trace("Importance of [{}] to its parent [{}] is {} (parent importance={}, child cost={},"
+ " parent cost={})", child, parent, importance, parentImportance, childCost, parentCost);
return importance;
}
/**
* Converts a cost to a scalar quantity.
*/
private double toDouble(RelOptCost cost) {
if (cost.isInfinite()) {
return 1e+30;
} else {
return cost.getCpu() + cost.getRows() + cost.getIo();
}
}
private static double computeOneMinusEpsilon() {
for (double d = 0d;;) {
double d0 = d;
d = (d + 1d) / 2d;
if (d == 1.0) {
return d0;
}
}
}
//~ Inner Classes ----------------------------------------------------------
/**
* Compares {@link RelNode} objects according to their cached 'importance'.
*/
private class RelImportanceComparator implements Comparator<RelSubset> {
public int compare(
RelSubset rel1,
RelSubset rel2) {
double imp1 = getImportance(rel1);
double imp2 = getImportance(rel2);
int c = Double.compare(imp2, imp1);
if (c == 0) {
c = rel1.getId() - rel2.getId();
}
return c;
}
}
/**
* Compares {@link VolcanoRuleMatch} objects according to their importance.
* Matches which are more important collate earlier. Ties are adjudicated by
* comparing the {@link RelNode#getId id}s of the relational expressions
* matched.
*/
private static class RuleMatchImportanceComparator
implements Comparator<VolcanoRuleMatch> {
public int compare(VolcanoRuleMatch match1,
VolcanoRuleMatch match2) {
double imp1 = match1.getImportance();
double imp2 = match2.getImportance();
int c = Double.compare(imp1, imp2);
if (c != 0) {
return -c;
}
c = match1.rule.getClass().getName()
.compareTo(match2.rule.getClass().getName());
if (c != 0) {
return -c;
}
return -RelNodes.compareRels(match1.rels, match2.rels);
}
}
/**
* PhaseMatchList represents a set of {@link VolcanoRuleMatch rule-matches}
* for a particular
* {@link VolcanoPlannerPhase phase of the planner's execution}.
*/
private static class PhaseMatchList {
/**
* The VolcanoPlannerPhase that this PhaseMatchList is used in.
*/
final VolcanoPlannerPhase phase;
/**
* Current list of VolcanoRuleMatches for this phase. New rule-matches
* are appended to the end of this list.
* The rules are not sorted in any way.
*/
final List<VolcanoRuleMatch> list = new ArrayList<>();
/**
* A set of rule-match names contained in {@link #list}. Allows fast
* detection of duplicate rule-matches.
*/
final Set<String> names = new HashSet<>();
/**
* Multi-map of RelSubset to VolcanoRuleMatches. Used to
* {@link VolcanoRuleMatch#clearCachedImportance() clear} the rule-match's
* cached importance when the importance of a related RelSubset is modified
* (e.g., due to invocation of
* {@link RuleQueue#boostImportance(Collection, double)}).
*/
final Multimap<RelSubset, VolcanoRuleMatch> matchMap =
HashMultimap.create();
PhaseMatchList(VolcanoPlannerPhase phase) {
this.phase = phase;
}
void clear() {
list.clear();
((ArrayList<VolcanoRuleMatch>) list).trimToSize();
names.clear();
matchMap.clear();
}
}
}