| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| package org.apache.impala.analysis; |
| |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.LinkedHashSet; |
| import java.util.List; |
| import java.util.Set; |
| |
| import org.apache.impala.common.TreeNode; |
| import org.apache.impala.planner.PlanNode; |
| import org.apache.impala.thrift.TSortingOrder; |
| |
| import com.google.common.base.Preconditions; |
| import com.google.common.base.Predicates; |
| import com.google.common.collect.Lists; |
| |
| /** |
| * Encapsulates all the information needed to compute ORDER BY |
| * This doesn't contain aliases or positional exprs. |
| * TODO: reorganize this completely, this doesn't really encapsulate anything; this |
| * should move into planner/ and encapsulate the implementation of the sort of a |
| * particular input row (materialize all row slots) |
| */ |
| public class SortInfo { |
| // All sort exprs with cost greater than this will be materialized. Since we don't |
| // currently have any information about actual function costs, this value is intended to |
| // ensure that all expensive functions will be materialized while still leaving simple |
| // operations unmaterialized, for example 'SlotRef + SlotRef' should have a cost below |
| // this threshold. |
| // TODO: rethink this when we have a better cost model. |
| private static final float SORT_MATERIALIZATION_COST_THRESHOLD = |
| Expr.FUNCTION_CALL_COST; |
| |
| private List<Expr> sortExprs_; |
| private final List<Boolean> isAscOrder_; |
| // True if "NULLS FIRST", false if "NULLS LAST", null if not specified. |
| private final List<Boolean> nullsFirstParams_; |
| // The single tuple that is materialized, sorted, and output by a sort operator |
| // (i.e. SortNode or TopNNode) |
| private TupleDescriptor sortTupleDesc_; |
| // List of exprs evaluated against the sort input and materialized into the sort tuple. |
| // One expr per slot in 'sortTupleDesc_'. |
| private final List<Expr> materializedExprs_; |
| // Maps from exprs materialized into the sort tuple to their corresponding SlotRefs. |
| private final ExprSubstitutionMap outputSmap_; |
| private TSortingOrder sortingOrder_; |
| |
| public SortInfo(List<Expr> sortExprs, List<Boolean> isAscOrder, |
| List<Boolean> nullsFirstParams) { |
| this(sortExprs, isAscOrder, nullsFirstParams, TSortingOrder.LEXICAL); |
| } |
| |
| public SortInfo(List<Expr> sortExprs, List<Boolean> isAscOrder, |
| List<Boolean> nullsFirstParams, TSortingOrder sortingOrder) { |
| Preconditions.checkArgument(sortExprs.size() == isAscOrder.size()); |
| Preconditions.checkArgument(sortExprs.size() == nullsFirstParams.size()); |
| sortExprs_ = sortExprs; |
| isAscOrder_ = isAscOrder; |
| nullsFirstParams_ = nullsFirstParams; |
| materializedExprs_ = new ArrayList<>(); |
| outputSmap_ = new ExprSubstitutionMap(); |
| sortingOrder_ = sortingOrder; |
| } |
| |
| /** |
| * C'tor for cloning. |
| */ |
| private SortInfo(SortInfo other) { |
| sortExprs_ = Expr.cloneList(other.sortExprs_); |
| isAscOrder_ = Lists.newArrayList(other.isAscOrder_); |
| nullsFirstParams_ = Lists.newArrayList(other.nullsFirstParams_); |
| materializedExprs_ = Expr.cloneList(other.materializedExprs_); |
| sortTupleDesc_ = other.sortTupleDesc_; |
| outputSmap_ = other.outputSmap_.clone(); |
| sortingOrder_ = other.sortingOrder_; |
| } |
| |
| public List<Expr> getSortExprs() { return sortExprs_; } |
| public List<Boolean> getIsAscOrder() { return isAscOrder_; } |
| public List<Boolean> getNullsFirstParams() { return nullsFirstParams_; } |
| public List<Expr> getMaterializedExprs() { return materializedExprs_; } |
| public TupleDescriptor getSortTupleDescriptor() { return sortTupleDesc_; } |
| public ExprSubstitutionMap getOutputSmap() { return outputSmap_; } |
| public TSortingOrder getSortingOrder() { return sortingOrder_; } |
| |
| /** |
| * Gets the list of booleans indicating whether nulls come first or last, independent |
| * of asc/desc. |
| */ |
| public List<Boolean> getNullsFirst() { |
| Preconditions.checkState(sortExprs_.size() == nullsFirstParams_.size()); |
| List<Boolean> nullsFirst = new ArrayList<>(); |
| for (int i = 0; i < sortExprs_.size(); ++i) { |
| nullsFirst.add(OrderByElement.nullsFirst(nullsFirstParams_.get(i), |
| isAscOrder_.get(i))); |
| } |
| return nullsFirst; |
| } |
| |
| /** |
| * Materializes the slots in 'sortTupleDesc_' referenced in the sort exprs. |
| * Materializes the slots referenced by the corresponding materialized expr after |
| * applying the 'smap'. Valid to call after createSortTupleInfo(). |
| */ |
| public void materializeRequiredSlots(Analyzer analyzer, ExprSubstitutionMap smap) { |
| Preconditions.checkNotNull(sortTupleDesc_); |
| Preconditions.checkState(sortTupleDesc_.isMaterialized()); |
| analyzer.materializeSlots(sortExprs_); |
| List<SlotDescriptor> sortTupleSlotDescs = sortTupleDesc_.getSlots(); |
| List<Expr> materializedExprs = new ArrayList<>(); |
| for (int i = 0; i < sortTupleSlotDescs.size(); ++i) { |
| if (sortTupleSlotDescs.get(i).isMaterialized()) { |
| materializedExprs.add(materializedExprs_.get(i)); |
| } |
| } |
| List<Expr> substMaterializedExprs = |
| Expr.substituteList(materializedExprs, smap, analyzer, false); |
| analyzer.materializeSlots(substMaterializedExprs); |
| } |
| |
| /** |
| * Replaces 'sortExprs_' according to smap. This needs to be called to make sure that |
| * the sort exprs refer to the new tuple materialized by this sort instead of the |
| * original input. |
| */ |
| public void substituteSortExprs(ExprSubstitutionMap smap, Analyzer analyzer) { |
| sortExprs_ = Expr.substituteList(sortExprs_, smap, analyzer, false); |
| } |
| |
| /** |
| * Validates internal state. Asserts that all sort exprs are bound by the sort tuple. |
| */ |
| public void checkConsistency() { |
| Preconditions.checkState( |
| materializedExprs_.size() == sortTupleDesc_.getSlots().size()); |
| for (Expr sortExpr: sortExprs_) { |
| Preconditions.checkState(sortExpr.isBound(sortTupleDesc_.getId())); |
| } |
| } |
| |
| @Override |
| public SortInfo clone() { return new SortInfo(this); } |
| |
| /** |
| * Matches SlotRef expressions that do not reference the sort tuple. |
| */ |
| private class IsInputSlotRefPred implements com.google.common.base.Predicate<Expr> { |
| private final TupleId sortTid_; |
| public IsInputSlotRefPred(TupleId sortTid) { |
| sortTid_ = sortTid; |
| } |
| |
| @Override |
| public boolean apply(Expr e) { |
| return e instanceof SlotRef && !e.isBound(sortTid_); |
| } |
| } |
| |
| /** |
| * Create a tuple descriptor for the single tuple that is materialized, sorted, and |
| * output by the sort node. Materializes slots required by 'resultExprs' as well as |
| * non-deterministic and expensive order by exprs. The materialized exprs are |
| * substituted with slot refs into the new tuple. This simplifies the sorting logic for |
| * total and top-n sorts. |
| */ |
| public void createSortTupleInfo(List<Expr> resultExprs, Analyzer analyzer) { |
| Preconditions.checkState(sortTupleDesc_ == null); |
| Preconditions.checkState(outputSmap_.size() == 0); |
| |
| // The descriptor for the tuples on which the sort operates. |
| sortTupleDesc_ = analyzer.getDescTbl().createTupleDescriptor("sort"); |
| sortTupleDesc_.setIsMaterialized(true); |
| |
| // The following exprs are materialized: |
| // 1. Sort exprs that we chose to materialize |
| // 2. SlotRefs against the sort input contained in the result and sort exprs |
| // after substituting the materialized sort exprs. |
| // 3. TupleIsNullPredicates from 'resultExprs' which are not legal to evaluate after |
| // the sort because the tuples referenced by it are gone after the sort. |
| |
| // Case 1: Materialize chosen sort exprs. |
| addMaterializedExprs(getMaterializedSortExprs(), analyzer); |
| |
| // Case 2: Materialize required input slots. Using a LinkedHashSet prevents the |
| // slots getting reordered unnecessarily. |
| Set<SlotRef> inputSlotRefs = new LinkedHashSet<>(); |
| IsInputSlotRefPred pred = new IsInputSlotRefPred(sortTupleDesc_.getId()); |
| TreeNode.collect(Expr.substituteList(resultExprs, outputSmap_, analyzer, false), |
| pred, inputSlotRefs); |
| TreeNode.collect(Expr.substituteList(sortExprs_, outputSmap_, analyzer, false), |
| pred, inputSlotRefs); |
| addMaterializedExprs(inputSlotRefs, analyzer); |
| |
| // Case 3: Materialize TupleIsNullPredicates. |
| List<Expr> tupleIsNullPreds = new ArrayList<>(); |
| TreeNode.collect(resultExprs, Predicates.instanceOf(TupleIsNullPredicate.class), |
| tupleIsNullPreds); |
| Expr.removeDuplicates(tupleIsNullPreds); |
| addMaterializedExprs(tupleIsNullPreds, analyzer); |
| |
| // The sort exprs are evaluated against the sort tuple, so they must reflect the |
| // materialization decision above. |
| substituteSortExprs(outputSmap_, analyzer); |
| checkConsistency(); |
| } |
| |
| /** |
| * Materializes each of the given exprs into 'sortTupleDesc' as follows: |
| * - Adds a new slot in 'sortTupleDesc_' |
| * - Adds an entry in 'outputSmap_' mapping from the expr to a SlotRef on the new slot |
| * - Adds an entry in 'materializedExprs_' |
| * Valid to call after createSortTupleInfo(). |
| */ |
| public <T extends Expr> void addMaterializedExprs(Collection<T> exprs, |
| Analyzer analyzer) { |
| Preconditions.checkNotNull(sortTupleDesc_); |
| for (Expr srcExpr : exprs) { |
| SlotDescriptor dstSlotDesc; |
| if (srcExpr instanceof SlotRef) { |
| SlotDescriptor srcSlotDesc = ((SlotRef) srcExpr).getDesc(); |
| dstSlotDesc = analyzer.copySlotDescriptor(srcSlotDesc, sortTupleDesc_); |
| } else { |
| dstSlotDesc = analyzer.addSlotDescriptor(sortTupleDesc_); |
| dstSlotDesc.initFromExpr(srcExpr); |
| } |
| dstSlotDesc.setSourceExpr(srcExpr); |
| outputSmap_.put(srcExpr.clone(), new SlotRef(dstSlotDesc)); |
| materializedExprs_.add(srcExpr); |
| } |
| } |
| |
| /** |
| * Estimates the size of the data materialized in memory by the TopN operator. The |
| * method uses the formula <code>estimatedSize = estimated # of rows in memory * |
| * average tuple serialized size</code>. 'cardinality' is the cardinality of the TopN |
| * operator and 'offset' is the value in the 'OFFSET [x]' clause. |
| */ |
| public long estimateTopNMaterializedSize(long cardinality, long offset) { |
| getSortTupleDescriptor().computeMemLayout(); |
| return (long) Math.ceil(getSortTupleDescriptor().getAvgSerializedSize() |
| * (PlanNode.checkedAdd(cardinality, offset))); |
| } |
| |
| /** |
| * Returns the subset of 'sortExprs_' that should be materialized. A sort expr is |
| * is materialized if it: |
| * - contains a non-deterministic expr |
| * - contains a UDF (since we don't know if they're deterministic) |
| * - is more expensive than a cost threshold |
| * - does not have a cost set |
| */ |
| private List<Expr> getMaterializedSortExprs() { |
| List<Expr> result = new ArrayList<>(); |
| for (Expr sortExpr : sortExprs_) { |
| if (!sortExpr.hasCost() |
| || sortExpr.getCost() > SORT_MATERIALIZATION_COST_THRESHOLD |
| || sortExpr.contains(Expr.IS_NONDETERMINISTIC_BUILTIN_FN_PREDICATE) |
| || sortExpr.contains(Expr.IS_UDF_PREDICATE)) { |
| result.add(sortExpr); |
| } |
| } |
| return result; |
| } |
| } |