| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to you under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.calcite.rel.metadata; |
| |
| import org.apache.calcite.plan.RelOptUtil; |
| import org.apache.calcite.rel.RelCollation; |
| import org.apache.calcite.rel.RelNode; |
| import org.apache.calcite.rel.core.Aggregate; |
| import org.apache.calcite.rel.core.AggregateCall; |
| import org.apache.calcite.rel.core.Join; |
| import org.apache.calcite.rel.core.JoinRelType; |
| import org.apache.calcite.rel.core.Minus; |
| import org.apache.calcite.rel.core.Project; |
| import org.apache.calcite.rel.core.Union; |
| import org.apache.calcite.rex.RexBuilder; |
| import org.apache.calcite.rex.RexCall; |
| import org.apache.calcite.rex.RexDynamicParam; |
| import org.apache.calcite.rex.RexInputRef; |
| import org.apache.calcite.rex.RexLiteral; |
| import org.apache.calcite.rex.RexLocalRef; |
| import org.apache.calcite.rex.RexNode; |
| import org.apache.calcite.rex.RexProgram; |
| import org.apache.calcite.rex.RexUtil; |
| import org.apache.calcite.rex.RexVisitorImpl; |
| import org.apache.calcite.sql.SqlBasicFunction; |
| import org.apache.calcite.sql.SqlFunction; |
| import org.apache.calcite.sql.SqlFunctionCategory; |
| import org.apache.calcite.sql.SqlKind; |
| import org.apache.calcite.sql.fun.SqlStdOperatorTable; |
| import org.apache.calcite.sql.type.OperandTypes; |
| import org.apache.calcite.sql.type.ReturnTypes; |
| import org.apache.calcite.util.ImmutableBitSet; |
| import org.apache.calcite.util.NumberUtil; |
| import org.apache.calcite.util.Util; |
| |
| import com.google.common.collect.ImmutableList; |
| |
| import org.checkerframework.checker.nullness.qual.Nullable; |
| import org.checkerframework.checker.nullness.qual.PolyNull; |
| |
| import java.math.BigDecimal; |
| import java.util.ArrayList; |
| import java.util.LinkedHashSet; |
| import java.util.List; |
| import java.util.Set; |
| |
| import static com.google.common.base.Preconditions.checkArgument; |
| |
| import static org.apache.calcite.util.NumberUtil.multiply; |
| |
| /** |
| * RelMdUtil provides utility methods used by the metadata provider methods. |
| */ |
| public class RelMdUtil { |
| //~ Static fields/initializers --------------------------------------------- |
| |
| public static final SqlFunction ARTIFICIAL_SELECTIVITY_FUNC = |
| SqlBasicFunction.create("ARTIFICIAL_SELECTIVITY", |
| ReturnTypes.BOOLEAN, // returns boolean since we'll AND it |
| OperandTypes.NUMERIC, // takes a numeric param |
| SqlFunctionCategory.SYSTEM); |
| |
| //~ Methods ---------------------------------------------------------------- |
| |
| private RelMdUtil() { |
| } |
| |
| /** |
| * Creates a RexNode that stores a selectivity value corresponding to the |
| * selectivity of a semijoin. This can be added to a filter to simulate the |
| * effect of the semijoin during costing, but should never appear in a real |
| * plan since it has no physical implementation. |
| * |
| * @param rel the semijoin of interest |
| * @return constructed rexnode |
| */ |
| public static RexNode makeSemiJoinSelectivityRexNode(RelMetadataQuery mq, Join rel) { |
| RexBuilder rexBuilder = rel.getCluster().getRexBuilder(); |
| double selectivity = |
| computeSemiJoinSelectivity(mq, rel.getLeft(), rel.getRight(), rel); |
| return rexBuilder.makeCall(ARTIFICIAL_SELECTIVITY_FUNC, |
| rexBuilder.makeApproxLiteral(new BigDecimal(selectivity))); |
| } |
| |
| /** |
| * Returns the selectivity value stored in a call. |
| * |
| * @param artificialSelectivityFuncNode Call containing the selectivity value |
| * @return selectivity value |
| */ |
| public static double getSelectivityValue( |
| RexNode artificialSelectivityFuncNode) { |
| assert artificialSelectivityFuncNode instanceof RexCall; |
| RexCall call = (RexCall) artificialSelectivityFuncNode; |
| assert call.getOperator() == ARTIFICIAL_SELECTIVITY_FUNC; |
| RexNode operand = call.getOperands().get(0); |
| @SuppressWarnings("unboxing.of.nullable") |
| double doubleValue = ((RexLiteral) operand).getValueAs(Double.class); |
| return doubleValue; |
| } |
| |
| /** |
| * Computes the selectivity of a semijoin filter if it is applied on a fact |
| * table. The computation is based on the selectivity of the dimension |
| * table/columns and the number of distinct values in the fact table |
| * columns. |
| * |
| * @param factRel fact table participating in the semijoin |
| * @param dimRel dimension table participating in the semijoin |
| * @param rel semijoin rel |
| * @return calculated selectivity |
| */ |
| public static double computeSemiJoinSelectivity(RelMetadataQuery mq, |
| RelNode factRel, RelNode dimRel, Join rel) { |
| return computeSemiJoinSelectivity(mq, factRel, dimRel, rel.analyzeCondition().leftKeys, |
| rel.analyzeCondition().rightKeys); |
| } |
| |
| /** |
| * Computes the selectivity of a semijoin filter if it is applied on a fact |
| * table. The computation is based on the selectivity of the dimension |
| * table/columns and the number of distinct values in the fact table |
| * columns. |
| * |
| * @param factRel fact table participating in the semijoin |
| * @param dimRel dimension table participating in the semijoin |
| * @param factKeyList LHS keys used in the filter |
| * @param dimKeyList RHS keys used in the filter |
| * @return calculated selectivity |
| */ |
| public static double computeSemiJoinSelectivity(RelMetadataQuery mq, |
| RelNode factRel, RelNode dimRel, List<Integer> factKeyList, |
| List<Integer> dimKeyList) { |
| ImmutableBitSet.Builder factKeys = ImmutableBitSet.builder(); |
| for (int factCol : factKeyList) { |
| factKeys.set(factCol); |
| } |
| ImmutableBitSet.Builder dimKeyBuilder = ImmutableBitSet.builder(); |
| for (int dimCol : dimKeyList) { |
| dimKeyBuilder.set(dimCol); |
| } |
| final ImmutableBitSet dimKeys = dimKeyBuilder.build(); |
| |
| Double factPop = mq.getPopulationSize(factRel, factKeys.build()); |
| if (factPop == null) { |
| // use the dimension population if the fact population is |
| // unavailable; since we're filtering the fact table, that's |
| // the population we ideally want to use |
| factPop = mq.getPopulationSize(dimRel, dimKeys); |
| } |
| |
| // if cardinality and population are available, use them; otherwise |
| // use percentage original rows |
| Double selectivity; |
| Double dimCard = |
| mq.getDistinctRowCount( |
| dimRel, |
| dimKeys, |
| null); |
| if ((dimCard != null) && (factPop != null)) { |
| // to avoid division by zero |
| if (factPop < 1.0) { |
| factPop = 1.0; |
| } |
| selectivity = dimCard / factPop; |
| } else { |
| selectivity = mq.getPercentageOriginalRows(dimRel); |
| } |
| |
| if (selectivity == null) { |
| // set a default selectivity based on the number of semijoin keys |
| selectivity = |
| Math.pow( |
| 0.1, |
| dimKeys.cardinality()); |
| } else if (selectivity > 1.0) { |
| selectivity = 1.0; |
| } |
| return selectivity; |
| } |
| |
| /** |
| * Returns true if the columns represented in a bit mask are definitely |
| * known to form a unique column set. |
| * |
| * @param rel the relational expression that the column mask corresponds |
| * to |
| * @param colMask bit mask containing columns that will be tested for |
| * uniqueness |
| * @return true if bit mask represents a unique column set; false if not (or |
| * if no metadata is available) |
| */ |
| public static boolean areColumnsDefinitelyUnique(RelMetadataQuery mq, |
| RelNode rel, ImmutableBitSet colMask) { |
| Boolean b = mq.areColumnsUnique(rel, colMask, false); |
| return b != null && b; |
| } |
| |
| public static @Nullable Boolean areColumnsUnique(RelMetadataQuery mq, RelNode rel, |
| List<RexInputRef> columnRefs) { |
| ImmutableBitSet.Builder colMask = ImmutableBitSet.builder(); |
| for (RexInputRef columnRef : columnRefs) { |
| colMask.set(columnRef.getIndex()); |
| } |
| return mq.areColumnsUnique(rel, colMask.build()); |
| } |
| |
| public static boolean areColumnsDefinitelyUnique(RelMetadataQuery mq, |
| RelNode rel, List<RexInputRef> columnRefs) { |
| Boolean b = areColumnsUnique(mq, rel, columnRefs); |
| return b != null && b; |
| } |
| |
| /** |
| * Returns true if the columns represented in a bit mask are definitely |
| * known to form a unique column set, when nulls have been filtered from |
| * the columns. |
| * |
| * @param rel the relational expression that the column mask corresponds |
| * to |
| * @param colMask bit mask containing columns that will be tested for |
| * uniqueness |
| * @return true if bit mask represents a unique column set; false if not (or |
| * if no metadata is available) |
| */ |
| public static boolean areColumnsDefinitelyUniqueWhenNullsFiltered( |
| RelMetadataQuery mq, RelNode rel, ImmutableBitSet colMask) { |
| Boolean b = mq.areColumnsUnique(rel, colMask, true); |
| return b != null && b; |
| } |
| |
| public static @Nullable Boolean areColumnsUniqueWhenNullsFiltered(RelMetadataQuery mq, |
| RelNode rel, List<RexInputRef> columnRefs) { |
| ImmutableBitSet.Builder colMask = ImmutableBitSet.builder(); |
| |
| for (RexInputRef columnRef : columnRefs) { |
| colMask.set(columnRef.getIndex()); |
| } |
| |
| return mq.areColumnsUnique(rel, colMask.build(), true); |
| } |
| |
| public static boolean areColumnsDefinitelyUniqueWhenNullsFiltered( |
| RelMetadataQuery mq, RelNode rel, List<RexInputRef> columnRefs) { |
| Boolean b = areColumnsUniqueWhenNullsFiltered(mq, rel, columnRefs); |
| return b != null && b; |
| } |
| |
| /** |
| * Separates a bit-mask representing a join into masks representing the left |
| * and right inputs into the join. |
| * |
| * @param groupKey original bit-mask |
| * @param leftMask left bit-mask to be set |
| * @param rightMask right bit-mask to be set |
| * @param nFieldsOnLeft number of fields in the left input |
| */ |
| public static void setLeftRightBitmaps( |
| ImmutableBitSet groupKey, |
| ImmutableBitSet.Builder leftMask, |
| ImmutableBitSet.Builder rightMask, |
| int nFieldsOnLeft) { |
| for (int bit : groupKey) { |
| if (bit < nFieldsOnLeft) { |
| leftMask.set(bit); |
| } else { |
| rightMask.set(bit - nFieldsOnLeft); |
| } |
| } |
| } |
| |
| /** |
| * Returns the number of distinct values provided numSelected are selected |
| * where there are domainSize distinct values. |
| * |
| * <p>Note that in the case where domainSize == numSelected, it's not true |
| * that the return value should be domainSize. If you pick 100 random values |
| * between 1 and 100, you'll most likely end up with fewer than 100 distinct |
| * values, because you'll pick some values more than once. |
| * |
| * <p>The implementation is an unbiased estimation of the number of distinct |
| * values by performing a number of selections (with replacement) from a |
| * universe set. |
| * |
| * @param domainSize Size of the universe set |
| * @param numSelected The number of selections |
| * |
| * @return the expected number of distinct values, or null if either argument |
| * is null |
| */ |
| public static @PolyNull Double numDistinctVals( |
| @PolyNull Double domainSize, |
| @PolyNull Double numSelected) { |
| if ((domainSize == null) || (numSelected == null)) { |
| return domainSize; |
| } |
| |
| // Cap the input sizes at MAX_VALUE to ensure that the calculations |
| // using these values return meaningful values |
| double dSize = capInfinity(domainSize); |
| double numSel = capInfinity(numSelected); |
| |
| // The formula is derived as follows: |
| // |
| // Suppose we have N distinct values, and we select n from them (with replacement). |
| // For any value i, we use C(i) = k to express the event that the value is selected exactly |
| // k times in the n selections. |
| // |
| // It can be seen that, for any one selection, the probability of the value being selected |
| // is 1/N. So the probability of being selected exactly k times is |
| // |
| // Pr{C(i) = k} = C(n, k) * (1 / N)^k * (1 - 1 / N)^(n - k), |
| // where C(n, k) = n! / [k! * (n - k)!] |
| // |
| // The probability that the value is never selected is |
| // Pr{C(i) = 0} = C(n, 0) * (1/N)^0 * (1 - 1 / N)^n = (1 - 1 / N)^n |
| // |
| // We define indicator random variable I(i), so that I(i) = 1 iff |
| // value i is selected in at least one of the selections. We have |
| // E[I(i)] = 1 * Pr{I(i) = 1} + 0 * Pr{I(i) = 0) = Pr{I(i) = 1} |
| // = Pr{C(i) > 0} = 1 - Pr{C(i) = 0} = 1 - (1 - 1 / N)^n |
| // |
| // The expected number of distinct values in the overall n selections is: |
| // E(I(1)] + E(I(2)] + ... + E(I(N)] = N * [1 - (1 - 1 / N)^n] |
| |
| double res = 0; |
| if (dSize > 0) { |
| // We calculate the result in 3 cases: |
| // 1) If N and n are not very large values, we directly calculate |
| // N * [1 - exp(n * ln(1 - 1 / N))] |
| // 2) If N or n are very large values, but n / N is not close to 0, |
| // We compute the result by expanding the exponent n * ln(1 - 1 / N) |
| // as a Taylor series. |
| // 3) If N or n are very large values, and n / N is close to 0, |
| // we expand the whole formula as a Taylor series. |
| // We separate the 3 cases by trying to expand the exponent as a Taylor series. |
| // If the required number of terms for convergence is too big, it is case 1. |
| // If the required number of terms is too small, it is case 3. Otherwise, |
| // it is case 2. |
| |
| // To expand the component as a Taylor series, we have: |
| // n * ln(1 - 1 / N) |
| // = n * (- 1 / N - 1 / (2 * N ^ 2) - 1 / (3 * N ^ 3) - ...) |
| // |
| // To get an approximate result, we truncate the series after the first m terms. |
| // This leads to an error of: |
| // n * (1 / [(m + 1) * N ^ (m + 1)] + 1 / [(m + 2) * dSize ^ (m + 2) + ...]) |
| // < n / (m + 1) / [N ^ m * (N - 1)] < n / (N ^ m) |
| // |
| // To get an accurate result, the error should be smaller than 1e-16, because a |
| // double can represent at most 15 digits after the decimal point. Therefore, we need |
| // n / (N ^ m ) < 1e-16, or N ^ m / n > 1e16. |
| // |
| // The smallest value of m that satisfies the above formula is the |
| // number of terms required for convergence. |
| |
| final int maxTerms = 32; |
| final int minTerms = 1; |
| |
| int numTerms = 1; |
| if (dSize > 10) { |
| double dPower = dSize; |
| while (dPower / numSel <= 1e16) { |
| dPower = dPower * dPower; |
| numTerms *= 2; |
| } |
| } else { |
| // for small dSize, force case 1 |
| numTerms = maxTerms + 1; |
| } |
| |
| if (numTerms > maxTerms) { |
| // case 1 |
| double expo = numSel * Math.log(1.0 - 1.0 / dSize); |
| res = (1.0 - Math.exp(expo)) * dSize; |
| } else if (numTerms > minTerms) { |
| // case 2 |
| double expo = 0; |
| double dPower = dSize; |
| for (int i = 1; i <= numTerms; i++) { |
| expo -= numSel / dPower / i; |
| dPower *= dSize; |
| } |
| res = (1.0 - Math.exp(expo)) * dSize; |
| } else { |
| // numTerms <= minTerms |
| // case 3 |
| |
| // Since the ratio n / N is close to 0, we can expand the exponent as |
| // n * ln(1 - 1 / N) ≈ n * (- 1 / N) = - n / N. |
| // So N * [1 - (1 - 1 / N) ^ n ] = N * [1 - exp(n * ln(1 - 1 / N))] |
| // ≈ N * [1 - exp(- n / N)]. |
| // Since exp(x) = 1 + x + x ^ 2 / 2 + x ^ 3 / 6 + ..., we have |
| // N * [1 - exp(- n / N)] = N * [n / N - n ^ 2 / (2 * N ^ 2) + n ^ 3 / (6 * N ^ 3) - ...] |
| // = n - n ^ 2 / (2 * N) + n ^ 3 / (6 * N ^ 2) - ... |
| // Since n / N is close to 0, and due to the factorial in the denominator, |
| // the above series converges to 0 very quickly. |
| res = 0; |
| numTerms = 5; |
| double selPower = numSel; |
| double domPower = 1; |
| int factorial = 1; |
| int sign = 1; |
| for (int i = 1; i < numTerms; i++) { |
| res += sign * selPower / factorial / domPower; |
| |
| selPower *= numSel; |
| domPower *= dSize; |
| factorial *= i + 1; |
| sign *= -1; |
| } |
| } |
| } |
| |
| // fix the boundary cases |
| if (res > dSize) { |
| res = dSize; |
| } |
| |
| if (res > numSel) { |
| res = numSel; |
| } |
| |
| if (res < 0) { |
| res = 0; |
| } |
| |
| return res; |
| } |
| |
| /** |
| * Caps a double value at Double.MAX_VALUE if it's currently infinity |
| * |
| * @param d the Double object |
| * @return the double value if it's not infinity; else Double.MAX_VALUE |
| */ |
| public static double capInfinity(Double d) { |
| return d.isInfinite() ? Double.MAX_VALUE : d; |
| } |
| |
| /** |
| * Returns default estimates for selectivities, in the absence of stats. |
| * |
| * @param predicate predicate for which selectivity will be computed; null |
| * means true, so gives selectity of 1.0 |
| * @return estimated selectivity |
| */ |
| public static double guessSelectivity(@Nullable RexNode predicate) { |
| return guessSelectivity(predicate, false); |
| } |
| |
| /** |
| * Returns default estimates for selectivities, in the absence of stats. |
| * |
| * @param predicate predicate for which selectivity will be computed; |
| * null means true, so gives selectity of 1.0 |
| * @param artificialOnly return only the selectivity contribution from |
| * artificial nodes |
| * @return estimated selectivity |
| */ |
| public static double guessSelectivity( |
| @Nullable RexNode predicate, |
| boolean artificialOnly) { |
| double sel = 1.0; |
| if ((predicate == null) || predicate.isAlwaysTrue()) { |
| return sel; |
| } |
| |
| double artificialSel = 1.0; |
| |
| for (RexNode pred : RelOptUtil.conjunctions(predicate)) { |
| if (pred.getKind() == SqlKind.IS_NOT_NULL) { |
| sel *= .9; |
| } else if ( |
| (pred instanceof RexCall) |
| && (((RexCall) pred).getOperator() |
| == RelMdUtil.ARTIFICIAL_SELECTIVITY_FUNC)) { |
| artificialSel *= RelMdUtil.getSelectivityValue(pred); |
| } else if (pred.isA(SqlKind.EQUALS)) { |
| sel *= .15; |
| } else if (pred.isA(SqlKind.COMPARISON)) { |
| sel *= .5; |
| } else { |
| sel *= .25; |
| } |
| } |
| |
| if (artificialOnly) { |
| return artificialSel; |
| } else { |
| return sel * artificialSel; |
| } |
| } |
| |
| /** |
| * AND's two predicates together, either of which may be null, removing |
| * redundant filters. |
| * |
| * @param rexBuilder rexBuilder used to construct AND'd RexNode |
| * @param pred1 first predicate |
| * @param pred2 second predicate |
| * @return AND'd predicate or individual predicates if one is null |
| */ |
| public static @Nullable RexNode unionPreds( |
| RexBuilder rexBuilder, |
| @Nullable RexNode pred1, |
| @Nullable RexNode pred2) { |
| final Set<RexNode> unionList = new LinkedHashSet<>(); |
| unionList.addAll(RelOptUtil.conjunctions(pred1)); |
| unionList.addAll(RelOptUtil.conjunctions(pred2)); |
| return RexUtil.composeConjunction(rexBuilder, unionList, true); |
| } |
| |
| /** |
| * Takes the difference between two predicates, removing from the first any |
| * predicates also in the second. |
| * |
| * @param rexBuilder rexBuilder used to construct AND'd RexNode |
| * @param pred1 first predicate |
| * @param pred2 second predicate |
| * @return MINUS'd predicate list |
| */ |
| public static @Nullable RexNode minusPreds( |
| RexBuilder rexBuilder, |
| @Nullable RexNode pred1, |
| @Nullable RexNode pred2) { |
| final List<RexNode> minusList = |
| new ArrayList<>(RelOptUtil.conjunctions(pred1)); |
| minusList.removeAll(RelOptUtil.conjunctions(pred2)); |
| return RexUtil.composeConjunction(rexBuilder, minusList, true); |
| } |
| |
| /** |
| * Takes a bitmap representing a set of input references and extracts the |
| * ones that reference the group by columns in an aggregate. |
| * |
| * @param groupKey the original bitmap |
| * @param aggRel the aggregate |
| * @param childKey sets bits from groupKey corresponding to group by columns |
| */ |
| public static void setAggChildKeys( |
| ImmutableBitSet groupKey, |
| Aggregate aggRel, |
| ImmutableBitSet.Builder childKey) { |
| List<AggregateCall> aggCalls = aggRel.getAggCallList(); |
| for (int bit : groupKey) { |
| if (bit < aggRel.getGroupCount()) { |
| // group by column |
| childKey.set(bit); |
| } else { |
| // aggregate column -- set a bit for each argument being |
| // aggregated |
| AggregateCall agg = aggCalls.get(bit - aggRel.getGroupCount()); |
| for (Integer arg : agg.getArgList()) { |
| childKey.set(arg); |
| } |
| } |
| } |
| } |
| |
| /** |
| * Forms two bitmaps by splitting the columns in a bitmap according to |
| * whether or not the column references the child input or is an expression. |
| * |
| * @param projExprs Project expressions |
| * @param groupKey Bitmap whose columns will be split |
| * @param baseCols Bitmap representing columns from the child input |
| * @param projCols Bitmap representing non-child columns |
| */ |
| public static void splitCols( |
| List<RexNode> projExprs, |
| ImmutableBitSet groupKey, |
| ImmutableBitSet.Builder baseCols, |
| ImmutableBitSet.Builder projCols) { |
| for (int bit : groupKey) { |
| final RexNode e = projExprs.get(bit); |
| if (e instanceof RexInputRef) { |
| baseCols.set(((RexInputRef) e).getIndex()); |
| } else { |
| projCols.set(bit); |
| } |
| } |
| } |
| |
| /** |
| * Computes the cardinality of a particular expression from the projection |
| * list. |
| * |
| * @param rel RelNode corresponding to the project |
| * @param expr projection expression |
| * @return cardinality |
| */ |
| public static @Nullable Double cardOfProjExpr(RelMetadataQuery mq, Project rel, |
| RexNode expr) { |
| return expr.accept(new CardOfProjExpr(mq, rel)); |
| } |
| |
| /** |
| * Computes the population size for a set of keys returned from a join. |
| * |
| * @param join_ Join relational operator |
| * @param groupKey Keys to compute the population for |
| * @return computed population size |
| */ |
| public static @Nullable Double getJoinPopulationSize(RelMetadataQuery mq, |
| RelNode join_, ImmutableBitSet groupKey) { |
| Join join = (Join) join_; |
| if (!join.getJoinType().projectsRight()) { |
| return mq.getPopulationSize(join.getLeft(), groupKey); |
| } |
| ImmutableBitSet.Builder leftMask = ImmutableBitSet.builder(); |
| ImmutableBitSet.Builder rightMask = ImmutableBitSet.builder(); |
| RelNode left = join.getLeft(); |
| RelNode right = join.getRight(); |
| |
| // separate the mask into masks for the left and right |
| RelMdUtil.setLeftRightBitmaps( |
| groupKey, leftMask, rightMask, left.getRowType().getFieldCount()); |
| |
| Double population = |
| multiply( |
| mq.getPopulationSize(left, leftMask.build()), |
| mq.getPopulationSize(right, rightMask.build())); |
| |
| return numDistinctVals(population, mq.getRowCount(join)); |
| } |
| |
| /** Add an epsilon to the value passed in. */ |
| public static double addEpsilon(double d) { |
| assert d >= 0d; |
| final double d0 = d; |
| if (d < 10) { |
| // For small d, adding 1 would change the value significantly. |
| d *= 1.001d; |
| if (d != d0) { |
| return d; |
| } |
| } |
| // For medium d, add 1. Keeps integral values integral. |
| ++d; |
| if (d != d0) { |
| return d; |
| } |
| // For large d, adding 1 might not change the value. Add .1%. |
| // If d is NaN, this still will probably not change the value. That's OK. |
| d *= 1.001d; |
| return d; |
| } |
| |
| /** |
| * Computes the number of distinct rows for a set of keys returned from a |
| * semi-join. |
| * |
| * @param semiJoinRel RelNode representing the semi-join |
| * @param mq metadata query |
| * @param groupKey keys that the distinct row count will be computed for |
| * @param predicate join predicate |
| * @return number of distinct rows |
| */ |
| public static @Nullable Double getSemiJoinDistinctRowCount(Join semiJoinRel, RelMetadataQuery mq, |
| ImmutableBitSet groupKey, @Nullable RexNode predicate) { |
| if (predicate == null || predicate.isAlwaysTrue()) { |
| if (groupKey.isEmpty()) { |
| return 1D; |
| } |
| } |
| // create a RexNode representing the selectivity of the |
| // semijoin filter and pass it to getDistinctRowCount |
| RexNode newPred = RelMdUtil.makeSemiJoinSelectivityRexNode(mq, semiJoinRel); |
| if (predicate != null) { |
| RexBuilder rexBuilder = semiJoinRel.getCluster().getRexBuilder(); |
| newPred = |
| rexBuilder.makeCall( |
| SqlStdOperatorTable.AND, |
| newPred, |
| predicate); |
| } |
| |
| return mq.getDistinctRowCount(semiJoinRel.getLeft(), groupKey, newPred); |
| } |
| |
| /** |
| * Computes the number of distinct rows for a set of keys returned from a |
| * join. Also known as NDV (number of distinct values). |
| * |
| * @param joinRel RelNode representing the join |
| * @param joinType type of join |
| * @param groupKey keys that the distinct row count will be computed for |
| * @param predicate join predicate |
| * @param useMaxNdv If true use formula <code>max(left NDV, right NDV)</code>, |
| * otherwise use <code>left NDV * right NDV</code>. |
| * @return number of distinct rows |
| */ |
| public static @Nullable Double getJoinDistinctRowCount(RelMetadataQuery mq, |
| RelNode joinRel, JoinRelType joinType, ImmutableBitSet groupKey, |
| @Nullable RexNode predicate, boolean useMaxNdv) { |
| if (predicate == null || predicate.isAlwaysTrue()) { |
| if (groupKey.isEmpty()) { |
| return 1D; |
| } |
| } |
| Join join = (Join) joinRel; |
| if (join.isSemiJoin()) { |
| return getSemiJoinDistinctRowCount(join, mq, groupKey, predicate); |
| } |
| Double distRowCount; |
| ImmutableBitSet.Builder leftMask = ImmutableBitSet.builder(); |
| ImmutableBitSet.Builder rightMask = ImmutableBitSet.builder(); |
| RelNode left = joinRel.getInputs().get(0); |
| RelNode right = joinRel.getInputs().get(1); |
| |
| RelMdUtil.setLeftRightBitmaps( |
| groupKey, |
| leftMask, |
| rightMask, |
| left.getRowType().getFieldCount()); |
| |
| // determine which filters apply to the left vs right |
| RexNode leftPred = null; |
| RexNode rightPred = null; |
| if (predicate != null) { |
| final List<RexNode> leftFilters = new ArrayList<>(); |
| final List<RexNode> rightFilters = new ArrayList<>(); |
| final List<RexNode> joinFilters = new ArrayList<>(); |
| final List<RexNode> predList = RelOptUtil.conjunctions(predicate); |
| |
| RelOptUtil.classifyFilters( |
| joinRel, |
| predList, |
| joinType.canPushIntoFromAbove(), |
| joinType.canPushLeftFromAbove(), |
| joinType.canPushRightFromAbove(), |
| joinFilters, |
| leftFilters, |
| rightFilters); |
| |
| RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder(); |
| leftPred = |
| RexUtil.composeConjunction(rexBuilder, leftFilters, true); |
| rightPred = |
| RexUtil.composeConjunction(rexBuilder, rightFilters, true); |
| } |
| |
| if (useMaxNdv) { |
| distRowCount = |
| NumberUtil.max( |
| mq.getDistinctRowCount(left, leftMask.build(), leftPred), |
| mq.getDistinctRowCount(right, rightMask.build(), rightPred)); |
| } else { |
| distRowCount = |
| multiply( |
| mq.getDistinctRowCount(left, leftMask.build(), leftPred), |
| mq.getDistinctRowCount(right, rightMask.build(), rightPred)); |
| } |
| |
| return RelMdUtil.numDistinctVals(distRowCount, mq.getRowCount(joinRel)); |
| } |
| |
| /** Returns an estimate of the number of rows returned by a {@link Union} |
| * (before duplicates are eliminated). */ |
| public static double getUnionAllRowCount(RelMetadataQuery mq, Union rel) { |
| double rowCount = 0; |
| for (RelNode input : rel.getInputs()) { |
| rowCount += mq.getRowCount(input); |
| } |
| return rowCount; |
| } |
| |
| /** Returns an estimate of the number of rows returned by a {@link Minus}. */ |
| public static double getMinusRowCount(RelMetadataQuery mq, Minus minus) { |
| // REVIEW jvs 30-May-2005: I just pulled this out of a hat. |
| final List<RelNode> inputs = minus.getInputs(); |
| double dRows = mq.getRowCount(inputs.get(0)); |
| for (int i = 1; i < inputs.size(); i++) { |
| dRows -= 0.5 * mq.getRowCount(inputs.get(i)); |
| } |
| if (dRows < 0) { |
| dRows = 0; |
| } |
| return dRows; |
| } |
| |
| /** Returns an estimate of the number of rows returned by a {@link Join}. */ |
| public static @Nullable Double getJoinRowCount(RelMetadataQuery mq, Join join, |
| RexNode condition) { |
| if (!join.getJoinType().projectsRight()) { |
| // Create a RexNode representing the selectivity of the |
| // semijoin filter and pass it to getSelectivity |
| RexNode semiJoinSelectivity = |
| RelMdUtil.makeSemiJoinSelectivityRexNode(mq, join); |
| Double selectivity = mq.getSelectivity(join.getLeft(), semiJoinSelectivity); |
| if (selectivity == null) { |
| return null; |
| } |
| return (join.getJoinType() == JoinRelType.SEMI |
| ? selectivity |
| : 1D - selectivity) // ANTI join |
| * mq.getRowCount(join.getLeft()); |
| } |
| // Row count estimates of 0 will be rounded up to 1. |
| // So, use maxRowCount where the product is very small. |
| final Double left = mq.getRowCount(join.getLeft()); |
| final Double right = mq.getRowCount(join.getRight()); |
| if (left == null || right == null) { |
| return null; |
| } |
| if (left <= 1D || right <= 1D) { |
| Double max = mq.getMaxRowCount(join); |
| if (max != null && max <= 1D) { |
| return max; |
| } |
| } |
| |
| Double selectivity = mq.getSelectivity(join, condition); |
| if (selectivity == null) { |
| return null; |
| } |
| double innerRowCount = left * right * selectivity; |
| switch (join.getJoinType()) { |
| case INNER: |
| return innerRowCount; |
| case LEFT: |
| return left * (1D - selectivity) + innerRowCount; |
| case RIGHT: |
| return right * (1D - selectivity) + innerRowCount; |
| case FULL: |
| return (left + right) * (1D - selectivity) + innerRowCount; |
| default: |
| throw Util.unexpected(join.getJoinType()); |
| } |
| } |
| |
| public static double estimateFilteredRows(RelNode child, RexProgram program, |
| RelMetadataQuery mq) { |
| // convert the program's RexLocalRef condition to an expanded RexNode |
| RexLocalRef programCondition = program.getCondition(); |
| RexNode condition; |
| if (programCondition == null) { |
| condition = null; |
| } else { |
| condition = program.expandLocalRef(programCondition); |
| } |
| return estimateFilteredRows(child, condition, mq); |
| } |
| |
| public static double estimateFilteredRows(RelNode child, @Nullable RexNode condition, |
| RelMetadataQuery mq) { |
| @SuppressWarnings("unboxing.of.nullable") |
| double result = |
| multiply(mq.getRowCount(child), mq.getSelectivity(child, condition)); |
| return result; |
| } |
| |
| /** Returns a point on a line. |
| * |
| * <p>The result is always a value between {@code minY} and {@code maxY}, |
| * even if {@code x} is not between {@code minX} and {@code maxX}. |
| * |
| * <p>Examples:<ul> |
| * <li>{@code linear(0, 0, 10, 100, 200}} returns 100 because 0 is minX |
| * <li>{@code linear(5, 0, 10, 100, 200}} returns 150 because 5 is |
| * mid-way between minX and maxX |
| * <li>{@code linear(5, 0, 10, 100, 200}} returns 160 |
| * <li>{@code linear(10, 0, 10, 100, 200}} returns 200 because 10 is maxX |
| * <li>{@code linear(-2, 0, 10, 100, 200}} returns 100 because -2 is |
| * less than minX and is therefore treated as minX |
| * <li>{@code linear(12, 0, 10, 100, 200}} returns 100 because 12 is |
| * greater than maxX and is therefore treated as maxX |
| * </ul> |
| */ |
| public static double linear(int x, int minX, int maxX, double minY, double |
| maxY) { |
| checkArgument(minX < maxX); |
| checkArgument(minY < maxY); |
| if (x < minX) { |
| return minY; |
| } |
| if (x > maxX) { |
| return maxY; |
| } |
| return minY + (double) (x - minX) / (double) (maxX - minX) * (maxY - minY); |
| } |
| |
| //~ Inner Classes ---------------------------------------------------------- |
| |
| /** Visitor that walks over a scalar expression and computes the |
| * cardinality of its result. */ |
| private static class CardOfProjExpr extends RexVisitorImpl<@Nullable Double> { |
| private final RelMetadataQuery mq; |
| private Project rel; |
| |
| CardOfProjExpr(RelMetadataQuery mq, Project rel) { |
| super(true); |
| this.mq = mq; |
| this.rel = rel; |
| } |
| |
| @Override public @Nullable Double visitInputRef(RexInputRef var) { |
| int index = var.getIndex(); |
| ImmutableBitSet col = ImmutableBitSet.of(index); |
| Double distinctRowCount = |
| mq.getDistinctRowCount(rel.getInput(), col, null); |
| if (distinctRowCount == null) { |
| return null; |
| } else { |
| return numDistinctVals(distinctRowCount, mq.getRowCount(rel)); |
| } |
| } |
| |
| @Override public @Nullable Double visitLiteral(RexLiteral literal) { |
| return numDistinctVals(1.0, mq.getRowCount(rel)); |
| } |
| |
| @Override public @Nullable Double visitCall(RexCall call) { |
| Double distinctRowCount; |
| Double rowCount = mq.getRowCount(rel); |
| if (call.isA(SqlKind.MINUS_PREFIX)) { |
| distinctRowCount = cardOfProjExpr(mq, rel, call.getOperands().get(0)); |
| } else if (call.isA(ImmutableList.of(SqlKind.PLUS, SqlKind.MINUS))) { |
| Double card0 = cardOfProjExpr(mq, rel, call.getOperands().get(0)); |
| if (card0 == null) { |
| return null; |
| } |
| Double card1 = cardOfProjExpr(mq, rel, call.getOperands().get(1)); |
| if (card1 == null) { |
| return null; |
| } |
| distinctRowCount = Math.max(card0, card1); |
| } else if (call.isA(ImmutableList.of(SqlKind.TIMES, SqlKind.DIVIDE))) { |
| distinctRowCount = |
| multiply( |
| cardOfProjExpr(mq, rel, call.getOperands().get(0)), |
| cardOfProjExpr(mq, rel, call.getOperands().get(1))); |
| |
| // TODO zfong 6/21/06 - Broadbase has code to handle date |
| // functions like year, month, day; E.g., cardinality of Month() |
| // is 12 |
| } else { |
| if (call.getOperands().size() == 1) { |
| distinctRowCount = cardOfProjExpr(mq, rel, call.getOperands().get(0)); |
| } else { |
| distinctRowCount = rowCount / 10; |
| } |
| } |
| |
| return numDistinctVals(distinctRowCount, rowCount); |
| } |
| } |
| |
| /** Returns whether a relational expression is already sorted and has fewer |
| * rows than the sum of offset and limit. |
| * |
| * <p>If this is the case, it is safe to push down a |
| * {@link org.apache.calcite.rel.core.Sort} with limit and optional offset. */ |
| public static boolean checkInputForCollationAndLimit(RelMetadataQuery mq, |
| RelNode input, RelCollation collation, @Nullable RexNode offset, @Nullable RexNode fetch) { |
| return alreadySorted(mq, input, collation) && alreadySmaller(mq, input, offset, fetch); |
| } |
| |
| // Checks if the input is already sorted |
| private static boolean alreadySorted(RelMetadataQuery mq, RelNode input, RelCollation collation) { |
| if (collation.getFieldCollations().isEmpty()) { |
| return true; |
| } |
| final ImmutableList<RelCollation> collations = mq.collations(input); |
| if (collations == null) { |
| // Cannot be determined |
| return false; |
| } |
| for (RelCollation inputCollation : collations) { |
| if (inputCollation.satisfies(collation)) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| // Checks if we are not reducing the number of tuples |
| private static boolean alreadySmaller(RelMetadataQuery mq, RelNode input, |
| @Nullable RexNode offset, @Nullable RexNode fetch) { |
| if (fetch == null) { |
| return true; |
| } |
| final Double rowCount = mq.getMaxRowCount(input); |
| if (rowCount == null || offset instanceof RexDynamicParam || fetch instanceof RexDynamicParam) { |
| // Cannot be determined |
| return false; |
| } |
| final int offsetVal = offset == null ? 0 : RexLiteral.intValue(offset); |
| final int limit = RexLiteral.intValue(fetch); |
| return (double) offsetVal + (double) limit >= rowCount; |
| } |
| |
| /** |
| * Validates whether a value represents a percentage number |
| * (that is, a value in the interval [0.0, 1.0]) and returns the value. |
| * |
| * <p>Returns null if and only if {@code result} is null. |
| * |
| * <p>Throws if {@code result} is not null, not in range 0 to 1, |
| * and assertions are enabled. |
| */ |
| public static @PolyNull Double validatePercentage(@PolyNull Double result) { |
| assert isPercentage(result, true); |
| return result; |
| } |
| |
| private static boolean isPercentage(@Nullable Double result, boolean fail) { |
| if (result != null) { |
| final double d = result; |
| if (d < 0.0) { |
| assert !fail; |
| return false; |
| } |
| if (d > 1.0) { |
| assert !fail; |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| /** |
| * Validates the {@code result} is valid. |
| * |
| * <p>Never let the result go below 1, as it will result in incorrect |
| * calculations if the row-count is used as the denominator in a |
| * division expression. Also, cap the value at the max double value |
| * to avoid calculations using infinity. |
| * |
| * <p>Returns null if and only if {@code result} is null. |
| * |
| * <p>Throws if {@code result} is not null, is negative, |
| * and assertions are enabled. |
| * |
| * @return the corrected value from the {@code result} |
| * @throws AssertionError if the {@code result} is negative |
| */ |
| public static @PolyNull Double validateResult(@PolyNull Double result) { |
| if (result == null) { |
| return null; |
| } |
| |
| if (result.isInfinite()) { |
| result = Double.MAX_VALUE; |
| } |
| assert isNonNegative(result, true); |
| if (result < 1.0) { |
| result = 1.0; |
| } |
| return result; |
| } |
| |
| private static boolean isNonNegative(@Nullable Double result, boolean fail) { |
| if (result != null) { |
| final double d = result; |
| if (d < 0.0) { |
| assert !fail; |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| /** |
| * Removes cached metadata values for specified RelNode. |
| * |
| * @param rel RelNode whose cached metadata should be removed |
| * @return true if cache for the provided RelNode was not empty |
| */ |
| public static boolean clearCache(RelNode rel) { |
| return rel.getCluster().getMetadataQuery().clearCache(rel); |
| } |
| } |