/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.rel.metadata;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Minus;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.Union;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexDynamicParam;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexLocalRef;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexProgram;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.rex.RexVisitorImpl;
import org.apache.calcite.sql.SqlBasicFunction;
import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlFunctionCategory;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.OperandTypes;
import org.apache.calcite.sql.type.ReturnTypes;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.NumberUtil;
import org.apache.calcite.util.Util;
import com.google.common.collect.ImmutableList;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.checkerframework.checker.nullness.qual.PolyNull;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import static com.google.common.base.Preconditions.checkArgument;
import static org.apache.calcite.util.NumberUtil.multiply;
/**
* RelMdUtil provides utility methods used by the metadata provider methods.
*/
public class RelMdUtil {
//~ Static fields/initializers ---------------------------------------------
public static final SqlFunction ARTIFICIAL_SELECTIVITY_FUNC =
SqlBasicFunction.create("ARTIFICIAL_SELECTIVITY",
ReturnTypes.BOOLEAN, // returns boolean since we'll AND it
OperandTypes.NUMERIC, // takes a numeric param
SqlFunctionCategory.SYSTEM);
//~ Methods ----------------------------------------------------------------
private RelMdUtil() {
}
/**
* Creates a RexNode that stores a selectivity value corresponding to the
* selectivity of a semijoin. This can be added to a filter to simulate the
* effect of the semijoin during costing, but should never appear in a real
* plan since it has no physical implementation.
*
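* <p>A minimal usage sketch (assuming a semijoin {@code rel} and a
* {@code RelMetadataQuery mq} are already in scope):
*
* <pre>{@code
* RexNode artificial = RelMdUtil.makeSemiJoinSelectivityRexNode(mq, rel);
* double selectivity = RelMdUtil.getSelectivityValue(artificial);
* }</pre>
*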
* @param rel the semijoin of interest
* @return constructed rexnode
*/
public static RexNode makeSemiJoinSelectivityRexNode(RelMetadataQuery mq, Join rel) {
RexBuilder rexBuilder = rel.getCluster().getRexBuilder();
double selectivity =
computeSemiJoinSelectivity(mq, rel.getLeft(), rel.getRight(), rel);
return rexBuilder.makeCall(ARTIFICIAL_SELECTIVITY_FUNC,
rexBuilder.makeApproxLiteral(new BigDecimal(selectivity)));
}
/**
* Returns the selectivity value stored in a call.
*
* @param artificialSelectivityFuncNode Call containing the selectivity value
* @return selectivity value
*/
public static double getSelectivityValue(
RexNode artificialSelectivityFuncNode) {
assert artificialSelectivityFuncNode instanceof RexCall;
RexCall call = (RexCall) artificialSelectivityFuncNode;
assert call.getOperator() == ARTIFICIAL_SELECTIVITY_FUNC;
RexNode operand = call.getOperands().get(0);
@SuppressWarnings("unboxing.of.nullable")
double doubleValue = ((RexLiteral) operand).getValueAs(Double.class);
return doubleValue;
}
/**
* Computes the selectivity of a semijoin filter if it is applied on a fact
* table. The computation is based on the selectivity of the dimension
* table/columns and the number of distinct values in the fact table
* columns.
*
* @param factRel fact table participating in the semijoin
* @param dimRel dimension table participating in the semijoin
* @param rel semijoin rel
* @return calculated selectivity
*/
public static double computeSemiJoinSelectivity(RelMetadataQuery mq,
RelNode factRel, RelNode dimRel, Join rel) {
return computeSemiJoinSelectivity(mq, factRel, dimRel, rel.analyzeCondition().leftKeys,
rel.analyzeCondition().rightKeys);
}
/**
* Computes the selectivity of a semijoin filter if it is applied on a fact
* table. The computation is based on the selectivity of the dimension
* table/columns and the number of distinct values in the fact table
* columns.
*
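* <p>For example (assumed statistics, purely illustrative): if the dimension
* keys have 50 distinct values and the fact keys a population of 1000, the
* estimated selectivity is 50 / 1000 = 0.05. With no statistics available
* and two semijoin keys, the default is 0.1 ^ 2 = 0.01.
*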
* @param factRel fact table participating in the semijoin
* @param dimRel dimension table participating in the semijoin
* @param factKeyList LHS keys used in the filter
* @param dimKeyList RHS keys used in the filter
* @return calculated selectivity
*/
public static double computeSemiJoinSelectivity(RelMetadataQuery mq,
RelNode factRel, RelNode dimRel, List<Integer> factKeyList,
List<Integer> dimKeyList) {
ImmutableBitSet.Builder factKeys = ImmutableBitSet.builder();
for (int factCol : factKeyList) {
factKeys.set(factCol);
}
ImmutableBitSet.Builder dimKeyBuilder = ImmutableBitSet.builder();
for (int dimCol : dimKeyList) {
dimKeyBuilder.set(dimCol);
}
final ImmutableBitSet dimKeys = dimKeyBuilder.build();
Double factPop = mq.getPopulationSize(factRel, factKeys.build());
if (factPop == null) {
// use the dimension population if the fact population is
// unavailable; since we're filtering the fact table, that's
// the population we ideally want to use
factPop = mq.getPopulationSize(dimRel, dimKeys);
}
// if cardinality and population are available, use them; otherwise
// use percentage original rows
Double selectivity;
Double dimCard =
mq.getDistinctRowCount(
dimRel,
dimKeys,
null);
if ((dimCard != null) && (factPop != null)) {
// to avoid division by zero
if (factPop < 1.0) {
factPop = 1.0;
}
selectivity = dimCard / factPop;
} else {
selectivity = mq.getPercentageOriginalRows(dimRel);
}
if (selectivity == null) {
// set a default selectivity based on the number of semijoin keys
selectivity =
Math.pow(
0.1,
dimKeys.cardinality());
} else if (selectivity > 1.0) {
selectivity = 1.0;
}
return selectivity;
}
/**
* Returns true if the columns represented in a bit mask are definitely
* known to form a unique column set.
*
* @param rel the relational expression that the column mask corresponds
* to
* @param colMask bit mask containing columns that will be tested for
* uniqueness
* @return true if bit mask represents a unique column set; false if not (or
* if no metadata is available)
*/
public static boolean areColumnsDefinitelyUnique(RelMetadataQuery mq,
RelNode rel, ImmutableBitSet colMask) {
Boolean b = mq.areColumnsUnique(rel, colMask, false);
return b != null && b;
}
public static @Nullable Boolean areColumnsUnique(RelMetadataQuery mq, RelNode rel,
List<RexInputRef> columnRefs) {
ImmutableBitSet.Builder colMask = ImmutableBitSet.builder();
for (RexInputRef columnRef : columnRefs) {
colMask.set(columnRef.getIndex());
}
return mq.areColumnsUnique(rel, colMask.build());
}
public static boolean areColumnsDefinitelyUnique(RelMetadataQuery mq,
RelNode rel, List<RexInputRef> columnRefs) {
Boolean b = areColumnsUnique(mq, rel, columnRefs);
return b != null && b;
}
/**
* Returns true if the columns represented in a bit mask are definitely
* known to form a unique column set, when nulls have been filtered from
* the columns.
*
* @param rel the relational expression that the column mask corresponds
* to
* @param colMask bit mask containing columns that will be tested for
* uniqueness
* @return true if bit mask represents a unique column set; false if not (or
* if no metadata is available)
*/
public static boolean areColumnsDefinitelyUniqueWhenNullsFiltered(
RelMetadataQuery mq, RelNode rel, ImmutableBitSet colMask) {
Boolean b = mq.areColumnsUnique(rel, colMask, true);
return b != null && b;
}
public static @Nullable Boolean areColumnsUniqueWhenNullsFiltered(RelMetadataQuery mq,
RelNode rel, List<RexInputRef> columnRefs) {
ImmutableBitSet.Builder colMask = ImmutableBitSet.builder();
for (RexInputRef columnRef : columnRefs) {
colMask.set(columnRef.getIndex());
}
return mq.areColumnsUnique(rel, colMask.build(), true);
}
public static boolean areColumnsDefinitelyUniqueWhenNullsFiltered(
RelMetadataQuery mq, RelNode rel, List<RexInputRef> columnRefs) {
Boolean b = areColumnsUniqueWhenNullsFiltered(mq, rel, columnRefs);
return b != null && b;
}
/**
* Separates a bit-mask representing a join into masks representing the left
* and right inputs into the join.
*
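* <p>For example (hypothetical values): with 3 fields on the left, the group
* key {1, 4} splits into a left mask of {1} and a right mask of {1}, because
* bit 4 maps to field 4 - 3 = 1 of the right input:
*
* <pre>{@code
* ImmutableBitSet.Builder leftMask = ImmutableBitSet.builder();
* ImmutableBitSet.Builder rightMask = ImmutableBitSet.builder();
* RelMdUtil.setLeftRightBitmaps(ImmutableBitSet.of(1, 4), leftMask, rightMask, 3);
* // leftMask.build()  returns {1}
* // rightMask.build() returns {1}
* }</pre>
*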
* @param groupKey original bit-mask
* @param leftMask left bit-mask to be set
* @param rightMask right bit-mask to be set
* @param nFieldsOnLeft number of fields in the left input
*/
public static void setLeftRightBitmaps(
ImmutableBitSet groupKey,
ImmutableBitSet.Builder leftMask,
ImmutableBitSet.Builder rightMask,
int nFieldsOnLeft) {
for (int bit : groupKey) {
if (bit < nFieldsOnLeft) {
leftMask.set(bit);
} else {
rightMask.set(bit - nFieldsOnLeft);
}
}
}
/**
* Returns the expected number of distinct values obtained by making
* numSelected selections from a domain of domainSize distinct values.
*
* <p>Note that in the case where domainSize == numSelected, it's not true
* that the return value should be domainSize. If you pick 100 random values
* between 1 and 100, you'll most likely end up with fewer than 100 distinct
* values, because you'll pick some values more than once.
*
* <p>The implementation is an unbiased estimation of the number of distinct
* values by performing a number of selections (with replacement) from a
* universe set.
*
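* <p>For example (illustrative numbers): selecting 100 times with
* replacement from a domain of 100 distinct values is expected to produce
* about 100 * (1 - (1 - 1/100)^100), or roughly 63.4 distinct values,
* not 100:
*
* <pre>{@code
* Double ndv = RelMdUtil.numDistinctVals(100.0, 100.0); // roughly 63.4
* }</pre>
*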
* @param domainSize Size of the universe set
* @param numSelected The number of selections
*
* @return the expected number of distinct values, or null if either argument
* is null
*/
public static @PolyNull Double numDistinctVals(
@PolyNull Double domainSize,
@PolyNull Double numSelected) {
if ((domainSize == null) || (numSelected == null)) {
return domainSize;
}
// Cap the input sizes at MAX_VALUE to ensure that the calculations
// using these values return meaningful values
double dSize = capInfinity(domainSize);
double numSel = capInfinity(numSelected);
// The formula is derived as follows:
//
// Suppose we have N distinct values, and we select n from them (with replacement).
// For any value i, we use C(i) = k to express the event that the value is selected exactly
// k times in the n selections.
//
// It can be seen that, for any one selection, the probability of the value being selected
// is 1/N. So the probability of being selected exactly k times is
//
// Pr{C(i) = k} = C(n, k) * (1 / N)^k * (1 - 1 / N)^(n - k),
// where C(n, k) = n! / [k! * (n - k)!]
//
// The probability that the value is never selected is
// Pr{C(i) = 0} = C(n, 0) * (1/N)^0 * (1 - 1 / N)^n = (1 - 1 / N)^n
//
// We define indicator random variable I(i), so that I(i) = 1 iff
// value i is selected in at least one of the selections. We have
// E[I(i)] = 1 * Pr{I(i) = 1} + 0 * Pr{I(i) = 0} = Pr{I(i) = 1}
// = Pr{C(i) > 0} = 1 - Pr{C(i) = 0} = 1 - (1 - 1 / N)^n
//
// The expected number of distinct values in the overall n selections is:
// E[I(1)] + E[I(2)] + ... + E[I(N)] = N * [1 - (1 - 1 / N)^n]
double res = 0;
if (dSize > 0) {
// We calculate the result in 3 cases:
// 1) If N and n are not very large values, we directly calculate
// N * [1 - exp(n * ln(1 - 1 / N))]
// 2) If N or n are very large values, but n / N is not close to 0,
// We compute the result by expanding the exponent n * ln(1 - 1 / N)
// as a Taylor series.
// 3) If N or n are very large values, and n / N is close to 0,
// we expand the whole formula as a Taylor series.
// We separate the 3 cases by trying to expand the exponent as a Taylor series.
// If the required number of terms for convergence is too big, it is case 1.
// If the required number of terms is too small, it is case 3. Otherwise,
// it is case 2.
// To expand the exponent as a Taylor series, we have:
// n * ln(1 - 1 / N)
// = n * (- 1 / N - 1 / (2 * N ^ 2) - 1 / (3 * N ^ 3) - ...)
//
// To get an approximate result, we truncate the series after the first m terms.
// This leads to an error of:
// n * (1 / [(m + 1) * N ^ (m + 1)] + 1 / [(m + 2) * N ^ (m + 2)] + ...)
// < n / (m + 1) / [N ^ m * (N - 1)] < n / (N ^ m)
//
// To get an accurate result, the error should be smaller than 1e-16, because a
// double can represent at most 15 digits after the decimal point. Therefore, we need
// n / (N ^ m ) < 1e-16, or N ^ m / n > 1e16.
//
// The smallest value of m that satisfies the above formula is the
// number of terms required for convergence.
final int maxTerms = 32;
final int minTerms = 1;
int numTerms = 1;
if (dSize > 10) {
double dPower = dSize;
while (dPower / numSel <= 1e16) {
dPower = dPower * dPower;
numTerms *= 2;
}
} else {
// for small dSize, force case 1
numTerms = maxTerms + 1;
}
if (numTerms > maxTerms) {
// case 1
double expo = numSel * Math.log(1.0 - 1.0 / dSize);
res = (1.0 - Math.exp(expo)) * dSize;
} else if (numTerms > minTerms) {
// case 2
double expo = 0;
double dPower = dSize;
for (int i = 1; i <= numTerms; i++) {
expo -= numSel / dPower / i;
dPower *= dSize;
}
res = (1.0 - Math.exp(expo)) * dSize;
} else {
// numTerms <= minTerms
// case 3
// Since the ratio n / N is close to 0, we can expand the exponent as
// n * ln(1 - 1 / N) ≈ n * (- 1 / N) = - n / N.
// So N * [1 - (1 - 1 / N) ^ n ] = N * [1 - exp(n * ln(1 - 1 / N))]
// ≈ N * [1 - exp(- n / N)].
// Since exp(x) = 1 + x + x ^ 2 / 2 + x ^ 3 / 6 + ..., we have
// N * [1 - exp(- n / N)] = N * [n / N - n ^ 2 / (2 * N ^ 2) + n ^ 3 / (6 * N ^ 3) - ...]
// = n - n ^ 2 / (2 * N) + n ^ 3 / (6 * N ^ 2) - ...
// Since n / N is close to 0, and due to the factorial in the denominator,
// the above series converges to 0 very quickly.
res = 0;
numTerms = 5;
double selPower = numSel;
double domPower = 1;
int factorial = 1;
int sign = 1;
for (int i = 1; i < numTerms; i++) {
res += sign * selPower / factorial / domPower;
selPower *= numSel;
domPower *= dSize;
factorial *= i + 1;
sign *= -1;
}
}
}
// fix the boundary cases
if (res > dSize) {
res = dSize;
}
if (res > numSel) {
res = numSel;
}
if (res < 0) {
res = 0;
}
return res;
}
/**
* Caps a double value at Double.MAX_VALUE if it is infinite.
*
* @param d the Double object
* @return the double value if it is not infinite; else Double.MAX_VALUE
*/
public static double capInfinity(Double d) {
return d.isInfinite() ? Double.MAX_VALUE : d;
}
/**
* Returns default estimates for selectivities, in the absence of stats.
*
* @param predicate predicate for which selectivity will be computed; null
* means true, so gives selectivity of 1.0
* @return estimated selectivity
*/
public static double guessSelectivity(@Nullable RexNode predicate) {
return guessSelectivity(predicate, false);
}
/**
* Returns default estimates for selectivities, in the absence of stats.
*
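* <p>For example (a hypothetical predicate): for {@code deptno = 10 AND
* sal > 1000} the guess is 0.15 * 0.5 = 0.075, since an equality conjunct
* contributes 0.15 and any other comparison contributes 0.5.
*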
* @param predicate predicate for which selectivity will be computed;
* null means true, so gives selectivity of 1.0
* @param artificialOnly return only the selectivity contribution from
* artificial nodes
* @return estimated selectivity
*/
public static double guessSelectivity(
@Nullable RexNode predicate,
boolean artificialOnly) {
double sel = 1.0;
if ((predicate == null) || predicate.isAlwaysTrue()) {
return sel;
}
double artificialSel = 1.0;
for (RexNode pred : RelOptUtil.conjunctions(predicate)) {
if (pred.getKind() == SqlKind.IS_NOT_NULL) {
sel *= .9;
} else if (
(pred instanceof RexCall)
&& (((RexCall) pred).getOperator()
== RelMdUtil.ARTIFICIAL_SELECTIVITY_FUNC)) {
artificialSel *= RelMdUtil.getSelectivityValue(pred);
} else if (pred.isA(SqlKind.EQUALS)) {
sel *= .15;
} else if (pred.isA(SqlKind.COMPARISON)) {
sel *= .5;
} else {
sel *= .25;
}
}
if (artificialOnly) {
return artificialSel;
} else {
return sel * artificialSel;
}
}
/**
* AND's two predicates together, either of which may be null, removing
* redundant filters.
*
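* <p>For example (hypothetical predicates): AND'ing {@code a = 1 AND b = 2}
* with {@code b = 2 AND c = 3} yields {@code a = 1 AND b = 2 AND c = 3};
* the duplicate conjunct {@code b = 2} is kept only once (assuming the two
* occurrences are structurally identical RexNodes).
*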
* @param rexBuilder rexBuilder used to construct AND'd RexNode
* @param pred1 first predicate
* @param pred2 second predicate
* @return AND'd predicate or individual predicates if one is null
*/
public static @Nullable RexNode unionPreds(
RexBuilder rexBuilder,
@Nullable RexNode pred1,
@Nullable RexNode pred2) {
final Set<RexNode> unionList = new LinkedHashSet<>();
unionList.addAll(RelOptUtil.conjunctions(pred1));
unionList.addAll(RelOptUtil.conjunctions(pred2));
return RexUtil.composeConjunction(rexBuilder, unionList, true);
}
/**
* Takes the difference between two predicates, removing from the first any
* predicates also in the second.
*
* @param rexBuilder rexBuilder used to construct AND'd RexNode
* @param pred1 first predicate
* @param pred2 second predicate
* @return MINUS'd predicate list
*/
public static @Nullable RexNode minusPreds(
RexBuilder rexBuilder,
@Nullable RexNode pred1,
@Nullable RexNode pred2) {
final List<RexNode> minusList =
new ArrayList<>(RelOptUtil.conjunctions(pred1));
minusList.removeAll(RelOptUtil.conjunctions(pred2));
return RexUtil.composeConjunction(rexBuilder, minusList, true);
}
/**
* Takes a bitmap representing a set of input references and extracts the
* ones that reference the group by columns in an aggregate.
*
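* <p>For example (a hypothetical aggregate): with two group-by columns and
* a single aggregate call {@code SUM($3)}, the key {1, 2} maps to child
* columns {1} (a group-by column) and {3} (the argument of the aggregate
* call in output position 2).
*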
* @param groupKey the original bitmap
* @param aggRel the aggregate
* @param childKey sets bits from groupKey corresponding to group by columns
*/
public static void setAggChildKeys(
ImmutableBitSet groupKey,
Aggregate aggRel,
ImmutableBitSet.Builder childKey) {
List<AggregateCall> aggCalls = aggRel.getAggCallList();
for (int bit : groupKey) {
if (bit < aggRel.getGroupCount()) {
// group by column
childKey.set(bit);
} else {
// aggregate column -- set a bit for each argument being
// aggregated
AggregateCall agg = aggCalls.get(bit - aggRel.getGroupCount());
for (Integer arg : agg.getArgList()) {
childKey.set(arg);
}
}
}
}
/**
* Forms two bitmaps by splitting the columns in a bitmap according to
* whether the column references the child input or is an expression.
*
* @param projExprs Project expressions
* @param groupKey Bitmap whose columns will be split
* @param baseCols Bitmap representing columns from the child input
* @param projCols Bitmap representing non-child columns
*/
public static void splitCols(
List<RexNode> projExprs,
ImmutableBitSet groupKey,
ImmutableBitSet.Builder baseCols,
ImmutableBitSet.Builder projCols) {
for (int bit : groupKey) {
final RexNode e = projExprs.get(bit);
if (e instanceof RexInputRef) {
baseCols.set(((RexInputRef) e).getIndex());
} else {
projCols.set(bit);
}
}
}
/**
* Computes the cardinality of a particular expression from the projection
* list.
*
* @param rel RelNode corresponding to the project
* @param expr projection expression
* @return cardinality
*/
public static @Nullable Double cardOfProjExpr(RelMetadataQuery mq, Project rel,
RexNode expr) {
return expr.accept(new CardOfProjExpr(mq, rel));
}
/**
* Computes the population size for a set of keys returned from a join.
*
* @param join_ Join relational operator
* @param groupKey Keys to compute the population for
* @return computed population size
*/
public static @Nullable Double getJoinPopulationSize(RelMetadataQuery mq,
RelNode join_, ImmutableBitSet groupKey) {
Join join = (Join) join_;
if (!join.getJoinType().projectsRight()) {
return mq.getPopulationSize(join.getLeft(), groupKey);
}
ImmutableBitSet.Builder leftMask = ImmutableBitSet.builder();
ImmutableBitSet.Builder rightMask = ImmutableBitSet.builder();
RelNode left = join.getLeft();
RelNode right = join.getRight();
// separate the mask into masks for the left and right
RelMdUtil.setLeftRightBitmaps(
groupKey, leftMask, rightMask, left.getRowType().getFieldCount());
Double population =
multiply(
mq.getPopulationSize(left, leftMask.build()),
mq.getPopulationSize(right, rightMask.build()));
return numDistinctVals(population, mq.getRowCount(join));
}
/** Adds an epsilon to the value passed in. */
public static double addEpsilon(double d) {
assert d >= 0d;
final double d0 = d;
if (d < 10) {
// For small d, adding 1 would change the value significantly.
d *= 1.001d;
if (d != d0) {
return d;
}
}
// For medium d, add 1. Keeps integral values integral.
++d;
if (d != d0) {
return d;
}
// For large d, adding 1 might not change the value. Add .1%.
// If d is NaN, this still will probably not change the value. That's OK.
d *= 1.001d;
return d;
}
/**
* Computes the number of distinct rows for a set of keys returned from a
* semi-join.
*
* @param semiJoinRel RelNode representing the semi-join
* @param mq metadata query
* @param groupKey keys that the distinct row count will be computed for
* @param predicate join predicate
* @return number of distinct rows
*/
public static @Nullable Double getSemiJoinDistinctRowCount(Join semiJoinRel, RelMetadataQuery mq,
ImmutableBitSet groupKey, @Nullable RexNode predicate) {
if (predicate == null || predicate.isAlwaysTrue()) {
if (groupKey.isEmpty()) {
return 1D;
}
}
// create a RexNode representing the selectivity of the
// semijoin filter and pass it to getDistinctRowCount
RexNode newPred = RelMdUtil.makeSemiJoinSelectivityRexNode(mq, semiJoinRel);
if (predicate != null) {
RexBuilder rexBuilder = semiJoinRel.getCluster().getRexBuilder();
newPred =
rexBuilder.makeCall(
SqlStdOperatorTable.AND,
newPred,
predicate);
}
return mq.getDistinctRowCount(semiJoinRel.getLeft(), groupKey, newPred);
}
/**
* Computes the number of distinct rows for a set of keys returned from a
* join. Also known as NDV (number of distinct values).
*
* @param joinRel RelNode representing the join
* @param joinType type of join
* @param groupKey keys that the distinct row count will be computed for
* @param predicate join predicate
* @param useMaxNdv If true use formula <code>max(left NDV, right NDV)</code>,
* otherwise use <code>left NDV * right NDV</code>.
* @return number of distinct rows
*/
public static @Nullable Double getJoinDistinctRowCount(RelMetadataQuery mq,
RelNode joinRel, JoinRelType joinType, ImmutableBitSet groupKey,
@Nullable RexNode predicate, boolean useMaxNdv) {
if (predicate == null || predicate.isAlwaysTrue()) {
if (groupKey.isEmpty()) {
return 1D;
}
}
Join join = (Join) joinRel;
if (join.isSemiJoin()) {
return getSemiJoinDistinctRowCount(join, mq, groupKey, predicate);
}
Double distRowCount;
ImmutableBitSet.Builder leftMask = ImmutableBitSet.builder();
ImmutableBitSet.Builder rightMask = ImmutableBitSet.builder();
RelNode left = joinRel.getInputs().get(0);
RelNode right = joinRel.getInputs().get(1);
RelMdUtil.setLeftRightBitmaps(
groupKey,
leftMask,
rightMask,
left.getRowType().getFieldCount());
// determine which filters apply to the left vs right
RexNode leftPred = null;
RexNode rightPred = null;
if (predicate != null) {
final List<RexNode> leftFilters = new ArrayList<>();
final List<RexNode> rightFilters = new ArrayList<>();
final List<RexNode> joinFilters = new ArrayList<>();
final List<RexNode> predList = RelOptUtil.conjunctions(predicate);
RelOptUtil.classifyFilters(
joinRel,
predList,
joinType.canPushIntoFromAbove(),
joinType.canPushLeftFromAbove(),
joinType.canPushRightFromAbove(),
joinFilters,
leftFilters,
rightFilters);
RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
leftPred =
RexUtil.composeConjunction(rexBuilder, leftFilters, true);
rightPred =
RexUtil.composeConjunction(rexBuilder, rightFilters, true);
}
if (useMaxNdv) {
distRowCount =
NumberUtil.max(
mq.getDistinctRowCount(left, leftMask.build(), leftPred),
mq.getDistinctRowCount(right, rightMask.build(), rightPred));
} else {
distRowCount =
multiply(
mq.getDistinctRowCount(left, leftMask.build(), leftPred),
mq.getDistinctRowCount(right, rightMask.build(), rightPred));
}
return RelMdUtil.numDistinctVals(distRowCount, mq.getRowCount(joinRel));
}
/** Returns an estimate of the number of rows returned by a {@link Union}
* (before duplicates are eliminated). */
public static double getUnionAllRowCount(RelMetadataQuery mq, Union rel) {
double rowCount = 0;
for (RelNode input : rel.getInputs()) {
rowCount += mq.getRowCount(input);
}
return rowCount;
}
/** Returns an estimate of the number of rows returned by a {@link Minus}. */
public static double getMinusRowCount(RelMetadataQuery mq, Minus minus) {
// REVIEW jvs 30-May-2005: I just pulled this out of a hat.
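// For illustration (assumed row counts): MINUS over inputs of 100 and 30
// rows is estimated at 100 - 0.5 * 30 = 85 rows.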
final List<RelNode> inputs = minus.getInputs();
double dRows = mq.getRowCount(inputs.get(0));
for (int i = 1; i < inputs.size(); i++) {
dRows -= 0.5 * mq.getRowCount(inputs.get(i));
}
if (dRows < 0) {
dRows = 0;
}
return dRows;
}
/** Returns an estimate of the number of rows returned by a {@link Join}. */
public static @Nullable Double getJoinRowCount(RelMetadataQuery mq, Join join,
RexNode condition) {
if (!join.getJoinType().projectsRight()) {
// Create a RexNode representing the selectivity of the
// semijoin filter and pass it to getSelectivity
RexNode semiJoinSelectivity =
RelMdUtil.makeSemiJoinSelectivityRexNode(mq, join);
Double selectivity = mq.getSelectivity(join.getLeft(), semiJoinSelectivity);
if (selectivity == null) {
return null;
}
return (join.getJoinType() == JoinRelType.SEMI
? selectivity
: 1D - selectivity) // ANTI join
* mq.getRowCount(join.getLeft());
}
// Row count estimates of 0 will be rounded up to 1.
// So, use maxRowCount where the product is very small.
final Double left = mq.getRowCount(join.getLeft());
final Double right = mq.getRowCount(join.getRight());
if (left == null || right == null) {
return null;
}
if (left <= 1D || right <= 1D) {
Double max = mq.getMaxRowCount(join);
if (max != null && max <= 1D) {
return max;
}
}
Double selectivity = mq.getSelectivity(join, condition);
if (selectivity == null) {
return null;
}
double innerRowCount = left * right * selectivity;
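// For illustration (assumed inputs, not from metadata): with left = 100,
// right = 50 and selectivity = 0.1, innerRowCount is 500, a LEFT join is
// estimated at 100 * 0.9 + 500 = 590 rows, and a FULL join at
// (100 + 50) * 0.9 + 500 = 635 rows.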
switch (join.getJoinType()) {
case INNER:
return innerRowCount;
case LEFT:
return left * (1D - selectivity) + innerRowCount;
case RIGHT:
return right * (1D - selectivity) + innerRowCount;
case FULL:
return (left + right) * (1D - selectivity) + innerRowCount;
default:
throw Util.unexpected(join.getJoinType());
}
}
public static double estimateFilteredRows(RelNode child, RexProgram program,
RelMetadataQuery mq) {
// convert the program's RexLocalRef condition to an expanded RexNode
RexLocalRef programCondition = program.getCondition();
RexNode condition;
if (programCondition == null) {
condition = null;
} else {
condition = program.expandLocalRef(programCondition);
}
return estimateFilteredRows(child, condition, mq);
}
public static double estimateFilteredRows(RelNode child, @Nullable RexNode condition,
RelMetadataQuery mq) {
@SuppressWarnings("unboxing.of.nullable")
double result =
multiply(mq.getRowCount(child), mq.getSelectivity(child, condition));
return result;
}
/** Returns a point on a line.
*
* <p>The result is always a value between {@code minY} and {@code maxY},
* even if {@code x} is not between {@code minX} and {@code maxX}.
*
* <p>Examples:<ul>
* <li>{@code linear(0, 0, 10, 100, 200)} returns 100 because 0 is minX
* <li>{@code linear(5, 0, 10, 100, 200)} returns 150 because 5 is
* mid-way between minX and maxX
* <li>{@code linear(6, 0, 10, 100, 200)} returns 160 because 6 is 60% of
* the way from minX to maxX
* <li>{@code linear(10, 0, 10, 100, 200)} returns 200 because 10 is maxX
* <li>{@code linear(-2, 0, 10, 100, 200)} returns 100 because -2 is
* less than minX and is therefore treated as minX
* <li>{@code linear(12, 0, 10, 100, 200)} returns 200 because 12 is
* greater than maxX and is therefore treated as maxX
* </ul>
*/
public static double linear(int x, int minX, int maxX, double minY, double
maxY) {
checkArgument(minX < maxX);
checkArgument(minY < maxY);
if (x < minX) {
return minY;
}
if (x > maxX) {
return maxY;
}
return minY + (double) (x - minX) / (double) (maxX - minX) * (maxY - minY);
}
//~ Inner Classes ----------------------------------------------------------
/** Visitor that walks over a scalar expression and computes the
* cardinality of its result. */
private static class CardOfProjExpr extends RexVisitorImpl<@Nullable Double> {
private final RelMetadataQuery mq;
private Project rel;
CardOfProjExpr(RelMetadataQuery mq, Project rel) {
super(true);
this.mq = mq;
this.rel = rel;
}
@Override public @Nullable Double visitInputRef(RexInputRef var) {
int index = var.getIndex();
ImmutableBitSet col = ImmutableBitSet.of(index);
Double distinctRowCount =
mq.getDistinctRowCount(rel.getInput(), col, null);
if (distinctRowCount == null) {
return null;
} else {
return numDistinctVals(distinctRowCount, mq.getRowCount(rel));
}
}
@Override public @Nullable Double visitLiteral(RexLiteral literal) {
return numDistinctVals(1.0, mq.getRowCount(rel));
}
@Override public @Nullable Double visitCall(RexCall call) {
Double distinctRowCount;
Double rowCount = mq.getRowCount(rel);
if (call.isA(SqlKind.MINUS_PREFIX)) {
distinctRowCount = cardOfProjExpr(mq, rel, call.getOperands().get(0));
} else if (call.isA(ImmutableList.of(SqlKind.PLUS, SqlKind.MINUS))) {
Double card0 = cardOfProjExpr(mq, rel, call.getOperands().get(0));
if (card0 == null) {
return null;
}
Double card1 = cardOfProjExpr(mq, rel, call.getOperands().get(1));
if (card1 == null) {
return null;
}
distinctRowCount = Math.max(card0, card1);
} else if (call.isA(ImmutableList.of(SqlKind.TIMES, SqlKind.DIVIDE))) {
distinctRowCount =
multiply(
cardOfProjExpr(mq, rel, call.getOperands().get(0)),
cardOfProjExpr(mq, rel, call.getOperands().get(1)));
// TODO zfong 6/21/06 - Broadbase has code to handle date
// functions like year, month, day; E.g., cardinality of Month()
// is 12
} else {
if (call.getOperands().size() == 1) {
distinctRowCount = cardOfProjExpr(mq, rel, call.getOperands().get(0));
} else {
distinctRowCount = rowCount / 10;
}
}
return numDistinctVals(distinctRowCount, rowCount);
}
}
/** Returns whether a relational expression is already sorted and has fewer
* rows than the sum of offset and limit.
*
* <p>If this is the case, it is safe to push down a
* {@link org.apache.calcite.rel.core.Sort} with limit and optional offset. */
public static boolean checkInputForCollationAndLimit(RelMetadataQuery mq,
RelNode input, RelCollation collation, @Nullable RexNode offset, @Nullable RexNode fetch) {
return alreadySorted(mq, input, collation) && alreadySmaller(mq, input, offset, fetch);
}
// Checks if the input is already sorted
private static boolean alreadySorted(RelMetadataQuery mq, RelNode input, RelCollation collation) {
if (collation.getFieldCollations().isEmpty()) {
return true;
}
final ImmutableList<RelCollation> collations = mq.collations(input);
if (collations == null) {
// Cannot be determined
return false;
}
for (RelCollation inputCollation : collations) {
if (inputCollation.satisfies(collation)) {
return true;
}
}
return false;
}
// Checks that the limit (offset + fetch) does not reduce the number of rows
private static boolean alreadySmaller(RelMetadataQuery mq, RelNode input,
@Nullable RexNode offset, @Nullable RexNode fetch) {
if (fetch == null) {
return true;
}
final Double rowCount = mq.getMaxRowCount(input);
if (rowCount == null || offset instanceof RexDynamicParam || fetch instanceof RexDynamicParam) {
// Cannot be determined
return false;
}
final int offsetVal = offset == null ? 0 : RexLiteral.intValue(offset);
final int limit = RexLiteral.intValue(fetch);
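// For illustration (assumed values): with offset = 10, fetch = 20 and a
// maximum row count of 25, 10 + 20 >= 25, so the limit does not drop any
// rows and the method returns true.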
return (double) offsetVal + (double) limit >= rowCount;
}
/**
* Validates whether a value represents a percentage number
* (that is, a value in the interval [0.0, 1.0]) and returns the value.
*
* <p>Returns null if and only if {@code result} is null.
*
* <p>Throws if {@code result} is not null, not in range 0 to 1,
* and assertions are enabled.
*/
public static @PolyNull Double validatePercentage(@PolyNull Double result) {
assert isPercentage(result, true);
return result;
}
private static boolean isPercentage(@Nullable Double result, boolean fail) {
if (result != null) {
final double d = result;
if (d < 0.0) {
assert !fail;
return false;
}
if (d > 1.0) {
assert !fail;
return false;
}
}
return true;
}
/**
* Validates that {@code result} is valid, correcting boundary values.
*
* <p>Never let the result go below 1, as it will result in incorrect
* calculations if the row-count is used as the denominator in a
* division expression. Also, cap the value at the max double value
* to avoid calculations using infinity.
*
* <p>Returns null if and only if {@code result} is null.
*
* <p>Throws if {@code result} is not null, is negative,
* and assertions are enabled.
*
* @return the corrected value from the {@code result}
* @throws AssertionError if the {@code result} is negative
*/
public static @PolyNull Double validateResult(@PolyNull Double result) {
if (result == null) {
return null;
}
if (result.isInfinite()) {
result = Double.MAX_VALUE;
}
assert isNonNegative(result, true);
if (result < 1.0) {
result = 1.0;
}
return result;
}
private static boolean isNonNegative(@Nullable Double result, boolean fail) {
if (result != null) {
final double d = result;
if (d < 0.0) {
assert !fail;
return false;
}
}
return true;
}
/**
* Removes cached metadata values for specified RelNode.
*
* @param rel RelNode whose cached metadata should be removed
* @return true if cache for the provided RelNode was not empty
*/
public static boolean clearCache(RelNode rel) {
return rel.getCluster().getMetadataQuery().clearCache(rel);
}
}