blob: b3c4876a4d6571e667deb9a329e04019d892098b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.asterix.optimizer.rules.cbo;
import java.util.ArrayList;
import java.util.List;
import org.apache.asterix.common.metadata.DataverseName;
import org.apache.asterix.metadata.declared.DataSource;
import org.apache.asterix.metadata.declared.DataSourceId;
import org.apache.asterix.metadata.declared.MetadataProvider;
import org.apache.asterix.metadata.entities.Index;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
import org.apache.hyracks.algebricks.core.algebra.expressions.JoinProductivityAnnotation;
import org.apache.hyracks.algebricks.core.algebra.expressions.PredicateCardinalityAnnotation;
import org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
import org.apache.hyracks.api.exceptions.ErrorCode;
import org.apache.hyracks.api.exceptions.IWarningCollector;
import org.apache.hyracks.api.exceptions.Warning;
/**
 * Selectivity estimation helpers used by the cost-based optimizer.
 *
 * Selectivities are derived from hints attached to predicate expressions
 * ({@link PredicateCardinalityAnnotation}, {@link JoinProductivityAnnotation}) and, for join predicates without a
 * hint, from the original cardinalities recorded in the {@link JoinEnum} join-node array.
 */
public class Stats {
    // NOTE(review): named like a constant but declared as a mutable public instance field. It is left unchanged
    // because code outside this file may read or assign it.
    public double SELECTIVITY_FOR_SECONDARY_INDEX_SELECTION = 0.1;

    protected IOptimizationContext optCtx;
    protected JoinEnum joinEnum;

    /**
     * @param context the optimization context stored in {@link #optCtx}
     * @param joinE the join enumerator whose join nodes, warning collector and predicate list are used here
     */
    public Stats(IOptimizationContext context, JoinEnum joinE) {
        optCtx = context;
        joinEnum = joinE;
    }

    /**
     * Returns the dataverse name of the data source scanned by {@code scanOp}.
     *
     * @param scanOp the scan operator; may be {@code null}
     * @return the dataverse name, or {@code null} when {@code scanOp} is {@code null}
     */
    public DataverseName findDataverseName(DataSourceScanOperator scanOp) {
        if (scanOp == null) {
            // this should rarely happen (IN lists may cause this)
            return null;
        }
        DataSourceId dsid = (DataSourceId) scanOp.getDataSource().getId();
        return dsid.getDataverseName();
    }

    /**
     * Looks up the sample index of the data source scanned by {@code scanOp} through the metadata provider of
     * {@code context}.
     *
     * @param scanOp the scan operator; may be {@code null}
     * @param context the optimization context supplying the {@link MetadataProvider}
     * @return whatever the metadata provider returns for the data source, or {@code null} when {@code scanOp} is
     *         {@code null}
     * @throws AlgebricksException if the metadata lookup fails
     */
    public Index findSampleIndex(DataSourceScanOperator scanOp, IOptimizationContext context)
            throws AlgebricksException {
        if (scanOp == null) {
            // findDataverseName() tolerates a missing scan operator; behave the same way here instead of failing
            // with a NullPointerException on scanOp.getDataSource().
            return null;
        }
        DataverseName dataverseName = findDataverseName(scanOp);
        DataSource ds = (DataSource) scanOp.getDataSource();
        DataSourceId dsid = ds.getId();
        MetadataProvider mdp = (MetadataProvider) context.getMetadataProvider();
        return mdp.findSampleIndex(dataverseName, dsid.getDatasourceName());
    }

    /**
     * Reports an {@code INAPPLICABLE_HINT} warning located at {@code expr}, if the warning collector accepts
     * further warnings.
     */
    private void warnInapplicableHint(AbstractFunctionCallExpression expr, String hintName, String message) {
        IWarningCollector warningCollector = joinEnum.optCtx.getWarningCollector();
        if (warningCollector.shouldWarn()) {
            warningCollector.warn(
                    Warning.of(expr.getSourceLocation(), ErrorCode.INAPPLICABLE_HINT, hintName, message));
        }
    }

    /**
     * Estimates the selectivity of a join predicate.
     *
     * Returns 1.0 (no estimate) when the predicate does not use exactly two variables, when either side has an
     * original cardinality of 0, or when the productivity hint is invalid (a warning is issued in that case).
     * With a valid productivity hint the result is the productivity divided by the cardinality of the side that
     * is not the hint's left-side dataset. Without a hint the result is 1 / min(card1, card2).
     *
     * @param anno the productivity hint attached to the predicate, or {@code null} if there is none
     * @param joinExpr the join predicate
     * @return the estimated selectivity
     * @throws AlgebricksException if a join node lookup fails
     */
    private double findJoinSelectivity(JoinProductivityAnnotation anno, AbstractFunctionCallExpression joinExpr)
            throws AlgebricksException {
        List<LogicalVariable> exprUsedVars = new ArrayList<>();
        joinExpr.getUsedVariables(exprUsedVars);
        if (exprUsedVars.size() != 2) {
            // Since there is a left and right dataset here, expecting only two variables.
            return 1.0;
        }
        int idx1 = joinEnum.findJoinNodeIndex(exprUsedVars.get(0)) + 1;
        int idx2 = joinEnum.findJoinNodeIndex(exprUsedVars.get(1)) + 1;
        double card1 = joinEnum.getJnArray()[idx1].origCardinality;
        double card2 = joinEnum.getJnArray()[idx2].origCardinality;
        if (card1 == 0.0 || card2 == 0.0) {
            // should not happen
            return 1.0;
        }

        if (anno == null) {
            // We are assuming that the smaller side is the primary side and that the join is a Pk-Fk join.
            return card1 < card2 ? 1.0 / card1 : 1.0 / card2;
        }

        // join sel = leftside * productivity/(card1 * card2);
        int leftIndex = joinEnum.findJoinNodeIndexByName(anno.getLeftSideDataSet());
        if (leftIndex != idx1 && leftIndex != idx2) {
            // should not happen
            warnInapplicableHint(joinExpr, "productivity",
                    "Invalid collection name/alias: " + anno.getLeftSideDataSet());
            return 1.0;
        }
        double productivity = anno.getJoinProductivity();
        if (productivity <= 0) {
            warnInapplicableHint(joinExpr, "productivity",
                    "Productivity specified: " + productivity + ", has to be a decimal value greater than 0");
            return 1.0;
        }
        return leftIndex == idx1 ? productivity / card2 : productivity / card1;
    }

    /**
     * Recursively computes the selectivity of a predicate expression.
     *
     * The expression may not be a base condition; it can be composed of ORs, ANDs and NOTs:
     * <ul>
     * <li>OR: the selectivities of the function-call disjuncts are combined as {@code a + b - a * b};</li>
     * <li>AND: the selectivities of the function-call conjuncts are multiplied;</li>
     * <li>NOT over a function call: {@code 1 - selectivity(argument)};</li>
     * <li>anything else: the selectivity hint if present, otherwise the join selectivity estimate.</li>
     * </ul>
     * Arguments of OR/AND that are not function calls are skipped.
     *
     * @param afcExpr the predicate expression
     * @param join whether the predicate is evaluated as a join predicate; when {@code true}, base predicates
     *            whose selectivity comes out as exactly 1.0 are added to {@code joinEnum.singleDatasetPreds}
     * @return the estimated selectivity
     * @throws AlgebricksException if a join node lookup fails
     */
    protected double getSelectivityFromAnnotation(AbstractFunctionCallExpression afcExpr, boolean join)
            throws AlgebricksException {
        double sel = 1.0;

        if (afcExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.OR)) {
            double orSel = 0.0;
            boolean sawPredicate = false;
            // Every disjunct, including the first one, is tag-checked before the cast so that a variable or
            // constant operand is skipped instead of raising a ClassCastException.
            for (int i = 0; i < afcExpr.getArguments().size(); i++) {
                ILogicalExpression lexpr = afcExpr.getArguments().get(i).getValue();
                if (lexpr.getExpressionTag().equals(LogicalExpressionTag.FUNCTION_CALL)) {
                    sel = getSelectivityFromAnnotation((AbstractFunctionCallExpression) lexpr, join);
                    orSel = sawPredicate ? orSel + sel - orSel * sel : sel;
                    sawPredicate = true;
                }
            }
            // No usable disjunct means no estimate; 1.0 is the neutral "unknown" value used throughout.
            return sawPredicate ? orSel : 1.0;
        } else if (afcExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.AND)) {
            double andSel = 1.0;
            for (int i = 0; i < afcExpr.getArguments().size(); i++) {
                ILogicalExpression lexpr = afcExpr.getArguments().get(i).getValue();
                if (lexpr.getExpressionTag().equals(LogicalExpressionTag.FUNCTION_CALL)) {
                    sel = getSelectivityFromAnnotation((AbstractFunctionCallExpression) lexpr, join);
                    andSel *= sel;
                }
            }
            return andSel;
        } else if (afcExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.NOT)) {
            ILogicalExpression lexpr = afcExpr.getArguments().get(0).getValue();
            if (lexpr.getExpressionTag().equals(LogicalExpressionTag.FUNCTION_CALL)) {
                sel = getSelectivityFromAnnotation((AbstractFunctionCallExpression) lexpr, join);
                return 1.0 - sel;
            }
            // A NOT over a non-function-call argument falls through and is treated as a base predicate.
        }

        double s = 1.0;
        PredicateCardinalityAnnotation pca = afcExpr.getAnnotation(PredicateCardinalityAnnotation.class);
        if (pca != null) {
            s = pca.getSelectivity();
            if (s <= 0 || s >= 1) {
                // NOTE(review): s keeps the out-of-range value here, so the check below only records the
                // predicate when the hint was exactly 1.0 - confirm this is intended.
                warnInapplicableHint(afcExpr, "selectivity",
                        "Selectivity specified: " + s + ", has to be a decimal value greater than 0 and less than 1");
            } else {
                sel *= s;
            }
        } else {
            JoinProductivityAnnotation jpa = afcExpr.getAnnotation(JoinProductivityAnnotation.class);
            s = findJoinSelectivity(jpa, afcExpr);
            sel *= s;
        }
        if (join && s == 1.0) {
            // assume no selectivity was assigned
            joinEnum.singleDatasetPreds.add(afcExpr);
        }
        return sel;
    }

    /**
     * Entry point for {@link #getSelectivityFromAnnotation}: returns 1.0 for anything that is not a function
     * call and delegates otherwise.
     *
     * @param leExpr the predicate expression
     * @param join passed through to {@link #getSelectivityFromAnnotation}
     * @return the estimated selectivity
     * @throws AlgebricksException if a join node lookup fails
     */
    public double getSelectivityFromAnnotationMain(ILogicalExpression leExpr, boolean join) throws AlgebricksException {
        double sel = 1.0;
        if (leExpr.getExpressionTag().equals(LogicalExpressionTag.FUNCTION_CALL)) {
            AbstractFunctionCallExpression afcExpr = (AbstractFunctionCallExpression) leExpr;
            sel = getSelectivityFromAnnotation(afcExpr, join);
        }
        return sel;
    }

    // The next two routines should be combined and made more general

    /**
     * Multiplies the selectivities of all SELECT and SUBPLAN operators found by following the first input of
     * each operator, starting at {@code op}, until an EMPTYTUPLESOURCE or an operator without inputs is reached.
     *
     * @param op the operator to start from; may be {@code null}
     * @param join passed through to {@link #getSelectivityFromAnnotationMain} for SELECT conditions
     * @return the combined selectivity; 1.0 when {@code op} is {@code null} or nothing contributes
     * @throws AlgebricksException if a join node lookup fails
     */
    protected double getSelectivity(ILogicalOperator op, boolean join) throws AlgebricksException {
        double sel = 1.0; // safe to return 1 if there is no annotation
        if (op == null) {
            return sel;
        }
        // find all the selectOperators here.
        while (op.getOperatorTag() != LogicalOperatorTag.EMPTYTUPLESOURCE) {
            if (op.getOperatorTag() == LogicalOperatorTag.SELECT) {
                SelectOperator selOper = (SelectOperator) op;
                sel *= getSelectivityFromAnnotationMain(selOper.getCondition().getValue(), join);
            }
            if (op.getOperatorTag() == LogicalOperatorTag.SUBPLAN) {
                sel *= getSelectivity((SubplanOperator) op);
            }
            if (op.getInputs().isEmpty()) {
                // A leaf that is not an EMPTYTUPLESOURCE: stop here, as the subplan variant below does,
                // instead of failing on getInputs().get(0).
                break;
            }
            op = op.getInputs().get(0).getValue();
        }
        return sel;
    }

    /**
     * Multiplies the selectivities of all SELECT operators found by following the first input of each operator,
     * starting at the first root of the first nested plan of {@code subplanOp}. Conditions are evaluated as
     * non-join predicates.
     *
     * @param subplanOp the subplan operator to inspect
     * @return the combined selectivity; 1.0 when no SELECT contributes
     * @throws AlgebricksException if a join node lookup fails
     */
    protected double getSelectivity(SubplanOperator subplanOp) throws AlgebricksException {
        double sel = 1.0; // safe to return 1 if there is no annotation
        ILogicalOperator op = subplanOp.getNestedPlans().get(0).getRoots().get(0).getValue();
        while (true) {
            if (op.getOperatorTag() == LogicalOperatorTag.SELECT) {
                SelectOperator selOper = (SelectOperator) op;
                sel *= getSelectivityFromAnnotationMain(selOper.getCondition().getValue(), false);
            }
            if (op.getInputs().isEmpty()) {
                break;
            }
            op = op.getInputs().get(0).getValue();
        }
        return sel;
    }
}