blob: ab7c8ad9330de9ec01bb218976224be0591cc636 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.rya.rdftriplestore.evaluation;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.rya.rdftriplestore.utils.DefaultStatistics;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.Dataset;
import org.eclipse.rdf4j.query.algebra.Join;
import org.eclipse.rdf4j.query.algebra.LeftJoin;
import org.eclipse.rdf4j.query.algebra.StatementPattern;
import org.eclipse.rdf4j.query.algebra.TupleExpr;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizer;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics;
import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor;
import org.eclipse.rdf4j.query.algebra.helpers.StatementPatternCollector;
/**
* A query optimizer that re-orders nested Joins.
*
* @author Arjohn Kampman
* @author James Leigh
*/
public class QueryJoinOptimizer implements QueryOptimizer {
protected final EvaluationStatistics statistics;
public QueryJoinOptimizer() {
this(new EvaluationStatistics());
}
public QueryJoinOptimizer(EvaluationStatistics statistics) {
this.statistics = statistics;
}
/**
* Applies generally applicable optimizations: path expressions are sorted
* from more to less specific.
*
* @param tupleExpr
*/
public void optimize(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings) {
if (!(statistics instanceof DefaultStatistics)) {
tupleExpr.visit(new JoinVisitor());
}
}
protected class JoinVisitor extends AbstractQueryModelVisitor<RuntimeException> {
Set<String> boundVars = new HashSet<String>();
@Override
public void meet(LeftJoin leftJoin) {
leftJoin.getLeftArg().visit(this);
Set<String> origBoundVars = boundVars;
try {
boundVars = new HashSet<String>(boundVars);
boundVars.addAll(leftJoin.getLeftArg().getBindingNames());
leftJoin.getRightArg().visit(this);
} finally {
boundVars = origBoundVars;
}
}
@Override
public void meet(Join node) {
Set<String> origBoundVars = boundVars;
try {
boundVars = new HashSet<String>(boundVars);
// Recursively get the join arguments
List<TupleExpr> joinArgs = getJoinArgs(node, new ArrayList<TupleExpr>());
// Build maps of cardinalities and vars per tuple expression
Map<TupleExpr, Double> cardinalityMap = new HashMap<TupleExpr, Double>();
// Map<TupleExpr, List<Var>> varsMap = new HashMap<TupleExpr, List<Var>>();
// Map<Var, Double> varCardinalityMap = new HashMap<Var, Double>();
for (TupleExpr tupleExpr : joinArgs) {
double cardinality = statistics.getCardinality(tupleExpr);
// List<Var> statementPatternVars = getStatementPatternVars(tupleExpr);
cardinalityMap.put(tupleExpr, cardinality);
// varsMap.put(tupleExpr, statementPatternVars);
}
// Build map of var frequences
// Map<Var, Integer> varFreqMap = new HashMap<Var, Integer>();
// for (List<Var> varList : varsMap.values()) {
// getVarFreqMap(varList, varFreqMap);
// }
// Reorder the (recursive) join arguments to a more optimal sequence
List<TupleExpr> orderedJoinArgs = new ArrayList<TupleExpr>(joinArgs.size());
while (!joinArgs.isEmpty()) {
TupleExpr tupleExpr = selectNextTupleExpr(joinArgs, cardinalityMap
);
if (tupleExpr == null) {
break;
}
joinArgs.remove(tupleExpr);
orderedJoinArgs.add(tupleExpr);
// Recursively optimize join arguments
tupleExpr.visit(this);
boundVars.addAll(tupleExpr.getBindingNames());
}
// Build new join hierarchy
// Note: generated hierarchy is right-recursive to help the
// IterativeEvaluationOptimizer to factor out the left-most join
// argument
int i = 0;
TupleExpr replacement = orderedJoinArgs.get(i);
for (i++; i < orderedJoinArgs.size(); i++) {
replacement = new Join(replacement, orderedJoinArgs.get(i));
}
// Replace old join hierarchy
node.replaceWith(replacement);
} finally {
boundVars = origBoundVars;
}
}
protected <L extends List<TupleExpr>> L getJoinArgs(TupleExpr tupleExpr, L joinArgs) {
if (tupleExpr instanceof Join) {
Join join = (Join) tupleExpr;
getJoinArgs(join.getLeftArg(), joinArgs);
getJoinArgs(join.getRightArg(), joinArgs);
} else {
joinArgs.add(tupleExpr);
}
return joinArgs;
}
protected List<Var> getStatementPatternVars(TupleExpr tupleExpr) {
List<StatementPattern> stPatterns = StatementPatternCollector.process(tupleExpr);
List<Var> varList = new ArrayList<Var>(stPatterns.size() * 4);
for (StatementPattern sp : stPatterns) {
sp.getVars(varList);
}
return varList;
}
protected <M extends Map<Var, Integer>> M getVarFreqMap(List<Var> varList, M varFreqMap) {
for (Var var : varList) {
Integer freq = varFreqMap.get(var);
freq = (freq == null) ? 1 : freq + 1;
varFreqMap.put(var, freq);
}
return varFreqMap;
}
/**
* Selects from a list of tuple expressions the next tuple expression that
* should be evaluated. This method selects the tuple expression with
* highest number of bound variables, preferring variables that have been
* bound in other tuple expressions over variables with a fixed value.
*/
protected TupleExpr selectNextTupleExpr(List<TupleExpr> expressions,
Map<TupleExpr, Double> cardinalityMap
// ,Map<TupleExpr, List<Var>> varsMap,
// Map<Var, Integer> varFreqMap, Set<String> boundVars
) {
double lowestCardinality = Double.MAX_VALUE;
TupleExpr result = expressions.get(0);
for (TupleExpr tupleExpr : expressions) {
// Calculate a score for this tuple expression
// double cardinality = getTupleExprCardinality(tupleExpr, cardinalityMap, varsMap, varFreqMap, boundVars);
double cardinality = cardinalityMap.get(tupleExpr);
// List<Var> vars = varsMap.get(tupleExpr);
// List<Var> distinctUnboundVars = getUnboundVars(vars);
// if (distinctUnboundVars.size() >= 2) {
// cardinality *= (distinctUnboundVars.size() + 1);
// }
if (cardinality < lowestCardinality) {
// More specific path expression found
lowestCardinality = cardinality;
result = tupleExpr;
}
}
return result;
}
protected double getTupleExprCardinality(TupleExpr tupleExpr, Map<TupleExpr, Double> cardinalityMap,
Map<TupleExpr, List<Var>> varsMap, Map<Var, Integer> varFreqMap, Set<String> boundVars) {
double cardinality = cardinalityMap.get(tupleExpr);
List<Var> vars = varsMap.get(tupleExpr);
// Compensate for variables that are bound earlier in the evaluation
List<Var> unboundVars = getUnboundVars(vars);
List<Var> constantVars = getConstantVars(vars);
int nonConstantVarCount = vars.size() - constantVars.size();
if (nonConstantVarCount > 0) {
double exp = (double) unboundVars.size() / nonConstantVarCount;
cardinality = Math.pow(cardinality, exp);
}
if (unboundVars.isEmpty()) {
// Prefer patterns with more bound vars
if (nonConstantVarCount > 0) {
cardinality /= nonConstantVarCount;
}
} else {
// Prefer patterns that bind variables from other tuple expressions
int foreignVarFreq = getForeignVarFreq(unboundVars, varFreqMap);
if (foreignVarFreq > 0) {
cardinality /= foreignVarFreq;
}
}
// Prefer patterns that bind more variables
List<Var> distinctUnboundVars = getUnboundVars(new
HashSet<Var>(vars));
if (distinctUnboundVars.size() >= 2) {
cardinality /= distinctUnboundVars.size();
}
return cardinality;
}
protected List<Var> getConstantVars(Iterable<Var> vars) {
List<Var> constantVars = new ArrayList<Var>();
for (Var var : vars) {
if (var.hasValue()) {
constantVars.add(var);
}
}
return constantVars;
}
protected List<Var> getUnboundVars(Iterable<Var> vars) {
List<Var> unboundVars = new ArrayList<Var>();
for (Var var : vars) {
if (!var.hasValue() && !this.boundVars.contains(var.getName())) {
unboundVars.add(var);
}
}
return unboundVars;
}
protected int getForeignVarFreq(List<Var> ownUnboundVars, Map<Var, Integer> varFreqMap) {
int result = 0;
Map<Var, Integer> ownFreqMap = getVarFreqMap(ownUnboundVars, new HashMap<Var, Integer>());
for (Map.Entry<Var, Integer> entry : ownFreqMap.entrySet()) {
Var var = entry.getKey();
int ownFreq = entry.getValue();
result += varFreqMap.get(var) - ownFreq;
}
return result;
}
}
}