blob: 36739c506f295b8971d1b0e05b2fac7395989359 [file] [log] [blame]
package org.apache.rya.accumulo.pig.optimizer;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.Dataset;
import org.eclipse.rdf4j.query.algebra.Join;
import org.eclipse.rdf4j.query.algebra.LeftJoin;
import org.eclipse.rdf4j.query.algebra.StatementPattern;
import org.eclipse.rdf4j.query.algebra.TupleExpr;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizer;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics;
import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor;
import org.eclipse.rdf4j.query.algebra.helpers.StatementPatternCollector;
/**
* A query optimizer that re-orders nested Joins according to cardinality, preferring joins that have similar variables.
*
*/
public class SimilarVarJoinOptimizer implements QueryOptimizer {
protected final EvaluationStatistics statistics;
public SimilarVarJoinOptimizer() {
this(new EvaluationStatistics());
}
public SimilarVarJoinOptimizer(EvaluationStatistics statistics) {
this.statistics = statistics;
}
/**
* Applies generally applicable optimizations: path expressions are sorted
* from more to less specific.
*
* @param tupleExpr
*/
public void optimize(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings) {
tupleExpr.visit(new JoinVisitor());
}
protected class JoinVisitor extends AbstractQueryModelVisitor<RuntimeException> {
Set<String> boundVars = new HashSet<String>();
@Override
public void meet(LeftJoin leftJoin) {
leftJoin.getLeftArg().visit(this);
Set<String> origBoundVars = boundVars;
try {
boundVars = new HashSet<String>(boundVars);
boundVars.addAll(leftJoin.getLeftArg().getBindingNames());
leftJoin.getRightArg().visit(this);
} finally {
boundVars = origBoundVars;
}
}
@Override
public void meet(Join node) {
Set<String> origBoundVars = boundVars;
try {
boundVars = new HashSet<String>(boundVars);
// Recursively get the join arguments
List<TupleExpr> joinArgs = getJoinArgs(node, new ArrayList<TupleExpr>());
// Build maps of cardinalities and vars per tuple expression
Map<TupleExpr, Double> cardinalityMap = new HashMap<TupleExpr, Double>();
for (TupleExpr tupleExpr : joinArgs) {
double cardinality = statistics.getCardinality(tupleExpr);
cardinalityMap.put(tupleExpr, cardinality);
}
// Reorder the (recursive) join arguments to a more optimal sequence
List<TupleExpr> orderedJoinArgs = new ArrayList<TupleExpr>(joinArgs.size());
TupleExpr last = null;
while (!joinArgs.isEmpty()) {
TupleExpr tupleExpr = selectNextTupleExpr(joinArgs, cardinalityMap, last);
if (tupleExpr == null) {
break;
}
joinArgs.remove(tupleExpr);
orderedJoinArgs.add(tupleExpr);
last = tupleExpr;
// Recursively optimize join arguments
tupleExpr.visit(this);
boundVars.addAll(tupleExpr.getBindingNames());
}
// Build new join hierarchy
// Note: generated hierarchy is right-recursive to help the
// IterativeEvaluationOptimizer to factor out the left-most join
// argument
int i = 0;
TupleExpr replacement = orderedJoinArgs.get(i);
for (i++; i < orderedJoinArgs.size(); i++) {
replacement = new Join(replacement, orderedJoinArgs.get(i));
}
// Replace old join hierarchy
node.replaceWith(replacement);
} finally {
boundVars = origBoundVars;
}
}
protected <L extends List<TupleExpr>> L getJoinArgs(TupleExpr tupleExpr, L joinArgs) {
if (tupleExpr instanceof Join) {
Join join = (Join) tupleExpr;
getJoinArgs(join.getLeftArg(), joinArgs);
getJoinArgs(join.getRightArg(), joinArgs);
} else {
joinArgs.add(tupleExpr);
}
return joinArgs;
}
protected List<Var> getStatementPatternVars(TupleExpr tupleExpr) {
if(tupleExpr == null)
return null;
List<StatementPattern> stPatterns = StatementPatternCollector.process(tupleExpr);
List<Var> varList = new ArrayList<Var>(stPatterns.size() * 4);
for (StatementPattern sp : stPatterns) {
sp.getVars(varList);
}
return varList;
}
protected <M extends Map<Var, Integer>> M getVarFreqMap(List<Var> varList, M varFreqMap) {
for (Var var : varList) {
Integer freq = varFreqMap.get(var);
freq = (freq == null) ? 1 : freq + 1;
varFreqMap.put(var, freq);
}
return varFreqMap;
}
/**
* Selects from a list of tuple expressions the next tuple expression that
* should be evaluated. This method selects the tuple expression with
* highest number of bound variables, preferring variables that have been
* bound in other tuple expressions over variables with a fixed value.
*/
protected TupleExpr selectNextTupleExpr(List<TupleExpr> expressions,
Map<TupleExpr, Double> cardinalityMap,
TupleExpr last) {
double lowestCardinality = Double.MAX_VALUE;
TupleExpr result = expressions.get(0);
expressions = getExprsWithSameVars(expressions, last);
for (TupleExpr tupleExpr : expressions) {
// Calculate a score for this tuple expression
double cardinality = cardinalityMap.get(tupleExpr);
if (cardinality < lowestCardinality) {
// More specific path expression found
lowestCardinality = cardinality;
result = tupleExpr;
}
}
return result;
}
protected List<TupleExpr> getExprsWithSameVars(List<TupleExpr> expressions, TupleExpr last) {
if(last == null)
return expressions;
List<TupleExpr> retExprs = new ArrayList<TupleExpr>();
for(TupleExpr tupleExpr : expressions) {
List<Var> statementPatternVars = getStatementPatternVars(tupleExpr);
List<Var> lastVars = getStatementPatternVars(last);
statementPatternVars.retainAll(lastVars);
if(statementPatternVars.size() > 0) {
retExprs.add(tupleExpr);
}
}
if(retExprs.size() == 0) {
return expressions;
}
return retExprs;
}
}
}