| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.jena.sparql.algebra.optimize; |
| |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Set; |
| |
| import org.apache.jena.atlas.lib.Pair; |
| import org.apache.jena.atlas.lib.tuple.Tuple ; |
| import org.apache.jena.sparql.algebra.Op ; |
| import org.apache.jena.sparql.algebra.OpVars ; |
| import org.apache.jena.sparql.algebra.TransformCopy ; |
| import org.apache.jena.sparql.algebra.op.* ; |
| import org.apache.jena.sparql.core.Substitute ; |
| import org.apache.jena.sparql.core.Var ; |
| import org.apache.jena.sparql.core.VarExprList ; |
| import org.apache.jena.sparql.expr.* ; |
| |
| /** |
| * <p> |
| * Optimizer for transforming implicit joins. These covers queries like the |
| * following: |
| * </p> |
| * |
| * <pre> |
| * SELECT * |
| * WHERE |
| * { |
| * ?s a ?type1 . |
| * OPTIONAL |
| * { |
| * ?t a ?type2 . |
| * FILTER(?s = ?t) |
| * } |
| * } |
| * </pre> |
| * <p> |
| * Clearly this is a trivial example but doing this optimization can have big |
| * performance gains since it can completely eliminate cross products that we |
| * would otherwise be required to evaluate. The optimization where applicable |
| * results in a query of the following form: |
| * </p> |
| * |
| * <pre> |
| * SELECT * |
| * WHERE |
| * { |
| * ?s a ?type1 . |
| * OPTIONAL |
| * { |
| * ?s a ?type1 . |
| * BIND(?s AS ?t) |
| * } |
| * } |
| * </pre> |
| * <p> |
| * This does not handle the simpler case of implicit joins where |
| * {@code OPTIONAL} is not involved, for that see |
| * {@link TransformFilterImplicitJoin} |
| * </p> |
| * <h3>Applicability</h3> |
| * <p> |
| * This optimization aims to eliminate implicit left joins of the form |
| * {@code ?x = ?y} or {@code SAMETERM(?x, ?y)}, the latter can almost always be |
| * safely eliminated while the former may only be eliminated in the case where |
| * we can guarantee that at least one of the variables is a non-literal e.g. it |
| * occurs in the subject/predicate position. In the case where this is not true |
| * the optimization may not be made since we cannot assume that we can map value |
| * equality to term equality by making the optimization. |
| * </p> |
| */ |
| public class TransformImplicitLeftJoin extends TransformCopy { |
| |
| @Override |
| public Op transform(OpLeftJoin opLeftJoin, Op left, Op right) { |
| // Must have associated expressions to be eligible |
| if (opLeftJoin.getExprs() == null) |
| return super.transform(opLeftJoin, left, right); |
| |
| // Try and apply this optimization |
| Op op = apply(opLeftJoin, left, right); |
| if (op == null) |
| return super.transform(opLeftJoin, left, right); |
| return op; |
| } |
| |
| private static Op apply(OpLeftJoin opLeftJoin, Op left, Op right) { |
| // This handles arbitrarily nested && conditions |
| ExprList orig = ExprList.splitConjunction(opLeftJoin.getExprs()); |
| |
| // Extract optimizable conditions? |
| Pair<List<Pair<Var, Var>>, ExprList> p = preprocessFilterImplicitJoin(left, right, orig); |
| |
| // Were there any optimizable conditions? |
| if (p == null || p.getLeft().size() == 0) |
| return null; |
| |
| List<Pair<Var, Var>> joins = p.getLeft(); |
| Collection<Var> varsMentioned = varsMentionedInImplictJoins(joins); |
| ExprList remaining = p.getRight(); |
| |
| // ---- Check if the subOp is the right shape to transform. |
| Op op = right; |
| if (!safeToTransform(joins, varsMentioned, op)) |
| return null; |
| |
| // We apply substitution only to the RHS |
| // This is because applying to both sides would change the join |
| // semantics of the Left Join |
| Collection<Var> lhsVars = OpVars.visibleVars(left); |
| Collection<Var> rhsVars = OpVars.visibleVars(right); |
| |
| // TODO A better approach here would be to build a dependency graph of |
| // the implicit joins and use that to inform the order in which they |
| // should be carried out or even to remove some entirely where |
| // transitivity and commutativity apply |
| |
| for (Pair<Var, Var> implicitJoin : joins) { |
| // Which variable do we want to substitute out? |
| // We don't need to deal with the case of neither variable being on |
| // the RHS |
| Var lVar = implicitJoin.getLeft(); |
| Var rVar = implicitJoin.getRight(); |
| |
| if (lhsVars.contains(lVar) && lhsVars.contains(rVar)) { |
| // Both vars are on LHS |
| |
| if (rhsVars.contains(lVar) && rhsVars.contains(rVar)) { |
| // Both vars are also on RHS |
| // Order of substitution doesn't matter |
| op = processFilterWorker(op, lVar, rVar); |
| } else if (rhsVars.contains(lVar)) { |
| // Substitute left variable for right variable |
| op = processFilterWorker(op, lVar, rVar); |
| } else if (rhsVars.contains(rVar)) { |
| // Substitute right variable for left variable |
| op = processFilterWorker(op, rVar, lVar); |
| } else { |
| // May be hit if trying to apply a sequence of |
| // substitutions |
| return null; |
| } |
| } else if (lhsVars.contains(lVar)) { |
| // Only left variable on RHS |
| |
| if (rhsVars.contains(rVar)) { |
| // Substitute right variable for left variable |
| op = processFilterWorker(op, rVar, lVar); |
| } else { |
| // May be hit if trying to apply a sequence of substitutions |
| return null; |
| } |
| } else if (lhsVars.contains(rVar)) { |
| // Only right variable on LHS |
| |
| if (rhsVars.contains(lVar)) { |
| // Substitute left variable for right variable |
| op = processFilterWorker(op, lVar, rVar); |
| } else { |
| // May be hit if trying to apply a sequence of substitutions |
| return null; |
| } |
| } else { |
| // Neither variable is on LHS |
| |
| if (rhsVars.contains(lVar) && rhsVars.contains(rVar)) { |
| // Both variables are on RHS so can substitute one for the |
| // other |
| op = processFilterWorker(op, lVar, rVar); |
| } else { |
| // May be hit if trying to apply a sequence of substitutions |
| return null; |
| } |
| } |
| |
| // Re-compute visible RHS vars after each substitution as it may |
| // affect subsequent substitutions |
| rhsVars = OpVars.visibleVars(op); |
| } |
| |
| if (remaining.size() > 0) { |
| return OpLeftJoin.create(left, op, remaining); |
| } else { |
| return OpLeftJoin.create(left, op, (ExprList) null); |
| } |
| } |
| |
| private static Pair<List<Pair<Var, Var>>, ExprList> preprocessFilterImplicitJoin(Op left, Op right, ExprList exprs) { |
| List<Pair<Var, Var>> exprsJoins = new ArrayList<>(); |
| ExprList exprsOther = new ExprList(); |
| for (Expr e : exprs.getList()) { |
| Pair<Var, Var> p = preprocess(left, right, e); |
| if (p != null) { |
| exprsJoins.add(p); |
| } else { |
| exprsOther.add(e); |
| } |
| } |
| if (exprsJoins.size() == 0) |
| return null; |
| return Pair.create(exprsJoins, exprsOther); |
| } |
| |
| private static Pair<Var, Var> preprocess(Op opLeft, Op opRight, Expr e) { |
| if (!(e instanceof E_Equals) && !(e instanceof E_SameTerm)) |
| return null; |
| |
| ExprFunction2 eq = (ExprFunction2) e; |
| // An equals or same term implicit join |
| Expr left = eq.getArg1(); |
| Expr right = eq.getArg2(); |
| |
| if (!left.isVariable() || !right.isVariable()) { |
| return null; |
| } |
| if (left.equals(right)) { |
| return null; |
| } |
| |
| // If neither variable is visible in RHS optimization does not apply |
| Collection<Var> rhsVars = OpVars.visibleVars(opRight); |
| if (!rhsVars.contains(left.asVar()) && !rhsVars.contains(right.asVar())) |
| return null; |
| |
| if (e instanceof E_Equals) { |
| // Is a safe equals for this optimization? |
| Tuple<Set<Var>> varsByPosition = OpVars.mentionedVarsByPosition(opLeft, opRight); |
| |
| if (!isSafeEquals(varsByPosition, left.asVar(), right.asVar())) |
| return null; |
| } |
| |
| return Pair.create(left.asVar(), right.asVar()); |
| } |
| |
| private static boolean isSafeEquals(Tuple<Set<Var>> varsByPosition, Var left, Var right) { |
| // For equality based joins ensure at least one variable must be |
| // used in graph/subject/predicate position thus guaranteeing it to |
| // not be a literal so replacing with term equality by ways of |
| // substitution will be safe |
| |
| // Should get 5 sets |
| if (varsByPosition.len() != 5) |
| return false; |
| |
| // If anything is used in the object/unknown position then we |
| // potentially have an issue unless it is also used in a safe |
| // position |
| Set<Var> safeVars = new HashSet<>(); |
| safeVars.addAll(varsByPosition.get(0)); |
| safeVars.addAll(varsByPosition.get(1)); |
| safeVars.addAll(varsByPosition.get(2)); |
| Set<Var> unsafeVars = new HashSet<>(); |
| unsafeVars.addAll(varsByPosition.get(3)); |
| unsafeVars.addAll(varsByPosition.get(4)); |
| boolean lhsSafe = true, rhsSafe = true; |
| if (unsafeVars.size() > 0) { |
| if (unsafeVars.contains(left)) { |
| // LHS Variable occurred in unsafe position |
| if (!safeVars.contains(left)) { |
| // LHS Variable is unsafe |
| lhsSafe = false; |
| } |
| } |
| if (unsafeVars.contains(right)) { |
| // RHS Variable occurred in unsafe position |
| if (!safeVars.contains(right)) { |
| // RHS Variable is unsafe |
| rhsSafe = false; |
| } |
| } |
| } |
| |
| // At least one variable must be safe or this equality expression is |
| // not an implicit join that can be safely optimized |
| return lhsSafe || rhsSafe; |
| } |
| |
| private static Collection<Var> varsMentionedInImplictJoins(List<Pair<Var, Var>> joins) { |
| Set<Var> vars = new HashSet<>(); |
| for (Pair<Var, Var> p : joins) { |
| vars.add(p.getLeft()); |
| vars.add(p.getRight()); |
| } |
| return vars; |
| } |
| |
| private static boolean safeToTransform(List<Pair<Var, Var>> joins, Collection<Var> varsEquality, Op op) { |
| // Structure as a visitor? |
| if (op instanceof OpBGP || op instanceof OpQuadPattern) |
| return true; |
| |
| if (op instanceof OpFilter) { |
| OpFilter opf = (OpFilter) op; |
| return safeToTransform(joins, varsEquality, opf.getSubOp()); |
| } |
| |
| // This will be applied also in sub-calls of the Transform but queries |
| // are very rarely so deep that it matters. |
| if (op instanceof OpSequence) { |
| OpN opN = (OpN) op; |
| for (Op subOp : opN.getElements()) { |
| if (!safeToTransform(joins, varsEquality, subOp)) |
| return false; |
| } |
| return true; |
| } |
| |
| if (op instanceof OpJoin) { |
| Op2 op2 = (Op2) op; |
| return safeToTransform(joins, varsEquality, op2.getLeft()) && safeToTransform(joins, varsEquality, op2.getRight()); |
| } |
| |
| if (op instanceof OpUnion) { |
| // True only if for any pairs that affect the pattern both variables occur |
| Set<Var> fixedVars = OpVars.fixedVars(op); |
| for (Pair<Var, Var> pair : joins) { |
| if (fixedVars.contains(pair.getLeft()) && !fixedVars.contains(pair.getRight())) return false; |
| if (!fixedVars.contains(pair.getLeft()) && fixedVars.contains(pair.getRight())) return false; |
| } |
| return true; |
| } |
| |
| // Not safe unless filter variables are mentioned on the LHS. |
| if (op instanceof OpConditional || op instanceof OpLeftJoin) { |
| Op2 opleftjoin = (Op2) op; |
| |
| if (!safeToTransform(joins, varsEquality, opleftjoin.getLeft()) |
| || !safeToTransform(joins, varsEquality, opleftjoin.getRight())) |
| return false; |
| |
| // Not only must the left and right be safe to transform, |
| // but the equality variable must be known to be always set. |
| |
| // If the varsLeft are disjoint from assigned vars, |
| // we may be able to push assign down right |
| // (this generalises the unit table case specialcase1) |
| // Needs more investigation. |
| |
| Op opLeft = opleftjoin.getLeft(); |
| Set<Var> varsLeft = OpVars.visibleVars(opLeft); |
| if (varsLeft.containsAll(varsEquality)) |
| return true; |
| return false; |
| } |
| |
| if (op instanceof OpGraph) { |
| OpGraph opg = (OpGraph) op; |
| return safeToTransform(joins, varsEquality, opg.getSubOp()); |
| } |
| |
| // Subquery - assume scope rewriting has already been applied. |
| if (op instanceof OpModifier) { |
| // ORDER BY? |
| OpModifier opMod = (OpModifier) op; |
| if (opMod instanceof OpProject) { |
| OpProject opProject = (OpProject) op; |
| // Writing "SELECT ?var" for "?var" -> a value would need |
| // AS-ification. |
| for (Var v : opProject.getVars()) { |
| if (varsEquality.contains(v)) |
| return false; |
| } |
| } |
| return safeToTransform(joins, varsEquality, opMod.getSubOp()); |
| } |
| |
| if (op instanceof OpGroup) { |
| OpGroup opGroup = (OpGroup) op; |
| VarExprList varExprList = opGroup.getGroupVars(); |
| return safeToTransform(varsEquality, varExprList) && safeToTransform(joins, varsEquality, opGroup.getSubOp()); |
| } |
| |
| if (op instanceof OpTable) { |
| OpTable opTable = (OpTable) op; |
| if (opTable.isJoinIdentity()) |
| return true; |
| } |
| |
| // Op1 - OpGroup |
| // Op1 - OpOrder |
| // Op1 - OpAssign, OpExtend |
| // Op1 - OpFilter - done. |
| // Op1 - OpLabel - easy |
| // Op1 - OpService - no. |
| |
| return false; |
| } |
| |
| private static boolean safeToTransform(Collection<Var> varsEquality, VarExprList varsExprList) { |
| // If the named variable is used, unsafe to rewrite. |
| return Collections.disjoint(varsExprList.getVars(), varsEquality); |
| } |
| |
| // ---- Transformation |
| |
| private static Op processFilterWorker(Op op, Var find, Var replace) { |
| return subst(op, find, replace); |
| } |
| |
| private static Op subst(Op subOp, Var find, Var replace) { |
| Op op = Substitute.substitute(subOp, find, replace); |
| return OpAssign.assign(op, find, new ExprVar(replace)); |
| } |
| } |