| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.jena.sparql.algebra.optimize; |
| |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Set; |
| |
| import org.apache.jena.atlas.lib.Pair; |
| import org.apache.jena.atlas.lib.tuple.Tuple ; |
| import org.apache.jena.sparql.algebra.Op ; |
| import org.apache.jena.sparql.algebra.OpVars ; |
| import org.apache.jena.sparql.algebra.TransformCopy ; |
| import org.apache.jena.sparql.algebra.op.* ; |
| import org.apache.jena.sparql.core.Substitute ; |
| import org.apache.jena.sparql.core.Var ; |
| import org.apache.jena.sparql.core.VarExprList ; |
| import org.apache.jena.sparql.expr.* ; |
| |
| /** |
| * <p> |
| * Optimizer for transforming implicit joins. These covers queries like the |
| * following: |
| * </p> |
| * |
| * <pre> |
| * SELECT * |
| * WHERE |
| * { |
| * ?s a ?type1 . |
| * ?t a ?type2 . |
| * FILTER(?s = ?t) |
| * } |
| * </pre> |
| * <p> |
| * Clearly this is a trivial example but doing this optimization can have big |
| * performance gains since it can completely eliminate cross products that we |
| * would otherwise be required to evaluate. The optimization where applicable |
| * results in a query of the following form: |
| * </p> |
| * |
| * <pre> |
| * SELECT * |
| * WHERE |
| * { |
| * ?s a ?type1 . |
| * ?s a ?type1 . |
| * BIND(?s AS ?t) |
| * } |
| * </pre> |
| * <p> |
| * The optimizer does not cover the implicit left join case, for that see |
| * {@link TransformImplicitLeftJoin} |
| * </p> |
| * <h3>Applicability</h3> |
| * <p> |
| * This optimization aims to eliminate implicit joins of the form |
| * {@code ?x = ?y} or {@code SAMETERM(?x, ?y)}, the latter can almost always be |
| * safely eliminated while the former may only be eliminated in the case where |
| * we can guarantee that at least one of the variables is a non-literal e.g. it |
| * occurs in the subject/predicate position. In the case where this is not true |
| * the optimization may not be made since we cannot assume that we can map value |
| * equality to term equality by making the optimization. |
| * </p> |
| * <h3>Known Limitations/To Do</h3> |
| * <ul> |
| * <li>Application to implicit joins may block the sequence transform which |
| * means the potential benefits of the optimization are negated</li> |
| * </ul> |
| */ |
| public class TransformFilterImplicitJoin extends TransformCopy { |
| |
| @Override |
| public Op transform(OpFilter opFilter, Op subOp) { |
| Op op = apply(opFilter.getExprs(), subOp); |
| if (op == null) |
| return super.transform(opFilter, subOp); |
| return op; |
| } |
| |
| private static Op apply(ExprList exprs, Op subOp) { |
| // ---- Find and extract any implicit join filters. |
| Pair<List<Pair<Var, Var>>, ExprList> p = preprocessFilterImplicitJoin(subOp, exprs); |
| if (p == null || p.getLeft().size() == 0) |
| return null; |
| |
| List<Pair<Var, Var>> joins = p.getLeft(); |
| Collection<Var> varsMentioned = varsMentionedInImplictJoins(joins); |
| ExprList remaining = p.getRight(); |
| |
| // Not possible to optimize if multiple overlapping implicit joins |
| // We can test this simply by checking that the number of vars in |
| // varsMentioned is double the number of detected implicit joins |
| |
| // TODO In principal this may be safe provided we carefully apply the |
| // substitutions in the correct order, this is left as a future |
| // enhancement to this optimizer |
| if (varsMentioned.size() != joins.size() * 2) |
| return null; |
| |
| // ---- Check if the subOp is the right shape to transform. |
| Op op = subOp; |
| |
| // Special case : deduce that the filter will always "eval unbound" |
| // hence eliminate all rows. Return the empty table. |
| if (testSpecialCaseUnused(subOp, joins, remaining)) |
| return OpTable.empty(); |
| |
| // Special case: the deep left op of a OpConditional/OpLeftJoin is unit |
| // table. |
| // This is { OPTIONAL{P1} OPTIONAL{P2} ... FILTER(?x = :x) } |
| if (testSpecialCaseOptional(subOp, joins, remaining)) { |
| // Find backbone of ops |
| List<Op> ops = extractOptionals(subOp); |
| ops = processSpecialCaseOptional(ops, joins); |
| // Put back together |
| op = rebuild((Op2) subOp, ops); |
| // Put all filters - either we optimized, or we left alone. |
| // Either way, the complete set of filter expressions. |
| op = OpFilter.filterBy(exprs, op); |
| return op; |
| } |
| |
| // Special case : filter is over a union where one/both sides are always false |
| if (testSpecialCaseUnion(subOp, joins)) { |
| // This will attempt to eliminate the sides that are always false |
| op = processSpecialCaseUnion(subOp, joins); |
| |
| // In the case where both sides were invalid we'll have a table empty |
| // operator at this point and can return immediately |
| if (op instanceof OpTable) return op; |
| } |
| |
| // ---- Transform |
| |
| if (!safeToTransform(joins, varsMentioned, op)) |
| return null; |
| for (Pair<Var, Var> implicitJoin : joins) { |
| // TODO Where there are multiple conditions it may be necessary to |
| // apply the substitutions more intelligently |
| op = processFilterWorker(op, implicitJoin.getLeft(), implicitJoin.getRight()); |
| } |
| |
| // ---- Place any filter expressions around the processed sub op. |
| if (remaining.size() > 0) |
| op = OpFilter.filterBy(remaining, op); |
| return op; |
| } |
| |
| // --- find and extract |
| private static Pair<List<Pair<Var, Var>>, ExprList> preprocessFilterImplicitJoin(Op subOp, ExprList exprs) { |
| List<Pair<Var, Var>> exprsJoins = new ArrayList<>(); |
| ExprList exprsOther = new ExprList(); |
| for (Expr e : exprs.getList()) { |
| Pair<Var, Var> p = preprocess(subOp, e); |
| if (p != null) { |
| exprsJoins.add(p); |
| } else { |
| exprsOther.add(e); |
| } |
| } |
| if (exprsJoins.size() == 0) |
| return null; |
| return Pair.create(exprsJoins, exprsOther); |
| } |
| |
| private static Pair<Var, Var> preprocess(Op subOp, Expr e) { |
| if (!(e instanceof E_Equals) && !(e instanceof E_SameTerm)) |
| return null; |
| |
| ExprFunction2 eq = (ExprFunction2) e; |
| Expr left = eq.getArg1(); |
| Expr right = eq.getArg2(); |
| |
| if (!left.isVariable() || !right.isVariable()) { |
| return null; |
| } |
| if (left.equals(right)) { |
| return null; |
| } |
| |
| if (e instanceof E_Equals) { |
| // Is a safe equals for this optimization? |
| Tuple<Set<Var>> varsByPosition = OpVars.mentionedVarsByPosition(subOp); |
| |
| if (!isSafeEquals(varsByPosition, left.asVar(), right.asVar())) |
| return null; |
| } |
| |
| return Pair.create(left.asVar(), right.asVar()); |
| } |
| |
| private static boolean isSafeEquals(Tuple<Set<Var>> varsByPosition, Var left, Var right) { |
| // For equality based joins ensure at least one variable must be |
| // used in graph/subject/predicate position thus guaranteeing it to |
| // not be a literal so replacing with term equality by ways of |
| // substitution will be safe |
| |
| // Should get 5 sets |
| if (varsByPosition.len() != 5) |
| return false; |
| |
| // If anything is used in the object/unknown position then we |
| // potentially have an issue unless it is also used in a safe |
| // position |
| Set<Var> safeVars = new HashSet<>(); |
| safeVars.addAll(varsByPosition.get(0)); |
| safeVars.addAll(varsByPosition.get(1)); |
| safeVars.addAll(varsByPosition.get(2)); |
| Set<Var> unsafeVars = new HashSet<>(); |
| unsafeVars.addAll(varsByPosition.get(3)); |
| unsafeVars.addAll(varsByPosition.get(4)); |
| boolean lhsSafe = true, rhsSafe = true; |
| if (unsafeVars.size() > 0) { |
| if (unsafeVars.contains(left)) { |
| // LHS Variable occurred in unsafe position |
| if (!safeVars.contains(left)) { |
| // LHS Variable is unsafe |
| lhsSafe = false; |
| } |
| } |
| if (unsafeVars.contains(right)) { |
| // RHS Variable occurred in unsafe position |
| if (!safeVars.contains(right)) { |
| // RHS Variable is unsafe |
| rhsSafe = false; |
| } |
| } |
| } |
| |
| // At least one variable must be safe or this equality expression is |
| // not an implicit join that can be safely optimized |
| return lhsSafe || rhsSafe; |
| } |
| |
| private static Collection<Var> varsMentionedInImplictJoins(List<Pair<Var, Var>> joins) { |
| Set<Var> vars = new HashSet<>(); |
| for (Pair<Var, Var> p : joins) { |
| vars.add(p.getLeft()); |
| vars.add(p.getRight()); |
| } |
| return vars; |
| } |
| |
| private static boolean safeToTransform(List<Pair<Var, Var>> joins, Collection<Var> varsEquality, Op op) { |
| // Structure as a visitor? |
| if (op instanceof OpBGP || op instanceof OpQuadPattern) |
| return true; |
| |
| if (op instanceof OpFilter) { |
| OpFilter opf = (OpFilter) op; |
| return safeToTransform(joins, varsEquality, opf.getSubOp()); |
| } |
| |
| // This will be applied also in sub-calls of the Transform but queries |
| // are very rarely so deep that it matters. |
| if (op instanceof OpSequence) { |
| OpN opN = (OpN) op; |
| for (Op subOp : opN.getElements()) { |
| if (!safeToTransform(joins, varsEquality, subOp)) |
| return false; |
| } |
| return true; |
| } |
| |
| if (op instanceof OpJoin) { |
| Op2 op2 = (Op2) op; |
| return safeToTransform(joins, varsEquality, op2.getLeft()) && safeToTransform(joins, varsEquality, op2.getRight()); |
| } |
| |
| if (op instanceof OpUnion) { |
| // True only if for any pairs that affect the pattern both variables occur |
| Set<Var> fixedVars = OpVars.fixedVars(op); |
| for (Pair<Var, Var> pair : joins) { |
| if (fixedVars.contains(pair.getLeft()) && !fixedVars.contains(pair.getRight())) return false; |
| if (!fixedVars.contains(pair.getLeft()) && fixedVars.contains(pair.getRight())) return false; |
| } |
| return true; |
| } |
| |
| // Not safe unless filter variables are mentioned on the LHS. |
| if (op instanceof OpConditional || op instanceof OpLeftJoin) { |
| Op2 opleftjoin = (Op2) op; |
| |
| if (!safeToTransform(joins, varsEquality, opleftjoin.getLeft()) |
| || !safeToTransform(joins, varsEquality, opleftjoin.getRight())) |
| return false; |
| |
| // Not only must the left and right be safe to transform, |
| // but the equality variable must be known to be always set. |
| |
| // If the varsLeft are disjoint from assigned vars, |
| // we may be able to push assign down right |
| // (this generalises the unit table case specialcase1) |
| // Needs more investigation. |
| |
| Op opLeft = opleftjoin.getLeft(); |
| Set<Var> varsLeft = OpVars.visibleVars(opLeft); |
| if (varsLeft.containsAll(varsEquality)) |
| return true; |
| return false; |
| } |
| |
| if (op instanceof OpGraph) { |
| OpGraph opg = (OpGraph) op; |
| return safeToTransform(joins, varsEquality, opg.getSubOp()); |
| } |
| |
| // Subquery - assume scope rewriting has already been applied. |
| if (op instanceof OpModifier) { |
| // ORDER BY? |
| OpModifier opMod = (OpModifier) op; |
| if (opMod instanceof OpProject) { |
| OpProject opProject = (OpProject) op; |
| // Writing "SELECT ?var" for "?var" -> a value would need |
| // AS-ification. |
| for (Var v : opProject.getVars()) { |
| if (varsEquality.contains(v)) |
| return false; |
| } |
| } |
| return safeToTransform(joins, varsEquality, opMod.getSubOp()); |
| } |
| |
| if (op instanceof OpGroup) { |
| OpGroup opGroup = (OpGroup) op; |
| VarExprList varExprList = opGroup.getGroupVars(); |
| return safeToTransform(varsEquality, varExprList) && safeToTransform(joins, varsEquality, opGroup.getSubOp()); |
| } |
| |
| if (op instanceof OpTable) { |
| OpTable opTable = (OpTable) op; |
| if (opTable.isJoinIdentity()) |
| return true; |
| } |
| |
| // Op1 - OpGroup |
| // Op1 - OpOrder |
| // Op1 - OpAssign, OpExtend |
| // Op1 - OpFilter - done. |
| // Op1 - OpLabel - easy |
| // Op1 - OpService - no. |
| |
| return false; |
| } |
| |
| private static boolean safeToTransform(Collection<Var> varsEquality, VarExprList varsExprList) { |
| // If the named variable is used, unsafe to rewrite. |
| return Collections.disjoint(varsExprList.getVars(), varsEquality); |
| } |
| |
| // -- A special case |
| |
| private static boolean testSpecialCaseUnused(Op op, List<Pair<Var, Var>> joins, ExprList remaining) { |
| // If the op does not contain one of the vars at all, then the implicit |
| // join will be "eval unbound" i.e. false. |
| // We can return empty table. |
| Set<Var> patternVars = OpVars.visibleVars(op); |
| for (Pair<Var, Var> p : joins) { |
| if (!patternVars.contains(p.getLeft()) || !patternVars.contains(p.getRight())) |
| return true; |
| } |
| return false; |
| } |
| |
| // If a sequence of OPTIONALS, and nothing prior to the first, we end up |
| // with a unit table on the left side of a next of LeftJoin/conditionals. |
| |
| private static boolean testSpecialCaseOptional(Op op, List<Pair<Var, Var>> joins, ExprList remaining) { |
| while (op instanceof OpConditional || op instanceof OpLeftJoin) { |
| Op2 opleftjoin2 = (Op2) op; |
| op = opleftjoin2.getLeft(); |
| } |
| return isTableUnit(op); |
| } |
| |
| private static boolean testSpecialCaseUnion(Op op, List<Pair<Var, Var>> joins) { |
| if (op instanceof OpUnion) { |
| OpUnion union = (OpUnion) op; |
| Set<Var> leftVars = OpVars.visibleVars(union.getLeft()); |
| Set<Var> rightVars = OpVars.visibleVars(union.getRight()); |
| |
| // Is a special case if there is any implicit join where only one of the variables mentioned in an |
| // implicit join is present on one side of the union |
| for (Pair<Var, Var> p : joins) { |
| if (!leftVars.contains(p.getLeft()) || !leftVars.contains(p.getRight())) return true; |
| if (!rightVars.contains(p.getLeft()) || !rightVars.contains(p.getRight())) return true; |
| } |
| } |
| return false; |
| } |
| |
| private static List<Op> extractOptionals(Op op) { |
| List<Op> chain = new ArrayList<>(); |
| while (op instanceof OpConditional || op instanceof OpLeftJoin) { |
| Op2 opleftjoin2 = (Op2) op; |
| chain.add(opleftjoin2.getRight()); |
| op = opleftjoin2.getLeft(); |
| } |
| return chain; |
| } |
| |
| private static List<Op> processSpecialCaseOptional(List<Op> ops, List<Pair<Var, Var>> joins) { |
| List<Op> ops2 = new ArrayList<>(); |
| Collection<Var> vars = varsMentionedInImplictJoins(joins); |
| |
| for (Op op : ops) { |
| Op op2 = op; |
| if (safeToTransform(joins, vars, op)) { |
| for (Pair<Var, Var> p : joins) |
| op2 = processFilterWorker(op, p.getLeft(), p.getRight()); |
| } |
| ops2.add(op2); |
| } |
| return ops2; |
| } |
| |
| private static Op rebuild(Op2 subOp, List<Op> ops) { |
| Op chain = OpTable.unit(); |
| for (Op op : ops) { |
| chain = subOp.copy(chain, op); |
| } |
| return chain; |
| } |
| |
| private static boolean isTableUnit(Op op) { |
| if (op instanceof OpTable) { |
| if (((OpTable) op).isJoinIdentity()) |
| return true; |
| } |
| return false; |
| } |
| |
| private static Op processSpecialCaseUnion(Op op, List<Pair<Var, Var>> joins) { |
| if (op instanceof OpUnion) { |
| OpUnion union = (OpUnion) op; |
| |
| Set<Var> leftVars = OpVars.visibleVars(union.getLeft()); |
| Set<Var> rightVars = OpVars.visibleVars(union.getRight()); |
| |
| // Is a special case if there is any implicit join where only one of the variables mentioned in an |
| // implicit join is present on one side of the union |
| boolean leftEmpty = false, rightEmpty = false; |
| for (Pair<Var, Var> p : joins) { |
| if (leftEmpty || !leftVars.contains(p.getLeft()) || !leftVars.contains(p.getRight())) leftEmpty = true; |
| if (rightEmpty || !rightVars.contains(p.getLeft()) || !rightVars.contains(p.getRight())) rightEmpty = true; |
| } |
| |
| // If both sides of the union guarantee to produce errors then just replace the whole thing with table empty |
| if (leftEmpty && rightEmpty) return OpTable.empty(); |
| if (leftEmpty) return union.getRight(); |
| if (rightEmpty) return union.getLeft(); |
| } |
| // Leave untouched |
| return op; |
| } |
| |
| // ---- Transformation |
| |
| private static Op processFilterWorker(Op op, Var find, Var replace) { |
| return subst(op, find, replace); |
| } |
| |
| private static Op subst(Op subOp, Var find, Var replace) { |
| Op op = Substitute.substitute(subOp, find, replace); |
| return OpAssign.assign(op, find, new ExprVar(replace)); |
| } |
| } |