| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.jena.sparql.algebra.optimize; |
| |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| |
| import org.apache.jena.atlas.lib.Pair; |
| import org.apache.jena.query.ARQ ; |
| import org.apache.jena.sparql.algebra.* ; |
| import org.apache.jena.sparql.algebra.op.* ; |
| import org.apache.jena.sparql.core.Var ; |
| import org.apache.jena.sparql.core.VarExprList ; |
| import org.apache.jena.sparql.engine.binding.BindingFactory ; |
| import org.apache.jena.sparql.engine.binding.BindingMap ; |
| import org.apache.jena.sparql.expr.* ; |
| import org.apache.jena.sparql.util.Context ; |
| |
| /** |
| * A transform that aims to optimize queries where there is an inequality |
| * constraint on a variable in an attempt to speed up evaluation e.g |
| * |
| * <pre> |
| * PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> |
| * SELECT * |
| * WHERE |
| * { |
| * ?s rdf:type <http://type> ; |
| * ?p ?o . |
| * FILTER(?p != rdf:type) |
| * } |
| * </pre> |
| * |
| * Would transform to the following: |
| * |
| * <pre> |
| * PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> |
| * SELECT * |
| * WHERE |
| * { |
| * ?s rdf:type <http://type> ; |
| * ?p ?o . |
| * MINUS { VALUES ?p { rdf:type } } |
| * } |
| * </pre> |
| * |
| * <h3>Status</h3> |
| * <p> |
| * Performance testing has shown that often this gives minimal performance |
| * benefit so this optimization is not enabled by default. It may be explicitly |
| * enabled by setting the {@link ARQ#optFilterInequality} symbol in your |
| * {@link Context} or the ARQ global context ({@link ARQ#getContext()} to |
| * {@code true} |
| * </p> |
| * |
| * <h3>Applicability</h3> |
| * <p> |
| * This optimizer is conservative in that it only makes the optimization where |
| * the inequality constraint is against a non-literal as otherwise substituting |
| * the value changes the query semantics because it switches from value equality |
| * to the more restrictive term equality. The optimization is safe for |
| * non-literals because for those value and term equality are equivalent (in |
| * fact value equality is defined to be term equality). |
| * </p> |
| * <p> |
| * There are also various nested algebra structures that can make the |
| * optimization unsafe and so it does not take place if any of those situations |
| * is detected. |
| * </p> |
| */ |
| public class TransformFilterInequality extends TransformCopy { |
| /** |
| * Creates a new transform |
| */ |
| public TransformFilterInequality() { |
| } |
| |
| @Override |
| public Op transform(OpFilter opFilter, Op subOp) { |
| Op op = apply(opFilter.getExprs(), subOp); |
| if (op == null) |
| return super.transform(opFilter, subOp); |
| return op; |
| } |
| |
| private static Op apply(ExprList exprs, Op subOp) { |
| // ---- Find and extract any inequality filters. |
| Pair<List<Pair<Var, NodeValue>>, ExprList> p = preprocessFilterInequality(exprs); |
| if (p == null || p.getLeft().size() == 0) |
| return null; |
| |
| List<Pair<Var, NodeValue>> inequalities = p.getLeft(); |
| Collection<Var> varsMentioned = varsMentionedInInequalityFilters(inequalities); |
| ExprList remaining = p.getRight(); |
| |
| // If any of the conditions overlap the optimization is unsafe |
| // (the query is also likely incorrect but that isn't our problem) |
| |
| // TODO There is actually a special case here, if the inequality |
| // constraints are conflicting then we can special case to table empty. |
| |
| // ---- Check if the subOp is the right shape to transform. |
| Op op = subOp; |
| |
| // Special case : deduce that the filter will always "eval unbound" |
| // hence eliminate all rows. Return the empty table. |
| if (testSpecialCaseUnused(subOp, inequalities, remaining)) |
| return OpTable.empty(); |
| |
| // Special case: the deep left op of a OpConditional/OpLeftJoin is unit |
| // table. |
| // This is |
| // { OPTIONAL{P1} OPTIONAL{P2} ... FILTER(?x = :x) } |
| if (testSpecialCase1(subOp, inequalities, remaining)) { |
| // Find backbone of ops |
| List<Op> ops = extractOptionals(subOp); |
| ops = processSpecialCase1(ops, inequalities); |
| // Put back together |
| op = rebuild((Op2) subOp, ops); |
| // Put all filters - either we optimized, or we left alone. |
| // Either way, the complete set of filter expressions. |
| op = OpFilter.filterBy(exprs, op); |
| return op; |
| } |
| |
| // ---- Transform |
| |
| if (!safeToTransform(varsMentioned, op)) |
| return null; |
| |
| op = processFilterWorker(op, inequalities); |
| |
| // ---- Place any filter expressions around the processed sub op. |
| if (remaining.size() > 0) |
| op = OpFilter.filterBy(remaining, op); |
| return op; |
| } |
| |
| // --- find and extract |
| private static Pair<List<Pair<Var, NodeValue>>, ExprList> preprocessFilterInequality(ExprList exprs) { |
| List<Pair<Var, NodeValue>> exprsFilterInequality = new ArrayList<>(); |
| ExprList exprsOther = new ExprList(); |
| for (Expr e : exprs.getList()) { |
| Pair<Var, NodeValue> p = preprocess(e); |
| if (p != null) |
| exprsFilterInequality.add(p); |
| else |
| exprsOther.add(e); |
| } |
| if (exprsFilterInequality.size() == 0) |
| return null; |
| return Pair.create(exprsFilterInequality, exprsOther); |
| } |
| |
| private static Pair<Var, NodeValue> preprocess(Expr e) { |
| if (!(e instanceof E_NotEquals)) |
| return null; |
| |
| ExprFunction2 eq = (ExprFunction2) e; |
| Expr left = eq.getArg1(); |
| Expr right = eq.getArg2(); |
| |
| Var var = null; |
| NodeValue constant = null; |
| |
| if (left.isVariable() && right.isConstant()) { |
| var = left.asVar(); |
| constant = right.getConstant(); |
| } else if (right.isVariable() && left.isConstant()) { |
| var = right.asVar(); |
| constant = left.getConstant(); |
| } |
| |
| if (var == null || constant == null) |
| return null; |
| |
| // Final check for "!=" where a FILTER != can do value matching when the |
| // graph does not. |
| // Value based? |
| if (!ARQ.isStrictMode() && constant.isLiteral()) |
| return null; |
| |
| return Pair.create(var, constant); |
| } |
| |
| private static Collection<Var> varsMentionedInInequalityFilters(List<Pair<Var, NodeValue>> inequalities) { |
| Set<Var> vars = new HashSet<>(); |
| for (Pair<Var, NodeValue> p : inequalities) |
| vars.add(p.getLeft()); |
| return vars; |
| } |
| |
| private static boolean safeToTransform(Collection<Var> varsEquality, Op op) { |
| // TODO This may actually be overly conservative, since we aren't going |
| // to perform substitution we can potentially remove much of this method |
| |
| // Structure as a visitor? |
| if (op instanceof OpBGP || op instanceof OpQuadPattern) |
| return true; |
| |
| if (op instanceof OpFilter) { |
| OpFilter opf = (OpFilter) op; |
| // Expressions are always safe transform by substitution. |
| return safeToTransform(varsEquality, opf.getSubOp()); |
| } |
| |
| // This will be applied also in sub-calls of the Transform but queries |
| // are very rarely so deep that it matters. |
| if (op instanceof OpSequence) { |
| OpN opN = (OpN) op; |
| for (Op subOp : opN.getElements()) { |
| if (!safeToTransform(varsEquality, subOp)) |
| return false; |
| } |
| return true; |
| } |
| |
| if (op instanceof OpJoin || op instanceof OpUnion) { |
| Op2 op2 = (Op2) op; |
| return safeToTransform(varsEquality, op2.getLeft()) && safeToTransform(varsEquality, op2.getRight()); |
| } |
| |
| // Not safe unless filter variables are mentioned on the LHS. |
| if (op instanceof OpConditional || op instanceof OpLeftJoin) { |
| Op2 opleftjoin = (Op2) op; |
| |
| if (!safeToTransform(varsEquality, opleftjoin.getLeft()) || !safeToTransform(varsEquality, opleftjoin.getRight())) |
| return false; |
| |
| // Not only must the left and right be safe to transform, |
| // but the equality variable must be known to be always set. |
| |
| // If the varsLeft are disjoint from assigned vars, |
| // we may be able to push assign down right |
| // (this generalises the unit table case specialcase1) |
| // Needs more investigation. |
| |
| Op opLeft = opleftjoin.getLeft(); |
| Set<Var> varsLeft = OpVars.visibleVars(opLeft); |
| if (varsLeft.containsAll(varsEquality)) |
| return true; |
| return false; |
| } |
| |
| if (op instanceof OpGraph) { |
| OpGraph opg = (OpGraph) op; |
| return safeToTransform(varsEquality, opg.getSubOp()); |
| } |
| |
| // Subquery - assume scope rewriting has already been applied. |
| if (op instanceof OpModifier) { |
| // ORDER BY? |
| OpModifier opMod = (OpModifier) op; |
| if (opMod instanceof OpProject) { |
| OpProject opProject = (OpProject) op; |
| // Writing "SELECT ?var" for "?var" -> a value would need |
| // AS-ification. |
| for (Var v : opProject.getVars()) { |
| if (varsEquality.contains(v)) |
| return false; |
| } |
| } |
| return safeToTransform(varsEquality, opMod.getSubOp()); |
| } |
| |
| if (op instanceof OpGroup) { |
| OpGroup opGroup = (OpGroup) op; |
| VarExprList varExprList = opGroup.getGroupVars(); |
| return safeToTransform(varsEquality, varExprList) && safeToTransform(varsEquality, opGroup.getSubOp()); |
| } |
| |
| if (op instanceof OpTable) { |
| OpTable opTable = (OpTable) op; |
| if (opTable.isJoinIdentity()) |
| return true; |
| } |
| |
| // Op1 - OpGroup |
| // Op1 - OpOrder |
| // Op1 - OpAssign, OpExtend |
| // Op1 - OpFilter - done. |
| // Op1 - OpLabel - easy |
| // Op1 - OpService - no. |
| |
| return false; |
| } |
| |
| private static boolean safeToTransform(Collection<Var> varsEquality, VarExprList varsExprList) { |
| // If the named variable is used, unsafe to rewrite. |
| return Collections.disjoint(varsExprList.getVars(), varsEquality); |
| } |
| |
| // -- A special case |
| |
| private static boolean testSpecialCaseUnused(Op op, List<Pair<Var, NodeValue>> equalities, ExprList remaining) { |
| // If the op does not contain the var at all, for some equality |
| // then the filter expression will be "eval unbound" i.e. false. |
| // We can return empty table. |
| Set<Var> patternVars = OpVars.visibleVars(op); |
| for (Pair<Var, NodeValue> p : equalities) { |
| if (!patternVars.contains(p.getLeft())) |
| return true; |
| } |
| return false; |
| } |
| |
| // If a sequence of OPTIONALS, and nothing prior to the first, we end up |
| // with a unit table on the left side of a next of LeftJoin/conditionals. |
| |
| private static boolean testSpecialCase1(Op op, List<Pair<Var, NodeValue>> equalities, ExprList remaining) { |
| while (op instanceof OpConditional || op instanceof OpLeftJoin) { |
| Op2 opleftjoin2 = (Op2) op; |
| op = opleftjoin2.getLeft(); |
| } |
| return isUnitTable(op); |
| } |
| |
| private static List<Op> extractOptionals(Op op) { |
| List<Op> chain = new ArrayList<>(); |
| while (op instanceof OpConditional || op instanceof OpLeftJoin) { |
| Op2 opleftjoin2 = (Op2) op; |
| chain.add(opleftjoin2.getRight()); |
| op = opleftjoin2.getLeft(); |
| } |
| return chain; |
| } |
| |
| private static List<Op> processSpecialCase1(List<Op> ops, List<Pair<Var, NodeValue>> inequalities) { |
| List<Op> ops2 = new ArrayList<>(); |
| Collection<Var> vars = varsMentionedInInequalityFilters(inequalities); |
| |
| for (Op op : ops) { |
| Op op2 = op; |
| if (safeToTransform(vars, op)) { |
| op2 = processFilterWorker(op, inequalities); |
| } |
| ops2.add(op2); |
| } |
| return ops2; |
| } |
| |
| private static Op rebuild(Op2 subOp, List<Op> ops) { |
| Op chain = OpTable.unit(); |
| for (Op op : ops) { |
| chain = subOp.copy(chain, op); |
| } |
| return chain; |
| } |
| |
| private static boolean isUnitTable(Op op) { |
| if (op instanceof OpTable) { |
| if (((OpTable) op).isJoinIdentity()) |
| return true; |
| } |
| return false; |
| } |
| |
| private static Op processFilterWorker(Op op, List<Pair<Var, NodeValue>> inequalities) { |
| // Firstly find all the possible values for each variable |
| Map<Var, Set<NodeValue>> possibleValues = new HashMap<>(); |
| for (Pair<Var, NodeValue> inequalityTest : inequalities) { |
| if (!possibleValues.containsKey(inequalityTest.getLeft())) { |
| possibleValues.put(inequalityTest.getLeft(), new HashSet<NodeValue>()); |
| } |
| possibleValues.get(inequalityTest.getLeft()).add(inequalityTest.getRight()); |
| } |
| |
| // Then combine them into all possible rows to be eliminated |
| Table table = buildTable(possibleValues); |
| |
| // Then apply the MINUS |
| return OpMinus.create(op, OpTable.create(table)); |
| } |
| |
| private static Table buildTable(Map<Var, Set<NodeValue>> possibleValues) { |
| if (possibleValues.size() == 0) |
| return TableFactory.createEmpty(); |
| Table table = TableFactory.create(); |
| |
| // Although each filter condition must apply for a row to be accepted |
| // they are actually independent since only one condition needs to fail |
| // for the filter to reject the row. Thus for each unique variable/value |
| // combination a single row must be produced |
| for (Var v : possibleValues.keySet()) { |
| for (NodeValue value : possibleValues.get(v)) { |
| BindingMap b = BindingFactory.create(); |
| b.add(v, value.asNode()); |
| table.addBinding(b); |
| } |
| } |
| return table; |
| } |
| } |