Yet more fixes and test cases for assignment inlining (JENA-780)
- Don't inline out of DISTINCT/REDUCED
- Ensure we don't break projections when editing out inlined assignments
that were projected
- Ensure that editing projections only edits the outermost projection
diff --git a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformEliminateAssignments.java b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformEliminateAssignments.java
index d36f036..ee2b91d 100644
--- a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformEliminateAssignments.java
+++ b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformEliminateAssignments.java
@@ -37,6 +37,7 @@
import com.hp.hpl.jena.sparql.algebra.Transform;
import com.hp.hpl.jena.sparql.algebra.TransformCopy;
import com.hp.hpl.jena.sparql.algebra.Transformer;
+import com.hp.hpl.jena.sparql.algebra.op.OpDistinct;
import com.hp.hpl.jena.sparql.algebra.op.OpExt;
import com.hp.hpl.jena.sparql.algebra.op.OpExtend;
import com.hp.hpl.jena.sparql.algebra.op.OpFilter;
@@ -46,6 +47,7 @@
import com.hp.hpl.jena.sparql.algebra.op.OpMinus;
import com.hp.hpl.jena.sparql.algebra.op.OpOrder;
import com.hp.hpl.jena.sparql.algebra.op.OpProject;
+import com.hp.hpl.jena.sparql.algebra.op.OpReduced;
import com.hp.hpl.jena.sparql.algebra.op.OpTopN;
import com.hp.hpl.jena.sparql.algebra.op.OpUnion;
import com.hp.hpl.jena.sparql.core.Var;
@@ -71,10 +73,11 @@
* </p>
* <ol>
* <li>Assignments where the assigned variable is used only once in a subsequent
- * assignment can be in-lined</li>
+ * expression can be in-lined</li>
* <li>Assignments where the assigned value is never used elsewhere can be
* eliminated</li>
* </ol>
+ * <h3>Eligibility for In-lining</h3>
* <p>
* Both of these changes can only happen inside of projections as otherwise we
* have to assume that the user may need the resulting variable and thus we
@@ -84,19 +87,45 @@
* expression is deterministic is defined by {@link ExprLib#isStable(Expr)}.
* </p>
* <p>
+ * In-lining must also respect variable scope, it is possible with a nested
+ * query to have an assignment in-lined out through a projection that projects
+ * it provided that the projection is appropriately modified.
+ * </p>
+ * <p>
+ * There are also various other conditions on assignments that might be eligible
+ * for in-lining:
+ * </p>
+ * <ul>
+ * <li>They cannot occur inside a n-ary operator (e.g. join, {@code UNION},
+ * {@code OPTIONAL} etc.) because then in-lining would change semantics because
+ * an expression that previously was only valid for part of the query might
+ * become valid for a larger part of the query</li>
+ * <li>They cannot be in-lined into an {@code EXISTS} or {@code NOT EXISTS} in a
+ * filter</li>
+ * <li>They cannot be in-lined out of a {@code DISTINCT} or {@code REDUCED}
+ * because the assignment would be relevant for the purposes of distinctness</li>
+ * </ul>
+ * <p>
+ * Please see <a
+ * href="https://issues.apache.org/jira/browse/JENA-780">JENA-780</a> for more
+ * information on this.
+ * </p>
+ * <h3>In-lining Application</h3>
+ * <p>
* Assignments may be in-lined in the following places:
* </p>
* <ul>
- * <li>Filter Expressions</li>
- * <li>Bind and Select Expressions</li>
- * <li>Order By Expressions if aggressive in-lining is enabled or the assigned
- * expression is a constant</li>
+ * <li>{@code FILTER} Expressions</li>
+ * <li>{@code BIND} and Project Expressions</li>
+ * <li>{@code ORDER BY} Expressions if aggressive in-lining is enabled or the
+ * assigned expression is a constant</li>
* </ul>
* <p>
- * In the case of order by we only in-line assignments when aggressive mode is
- * set as the realities of order by are that expressions may be recomputed
- * multiple times and so in-lining may actually hurt performance in those cases
- * unless the expression to be in-lined is itself a constant.
+ * In the case of {@code ORDER BY} we only in-line assignments when aggressive
+ * mode is set unless the assignment is a constant value. This is because during
+ * order by evaluation expressions may be recomputed multiple times and so
+ * in-lining may actually hurt performance in those cases unless the expression
+ * to be in-lined is itself a constant.
* </p>
*/
public class TransformEliminateAssignments extends TransformCopy {
@@ -567,6 +596,20 @@
unsafe();
}
+ @Override
+ public void visit(OpDistinct opDistinct) {
+ // Inlining out through the distinct might change the results of the
+ // distinct
+ unsafe();
+ }
+
+ @Override
+ public void visit(OpReduced opReduced) {
+ // Inlining out through the reduced might change the results of the
+ // reduced
+ unsafe();
+ }
+
private void unsafe() {
// Throw out any assignments because if they would be eligible their
// values can't be bound in every branch of the union and thus
diff --git a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformRemoveAssignment.java b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformRemoveAssignment.java
index da56116..ceddcff 100644
--- a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformRemoveAssignment.java
+++ b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/algebra/optimize/TransformRemoveAssignment.java
@@ -18,6 +18,7 @@
package com.hp.hpl.jena.sparql.algebra.optimize;
+import java.util.ArrayList;
import java.util.List;
import com.hp.hpl.jena.sparql.algebra.Op;
@@ -39,6 +40,7 @@
private Var var;
private Expr expr;
private boolean topmostOnly = true;
+ private boolean aboveExtend = false;
public TransformRemoveAssignment(Var var, Expr expr, boolean topmostOnly) {
this.var = var;
@@ -88,6 +90,9 @@
modified.add(v, orig.getExpr(v));
}
}
+ if (modified.size() > 0 && modified.size() == orig.size())
+ return null;
+
return modified;
}
@@ -96,6 +101,8 @@
VarExprList assignments = processAssignments(opExtend);
if (assignments == null)
return super.transform(opExtend, subOp);
+
+ this.aboveExtend = true;
// Rewrite appropriately
if (this.topmostOnly) {
@@ -114,14 +121,23 @@
return subOp;
}
}
+
}
public Op transform(OpProject opProject, Op subOp) {
if (!opProject.getVars().contains(this.var))
return super.transform(opProject, subOp);
- List<Var> newVars = opProject.getVars();
+ List<Var> newVars = new ArrayList<Var>(opProject.getVars());
newVars.remove(this.var);
- return new OpProject(subOp, newVars);
+ if (this.topmostOnly) {
+ if (this.aboveExtend) {
+ return new OpProject(subOp, newVars);
+ } else {
+ return opProject;
+ }
+ } else {
+ return new OpProject(subOp, newVars);
+ }
}
}
diff --git a/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformEliminateAssignments.java b/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformEliminateAssignments.java
index 52c6cf3..e8e97be 100644
--- a/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformEliminateAssignments.java
+++ b/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TestTransformEliminateAssignments.java
@@ -478,6 +478,30 @@
}
@Test
+ public void ineligible_12() {
+ // Can't inline through a distinct
+ //@formatter:off
+ testNoChange("(project (?y)",
+ " (filter (exprlist ?x)",
+ " (distinct",
+ " (extend (?x true)",
+ " (bgp (triple ?s ?p ?o))))))");
+ //@formatter:on
+ }
+
+ @Test
+ public void ineligible_13() {
+ // Can't inline through a reduced
+ //@formatter:off
+ testNoChange("(project (?y)",
+ " (filter (exprlist ?x)",
+ " (reduced",
+ " (extend (?x true)",
+ " (bgp (triple ?s ?p ?o))))))");
+ //@formatter:on
+ }
+
+ @Test
public void exists_01() {
// We can't inline into an EXISTS since the assignment isn't projected
// out anyway and its an n-ary operator so would change semantics
@@ -587,6 +611,18 @@
}
@Test
+ public void through_project_04() {
+ // Can't inline because the value is projected out
+ //@formatter:off
+ testNoChange("(project (?y)",
+ " (project (?x ?y)",
+ " (filter (exprlist ?x)",
+ " (extend (?x true)",
+ " (bgp (triple ?y <urn:pred> <urn:obj>))))))");
+ //@formatter:on
+ }
+
+ @Test
public void no_merge_01() {
// We should not merge extends
//@formatter:off
@@ -646,4 +682,62 @@
" (table unit))))");
//@formatter:on
}
+
+ @Test
+ public void scope_04() {
+ // Only the topmost assignment should be inlined since any deeper
+ // assignments are technically different variables
+ //@formatter:off
+ test(StrUtils.strjoinNL("(project (?y)",
+ " (filter (exprlist ?x)",
+ " (extend (?x true)",
+ " (project (?z)",
+ " (filter (exprlist (|| ?x (! ?x)))",
+ " (extend (?x false)",
+ " (table unit)))))))"),
+ "(project (?y)",
+ " (filter (exprlist true)",
+ " (project (?z)",
+ " (filter (exprlist (|| ?x (! ?x)))",
+ " (extend (?x false)",
+ " (table unit))))))");
+ //@formatter:on
+ }
+
+ @Test
+ public void scope_05() {
+ // Technically invalid query but validates that the inner projection of
+ // ?x blocks later in-lining
+ //@formatter:off
+ testNoChange("(project (?y)",
+ " (filter (exprlist ?x)",
+ " (extend (?x true)",
+ " (project (?x)",
+ " (filter (exprlist (|| ?x (! ?x)))",
+ " (extend (?x false)",
+ " (table unit)))))))");
+ //@formatter:off
+ }
+
+ @Test
+ public void scope_06() {
+ // In-lining in the outer scope should not change the inner scope
+ //@formatter:off
+ test(StrUtils.strjoinNL("(project (?y)",
+ " (filter (exprlist ?x)",
+ " (extend (?x true)",
+ " (project (?z)",
+ " (project (?x)",
+ " (filter (exprlist (|| ?x (! ?x)))",
+ " (extend (?x false)",
+ " (table unit))))))))"),
+ "(project (?y)",
+ " (filter (exprlist true)",
+ " (project (?z)",
+ " (project (?x)",
+ " (filter (exprlist (|| ?x (! ?x)))",
+ " (extend (?x false)",
+ " (table unit)))))))");
+ //@formatter:off
+ }
}