blob: d74d2575cb5ef99b40a5db445b281c2014fee0f9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.sparql.algebra.optimize;
import java.util.List;
import org.apache.jena.query.ARQ ;
import org.apache.jena.query.SortCondition ;
import org.apache.jena.sparql.algebra.Op ;
import org.apache.jena.sparql.algebra.TransformCopy ;
import org.apache.jena.sparql.algebra.op.OpDistinct ;
import org.apache.jena.sparql.algebra.op.OpOrder ;
import org.apache.jena.sparql.algebra.op.OpProject ;
import org.apache.jena.sparql.algebra.op.OpReduced ;
import org.apache.jena.sparql.core.Var ;
/**
* <p>
* Improved optimization for {@code ORDER BY} plus {@code DISTINCT} or
* {@code REDUCED} combinations, see JENA-441 for original proposal and
* discussion.
* </p>
* <p>
* This optimization is enabled by default as with most ARQ optimizations and
* may be disabled by setting the symbol
* {@link ARQ#optOrderByDistinctApplication} to false.
* </p>
* <h3>Optimization Applicability</h3>
* <p>
* This is a limited optimization that applies in the case where you have a
* query that meets the following conditions:
* </p>
* <ul>
* <li>Uses both {@code ORDER BY} and {@code DISTINCT} or {@code REDUCED} on the
* same level of the query</li>
* <li>There is a fixed list of variables to project i.e. not {@code SELECT *}</li>
* <li>{@code ORDER BY} conditions only use variables present in the project
* list</li>
* </ul>
* <p>
* Essentially this takes algebras of the following form:
* </p>
*
* <pre>
* (distinct
* (project (?var)
* (order (?var)
* ... )))
* </pre>
* <p>
* And produces algebra of the following form:
* </p>
*
* <pre>
* (order (?var)
* (distinct
* (project (?var)
* ... )))
* </pre>
* <p>
* In the general case this in unsafe because it would change the semantics of
* the query since {@code ORDER BY} can access variables that are not projected.
* However if the conditions outlined are met then this optimization is safe,
* the algebras will be semantically equivalent and the resulting form likely
* significantly more performant, of course YMMV depending on how much data you
* are querying.
* </p>
*/
public class TransformOrderByDistinctApplication extends TransformCopy {
@Override
public Op transform(OpDistinct opDistinct, Op subOp) {
if (subOp instanceof OpProject) {
OpProject project = (OpProject) subOp;
// At the project stage everything is a simple variable
// Inner operation must be an ORDER BY
if (project.getSubOp() instanceof OpOrder) {
List<Var> projectVars = project.getVars();
OpOrder order = (OpOrder) project.getSubOp();
// Everything we wish to order by must only use variables that
// appear in the project list
boolean ok = true;
for (SortCondition condition : order.getConditions()) {
if (!isValidSortCondition(condition, projectVars)) {
ok = false;
break;
}
}
// Everything checks out so we can make the change
if (ok) {
OpProject newProject = new OpProject(order.getSubOp(), project.getVars());
OpDistinct newDistinct = new OpDistinct(newProject);
return new OpOrder(newDistinct, order.getConditions());
}
}
} else {
if ( subOp instanceof OpOrder) {
// (distinct * (order ...)) ==> (order (distinct ...))
OpOrder order = (OpOrder)subOp;
OpDistinct newDistinct = new OpDistinct(order.getSubOp());
return new OpOrder(newDistinct, order.getConditions());
}
}
// If we reach here then this transform is not applicable
return super.transform(opDistinct, subOp);
}
@Override
public Op transform(OpReduced opReduced, Op subOp) {
if (subOp instanceof OpProject) {
OpProject project = (OpProject) subOp;
// At the project stage everything is a simple variable
// Inner operation must be an ORDER BY
if (project.getSubOp() instanceof OpOrder) {
List<Var> projectVars = project.getVars();
OpOrder order = (OpOrder) project.getSubOp();
// Everything we wish to order by must only use variables that
// appear in the project list
boolean ok = true;
for (SortCondition condition : order.getConditions()) {
if (!isValidSortCondition(condition, projectVars)) {
ok = false;
break;
}
}
// Everything checks out so we can make the change
if (ok) {
OpProject newProject = new OpProject(order.getSubOp(), project.getVars());
Op newReduced = OpReduced.create(newProject);
return new OpOrder(newReduced, order.getConditions());
}
}
}
// If we reach here then this transform is not applicable
return super.transform(opReduced, subOp);
}
/**
* Determines whether a sort condition is valid in terms of this optimizer
*
* @param cond
* Sort Condition
* @param projectVars
* Project Variables
* @return True if valid, false otherwise
*/
private boolean isValidSortCondition(SortCondition cond, List<Var> projectVars) {
if (cond.getExpression().isVariable()) {
return projectVars.contains(cond.getExpression().asVar());
} else {
for (Var v : cond.getExpression().getVarsMentioned()) {
if (!projectVars.contains(v))
return false;
}
return true;
}
}
}