| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.jena.sparql.algebra.optimize; |
| |
| import java.util.Collection ; |
| import java.util.HashSet; |
| import java.util.Set; |
| |
| import org.apache.jena.query.SortCondition ; |
| import org.apache.jena.sparql.algebra.Op ; |
| import org.apache.jena.sparql.algebra.OpVars ; |
| import org.apache.jena.sparql.algebra.TransformCopy ; |
| import org.apache.jena.sparql.algebra.op.OpDistinct ; |
| import org.apache.jena.sparql.algebra.op.OpOrder ; |
| import org.apache.jena.sparql.algebra.op.OpProject ; |
| import org.apache.jena.sparql.algebra.op.OpReduced ; |
| import org.apache.jena.sparql.core.Var ; |
| |
| /** |
| * <p> |
| * Transforms generic {@code DISTINCT} plus {@code ORDER BY} combinations to |
| * {@code REDUCED} plus {@code ORDER BY} which typically gives better |
| * performance and memory consumption because engines have to keep less data |
| * in-memory to evaluate it. |
| * </p> |
| * <p> |
| * As with most optimizations this is only applied when it is safe to do so. The |
| * criteria for being safe to do so are as follows: |
| * </p> |
| * <ul> |
| * <li>Uses both {@code ORDER BY} and {@code DISTINCT} on the same level of the |
| * query</li> |
| * <li>There is a fixed list of variables to project i.e. not {@code SELECT *}</li> |
| * <li>{@code ORDER BY} conditions cover all the projected variables prior to |
| * the use of any other variables</li> |
| * </ul> |
| * <h3>Related Optimizations</h3> |
| * <p> |
| * See also {@link TransformOrderByDistinctApplication} which is a better |
| * optimization for these kinds of queries but only applies to a more limited range |
| * of queries. Where possible that optimization is applied in preference to this |
| * one. |
| * </p> |
| * <p> |
| * {@link TransformTopN} covers the case of {@code DISTINCT} plus |
| * {@code ORDER BY} where there is also a {@code LIMIT}. Where possible that |
| * optimization is applied in preference to either this or |
| * {@link TransformOrderByDistinctApplication}. |
| * </p> |
| * |
| */ |
| public class TransformDistinctToReduced extends TransformCopy { |
| |
| public TransformDistinctToReduced() {} |
| |
| // Best is this is after TransformTopN but they are order independent |
| // TopN of "reduced or distinct of order" is handled. |
| //@Override |
| public Op transform1(OpDistinct opDistinct, Op subOp) { |
| if (subOp instanceof OpProject) { |
| OpProject opProject = (OpProject) subOp; |
| if (opProject.getSubOp() instanceof OpOrder) { |
| OpOrder opOrder = (OpOrder) opProject.getSubOp(); |
| Set<Var> projectVars = new HashSet<>(opProject.getVars()) ; |
| if (isSafe(projectVars, opOrder)) { |
| return OpReduced.create(subOp); |
| } |
| } |
| } |
| return super.transform(opDistinct, subOp); |
| } |
| |
| |
| @Override |
| public Op transform(OpDistinct opDistinct, Op subOp) { |
| |
| OpOrder opOrder = null ; |
| Set<Var> projectVars = null ; |
| /* SELECT DISTINCT * {} ORDER BY |
| * giving an alegbra expression of the form: |
| * (distinct |
| * (order |
| */ |
| if (subOp instanceof OpOrder) { |
| opOrder = (OpOrder) subOp; |
| projectVars = OpVars.visibleVars(subOp) ; |
| } else if (subOp instanceof OpProject) { |
| OpProject opProject = (OpProject) subOp; |
| if (opProject.getSubOp() instanceof OpOrder) { |
| projectVars = new HashSet<>(opProject.getVars()) ; |
| opOrder = (OpOrder) opProject.getSubOp(); |
| } |
| } |
| |
| if ( projectVars == null ) |
| return super.transform(opDistinct, subOp) ; |
| |
| if (isSafe(projectVars, opOrder)) |
| return OpReduced.create(subOp); |
| |
| return super.transform(opDistinct, subOp); |
| } |
| |
| protected boolean isSafe(Set<Var> projectVars, OpOrder opOrder) { |
| Set<Var> seenVars = new HashSet<>(); |
| |
| // For the optimization to be safe all project variables must appear in |
| // the ordering prior to any unprojected variables |
| // Ordering by expressions is fine provided they use only projected |
| // variables |
| boolean ok = true; |
| for (SortCondition cond : opOrder.getConditions()) { |
| if (!isValidSortCondition(cond, projectVars, seenVars)) { |
| ok = false; |
| break; |
| } |
| // As soon as we've seen all variables we know this is safe and any |
| // further sort conditions are irrelevant |
| if (seenVars.size() == projectVars.size()) |
| return true ; |
| } |
| // The projects vars must all have been seen. |
| return (seenVars.size() == projectVars.size()) ; |
| } |
| |
| /** |
| * Determines whether a sort condition is valid in terms of this optimizer |
| * |
| * @param cond |
| * Sort Condition |
| * @param projectVars |
| * Project Variables |
| * @return True if valid, false otherwise |
| */ |
| private boolean isValidSortCondition(SortCondition cond, Collection<Var> projectVars, Set<Var> seenVars) { |
| if (cond.getExpression().isVariable()) { |
| if (projectVars.contains(cond.getExpression().asVar())) { |
| seenVars.add(cond.getExpression().asVar()); |
| return true; |
| } |
| return false; |
| } else { |
| for (Var v : cond.getExpression().getVarsMentioned()) { |
| if (!projectVars.contains(v)) |
| return false; |
| seenVars.add(v); |
| } |
| return true; |
| } |
| } |
| } |