| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.jena.sparql.algebra.optimize; |
| |
| import java.util.List ; |
| import java.util.Set ; |
| |
| import org.apache.jena.query.ARQ ; |
| import org.apache.jena.query.Query ; |
| import org.apache.jena.query.SortCondition ; |
| import org.apache.jena.sparql.algebra.Op ; |
| import org.apache.jena.sparql.algebra.TransformCopy ; |
| import org.apache.jena.sparql.algebra.op.* ; |
| import org.apache.jena.sparql.core.Var ; |
| import org.apache.jena.sparql.expr.ExprVars ; |
| import org.apache.jena.sparql.util.Symbol ; |
| |
| |
| /** |
| * Optimization that changes queries that uses <tt>OFFSET/LIMIT</tt> and <tt>ORDER BY</tt> |
| * to execute using <tt>Top N</tt>: i.e. while executing, keep only the top N items seen. |
| * This avoids full sort of the whole results, saving space and time. |
| */ |
| public class TransformTopN extends TransformCopy { |
| |
| // This must be less than ARQ.spillToDiskThreshold. |
| // Otherwise DISTINCT ends up reordering. |
| private static final int defaultTopNSortingThreshold = 1000; |
| /** @deprecated Use {ARQ.topNSortingThreshold} */ |
| public static final Symbol externalSortBufferSize = ARQ.topNSortingThreshold; |
| |
| /* For reference: from the algebra generation of a query, the order of operations is: |
| * Limit/Offset |
| * Distinct/reduce |
| * Project |
| * OrderBy |
| * Values |
| * Having |
| * Select Expressions |
| * Group |
| * but note that a subquery can be used to create other orders. |
| */ |
| |
| @Override |
| public Op transform(final OpSlice opSlice, final Op inSubOp) { |
| /* |
| * This looks for all the following cases of slice with optionally |
| * distinct and/or project follow by order. It is quicker to execute |
| * by avoiding the full sort, just track the top items. |
| * |
| * + slice-order => topN |
| * + slice-distinct|reduced-order => top-distinct |
| * + slice-project-order => project-top |
| * + slice distinct project order => topN distinct project (only some cases) |
| * |
| * If the slice has an offset, a (slice X _) is added. (slice 0 _) is a no-op and it not added. |
| * |
| * In detail: |
| * |
| * Case 1: |
| * (slice X N |
| * (order (cond) PATTERN) ) |
| * ==> |
| * (slice X _ |
| * (top (X+N cond) PATTERN) ) |
| * |
| * Case 2: |
| * (slice X N |
| * (distinct or reduced |
| * (order (cond) PATTERN) )) |
| * ==> |
| * (slice X _ |
| * (top (X+N cond) (distinct PATTERN)) |
| * |
| * Case 3: |
| * (slice X N |
| * (project (vars) |
| * (order (cond) |
| * PATTERN )))) |
| * ==> |
| * (slice X _ |
| * (project (vars) |
| * (top (X+N cond) PATTERN) )) |
| * |
| * Case 4: |
| * (slice X N |
| * (distinct |
| * (project (vars) |
| * (order (cond) PATTERN) ))) |
| * ==> |
| * If project-order can be swapped, |
| * (slice X N |
| * (top (X+N) (cond) |
| * (distinct |
| * (project (vars) |
| * PATTERN) ))) |
| * |
| * Care needed: because of the need to keep distinct, we can't |
| * process like case 3. Reversing (order) and (project) can only |
| * be done if the projection variables include all variables used |
| * by the sort conditions. |
| * |
| * When there is no project, we can push the distinct under the topN, |
| * but, when there is, the sort variables may include one projected away, |
| * it's not possible to do this with project. The key is that |
| * distinct-project can change the number of rows in ways that mean |
| * we can not predict the topN slice. |
| */ |
| |
| /* Algorthm: |
| * Test to see if the slice is small enough. |
| * Extract distinct/reduce, and projection details. |
| * Is it an (order)? If no - not applicable. |
| * |
| * If slice-project-order |
| * Treat as project-slice-order |
| * Output project-top |
| * If slice-distinct-project-order |
| * Test to see if project and order can swap |
| * If they can, output top-distinct-project |
| * else no action. |
| * |
| * Add a slice if there was an OFFSET. |
| * Distinct and reduce are treated as distinct. |
| */ |
| |
| Op subOp = inSubOp ; |
| |
| if ( opSlice.getLength() == Query.NOLIMIT ) |
| return doNothing(opSlice, inSubOp) ; |
| long limit = opSlice.getLength() ; |
| long offset = ( opSlice.getStart() != Query.NOLIMIT ) ? opSlice.getStart() : 0L ; |
| long N = limit+offset ; |
| |
| int threshold = defaultTopNSortingThreshold; |
| Number x = (Number)(ARQ.getContext().get(ARQ.topNSortingThreshold)) ; |
| if ( x != null ) |
| threshold = x.intValue(); |
| |
| if ( N >= threshold ) |
| return doNothing(opSlice, inSubOp) ; |
| |
| boolean distinct = false ; |
| boolean reduce = false ; |
| // Extract any distinct/reduce. |
| if ( subOp instanceof OpDistinct ) { |
| distinct = true ; |
| subOp = ((Op1)subOp).getSubOp() ; |
| } else if ( subOp instanceof OpReduced ) { |
| distinct = true ; |
| subOp = ((Op1)subOp).getSubOp() ; |
| } |
| |
| // Extract any projection. |
| List<Var> projection = null ; |
| if ( subOp instanceof OpProject ) { |
| OpProject opProject = (OpProject)subOp ; |
| projection = opProject.getVars() ; |
| subOp = opProject.getSubOp() ; |
| } |
| |
| if ( ! ( subOp instanceof OpOrder ) ) |
| return doNothing(opSlice, inSubOp) ; |
| // We have found an (order) |
| OpOrder opOrder = (OpOrder)subOp ; |
| subOp = opOrder.getSubOp() ; |
| |
| // Check safety for the distinct/reduce case |
| |
| if ( (reduce || distinct) && projection != null ) { |
| List<SortCondition> sortConditions = opOrder.getConditions() ; |
| Set<Var> orderVars = ExprVars.getVarsMentioned(sortConditions) ; |
| |
| // Slice and distinct interact. |
| |
| // Is the ordering stable with respect to the project? |
| // i.e. can we swap them and put a (top) for slice-order |
| // All project vars must be in the order so we can have: |
| // slice-distinct-project-order => top-distinct-project |
| |
| // If the projection is narrower than the order it is not safe. |
| |
| if ( ! projection.containsAll(orderVars) ) |
| return doNothing(opSlice, inSubOp) ; |
| } |
| |
| Op newOp = subOp ; |
| |
| if ( ( reduce || distinct ) && projection != null ) |
| newOp = new OpProject(newOp, projection) ; |
| |
| if ( distinct ) |
| newOp = OpDistinct.create(newOp) ; |
| if ( reduce ) |
| newOp = OpReduced.create(newOp) ; |
| |
| newOp = new OpTopN( newOp, (int)N, opOrder.getConditions() ) ; |
| |
| if ( ! reduce && ! distinct && projection != null ) |
| newOp = new OpProject(newOp, projection) ; |
| |
| if ( opSlice.getStart() > 0 ) |
| newOp = new OpSlice(newOp, opSlice.getStart(), Query.NOLIMIT) ; |
| |
| return newOp ; |
| } |
| |
| /** Marker to indicate that this transform is not applied at this point */ |
| private Op doNothing(OpSlice opSlice, Op subOp) { |
| return super.transform(opSlice, subOp) ; |
| } |
| } |