| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.spatial.prefix; |
| |
| import java.io.IOException; |
| import java.util.Iterator; |
| |
| import org.locationtech.spatial4j.shape.Shape; |
| import org.locationtech.spatial4j.shape.SpatialRelation; |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.search.DocIdSet; |
| import org.apache.lucene.spatial.prefix.tree.Cell; |
| import org.apache.lucene.spatial.prefix.tree.CellIterator; |
| import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; |
| import org.apache.lucene.util.BytesRef; |
| |
| /** |
| * Traverses a {@link SpatialPrefixTree} indexed field, using the template and |
| * visitor design patterns for subclasses to guide the traversal and collect |
| * matching documents. |
| * <p> |
| * Subclasses implement {@link #getDocIdSet(org.apache.lucene.index.LeafReaderContext)} |
| * by instantiating a custom {@link VisitorTemplate} subclass (i.e. an anonymous inner class) |
| * and implement the required methods. |
| * |
| * @lucene.internal |
| */ |
| public abstract class AbstractVisitingPrefixTreeQuery extends AbstractPrefixTreeQuery { |
| |
| //Historical note: this code resulted from a refactoring of RecursivePrefixTreeQuery, |
| // which in turn came out of SOLR-2155 |
| |
| //This class perhaps could have been implemented in terms of FilteredTermsEnum & MultiTermQuery. |
| // Maybe so for simple Intersects predicate but not for when we want to collect terms |
| // differently depending on cell state like IsWithin and for fuzzy/accurate collection planned improvements. At |
| // least it would just make things more complicated. |
| |
| protected final int prefixGridScanLevel;//at least one less than grid.getMaxLevels() |
| |
| public AbstractVisitingPrefixTreeQuery(Shape queryShape, String fieldName, SpatialPrefixTree grid, |
| int detailLevel, int prefixGridScanLevel) { |
| super(queryShape, fieldName, grid, detailLevel); |
| this.prefixGridScanLevel = Math.max(0, Math.min(prefixGridScanLevel, grid.getMaxLevels() - 1)); |
| assert detailLevel <= grid.getMaxLevels(); |
| } |
| |
| /** |
| * An abstract class designed to make it easy to implement predicates or |
| * other operations on a {@link SpatialPrefixTree} indexed field. An instance |
| * of this class is not designed to be re-used across LeafReaderContext |
| * instances so simply create a new one per-leaf. |
| * The {@link #getDocIdSet()} method here starts the work. It first checks |
| * that there are indexed terms; if not it quickly returns null. Then it calls |
| * {@link #start()} so a subclass can set up a return value, like an |
| * {@link org.apache.lucene.util.FixedBitSet}. Then it starts the traversal |
| * process, calling {@link #findSubCellsToVisit(org.apache.lucene.spatial.prefix.tree.Cell)} |
| * which by default finds the top cells that intersect {@code queryShape}. If |
| * there isn't an indexed cell for a corresponding cell returned for this |
| * method then it's short-circuited until it finds one, at which point |
| * {@link #visitPrefix(org.apache.lucene.spatial.prefix.tree.Cell)} is called. At |
| * some depths, of the tree, the algorithm switches to a scanning mode that |
| * calls {@link #visitScanned(org.apache.lucene.spatial.prefix.tree.Cell)} |
| * for each leaf cell found. |
| * |
| * @lucene.internal |
| */ |
| public abstract class VisitorTemplate extends BaseTermsEnumTraverser { |
| |
| /* Future potential optimizations: |
| |
| * Can a polygon query shape be optimized / made-simpler at recursive depths |
| (e.g. intersection of shape + cell box) |
| |
| * RE "scan" vs divide & conquer performance decision: |
| We should use termsEnum.docFreq() as an estimate on the number of places at |
| this depth. It would be nice if termsEnum knew how many terms |
| start with the current term without having to repeatedly next() & test to find out. |
| |
| * Perhaps don't do intermediate seek()'s to cells above detailLevel that have Intersects |
| relation because we won't be collecting those docs any way. However seeking |
| does act as a short-circuit. So maybe do some percent of the time or when the level |
| is above some threshold. |
| |
| */ |
| |
| // |
| // TODO MAJOR REFACTOR SIMPLIFICATION BASED ON TreeCellIterator TODO |
| // |
| |
| private VNode curVNode;//current pointer, derived from query shape |
| private BytesRef curVNodeTerm = new BytesRef();//curVNode.cell's term, without leaf. in main loop only |
| |
| private BytesRef thisTerm;//the result of termsEnum.term() |
| private Cell indexedCell;//Cell wrapper of thisTerm. Always updated when thisTerm is. |
| |
| public VisitorTemplate(LeafReaderContext context) throws IOException { |
| super(context); |
| } |
| |
| public DocIdSet getDocIdSet() throws IOException { |
| assert curVNode == null : "Called more than once?"; |
| if (termsEnum == null) |
| return null; |
| if (!nextTerm()) {//advances |
| return null; |
| } |
| |
| curVNode = new VNode(null); |
| curVNode.reset(grid.getWorldCell()); |
| |
| start(); |
| |
| addIntersectingChildren(); |
| |
| main: while (thisTerm != null) {//terminates for other reasons too! |
| |
| //Advance curVNode pointer |
| if (curVNode.children != null) { |
| //-- HAVE CHILDREN: DESCEND |
| assert curVNode.children.hasNext();//if we put it there then it has something |
| preSiblings(curVNode); |
| curVNode = curVNode.children.next(); |
| } else { |
| //-- NO CHILDREN: ADVANCE TO NEXT SIBLING |
| VNode parentVNode = curVNode.parent; |
| while (true) { |
| if (parentVNode == null) |
| break main; // all done |
| if (parentVNode.children.hasNext()) { |
| //advance next sibling |
| curVNode = parentVNode.children.next(); |
| break; |
| } else { |
| //reached end of siblings; pop up |
| postSiblings(parentVNode); |
| parentVNode.children = null;//GC |
| parentVNode = parentVNode.parent; |
| } |
| } |
| } |
| |
| //Seek to curVNode's cell (or skip if termsEnum has moved beyond) |
| final int compare = indexedCell.compareToNoLeaf(curVNode.cell); |
| if (compare > 0) { |
| // The indexed cell is after; continue loop to next query cell |
| continue; |
| } |
| if (compare < 0) { |
| // The indexed cell is before; seek ahead to query cell: |
| // Seek ! |
| curVNode.cell.getTokenBytesNoLeaf(curVNodeTerm); |
| TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(curVNodeTerm); |
| if (seekStatus == TermsEnum.SeekStatus.END) |
| break; // all done |
| thisTerm = termsEnum.term(); |
| indexedCell = grid.readCell(thisTerm, indexedCell); |
| if (seekStatus == TermsEnum.SeekStatus.NOT_FOUND) { |
| // Did we find a leaf of the cell we were looking for or something after? |
| if (!indexedCell.isLeaf() || indexedCell.compareToNoLeaf(curVNode.cell) != 0) |
| continue; // The indexed cell is after; continue loop to next query cell |
| } |
| } |
| // indexedCell == queryCell (disregarding leaf). |
| |
| // If indexedCell is a leaf then there's no prefix (prefix sorts before) -- just visit and continue |
| if (indexedCell.isLeaf()) { |
| visitLeaf(indexedCell);//TODO or query cell? Though shouldn't matter. |
| if (!nextTerm()) break; |
| continue; |
| } |
| // If a prefix (non-leaf) then visit; see if we descend. |
| final boolean descend = visitPrefix(curVNode.cell);//need to use curVNode.cell not indexedCell |
| if (!nextTerm()) break; |
| // Check for adjacent leaf with the same prefix |
| if (indexedCell.isLeaf() && indexedCell.getLevel() == curVNode.cell.getLevel()) { |
| visitLeaf(indexedCell);//TODO or query cell? Though shouldn't matter. |
| if (!nextTerm()) break; |
| } |
| |
| |
| if (descend) { |
| addIntersectingChildren(); |
| } |
| |
| }//main loop |
| |
| return finish(); |
| } |
| |
| /** Called initially, and whenever {@link #visitPrefix(org.apache.lucene.spatial.prefix.tree.Cell)} |
| * returns true. */ |
| private void addIntersectingChildren() throws IOException { |
| assert thisTerm != null; |
| Cell cell = curVNode.cell; |
| if (cell.getLevel() >= detailLevel) |
| throw new IllegalStateException("Spatial logic error"); |
| |
| //Decide whether to continue to divide & conquer, or whether it's time to |
| // scan through terms beneath this cell. |
| // Scanning is a performance optimization trade-off. |
| |
| //TODO use termsEnum.docFreq() as heuristic |
| boolean scan = cell.getLevel() >= prefixGridScanLevel;//simple heuristic |
| |
| if (!scan) { |
| //Divide & conquer (ultimately termsEnum.seek()) |
| |
| Iterator<Cell> subCellsIter = findSubCellsToVisit(cell); |
| if (!subCellsIter.hasNext())//not expected |
| return; |
| curVNode.children = new VNodeCellIterator(subCellsIter, new VNode(curVNode)); |
| |
| } else { |
| //Scan (loop of termsEnum.next()) |
| |
| scan(detailLevel); |
| } |
| } |
| |
| /** |
| * Called when doing a divide and conquer to find the next intersecting cells |
| * of the query shape that are beneath {@code cell}. {@code cell} is |
| * guaranteed to have an intersection and thus this must return some number |
| * of nodes. |
| */ |
| protected CellIterator findSubCellsToVisit(Cell cell) { |
| return cell.getNextLevelCells(queryShape); |
| } |
| |
| /** |
| * Scans ({@code termsEnum.next()}) terms until a term is found that does |
| * not start with curVNode's cell. If it finds a leaf cell or a cell at |
| * level {@code scanDetailLevel} then it calls {@link |
| * #visitScanned(org.apache.lucene.spatial.prefix.tree.Cell)}. |
| */ |
| protected void scan(int scanDetailLevel) throws IOException { |
| //note: this can be a do-while instead in 6x; 5x has a back-compat with redundant leaves -- LUCENE-4942 |
| while (curVNode.cell.isPrefixOf(indexedCell)) { |
| if (indexedCell.getLevel() == scanDetailLevel |
| || (indexedCell.getLevel() < scanDetailLevel && indexedCell.isLeaf())) { |
| visitScanned(indexedCell); |
| } |
| //advance |
| if (!nextTerm()) break; |
| } |
| } |
| |
| private boolean nextTerm() throws IOException { |
| if ((thisTerm = termsEnum.next()) == null) |
| return false; |
| indexedCell = grid.readCell(thisTerm, indexedCell); |
| return true; |
| } |
| |
| /** Used for {@link VNode#children}. */ |
| private class VNodeCellIterator implements Iterator<VNode> { |
| |
| final Iterator<Cell> cellIter; |
| private final VNode vNode; |
| |
| VNodeCellIterator(Iterator<Cell> cellIter, VNode vNode) { |
| this.cellIter = cellIter; |
| this.vNode = vNode; |
| } |
| |
| @Override |
| public boolean hasNext() { |
| return cellIter.hasNext(); |
| } |
| |
| @Override |
| public VNode next() { |
| assert hasNext(); |
| vNode.reset(cellIter.next()); |
| return vNode; |
| } |
| |
| @Override |
| public void remove() {//it always removes |
| } |
| } |
| |
| /** Called first to setup things. */ |
| protected abstract void start() throws IOException; |
| |
| /** Called last to return the result. */ |
| protected abstract DocIdSet finish() throws IOException; |
| |
| /** |
| * Visit an indexed non-leaf cell. The presence of a prefix cell implies |
| * there are leaf cells at further levels. The cell passed should have it's |
| * {@link org.apache.lucene.spatial.prefix.tree.Cell#getShapeRel()} set |
| * relative to the filtered shape. |
| * |
| * @param cell An intersecting cell; not a leaf. |
| * @return true to descend to more levels. |
| */ |
| protected abstract boolean visitPrefix(Cell cell) throws IOException; |
| |
| /** |
| * Called when an indexed leaf cell is found. An |
| * indexed leaf cell usually means associated documents won't be found at |
| * further detail levels. However, if a document has |
| * multiple overlapping shapes at different resolutions, then this isn't true. |
| */ |
| protected abstract void visitLeaf(Cell cell) throws IOException; |
| |
| /** |
| * The cell is either indexed as a leaf or is the last level of detail. It |
| * might not even intersect the query shape, so be sure to check for that. |
| * The default implementation will check that and if passes then call |
| * {@link #visitLeaf(org.apache.lucene.spatial.prefix.tree.Cell)} or |
| * {@link #visitPrefix(org.apache.lucene.spatial.prefix.tree.Cell)}. |
| */ |
| protected void visitScanned(Cell cell) throws IOException { |
| final SpatialRelation relate = cell.getShape().relate(queryShape); |
| if (relate.intersects()) { |
| cell.setShapeRel(relate);//just being pedantic |
| if (cell.isLeaf()) { |
| visitLeaf(cell); |
| } else { |
| visitPrefix(cell); |
| } |
| } |
| } |
| |
| protected void preSiblings(VNode vNode) throws IOException { |
| } |
| |
| protected void postSiblings(VNode vNode) throws IOException { |
| } |
| }//class VisitorTemplate |
| |
| /** |
| * A visitor node/cell found via the query shape for {@link VisitorTemplate}. |
| * Sometimes these are reset(cell). It's like a LinkedList node but forms a |
| * tree. |
| * |
| * @lucene.internal |
| */ |
| protected static class VNode { |
| //Note: The VNode tree adds more code to debug/maintain v.s. a flattened |
| // LinkedList that we used to have. There is more opportunity here for |
| // custom behavior (see preSiblings & postSiblings) but that's not |
| // leveraged yet. Maybe this is slightly more GC friendly. |
| |
| final VNode parent;//only null at the root |
| Iterator<VNode> children;//null, then sometimes set, then null |
| Cell cell;//not null (except initially before reset()) |
| |
| /** |
| * call reset(cell) after to set the cell. |
| */ |
| VNode(VNode parent) { // remember to call reset(cell) after |
| this.parent = parent; |
| } |
| |
| void reset(Cell cell) { |
| assert cell != null; |
| this.cell = cell; |
| assert children == null; |
| } |
| |
| } |
| } |