| Index: lucene/spatial/src/java/org/apache/lucene/spatial/serialized/SerializedDVStrategy.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/serialized/SerializedDVStrategy.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/serialized/SerializedDVStrategy.java (revision ) |
| @@ -50,7 +50,7 @@ |
| |
| /** |
| * A SpatialStrategy based on serializing a Shape stored into BinaryDocValues. |
| - * This is not at all fast; it's designed to be used in conjuction with another index based |
| + * This is not at all fast; it's designed to be used in conjunction with another index based |
| * SpatialStrategy that is approximated (like {@link org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy}) |
| * to add precision or eventually make more specific / advanced calculations on the per-document |
| * geometry. |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/CellTokenStream.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/CellTokenStream.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/CellTokenStream.java (revision ) |
| @@ -111,7 +111,7 @@ |
| if (omitLeafByte) |
| cell.getTokenBytesNoLeaf(bytes); |
| else |
| - cell.getTokenBytes(bytes); |
| + cell.getTokenBytesWithLeaf(bytes); |
| return bytes.hashCode(); |
| } |
| |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PointPrefixTreeFieldCacheProvider.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PointPrefixTreeFieldCacheProvider.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PointPrefixTreeFieldCacheProvider.java (revision ) |
| @@ -24,28 +24,25 @@ |
| import org.apache.lucene.util.BytesRef; |
| |
| /** |
| - * Implementation of {@link ShapeFieldCacheProvider} designed for {@link PrefixTreeStrategy}s. |
| + * Implementation of {@link ShapeFieldCacheProvider} designed for {@link PrefixTreeStrategy}s that index points. |
| * |
| - * Note, due to the fragmented representation of Shapes in these Strategies, this implementation |
| - * can only retrieve the central {@link Point} of the original Shapes. |
| - * |
| * @lucene.internal |
| */ |
| public class PointPrefixTreeFieldCacheProvider extends ShapeFieldCacheProvider<Point> { |
| |
| - final SpatialPrefixTree grid; // |
| + private final SpatialPrefixTree grid; |
| + private Cell scanCell; |
| |
| public PointPrefixTreeFieldCacheProvider(SpatialPrefixTree grid, String shapeField, int defaultSize) { |
| super( shapeField, defaultSize ); |
| this.grid = grid; |
| + this.scanCell = grid.getWorldCell();//re-used in readShape to save GC |
| } |
| |
| - private Cell scanCell = null;//re-used in readShape to save GC |
| - |
| @Override |
| protected Point readShape(BytesRef term) { |
| - scanCell = grid.getCell(term.bytes, term.offset, term.length, scanCell); |
| - if (scanCell.getLevel() == grid.getMaxLevels() && !scanCell.isLeaf()) |
| + scanCell.readCell(term); |
| + if (scanCell.getLevel() == grid.getMaxLevels() && !scanCell.isLeaf())//points are never flagged as leaf |
| return scanCell.getShape().getCenter(); |
| return null; |
| } |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision ) |
| @@ -17,14 +17,21 @@ |
| * limitations under the License. |
| */ |
| |
| +import com.spatial4j.core.shape.Point; |
| import com.spatial4j.core.shape.Shape; |
| import org.apache.lucene.search.Filter; |
| import org.apache.lucene.spatial.DisjointSpatialFilter; |
| +import org.apache.lucene.spatial.prefix.tree.Cell; |
| +import org.apache.lucene.spatial.prefix.tree.CellIterator; |
| +import org.apache.lucene.spatial.prefix.tree.LegacyCell; |
| import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; |
| import org.apache.lucene.spatial.query.SpatialArgs; |
| import org.apache.lucene.spatial.query.SpatialOperation; |
| import org.apache.lucene.spatial.query.UnsupportedSpatialOperation; |
| |
| +import java.util.ArrayList; |
| +import java.util.List; |
| + |
| /** |
| * A {@link PrefixTreeStrategy} which uses {@link AbstractVisitingPrefixTreeFilter}. |
| * This strategy has support for searching non-point shapes (note: not tested). |
| @@ -35,19 +42,26 @@ |
| * @lucene.experimental |
| */ |
| public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy { |
| + /* Future potential optimizations: |
| |
| - private int prefixGridScanLevel; |
| + Each shape.relate(otherShape) result could be cached since much of the same relations will be invoked when |
| + multiple segments are involved. Do this for "complex" shapes, not cheap ones, and don't cache when disjoint to |
| + bbox because it's a cheap calc. This is one advantage TermQueryPrefixTreeStrategy has over RPT. |
| |
| - /** True if only indexed points shall be supported. See |
| - * {@link IntersectsPrefixTreeFilter#hasIndexedLeaves}. */ |
| - protected boolean pointsOnly = false; |
| + */ |
| |
| - /** See {@link ContainsPrefixTreeFilter#multiOverlappingIndexedShapes}. */ |
| + protected int prefixGridScanLevel; |
| + |
| + //Formerly known as simplifyIndexedCells. Eventually will be removed. Only compatible with RPT |
| + // and a LegacyPrefixTree. |
| + protected boolean pruneLeafyBranches = true; |
| + |
| + protected boolean pointsOnly = false;//if true, there are no leaves |
| + |
| protected boolean multiOverlappingIndexedShapes = true; |
| |
| public RecursivePrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) { |
| - super(grid, fieldName, |
| - true);//simplify indexed cells |
| + super(grid, fieldName); |
| prefixGridScanLevel = grid.getMaxLevels() - 4;//TODO this default constant is dependent on the prefix grid size |
| } |
| |
| @@ -63,9 +77,88 @@ |
| this.prefixGridScanLevel = prefixGridScanLevel; |
| } |
| |
| + /** True if only indexed points shall be supported. There are no "leaves" in such a case. See |
| + * {@link IntersectsPrefixTreeFilter#hasIndexedLeaves}. */ |
| + public void setPointsOnly(boolean pointsOnly) { |
| + this.pointsOnly = pointsOnly; |
| + } |
| + |
| + /** See {@link ContainsPrefixTreeFilter#multiOverlappingIndexedShapes}. */ |
| + public void setMultiOverlappingIndexedShapes(boolean multiOverlappingIndexedShapes) { |
| + this.multiOverlappingIndexedShapes = multiOverlappingIndexedShapes; |
| + } |
| + |
| + /** An optional hint affecting non-point shapes: it will |
| + * simplify/aggregate sets of complete leaves in a cell to its parent, resulting in ~20-25% |
| + * fewer indexed cells. However, it will likely be removed in the future. (default=true) |
| + */ |
| + public void setPruneLeafyBranches(boolean pruneLeafyBranches) { |
| + this.pruneLeafyBranches = pruneLeafyBranches; |
| + } |
| + |
| @Override |
| public String toString() { |
| - return getClass().getSimpleName()+"(prefixGridScanLevel:"+prefixGridScanLevel+",SPG:("+ grid +"))"; |
| + StringBuilder str = new StringBuilder(getClass().getSimpleName()).append('(');//grid first, then only the non-default / enabled options |
| + str.append("SPG:(").append(grid.toString()).append(')'); |
| + if (pointsOnly) |
| + str.append(",pointsOnly"); |
| + if (pruneLeafyBranches) |
| + str.append(",pruneLeafyBranches"); |
| + if (prefixGridScanLevel != grid.getMaxLevels() - 4)//same expression as the constructor's default |
| + str.append(",prefixGridScanLevel:").append(prefixGridScanLevel);//append(int) directly; no temporary String |
| + if (!multiOverlappingIndexedShapes) |
| + str.append(",!multiOverlappingIndexedShapes"); |
| + return str.append(')').toString(); |
| + } |
| + |
| + @Override |
| + protected CellTokenStream createTokenStream(Shape shape, int detailLevel) { |
| + if (shape instanceof Point || !pruneLeafyBranches) |
| + return super.createTokenStream(shape, detailLevel); |
| + |
| + List<Cell> cells = new ArrayList<>(4096); |
| + recursiveTraverseAndPrune(grid.getWorldCell(), shape, detailLevel, cells); |
| + return new CellTokenStream().setCells(cells.iterator()); |
| + } |
| + |
| + /** Adds {@code cell} (unless it is level 0) and its sub-cells for {@code shape} to {@code result}. Returns true if cell ended up as a leaf (at detailLevel, already a leaf, or pruned); otherwise returns false after recursively descending. */ |
| + private boolean recursiveTraverseAndPrune(Cell cell, Shape shape, int detailLevel, List<Cell> result) { |
| + // Important: this logic assumes Cells don't share anything with other cells when |
| + // calling cell.getNextLevelCells(). This is only true for LegacyCell. |
| + if (!(cell instanceof LegacyCell)) |
| + throw new IllegalStateException("pruneLeafyBranches must be disabled for use with grid "+grid); |
| + |
| + if (cell.getLevel() == detailLevel) { |
| + cell.setLeaf();//FYI might already be a leaf |
| + } |
| + if (cell.isLeaf()) { |
| + result.add(cell); |
| + return true; |
| + } |
| + if (cell.getLevel() != 0)//the level 0 cell itself is never added to the result |
| + result.add(cell); |
| + |
| + int leaves = 0;//number of direct sub-cells that reported themselves as leaves |
| + CellIterator subCells = cell.getNextLevelCells(shape); |
| + while (subCells.hasNext()) { |
| + Cell subCell = subCells.next(); |
| + if (recursiveTraverseAndPrune(subCell, shape, detailLevel, result)) |
| + leaves++; |
| + } |
| + //can we prune? Only when every possible sub-cell came back as a leaf, and never at level 0 |
| + if (leaves == ((LegacyCell)cell).getSubCellsSize() && cell.getLevel() != 0) { |
| + //Optimization: substitute the parent as a leaf instead of adding all |
| + // children as leaves |
| + |
| + //remove the leaves: each leaf sub-cell left exactly one entry, so they are the last 'leaves' entries |
| + do { |
| + result.remove(result.size() - 1);//remove last |
| + } while (--leaves > 0); |
| + //cell itself was already added above (its level is not 0); just flag it as the leaf |
| + cell.setLeaf(); |
| + return true; |
| + } |
| + return false; |
| } |
| |
| @Override |
| Index: lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java (revision ) |
| @@ -33,6 +33,7 @@ |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.spatial.StrategyTestCase; |
| import org.apache.lucene.spatial.prefix.tree.Cell; |
| +import org.apache.lucene.spatial.prefix.tree.CellIterator; |
| import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree; |
| import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree; |
| import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; |
| @@ -63,7 +64,7 @@ |
| |
| public class SpatialOpRecursivePrefixTreeTest extends StrategyTestCase { |
| |
| - static final int ITERATIONS = 1;//Test Iterations |
| + static final int ITERATIONS = 10; |
| |
| private SpatialPrefixTree grid; |
| |
| @@ -80,6 +81,8 @@ |
| setupGeohashGrid(maxLevels); |
| //((PrefixTreeStrategy) strategy).setDistErrPct(0);//fully precise to grid |
| |
| + ((RecursivePrefixTreeStrategy)strategy).setPruneLeafyBranches(randomBoolean()); |
| + |
| System.out.println("Strategy: " + strategy.toString()); |
| } |
| |
| @@ -378,11 +381,14 @@ |
| double distErrPct = ((PrefixTreeStrategy) strategy).getDistErrPct(); |
| double distErr = SpatialArgs.calcDistanceFromErrPct(snapMe, distErrPct, ctx); |
| int detailLevel = grid.getLevelForDistance(distErr); |
| - List<Cell> cells = grid.getCells(snapMe, detailLevel, false, true); |
| + CellIterator cells = grid.getTreeCellIterator(snapMe, detailLevel); |
| |
| //calc bounding box of cells. |
| - List<Shape> cellShapes = new ArrayList<>(cells.size()); |
| - for (Cell cell : cells) { |
| + List<Shape> cellShapes = new ArrayList<>(1024); |
| + while (cells.hasNext()) { |
| + Cell cell = cells.next(); |
| + if (!cell.isLeaf()) |
| + continue; |
| cellShapes.add(cell.getShape()); |
| } |
| return new ShapeCollection<>(cellShapes, ctx).getBoundingBox(); |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java (revision ) |
| @@ -22,12 +22,12 @@ |
| import com.spatial4j.core.shape.Point; |
| import com.spatial4j.core.shape.Rectangle; |
| import com.spatial4j.core.shape.Shape; |
| +import org.apache.lucene.util.BytesRef; |
| |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.List; |
| |
| - |
| /** |
| * A {@link SpatialPrefixTree} based on |
| * <a href="http://en.wikipedia.org/wiki/Geohash">Geohashes</a>. |
| @@ -35,7 +35,7 @@ |
| * |
| * @lucene.experimental |
| */ |
| -public class GeohashPrefixTree extends SpatialPrefixTree { |
| +public class GeohashPrefixTree extends LegacyPrefixTree { |
| |
| /** |
| * Factory for creating {@link GeohashPrefixTree} instances with useful defaults |
| @@ -71,6 +71,11 @@ |
| } |
| |
| @Override |
| + public Cell getWorldCell() { |
| + return new GhCell(BytesRef.EMPTY_BYTES, 0, 0); |
| + } |
| + |
| + @Override |
| public int getLevelForDistance(double dist) { |
| if (dist == 0) |
| return maxLevels;//short circuit |
| @@ -79,15 +84,10 @@ |
| } |
| |
| @Override |
| - public Cell getCell(Point p, int level) { |
| + protected Cell getCell(Point p, int level) { |
| return new GhCell(GeohashUtils.encodeLatLon(p.getY(), p.getX(), level));//args are lat,lon (y,x) |
| } |
| |
| - @Override |
| - public Cell getCell(byte[] bytes, int offset, int len) { |
| - return new GhCell(bytes, offset, len); |
| - } |
| - |
| private static byte[] stringToBytesPlus1(String token) { |
| //copy ASCII token to byte array with one extra spot for eventual LEAF_BYTE if needed |
| byte[] bytes = new byte[token.length() + 1]; |
| @@ -97,9 +97,8 @@ |
| return bytes; |
| } |
| |
| - class GhCell extends Cell { |
| + private class GhCell extends LegacyCell { |
| |
| - private Shape shape;//cache |
| private String geohash;//cache; never has leaf byte, simply a geohash |
| |
| GhCell(String geohash) { |
| @@ -114,13 +113,12 @@ |
| } |
| |
| @Override |
| - protected SpatialPrefixTree getGrid() { return GeohashPrefixTree.this; } |
| + protected GeohashPrefixTree getGrid() { return GeohashPrefixTree.this; } |
| |
| @Override |
| - public void reset(byte[] bytes, int off, int len) { |
| - super.reset(bytes, off, len); |
| + public void readCell(BytesRef bytesRef) { |
| + super.readCell(bytesRef); |
| geohash = null; |
| - shape = null; |
| } |
| |
| @Override |
| @@ -139,8 +137,8 @@ |
| } |
| |
| @Override |
| - public Cell getSubCell(Point p) { |
| - return getGrid().getCell(p, getLevel() + 1);//not performant! |
| + protected GhCell getSubCell(Point p) { |
| + return (GhCell) getGrid().getCell(p, getLevel() + 1);//not performant! |
| } |
| |
| @Override |
| @@ -149,11 +147,6 @@ |
| shape = GeohashUtils.decodeBoundary(getGeohash(), getGrid().getSpatialContext()); |
| } |
| return shape; |
| - } |
| - |
| - @Override |
| - public Point getCenter() { |
| - return GeohashUtils.decode(getGeohash(), getGrid().getSpatialContext()); |
| } |
| |
| private String getGeohash() { |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java (revision ) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java (revision ) |
| @@ -0,0 +1,252 @@ |
| +package org.apache.lucene.spatial.prefix.tree; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import com.spatial4j.core.shape.Point; |
| +import com.spatial4j.core.shape.Shape; |
| +import com.spatial4j.core.shape.SpatialRelation; |
| +import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.StringHelper; |
| + |
| +import java.util.Collection; |
| + |
| +/** The base for the original two SPT's: Geohash and Quad. Don't subclass this for new SPTs. |
| + * @lucene.internal */ |
| +//public for RPT pruneLeafyBranches code |
| +public abstract class LegacyCell implements Cell { |
| + |
| + // Important: A LegacyCell doesn't share state for getNextLevelCells(), and |
| + // RecursivePrefixTreeStrategy assumes this in its pruneLeafyBranches logic. |
| + |
| + private static final byte LEAF_BYTE = '+';//NOTE: must sort before letters & numbers |
| + |
| + //Arguably we could simply use a BytesRef, using an extra Object. |
| + private byte[] bytes;//generally bigger to potentially hold a leaf |
| + private int b_off; |
| + private int b_len;//doesn't reflect leaf; same as getLevel() |
| + |
| + protected boolean isLeaf; |
| + |
| + /** |
| + * When set via getNextLevelCells(shapeFilter), it is the relationship between this cell |
| + * and the given shape filter. Doesn't participate in shape equality. |
| + */ |
| + protected SpatialRelation shapeRel; |
| + |
| + protected Shape shape;//cached |
| + |
| + /** Warning: Refers to the same bytes (no copy). If {@link #setLeaf()} is subsequently called then |
| + * {@link #getTokenBytesWithLeaf(BytesRef)} may write the leaf byte into those bytes. */ |
| + protected LegacyCell(byte[] bytes, int off, int len) { |
| + this.bytes = bytes; |
| + this.b_off = off; |
| + this.b_len = len; |
| + readLeafAdjust(); |
| + } |
| + |
| + @Override |
| + public void readCell(BytesRef bytes) { |
| + shapeRel = null; |
| + shape = null; |
| + this.bytes = bytes.bytes; |
| + this.b_off = bytes.offset; |
| + this.b_len = bytes.length; |
| + readLeafAdjust(); |
| + } |
| + |
| + private void readLeafAdjust() { |
| + isLeaf = (b_len > 0 && bytes[b_off + b_len - 1] == LEAF_BYTE); |
| + if (isLeaf) |
| + b_len--; |
| + } |
| + |
| +// @Override |
| +// public void copyFrom(Cell source) { |
| +// LegacyCell src = (LegacyCell) source; |
| +// shapeRel = src.shapeRel; |
| +// shape = src.shape; |
| +// isLeaf = src.isLeaf; |
| +// //we don't actually copy the bytes because in LegacyCell the bytes aren't modified. (leaf byte doesn't count) |
| +// bytes = src.bytes; |
| +// b_off = src.b_off; |
| +// b_len = src.b_len; |
| +// } |
| + |
| + protected abstract SpatialPrefixTree getGrid(); |
| + |
| + @Override |
| + public SpatialRelation getShapeRel() { |
| + return shapeRel; |
| + } |
| + |
| + @Override |
| + public void setShapeRel(SpatialRelation rel) { |
| + this.shapeRel = rel; |
| + } |
| + |
| + @Override |
| + public boolean isLeaf() { |
| + return isLeaf; |
| + } |
| + |
| + @Override |
| + public void setLeaf() { |
| + isLeaf = true; |
| + } |
| + |
| + @Override |
| + public BytesRef getTokenBytesWithLeaf(BytesRef result) {//token bytes, plus a trailing LEAF_BYTE when this cell is a leaf |
| + result = getTokenBytesNoLeaf(result);//points at this cell's own byte array (no copy); length excludes the leaf byte |
| + if (!isLeaf) |
| + return result; |
| + if (result.bytes.length < result.offset + result.length + 1) {//no spare slot after the token to hold the leaf byte |
| + assert false : "Not supposed to happen; performance bug"; |
| + byte[] copy = new byte[result.length + 1]; |
| + System.arraycopy(result.bytes, result.offset, copy, 0, result.length);//copy the WHOLE token; length-1 would drop its last byte |
| + result.bytes = copy; |
| + result.offset = 0; |
| + } |
| + result.bytes[result.offset + result.length++] = LEAF_BYTE;//append the leaf marker and grow length to include it |
| + return result; |
| + } |
| + |
| + @Override |
| + public BytesRef getTokenBytesNoLeaf(BytesRef result) { |
| + if (result == null) |
| + return new BytesRef(bytes, b_off, b_len); |
| + result.bytes = bytes; |
| + result.offset = b_off; |
| + result.length = b_len; |
| + return result; |
| + } |
| + |
| + @Override |
| + public int getLevel() { |
| + return b_len; |
| + } |
| + |
| + @Override |
| + public CellIterator getNextLevelCells(Shape shapeFilter) { |
| + assert getLevel() < getGrid().getMaxLevels(); |
| + if (shapeFilter instanceof Point) { |
| + LegacyCell cell = getSubCell((Point) shapeFilter); |
| + cell.shapeRel = SpatialRelation.CONTAINS; |
| + return new SingletonCellIterator(cell); |
| + } else { |
| + return new FilterCellIterator(getSubCells().iterator(), shapeFilter); |
| + } |
| + } |
| + |
| + /** |
| + * Performant implementations are expected to implement this efficiently by |
| + * considering the current cell's boundary. |
| + * <p/> |
| + * Precondition: Never called when getLevel() == maxLevel. |
| + * Precondition: this.getShape().relate(p) != DISJOINT. |
| + */ |
| + protected abstract LegacyCell getSubCell(Point p); |
| + |
| + /** |
| + * Gets the cells at the next grid cell level that covers this cell. |
| + * Precondition: Never called when getLevel() == maxLevel. |
| + * |
| + * @return A set of cells (no dups), sorted, modifiable, not empty, not null. |
| + */ |
| + protected abstract Collection<Cell> getSubCells(); |
| + |
| + /** |
| + * {@link #getSubCells()}.size() -- usually a constant. Should be &gt;=2 |
| + */ |
| + public abstract int getSubCellsSize(); |
| + |
| + @Override |
| + public boolean isPrefixOf(Cell c) { |
| + //Note: this only works when each level uses a whole number of bytes. |
| + LegacyCell cell = (LegacyCell)c; |
| + boolean result = sliceEquals(cell.bytes, cell.b_off, cell.b_len, bytes, b_off, b_len); |
| + assert result == StringHelper.startsWith(c.getTokenBytesNoLeaf(null), getTokenBytesNoLeaf(null)); |
| + return result; |
| + } |
| + |
| + /** Copied from {@link org.apache.lucene.util.StringHelper#startsWith(BytesRef, BytesRef)} |
| + * which calls this. This is to avoid creating a BytesRef. */ |
| + private static boolean sliceEquals(byte[] sliceToTest_bytes, int sliceToTest_offset, int sliceToTest_length, |
| + byte[] other_bytes, int other_offset, int other_length) { |
| + if (sliceToTest_length < other_length) { |
| + return false; |
| + } |
| + int i = sliceToTest_offset; |
| + int j = other_offset; |
| + final int k = other_offset + other_length; |
| + |
| + while (j < k) { |
| + if (sliceToTest_bytes[i++] != other_bytes[j++]) { |
| + return false; |
| + } |
| + } |
| + |
| + return true; |
| + } |
| + |
| + @Override |
| + public int compareToNoLeaf(Cell fromCell) { |
| + LegacyCell b = (LegacyCell) fromCell; |
| + return compare(bytes, b_off, b_len, b.bytes, b.b_off, b.b_len); |
| + } |
| + |
| + /** Copied from {@link BytesRef#compareTo(BytesRef)}. |
| + * This is to avoid creating a BytesRef. */ |
| + private static int compare(byte[] aBytes, int aUpto, int a_length, byte[] bBytes, int bUpto, int b_length) { |
| + final int aStop = aUpto + Math.min(a_length, b_length); |
| + while(aUpto < aStop) { |
| + int aByte = aBytes[aUpto++] & 0xff; |
| + int bByte = bBytes[bUpto++] & 0xff; |
| + |
| + int diff = aByte - bByte; |
| + if (diff != 0) { |
| + return diff; |
| + } |
| + } |
| + |
| + // One is a prefix of the other, or, they are equal: |
| + return a_length - b_length; |
| + } |
| + |
| + @Override |
| + public boolean equals(Object obj) { |
| + //this method isn't "normally" called; just in asserts/tests |
| + if (obj instanceof Cell) { |
| + Cell cell = (Cell) obj; |
| + return getTokenBytesWithLeaf(null).equals(cell.getTokenBytesWithLeaf(null)); |
| + } else { |
| + return false; |
| + } |
| + } |
| + |
| + @Override |
| + public int hashCode() { |
| + return getTokenBytesWithLeaf(null).hashCode(); |
| + } |
| + |
| + @Override |
| + public String toString() { |
| + //this method isn't "normally" called; just in asserts/tests |
| + return getTokenBytesWithLeaf(null).utf8ToString(); |
| + } |
| + |
| +} |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java (revision ) |
| @@ -18,22 +18,12 @@ |
| */ |
| |
| import com.spatial4j.core.context.SpatialContext; |
| -import com.spatial4j.core.shape.Point; |
| -import com.spatial4j.core.shape.Rectangle; |
| import com.spatial4j.core.shape.Shape; |
| import org.apache.lucene.util.BytesRef; |
| |
| -import java.nio.charset.Charset; |
| -import java.nio.charset.StandardCharsets; |
| -import java.util.ArrayList; |
| -import java.util.Arrays; |
| -import java.util.Collection; |
| -import java.util.Collections; |
| -import java.util.List; |
| - |
| /** |
| * A spatial Prefix Tree, or Trie, which decomposes shapes into prefixed strings |
| - * at variable lengths corresponding to variable precision. Each string |
| + * at variable lengths corresponding to variable precision. Each string |
| * corresponds to a rectangular spatial region. This approach is |
| * also referred to "Grids", "Tiles", and "Spatial Tiers". |
| * <p/> |
| @@ -44,8 +34,6 @@ |
| */ |
| public abstract class SpatialPrefixTree { |
| |
| - protected static final Charset UTF8 = StandardCharsets.UTF_8; |
| - |
| protected final int maxLevels; |
| |
| protected final SpatialContext ctx; |
| @@ -83,155 +71,40 @@ |
| |
| /** |
| * Given a cell having the specified level, returns the distance from opposite |
| - * corners. Since this might very depending on where the cell is, this method |
| + * corners. Since this might vary depending on where the cell is, this method |
| * may over-estimate. |
| * |
| * @param level [1 to maxLevels] |
| * @return > 0 |
| */ |
| - public double getDistanceForLevel(int level) { |
| - if (level < 1 || level > getMaxLevels()) |
| - throw new IllegalArgumentException("Level must be in 1 to maxLevels range"); |
| - //TODO cache for each level |
| - Cell cell = getCell(ctx.getWorldBounds().getCenter(), level); |
| - Rectangle bbox = cell.getShape().getBoundingBox(); |
| - double width = bbox.getWidth(); |
| - double height = bbox.getHeight(); |
| - //Use standard cartesian hypotenuse. For geospatial, this answer is larger |
| - // than the correct one but it's okay to over-estimate. |
| - return Math.sqrt(width * width + height * height); |
| - } |
| + public abstract double getDistanceForLevel(int level); |
| |
| - private transient Cell worldCell;//cached |
| - |
| /** |
| - * Returns the level 0 cell which encompasses all spatial data. Equivalent to {@link #getCell(byte[], int, int)} with |
| - * no bytes. This cell is thread-safe, just like a spatial prefix grid is, although cells aren't |
| - * generally thread-safe. |
| + * Returns the level 0 cell which encompasses all spatial data. Equivalent to {@link Cell#readCell(BytesRef)} |
| + * with no bytes. |
| */ |
| - public Cell getWorldCell() {//another possible name: getTopCell |
| - if (worldCell == null) { |
| - worldCell = getCell(BytesRef.EMPTY_BYTES, 0, 0); |
| - worldCell.getShape();//lazy load; make thread-safe |
| - } |
| - return worldCell; |
| - } |
| + public abstract Cell getWorldCell(); //another possible name: getTopCell |
| |
| /** |
| - * The cell for the specified token. The empty string should be equal to {@link #getWorldCell()}. |
| - * Precondition: Never called when token length > maxLevel. |
| - */ |
| - public abstract Cell getCell(byte[] bytes, int offset, int len); |
| - |
| - public final Cell getCell(byte[] bytes, int offset, int len, Cell target) { |
| - if (target == null) { |
| - return getCell(bytes, offset, len); |
| - } |
| - |
| - target.reset(bytes, offset, len); |
| - return target; |
| - } |
| - |
| - /** |
| - * Returns the cell containing point {@code p} at the specified {@code level}. |
| - */ |
| - protected Cell getCell(Point p, int level) { |
| - return getCells(p, level, false).get(0); |
| - } |
| - |
| - /** |
| * Gets the intersecting cells for the specified shape, without exceeding |
| * detail level. If a cell is within the query shape then it's marked as a |
| - * leaf and none of its children are added. |
| - * <p/> |
| - * This implementation checks if shape is a Point and if so returns {@link |
| - * #getCells(com.spatial4j.core.shape.Point, int, boolean)}. |
| + * leaf and none of its children are added. For cells at detailLevel, they are marked as |
| + * leaves too, unless it's a point. |
| + * <p> |
| + * IMPORTANT: Cells returned from the iterator can be re-used for cells at the same level. So you can't simply |
| + * iterate to subsequent cells and still refer to the former cell nor the bytes returned from the former cell, unless |
| + * you know the former cell is a parent. |
| * |
| - * @param shape the shape; non-null |
| + * @param shape the shape; possibly null but the caller should liberally call |
| + * {@code remove()} if so. |
| * @param detailLevel the maximum detail level to get cells for |
| - * @param inclParents if true then all parent cells of leaves are returned |
| - * too. The top world cell is never returned. |
| - * @param simplify for non-point shapes, this will simply/aggregate sets of |
| - * complete leaves in a cell to its parent, resulting in |
| - * ~20-25% fewer cells. |
| - * @return a set of cells (no dups), sorted, immutable, non-null |
| + * @return the matching cells |
| */ |
| - public List<Cell> getCells(Shape shape, int detailLevel, boolean inclParents, |
| - boolean simplify) { |
| - //TODO consider an on-demand iterator -- it won't build up all cells in memory. |
| + public CellIterator getTreeCellIterator(Shape shape, int detailLevel) { |
| if (detailLevel > maxLevels) { |
| throw new IllegalArgumentException("detailLevel > maxLevels"); |
| } |
| - if (shape instanceof Point) { |
| - return getCells((Point) shape, detailLevel, inclParents); |
| - } |
| - List<Cell> cells = new ArrayList<>(inclParents ? 4096 : 2048); |
| - recursiveGetCells(getWorldCell(), shape, detailLevel, inclParents, simplify, cells); |
| - return cells; |
| - } |
| - |
| - /** |
| - * Returns true if cell was added as a leaf. If it wasn't it recursively |
| - * descends. |
| - */ |
| - private boolean recursiveGetCells(Cell cell, Shape shape, int detailLevel, |
| - boolean inclParents, boolean simplify, |
| - List<Cell> result) { |
| - if (cell.getLevel() == detailLevel) { |
| - cell.setLeaf();//FYI might already be a leaf |
| - } |
| - if (cell.isLeaf()) { |
| - result.add(cell); |
| - return true; |
| - } |
| - if (inclParents && cell.getLevel() != 0) |
| - result.add(cell); |
| - |
| - Collection<Cell> subCells = cell.getSubCells(shape); |
| - int leaves = 0; |
| - for (Cell subCell : subCells) { |
| - if (recursiveGetCells(subCell, shape, detailLevel, inclParents, simplify, result)) |
| - leaves++; |
| - } |
| - //can we simplify? |
| - if (simplify && leaves == cell.getSubCellsSize() && cell.getLevel() != 0) { |
| - //Optimization: substitute the parent as a leaf instead of adding all |
| - // children as leaves |
| - |
| - //remove the leaves |
| - do { |
| - result.remove(result.size() - 1);//remove last |
| - } while (--leaves > 0); |
| - //add cell as the leaf |
| - cell.setLeaf(); |
| - if (!inclParents) // otherwise it was already added up above |
| - result.add(cell); |
| - return true; |
| - } |
| - return false; |
| - } |
| - |
| - /** |
| - * A Point-optimized implementation of |
| - * {@link #getCells(com.spatial4j.core.shape.Shape, int, boolean, boolean)}. That |
| - * method in facts calls this for points. |
| - */ |
| - public List<Cell> getCells(Point p, int detailLevel, boolean inclParents) { |
| - Cell cell = getCell(p, detailLevel); |
| - assert !cell.isLeaf(); |
| - if (!inclParents || detailLevel == 1) { |
| - return Collections.singletonList(cell); |
| - } |
| - |
| - //fill in reverse order to be sorted |
| - Cell[] cells = new Cell[detailLevel]; |
| - for (int i = detailLevel-1; true; i--) { |
| - cells[i] = cell; |
| - if (i == 0) |
| - break; |
| - cell = cell.getParent(); |
| - } |
| - return Arrays.asList(cells); |
| + return new TreeCellIterator(shape, detailLevel, getWorldCell()); |
| } |
| |
| } |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/ContainsPrefixTreeFilter.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/ContainsPrefixTreeFilter.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/ContainsPrefixTreeFilter.java (revision ) |
| @@ -24,6 +24,7 @@ |
| import org.apache.lucene.search.DocIdSet; |
| import org.apache.lucene.search.DocIdSetIterator; |
| import org.apache.lucene.spatial.prefix.tree.Cell; |
| +import org.apache.lucene.spatial.prefix.tree.CellIterator; |
| import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| @@ -31,7 +32,6 @@ |
| |
| import java.io.IOException; |
| import java.util.Arrays; |
| -import java.util.Collection; |
| |
| /** |
| * Finds docs where its indexed shape {@link org.apache.lucene.spatial.query.SpatialOperation#Contains |
| @@ -84,7 +84,7 @@ |
| } |
| |
| BytesRef termBytes = new BytesRef();//no leaf |
| - Cell nextCell;//see getLeafDocs |
| + Cell nextCell = grid.getWorldCell();//see getLeafDocs |
| |
| /** This is the primary algorithm; recursive. Returns null if finds none. */ |
| private SmallDocSet visit(Cell cell, Bits acceptContains) throws IOException { |
| @@ -103,8 +103,9 @@ |
| subCellsFilter = null; |
| assert cell.getShape().relate(queryShape) == SpatialRelation.WITHIN; |
| } |
| - Collection <Cell> subCells = cell.getSubCells(subCellsFilter); |
| - for (Cell subCell : subCells) { |
| + CellIterator subCells = cell.getNextLevelCells(subCellsFilter); |
| + while (subCells.hasNext()) { |
| + Cell subCell = subCells.next(); |
| if (!seekExact(subCell)) |
| combinedSubResults = null; |
| else if (subCell.getLevel() == detailLevel) |
| @@ -131,9 +132,9 @@ |
| |
| private boolean seekExact(Cell cell) throws IOException { |
| assert cell.getTokenBytesNoLeaf(null).compareTo(termBytes) > 0; |
| - cell.getTokenBytesNoLeaf(termBytes); |
| if (termsEnum == null) |
| return false; |
| + termBytes = cell.getTokenBytesNoLeaf(termBytes); |
| return termsEnum.seekExact(termBytes); |
| } |
| |
| @@ -157,7 +158,8 @@ |
| termsEnum = null;//signals all done |
| return null; |
| } |
| - nextCell = grid.getCell(nextTerm.bytes, nextTerm.offset, nextTerm.length, nextCell); |
| + nextCell.readCell(nextTerm); |
| + assert leafCell.isPrefixOf(nextCell); |
| if (nextCell.getLevel() == leafCell.getLevel() && nextCell.isLeaf()) { |
| return collectDocs(acceptContains); |
| } else { |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyPrefixTree.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyPrefixTree.java (revision ) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyPrefixTree.java (revision ) |
| @@ -0,0 +1,77 @@ |
| +package org.apache.lucene.spatial.prefix.tree; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import com.spatial4j.core.context.SpatialContext; |
| +import com.spatial4j.core.shape.Point; |
| +import com.spatial4j.core.shape.Rectangle; |
| +import com.spatial4j.core.shape.Shape; |
| +import org.apache.lucene.util.BytesRef; |
| + |
| +import java.util.Arrays; |
| + |
| +/** The base for the original two SPTs: Geohash and Quad. Don't subclass this for new SPTs. |
| + * @lucene.internal */ |
| +abstract class LegacyPrefixTree extends SpatialPrefixTree { |
| + public LegacyPrefixTree(SpatialContext ctx, int maxLevels) { |
| + super(ctx, maxLevels); |
| + } |
| + |
| + public double getDistanceForLevel(int level) { |
| + if (level < 1 || level > getMaxLevels()) |
| + throw new IllegalArgumentException("Level must be in 1 to maxLevels range"); |
| + //TODO cache for each level |
| + Cell cell = getCell(ctx.getWorldBounds().getCenter(), level); |
| + Rectangle bbox = cell.getShape().getBoundingBox(); |
| + double width = bbox.getWidth(); |
| + double height = bbox.getHeight(); |
| + //Use standard cartesian hypotenuse. For geospatial, this answer is larger |
| + // than the correct one but it's okay to over-estimate. |
| + return Math.sqrt(width * width + height * height); |
| + } |
| + |
| + /** |
| + * Returns the cell containing point {@code p} at the specified {@code level}. |
| + */ |
| + protected abstract Cell getCell(Point p, int level); |
| + |
| + @Override |
| + public CellIterator getTreeCellIterator(Shape shape, int detailLevel) { |
| + if (!(shape instanceof Point)) |
| + return super.getTreeCellIterator(shape, detailLevel); |
| + |
| + //This specialization is here because the legacy implementations don't have a fast implementation of |
| + // cell.getSubCells(point). It's fastest here to encode the full bytes for detailLevel, and create |
| + // subcells from the bytesRef in a loop. This avoids an O(N^2) encode, and we have O(N) instead. |
| + |
| + Cell cell = getCell((Point) shape, detailLevel); |
| + assert !cell.isLeaf() && cell instanceof LegacyCell; |
| + BytesRef fullBytes = cell.getTokenBytesNoLeaf(null); |
| + //fill from the shallowest level to the deepest so that the cells are sorted |
| + Cell[] cells = new Cell[detailLevel]; |
| + for (int i = 1; i < detailLevel; i++) { |
| + Cell parentCell = getWorldCell(); |
| + fullBytes.length = i; |
| + parentCell.readCell(fullBytes); |
| + cells[i-1] = parentCell; |
| + } |
| + cells[detailLevel-1] = cell; |
| + return new FilterCellIterator(Arrays.asList(cells).iterator(), null);//null filter |
| + } |
| + |
| +} |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java (revision ) |
| @@ -19,6 +19,7 @@ |
| |
| import com.spatial4j.core.shape.Point; |
| import com.spatial4j.core.shape.Shape; |
| +import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.FieldType; |
| import org.apache.lucene.index.FieldInfo; |
| @@ -29,7 +30,7 @@ |
| import org.apache.lucene.spatial.query.SpatialArgs; |
| import org.apache.lucene.spatial.util.ShapeFieldCacheDistanceValueSource; |
| |
| -import java.util.List; |
| +import java.util.Iterator; |
| import java.util.Map; |
| import java.util.concurrent.ConcurrentHashMap; |
| |
| @@ -75,14 +76,12 @@ |
| public abstract class PrefixTreeStrategy extends SpatialStrategy { |
| protected final SpatialPrefixTree grid; |
| private final Map<String, PointPrefixTreeFieldCacheProvider> provider = new ConcurrentHashMap<>(); |
| - protected final boolean simplifyIndexedCells; |
| protected int defaultFieldValuesArrayLen = 2; |
| protected double distErrPct = SpatialArgs.DEFAULT_DISTERRPCT;// [ 0 TO 0.5 ] |
| |
| - public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName, boolean simplifyIndexedCells) { |
| + public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) { |
| super(grid.getSpatialContext(), fieldName); |
| this.grid = grid; |
| - this.simplifyIndexedCells = simplifyIndexedCells; |
| } |
| |
| /** |
| @@ -120,15 +119,23 @@ |
| return createIndexableFields(shape, distErr); |
| } |
| |
| + /** |
| + * Turns {@link SpatialPrefixTree#getTreeCellIterator(Shape, int)} into a |
| + * {@link org.apache.lucene.analysis.TokenStream}. |
| + * {@code simplifyIndexedCells} is an optional hint affecting non-point shapes: it will |
| + * simplify/aggregate sets of complete leaves in a cell to its parent, resulting in ~20-25% |
| + * fewer cells. It will likely be removed in the future. |
| + */ |
| public Field[] createIndexableFields(Shape shape, double distErr) { |
| int detailLevel = grid.getLevelForDistance(distErr); |
| - // note: maybe CellTokenStream should do this line, but it doesn't matter and it would create extra |
| - // coupling |
| - List<Cell> cells = grid.getCells(shape, detailLevel, true, simplifyIndexedCells);//intermediates cells |
| - |
| - Field field = new Field(getFieldName(), |
| - new CellTokenStream().setCells(cells.iterator()), FIELD_TYPE); |
| + TokenStream tokenStream = createTokenStream(shape, detailLevel); |
| + Field field = new Field(getFieldName(), tokenStream, FIELD_TYPE); |
| return new Field[]{field}; |
| + } |
| + |
| + protected CellTokenStream createTokenStream(Shape shape, int detailLevel) { |
| + Iterator<Cell> cells = grid.getTreeCellIterator(shape, detailLevel); |
| + return new CellTokenStream().setCells(cells); |
| } |
| |
| /* Indexed, tokenized, not stored. */ |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/AbstractVisitingPrefixTreeFilter.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/AbstractVisitingPrefixTreeFilter.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/AbstractVisitingPrefixTreeFilter.java (revision ) |
| @@ -22,6 +22,7 @@ |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.search.DocIdSet; |
| import org.apache.lucene.spatial.prefix.tree.Cell; |
| +import org.apache.lucene.spatial.prefix.tree.CellIterator; |
| import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| @@ -46,6 +47,11 @@ |
| //Historical note: this code resulted from a refactoring of RecursivePrefixTreeFilter, |
| // which in turn came out of SOLR-2155 |
| |
| + //This class perhaps could have been implemented in terms of FilteredTermsEnum & MultiTermQuery |
| + // & MultiTermQueryWrapperFilter. Maybe so for simple Intersects predicate but not for when we want to collect terms |
| + // differently depending on cell state like IsWithin and for fuzzy/accurate collection planned improvements. At |
| + // least it would just make things more complicated. |
| + |
| protected final int prefixGridScanLevel;//at least one less than grid.getMaxLevels() |
| |
| public AbstractVisitingPrefixTreeFilter(Shape queryShape, String fieldName, SpatialPrefixTree grid, |
| @@ -108,16 +114,21 @@ |
| does act as a short-circuit. So maybe do some percent of the time or when the level |
| is above some threshold. |
| |
| - * Each shape.relate(otherShape) result could be cached since much of the same relations |
| - will be invoked when multiple segments are involved. |
| + * Once we don't have redundant non-leaves indexed with leaf cells (LUCENE-4942), we can |
| + sometimes know to call next() instead of seek() if we're processing a leaf cell that |
| + didn't have a corresponding non-leaf. |
| |
| */ |
| |
| + // |
| + // TODO MAJOR REFACTOR SIMPLIFICATION BASED ON TreeCellIterator TODO |
| + // |
| + |
| protected final boolean hasIndexedLeaves;//if false then we can skip looking for them |
| |
| private VNode curVNode;//current pointer, derived from query shape |
| private BytesRef curVNodeTerm = new BytesRef();//curVNode.cell's term, without leaf |
| - private Cell scanCell; |
| + private Cell scanCell = grid.getWorldCell(); |
| |
| private BytesRef thisTerm;//the result of termsEnum.term() |
| |
| @@ -213,8 +224,8 @@ |
| if (hasIndexedLeaves && cell.getLevel() != 0) { |
| //If the next indexed term just adds a leaf marker ('+') to cell, |
| // then add all of those docs |
| - assert curVNode.cell.isWithin(curVNodeTerm, thisTerm); |
| - scanCell = grid.getCell(thisTerm.bytes, thisTerm.offset, thisTerm.length, scanCell); |
| + scanCell.readCell(thisTerm); |
| + assert curVNode.cell.isPrefixOf(scanCell); |
| if (scanCell.getLevel() == cell.getLevel() && scanCell.isLeaf()) { |
| visitLeaf(scanCell); |
| //advance |
| @@ -251,8 +262,8 @@ |
| * guaranteed to have an intersection and thus this must return some number |
| * of nodes. |
| */ |
| - protected Iterator<Cell> findSubCellsToVisit(Cell cell) { |
| - return cell.getSubCells(queryShape).iterator(); |
| + protected CellIterator findSubCellsToVisit(Cell cell) { |
| + return cell.getNextLevelCells(queryShape); |
| } |
| |
| /** |
| @@ -262,10 +273,12 @@ |
| * #visitScanned(org.apache.lucene.spatial.prefix.tree.Cell)}. |
| */ |
| protected void scan(int scanDetailLevel) throws IOException { |
| - for (; |
| + for ( ; |
| - thisTerm != null && curVNode.cell.isWithin(curVNodeTerm, thisTerm); |
| + thisTerm != null; |
| - thisTerm = termsEnum.next()) { |
| + thisTerm = termsEnum.next()) { |
| - scanCell = grid.getCell(thisTerm.bytes, thisTerm.offset, thisTerm.length, scanCell); |
| + scanCell.readCell(thisTerm); |
| + if (!curVNode.cell.isPrefixOf(scanCell)) |
| + break; |
| |
| int termLevel = scanCell.getLevel(); |
| if (termLevel < scanDetailLevel) { |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/CellIterator.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/CellIterator.java (revision ) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/CellIterator.java (revision ) |
| @@ -0,0 +1,77 @@ |
| +package org.apache.lucene.spatial.prefix.tree; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.util.Iterator; |
| +import java.util.NoSuchElementException; |
| + |
| +/** |
| + * An Iterator of SpatialPrefixTree Cells. The order is always sorted without duplicates. |
| + * |
| + * @lucene.experimental |
| + */ |
| +public abstract class CellIterator implements Iterator<Cell> { |
| + |
| + //note: nextCell or thisCell can be non-null but never both at the same time. That's |
| + // because they might return the same instance when re-used! |
| + |
| + protected Cell nextCell;//to be returned by next(), and null'ed after |
| + protected Cell thisCell;//see next() & thisCell(). Should be cleared in hasNext(). |
| + |
| + /** Returns the cell last returned from {@link #next()}. It's cleared by hasNext(). */ |
| + public Cell thisCell() { |
| + assert thisCell != null : "Only call thisCell() after next(), not hasNext()"; |
| + return thisCell; |
| + } |
| + |
| + // Arguably this belongs here and not on Cell |
| + //public SpatialRelation getShapeRel() |
| + |
| + /** |
| + * Gets the next cell that is >= {@code fromCell}, compared using non-leaf bytes. If it returns null then |
| + * the iterator is exhausted. |
| + */ |
| + public Cell nextFrom(Cell fromCell) { |
| + while (true) { |
| + if (!hasNext()) |
| + return null; |
| + Cell c = next();//will update thisCell |
| + if (c.compareToNoLeaf(fromCell) >= 0) { |
| + return c; |
| + } |
| + } |
| + } |
| + |
| + /** This prevents sub-cells (those underneath the current cell) from being iterated to, |
| + * if applicable, otherwise a NO-OP. */ |
| + @Override |
| + public void remove() { |
| + assert thisCell != null; |
| + } |
| + |
| + @Override |
| + public final Cell next() { |
| + if (nextCell == null) { |
| + if (!hasNext()) |
| + throw new NoSuchElementException(); |
| + } |
| + thisCell = nextCell; |
| + nextCell = null; |
| + return thisCell; |
| + } |
| +} |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/Cell.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/Cell.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/Cell.java (revision ) |
| @@ -17,231 +17,103 @@ |
| * limitations under the License. |
| */ |
| |
| -import com.spatial4j.core.shape.Point; |
| import com.spatial4j.core.shape.Shape; |
| import com.spatial4j.core.shape.SpatialRelation; |
| import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.StringHelper; |
| |
| -import java.util.ArrayList; |
| -import java.util.Collection; |
| -import java.util.Collections; |
| -import java.util.List; |
| - |
| /** |
| - * Represents a grid cell. These are not necessarily thread-safe, although calling {@link #getShape()} will |
| - * sufficiently prepare it to be so, if needed. |
| + * Represents a grid cell. Cell instances are generally very transient and may be re-used |
| + * internally. To get an instance, you could start with {@link SpatialPrefixTree#getWorldCell()}. |
| + * And from there you could either traverse down the tree with {@link #getNextLevelCells(com.spatial4j.core.shape.Shape)}, |
| + * or you could read an indexed term via {@link #readCell(org.apache.lucene.util.BytesRef)}. |
| + * When a cell is read from a term, it is comprised of just the base bytes plus optionally a leaf flag. |
| * |
| * @lucene.experimental |
| */ |
| -public abstract class Cell { |
| +public interface Cell { |
| |
| - private static final byte LEAF_BYTE = '+';//NOTE: must sort before letters & numbers |
| - |
| - //Arguably we could simply use a BytesRef, using an extra Object. |
| - private byte[] bytes; |
| - private int b_off; |
| - private int b_len; |
| - |
| - /** |
| - * When set via getSubCells(filter), it is the relationship between this cell |
| - * and the given shape filter. Doesn't participate in shape equality. |
| + /** This initializes the cell with the given bytes and clears any previous state. |
| + * Warning: Refers to the same byte array (no copy). If {@link #setLeaf()} is subsequently called, |
| + * then it may modify these bytes. |
| + * |
| + * @param bytes the bytes of the new cell. WARNING: copy by reference. |
| */ |
| - protected SpatialRelation shapeRel; |
| + void readCell(BytesRef bytes); |
| |
| - /** Warning: Refers to the same bytes (no copy). If {@link #setLeaf()} is subsequently called then it |
| - * may modify bytes. */ |
| - protected Cell(byte[] bytes, int off, int len) { |
| - this.bytes = bytes; |
| - this.b_off = off; |
| - this.b_len = len; |
| - } |
| +// If we bring this back, perhaps do so as a method that un-shares its internal state: void unshare(); |
| +// /** Resets the state of this cell such that it is identical to {@code source}. This can be used for |
| +// * cloning a cell to have a safe copy, and it also might be used to position this cell |
| +// * before calling {@link #readCell(org.apache.lucene.util.BytesRef)} in a loop if you know the first term |
| +// * is going to be close to some other cell, thereby saving some computations. */ |
| +// void copyFrom(Cell source); |
| |
| - /** Warning: Refers to the same bytes (no copy). If {@link #setLeaf()} is subsequently called then it |
| - * may modify bytes. */ |
| - public void reset(byte[] bytes, int off, int len) { |
| - assert getLevel() != 0; |
| - shapeRel = null; |
| - this.bytes = bytes; |
| - this.b_off = off; |
| - this.b_len = len; |
| - } |
| + /** Gets the relationship this cell has with the shape from which it was filtered, assuming it came from a |
| + * {@link CellIterator}. Arguably it belongs there but it's very convenient here. */ |
| + SpatialRelation getShapeRel(); |
| |
| - protected abstract SpatialPrefixTree getGrid(); |
| + /** See {@link #getShapeRel()}. |
| + * @lucene.internal */ |
| + void setShapeRel(SpatialRelation rel); |
| |
| - public SpatialRelation getShapeRel() { |
| - return shapeRel; |
| - } |
| - |
| /** |
| - * For points, this is always false. Otherwise this is true if there are no |
| + * Some cells are flagged as leaves, which are indexed as such. A leaf cell is either within some |
| + * shape or it both intersects and the cell is at an accuracy threshold such that no smaller cells |
| + * for the shape will be represented. |
| * further cells with this prefix for the shape (always true at maxLevels). |
| */ |
| - public boolean isLeaf() { |
| - return (b_len > 0 && bytes[b_off + b_len - 1] == LEAF_BYTE); |
| - } |
| + boolean isLeaf(); |
| |
| - /** Modifies the bytes to reflect that this is a leaf. Warning: never invoke from a cell |
| + /** Set this cell to be a leaf. Warning: never call on a cell |
| * initialized to reference the same bytes from termsEnum, which should be treated as immutable. |
| - * Note: not supported at level 0. */ |
| - public void setLeaf() { |
| - assert getLevel() != 0; |
| - if (isLeaf()) |
| - return; |
| - //if isn't big enough, we have to copy |
| - if (bytes.length < b_off + b_len) { |
| - //hopefully this copying doesn't happen too much (DWS: I checked and it doesn't seem to happen) |
| - byte[] copy = new byte[b_len + 1]; |
| - System.arraycopy(bytes, b_off, copy, 0, b_len); |
| - copy[b_len++] = LEAF_BYTE; |
| - bytes = copy; |
| - b_off = 0; |
| - } else { |
| - bytes[b_off + b_len++] = LEAF_BYTE; |
| - } |
| - } |
| + * Note: not supported at level 0. |
| + * @lucene.internal */ |
| + void setLeaf(); |
| |
| /** |
| - * Returns the bytes for this cell. |
| + * Returns the bytes for this cell, with a leaf byte if this is a leaf cell. |
| * The result param is used to save object allocation, though it's bytes aren't used. |
| * @param result where the result goes, or null to create new |
| */ |
| - public BytesRef getTokenBytes(BytesRef result) { |
| - if (result == null) |
| - result = new BytesRef(); |
| - result.bytes = bytes; |
| - result.offset = b_off; |
| - result.length = b_len; |
| - return result; |
| - } |
| + BytesRef getTokenBytesWithLeaf(BytesRef result); |
| |
| /** |
| - * Returns the bytes for this cell, without leaf set. The bytes should sort before any |
| - * cells that have the leaf set for the spatial location. |
| + * Returns the bytes for this cell, without leaf set. The bytes should sort before |
| + * {@link #getTokenBytesWithLeaf(org.apache.lucene.util.BytesRef)}. |
| * The result param is used to save object allocation, though it's bytes aren't used. |
| * @param result where the result goes, or null to create new |
| */ |
| - public BytesRef getTokenBytesNoLeaf(BytesRef result) { |
| - result = getTokenBytes(result); |
| - if (isLeaf()) |
| - result.length--; |
| - return result; |
| - } |
| + BytesRef getTokenBytesNoLeaf(BytesRef result); |
| |
| /** Level 0 is the world (and has no parent), from then on a higher level means a smaller |
| * cell than the level before it. |
| */ |
| - public int getLevel() { |
| - return isLeaf() ? b_len - 1 : b_len; |
| - } |
| + int getLevel(); |
| |
| - /** Gets the parent cell that contains this one. Don't call on the world cell. */ |
| - public Cell getParent() { |
| - assert getLevel() > 0; |
| - return getGrid().getCell(bytes, b_off, b_len - (isLeaf() ? 2 : 1)); |
| - } |
| - |
| /** |
| - * Like {@link #getSubCells()} but with the results filtered by a shape. If |
| - * that shape is a {@link com.spatial4j.core.shape.Point} then it must call |
| - * {@link #getSubCell(com.spatial4j.core.shape.Point)}. The returned cells |
| - * should have {@link Cell#getShapeRel()} set to their relation with {@code |
| - * shapeFilter}. In addition, {@link Cell#isLeaf()} |
| + * Gets the cells at the next grid cell level underneath this one, optionally filtered by |
| + * {@code shapeFilter}. The returned cells should have {@link #getShapeRel()} set to |
| + * their relation with {@code shapeFilter}. In addition, for non-points {@link #isLeaf()} |
| * must be true when that relation is WITHIN. |
| * <p/> |
| + * IMPORTANT: Cells returned from this iterator can be shared, as well as the bytes. |
| + * <p/> |
| * Precondition: Never called when getLevel() == maxLevel. |
| * |
| * @param shapeFilter an optional filter for the returned cells. |
| * @return A set of cells (no dups), sorted. Not Modifiable. |
| */ |
| - public Collection<Cell> getSubCells(Shape shapeFilter) { |
| - //Note: Higher-performing subclasses might override to consider the shape filter to generate fewer cells. |
| - if (shapeFilter instanceof Point) { |
| - Cell subCell = getSubCell((Point) shapeFilter); |
| - subCell.shapeRel = SpatialRelation.CONTAINS; |
| - return Collections.singletonList(subCell); |
| - } |
| - Collection<Cell> cells = getSubCells(); |
| + CellIterator getNextLevelCells(Shape shapeFilter); |
| |
| - if (shapeFilter == null) { |
| - return cells; |
| - } |
| + /** Gets the shape for this cell; typically a Rectangle. */ |
| + Shape getShape(); |
| |
| - //TODO change API to return a filtering iterator |
| - List<Cell> copy = new ArrayList<>(cells.size()); |
| - for (Cell cell : cells) { |
| - SpatialRelation rel = cell.getShape().relate(shapeFilter); |
| - if (rel == SpatialRelation.DISJOINT) |
| - continue; |
| - cell.shapeRel = rel; |
| - if (rel == SpatialRelation.WITHIN) |
| - cell.setLeaf(); |
| - copy.add(cell); |
| - } |
| - return copy; |
| - } |
| - |
| /** |
| - * Performant implementations are expected to implement this efficiently by |
| - * considering the current cell's boundary. Precondition: Never called when |
| - * getLevel() == maxLevel. |
| - * <p/> |
| - * Precondition: this.getShape().relate(p) != DISJOINT. |
| + * Returns whether the target cell is within/underneath this cell; not necessarily a direct |
| + * descendant. |
| + * @param c the cell to test |
| */ |
| - public abstract Cell getSubCell(Point p); |
| + boolean isPrefixOf(Cell c); |
| |
| - /** |
| - * Gets the cells at the next grid cell level that cover this cell. |
| - * Precondition: Never called when getLevel() == maxLevel. |
| - * |
| - * @return A set of cells (no dups), sorted, modifiable, not empty, not null. |
| - */ |
| - protected abstract Collection<Cell> getSubCells(); |
| - |
| - /** |
| - * {@link #getSubCells()}.size() -- usually a constant. Should be >=2 |
| - */ |
| - public abstract int getSubCellsSize(); |
| - |
| - /** Gets the shape for this cell; typically a Rectangle. This method also serves to trigger any lazy |
| - * loading needed to make the cell instance thread-safe. |
| - */ |
| - public abstract Shape getShape(); |
| - |
| - /** TODO remove once no longer used. */ |
| - public Point getCenter() { |
| - return getShape().getCenter(); |
| - } |
| - |
| - @Override |
| - public boolean equals(Object obj) { |
| - //this method isn't "normally" called; just in asserts/tests |
| - if (obj instanceof Cell) { |
| - Cell cell = (Cell) obj; |
| - return getTokenBytes(null).equals(cell.getTokenBytes(null)); |
| - } else { |
| - return false; |
| - } |
| - } |
| - |
| - @Override |
| - public int hashCode() { |
| - return getTokenBytesNoLeaf(null).hashCode(); |
| - } |
| - |
| - @Override |
| - public String toString() { |
| - //this method isn't "normally" called; just in asserts/tests |
| - return getTokenBytes(null).utf8ToString(); |
| - } |
| - |
| - /** |
| - * Returns if the target term is within/underneath this cell; not necessarily a direct descendant. |
| - * @param bytesNoLeaf must be getTokenBytesNoLeaf |
| - * @param term the term |
| - */ |
| - public boolean isWithin(BytesRef bytesNoLeaf, BytesRef term) { |
| - assert bytesNoLeaf.equals(getTokenBytesNoLeaf(null)); |
| - return StringHelper.startsWith(term, bytesNoLeaf); |
| - } |
| + /** Equivalent to {@code this.getTokenBytesNoLeaf(null).compareTo(fromCell.getTokenBytesNoLeaf(null))}. */ |
| + int compareToNoLeaf(Cell fromCell); |
| } |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/package.html |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/package.html (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/package.html (revision ) |
| @@ -16,7 +16,8 @@ |
| --> |
| <html> |
| <body> |
| -The Spatial Prefix package supports spatial indexing by index-time tokens |
| +This package is about SpatialPrefixTree and any supporting classes. |
| +A SpatialPrefixTree supports spatial indexing by index-time tokens |
| where adding characters to a string gives greater resolution. |
| <p> |
| Potential Implementations include: |
| Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java (revision ) |
| @@ -97,12 +97,9 @@ |
| //A factory for the prefix tree grid |
| SpatialPrefixTree grid = SpatialPrefixTreeFactory.makeSPT(configMap, null, ctx); |
| |
| - RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategy(grid, SPATIAL_FIELD) { |
| - { |
| - //protected field |
| - this.pointsOnly = config.get("spatial.docPointsOnly", false); |
| - } |
| - }; |
| + RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategy(grid, SPATIAL_FIELD); |
| + strategy.setPointsOnly(config.get("spatial.docPointsOnly", false)); |
| + strategy.setPruneLeafyBranches(config.get("spatial.pruneLeafyBranches", true)); |
| |
| int prefixGridScanLevel = config.get("query.spatial.prefixGridScanLevel", -4); |
| if (prefixGridScanLevel < 0) |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java (revision ) |
| @@ -17,22 +17,26 @@ |
| * limitations under the License. |
| */ |
| |
| +import com.spatial4j.core.shape.Point; |
| import com.spatial4j.core.shape.Shape; |
| import org.apache.lucene.queries.TermsFilter; |
| import org.apache.lucene.search.Filter; |
| import org.apache.lucene.spatial.prefix.tree.Cell; |
| +import org.apache.lucene.spatial.prefix.tree.CellIterator; |
| import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; |
| import org.apache.lucene.spatial.query.SpatialArgs; |
| import org.apache.lucene.spatial.query.SpatialOperation; |
| import org.apache.lucene.spatial.query.UnsupportedSpatialOperation; |
| import org.apache.lucene.util.BytesRef; |
| |
| +import java.util.ArrayList; |
| import java.util.List; |
| |
| /** |
| - * A basic implementation of {@link PrefixTreeStrategy} using a large {@link |
| - * TermsFilter} of all the cells from {@link SpatialPrefixTree#getCells(com.spatial4j.core.shape.Shape, |
| - * int, boolean, boolean)}. It only supports the search of indexed Point shapes. |
| + * A basic implementation of {@link PrefixTreeStrategy} using a large |
| + * {@link TermsFilter} of all the cells from |
| + * {@link SpatialPrefixTree#getTreeCellIterator(com.spatial4j.core.shape.Shape, int)}. |
| + * It only supports the search of indexed Point shapes. |
| * <p/> |
| * The precision of query shapes (distErrPct) is an important factor in using |
| * this Strategy. If the precision is too precise then it will result in many |
| @@ -42,9 +46,10 @@ |
| */ |
| public class TermQueryPrefixTreeStrategy extends PrefixTreeStrategy { |
| |
| + protected boolean simplifyIndexedCells = false; |
| + |
| public TermQueryPrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) { |
| - super(grid, fieldName, |
| - false);//do not simplify indexed cells |
| + super(grid, fieldName); |
| } |
| |
| @Override |
| @@ -55,14 +60,36 @@ |
| |
| Shape shape = args.getShape(); |
| int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct)); |
| - List<Cell> cells = grid.getCells(shape, detailLevel, |
| - false,//no parents |
| - true);//simplify |
| - BytesRef[] terms = new BytesRef[cells.size()]; |
| - int i = 0; |
| - for (Cell cell : cells) { |
| - terms[i++] = cell.getTokenBytesNoLeaf(null); |
| + |
| + //--get a List of BytesRef for each term we want (no parents, no leaf bytes) |
| + final int GUESS_NUM_TERMS; |
| + if (shape instanceof Point) |
| + GUESS_NUM_TERMS = detailLevel;//perfect guess |
| + else |
| + GUESS_NUM_TERMS = 4096;//should this be a method on SpatialPrefixTree? |
| + |
| + BytesRef masterBytes = new BytesRef(GUESS_NUM_TERMS*detailLevel);//shared byte array for all terms |
| + List<BytesRef> terms = new ArrayList<>(GUESS_NUM_TERMS); |
| + |
| + CellIterator cells = grid.getTreeCellIterator(shape, detailLevel); |
| + while (cells.hasNext()) { |
| + Cell cell = cells.next(); |
| + if (!cell.isLeaf()) |
| + continue; |
| + BytesRef term = cell.getTokenBytesNoLeaf(null);//null because we want a new BytesRef |
| + //We copy out the bytes because it may be re-used across the iteration. This also gives us the opportunity |
| + // to use one contiguous block of memory for the bytes of all terms we need. |
| + masterBytes.grow(masterBytes.length + term.length); |
| + masterBytes.append(term); |
| + term.bytes = null;//don't need; will reset later |
| + term.offset = masterBytes.length - term.length; |
| + terms.add(term); |
| } |
| + //doing this now because if we did earlier, it's possible the bytes needed to grow() |
| + for (BytesRef byteRef : terms) { |
| + byteRef.bytes = masterBytes.bytes; |
| + } |
| + //unfortunately TermsFilter will needlessly sort & dedupe |
| return new TermsFilter(getFieldName(), terms); |
| } |
| |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java (revision ) |
| @@ -38,7 +38,7 @@ |
| * |
| * @lucene.experimental |
| */ |
| -public class QuadPrefixTree extends SpatialPrefixTree { |
| +public class QuadPrefixTree extends LegacyPrefixTree { |
| |
| /** |
| * Factory for creating {@link QuadPrefixTree} instances with useful defaults |
| @@ -115,6 +115,11 @@ |
| this(ctx, ctx.getWorldBounds(), maxLevels); |
| } |
| |
| + @Override |
| + public Cell getWorldCell() { |
| + return new QuadCell(BytesRef.EMPTY_BYTES, 0, 0); |
| + } |
| + |
| public void printInfo(PrintStream out) { |
| NumberFormat nf = NumberFormat.getNumberInstance(Locale.ROOT); |
| nf.setMaximumFractionDigits(5); |
| @@ -147,11 +152,6 @@ |
| return cells.get(0);//note cells could be longer if p on edge |
| } |
| |
| - @Override |
| - public Cell getCell(byte[] bytes, int offset, int len) { |
| - return new QuadCell(bytes, offset, len); |
| - } |
| - |
| private void build( |
| double x, |
| double y, |
| @@ -214,7 +214,7 @@ |
| str.length = strlen; |
| } |
| |
| - class QuadCell extends Cell{ |
| + private class QuadCell extends LegacyCell { |
| |
| QuadCell(byte[] bytes, int off, int len) { |
| super(bytes, off, len); |
| @@ -226,16 +226,10 @@ |
| } |
| |
| @Override |
| - protected SpatialPrefixTree getGrid() { return QuadPrefixTree.this; } |
| + protected QuadPrefixTree getGrid() { return QuadPrefixTree.this; } |
| |
| @Override |
| - public void reset(byte[] bytes, int off, int len) { |
| - super.reset(bytes, off, len); |
| - shape = null; |
| - } |
| - |
| - @Override |
| - public Collection<Cell> getSubCells() { |
| + protected Collection<Cell> getSubCells() { |
| BytesRef source = getTokenBytesNoLeaf(null); |
| BytesRef target = new BytesRef(); |
| |
| @@ -262,11 +256,9 @@ |
| } |
| |
| @Override |
| - public Cell getSubCell(Point p) { |
| - return QuadPrefixTree.this.getCell(p, getLevel() + 1);//not performant! |
| + protected QuadCell getSubCell(Point p) { |
| + return (QuadCell) QuadPrefixTree.this.getCell(p, getLevel() + 1);//not performant! |
| } |
| - |
| - private Shape shape;//cache |
| |
| @Override |
| public Shape getShape() { |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/WithinPrefixTreeFilter.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/WithinPrefixTreeFilter.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/WithinPrefixTreeFilter.java (revision ) |
| @@ -27,13 +27,12 @@ |
| import org.apache.lucene.index.AtomicReaderContext; |
| import org.apache.lucene.search.DocIdSet; |
| import org.apache.lucene.spatial.prefix.tree.Cell; |
| +import org.apache.lucene.spatial.prefix.tree.CellIterator; |
| import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.FixedBitSet; |
| |
| import java.io.IOException; |
| -import java.util.Collection; |
| -import java.util.Iterator; |
| |
| /** |
| * Finds docs where its indexed shape is {@link org.apache.lucene.spatial.query.SpatialOperation#IsWithin |
| @@ -136,9 +135,9 @@ |
| } |
| |
| @Override |
| - protected Iterator<Cell> findSubCellsToVisit(Cell cell) { |
| + protected CellIterator findSubCellsToVisit(Cell cell) { |
| //use buffered query shape instead of orig. Works with null too. |
| - return cell.getSubCells(bufferedQueryShape).iterator(); |
| + return cell.getNextLevelCells(bufferedQueryShape); |
| } |
| |
| @Override |
| @@ -183,10 +182,10 @@ |
| if (relate == SpatialRelation.DISJOINT) |
| return false; |
| // Note: Generating all these cells just to determine intersection is not ideal. |
| - // It was easy to implement but could be optimized. For example if the docs |
| - // in question are already marked in the 'outside' bitset then it can be avoided. |
| - Collection<Cell> subCells = cell.getSubCells(null); |
| - for (Cell subCell : subCells) { |
| + // The real solution is LUCENE-4869. |
| + CellIterator subCells = cell.getNextLevelCells(null); |
| + while (subCells.hasNext()) { |
| + Cell subCell = subCells.next(); |
| if (!allCellsIntersectQuery(subCell, null))//recursion |
| return false; |
| } |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SingletonCellIterator.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SingletonCellIterator.java (revision ) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SingletonCellIterator.java (revision ) |
| @@ -0,0 +1,37 @@ |
| +package org.apache.lucene.spatial.prefix.tree; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * A singleton (one Cell) instance of CellIterator. |
| + * |
| + * @lucene.internal |
| + */ |
| +class SingletonCellIterator extends CellIterator { |
| + |
| + SingletonCellIterator(Cell cell) { |
| + this.nextCell = cell;//preload nextCell |
| + } |
| + |
| + @Override |
| + public boolean hasNext() { |
| + thisCell = null; |
| + return nextCell != null; |
| + } |
| + |
| +} |
| Index: lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java (revision ) |
| @@ -61,7 +61,11 @@ |
| assertEquals(ctx.getWorldBounds(), c.getShape()); |
| while (c.getLevel() < trie.getMaxLevels()) { |
| prevC = c; |
| - List<Cell> subCells = new ArrayList<>(c.getSubCells()); |
| + List<Cell> subCells = new ArrayList<>(); |
| + CellIterator subCellsIter = c.getNextLevelCells(null); |
| + while (subCellsIter.hasNext()) { |
| + subCells.add(subCellsIter.next()); |
| + } |
| c = subCells.get(random().nextInt(subCells.size()-1)); |
| |
| assertEquals(prevC.getLevel()+1,c.getLevel()); |
| @@ -73,7 +77,7 @@ |
| } |
| } |
| /** |
| - * A PrefixTree pruning optimization gone bad. |
| + * A PrefixTree pruning optimization gone bad, applicable when optimize=true. |
| * See <a href="https://issues.apache.org/jira/browse/LUCENE-4770>LUCENE-4770</a>. |
| */ |
| @Test |
| \ No newline at end of file |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java (revision ) |
| @@ -52,6 +52,15 @@ |
| |
| @Override |
| public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { |
| + /* Possible optimizations (in addition to those listed in VisitorTemplate): |
| + |
| + * If docFreq is 1 (or less than some small threshold), then check to see if we've already |
| + collected it; if so short-circuit. Don't do this just for point data, as there is |
| + no benefit, or only marginal benefit when multi-valued. |
| + |
| + * Point query shape optimization when the only indexed data is a point (no leaves). Result is a term query. |
| + |
| + */ |
| return new VisitorTemplate(context, acceptDocs, hasIndexedLeaves) { |
| private FixedBitSet results; |
| |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/TreeCellIterator.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/TreeCellIterator.java (revision ) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/TreeCellIterator.java (revision ) |
| @@ -0,0 +1,89 @@ |
| +package org.apache.lucene.spatial.prefix.tree; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import com.spatial4j.core.shape.Point; |
| +import com.spatial4j.core.shape.Shape; |
| + |
| +/** |
| + * Navigates a {@link org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree} from a given cell (typically the world |
| + * cell) down to a maximum number of configured levels, filtered by a given shape. Intermediate non-leaf cells are |
| + * returned. It supports {@link #remove()} for skipping traversal of subcells of the current cell. |
| + * |
| + * @lucene.internal |
| + */ |
| +class TreeCellIterator extends CellIterator { |
| + //This class uses a stack approach, which is more efficient than creating linked nodes. And it might more easily |
| + // pave the way for re-using Cell & CellIterator at a given level in the future. |
| + |
| + private final Shape shapeFilter;//possibly null |
| + private final CellIterator[] iterStack; |
| + private int stackIdx;//-1 when done |
| + private boolean descend; |
| + |
| + public TreeCellIterator(Shape shapeFilter, int detailLevel, Cell parentCell) { |
| + this.shapeFilter = shapeFilter; |
| + assert parentCell.getLevel() == 0; |
| + iterStack = new CellIterator[detailLevel]; |
| + CellIterator nextLevelCells = parentCell.getNextLevelCells(shapeFilter); |
| + iterStack[0] = nextLevelCells; |
| + stackIdx = 0;//always points to an iter (non-null) |
| + //note: not obvious but needed to visit the first cell before trying to descend |
| + descend = false; |
| + } |
| + |
| + @Override |
| + public boolean hasNext() { |
| + if (nextCell != null) |
| + return true; |
| + while (true) { |
| + if (stackIdx == -1)//the only condition in which we return false |
| + return false; |
| + //If we can descend... |
| + if (descend && !(stackIdx == iterStack.length - 1 || iterStack[stackIdx].thisCell().isLeaf())) { |
| + CellIterator nextIter = iterStack[stackIdx].thisCell().getNextLevelCells(shapeFilter); |
| + //push stack |
| + iterStack[++stackIdx] = nextIter; |
| + } |
| + //Get sibling... |
| + if (iterStack[stackIdx].hasNext()) { |
| + nextCell = iterStack[stackIdx].next(); |
| + //at detailLevel |
| + if (stackIdx == iterStack.length - 1 && !(shapeFilter instanceof Point)) //point check is a kludge |
| + nextCell.setLeaf();//because at bottom |
| + break; |
| + } |
| + //Couldn't get next; go up... |
| + //pop stack |
| + iterStack[stackIdx--] = null; |
| + descend = false;//so that we don't re-descend where we just were |
| + } |
| + assert nextCell != null; |
| + descend = true;//reset |
| + return true; |
| + } |
| + |
| + @Override |
| + public void remove() { |
| + assert thisCell() != null && nextCell == null; |
| + descend = false; |
| + } |
| + |
| + //TODO implement a smart nextFrom() that looks at the parent's bytes first |
| + |
| +} |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/FilterCellIterator.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/FilterCellIterator.java (revision ) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/FilterCellIterator.java (revision ) |
| @@ -0,0 +1,62 @@ |
| +package org.apache.lucene.spatial.prefix.tree; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import com.spatial4j.core.shape.Shape; |
| +import com.spatial4j.core.shape.SpatialRelation; |
| + |
| +import java.util.Iterator; |
| + |
| +/** |
| + * A filtering iterator of Cells. Those not matching the provided shape (disjoint) are |
| + * skipped. If {@code shapeFilter} is null then all cells are returned. |
| + * |
| + * @lucene.internal |
| + */ |
| +class FilterCellIterator extends CellIterator { |
| + final Iterator<Cell> baseIter; |
| + final Shape shapeFilter; |
| + |
| + FilterCellIterator(Iterator<Cell> baseIter, Shape shapeFilter) { |
| + this.baseIter = baseIter; |
| + this.shapeFilter = shapeFilter; |
| + } |
| + |
| + @Override |
| + public boolean hasNext() { |
| + thisCell = null; |
| + if (nextCell != null)//calling hasNext twice in a row |
| + return true; |
| + while (baseIter.hasNext()) { |
| + nextCell = baseIter.next(); |
| + if (shapeFilter == null) { |
| + return true; |
| + } else { |
| + SpatialRelation rel = nextCell.getShape().relate(shapeFilter); |
| + if (rel.intersects()) { |
| + nextCell.setShapeRel(rel); |
| + if (rel == SpatialRelation.WITHIN) |
| + nextCell.setLeaf(); |
| + return true; |
| + } |
| + } |
| + } |
| + return false; |
| + } |
| + |
| +} |