blob: b2895a88fe7d31b71e5bde34ee349d8cb2f60060 [file] [log] [blame]
Index: lucene/spatial/src/java/org/apache/lucene/spatial/serialized/SerializedDVStrategy.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/serialized/SerializedDVStrategy.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/serialized/SerializedDVStrategy.java (revision )
@@ -50,7 +50,7 @@
/**
* A SpatialStrategy based on serializing a Shape stored into BinaryDocValues.
- * This is not at all fast; it's designed to be used in conjuction with another index based
+ * This is not at all fast; it's designed to be used in conjunction with another index based
* SpatialStrategy that is approximated (like {@link org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy})
* to add precision or eventually make more specific / advanced calculations on the per-document
* geometry.
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/CellTokenStream.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/CellTokenStream.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/CellTokenStream.java (revision )
@@ -111,7 +111,7 @@
if (omitLeafByte)
cell.getTokenBytesNoLeaf(bytes);
else
- cell.getTokenBytes(bytes);
+ cell.getTokenBytesWithLeaf(bytes);
return bytes.hashCode();
}
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PointPrefixTreeFieldCacheProvider.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PointPrefixTreeFieldCacheProvider.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PointPrefixTreeFieldCacheProvider.java (revision )
@@ -24,28 +24,25 @@
import org.apache.lucene.util.BytesRef;
/**
- * Implementation of {@link ShapeFieldCacheProvider} designed for {@link PrefixTreeStrategy}s.
+ * Implementation of {@link ShapeFieldCacheProvider} designed for {@link PrefixTreeStrategy}s that index points.
*
- * Note, due to the fragmented representation of Shapes in these Strategies, this implementation
- * can only retrieve the central {@link Point} of the original Shapes.
- *
* @lucene.internal
*/
public class PointPrefixTreeFieldCacheProvider extends ShapeFieldCacheProvider<Point> {
- final SpatialPrefixTree grid; //
+ private final SpatialPrefixTree grid;
+ private Cell scanCell;
public PointPrefixTreeFieldCacheProvider(SpatialPrefixTree grid, String shapeField, int defaultSize) {
super( shapeField, defaultSize );
this.grid = grid;
+ this.scanCell = grid.getWorldCell();//re-used in readShape to save GC
}
- private Cell scanCell = null;//re-used in readShape to save GC
-
@Override
protected Point readShape(BytesRef term) {
- scanCell = grid.getCell(term.bytes, term.offset, term.length, scanCell);
- if (scanCell.getLevel() == grid.getMaxLevels() && !scanCell.isLeaf())
+ scanCell.readCell(term);
+ if (scanCell.getLevel() == grid.getMaxLevels() && !scanCell.isLeaf())//points are never flagged as leaf
return scanCell.getShape().getCenter();
return null;
}
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision )
@@ -17,14 +17,21 @@
* limitations under the License.
*/
+import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.search.Filter;
import org.apache.lucene.spatial.DisjointSpatialFilter;
+import org.apache.lucene.spatial.prefix.tree.Cell;
+import org.apache.lucene.spatial.prefix.tree.CellIterator;
+import org.apache.lucene.spatial.prefix.tree.LegacyCell;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.spatial.query.UnsupportedSpatialOperation;
+import java.util.ArrayList;
+import java.util.List;
+
/**
* A {@link PrefixTreeStrategy} which uses {@link AbstractVisitingPrefixTreeFilter}.
* This strategy has support for searching non-point shapes (note: not tested).
@@ -35,19 +42,26 @@
* @lucene.experimental
*/
public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
+ /* Future potential optimizations:
- private int prefixGridScanLevel;
+ Each shape.relate(otherShape) result could be cached since much of the same relations will be invoked when
+ multiple segments are involved. Do this for "complex" shapes, not cheap ones, and don't cache when disjoint to
+ bbox because it's a cheap calc. This is one advantage TermQueryPrefixTreeStrategy has over RPT.
- /** True if only indexed points shall be supported. See
- * {@link IntersectsPrefixTreeFilter#hasIndexedLeaves}. */
- protected boolean pointsOnly = false;
+ */
- /** See {@link ContainsPrefixTreeFilter#multiOverlappingIndexedShapes}. */
+ protected int prefixGridScanLevel;
+
+ //Formerly known as simplifyIndexedCells. Eventually will be removed. Only compatible with RPT
+ // and a LegacyPrefixTree.
+ protected boolean pruneLeafyBranches = true;
+
+ protected boolean pointsOnly = false;//if true, there are no leaves
+
protected boolean multiOverlappingIndexedShapes = true;
public RecursivePrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) {
- super(grid, fieldName,
- true);//simplify indexed cells
+ super(grid, fieldName);
prefixGridScanLevel = grid.getMaxLevels() - 4;//TODO this default constant is dependent on the prefix grid size
}
@@ -63,9 +77,88 @@
this.prefixGridScanLevel = prefixGridScanLevel;
}
+ /** True if only indexed points shall be supported. There are no leaves in such a case. See
+ * {@link IntersectsPrefixTreeFilter#hasIndexedLeaves}. */
+ public void setPointsOnly(boolean pointsOnly) {
+ this.pointsOnly = pointsOnly;
+ }
+
+ /** See {@link ContainsPrefixTreeFilter#multiOverlappingIndexedShapes}. */
+ public void setMultiOverlappingIndexedShapes(boolean multiOverlappingIndexedShapes) {
+ this.multiOverlappingIndexedShapes = multiOverlappingIndexedShapes;
+ }
+
+ /** An optional hint affecting non-point shapes: it will
+ * simplify/aggregate sets of complete leaves in a cell to its parent, resulting in ~20-25%
+ * fewer indexed cells. However, it will likely be removed in the future. (default=true)
+ */
+ public void setPruneLeafyBranches(boolean pruneLeafyBranches) {
+ this.pruneLeafyBranches = pruneLeafyBranches;
+ }
+
@Override
public String toString() {
- return getClass().getSimpleName()+"(prefixGridScanLevel:"+prefixGridScanLevel+",SPG:("+ grid +"))";
+ StringBuilder str = new StringBuilder(getClass().getSimpleName()).append('(');//e.g. "RecursivePrefixTreeStrategy("
+ str.append("SPG:(").append(grid.toString()).append(')');//the grid is always reported
+ if (pointsOnly)
+ str.append(",pointsOnly");
+ if (pruneLeafyBranches)
+ str.append(",pruneLeafyBranches");
+ if (prefixGridScanLevel != grid.getMaxLevels() - 4)//reported only when it differs from the constructor's default
+ str.append(",prefixGridScanLevel:").append(prefixGridScanLevel);//append(int) avoids a temporary String
+ if (!multiOverlappingIndexedShapes)
+ str.append(",!multiOverlappingIndexedShapes");
+ return str.append(')').toString();
+ }
+
+ @Override
+ protected CellTokenStream createTokenStream(Shape shape, int detailLevel) {
+ if (shape instanceof Point || !pruneLeafyBranches)
+ return super.createTokenStream(shape, detailLevel);
+
+ List<Cell> cells = new ArrayList<>(4096);
+ recursiveTraverseAndPrune(grid.getWorldCell(), shape, detailLevel, cells);
+ return new CellTokenStream().setCells(cells.iterator());
+ }
+
+ /** Appends cell (unless it is the level 0 cell) and, recursively, the sub-cells yielded by getNextLevelCells(shape) to result. Returns true if cell ended up as a leaf (added as one, or pruned into one); false if it was only descended into. */
+ private boolean recursiveTraverseAndPrune(Cell cell, Shape shape, int detailLevel, List<Cell> result) {
+ // Important: this logic assumes Cells don't share anything with other cells when
+ // calling cell.getNextLevelCells(). This is only true for LegacyCell.
+ if (!(cell instanceof LegacyCell))
+ throw new IllegalStateException("pruneLeafyBranches must be disabled for use with grid "+grid);
+
+ if (cell.getLevel() == detailLevel) {
+ cell.setLeaf();//FYI might already be a leaf
+ }
+ if (cell.isLeaf()) {
+ result.add(cell);
+ return true;
+ }
+ if (cell.getLevel() != 0)//the level 0 cell is never emitted
+ result.add(cell);
+
+ int leaves = 0;//number of direct sub-cells that came back as leaves
+ CellIterator subCells = cell.getNextLevelCells(shape);
+ while (subCells.hasNext()) {
+ Cell subCell = subCells.next();
+ if (recursiveTraverseAndPrune(subCell, shape, detailLevel, result))
+ leaves++;
+ }
+ //can we prune? Only when the leaf count equals getSubCellsSize(), and never for the level 0 cell
+ if (leaves == ((LegacyCell)cell).getSubCellsSize() && cell.getLevel() != 0) {
+ //Optimization: substitute the parent as a leaf instead of adding all
+ // children as leaves
+
+ //remove the leaves; each leaf child left exactly one entry at the tail of result
+ do {
+ result.remove(result.size() - 1);//remove last
+ } while (--leaves > 0);
+ //cell itself is already in result (added above, before descending); just flag it as the leaf
+ cell.setLeaf();
+ return true;
+ }
+ return false;
}
@Override
Index: lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java (revision )
@@ -33,6 +33,7 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.spatial.StrategyTestCase;
import org.apache.lucene.spatial.prefix.tree.Cell;
+import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree;
import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
@@ -63,7 +64,7 @@
public class SpatialOpRecursivePrefixTreeTest extends StrategyTestCase {
- static final int ITERATIONS = 1;//Test Iterations
+ static final int ITERATIONS = 10;
private SpatialPrefixTree grid;
@@ -80,6 +81,8 @@
setupGeohashGrid(maxLevels);
//((PrefixTreeStrategy) strategy).setDistErrPct(0);//fully precise to grid
+ ((RecursivePrefixTreeStrategy)strategy).setPruneLeafyBranches(randomBoolean());
+
System.out.println("Strategy: " + strategy.toString());
}
@@ -378,11 +381,14 @@
double distErrPct = ((PrefixTreeStrategy) strategy).getDistErrPct();
double distErr = SpatialArgs.calcDistanceFromErrPct(snapMe, distErrPct, ctx);
int detailLevel = grid.getLevelForDistance(distErr);
- List<Cell> cells = grid.getCells(snapMe, detailLevel, false, true);
+ CellIterator cells = grid.getTreeCellIterator(snapMe, detailLevel);
//calc bounding box of cells.
- List<Shape> cellShapes = new ArrayList<>(cells.size());
- for (Cell cell : cells) {
+ List<Shape> cellShapes = new ArrayList<>(1024);
+ while (cells.hasNext()) {
+ Cell cell = cells.next();
+ if (!cell.isLeaf())
+ continue;
cellShapes.add(cell.getShape());
}
return new ShapeCollection<>(cellShapes, ctx).getBoundingBox();
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java (revision )
@@ -22,12 +22,12 @@
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.util.BytesRef;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
-
/**
* A {@link SpatialPrefixTree} based on
* <a href="http://en.wikipedia.org/wiki/Geohash">Geohashes</a>.
@@ -35,7 +35,7 @@
*
* @lucene.experimental
*/
-public class GeohashPrefixTree extends SpatialPrefixTree {
+public class GeohashPrefixTree extends LegacyPrefixTree {
/**
* Factory for creating {@link GeohashPrefixTree} instances with useful defaults
@@ -71,6 +71,11 @@
}
@Override
+ public Cell getWorldCell() {
+ return new GhCell(BytesRef.EMPTY_BYTES, 0, 0);
+ }
+
+ @Override
public int getLevelForDistance(double dist) {
if (dist == 0)
return maxLevels;//short circuit
@@ -79,15 +84,10 @@
}
@Override
- public Cell getCell(Point p, int level) {
+ protected Cell getCell(Point p, int level) {
return new GhCell(GeohashUtils.encodeLatLon(p.getY(), p.getX(), level));//args are lat,lon (y,x)
}
- @Override
- public Cell getCell(byte[] bytes, int offset, int len) {
- return new GhCell(bytes, offset, len);
- }
-
private static byte[] stringToBytesPlus1(String token) {
//copy ASCII token to byte array with one extra spot for eventual LEAF_BYTE if needed
byte[] bytes = new byte[token.length() + 1];
@@ -97,9 +97,8 @@
return bytes;
}
- class GhCell extends Cell {
+ private class GhCell extends LegacyCell {
- private Shape shape;//cache
private String geohash;//cache; never has leaf byte, simply a geohash
GhCell(String geohash) {
@@ -114,13 +113,12 @@
}
@Override
- protected SpatialPrefixTree getGrid() { return GeohashPrefixTree.this; }
+ protected GeohashPrefixTree getGrid() { return GeohashPrefixTree.this; }
@Override
- public void reset(byte[] bytes, int off, int len) {
- super.reset(bytes, off, len);
+ public void readCell(BytesRef bytesRef) {
+ super.readCell(bytesRef);
geohash = null;
- shape = null;
}
@Override
@@ -139,8 +137,8 @@
}
@Override
- public Cell getSubCell(Point p) {
- return getGrid().getCell(p, getLevel() + 1);//not performant!
+ protected GhCell getSubCell(Point p) {
+ return (GhCell) getGrid().getCell(p, getLevel() + 1);//not performant!
}
@Override
@@ -149,11 +147,6 @@
shape = GeohashUtils.decodeBoundary(getGeohash(), getGrid().getSpatialContext());
}
return shape;
- }
-
- @Override
- public Point getCenter() {
- return GeohashUtils.decode(getGeohash(), getGrid().getSpatialContext());
}
private String getGeohash() {
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java (revision )
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java (revision )
@@ -0,0 +1,252 @@
+package org.apache.lucene.spatial.prefix.tree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.spatial4j.core.shape.Point;
+import com.spatial4j.core.shape.Shape;
+import com.spatial4j.core.shape.SpatialRelation;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.StringHelper;
+
+import java.util.Collection;
+
+/** The base for the original two SPTs: Geohash and Quad. Don't subclass this for new SPTs.
+ * @lucene.internal */
+//public for RPT pruneLeafyBranches code
+public abstract class LegacyCell implements Cell {
+
+ // Important: A LegacyCell doesn't share state for getNextLevelCells(), and
+ // RecursivePrefixTreeStrategy assumes this in its pruneLeafyBranches logic.
+
+ private static final byte LEAF_BYTE = '+';//NOTE: must sort before letters & numbers
+
+ //Arguably we could simply use a BytesRef, using an extra Object.
+ private byte[] bytes;//generally bigger to potentially hold a leaf
+ private int b_off;
+ private int b_len;//doesn't reflect leaf; same as getLevel()
+
+ protected boolean isLeaf;
+
+ /**
+ * When set via {@link #getNextLevelCells(Shape)}, it is the relationship between this cell
+ * and the given shape filter. Doesn't participate in {@link #equals(Object)}.
+ */
+ protected SpatialRelation shapeRel;
+
+ protected Shape shape;//cached
+
+ /** Warning: Refers to the same bytes (no copy). If {@link #setLeaf()} is subsequently called then it
+ * may modify bytes. */
+ protected LegacyCell(byte[] bytes, int off, int len) {
+ this.bytes = bytes;
+ this.b_off = off;
+ this.b_len = len;
+ readLeafAdjust();
+ }
+
+ @Override
+ public void readCell(BytesRef bytes) {
+ shapeRel = null;
+ shape = null;
+ this.bytes = bytes.bytes;
+ this.b_off = bytes.offset;
+ this.b_len = bytes.length;
+ readLeafAdjust();
+ }
+
+ private void readLeafAdjust() {//derives isLeaf from a trailing LEAF_BYTE and drops that byte from b_len
+ isLeaf = (b_len > 0 && bytes[b_off + b_len - 1] == LEAF_BYTE);//an empty token is never a leaf
+ if (isLeaf)
+ b_len--;//b_len doubles as getLevel(), so the leaf marker must not be counted
+ }
+
+// @Override
+// public void copyFrom(Cell source) {
+// LegacyCell src = (LegacyCell) source;
+// shapeRel = src.shapeRel;
+// shape = src.shape;
+// isLeaf = src.isLeaf;
+// //we don't actually copy the bytes because in LegacyCell the bytes aren't modified. (leaf byte doesn't count)
+// bytes = src.bytes;
+// b_off = src.b_off;
+// b_len = src.b_len;
+// }
+
+ protected abstract SpatialPrefixTree getGrid();
+
+ @Override
+ public SpatialRelation getShapeRel() {
+ return shapeRel;
+ }
+
+ @Override
+ public void setShapeRel(SpatialRelation rel) {
+ this.shapeRel = rel;
+ }
+
+ @Override
+ public boolean isLeaf() {
+ return isLeaf;
+ }
+
+ @Override
+ public void setLeaf() {
+ isLeaf = true;
+ }
+
+ @Override
+ public BytesRef getTokenBytesWithLeaf(BytesRef result) {//token bytes, plus a trailing LEAF_BYTE when this cell is a leaf
+ result = getTokenBytesNoLeaf(result);//result now points at this cell's own byte[] (no copy)
+ if (!isLeaf)
+ return result;
+ if (result.bytes.length < result.offset + result.length + 1) {//no spare slot after the token for the leaf byte
+ assert false : "Not supposed to happen; performance bug";
+ byte[] copy = new byte[result.length + 1];
+ System.arraycopy(result.bytes, result.offset, copy, 0, result.length);//copy the whole token; length-1 would drop its last byte
+ result.bytes = copy;
+ result.offset = 0;
+ }
+ result.bytes[result.offset + result.length++] = LEAF_BYTE;//writes into the spare slot and grows length by one
+ return result;
+ }
+
+ @Override
+ public BytesRef getTokenBytesNoLeaf(BytesRef result) {
+ if (result == null)
+ return new BytesRef(bytes, b_off, b_len);
+ result.bytes = bytes;
+ result.offset = b_off;
+ result.length = b_len;
+ return result;
+ }
+
+ @Override
+ public int getLevel() {
+ return b_len;
+ }
+
+ @Override
+ public CellIterator getNextLevelCells(Shape shapeFilter) {
+ assert getLevel() < getGrid().getMaxLevels();
+ if (shapeFilter instanceof Point) {
+ LegacyCell cell = getSubCell((Point) shapeFilter);
+ cell.shapeRel = SpatialRelation.CONTAINS;
+ return new SingletonCellIterator(cell);
+ } else {
+ return new FilterCellIterator(getSubCells().iterator(), shapeFilter);
+ }
+ }
+
+ /**
+ * Performant implementations are expected to implement this efficiently by
+ * considering the current cell's boundary.
+ * <p/>
+ * Precondition: Never called when getLevel() == maxLevel.
+ * Precondition: this.getShape().relate(p) != DISJOINT.
+ */
+ protected abstract LegacyCell getSubCell(Point p);
+
+ /**
+ * Gets the cells at the next grid cell level that covers this cell.
+ * Precondition: Never called when getLevel() == maxLevel.
+ *
+ * @return A set of cells (no dups), sorted, modifiable, not empty, not null.
+ */
+ protected abstract Collection<Cell> getSubCells();
+
+ /**
+ * {@link #getSubCells()}.size() -- usually a constant. Should be >=2
+ */
+ public abstract int getSubCellsSize();
+
+ @Override
+ public boolean isPrefixOf(Cell c) {
+ //Note: this only works when each level uses a whole number of bytes.
+ LegacyCell cell = (LegacyCell)c;
+ boolean result = sliceEquals(cell.bytes, cell.b_off, cell.b_len, bytes, b_off, b_len);
+ assert result == StringHelper.startsWith(c.getTokenBytesNoLeaf(null), getTokenBytesNoLeaf(null));
+ return result;
+ }
+
+ /** Copied from {@link org.apache.lucene.util.StringHelper#startsWith(BytesRef, BytesRef)}
+ * which calls this. This is to avoid creating a BytesRef. */
+ private static boolean sliceEquals(byte[] sliceToTest_bytes, int sliceToTest_offset, int sliceToTest_length,
+ byte[] other_bytes, int other_offset, int other_length) {
+ if (sliceToTest_length < other_length) {
+ return false;
+ }
+ int i = sliceToTest_offset;
+ int j = other_offset;
+ final int k = other_offset + other_length;
+
+ while (j < k) {
+ if (sliceToTest_bytes[i++] != other_bytes[j++]) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ @Override
+ public int compareToNoLeaf(Cell fromCell) {
+ LegacyCell b = (LegacyCell) fromCell;
+ return compare(bytes, b_off, b_len, b.bytes, b.b_off, b.b_len);
+ }
+
+ /** Copied from {@link BytesRef#compareTo(BytesRef)}.
+ * This is to avoid creating a BytesRef. */
+ private static int compare(byte[] aBytes, int aUpto, int a_length, byte[] bBytes, int bUpto, int b_length) {
+ final int aStop = aUpto + Math.min(a_length, b_length);
+ while(aUpto < aStop) {
+ int aByte = aBytes[aUpto++] & 0xff;
+ int bByte = bBytes[bUpto++] & 0xff;
+
+ int diff = aByte - bByte;
+ if (diff != 0) {
+ return diff;
+ }
+ }
+
+ // One is a prefix of the other, or, they are equal:
+ return a_length - b_length;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ //this method isn't "normally" called; just in asserts/tests
+ if (obj instanceof Cell) {
+ Cell cell = (Cell) obj;
+ return getTokenBytesWithLeaf(null).equals(cell.getTokenBytesWithLeaf(null));
+ } else {
+ return false;
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ return getTokenBytesWithLeaf(null).hashCode();
+ }
+
+ @Override
+ public String toString() {
+ //this method isn't "normally" called; just in asserts/tests
+ return getTokenBytesWithLeaf(null).utf8ToString();
+ }
+
+}
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java (revision )
@@ -18,22 +18,12 @@
*/
import com.spatial4j.core.context.SpatialContext;
-import com.spatial4j.core.shape.Point;
-import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.util.BytesRef;
-import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-
/**
* A spatial Prefix Tree, or Trie, which decomposes shapes into prefixed strings
- * at variable lengths corresponding to variable precision. Each string
+ * at variable lengths corresponding to variable precision. Each string
* corresponds to a rectangular spatial region. This approach is
* also referred to "Grids", "Tiles", and "Spatial Tiers".
* <p/>
@@ -44,8 +34,6 @@
*/
public abstract class SpatialPrefixTree {
- protected static final Charset UTF8 = StandardCharsets.UTF_8;
-
protected final int maxLevels;
protected final SpatialContext ctx;
@@ -83,155 +71,40 @@
/**
* Given a cell having the specified level, returns the distance from opposite
- * corners. Since this might very depending on where the cell is, this method
+ * corners. Since this might vary depending on where the cell is, this method
* may over-estimate.
*
* @param level [1 to maxLevels]
* @return > 0
*/
- public double getDistanceForLevel(int level) {
- if (level < 1 || level > getMaxLevels())
- throw new IllegalArgumentException("Level must be in 1 to maxLevels range");
- //TODO cache for each level
- Cell cell = getCell(ctx.getWorldBounds().getCenter(), level);
- Rectangle bbox = cell.getShape().getBoundingBox();
- double width = bbox.getWidth();
- double height = bbox.getHeight();
- //Use standard cartesian hypotenuse. For geospatial, this answer is larger
- // than the correct one but it's okay to over-estimate.
- return Math.sqrt(width * width + height * height);
- }
+ public abstract double getDistanceForLevel(int level);
- private transient Cell worldCell;//cached
-
/**
- * Returns the level 0 cell which encompasses all spatial data. Equivalent to {@link #getCell(byte[], int, int)} with
- * no bytes. This cell is thread-safe, just like a spatial prefix grid is, although cells aren't
- * generally thread-safe.
+ * Returns the level 0 cell which encompasses all spatial data. Equivalent to {@link Cell#readCell(BytesRef)}
+ * with no bytes.
*/
- public Cell getWorldCell() {//another possible name: getTopCell
- if (worldCell == null) {
- worldCell = getCell(BytesRef.EMPTY_BYTES, 0, 0);
- worldCell.getShape();//lazy load; make thread-safe
- }
- return worldCell;
- }
+ public abstract Cell getWorldCell(); //another possible name: getTopCell
/**
- * The cell for the specified token. The empty string should be equal to {@link #getWorldCell()}.
- * Precondition: Never called when token length > maxLevel.
- */
- public abstract Cell getCell(byte[] bytes, int offset, int len);
-
- public final Cell getCell(byte[] bytes, int offset, int len, Cell target) {
- if (target == null) {
- return getCell(bytes, offset, len);
- }
-
- target.reset(bytes, offset, len);
- return target;
- }
-
- /**
- * Returns the cell containing point {@code p} at the specified {@code level}.
- */
- protected Cell getCell(Point p, int level) {
- return getCells(p, level, false).get(0);
- }
-
- /**
* Gets the intersecting cells for the specified shape, without exceeding
* detail level. If a cell is within the query shape then it's marked as a
- * leaf and none of its children are added.
- * <p/>
- * This implementation checks if shape is a Point and if so returns {@link
- * #getCells(com.spatial4j.core.shape.Point, int, boolean)}.
+ * leaf and none of its children are added. For cells at detailLevel, they are marked as
+ * leaves too, unless it's a point.
+ * <p>
+ * IMPORTANT: Cells returned from the iterator can be re-used for cells at the same level. So you can't simply
+ * iterate to subsequent cells and still refer to the former cell nor the bytes returned from the former cell, unless
+ * you know the former cell is a parent.
*
- * @param shape the shape; non-null
+ * @param shape the shape; possibly null but the caller should liberally call
+ * {@code remove()} if so.
* @param detailLevel the maximum detail level to get cells for
- * @param inclParents if true then all parent cells of leaves are returned
- * too. The top world cell is never returned.
- * @param simplify for non-point shapes, this will simply/aggregate sets of
- * complete leaves in a cell to its parent, resulting in
- * ~20-25% fewer cells.
- * @return a set of cells (no dups), sorted, immutable, non-null
+ * @return the matching cells
*/
- public List<Cell> getCells(Shape shape, int detailLevel, boolean inclParents,
- boolean simplify) {
- //TODO consider an on-demand iterator -- it won't build up all cells in memory.
+ public CellIterator getTreeCellIterator(Shape shape, int detailLevel) {
if (detailLevel > maxLevels) {
throw new IllegalArgumentException("detailLevel > maxLevels");
}
- if (shape instanceof Point) {
- return getCells((Point) shape, detailLevel, inclParents);
- }
- List<Cell> cells = new ArrayList<>(inclParents ? 4096 : 2048);
- recursiveGetCells(getWorldCell(), shape, detailLevel, inclParents, simplify, cells);
- return cells;
- }
-
- /**
- * Returns true if cell was added as a leaf. If it wasn't it recursively
- * descends.
- */
- private boolean recursiveGetCells(Cell cell, Shape shape, int detailLevel,
- boolean inclParents, boolean simplify,
- List<Cell> result) {
- if (cell.getLevel() == detailLevel) {
- cell.setLeaf();//FYI might already be a leaf
- }
- if (cell.isLeaf()) {
- result.add(cell);
- return true;
- }
- if (inclParents && cell.getLevel() != 0)
- result.add(cell);
-
- Collection<Cell> subCells = cell.getSubCells(shape);
- int leaves = 0;
- for (Cell subCell : subCells) {
- if (recursiveGetCells(subCell, shape, detailLevel, inclParents, simplify, result))
- leaves++;
- }
- //can we simplify?
- if (simplify && leaves == cell.getSubCellsSize() && cell.getLevel() != 0) {
- //Optimization: substitute the parent as a leaf instead of adding all
- // children as leaves
-
- //remove the leaves
- do {
- result.remove(result.size() - 1);//remove last
- } while (--leaves > 0);
- //add cell as the leaf
- cell.setLeaf();
- if (!inclParents) // otherwise it was already added up above
- result.add(cell);
- return true;
- }
- return false;
- }
-
- /**
- * A Point-optimized implementation of
- * {@link #getCells(com.spatial4j.core.shape.Shape, int, boolean, boolean)}. That
- * method in facts calls this for points.
- */
- public List<Cell> getCells(Point p, int detailLevel, boolean inclParents) {
- Cell cell = getCell(p, detailLevel);
- assert !cell.isLeaf();
- if (!inclParents || detailLevel == 1) {
- return Collections.singletonList(cell);
- }
-
- //fill in reverse order to be sorted
- Cell[] cells = new Cell[detailLevel];
- for (int i = detailLevel-1; true; i--) {
- cells[i] = cell;
- if (i == 0)
- break;
- cell = cell.getParent();
- }
- return Arrays.asList(cells);
+ return new TreeCellIterator(shape, detailLevel, getWorldCell());
}
}
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/ContainsPrefixTreeFilter.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/ContainsPrefixTreeFilter.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/ContainsPrefixTreeFilter.java (revision )
@@ -24,6 +24,7 @@
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.spatial.prefix.tree.Cell;
+import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -31,7 +32,6 @@
import java.io.IOException;
import java.util.Arrays;
-import java.util.Collection;
/**
* Finds docs where its indexed shape {@link org.apache.lucene.spatial.query.SpatialOperation#Contains
@@ -84,7 +84,7 @@
}
BytesRef termBytes = new BytesRef();//no leaf
- Cell nextCell;//see getLeafDocs
+ Cell nextCell = grid.getWorldCell();//see getLeafDocs
/** This is the primary algorithm; recursive. Returns null if finds none. */
private SmallDocSet visit(Cell cell, Bits acceptContains) throws IOException {
@@ -103,8 +103,9 @@
subCellsFilter = null;
assert cell.getShape().relate(queryShape) == SpatialRelation.WITHIN;
}
- Collection <Cell> subCells = cell.getSubCells(subCellsFilter);
- for (Cell subCell : subCells) {
+ CellIterator subCells = cell.getNextLevelCells(subCellsFilter);
+ while (subCells.hasNext()) {
+ Cell subCell = subCells.next();
if (!seekExact(subCell))
combinedSubResults = null;
else if (subCell.getLevel() == detailLevel)
@@ -131,9 +132,9 @@
private boolean seekExact(Cell cell) throws IOException {
assert cell.getTokenBytesNoLeaf(null).compareTo(termBytes) > 0;
- cell.getTokenBytesNoLeaf(termBytes);
if (termsEnum == null)
return false;
+ termBytes = cell.getTokenBytesNoLeaf(termBytes);
return termsEnum.seekExact(termBytes);
}
@@ -157,7 +158,8 @@
termsEnum = null;//signals all done
return null;
}
- nextCell = grid.getCell(nextTerm.bytes, nextTerm.offset, nextTerm.length, nextCell);
+ nextCell.readCell(nextTerm);
+ assert leafCell.isPrefixOf(nextCell);
if (nextCell.getLevel() == leafCell.getLevel() && nextCell.isLeaf()) {
return collectDocs(acceptContains);
} else {
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyPrefixTree.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyPrefixTree.java (revision )
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/LegacyPrefixTree.java (revision )
@@ -0,0 +1,77 @@
+package org.apache.lucene.spatial.prefix.tree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.spatial4j.core.context.SpatialContext;
+import com.spatial4j.core.shape.Point;
+import com.spatial4j.core.shape.Rectangle;
+import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.util.BytesRef;
+
+import java.util.Arrays;
+
+/** The base for the original two SPTs: Geohash and Quad. Don't subclass this for new SPTs.
+ * @lucene.internal */
+abstract class LegacyPrefixTree extends SpatialPrefixTree {
+ public LegacyPrefixTree(SpatialContext ctx, int maxLevels) {
+ super(ctx, maxLevels);
+ }
+
+ public double getDistanceForLevel(int level) {//bbox diagonal of the cell at this level containing the world center
+ if (level < 1 || level > getMaxLevels())
+ throw new IllegalArgumentException("Level must be in 1 to maxLevels range");
+ //TODO cache for each level
+ Cell cell = getCell(ctx.getWorldBounds().getCenter(), level);
+ Rectangle bbox = cell.getShape().getBoundingBox();
+ double width = bbox.getWidth();
+ double height = bbox.getHeight();
+ //Use standard cartesian hypotenuse. For geospatial, this answer is larger
+ // than the correct one but it's okay to over-estimate.
+ return Math.sqrt(width * width + height * height);
+ }
+
+ /**
+ * Returns the cell containing point {@code p} at the specified {@code level}.
+ */
+ protected abstract Cell getCell(Point p, int level);
+
+ @Override
+ public CellIterator getTreeCellIterator(Shape shape, int detailLevel) {
+ if (!(shape instanceof Point))
+ return super.getTreeCellIterator(shape, detailLevel);
+
+ //This specialization is here because the legacy implementations don't have a fast implementation of
+ // cell.getSubCells(point). It's fastest here to encode the full bytes for detailLevel, and create
+ // subcells from the bytesRef in a loop. This avoids an O(N^2) encode, and we have O(N) instead.
+
+ Cell cell = getCell((Point) shape, detailLevel);
+ assert !cell.isLeaf() && cell instanceof LegacyCell;
+ BytesRef fullBytes = cell.getTokenBytesNoLeaf(null);
+ //fill in ascending level order (shortest prefix first) so that the result is sorted
+ Cell[] cells = new Cell[detailLevel];
+ for (int i = 1; i < detailLevel; i++) {//one ancestor cell per level 1 .. detailLevel-1
+ Cell parentCell = getWorldCell();//re-initialized from the truncated bytes below
+ fullBytes.length = i;//truncate to the first i bytes: the prefix of the ancestor at level i
+ parentCell.readCell(fullBytes);
+ cells[i-1] = parentCell;
+ }
+ cells[detailLevel-1] = cell;//the detailLevel cell itself goes last
+ return new FilterCellIterator(Arrays.asList(cells).iterator(), null);//null filter
+ }
+
+}
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java (revision )
@@ -19,6 +19,7 @@
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo;
@@ -29,7 +30,7 @@
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.util.ShapeFieldCacheDistanceValueSource;
-import java.util.List;
+import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
@@ -75,14 +76,12 @@
public abstract class PrefixTreeStrategy extends SpatialStrategy {
protected final SpatialPrefixTree grid;
private final Map<String, PointPrefixTreeFieldCacheProvider> provider = new ConcurrentHashMap<>();
- protected final boolean simplifyIndexedCells;
protected int defaultFieldValuesArrayLen = 2;
protected double distErrPct = SpatialArgs.DEFAULT_DISTERRPCT;// [ 0 TO 0.5 ]
- public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName, boolean simplifyIndexedCells) {
+ public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) {
super(grid.getSpatialContext(), fieldName);
this.grid = grid;
- this.simplifyIndexedCells = simplifyIndexedCells;
}
/**
@@ -120,15 +119,23 @@
return createIndexableFields(shape, distErr);
}
+ /**
+ * Turns {@link SpatialPrefixTree#getTreeCellIterator(Shape, int)} into a
+ * {@link org.apache.lucene.analysis.TokenStream}.
+ * {@code simplifyIndexedCells} is an optional hint affecting non-point shapes: it will
+ * simplify/aggregate sets of complete leaves in a cell to its parent, resulting in ~20-25%
+ * fewer cells. It will likely be removed in the future.
+ */
public Field[] createIndexableFields(Shape shape, double distErr) {
int detailLevel = grid.getLevelForDistance(distErr);
- // note: maybe CellTokenStream should do this line, but it doesn't matter and it would create extra
- // coupling
- List<Cell> cells = grid.getCells(shape, detailLevel, true, simplifyIndexedCells);//intermediates cells
-
- Field field = new Field(getFieldName(),
- new CellTokenStream().setCells(cells.iterator()), FIELD_TYPE);
+ TokenStream tokenStream = createTokenStream(shape, detailLevel);
+ Field field = new Field(getFieldName(), tokenStream, FIELD_TYPE);
return new Field[]{field};
+ }
+
+ protected CellTokenStream createTokenStream(Shape shape, int detailLevel) {
+ Iterator<Cell> cells = grid.getTreeCellIterator(shape, detailLevel);
+ return new CellTokenStream().setCells(cells);
}
/* Indexed, tokenized, not stored. */
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/AbstractVisitingPrefixTreeFilter.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/AbstractVisitingPrefixTreeFilter.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/AbstractVisitingPrefixTreeFilter.java (revision )
@@ -22,6 +22,7 @@
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.spatial.prefix.tree.Cell;
+import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -46,6 +47,11 @@
//Historical note: this code resulted from a refactoring of RecursivePrefixTreeFilter,
// which in turn came out of SOLR-2155
+ //This class perhaps could have been implemented in terms of FilteredTermsEnum & MultiTermQuery
+ // & MultiTermQueryWrapperFilter. Maybe so for simple Intersects predicate but not for when we want to collect terms
+ // differently depending on cell state like IsWithin and for fuzzy/accurate collection planned improvements. At
+ // least it would just make things more complicated.
+
protected final int prefixGridScanLevel;//at least one less than grid.getMaxLevels()
public AbstractVisitingPrefixTreeFilter(Shape queryShape, String fieldName, SpatialPrefixTree grid,
@@ -108,16 +114,21 @@
does act as a short-circuit. So maybe do some percent of the time or when the level
is above some threshold.
- * Each shape.relate(otherShape) result could be cached since much of the same relations
- will be invoked when multiple segments are involved.
+ * Once we don't have redundant non-leaves indexed with leaf cells (LUCENE-4942), we can
+ sometimes know to call next() instead of seek() if we're processing a leaf cell that
+ didn't have a corresponding non-leaf.
*/
+ //
+ // TODO MAJOR REFACTOR SIMPLIFICATION BASED ON TreeCellIterator TODO
+ //
+
protected final boolean hasIndexedLeaves;//if false then we can skip looking for them
private VNode curVNode;//current pointer, derived from query shape
private BytesRef curVNodeTerm = new BytesRef();//curVNode.cell's term, without leaf
- private Cell scanCell;
+ private Cell scanCell = grid.getWorldCell();
private BytesRef thisTerm;//the result of termsEnum.term()
@@ -213,8 +224,8 @@
if (hasIndexedLeaves && cell.getLevel() != 0) {
//If the next indexed term just adds a leaf marker ('+') to cell,
// then add all of those docs
- assert curVNode.cell.isWithin(curVNodeTerm, thisTerm);
- scanCell = grid.getCell(thisTerm.bytes, thisTerm.offset, thisTerm.length, scanCell);
+ scanCell.readCell(thisTerm);
+ assert curVNode.cell.isPrefixOf(scanCell);
if (scanCell.getLevel() == cell.getLevel() && scanCell.isLeaf()) {
visitLeaf(scanCell);
//advance
@@ -251,8 +262,8 @@
* guaranteed to have an intersection and thus this must return some number
* of nodes.
*/
- protected Iterator<Cell> findSubCellsToVisit(Cell cell) {
- return cell.getSubCells(queryShape).iterator();
+ protected CellIterator findSubCellsToVisit(Cell cell) {
+ return cell.getNextLevelCells(queryShape);
}
/**
@@ -262,10 +273,12 @@
* #visitScanned(org.apache.lucene.spatial.prefix.tree.Cell)}.
*/
protected void scan(int scanDetailLevel) throws IOException {
- for (;
+ for ( ;
- thisTerm != null && curVNode.cell.isWithin(curVNodeTerm, thisTerm);
+ thisTerm != null;
- thisTerm = termsEnum.next()) {
+ thisTerm = termsEnum.next()) {
- scanCell = grid.getCell(thisTerm.bytes, thisTerm.offset, thisTerm.length, scanCell);
+ scanCell.readCell(thisTerm);
+ if (!curVNode.cell.isPrefixOf(scanCell))
+ break;
int termLevel = scanCell.getLevel();
if (termLevel < scanDetailLevel) {
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/CellIterator.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/CellIterator.java (revision )
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/CellIterator.java (revision )
@@ -0,0 +1,77 @@
+package org.apache.lucene.spatial.prefix.tree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * An Iterator of SpatialPrefixTree Cells. The order is always sorted without duplicates.
+ *
+ * @lucene.experimental
+ */
+public abstract class CellIterator implements Iterator<Cell> {
+
+ //note: either nextCell or thisCell may be non-null, but never both at the same time. That's
+ // because they might refer to the same instance when re-used!
+
+ protected Cell nextCell;//to be returned by next(), and null'ed after
+ protected Cell thisCell;//see next() & thisCell(). Should be cleared in hasNext().
+
+ /** Returns the cell last returned from {@link #next()}. It's cleared by hasNext(). */
+ public Cell thisCell() {
+ assert thisCell != null : "Only call thisCell() after next(), not hasNext()";
+ return thisCell;
+ }
+
+ // Arguably this belongs here and not on Cell
+ //public SpatialRelation getShapeRel()
+
+ /**
+ * Advances to and returns the next cell that is &gt;= {@code fromCell}, compared using non-leaf bytes,
+ * skipping any smaller cells. If it returns null then the iterator is exhausted.
+ */
+ public Cell nextFrom(Cell fromCell) {
+ while (true) {
+ if (!hasNext())
+ return null;
+ Cell c = next();//will update thisCell
+ if (c.compareToNoLeaf(fromCell) >= 0) {
+ return c;
+ }
+ }
+ }
+
+ /** This prevents sub-cells (those underneath the current cell) from being iterated to,
+ * if applicable, otherwise a NO-OP. */
+ @Override
+ public void remove() {
+ assert thisCell != null;//only meaningful right after next(); this base implementation does nothing else
+ }
+
+ @Override
+ public final Cell next() {
+ if (nextCell == null) {//nothing pending, so ask hasNext() for the next element
+ if (!hasNext())//expected to populate nextCell when it returns true
+ throw new NoSuchElementException();
+ }
+ thisCell = nextCell;
+ nextCell = null;//hand over: the two fields are never both set
+ return thisCell;
+ }
+}
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/Cell.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/Cell.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/Cell.java (revision )
@@ -17,231 +17,103 @@
* limitations under the License.
*/
-import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Shape;
import com.spatial4j.core.shape.SpatialRelation;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.StringHelper;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-
/**
- * Represents a grid cell. These are not necessarily thread-safe, although calling {@link #getShape()} will
- * sufficiently prepare it to be so, if needed.
+ * Represents a grid cell. Cell instances are generally very transient and may be re-used
+ * internally. To get an instance, you could start with {@link SpatialPrefixTree#getWorldCell()}.
+ * And from there you could either traverse down the tree with {@link #getNextLevelCells(com.spatial4j.core.shape.Shape)},
+ * or you could read an indexed term via {@link #readCell(org.apache.lucene.util.BytesRef)}.
+ * When a cell is read from a term, it is comprised of just the base bytes plus optionally a leaf flag.
*
* @lucene.experimental
*/
-public abstract class Cell {
+public interface Cell {
- private static final byte LEAF_BYTE = '+';//NOTE: must sort before letters & numbers
-
- //Arguably we could simply use a BytesRef, using an extra Object.
- private byte[] bytes;
- private int b_off;
- private int b_len;
-
- /**
- * When set via getSubCells(filter), it is the relationship between this cell
- * and the given shape filter. Doesn't participate in shape equality.
+ /** This initializes the cell with the given bytes and clears any previous state.
+ * Warning: Refers to the same byte array (no copy). If {@link #setLeaf()} is subsequently called,
+ * then it may modify these bytes.
+ *
+ * @param bytes the bytes of the new cell. WARNING: copy by reference.
*/
- protected SpatialRelation shapeRel;
+ void readCell(BytesRef bytes);
- /** Warning: Refers to the same bytes (no copy). If {@link #setLeaf()} is subsequently called then it
- * may modify bytes. */
- protected Cell(byte[] bytes, int off, int len) {
- this.bytes = bytes;
- this.b_off = off;
- this.b_len = len;
- }
+// If we bring this back; perhaps do so as a method that un-shares its internal state: void unshare();
+// /** Resets the state of this cell such that it is identical to {@code source}. This can be used for
+// * cloning a cell to have a safe copy, and it also might be used to position this cell
+// * before calling {@link #readCell(org.apache.lucene.util.BytesRef)} in a loop if you know the first term
+// * is going to be close to some other cell, thereby saving some computations. */
+// void copyFrom(Cell source);
- /** Warning: Refers to the same bytes (no copy). If {@link #setLeaf()} is subsequently called then it
- * may modify bytes. */
- public void reset(byte[] bytes, int off, int len) {
- assert getLevel() != 0;
- shapeRel = null;
- this.bytes = bytes;
- this.b_off = off;
- this.b_len = len;
- }
+ /** Gets the relationship this cell has with the shape from which it was filtered, assuming it came from a
+ * {@link CellIterator}. Arguably it belongs there but it's very convenient here. */
+ SpatialRelation getShapeRel();
- protected abstract SpatialPrefixTree getGrid();
+ /** See {@link #getShapeRel()}.
+ * @lucene.internal */
+ void setShapeRel(SpatialRelation rel);
- public SpatialRelation getShapeRel() {
- return shapeRel;
- }
-
/**
- * For points, this is always false. Otherwise this is true if there are no
+ * Some cells are flagged as leaves, which are indexed as such. A leaf cell is either within some
+ * shape or it both intersects and the cell is at an accuracy threshold such that no smaller cells
+ * for the shape will be represented.
* further cells with this prefix for the shape (always true at maxLevels).
*/
- public boolean isLeaf() {
- return (b_len > 0 && bytes[b_off + b_len - 1] == LEAF_BYTE);
- }
+ boolean isLeaf();
- /** Modifies the bytes to reflect that this is a leaf. Warning: never invoke from a cell
+ /** Set this cell to be a leaf. Warning: never call on a cell
* initialized to reference the same bytes from termsEnum, which should be treated as immutable.
- * Note: not supported at level 0. */
- public void setLeaf() {
- assert getLevel() != 0;
- if (isLeaf())
- return;
- //if isn't big enough, we have to copy
- if (bytes.length < b_off + b_len) {
- //hopefully this copying doesn't happen too much (DWS: I checked and it doesn't seem to happen)
- byte[] copy = new byte[b_len + 1];
- System.arraycopy(bytes, b_off, copy, 0, b_len);
- copy[b_len++] = LEAF_BYTE;
- bytes = copy;
- b_off = 0;
- } else {
- bytes[b_off + b_len++] = LEAF_BYTE;
- }
- }
+ * Note: not supported at level 0.
+ * @lucene.internal */
+ void setLeaf();
/**
- * Returns the bytes for this cell.
+ * Returns the bytes for this cell, with a leaf byte if this is a leaf cell.
* The result param is used to save object allocation, though it's bytes aren't used.
* @param result where the result goes, or null to create new
*/
- public BytesRef getTokenBytes(BytesRef result) {
- if (result == null)
- result = new BytesRef();
- result.bytes = bytes;
- result.offset = b_off;
- result.length = b_len;
- return result;
- }
+ BytesRef getTokenBytesWithLeaf(BytesRef result);
/**
- * Returns the bytes for this cell, without leaf set. The bytes should sort before any
- * cells that have the leaf set for the spatial location.
+ * Returns the bytes for this cell, without leaf set. The bytes should sort before
+ * {@link #getTokenBytesWithLeaf(org.apache.lucene.util.BytesRef)}.
* The result param is used to save object allocation, though it's bytes aren't used.
* @param result where the result goes, or null to create new
*/
- public BytesRef getTokenBytesNoLeaf(BytesRef result) {
- result = getTokenBytes(result);
- if (isLeaf())
- result.length--;
- return result;
- }
+ BytesRef getTokenBytesNoLeaf(BytesRef result);
/** Level 0 is the world (and has no parent), from then on a higher level means a smaller
* cell than the level before it.
*/
- public int getLevel() {
- return isLeaf() ? b_len - 1 : b_len;
- }
+ int getLevel();
- /** Gets the parent cell that contains this one. Don't call on the world cell. */
- public Cell getParent() {
- assert getLevel() > 0;
- return getGrid().getCell(bytes, b_off, b_len - (isLeaf() ? 2 : 1));
- }
-
/**
- * Like {@link #getSubCells()} but with the results filtered by a shape. If
- * that shape is a {@link com.spatial4j.core.shape.Point} then it must call
- * {@link #getSubCell(com.spatial4j.core.shape.Point)}. The returned cells
- * should have {@link Cell#getShapeRel()} set to their relation with {@code
- * shapeFilter}. In addition, {@link Cell#isLeaf()}
+ * Gets the cells at the next grid cell level underneath this one, optionally filtered by
+ * {@code shapeFilter}. The returned cells should have {@link #getShapeRel()} set to
+ * their relation with {@code shapeFilter}. In addition, for non-points {@link #isLeaf()}
* must be true when that relation is WITHIN.
* <p/>
+ * IMPORTANT: Cells returned from this iterator can be shared, as well as the bytes.
+ * <p/>
* Precondition: Never called when getLevel() == maxLevel.
*
* @param shapeFilter an optional filter for the returned cells.
* @return A set of cells (no dups), sorted. Not Modifiable.
*/
- public Collection<Cell> getSubCells(Shape shapeFilter) {
- //Note: Higher-performing subclasses might override to consider the shape filter to generate fewer cells.
- if (shapeFilter instanceof Point) {
- Cell subCell = getSubCell((Point) shapeFilter);
- subCell.shapeRel = SpatialRelation.CONTAINS;
- return Collections.singletonList(subCell);
- }
- Collection<Cell> cells = getSubCells();
+ CellIterator getNextLevelCells(Shape shapeFilter);
- if (shapeFilter == null) {
- return cells;
- }
+ /** Gets the shape for this cell; typically a Rectangle. */
+ Shape getShape();
- //TODO change API to return a filtering iterator
- List<Cell> copy = new ArrayList<>(cells.size());
- for (Cell cell : cells) {
- SpatialRelation rel = cell.getShape().relate(shapeFilter);
- if (rel == SpatialRelation.DISJOINT)
- continue;
- cell.shapeRel = rel;
- if (rel == SpatialRelation.WITHIN)
- cell.setLeaf();
- copy.add(cell);
- }
- return copy;
- }
-
/**
- * Performant implementations are expected to implement this efficiently by
- * considering the current cell's boundary. Precondition: Never called when
- * getLevel() == maxLevel.
- * <p/>
- * Precondition: this.getShape().relate(p) != DISJOINT.
+ * Returns whether the given cell is within/underneath this cell; not necessarily a direct
+ * descendant.
+ * @param c the cell to test
*/
- public abstract Cell getSubCell(Point p);
+ boolean isPrefixOf(Cell c);
- /**
- * Gets the cells at the next grid cell level that cover this cell.
- * Precondition: Never called when getLevel() == maxLevel.
- *
- * @return A set of cells (no dups), sorted, modifiable, not empty, not null.
- */
- protected abstract Collection<Cell> getSubCells();
-
- /**
- * {@link #getSubCells()}.size() -- usually a constant. Should be >=2
- */
- public abstract int getSubCellsSize();
-
- /** Gets the shape for this cell; typically a Rectangle. This method also serves to trigger any lazy
- * loading needed to make the cell instance thread-safe.
- */
- public abstract Shape getShape();
-
- /** TODO remove once no longer used. */
- public Point getCenter() {
- return getShape().getCenter();
- }
-
- @Override
- public boolean equals(Object obj) {
- //this method isn't "normally" called; just in asserts/tests
- if (obj instanceof Cell) {
- Cell cell = (Cell) obj;
- return getTokenBytes(null).equals(cell.getTokenBytes(null));
- } else {
- return false;
- }
- }
-
- @Override
- public int hashCode() {
- return getTokenBytesNoLeaf(null).hashCode();
- }
-
- @Override
- public String toString() {
- //this method isn't "normally" called; just in asserts/tests
- return getTokenBytes(null).utf8ToString();
- }
-
- /**
- * Returns if the target term is within/underneath this cell; not necessarily a direct descendant.
- * @param bytesNoLeaf must be getTokenBytesNoLeaf
- * @param term the term
- */
- public boolean isWithin(BytesRef bytesNoLeaf, BytesRef term) {
- assert bytesNoLeaf.equals(getTokenBytesNoLeaf(null));
- return StringHelper.startsWith(term, bytesNoLeaf);
- }
+ /** Equivalent to {@code this.getTokenBytesNoLeaf(null).compareTo(fromCell.getTokenBytesNoLeaf(null))}. */
+ int compareToNoLeaf(Cell fromCell);
}
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/package.html
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/package.html (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/package.html (revision )
@@ -16,7 +16,8 @@
-->
<html>
<body>
-The Spatial Prefix package supports spatial indexing by index-time tokens
+This package is about SpatialPrefixTree and any supporting classes.
+A SpatialPrefixTree supports spatial indexing by index-time tokens
where adding characters to a string gives greater resolution.
<p>
Potential Implementations include:
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java (revision )
@@ -97,12 +97,9 @@
//A factory for the prefix tree grid
SpatialPrefixTree grid = SpatialPrefixTreeFactory.makeSPT(configMap, null, ctx);
- RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategy(grid, SPATIAL_FIELD) {
- {
- //protected field
- this.pointsOnly = config.get("spatial.docPointsOnly", false);
- }
- };
+ RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategy(grid, SPATIAL_FIELD);
+ strategy.setPointsOnly(config.get("spatial.docPointsOnly", false));
+ strategy.setPruneLeafyBranches(config.get("spatial.pruneLeafyBranches", true));
int prefixGridScanLevel = config.get("query.spatial.prefixGridScanLevel", -4);
if (prefixGridScanLevel < 0)
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/TermQueryPrefixTreeStrategy.java (revision )
@@ -17,22 +17,26 @@
* limitations under the License.
*/
+import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.spatial.prefix.tree.Cell;
+import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.spatial.query.UnsupportedSpatialOperation;
import org.apache.lucene.util.BytesRef;
+import java.util.ArrayList;
import java.util.List;
/**
- * A basic implementation of {@link PrefixTreeStrategy} using a large {@link
- * TermsFilter} of all the cells from {@link SpatialPrefixTree#getCells(com.spatial4j.core.shape.Shape,
- * int, boolean, boolean)}. It only supports the search of indexed Point shapes.
+ * A basic implementation of {@link PrefixTreeStrategy} using a large
+ * {@link TermsFilter} of all the cells from
+ * {@link SpatialPrefixTree#getTreeCellIterator(com.spatial4j.core.shape.Shape, int)}.
+ * It only supports the search of indexed Point shapes.
* <p/>
* The precision of query shapes (distErrPct) is an important factor in using
* this Strategy. If the precision is too precise then it will result in many
@@ -42,9 +46,10 @@
*/
public class TermQueryPrefixTreeStrategy extends PrefixTreeStrategy {
+ protected boolean simplifyIndexedCells = false;
+
public TermQueryPrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) {
- super(grid, fieldName,
- false);//do not simplify indexed cells
+ super(grid, fieldName);
}
@Override
@@ -55,14 +60,36 @@
Shape shape = args.getShape();
int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct));
- List<Cell> cells = grid.getCells(shape, detailLevel,
- false,//no parents
- true);//simplify
- BytesRef[] terms = new BytesRef[cells.size()];
- int i = 0;
- for (Cell cell : cells) {
- terms[i++] = cell.getTokenBytesNoLeaf(null);
+
+ //--get a List of BytesRef for each term we want (no parents, no leaf bytes)
+ final int GUESS_NUM_TERMS;
+ if (shape instanceof Point)
+ GUESS_NUM_TERMS = detailLevel;//perfect guess
+ else
+ GUESS_NUM_TERMS = 4096;//should this be a method on SpatialPrefixTree?
+
+ BytesRef masterBytes = new BytesRef(GUESS_NUM_TERMS*detailLevel);//shared byte array for all terms
+ List<BytesRef> terms = new ArrayList<>(GUESS_NUM_TERMS);
+
+ CellIterator cells = grid.getTreeCellIterator(shape, detailLevel);
+ while (cells.hasNext()) {
+ Cell cell = cells.next();
+ if (!cell.isLeaf())
+ continue;
+ BytesRef term = cell.getTokenBytesNoLeaf(null);//null because we want a new BytesRef
+ //We copy out the bytes because it may be re-used across the iteration. This also gives us the opportunity
+ // to use one contiguous block of memory for the bytes of all terms we need.
+ masterBytes.grow(masterBytes.length + term.length);
+ masterBytes.append(term);
+ term.bytes = null;//don't need; will reset later
+ term.offset = masterBytes.length - term.length;
+ terms.add(term);
}
+ //doing this now because if we did earlier, it's possible the bytes needed to grow()
+ for (BytesRef byteRef : terms) {
+ byteRef.bytes = masterBytes.bytes;
+ }
+ //unfortunately TermsFilter will needlessly sort & dedupe
return new TermsFilter(getFieldName(), terms);
}
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java (revision )
@@ -38,7 +38,7 @@
*
* @lucene.experimental
*/
-public class QuadPrefixTree extends SpatialPrefixTree {
+public class QuadPrefixTree extends LegacyPrefixTree {
/**
* Factory for creating {@link QuadPrefixTree} instances with useful defaults
@@ -115,6 +115,11 @@
this(ctx, ctx.getWorldBounds(), maxLevels);
}
+ @Override
+ public Cell getWorldCell() {
+ return new QuadCell(BytesRef.EMPTY_BYTES, 0, 0);
+ }
+
public void printInfo(PrintStream out) {
NumberFormat nf = NumberFormat.getNumberInstance(Locale.ROOT);
nf.setMaximumFractionDigits(5);
@@ -147,11 +152,6 @@
return cells.get(0);//note cells could be longer if p on edge
}
- @Override
- public Cell getCell(byte[] bytes, int offset, int len) {
- return new QuadCell(bytes, offset, len);
- }
-
private void build(
double x,
double y,
@@ -214,7 +214,7 @@
str.length = strlen;
}
- class QuadCell extends Cell{
+ private class QuadCell extends LegacyCell {
QuadCell(byte[] bytes, int off, int len) {
super(bytes, off, len);
@@ -226,16 +226,10 @@
}
@Override
- protected SpatialPrefixTree getGrid() { return QuadPrefixTree.this; }
+ protected QuadPrefixTree getGrid() { return QuadPrefixTree.this; }
@Override
- public void reset(byte[] bytes, int off, int len) {
- super.reset(bytes, off, len);
- shape = null;
- }
-
- @Override
- public Collection<Cell> getSubCells() {
+ protected Collection<Cell> getSubCells() {
BytesRef source = getTokenBytesNoLeaf(null);
BytesRef target = new BytesRef();
@@ -262,11 +256,9 @@
}
@Override
- public Cell getSubCell(Point p) {
- return QuadPrefixTree.this.getCell(p, getLevel() + 1);//not performant!
+ protected QuadCell getSubCell(Point p) {
+ return (QuadCell) QuadPrefixTree.this.getCell(p, getLevel() + 1);//not performant!
}
-
- private Shape shape;//cache
@Override
public Shape getShape() {
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/WithinPrefixTreeFilter.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/WithinPrefixTreeFilter.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/WithinPrefixTreeFilter.java (revision )
@@ -27,13 +27,12 @@
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.spatial.prefix.tree.Cell;
+import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import java.io.IOException;
-import java.util.Collection;
-import java.util.Iterator;
/**
* Finds docs where its indexed shape is {@link org.apache.lucene.spatial.query.SpatialOperation#IsWithin
@@ -136,9 +135,9 @@
}
@Override
- protected Iterator<Cell> findSubCellsToVisit(Cell cell) {
+ protected CellIterator findSubCellsToVisit(Cell cell) {
//use buffered query shape instead of orig. Works with null too.
- return cell.getSubCells(bufferedQueryShape).iterator();
+ return cell.getNextLevelCells(bufferedQueryShape);
}
@Override
@@ -183,10 +182,10 @@
if (relate == SpatialRelation.DISJOINT)
return false;
// Note: Generating all these cells just to determine intersection is not ideal.
- // It was easy to implement but could be optimized. For example if the docs
- // in question are already marked in the 'outside' bitset then it can be avoided.
- Collection<Cell> subCells = cell.getSubCells(null);
- for (Cell subCell : subCells) {
+ // The real solution is LUCENE-4869.
+ CellIterator subCells = cell.getNextLevelCells(null);
+ while (subCells.hasNext()) {
+ Cell subCell = subCells.next();
if (!allCellsIntersectQuery(subCell, null))//recursion
return false;
}
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SingletonCellIterator.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SingletonCellIterator.java (revision )
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SingletonCellIterator.java (revision )
@@ -0,0 +1,37 @@
+package org.apache.lucene.spatial.prefix.tree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A singleton (one Cell) instance of CellIterator.
+ *
+ * @lucene.internal
+ */
+class SingletonCellIterator extends CellIterator {
+
+ SingletonCellIterator(Cell cell) {
+ this.nextCell = cell;//preload nextCell so that hasNext() reports it (when non-null) without any look-ahead work
+ }
+
+ @Override
+ public boolean hasNext() {
+ thisCell = null;//forget the current cell
+ return nextCell != null;//presumably next() in CellIterator clears nextCell, ending iteration after one cell -- TODO confirm
+ }
+
+}
Index: lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java (revision )
@@ -61,7 +61,11 @@
assertEquals(ctx.getWorldBounds(), c.getShape());
while (c.getLevel() < trie.getMaxLevels()) {
prevC = c;
- List<Cell> subCells = new ArrayList<>(c.getSubCells());
+ List<Cell> subCells = new ArrayList<>();
+ CellIterator subCellsIter = c.getNextLevelCells(null);
+ while (subCellsIter.hasNext()) {
+ subCells.add(subCellsIter.next());
+ }
c = subCells.get(random().nextInt(subCells.size()-1));
assertEquals(prevC.getLevel()+1,c.getLevel());
@@ -73,7 +77,7 @@
}
}
/**
- * A PrefixTree pruning optimization gone bad.
+ * A PrefixTree pruning optimization gone bad, applicable when optimize=true.
* See <a href="https://issues.apache.org/jira/browse/LUCENE-4770>LUCENE-4770</a>.
*/
@Test
\ No newline at end of file
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java (revision 9e87821edeb3e24ca8dedaecf856f6510d61d0d3)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java (revision )
@@ -52,6 +52,15 @@
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ /* Possible optimizations (IN ADDITION TO THOSE LISTED IN VISITORTEMPLATE):
+
+ * If docFreq is 1 (or less than some small threshold), then check to see if we've already
+ collected it; if so short-circuit. Don't do this just for point data, as there is
+ no benefit, or only marginal benefit when multi-valued.
+
+ * Point query shape optimization when the only indexed data is a point (no leaves). Result is a term query.
+
+ */
return new VisitorTemplate(context, acceptDocs, hasIndexedLeaves) {
private FixedBitSet results;
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/TreeCellIterator.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/TreeCellIterator.java (revision )
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/TreeCellIterator.java (revision )
@@ -0,0 +1,89 @@
+package org.apache.lucene.spatial.prefix.tree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.spatial4j.core.shape.Point;
+import com.spatial4j.core.shape.Shape;
+
+/**
+ * Navigates a {@link org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree} from a given cell (typically the world
+ * cell) down to a maximum number of configured levels, filtered by a given shape. Intermediate non-leaf cells are
+ * returned. It supports {@link #remove()} for skipping traversal of subcells of the current cell.
+ *
+ * @lucene.internal
+ */
+class TreeCellIterator extends CellIterator {
+ //This class uses a stack approach, which is more efficient than creating linked nodes. And it might more easily
+ // pave the way for re-using Cell & CellIterator at a given level in the future.
+
+ private final Shape shapeFilter;//possibly null
+ private final CellIterator[] iterStack;//one slot per level below parentCell; [0] iterates parentCell's next-level cells
+ private int stackIdx;//index into iterStack of the iterator currently being advanced; -1 when done
+ private boolean descend;//when true, hasNext() first tries the sub-cells of the current cell before moving to a sibling
+
+ public TreeCellIterator(Shape shapeFilter, int detailLevel, Cell parentCell) {
+ this.shapeFilter = shapeFilter;
+ assert parentCell.getLevel() == 0;
+ iterStack = new CellIterator[detailLevel];//NOTE(review): detailLevel < 1 makes the iterStack[0] store below throw -- confirm callers never pass it
+ CellIterator nextLevelCells = parentCell.getNextLevelCells(shapeFilter);
+ iterStack[0] = nextLevelCells;
+ stackIdx = 0;//always points to an iter (non-null)
+ //note: not obvious but needed to visit the first cell before trying to descend
+ descend = false;
+ }
+
+ @Override
+ public boolean hasNext() {
+ if (nextCell != null)//a look-ahead cell found by an earlier call is still pending
+ return true;
+ while (true) {
+ if (stackIdx == -1)//the only condition in which we return false
+ return false;
+ //If we can descend...
+ if (descend && !(stackIdx == iterStack.length - 1 || iterStack[stackIdx].thisCell().isLeaf())) {//i.e. not at the deepest level and the current cell isn't a leaf
+ CellIterator nextIter = iterStack[stackIdx].thisCell().getNextLevelCells(shapeFilter);
+ //push stack
+ iterStack[++stackIdx] = nextIter;
+ }
+ //Get sibling...
+ if (iterStack[stackIdx].hasNext()) {
+ nextCell = iterStack[stackIdx].next();
+ //at detailLevel
+ if (stackIdx == iterStack.length - 1 && !(shapeFilter instanceof Point)) //point check is a kludge
+ nextCell.setLeaf();//because at bottom
+ break;
+ }
+ //Couldn't get next; go up...
+ //pop stack
+ iterStack[stackIdx--] = null;//stackIdx becomes -1 once the top-level iterator is popped, which ends the iteration
+ descend = false;//so that we don't re-descend where we just were
+ }
+ assert nextCell != null;
+ descend = true;//reset: the following call will try to descend unless remove() is called first
+ return true;
+ }
+
+ @Override
+ public void remove() {
+ assert thisCell() != null && nextCell == null;
+ descend = false;//skip the sub-cells of the current cell on the following hasNext()
+ }
+
+ //TODO implement a smart nextFrom() that looks at the parent's bytes first
+
+}
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/FilterCellIterator.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/FilterCellIterator.java (revision )
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/FilterCellIterator.java (revision )
@@ -0,0 +1,62 @@
+package org.apache.lucene.spatial.prefix.tree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.spatial4j.core.shape.Shape;
+import com.spatial4j.core.shape.SpatialRelation;
+
+import java.util.Iterator;
+
+/**
+ * A filtering iterator of Cells. Those not matching the provided shape (disjoint) are
+ * skipped. If {@code shapeFilter} is null then all cells are returned.
+ *
+ * @lucene.internal
+ */
+class FilterCellIterator extends CellIterator {
+  final Iterator<Cell> baseIter;//supplies the candidate cells
+  final Shape shapeFilter;//null means every cell from baseIter is returned
+
+  FilterCellIterator(Iterator<Cell> baseIter, Shape shapeFilter) {
+    this.baseIter = baseIter;
+    this.shapeFilter = shapeFilter;
+  }
+
+  /** Advances baseIter to the next cell that is not disjoint from shapeFilter, holding it in nextCell. */
+  @Override
+  public boolean hasNext() {
+    thisCell = null;
+    if (nextCell != null)//calling hasNext twice in a row
+      return true;
+    while (baseIter.hasNext()) {
+      nextCell = baseIter.next();
+      if (shapeFilter == null)
+        return true;
+      SpatialRelation rel = nextCell.getShape().relate(shapeFilter);
+      if (rel.intersects()) {
+        nextCell.setShapeRel(rel);
+        if (rel == SpatialRelation.WITHIN)
+          nextCell.setLeaf();//the cell's shape is WITHIN the filter shape
+        return true;
+      }
+      nextCell = null;//rejected (disjoint); otherwise a later hasNext() would report this cell as a match
+    }
+    return false;
+  }
+
+}