| Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision 1465678) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision ) |
| @@ -19,6 +19,7 @@ |
| |
| import com.spatial4j.core.shape.Shape; |
| import org.apache.lucene.search.Filter; |
| +import org.apache.lucene.spatial.DisjointSpatialFilter; |
| import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; |
| import org.apache.lucene.spatial.query.SpatialArgs; |
| import org.apache.lucene.spatial.query.SpatialOperation; |
| @@ -62,12 +63,14 @@ |
| |
| @Override |
| public Filter makeFilter(SpatialArgs args) { |
| + final SpatialOperation op = args.getOperation(); |
| + if (op == SpatialOperation.IsDisjointTo) |
| + return new DisjointSpatialFilter(this, args, getFieldName()); |
| |
| Shape shape = args.getShape(); |
| int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct)); |
| final boolean hasIndexedLeaves = true; |
| |
| - final SpatialOperation op = args.getOperation(); |
| if (op == SpatialOperation.Intersects) { |
| return new IntersectsPrefixTreeFilter( |
| shape, getFieldName(), grid, detailLevel, prefixGridScanLevel, |
| Index: lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java (revision 1465679) |
| +++ lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java (revision ) |
| @@ -24,6 +24,10 @@ |
| import com.spatial4j.core.shape.Shape; |
| import com.spatial4j.core.shape.SpatialRelation; |
| import com.spatial4j.core.shape.impl.RectangleImpl; |
| +import org.apache.lucene.document.Document; |
| +import org.apache.lucene.document.Field; |
| +import org.apache.lucene.document.StoredField; |
| +import org.apache.lucene.document.StringField; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.spatial.StrategyTestCase; |
| import org.apache.lucene.spatial.prefix.tree.Cell; |
| @@ -35,15 +39,22 @@ |
| import org.junit.Test; |
| |
| import java.io.IOException; |
| +import java.util.ArrayList; |
| +import java.util.Collection; |
| +import java.util.Collections; |
| import java.util.Iterator; |
| import java.util.LinkedHashMap; |
| +import java.util.LinkedHashSet; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| -import java.util.TreeSet; |
| |
| import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt; |
| import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween; |
| +import static com.spatial4j.core.shape.SpatialRelation.CONTAINS; |
| +import static com.spatial4j.core.shape.SpatialRelation.DISJOINT; |
| +import static com.spatial4j.core.shape.SpatialRelation.INTERSECTS; |
| +import static com.spatial4j.core.shape.SpatialRelation.WITHIN; |
| |
| public class SpatialOpRecursivePrefixTreeTest extends StrategyTestCase { |
| |
| @@ -66,7 +77,7 @@ |
| this.strategy = new RecursivePrefixTreeStrategy(grid, getClass().getSimpleName()); |
| //((PrefixTreeStrategy) strategy).setDistErrPct(0);//fully precise to grid |
| |
| - System.out.println("Strategy: "+strategy.toString()); |
| + System.out.println("Strategy: " + strategy.toString()); |
| } |
| |
| @Test |
| @@ -91,14 +102,21 @@ |
| } |
| |
| @Test |
| + @Repeat(iterations = 10) |
| + public void testDisjoint() throws IOException { |
| + mySetup(-1); |
| + doTest(SpatialOperation.IsDisjointTo); |
| + } |
| + |
| + @Test |
| public void testWithinDisjointParts() throws IOException { |
| mySetup(7); |
| - |
| //one shape comprised of two parts, quite separated apart |
| - adoc("0", new ShapePair(ctx.makeRectangle(0, 10, -120, -100), ctx.makeRectangle(220, 240, 110, 125))); |
| + adoc("0", new ShapePair(ctx.makeRectangle(0, 10, -120, -100), ctx.makeRectangle(220, 240, 110, 125), false)); |
| commit(); |
| //query surrounds only the second part of the indexed shape |
| - Query query = strategy.makeQuery(new SpatialArgs(SpatialOperation.IsWithin, ctx.makeRectangle(210, 245, 105, 128))); |
| + Query query = strategy.makeQuery(new SpatialArgs(SpatialOperation.IsWithin, |
| + ctx.makeRectangle(210, 245, 105, 128))); |
| SearchResults searchResults = executeQuery(query, 1); |
| //we shouldn't find it because it's not completely within |
| assertTrue(searchResults.numFound == 0); |
| @@ -127,30 +145,73 @@ |
| ), 1).numFound==1);//match |
| } |
| |
| + //Override so we can index parts of a pair separately, resulting in the detailLevel |
| + // being independent for each shape vs the whole thing |
| + @Override |
| + protected Document newDoc(String id, Shape shape) { |
| + Document doc = new Document(); |
| + doc.add(new StringField("id", id, Field.Store.YES)); |
| + if (shape != null) { |
| + Collection<Shape> shapes; |
| + if (shape instanceof ShapePair) { |
| + shapes = new ArrayList<>(2); |
| + shapes.add(((ShapePair)shape).shape1); |
| + shapes.add(((ShapePair)shape).shape2); |
| + } else { |
| + shapes = Collections.singleton(shape); |
| + } |
| + for (Shape shapei : shapes) { |
| + for (Field f : strategy.createIndexableFields(shapei)) { |
| + doc.add(f); |
| + } |
| + } |
| + if (storeShape) |
| + doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape))); |
| + } |
| + return doc; |
| + } |
| + |
| private void doTest(final SpatialOperation operation) throws IOException { |
| + final boolean biasContains = (operation == SpatialOperation.Contains); |
| + |
| Map<String, Shape> indexedShapes = new LinkedHashMap<String, Shape>(); |
| + Map<String, Shape> indexedShapesGS = new LinkedHashMap<String, Shape>(); |
| final int numIndexedShapes = randomIntBetween(1, 6); |
| for (int i = 0; i < numIndexedShapes; i++) { |
| - String id = ""+i; |
| + String id = "" + i; |
| Shape indexedShape; |
| - if (random().nextInt(4) == 0) { |
| - indexedShape = new ShapePair( gridSnapp(randomRectangle()), gridSnapp(randomRectangle()) ); |
| + Shape indexedShapeGS; //(grid-snapped) |
| + int R = random().nextInt(12); |
| + if (R == 0) {//1 in 10 |
| + indexedShape = null; //no shape for this doc |
| + indexedShapeGS = null; |
| + } else if (R % 4 == 0) {//3 in 12 |
| + //comprised of more than one shape |
| + Rectangle shape1 = randomRectangle(); |
| + Rectangle shape2 = randomRectangle(); |
| + indexedShape = new ShapePair(shape1, shape2, biasContains); |
| + indexedShapeGS = new ShapePair(gridSnap(shape1), gridSnap(shape2), biasContains); |
| } else { |
| - indexedShape = gridSnapp(randomRectangle()); |
| + //just one shape |
| + indexedShape = randomRectangle(); |
| + indexedShapeGS = gridSnap(indexedShape); |
| } |
| indexedShapes.put(id, indexedShape); |
| + indexedShapesGS.put(id, indexedShapeGS); |
| + |
| adoc(id, indexedShape); |
| + |
| if (random().nextInt(10) == 0) |
| - commit(); |
| - } |
| + commit();//intermediate commit, produces extra segments |
| |
| - //delete some |
| + } |
| Iterator<String> idIter = indexedShapes.keySet().iterator(); |
| while (idIter.hasNext()) { |
| String id = idIter.next(); |
| if (random().nextInt(10) == 0) { |
| deleteDoc(id); |
| idIter.remove(); |
| + indexedShapesGS.remove(id); |
| } |
| } |
| |
| @@ -160,35 +221,80 @@ |
| for (int i = 0; i < numQueryShapes; i++) { |
| int scanLevel = randomInt(grid.getMaxLevels()); |
| ((RecursivePrefixTreeStrategy) strategy).setPrefixGridScanLevel(scanLevel); |
| - Shape queryShape = gridSnapp(randomRectangle()); |
| + final Shape queryShape = randomRectangle(); |
| |
| - //Generate truth via brute force |
| - Set<String> expectedIds = new TreeSet<String>(); |
| + final boolean DISJOINT = operation.equals(SpatialOperation.IsDisjointTo); |
| + |
| + //Generate truth via brute force: |
| + // We really try to ensure true-positive matches (if the predicate on the raw shapes match |
| + // then the search should find those same matches). |
| + // approximations, false-positive matches |
| + Set <String> expectedIds = new LinkedHashSet<String>();//true-positives |
| + Set<String> secondaryIds = new LinkedHashSet<String>();//false-positives (unless disjoint) |
| for (Map.Entry<String, Shape> entry : indexedShapes.entrySet()) { |
| - if (operation.evaluate(entry.getValue(), queryShape)) |
| - expectedIds.add(entry.getKey()); |
| + Shape indexedShapeCompare = entry.getValue(); |
| + if (indexedShapeCompare == null) |
| + continue; |
| + Shape queryShapeCompare = queryShape; |
| + String id = entry.getKey(); |
| + if (operation.evaluate(indexedShapeCompare, queryShapeCompare)) { |
| + expectedIds.add(id); |
| + if (DISJOINT) { |
| + //if no longer intersect after buffering them, for disjoint, remember this |
| + indexedShapeCompare = indexedShapesGS.get(entry.getKey()); |
| + queryShapeCompare = gridSnap(queryShape); |
| + if (!operation.evaluate(indexedShapeCompare, queryShapeCompare)) |
| + secondaryIds.add(id); |
| - } |
| + } |
| + } else if (!DISJOINT) { |
| + //buffer either the indexed or query shape (via gridSnap) and try again |
| + if (operation.equals(SpatialOperation.Intersects)) { |
| + indexedShapeCompare = indexedShapesGS.get(entry.getKey()); |
| + queryShapeCompare = gridSnap(queryShape); |
| + } else if (operation.equals(SpatialOperation.Contains)) { |
| + indexedShapeCompare = indexedShapesGS.get(entry.getKey()); |
| + } else if (operation.equals(SpatialOperation.IsWithin)) { |
| + queryShapeCompare = gridSnap(queryShape); |
| + } |
| + if (operation.evaluate(indexedShapeCompare, queryShapeCompare)) |
| + secondaryIds.add(id); |
| + } |
| + } |
| |
| //Search and verify results |
| - Query query = strategy.makeQuery(new SpatialArgs(operation, queryShape)); |
| + SpatialArgs args = new SpatialArgs(operation, queryShape); |
| + Query query = strategy.makeQuery(args); |
| SearchResults got = executeQuery(query, 100); |
| - Set<String> remainingExpectedIds = new TreeSet<String>(expectedIds); |
| - String msg = queryShape.toString()+" Expect: "+expectedIds; |
| + Set<String> remainingExpectedIds = new LinkedHashSet<String>(expectedIds); |
| for (SearchResult result : got.results) { |
| String id = result.getId(); |
| - Object removed = remainingExpectedIds.remove(id); |
| - if (removed == null) { |
| - fail("Shouldn't match " + id + " ("+ indexedShapes.get(id) +") in " + msg); |
| + boolean removed = remainingExpectedIds.remove(id); |
| + if (!removed && (!DISJOINT && !secondaryIds.contains(id))) { |
| + fail("Shouldn't match", id, indexedShapes, indexedShapesGS, queryShape); |
| } |
| } |
| + if (DISJOINT) |
| + remainingExpectedIds.removeAll(secondaryIds); |
| if (!remainingExpectedIds.isEmpty()) { |
| - Shape firstFailedMatch = indexedShapes.get(remainingExpectedIds.iterator().next()); |
| - fail("Didn't match " + firstFailedMatch + " in " + msg +" (of "+remainingExpectedIds.size()+")"); |
| + String id = remainingExpectedIds.iterator().next(); |
| + fail("Should have matched", id, indexedShapes, indexedShapesGS, queryShape); |
| } |
| } |
| } |
| |
| - protected Rectangle gridSnapp(Shape snapMe) { |
| + private void fail(String label, String id, Map<String, Shape> indexedShapes, Map<String, Shape> indexedShapesGS, Shape queryShape) { |
| + System.err.println("Ig:" + indexedShapesGS.get(id) + " Qg:" + gridSnap(queryShape)); |
| + fail(label + " I #" + id + ":" + indexedShapes.get(id) + " Q:" + queryShape); |
| + } |
| + |
| + |
| +// private Rectangle inset(Rectangle r) { |
| +// //typically inset by 1 (whole numbers are easy to read) |
| +// double d = Math.min(1.0, grid.getDistanceForLevel(grid.getMaxLevels()) / 4); |
| +// return ctx.makeRectangle(r.getMinX() + d, r.getMaxX() - d, r.getMinY() + d, r.getMaxY() - d); |
| +// } |
| + |
| + protected Rectangle gridSnap(Shape snapMe) { |
| //The next 4 lines mimic PrefixTreeStrategy.createIndexableFields() |
| double distErrPct = ((PrefixTreeStrategy) strategy).getDistErrPct(); |
| double distErr = SpatialArgs.calcDistanceFromErrPct(snapMe, distErrPct, ctx); |
| @@ -210,27 +316,57 @@ |
| return ctx.makeRectangle(minX, maxX, minY, maxY); |
| } |
| |
| - /** An aggregate of 2 shapes. Only implements what's necessary for the test here. |
| - * TODO replace with Spatial4j trunk ShapeCollection. */ |
| + /** |
| + * An aggregate of 2 shapes. Only implements what's necessary for the test |
| + * here. TODO replace with Spatial4j trunk ShapeCollection. |
| + */ |
| private class ShapePair implements Shape { |
| |
| - Shape shape1, shape2; |
| + final Rectangle shape1, shape2; |
| + final boolean biasContainsThenWithin;//a hack |
| |
| - public ShapePair(Shape shape1, Shape shape2) { |
| + public ShapePair(Rectangle shape1, Rectangle shape2, boolean containsThenWithin) { |
| this.shape1 = shape1; |
| this.shape2 = shape2; |
| + biasContainsThenWithin = containsThenWithin; |
| } |
| |
| @Override |
| public SpatialRelation relate(Shape other) { |
| - //easy to observe is correct; not an optimal code path but this is a test |
| - if (shape1.relate(other) == SpatialRelation.CONTAINS || shape2.relate(other) == SpatialRelation.CONTAINS) |
| - return SpatialRelation.CONTAINS; |
| - if (shape1.relate(other) == SpatialRelation.WITHIN && shape2.relate(other) == SpatialRelation.WITHIN) |
| - return SpatialRelation.WITHIN; |
| + SpatialRelation r = relateApprox(other); |
| + if (r != INTERSECTS) |
| + return r; |
| + //See if the correct answer is actually Contains |
| + Rectangle oRect = (Rectangle)other; |
| + boolean pairTouches = shape1.relate(shape2).intersects(); |
| + if (!pairTouches) |
| + return r; |
| + //test all 4 corners |
| + if (relate(ctx.makePoint(oRect.getMinX(), oRect.getMinY())) == CONTAINS |
| + && relate(ctx.makePoint(oRect.getMinX(), oRect.getMaxY())) == CONTAINS |
| + && relate(ctx.makePoint(oRect.getMaxX(), oRect.getMinY())) == CONTAINS |
| + && relate(ctx.makePoint(oRect.getMaxX(), oRect.getMaxY())) == CONTAINS) |
| + return CONTAINS; |
| + return r; |
| + } |
| + |
| + private SpatialRelation relateApprox(Shape other) { |
| + if (biasContainsThenWithin) { |
| + if (shape1.relate(other) == CONTAINS || shape1.equals(other) |
| + || shape2.relate(other) == CONTAINS || shape2.equals(other)) return CONTAINS; |
| + |
| + if (shape1.relate(other) == WITHIN && shape2.relate(other) == WITHIN) return WITHIN; |
| + |
| + } else { |
| + if ((shape1.relate(other) == WITHIN || shape1.equals(other)) |
| + && (shape2.relate(other) == WITHIN || shape2.equals(other))) return WITHIN; |
| + |
| + if (shape1.relate(other) == CONTAINS || shape2.relate(other) == CONTAINS) return CONTAINS; |
| + } |
| + |
| if (shape1.relate(other).intersects() || shape2.relate(other).intersects()) |
| - return SpatialRelation.INTERSECTS; |
| - return SpatialRelation.DISJOINT; |
| + return INTERSECTS;//might actually be 'CONTAINS' if these 2 are adjacent |
| + return DISJOINT; |
| } |
| |
| @Override |
| @@ -251,6 +387,11 @@ |
| @Override |
| public Point getCenter() { |
| throw new UnsupportedOperationException("TODO unimplemented");//TODO |
| + } |
| + |
| + @Override |
| + public String toString() { |
| + return "ShapePair(" + shape1 + " , " + shape2 + ")"; |
| } |
| } |
| |
| Index: lucene/spatial/src/java/org/apache/lucene/spatial/DisjointSpatialFilter.java |
| IDEA additional info: |
| Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP |
| <+>UTF-8 |
| =================================================================== |
| --- lucene/spatial/src/java/org/apache/lucene/spatial/DisjointSpatialFilter.java (revision ) |
| +++ lucene/spatial/src/java/org/apache/lucene/spatial/DisjointSpatialFilter.java (revision ) |
| @@ -0,0 +1,113 @@ |
| +package org.apache.lucene.spatial; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.index.AtomicReaderContext; |
| +import org.apache.lucene.queries.ChainedFilter; |
| +import org.apache.lucene.search.BitsFilteredDocIdSet; |
| +import org.apache.lucene.search.DocIdSet; |
| +import org.apache.lucene.search.FieldCache; |
| +import org.apache.lucene.search.Filter; |
| +import org.apache.lucene.spatial.query.SpatialArgs; |
| +import org.apache.lucene.spatial.query.SpatialOperation; |
| +import org.apache.lucene.util.Bits; |
| + |
| +import java.io.IOException; |
| + |
| +/** |
| + * A Spatial Filter implementing {@link SpatialOperation#IsDisjointTo} in terms |
| + * of a {@link SpatialStrategy}'s support for {@link SpatialOperation#Intersects}. |
| + * A document is considered disjoint if it has spatial data that does not |
| + * intersect with the query shape. Another way of looking at this is that it's |
| + * a way to invert a query shape. |
| + * |
| + * @lucene.experimental |
| + */ |
| +public class DisjointSpatialFilter extends Filter { |
| + |
| + private final String field;//maybe null |
| + private final Filter intersectsFilter; |
| + |
| + /** |
| + * |
| + * @param strategy Needed to compute intersects |
| + * @param args Used in spatial intersection |
| + * @param field This field is used to determine which docs have spatial data via |
| + * {@link org.apache.lucene.search.FieldCache#getDocsWithField(org.apache.lucene.index.AtomicReader, String)}. |
| + * Passing null will assume all docs have spatial data. |
| + */ |
| + public DisjointSpatialFilter(SpatialStrategy strategy, SpatialArgs args, String field) { |
| + this.field = field; |
| + |
| + // TODO consider making SpatialArgs cloneable |
| + SpatialOperation origOp = args.getOperation();//copy so we can restore |
| + args.setOperation(SpatialOperation.Intersects);//temporarily set to intersects |
| + intersectsFilter = strategy.makeFilter(args); |
| + args.setOperation(origOp);//restore so it looks like it was |
| + } |
| + |
| + @Override |
| + public boolean equals(Object o) { |
| + if (this == o) return true; |
| + if (o == null || getClass() != o.getClass()) return false; |
| + |
| + DisjointSpatialFilter that = (DisjointSpatialFilter) o; |
| + |
| + if (field != null ? !field.equals(that.field) : that.field != null) |
| + return false; |
| + if (!intersectsFilter.equals(that.intersectsFilter)) return false; |
| + |
| + return true; |
| + } |
| + |
| + @Override |
| + public int hashCode() { |
| + int result = field != null ? field.hashCode() : 0; |
| + result = 31 * result + intersectsFilter.hashCode(); |
| + return result; |
| + } |
| + |
| + @Override |
| + public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException { |
| + Bits docsWithField; |
| + if (field == null) { |
| + docsWithField = null;//all docs |
| + } else { |
| + //NOTE By using the FieldCache we re-use a cache |
| + // which is nice but loading it in this way might be slower than say using an |
| + // intersects filter against the world bounds. So do we add a method to the |
| + // strategy, perhaps? But the strategy can't cache it. |
| + docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), field); |
| + |
| + final int maxDoc = context.reader().maxDoc(); |
| + if (docsWithField.length() != maxDoc ) |
| + throw new IllegalStateException("Bits length should be maxDoc ("+maxDoc+") but wasn't: "+docsWithField); |
| + |
| + if (docsWithField instanceof Bits.MatchNoBits) { |
| + return null;//match nothing |
| + } else if (docsWithField instanceof Bits.MatchAllBits) { |
| + docsWithField = null;//all docs |
| + } |
| + } |
| + |
| + //not so much a chain but a way to conveniently invert the Filter |
| + DocIdSet docIdSet = new ChainedFilter(new Filter[]{intersectsFilter}, ChainedFilter.ANDNOT).getDocIdSet(context, acceptDocs); |
| + return BitsFilteredDocIdSet.wrap(docIdSet, docsWithField); |
| + } |
| + |
| +} |