blob: c0d5ea26ec93af80195942ba75f5b550d1208c32 [file] [log] [blame]
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision 1465678)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (revision )
@@ -19,6 +19,7 @@
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.search.Filter;
+import org.apache.lucene.spatial.DisjointSpatialFilter;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
@@ -62,12 +63,14 @@
@Override
public Filter makeFilter(SpatialArgs args) {
+ final SpatialOperation op = args.getOperation();
+ if (op == SpatialOperation.IsDisjointTo)
+ return new DisjointSpatialFilter(this, args, getFieldName());
Shape shape = args.getShape();
int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct));
final boolean hasIndexedLeaves = true;
- final SpatialOperation op = args.getOperation();
if (op == SpatialOperation.Intersects) {
return new IntersectsPrefixTreeFilter(
shape, getFieldName(), grid, detailLevel, prefixGridScanLevel,
Index: lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java (revision 1465679)
+++ lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java (revision )
@@ -24,6 +24,10 @@
import com.spatial4j.core.shape.Shape;
import com.spatial4j.core.shape.SpatialRelation;
import com.spatial4j.core.shape.impl.RectangleImpl;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
import org.apache.lucene.search.Query;
import org.apache.lucene.spatial.StrategyTestCase;
import org.apache.lucene.spatial.prefix.tree.Cell;
@@ -35,15 +39,22 @@
import org.junit.Test;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
-import java.util.TreeSet;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
+import static com.spatial4j.core.shape.SpatialRelation.CONTAINS;
+import static com.spatial4j.core.shape.SpatialRelation.DISJOINT;
+import static com.spatial4j.core.shape.SpatialRelation.INTERSECTS;
+import static com.spatial4j.core.shape.SpatialRelation.WITHIN;
public class SpatialOpRecursivePrefixTreeTest extends StrategyTestCase {
@@ -66,7 +77,7 @@
this.strategy = new RecursivePrefixTreeStrategy(grid, getClass().getSimpleName());
//((PrefixTreeStrategy) strategy).setDistErrPct(0);//fully precise to grid
- System.out.println("Strategy: "+strategy.toString());
+ System.out.println("Strategy: " + strategy.toString());
}
@Test
@@ -91,14 +102,21 @@
}
@Test
+ @Repeat(iterations = 10)
+ public void testDisjoint() throws IOException {
+ mySetup(-1);
+ doTest(SpatialOperation.IsDisjointTo);//runs the shared randomized index-then-query check for the IsDisjointTo predicate
+ }
+
+ @Test
public void testWithinDisjointParts() throws IOException {
mySetup(7);
-
//one shape comprised of two parts, quite separated apart
- adoc("0", new ShapePair(ctx.makeRectangle(0, 10, -120, -100), ctx.makeRectangle(220, 240, 110, 125)));
+ adoc("0", new ShapePair(ctx.makeRectangle(0, 10, -120, -100), ctx.makeRectangle(220, 240, 110, 125), false));
commit();
//query surrounds only the second part of the indexed shape
- Query query = strategy.makeQuery(new SpatialArgs(SpatialOperation.IsWithin, ctx.makeRectangle(210, 245, 105, 128)));
+ Query query = strategy.makeQuery(new SpatialArgs(SpatialOperation.IsWithin,
+ ctx.makeRectangle(210, 245, 105, 128)));
SearchResults searchResults = executeQuery(query, 1);
//we shouldn't find it because it's not completely within
assertTrue(searchResults.numFound == 0);
@@ -127,30 +145,73 @@
), 1).numFound==1);//match
}
+ //Overridden so that the two parts of a ShapePair are indexed separately; as a result the
+ // detailLevel is independent for each part instead of being derived from the whole pair.
+ @Override
+ protected Document newDoc(String id, Shape shape) {
+ Document doc = new Document();
+ doc.add(new StringField("id", id, Field.Store.YES));
+ if (shape != null) {//a null shape yields a doc with only the id field
+ Collection<Shape> shapes;
+ if (shape instanceof ShapePair) {
+ shapes = new ArrayList<>(2);
+ shapes.add(((ShapePair)shape).shape1);
+ shapes.add(((ShapePair)shape).shape2);
+ } else {
+ shapes = Collections.singleton(shape);
+ }
+ for (Shape shapei : shapes) {//each part gets its own indexable fields
+ for (Field f : strategy.createIndexableFields(shapei)) {
+ doc.add(f);
+ }
+ }
+ if (storeShape)//the stored value is the whole shape, not the individual parts
+ doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape)));
+ }
+ return doc;
+ }
+
private void doTest(final SpatialOperation operation) throws IOException {
+ final boolean biasContains = (operation == SpatialOperation.Contains);
+
Map<String, Shape> indexedShapes = new LinkedHashMap<String, Shape>();
+ Map<String, Shape> indexedShapesGS = new LinkedHashMap<String, Shape>();
final int numIndexedShapes = randomIntBetween(1, 6);
for (int i = 0; i < numIndexedShapes; i++) {
- String id = ""+i;
+ String id = "" + i;
Shape indexedShape;
- if (random().nextInt(4) == 0) {
- indexedShape = new ShapePair( gridSnapp(randomRectangle()), gridSnapp(randomRectangle()) );
+ Shape indexedShapeGS; //(grid-snapped)
+ int R = random().nextInt(12);
+ if (R == 0) {//1 in 12
+ indexedShape = null; //no shape for this doc
+ indexedShapeGS = null;
+ } else if (R % 4 == 0) {//2 in 12 (R is 4 or 8)
+ //comprised of more than one shape
+ Rectangle shape1 = randomRectangle();
+ Rectangle shape2 = randomRectangle();
+ indexedShape = new ShapePair(shape1, shape2, biasContains);
+ indexedShapeGS = new ShapePair(gridSnap(shape1), gridSnap(shape2), biasContains);
} else {
- indexedShape = gridSnapp(randomRectangle());
+ //just one shape
+ indexedShape = randomRectangle();
+ indexedShapeGS = gridSnap(indexedShape);
}
indexedShapes.put(id, indexedShape);
+ indexedShapesGS.put(id, indexedShapeGS);
+
adoc(id, indexedShape);
+
if (random().nextInt(10) == 0)
- commit();
- }
+ commit();//intermediate commit, produces extra segments
- //delete some
+ }
Iterator<String> idIter = indexedShapes.keySet().iterator();
while (idIter.hasNext()) {
String id = idIter.next();
if (random().nextInt(10) == 0) {
deleteDoc(id);
idIter.remove();
+ indexedShapesGS.remove(id);
}
}
@@ -160,35 +221,80 @@
for (int i = 0; i < numQueryShapes; i++) {
int scanLevel = randomInt(grid.getMaxLevels());
((RecursivePrefixTreeStrategy) strategy).setPrefixGridScanLevel(scanLevel);
- Shape queryShape = gridSnapp(randomRectangle());
+ final Shape queryShape = randomRectangle();
- //Generate truth via brute force
- Set<String> expectedIds = new TreeSet<String>();
+ final boolean DISJOINT = operation.equals(SpatialOperation.IsDisjointTo);
+
+ //Generate truth via brute force:
+ // We really try to ensure true-positive matches (if the predicate on the raw shapes matches
+ // then the search should find those same matches).
+ // Because of grid approximations, false-positive matches are tolerated (see secondaryIds).
+ Set <String> expectedIds = new LinkedHashSet<String>();//true-positives
+ Set<String> secondaryIds = new LinkedHashSet<String>();//false-positives (unless disjoint)
for (Map.Entry<String, Shape> entry : indexedShapes.entrySet()) {
- if (operation.evaluate(entry.getValue(), queryShape))
- expectedIds.add(entry.getKey());
+ Shape indexedShapeCompare = entry.getValue();
+ if (indexedShapeCompare == null)
+ continue;
+ Shape queryShapeCompare = queryShape;
+ String id = entry.getKey();
+ if (operation.evaluate(indexedShapeCompare, queryShapeCompare)) {
+ expectedIds.add(id);
+ if (DISJOINT) {
+ //if no longer intersect after buffering them, for disjoint, remember this
+ indexedShapeCompare = indexedShapesGS.get(entry.getKey());
+ queryShapeCompare = gridSnap(queryShape);
+ if (!operation.evaluate(indexedShapeCompare, queryShapeCompare))
+ secondaryIds.add(id);
- }
+ }
+ } else if (!DISJOINT) {
+ //buffer either the indexed or query shape (via gridSnap) and try again
+ if (operation.equals(SpatialOperation.Intersects)) {
+ indexedShapeCompare = indexedShapesGS.get(entry.getKey());
+ queryShapeCompare = gridSnap(queryShape);
+ } else if (operation.equals(SpatialOperation.Contains)) {
+ indexedShapeCompare = indexedShapesGS.get(entry.getKey());
+ } else if (operation.equals(SpatialOperation.IsWithin)) {
+ queryShapeCompare = gridSnap(queryShape);
+ }
+ if (operation.evaluate(indexedShapeCompare, queryShapeCompare))
+ secondaryIds.add(id);
+ }
+ }
//Search and verify results
- Query query = strategy.makeQuery(new SpatialArgs(operation, queryShape));
+ SpatialArgs args = new SpatialArgs(operation, queryShape);
+ Query query = strategy.makeQuery(args);
SearchResults got = executeQuery(query, 100);
- Set<String> remainingExpectedIds = new TreeSet<String>(expectedIds);
- String msg = queryShape.toString()+" Expect: "+expectedIds;
+ Set<String> remainingExpectedIds = new LinkedHashSet<String>(expectedIds);
for (SearchResult result : got.results) {
String id = result.getId();
- Object removed = remainingExpectedIds.remove(id);
- if (removed == null) {
- fail("Shouldn't match " + id + " ("+ indexedShapes.get(id) +") in " + msg);
+ boolean removed = remainingExpectedIds.remove(id);
+ if (!removed && (!DISJOINT && !secondaryIds.contains(id))) {
+ fail("Shouldn't match", id, indexedShapes, indexedShapesGS, queryShape);
}
}
+ if (DISJOINT)
+ remainingExpectedIds.removeAll(secondaryIds);
if (!remainingExpectedIds.isEmpty()) {
- Shape firstFailedMatch = indexedShapes.get(remainingExpectedIds.iterator().next());
- fail("Didn't match " + firstFailedMatch + " in " + msg +" (of "+remainingExpectedIds.size()+")");
+ String id = remainingExpectedIds.iterator().next();
+ fail("Should have matched", id, indexedShapes, indexedShapesGS, queryShape);
}
}
}
- protected Rectangle gridSnapp(Shape snapMe) {
+ private void fail(String label, String id, Map<String, Shape> indexedShapes, Map<String, Shape> indexedShapesGS, Shape queryShape) {
+ System.err.println("Ig:" + indexedShapesGS.get(id) + " Qg:" + gridSnap(queryShape));//grid-snapped indexed and query shapes go to stderr
+ fail(label + " I #" + id + ":" + indexedShapes.get(id) + " Q:" + queryShape);//raw indexed and query shapes go into the failure message
+ }
+
+
+// private Rectangle inset(Rectangle r) {
+// //typically inset by 1 (whole numbers are easy to read)
+// double d = Math.min(1.0, grid.getDistanceForLevel(grid.getMaxLevels()) / 4);
+// return ctx.makeRectangle(r.getMinX() + d, r.getMaxX() - d, r.getMinY() + d, r.getMaxY() - d);
+// }
+
+ protected Rectangle gridSnap(Shape snapMe) {
//The next 4 lines mimic PrefixTreeStrategy.createIndexableFields()
double distErrPct = ((PrefixTreeStrategy) strategy).getDistErrPct();
double distErr = SpatialArgs.calcDistanceFromErrPct(snapMe, distErrPct, ctx);
@@ -210,27 +316,57 @@
return ctx.makeRectangle(minX, maxX, minY, maxY);
}
- /** An aggregate of 2 shapes. Only implements what's necessary for the test here.
- * TODO replace with Spatial4j trunk ShapeCollection. */
+ /**
+ * An aggregate of 2 shapes. Only implements what's necessary for the test
+ * here. TODO replace with Spatial4j trunk ShapeCollection.
+ */
private class ShapePair implements Shape {
- Shape shape1, shape2;
+ final Rectangle shape1, shape2;
+ final boolean biasContainsThenWithin;//a hack
- public ShapePair(Shape shape1, Shape shape2) {
+ public ShapePair(Rectangle shape1, Rectangle shape2, boolean containsThenWithin) {
this.shape1 = shape1;
this.shape2 = shape2;
+ biasContainsThenWithin = containsThenWithin;
}
@Override
public SpatialRelation relate(Shape other) {
- //easy to observe is correct; not an optimal code path but this is a test
- if (shape1.relate(other) == SpatialRelation.CONTAINS || shape2.relate(other) == SpatialRelation.CONTAINS)
- return SpatialRelation.CONTAINS;
- if (shape1.relate(other) == SpatialRelation.WITHIN && shape2.relate(other) == SpatialRelation.WITHIN)
- return SpatialRelation.WITHIN;
+ SpatialRelation r = relateApprox(other);
+ if (r != INTERSECTS)
+ return r;
+ //See if the correct answer is actually Contains
+ Rectangle oRect = (Rectangle)other;
+ boolean pairTouches = shape1.relate(shape2).intersects();
+ if (!pairTouches)
+ return r;
+ //test all 4 corners
+ if (relate(ctx.makePoint(oRect.getMinX(), oRect.getMinY())) == CONTAINS
+ && relate(ctx.makePoint(oRect.getMinX(), oRect.getMaxY())) == CONTAINS
+ && relate(ctx.makePoint(oRect.getMaxX(), oRect.getMinY())) == CONTAINS
+ && relate(ctx.makePoint(oRect.getMaxX(), oRect.getMaxY())) == CONTAINS)
+ return CONTAINS;
+ return r;
+ }
+
+ private SpatialRelation relateApprox(Shape other) {
+ if (biasContainsThenWithin) {
+ if (shape1.relate(other) == CONTAINS || shape1.equals(other)
+ || shape2.relate(other) == CONTAINS || shape2.equals(other)) return CONTAINS;
+
+ if (shape1.relate(other) == WITHIN && shape2.relate(other) == WITHIN) return WITHIN;
+
+ } else {
+ if ((shape1.relate(other) == WITHIN || shape1.equals(other))
+ && (shape2.relate(other) == WITHIN || shape2.equals(other))) return WITHIN;
+
+ if (shape1.relate(other) == CONTAINS || shape2.relate(other) == CONTAINS) return CONTAINS;
+ }
+
if (shape1.relate(other).intersects() || shape2.relate(other).intersects())
- return SpatialRelation.INTERSECTS;
- return SpatialRelation.DISJOINT;
+ return INTERSECTS;//might actually be 'CONTAINS' if these 2 are adjacent
+ return DISJOINT;
}
@Override
@@ -251,6 +387,11 @@
@Override
public Point getCenter() {
throw new UnsupportedOperationException("TODO unimplemented");//TODO
+ }
+
+ @Override
+ public String toString() {
+ return "ShapePair(" + shape1 + " , " + shape2 + ")";
}
}
Index: lucene/spatial/src/java/org/apache/lucene/spatial/DisjointSpatialFilter.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/DisjointSpatialFilter.java (revision )
+++ lucene/spatial/src/java/org/apache/lucene/spatial/DisjointSpatialFilter.java (revision )
@@ -0,0 +1,113 @@
+package org.apache.lucene.spatial;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.queries.ChainedFilter;
+import org.apache.lucene.search.BitsFilteredDocIdSet;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.spatial.query.SpatialArgs;
+import org.apache.lucene.spatial.query.SpatialOperation;
+import org.apache.lucene.util.Bits;
+
+import java.io.IOException;
+
+/**
+ * A Spatial Filter implementing {@link SpatialOperation#IsDisjointTo} in terms
+ * of a {@link SpatialStrategy}'s support for {@link SpatialOperation#Intersects}.
+ * A document is considered disjoint if it has spatial data that does not
+ * intersect with the query shape. Another way of looking at this is that it's
+ * a way to invert a query shape.
+ *
+ * @lucene.experimental
+ */
+public class DisjointSpatialFilter extends Filter {
+
+ private final String field;//may be null, in which case all docs are assumed to have spatial data
+ private final Filter intersectsFilter;//built once in the constructor; inverted in getDocIdSet
+
+ /**
+ * Builds the Intersects filter for the given args; getDocIdSet later inverts it.
+ * @param strategy Used to create the underlying Intersects filter via its makeFilter method
+ * @param args Used in spatial intersection; its operation is temporarily set to Intersects and then restored
+ * @param field This field is used to determine which docs have spatial data via
+ * {@link org.apache.lucene.search.FieldCache#getDocsWithField(org.apache.lucene.index.AtomicReader, String)}.
+ * Passing null will assume all docs have spatial data.
+ */
+ public DisjointSpatialFilter(SpatialStrategy strategy, SpatialArgs args, String field) {
+ this.field = field;
+
+ // TODO consider making SpatialArgs cloneable, so the caller's args need not be mutated here
+ SpatialOperation origOp = args.getOperation();//remember the caller's operation so we can restore it
+ args.setOperation(SpatialOperation.Intersects);//temporarily switch to Intersects
+ intersectsFilter = strategy.makeFilter(args);
+ args.setOperation(origOp);//restore, so the caller's args look as they did before
+ }
+
+ @Override
+ public boolean equals(Object o) {//equal when same class, same field (null-safe) and equal intersectsFilter
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ DisjointSpatialFilter that = (DisjointSpatialFilter) o;
+
+ if (field != null ? !field.equals(that.field) : that.field != null)
+ return false;
+ if (!intersectsFilter.equals(that.intersectsFilter)) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {//combines the same two members that equals compares
+ int result = field != null ? field.hashCode() : 0;
+ result = 31 * result + intersectsFilter.hashCode();
+ return result;
+ }
+
+ @Override
+ public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
+ Bits docsWithField;
+ if (field == null) {
+ docsWithField = null;//no field given: treat all docs as having spatial data
+ } else {
+ //NOTE: Going through the FieldCache lets us re-use a shared cache, which is nice,
+ // but loading it this way might be slower than, say, running an
+ // intersects filter against the world bounds. Should a method be added to the
+ // strategy for this, perhaps? But the strategy can't cache it.
+ docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), field);
+
+ final int maxDoc = context.reader().maxDoc();
+ if (docsWithField.length() != maxDoc )
+ throw new IllegalStateException("Bits length should be maxDoc ("+maxDoc+") but wasn't: "+docsWithField);
+
+ if (docsWithField instanceof Bits.MatchNoBits) {
+ return null;//no doc has the field, so match nothing
+ } else if (docsWithField instanceof Bits.MatchAllBits) {
+ docsWithField = null;//all docs have the field, so no extra restriction is needed
+ }
+ }
+
+ //not so much a chain as a convenient way to invert the intersects Filter (ANDNOT)
+ DocIdSet docIdSet = new ChainedFilter(new Filter[]{intersectsFilter}, ChainedFilter.ANDNOT).getDocIdSet(context, acceptDocs);
+ return BitsFilteredDocIdSet.wrap(docIdSet, docsWithField);//restrict to docs with the field; null means all docs
+ }
+
+}