blob: a345d91d12f108c65f6e107aba0be9737acf3ae6 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiBits;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
/**
* Abstract class to do basic tests for a RangeField query. Testing rigor inspired by {@code BaseGeoPointTestCase}
*/
public abstract class BaseRangeFieldQueryTestCase extends LuceneTestCase {
protected abstract Field newRangeField(Range box);
protected abstract Query newIntersectsQuery(Range box);
protected abstract Query newContainsQuery(Range box);
protected abstract Query newWithinQuery(Range box);
protected abstract Query newCrossesQuery(Range box);
protected abstract Range nextRange(int dimensions) throws Exception;
protected int dimension() {
return random().nextInt(4) + 1;
}
public void testRandomTiny() throws Exception {
// Make sure single-leaf-node case is OK:
for (int i = 0; i < 10; ++i) {
doTestRandom(10, false);
}
}
public void testRandomMedium() throws Exception {
doTestRandom(1000, false);
}
@Nightly
public void testRandomBig() throws Exception {
doTestRandom(200000, false);
}
public void testMultiValued() throws Exception {
doTestRandom(1000, true);
}
public void testAllEqual() throws Exception {
int numDocs = atLeast(1000);
int dimensions = dimension();
Range[][] ranges = new Range[numDocs][];
Range[] theRange = new Range[] {nextRange(dimensions)};
Arrays.fill(ranges, theRange);
verify(ranges);
}
// Force low cardinality leaves
public void testLowCardinality() throws Exception {
int numDocs = atLeast(1000);
int dimensions = dimension();
int cardinality = TestUtil.nextInt(random(), 2, 20);
Range[][] diffRanges = new Range[cardinality][];
for (int i = 0; i < cardinality; i++) {
diffRanges[i] = new Range[] {nextRange(dimensions)};
}
Range[][] ranges = new Range[numDocs][];
for (int i = 0; i < numDocs; i++) {
ranges[i] = diffRanges[random().nextInt(cardinality)];
}
verify(ranges);
}
private void doTestRandom(int count, boolean multiValued) throws Exception {
int numDocs = atLeast(count);
int dimensions = dimension();
if (VERBOSE) {
System.out.println("TEST: numDocs=" + numDocs);
}
Range[][] ranges = new Range[numDocs][];
boolean haveRealDoc = true;
nextdoc: for (int id=0; id<numDocs; ++id) {
int x = random().nextInt(20);
if (ranges[id] == null) {
ranges[id] = new Range[] {nextRange(dimensions)};
}
if (x == 17) {
// some docs don't have a box:
ranges[id][0].isMissing = true;
if (VERBOSE) {
System.out.println(" id=" + id + " is missing");
}
continue;
}
if (multiValued == true && random().nextBoolean()) {
// randomly add multi valued documents (up to 2 fields)
int n = random().nextInt(2) + 1;
ranges[id] = new Range[n];
for (int i=0; i<n; ++i) {
ranges[id][i] = nextRange(dimensions);
}
}
if (id > 0 && x < 9 && haveRealDoc) {
int oldID;
int i=0;
// don't step on missing ranges:
while (true) {
oldID = random().nextInt(id);
if (ranges[oldID][0].isMissing == false) {
break;
} else if (++i > id) {
continue nextdoc;
}
}
if (x == dimensions*2) {
// Fully identical box (use first box in case current is multivalued but old is not)
for (int d=0; d<dimensions; ++d) {
ranges[id][0].setMin(d, ranges[oldID][0].getMin(d));
ranges[id][0].setMax(d, ranges[oldID][0].getMax(d));
}
if (VERBOSE) {
System.out.println(" id=" + id + " box=" + ranges[id] + " (same box as doc=" + oldID + ")");
}
} else {
for (int m = 0, even = dimensions % 2; m < dimensions * 2; ++m) {
if (x == m) {
int d = (int)Math.floor(m/2);
// current could be multivalue but old may not be, so use first box
if (even == 0) { // even is min
ranges[id][0].setMin(d, ranges[oldID][0].getMin(d));
if (VERBOSE) {
System.out.println(" id=" + id + " box=" + ranges[id] + " (same min[" + d + "] as doc=" + oldID + ")");
}
} else { // odd is max
ranges[id][0].setMax(d, ranges[oldID][0].getMax(d));
if (VERBOSE) {
System.out.println(" id=" + id + " box=" + ranges[id] + " (same max[" + d + "] as doc=" + oldID + ")");
}
}
}
}
}
}
}
verify(ranges);
}
private void verify(Range[][] ranges) throws Exception {
IndexWriterConfig iwc = newIndexWriterConfig();
// Else seeds may not reproduce:
iwc.setMergeScheduler(new SerialMergeScheduler());
// Else we can get O(N^2) merging
int mbd = iwc.getMaxBufferedDocs();
if (mbd != -1 && mbd < ranges.length/100) {
iwc.setMaxBufferedDocs(ranges.length/100);
}
Directory dir;
if (ranges.length > 50000) {
dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
} else {
dir = newDirectory();
}
Set<Integer> deleted = new HashSet<>();
IndexWriter w = new IndexWriter(dir, iwc);
for (int id=0; id < ranges.length; ++id) {
Document doc = new Document();
doc.add(newStringField("id", ""+id, Field.Store.NO));
doc.add(new NumericDocValuesField("id", id));
if (ranges[id][0].isMissing == false) {
for (int n=0; n<ranges[id].length; ++n) {
addRange(doc, ranges[id][n]);
}
}
w.addDocument(doc);
if (id > 0 && random().nextInt(100) == 1) {
int idToDelete = random().nextInt(id);
w.deleteDocuments(new Term("id", ""+idToDelete));
deleted.add(idToDelete);
if (VERBOSE) {
System.out.println(" delete id=" + idToDelete);
}
}
}
if (random().nextBoolean()) {
w.forceMerge(1);
}
final IndexReader r = DirectoryReader.open(w);
w.close();
IndexSearcher s = newSearcher(r);
int dimensions = ranges[0][0].numDimensions();
int iters = atLeast(25);
Bits liveDocs = MultiBits.getLiveDocs(s.getIndexReader());
int maxDoc = s.getIndexReader().maxDoc();
for (int iter=0; iter<iters; ++iter) {
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " s=" + s);
}
// occasionally test open ended bounding ranges
Range queryRange = nextRange(dimensions);
int rv = random().nextInt(4);
Query query;
Range.QueryType queryType;
if (rv == 0) {
queryType = Range.QueryType.INTERSECTS;
query = newIntersectsQuery(queryRange);
} else if (rv == 1) {
queryType = Range.QueryType.CONTAINS;
query = newContainsQuery(queryRange);
} else if (rv == 2) {
queryType = Range.QueryType.WITHIN;
query = newWithinQuery(queryRange);
} else {
queryType = Range.QueryType.CROSSES;
query = newCrossesQuery(queryRange);
}
if (VERBOSE) {
System.out.println(" query=" + query);
}
final FixedBitSet hits = new FixedBitSet(maxDoc);
s.search(query, new SimpleCollector() {
private int docBase;
@Override
public void collect(int doc) {
hits.set(docBase + doc);
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; }
});
NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
for (int docID=0; docID<maxDoc; ++docID) {
assertEquals(docID, docIDToID.nextDoc());
int id = (int) docIDToID.longValue();
boolean expected;
if (liveDocs != null && liveDocs.get(docID) == false) {
// document is deleted
expected = false;
} else if (ranges[id][0].isMissing) {
expected = false;
} else {
expected = expectedResult(queryRange, ranges[id], queryType);
}
if (hits.get(docID) != expected) {
StringBuilder b = new StringBuilder();
b.append("FAIL (iter ").append(iter).append("): ");
if (expected == true) {
b.append("id=").append(id).append(ranges[id].length > 1 ? " (MultiValue) " : " ").append("should match but did not\n");
} else {
b.append("id=").append(id).append(" should not match but did\n");
}
b.append(" queryRange=").append(queryRange).append("\n");
b.append(" box").append((ranges[id].length > 1) ? "es=" : "=").append(ranges[id][0]);
for (int n=1; n<ranges[id].length; ++n) {
b.append(", ");
b.append(ranges[id][n]);
}
b.append("\n queryType=").append(queryType).append("\n");
b.append(" deleted?=").append(liveDocs != null && liveDocs.get(docID) == false);
fail("wrong hit (first of possibly more):\n\n" + b);
}
}
}
IOUtils.close(r, dir);
}
protected void addRange(Document doc, Range box) {
doc.add(newRangeField(box));
}
protected boolean expectedResult(Range queryRange, Range[] range, Range.QueryType queryType) {
for (int i=0; i<range.length; ++i) {
if (expectedBBoxQueryResult(queryRange, range[i], queryType) == true) {
return true;
}
}
return false;
}
protected boolean expectedBBoxQueryResult(Range queryRange, Range range, Range.QueryType queryType) {
if (queryRange.isEqual(range) && queryType != Range.QueryType.CROSSES) {
return true;
}
Range.QueryType relation = range.relate(queryRange);
if (queryType == Range.QueryType.INTERSECTS) {
return relation != null;
} else if (queryType == Range.QueryType.CROSSES) {
// by definition, RangeFields that CONTAIN the query are also considered to cross
return relation == queryType || relation == Range.QueryType.CONTAINS;
}
return relation == queryType;
}
/** base class for range verification */
protected abstract static class Range {
protected boolean isMissing = false;
/** supported query relations */
protected enum QueryType { INTERSECTS, WITHIN, CONTAINS, CROSSES }
protected abstract int numDimensions();
protected abstract Object getMin(int dim);
protected abstract void setMin(int dim, Object val);
protected abstract Object getMax(int dim);
protected abstract void setMax(int dim, Object val);
protected abstract boolean isEqual(Range other);
protected abstract boolean isDisjoint(Range other);
protected abstract boolean isWithin(Range other);
protected abstract boolean contains(Range other);
protected QueryType relate(Range other) {
if (isDisjoint(other)) {
// if disjoint; return null:
return null;
} else if (isWithin(other)) {
return QueryType.WITHIN;
} else if (contains(other)) {
return QueryType.CONTAINS;
}
return QueryType.CROSSES;
}
}
}