// blob: aa261f3d3dbcd8d5e2ed3d6f5fa148488c9c7e11 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MockRandomMergePolicy;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestTopFieldCollectorEarlyTermination extends LuceneTestCase {
// Number of documents added by createRandomIndex (counted before any random deletes are applied).
private int numDocs;
// Pool of distinct random terms; used as values of the "s" field, as delete terms and as query terms.
private List<String> terms;
// Directory backing the index of the current iteration; closed by closeIndex.
private Directory dir;
// Sort on the "ndv1" doc-values field; used both as the index sort and as the search sort.
private final Sort sort = new Sort(new SortField("ndv1", SortField.Type.LONG));
// Writer of the current index; also the source of the reader below.
private RandomIndexWriter iw;
// Reader obtained from the writer once the index has been populated.
private IndexReader reader;
// Segment count passed to forceMerge when the index is (randomly) force-merged without asking for a single segment.
private static final int FORCE_MERGE_MAX_SEGMENT_COUNT = 5;
/**
 * Builds one random document: two numeric doc-values fields ("ndv1", "ndv2") drawn from
 * {@code random().nextInt(10)} and a stored string field "s" holding a term picked from
 * {@link #terms}.
 */
private Document randomDocument() {
  // Draw the random values first, in the same order the fields are added.
  final int firstValue = random().nextInt(10);
  final int secondValue = random().nextInt(10);
  final String pickedTerm = RandomPicks.randomFrom(random(), terms);

  final Document document = new Document();
  document.add(new NumericDocValuesField("ndv1", firstValue));
  document.add(new NumericDocValuesField("ndv2", secondValue));
  document.add(new StringField("s", pickedTerm, Store.YES));
  return document;
}
/**
 * Creates a fresh random index sorted by {@link #sort} and opens {@link #reader} on it.
 *
 * <p>Populates {@link #dir}, {@link #numDocs}, {@link #terms}, {@link #iw} and {@link #reader}.
 * Documents are added one by one with random commits and random deletes-by-term in between.
 *
 * @param singleSortedSegment when {@code true} the index is force-merged down to one segment;
 *     otherwise it is randomly force-merged to {@link #FORCE_MERGE_MAX_SEGMENT_COUNT} segments
 * @throws IOException if indexing, committing, merging or opening the reader fails
 */
private void createRandomIndex(boolean singleSortedSegment) throws IOException {
  dir = newDirectory();
  numDocs = atLeast(150);

  // Collect the requested number of distinct random terms (at least one is always requested).
  final int termCount = TestUtil.nextInt(random(), 1, numDocs / 5);
  final Set<String> distinctTerms = new HashSet<>();
  do {
    distinctTerms.add(TestUtil.randomSimpleString(random()));
  } while (distinctTerms.size() < termCount);
  terms = new ArrayList<>(distinctTerms);

  // The analyzer and the writer are both seeded from the same value.
  final long seed = random().nextLong();
  final IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
  if (config.getMergePolicy() instanceof MockRandomMergePolicy) {
    // MockRandomMergePolicy randomly wraps the leaf readers, which upsets merging,
    // so swap it for a tiered policy.
    config.setMergePolicy(newTieredMergePolicy());
  }
  config.setMergeScheduler(new SerialMergeScheduler()); // keeps the test reproducible
  config.setIndexSort(sort);

  iw = new RandomIndexWriter(new Random(seed), dir, config);
  iw.setDoRandomForceMerge(false); // force merging is decided explicitly further down

  final int middleDoc = numDocs / 2;
  final int lastDoc = numDocs - 1;
  for (int docIndex = 0; docIndex <= lastDoc; docIndex++) {
    iw.addDocument(randomDocument());

    // Always commit at the midpoint; elsewhere commit randomly, but never right after the last doc.
    final boolean commitNow =
        docIndex == middleDoc || (docIndex != lastDoc && random().nextInt(8) == 0);
    if (commitNow) {
      iw.commit();
    }

    // Occasionally delete every document carrying a randomly chosen term.
    if (random().nextInt(15) == 0) {
      final String victim = RandomPicks.randomFrom(random(), terms);
      iw.deleteDocuments(new Term("s", victim));
    }
  }

  if (singleSortedSegment) {
    iw.forceMerge(1);
  } else if (random().nextBoolean()) {
    iw.forceMerge(FORCE_MERGE_MAX_SEGMENT_COUNT);
  }

  reader = iw.getReader();
  if (reader.numDocs() == 0) {
    // The random deletes removed everything: add one empty document and reopen the reader.
    iw.addDocument(new Document());
    reader.close();
    reader = iw.getReader();
  }
}
/**
 * Closes the reader, the writer and the directory of the current index, in that order.
 *
 * <p>Each close is attempted even if an earlier one throws, so a failure while closing the
 * reader or the writer does not leave the remaining resources open.
 *
 * @throws IOException if closing any of the three resources fails
 */
private void closeIndex() throws IOException {
  try {
    reader.close();
  } finally {
    try {
      iw.close();
    } finally {
      dir.close();
    }
  }
}
/** Runs the early-termination check without an {@code after} document (no paging). */
public void testEarlyTermination() throws IOException {
  final boolean paging = false;
  doTestEarlyTermination(paging);
}
/** Runs the early-termination check with an {@code after} document (paging). */
public void testEarlyTerminationWhenPaging() throws IOException {
  final boolean paging = true;
  doTestEarlyTermination(paging);
}
/**
 * Compares two {@link TopFieldCollector}s over randomly built sorted indices: one created with
 * {@code Integer.MAX_VALUE} as its last argument, which is asserted never to terminate early,
 * and one created with {@code 1}, which may. Both must return the same top hits.
 *
 * <p>The index of each iteration is closed in a {@code finally} block, so a failing assertion
 * or search does not additionally leak the reader, writer and directory.
 *
 * @param paging when {@code true}, the last hit of a preliminary 10-hit search is passed to both
 *     collectors as the {@code after} document
 * @throws IOException if building, searching or closing the index fails
 */
private void doTestEarlyTermination(boolean paging) throws IOException {
  final int iters = atLeast(1);
  for (int i = 0; i < iters; ++i) {
    createRandomIndex(false);
    try {
      // Largest live-doc count of any segment; used below to decide when early
      // termination is expected to happen.
      int maxSegmentSize = 0;
      for (LeafReaderContext ctx : reader.leaves()) {
        maxSegmentSize = Math.max(ctx.reader().numDocs(), maxSegmentSize);
      }
      for (int j = 0; j < iters; ++j) {
        final IndexSearcher searcher = newSearcher(reader);
        final int numHits = TestUtil.nextInt(random(), 1, numDocs);
        FieldDoc after;
        if (paging) {
          // createRandomIndex guarantees at least one live document, so scoreDocs is non-empty.
          assertTrue("index must contain at least one live document",
              searcher.getIndexReader().numDocs() > 0);
          TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
          after = (FieldDoc) td.scoreDocs[td.scoreDocs.length - 1];
        } else {
          after = null;
        }
        // NOTE(review): the last argument is presumably the total-hits threshold — confirm
        // against the TopFieldCollector API of the Lucene version in use.
        final TopFieldCollector collector1 = TopFieldCollector.create(sort, numHits, after, Integer.MAX_VALUE);
        final TopFieldCollector collector2 = TopFieldCollector.create(sort, numHits, after, 1);
        final Query query;
        if (random().nextBoolean()) {
          query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms)));
        } else {
          query = new MatchAllDocsQuery();
        }
        searcher.search(query, collector1);
        searcher.search(query, collector2);
        TopDocs td1 = collector1.topDocs();
        TopDocs td2 = collector2.topDocs();
        assertFalse(collector1.isEarlyTerminated());
        if (paging == false && maxSegmentSize > numHits && query instanceof MatchAllDocsQuery) {
          // Make sure that we sometimes early terminate
          assertTrue(collector2.isEarlyTerminated());
        }
        if (collector2.isEarlyTerminated()) {
          // After early termination the hit count is only bounded, not exact.
          assertTrue(td2.totalHits.value >= td1.scoreDocs.length);
          assertTrue(td2.totalHits.value <= reader.maxDoc());
        } else {
          assertEquals(td2.totalHits.value, td1.totalHits.value);
        }
        CheckHits.checkEqual(query, td1.scoreDocs, td2.scoreDocs);
      }
    } finally {
      closeIndex();
    }
  }
}
/**
 * Checks {@code TopFieldCollector.canEarlyTerminate(searchSort, indexSort)} for combinations
 * involving {@link SortField#FIELD_DOC}: a doc-id search sort is expected to allow early
 * termination whatever the index sort is, while the field-based search sorts tried here are not.
 */
public void testCanEarlyTerminateOnDocId() {
  final SortField byLongA = new SortField("a", SortField.Type.LONG);
  final SortField byLongB = new SortField("b", SortField.Type.LONG);

  // Search sort is the doc id: expected to early terminate for every index sort, including none.
  final boolean docOverDoc =
      TopFieldCollector.canEarlyTerminate(new Sort(SortField.FIELD_DOC), new Sort(SortField.FIELD_DOC));
  assertTrue(docOverDoc);
  final boolean docOverUnsorted =
      TopFieldCollector.canEarlyTerminate(new Sort(SortField.FIELD_DOC), null);
  assertTrue(docOverUnsorted);
  final boolean docOverField =
      TopFieldCollector.canEarlyTerminate(new Sort(SortField.FIELD_DOC), new Sort(byLongB));
  assertTrue(docOverField);
  final boolean docOverFieldThenDoc =
      TopFieldCollector.canEarlyTerminate(new Sort(SortField.FIELD_DOC), new Sort(byLongB, SortField.FIELD_DOC));
  assertTrue(docOverFieldThenDoc);

  // Search sort starts with a field the index is not sorted by: no early termination expected.
  final boolean fieldOverUnsorted =
      TopFieldCollector.canEarlyTerminate(new Sort(byLongA), null);
  assertFalse(fieldOverUnsorted);
  final boolean fieldOverOtherField =
      TopFieldCollector.canEarlyTerminate(new Sort(byLongA), new Sort(byLongB));
  assertFalse(fieldOverOtherField);
  final boolean fieldOverDoc =
      TopFieldCollector.canEarlyTerminate(new Sort(byLongA), new Sort(SortField.FIELD_DOC));
  assertFalse(fieldOverDoc);
  final boolean fieldThenDocOverDoc =
      TopFieldCollector.canEarlyTerminate(new Sort(byLongA, SortField.FIELD_DOC), new Sort(SortField.FIELD_DOC));
  assertFalse(fieldThenDocOverDoc);
}
/**
 * Checks {@code TopFieldCollector.canEarlyTerminate(searchSort, indexSort)} for field-based
 * sorts: early termination is expected when the search sort is a prefix of (or equal to) the
 * index sort, and not expected when the index is unsorted, the direction differs, the search
 * sort is longer than the index sort, or any field differs.
 */
public void testCanEarlyTerminateOnPrefix() {
  final SortField longA = new SortField("a", SortField.Type.LONG);
  final SortField longAReversed = new SortField("a", SortField.Type.LONG, true);
  final SortField longANotReversed = new SortField("a", SortField.Type.LONG, false);
  final SortField longC = new SortField("c", SortField.Type.LONG);
  final SortField stringB = new SortField("b", SortField.Type.STRING);
  final SortField stringC = new SortField("c", SortField.Type.STRING);

  // Search sort equal to, or a prefix of, the index sort.
  Sort searchSort = new Sort(longA);
  Sort indexSort = new Sort(longA);
  assertTrue(TopFieldCollector.canEarlyTerminate(searchSort, indexSort));

  searchSort = new Sort(longA, stringB);
  indexSort = new Sort(longA, stringB);
  assertTrue(TopFieldCollector.canEarlyTerminate(searchSort, indexSort));

  searchSort = new Sort(longA);
  indexSort = new Sort(longA, stringB);
  assertTrue(TopFieldCollector.canEarlyTerminate(searchSort, indexSort));

  // Reversed search sort against an unsorted index, then against a non-reversed index sort.
  searchSort = new Sort(longAReversed);
  assertFalse(TopFieldCollector.canEarlyTerminate(searchSort, null));

  searchSort = new Sort(longAReversed);
  indexSort = new Sort(longANotReversed);
  assertFalse(TopFieldCollector.canEarlyTerminate(searchSort, indexSort));

  // Search sort longer than the index sort.
  searchSort = new Sort(longA, stringB);
  indexSort = new Sort(longA);
  assertFalse(TopFieldCollector.canEarlyTerminate(searchSort, indexSort));

  // Second field differs.
  searchSort = new Sort(longA, stringB);
  indexSort = new Sort(longA, stringC);
  assertFalse(TopFieldCollector.canEarlyTerminate(searchSort, indexSort));

  // First field differs.
  searchSort = new Sort(longA, stringB);
  indexSort = new Sort(longC, stringB);
  assertFalse(TopFieldCollector.canEarlyTerminate(searchSort, indexSort));
}
}