blob: db8b10503cb0457fd5e71b58c2cc4125e49e585a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.CompositeReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestTopDocsMerge extends LuceneTestCase {
/**
 * An {@link IndexSearcher} restricted to a single leaf (segment) of a larger
 * index, simulating one shard of a distributed search while retaining the
 * parent context so doc ids and index statistics remain global.
 */
private static class ShardSearcher extends IndexSearcher {
  // single-element list wrapping this shard's leaf, as required by IndexSearcher#search
  private final List<LeafReaderContext> leafAsList;

  public ShardSearcher(LeafReaderContext ctx, IndexReaderContext parent) {
    super(parent);
    leafAsList = Collections.singletonList(ctx);
  }

  /** Runs the given weight over only this shard's leaf, feeding the collector. */
  public void search(Weight weight, Collector collector) throws IOException {
    search(leafAsList, weight, collector);
  }

  /** Collects this shard's top {@code topN} hits by score. */
  public TopDocs search(Weight weight, int topN) throws IOException {
    final TopScoreDocCollector top = TopScoreDocCollector.create(topN, Integer.MAX_VALUE);
    search(leafAsList, weight, top);
    return top.topDocs();
  }

  @Override
  public String toString() {
    return "ShardSearcher(" + leafAsList.get(0) + ")";
  }
}
/** Runs the full sort/merge test without the (from, size) pagination variant. */
public void testSort_1() throws Exception {
testSort(false);
}
/** Runs the full sort/merge test exercising the (from, size) pagination variant. */
public void testSort_2() throws Exception {
testSort(true);
}
/**
 * {@code TopDocs.merge(..., setShardIndex=false)} requires every incoming
 * {@link ScoreDoc} to carry a usable shard index; mixing in docs whose shard
 * index was never assigned must fail with {@link IllegalArgumentException},
 * regardless of the order in which the shard results are passed.
 */
public void testInconsistentTopDocsFail() {
  final TopDocs first = new TopDocs(new TotalHits(1, TotalHits.Relation.EQUAL_TO),
      new ScoreDoc[] { new ScoreDoc(1, 1.0f) });
  final TopDocs second = new TopDocs(new TotalHits(1, TotalHits.Relation.EQUAL_TO),
      new ScoreDoc[] { new ScoreDoc(1, 1.0f, -1) });
  // randomize the order so the failure doesn't depend on array position
  final TopDocs[] shards = random().nextBoolean()
      ? new TopDocs[] { second, first }
      : new TopDocs[] { first, second };
  expectThrows(IllegalArgumentException.class, () -> TopDocs.merge(0, 1, shards, false));
}
/**
 * Verifies that {@code TopDocs.merge(..., setShardIndex=false)} keeps the
 * shard indices that were pre-assigned on each {@link ScoreDoc} — rather than
 * overwriting them with positions in the input array — and that merging is
 * stable across re-orderings of the per-shard results.
 */
public void testPreAssignedShardIndex() {
  final boolean constantScore = random().nextBoolean();
  final int shardCount = 2 + random().nextInt(10);
  final ArrayList<TopDocs> shardHits = new ArrayList<>(shardCount);
  final Map<Integer, TopDocs> hitsByShard = new HashMap<>();
  int totalHitCount = 0;
  for (int shard = 0; shard < shardCount; shard++) {
    final int hitCount = 1 + random().nextInt(10);
    totalHitCount += hitCount;
    final ScoreDoc[] docs = new ScoreDoc[hitCount];
    for (int hit = 0; hit < docs.length; hit++) {
      // the shard index is assigned here; the list itself is shuffled below
      docs[hit] = new ScoreDoc((100 * shard) + hit,
          constantScore ? 1.0f : random().nextFloat(), shard);
    }
    final TopDocs td = new TopDocs(new TotalHits(hitCount, TotalHits.Relation.EQUAL_TO), docs);
    shardHits.add(td);
    hitsByShard.put(shard, td);
  }
  // shuffle so shard index (likely) no longer matches position in the array
  Collections.shuffle(shardHits, random());
  final int from = random().nextInt(totalHitCount - 1);
  final int size = 1 + random().nextInt(totalHitCount - from);
  // passing false means merge uses the ScoreDoc.shardIndex we already set
  final TopDocs merged = TopDocs.merge(from, size, shardHits.toArray(new TopDocs[0]), false);
  assertTrue(merged.scoreDocs.length > 0);
  for (ScoreDoc sd : merged.scoreDocs) {
    assertTrue(sd.shardIndex != -1);
    final TopDocs source = hitsByShard.get(sd.shardIndex);
    assertNotNull(source);
    // the merged hit must be the very same instance held by its claimed shard
    boolean sameInstanceFound = false;
    for (ScoreDoc candidate : source.scoreDocs) {
      if (candidate == sd) {
        sameInstanceFound = true;
        break;
      }
    }
    assertTrue(sameInstanceFound);
  }
  // re-merging after another shuffle must yield the identical top hits
  Collections.shuffle(shardHits, random());
  final TopDocs remerged = TopDocs.merge(from, size, shardHits.toArray(new TopDocs[0]), false);
  assertArrayEquals(merged.scoreDocs, remerged.scoreDocs);
}
/**
 * Core test: indexes random documents, searches the whole index, then searches
 * each segment as its own "shard" and merges the per-shard results with
 * {@link TopDocs#merge}; the merged hits must be consistent with the
 * whole-index search under a variety of random sorts.
 *
 * @param useFrom whether to also exercise the paginating (from, size) merge variant
 */
void testSort(boolean useFrom) throws Exception {
  IndexReader reader = null;
  Directory dir = null;
  final int numDocs = TEST_NIGHTLY ? atLeast(1000) : atLeast(100);
  final String[] tokens = new String[] {"a", "b", "c", "d", "e"};
  if (VERBOSE) {
    System.out.println("TEST: make index");
  }
  {
    dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    // w.setDoRandomForceMerge(false);
    // w.w.getConfig().setMaxBufferedDocs(atLeast(100));
    final String[] content = new String[atLeast(20)];
    for (int contentIDX = 0; contentIDX < content.length; contentIDX++) {
      final StringBuilder sb = new StringBuilder();
      final int numTokens = TestUtil.nextInt(random(), 1, 10);
      for (int tokenIDX = 0; tokenIDX < numTokens; tokenIDX++) {
        sb.append(tokens[random().nextInt(tokens.length)]).append(' ');
      }
      content[contentIDX] = sb.toString();
    }
    for (int docIDX = 0; docIDX < numDocs; docIDX++) {
      final Document doc = new Document();
      doc.add(new SortedDocValuesField("string", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      doc.add(newTextField("text", content[random().nextInt(content.length)], Field.Store.NO));
      doc.add(new FloatDocValuesField("float", random().nextFloat()));
      final int intValue;
      // occasionally index extreme int values to exercise comparator boundaries
      if (random().nextInt(100) == 17) {
        intValue = Integer.MIN_VALUE;
      } else if (random().nextInt(100) == 17) {
        intValue = Integer.MAX_VALUE;
      } else {
        intValue = random().nextInt();
      }
      doc.add(new NumericDocValuesField("int", intValue));
      if (VERBOSE) {
        System.out.println(" doc=" + doc);
      }
      w.addDocument(doc);
    }
    reader = w.getReader();
    w.close();
  }
  // NOTE: sometimes reader has just one segment, which is
  // important to test
  final IndexSearcher searcher = newSearcher(reader);
  final IndexReaderContext ctx = searcher.getTopReaderContext();
  // One ShardSearcher per leaf; docStarts[i] holds the docBase of shard i so
  // merged (global) doc ids can be mapped back to the shard they came from.
  final ShardSearcher[] subSearchers;
  final int[] docStarts;
  if (ctx instanceof LeafReaderContext) {
    subSearchers = new ShardSearcher[1];
    docStarts = new int[1];
    subSearchers[0] = new ShardSearcher((LeafReaderContext) ctx, ctx);
    docStarts[0] = 0;
  } else {
    final CompositeReaderContext compCTX = (CompositeReaderContext) ctx;
    final int size = compCTX.leaves().size();
    subSearchers = new ShardSearcher[size];
    docStarts = new int[size];
    int docBase = 0;
    for (int searcherIDX = 0; searcherIDX < subSearchers.length; searcherIDX++) {
      final LeafReaderContext leaf = compCTX.leaves().get(searcherIDX);
      subSearchers[searcherIDX] = new ShardSearcher(leaf, compCTX);
      docStarts[searcherIDX] = docBase;
      docBase += leaf.reader().maxDoc();
    }
  }
  // pool of sort fields to draw random sorts from (each field in both directions)
  final List<SortField> sortFields = new ArrayList<>();
  sortFields.add(new SortField("string", SortField.Type.STRING, true));
  sortFields.add(new SortField("string", SortField.Type.STRING, false));
  sortFields.add(new SortField("int", SortField.Type.INT, true));
  sortFields.add(new SortField("int", SortField.Type.INT, false));
  sortFields.add(new SortField("float", SortField.Type.FLOAT, true));
  sortFields.add(new SortField("float", SortField.Type.FLOAT, false));
  sortFields.add(new SortField(null, SortField.Type.SCORE, true));
  sortFields.add(new SortField(null, SortField.Type.SCORE, false));
  sortFields.add(new SortField(null, SortField.Type.DOC, true));
  sortFields.add(new SortField(null, SortField.Type.DOC, false));
  int numIters = atLeast(300);
  for (int iter = 0; iter < numIters; iter++) {
    // TODO: custom FieldComp...
    final Query query = new TermQuery(new Term("text", tokens[random().nextInt(tokens.length)]));
    final Sort sort;
    if (random().nextInt(10) == 4) {
      // Sort by score
      sort = null;
    } else {
      final SortField[] randomSortFields = new SortField[TestUtil.nextInt(random(), 1, 3)];
      for (int sortIDX = 0; sortIDX < randomSortFields.length; sortIDX++) {
        randomSortFields[sortIDX] = sortFields.get(random().nextInt(sortFields.size()));
      }
      sort = new Sort(randomSortFields);
    }
    final int numHits = TestUtil.nextInt(random(), 1, numDocs + 5);
    //final int numHits = 5;
    if (VERBOSE) {
      System.out.println("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
    }
    int from = -1;
    int size = -1;
    // First search on whole index:
    final TopDocs topHits;
    if (sort == null) {
      if (useFrom) {
        TopScoreDocCollector c = TopScoreDocCollector.create(numHits, Integer.MAX_VALUE);
        searcher.search(query, c);
        from = TestUtil.nextInt(random(), 0, numHits - 1);
        size = numHits - from;
        topHits = slice(c.topDocs(), from, size);
      } else {
        topHits = searcher.search(query, numHits);
      }
    } else {
      final TopFieldCollector c = TopFieldCollector.create(sort, numHits, Integer.MAX_VALUE);
      searcher.search(query, c);
      if (useFrom) {
        from = TestUtil.nextInt(random(), 0, numHits - 1);
        size = numHits - from;
        topHits = slice(c.topDocs(), from, size);
      } else {
        topHits = c.topDocs(0, numHits);
      }
    }
    if (VERBOSE) {
      if (useFrom) {
        System.out.println("from=" + from + " size=" + size);
      }
      System.out.println(" top search: " + topHits.totalHits.value + " totalHits; hits=" + (topHits.scoreDocs == null ? "null" : topHits.scoreDocs.length));
      if (topHits.scoreDocs != null) {
        for (int hitIDX = 0; hitIDX < topHits.scoreDocs.length; hitIDX++) {
          final ScoreDoc sd = topHits.scoreDocs[hitIDX];
          System.out.println("  doc=" + sd.doc + " score=" + sd.score);
        }
      }
    }
    // ... then all shards:
    final Weight w = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE, 1);
    final TopDocs[] shardHits;
    if (sort == null) {
      shardHits = new TopDocs[subSearchers.length];
    } else {
      // the sort-aware merge below needs a TopFieldDocs[]
      shardHits = new TopFieldDocs[subSearchers.length];
    }
    for (int shardIDX = 0; shardIDX < subSearchers.length; shardIDX++) {
      final TopDocs subHits;
      final ShardSearcher subSearcher = subSearchers[shardIDX];
      if (sort == null) {
        subHits = subSearcher.search(w, numHits);
      } else {
        final TopFieldCollector c = TopFieldCollector.create(sort, numHits, Integer.MAX_VALUE);
        subSearcher.search(w, c);
        subHits = c.topDocs(0, numHits);
      }
      shardHits[shardIDX] = subHits;
      if (VERBOSE) {
        System.out.println(" shard=" + shardIDX + " " + subHits.totalHits.value + " totalHits hits=" + (subHits.scoreDocs == null ? "null" : subHits.scoreDocs.length));
        if (subHits.scoreDocs != null) {
          for (ScoreDoc sd : subHits.scoreDocs) {
            System.out.println("  doc=" + sd.doc + " score=" + sd.score);
          }
        }
      }
    }
    // Merge:
    final TopDocs mergedHits;
    if (useFrom) {
      if (sort == null) {
        mergedHits = TopDocs.merge(from, size, shardHits, true);
      } else {
        mergedHits = TopDocs.merge(sort, from, size, (TopFieldDocs[]) shardHits, true);
      }
    } else {
      if (sort == null) {
        mergedHits = TopDocs.merge(numHits, shardHits);
      } else {
        mergedHits = TopDocs.merge(sort, numHits, (TopFieldDocs[]) shardHits);
      }
    }
    if (mergedHits.scoreDocs != null) {
      // Make sure the returned shards are correct:
      for (int hitIDX = 0; hitIDX < mergedHits.scoreDocs.length; hitIDX++) {
        final ScoreDoc sd = mergedHits.scoreDocs[hitIDX];
        assertEquals("doc=" + sd.doc + " wrong shard",
            ReaderUtil.subIndex(sd.doc, docStarts),
            sd.shardIndex);
      }
    }
    TestUtil.assertConsistent(topHits, mergedHits);
  }
  reader.close();
  dir.close();
}

/**
 * Applies the (from, size) window to {@code hits} in place, mirroring the
 * pagination behavior of {@link TopDocs#merge}. Extracted to remove the
 * duplicated windowing logic the score-sort and field-sort branches shared.
 *
 * <p>Can't use TopDocs#topDocs(start, howMany), since it has different
 * behaviour when start &gt;= hitCount than TopDocs#merge currently has.
 *
 * @param hits collected hits; its scoreDocs array is replaced when from is in range
 * @param from number of leading hits to skip
 * @param size maximum number of hits to keep after skipping
 * @return {@code hits} itself with a windowed scoreDocs array, or a fresh empty
 *         TopDocs (same totalHits) when {@code from} is past the collected hits
 */
private static TopDocs slice(TopDocs hits, int from, int size) {
  if (from < hits.scoreDocs.length) {
    final ScoreDoc[] windowed = new ScoreDoc[Math.min(size, hits.scoreDocs.length - from)];
    System.arraycopy(hits.scoreDocs, from, windowed, 0, windowed.length);
    hits.scoreDocs = windowed;
    return hits;
  }
  return new TopDocs(hits.totalHits, new ScoreDoc[0]);
}
/**
 * Merged totalHits must sum the per-shard counts, and the merged relation is
 * EQUAL_TO only when every input reported an exact count; any lower-bound
 * input makes the merged count a lower bound too.
 */
public void testMergeTotalHitsRelation() {
  final TopDocs exactTwo = new TopDocs(new TotalHits(2, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] { new ScoreDoc(42, 2f) });
  final TopDocs exactOne = new TopDocs(new TotalHits(1, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] { new ScoreDoc(42, 2f) });
  final TopDocs atLeastOne = new TopDocs(new TotalHits(1, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), new ScoreDoc[] { new ScoreDoc(42, 2f) });
  final TopDocs atLeastThree = new TopDocs(new TotalHits(3, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), new ScoreDoc[] { new ScoreDoc(42, 2f) });
  // exact + exact => exact sum
  assertEquals(new TotalHits(3, TotalHits.Relation.EQUAL_TO),
      TopDocs.merge(1, new TopDocs[] { exactTwo, exactOne }).totalHits);
  // exact + lower bound => lower bound
  assertEquals(new TotalHits(3, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO),
      TopDocs.merge(1, new TopDocs[] { exactTwo, atLeastOne }).totalHits);
  // lower bound + lower bound => lower bound
  assertEquals(new TotalHits(4, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO),
      TopDocs.merge(1, new TopDocs[] { atLeastOne, atLeastThree }).totalHits);
  // lower bound + exact => lower bound
  assertEquals(new TotalHits(4, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO),
      TopDocs.merge(1, new TopDocs[] { atLeastThree, exactOne }).totalHits);
}
}