/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import static org.apache.lucene.search.SortField.FIELD_DOC;
import static org.apache.lucene.search.SortField.FIELD_SCORE;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.IntRange;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.search.CheckHits;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
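/**
* Tests for the sort optimization that uses index structures (the points index for numeric sorts,
* the terms index for string sorts) to skip non-competitive hits. The tests typically pair
* numHits with an equal totalHitsThreshold, so the collector may stop counting hits exactly once
* the threshold is reached:
*
* <pre>
* CollectorManager&lt;TopFieldCollector, TopFieldDocs&gt; manager =
*     TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold);
* TopDocs topDocs = searcher.search(query, manager);
* </pre>
*/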
public class TestSortOptimization extends LuceneTestCase {
public void testLongSortOptimization() throws IOException {
final Directory dir = newDirectory();
IndexWriterConfig config =
new IndexWriterConfig()
// Make sure to use the default codec, otherwise some random points formats that have
// large values for maxPointsPerLeaf might not enable skipping with only 10k docs
.setCodec(TestUtil.getDefaultCodec());
final IndexWriter writer = new IndexWriter(dir, config);
final int numDocs = atLeast(10000);
for (int i = 0; i < numDocs; ++i) {
final Document doc = new Document();
doc.add(new NumericDocValuesField("my_field", i));
doc.add(new LongPoint("my_field", i));
writer.addDocument(doc);
if (i == 7000) writer.flush(); // two segments
}
final IndexReader reader = DirectoryReader.open(writer);
writer.close();
// single-threaded so totalHits is deterministic
IndexSearcher searcher = newSearcher(reader, true, true, false);
final SortField sortField = new SortField("my_field", SortField.Type.LONG);
final Sort sort = new Sort(sortField);
final int numHits = 3;
final int totalHitsThreshold = 3;
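// numHits == totalHitsThreshold: once the queue is full the collector no longer needs an exact
// hit count, so the comparator may skip non-competitive docs via the points index.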
{ // simple sort
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(topDocs.scoreDocs.length, numHits);
for (int i = 0; i < numHits; i++) {
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
assertEquals(i, ((Long) fieldDoc.fields[0]).intValue());
}
assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, topDocs.totalHits.relation);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
{ // paging sort with after
long afterValue = 2;
FieldDoc after = new FieldDoc(2, Float.NaN, new Long[] {afterValue});
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(topDocs.scoreDocs.length, numHits);
for (int i = 0; i < numHits; i++) {
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
assertEquals(afterValue + 1 + i, fieldDoc.fields[0]);
}
assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, topDocs.totalHits.relation);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
{ // test that if there is the secondary sort on _score, scores are filled correctly
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(
new Sort(sortField, FIELD_SCORE), numHits, null, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(topDocs.scoreDocs.length, numHits);
for (int i = 0; i < numHits; i++) {
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
assertEquals(i, ((Long) fieldDoc.fields[0]).intValue());
float score = (float) fieldDoc.fields[1];
assertEquals(1.0, score, 0.001);
}
assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, topDocs.totalHits.relation);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
{ // test that if numeric field is a secondary sort, no optimization is run
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(
new Sort(FIELD_SCORE, sortField), numHits, null, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(topDocs.scoreDocs.length, numHits);
assertEquals(
topDocs.totalHits.value,
numDocs); // assert that all documents were collected => optimization was not run
}
reader.close();
dir.close();
}
/**
* Test that even if a field is not indexed with points, sorting on it still works as expected,
* although no optimization will be run
*/
public void testLongSortOptimizationOnFieldNotIndexedWithPoints() throws IOException {
final Directory dir = newDirectory();
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
final int numDocs = atLeast(100);
// my_field is not indexed with points
for (int i = 0; i < numDocs; ++i) {
final Document doc = new Document();
doc.add(new NumericDocValuesField("my_field", i));
writer.addDocument(doc);
}
final IndexReader reader = DirectoryReader.open(writer);
writer.close();
// single-threaded so totalHits is deterministic
IndexSearcher searcher =
newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false);
final SortField sortField = new SortField("my_field", SortField.Type.LONG);
final Sort sort = new Sort(sortField);
final int numHits = 3;
final int totalHitsThreshold = 3;
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(
topDocs.scoreDocs.length, numHits); // sort still works and returns expected number of docs
for (int i = 0; i < numHits; i++) {
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
assertEquals(i, ((Long) fieldDoc.fields[0]).intValue()); // returns expected values
}
assertEquals(
topDocs.totalHits.value,
numDocs); // assert that all documents were collected => optimization was not run
reader.close();
dir.close();
}
public void testSortOptimizationWithMissingValues() throws IOException {
final Directory dir = newDirectory();
IndexWriterConfig config =
new IndexWriterConfig()
// Make sure to use the default codec, otherwise some random points formats that have
// large values for maxPointsPerLeaf might not enable skipping with only 10k docs
.setCodec(TestUtil.getDefaultCodec());
final IndexWriter writer = new IndexWriter(dir, config);
final int numDocs = atLeast(10000);
for (int i = 0; i < numDocs; ++i) {
final Document doc = new Document();
if ((i % 500) != 0) { // the value is missing on every 500th document
doc.add(new NumericDocValuesField("my_field", i));
doc.add(new LongPoint("my_field", i));
}
writer.addDocument(doc);
if (i == 7000) writer.flush(); // two segments
}
final IndexReader reader = DirectoryReader.open(writer);
writer.close();
// single-threaded so totalHits is deterministic
IndexSearcher searcher =
newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false);
final int numHits = 3;
final int totalHitsThreshold = 3;
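// Skipping is driven by the points index, which has no entries for docs with missing values, so
// the optimization may only kick in when the configured missing value cannot be competitive.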
{ // test that the optimization is not run when the SortField's missing value is competitive
final SortField sortField = new SortField("my_field", SortField.Type.LONG);
sortField.setMissingValue(0L); // competitive missing value: 0L sorts before every indexed value
final Sort sort = new Sort(sortField);
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(topDocs.scoreDocs.length, numHits);
assertEquals(
topDocs.totalHits.value,
numDocs); // assert that all documents were collected => optimization was not run
}
{ // test that the optimization is run when the SortField's missing value is NOT competitive
final SortField sortField = new SortField("my_field", SortField.Type.LONG);
sortField.setMissingValue(100L); // non-competitive missing value: larger than the top ascending hits
final Sort sort = new Sort(sortField);
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(topDocs.scoreDocs.length, numHits);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
reader.close();
dir.close();
}
public void testSortOptimizationEqualValues() throws IOException {
final Directory dir = newDirectory();
IndexWriterConfig config =
new IndexWriterConfig()
// Make sure to use the default codec, otherwise some random points formats that have
// large values for maxPointsPerLeaf might not enable skipping with only 10k docs
.setCodec(TestUtil.getDefaultCodec());
final IndexWriter writer = new IndexWriter(dir, config);
final int numDocs = atLeast(TEST_NIGHTLY ? 50_000 : 10_000);
for (int i = 1; i <= numDocs; ++i) {
final Document doc = new Document();
doc.add(
new NumericDocValuesField("my_field1", 100)); // all docs have the same value of my_field1
doc.add(new IntPoint("my_field1", 100));
doc.add(
new NumericDocValuesField(
"my_field2", numDocs - i)); // diff values for the field my_field2
writer.addDocument(doc);
if (i == 7000) writer.flush(); // two segments
}
final IndexReader reader = DirectoryReader.open(writer);
writer.close();
// single-threaded so totalHits is deterministic
IndexSearcher searcher =
newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false);
final int numHits = 3;
final int totalHitsThreshold = 3;
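// All docs carry the same my_field1 value, so once the queue is full every remaining doc is a
// non-competitive tie (ties are broken by doc id) unless an after-doc or a tie-break field
// forces the collector to keep visiting hits.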
{ // test that sorting on a single field with equal values uses the optimization
final SortField sortField = new SortField("my_field1", SortField.Type.INT);
final Sort sort = new Sort(sortField);
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(topDocs.scoreDocs.length, numHits);
for (int i = 0; i < numHits; i++) {
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
assertEquals(100, fieldDoc.fields[0]);
}
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
{ // test that sorting on a single field with equal values and after parameter
// doesn't use the optimization
final int afterValue = 100;
final int afterDocID = 10 + random().nextInt(1000);
final SortField sortField = new SortField("my_field1", SortField.Type.INT);
final Sort sort = new Sort(sortField);
FieldDoc after = new FieldDoc(afterDocID, Float.NaN, new Integer[] {afterValue});
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(topDocs.scoreDocs.length, numHits);
for (int i = 0; i < numHits; i++) {
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
assertEquals(100, fieldDoc.fields[0]);
assertTrue(fieldDoc.doc > afterDocID);
}
assertEquals(topDocs.totalHits.value, numDocs);
}
{ // test that sorting on main field with equal values + another field for tie breaks doesn't
// use optimization
final SortField sortField1 = new SortField("my_field1", SortField.Type.INT);
final SortField sortField2 = new SortField("my_field2", SortField.Type.INT);
final Sort sort = new Sort(sortField1, sortField2);
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(topDocs.scoreDocs.length, numHits);
for (int i = 0; i < numHits; i++) {
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
assertEquals(100, fieldDoc.fields[0]); // sort on 1st field as expected
assertEquals(i, fieldDoc.fields[1]); // sort on 2nd field as expected
}
assertEquals(
topDocs.totalHits.value,
numDocs); // assert that all documents were collected => optimization was not run
}
reader.close();
dir.close();
}
public void testFloatSortOptimization() throws IOException {
final Directory dir = newDirectory();
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
final int numDocs = atLeast(10000);
for (int i = 0; i < numDocs; ++i) {
final Document doc = new Document();
float f = 1f * i;
doc.add(new FloatDocValuesField("my_field", f));
doc.add(new FloatPoint("my_field", i));
writer.addDocument(doc);
}
final IndexReader reader = DirectoryReader.open(writer);
writer.close();
// single-threaded so totalHits is deterministic
IndexSearcher searcher =
newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false);
final SortField sortField = new SortField("my_field", SortField.Type.FLOAT);
final Sort sort = new Sort(sortField);
final int numHits = 3;
final int totalHitsThreshold = 3;
{ // simple sort
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(topDocs.scoreDocs.length, numHits);
for (int i = 0; i < numHits; i++) {
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
assertEquals(1f * i, fieldDoc.fields[0]);
}
assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, topDocs.totalHits.relation);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
reader.close();
dir.close();
}
/**
* Test that a search with sort on [_doc, other fields] across multiple indices doesn't miss any
* documents.
*/
public void testDocSortOptimizationMultipleIndices() throws IOException {
final int numIndices = 3;
final int numDocsInIndex = atLeast(50);
Directory[] dirs = new Directory[numIndices];
IndexReader[] readers = new IndexReader[numIndices];
for (int i = 0; i < numIndices; i++) {
dirs[i] = newDirectory();
IndexWriterConfig config =
new IndexWriterConfig()
// Make sure to use the default codec, otherwise some random points formats that have
// large values for maxPointsPerLeaf might not enable skipping with only 10k docs
.setCodec(TestUtil.getDefaultCodec());
try (IndexWriter writer = new IndexWriter(dirs[i], config)) {
for (int docID = 0; docID < numDocsInIndex; docID++) {
final Document doc = new Document();
doc.add(new NumericDocValuesField("my_field", docID * numIndices + i));
writer.addDocument(doc);
}
writer.flush();
}
readers[i] = DirectoryReader.open(dirs[i]);
}
final int size = 7;
final int totalHitsThreshold = 7;
final Sort sort = new Sort(FIELD_DOC, new SortField("my_field", SortField.Type.LONG));
TopFieldDocs[] topDocs = new TopFieldDocs[numIndices];
int curNumHits;
FieldDoc after = null;
long collectedDocs = 0;
long totalDocs = 0;
int numHits = 0;
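// Page through all indices: search each one, tag its hits with the shard index, merge the pages
// with TopDocs.merge, and use the last merged hit as the after-doc of the next page.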
do {
for (int i = 0; i < numIndices; i++) {
// single-threaded so totalHits is deterministic
IndexSearcher searcher =
newSearcher(readers[i], random().nextBoolean(), random().nextBoolean(), false);
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, size, after, totalHitsThreshold);
topDocs[i] = searcher.search(new MatchAllDocsQuery(), manager);
for (int docID = 0; docID < topDocs[i].scoreDocs.length; docID++) {
topDocs[i].scoreDocs[docID].shardIndex = i;
}
collectedDocs += topDocs[i].totalHits.value;
totalDocs += numDocsInIndex;
}
TopFieldDocs mergedTopDocs = TopDocs.merge(sort, size, topDocs);
curNumHits = mergedTopDocs.scoreDocs.length;
numHits += curNumHits;
if (curNumHits > 0) {
after = (FieldDoc) mergedTopDocs.scoreDocs[curNumHits - 1];
}
} while (curNumHits > 0);
for (int i = 0; i < numIndices; i++) {
readers[i].close();
dirs[i].close();
}
final int expectedNumHits = numDocsInIndex * numIndices;
assertEquals(expectedNumHits, numHits);
assertNonCompetitiveHitsAreSkipped(collectedDocs, totalDocs);
}
public void testDocSortOptimizationWithAfter() throws IOException {
final Directory dir = newDirectory();
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
final int numDocs = atLeast(150);
for (int i = 0; i < numDocs; ++i) {
final Document doc = new Document();
writer.addDocument(doc);
if ((i > 0) && (i % 50 == 0)) {
writer.flush();
}
}
final IndexReader reader = DirectoryReader.open(writer);
writer.close();
// single-threaded so totalHits is deterministic
IndexSearcher searcher =
newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false);
final int numHits = 10;
final int totalHitsThreshold = 10;
final int[] searchAfters = {3, 10, numDocs - 10};
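// Page with several after-docs: the ascending _doc sorts below should skip everything up to and
// including the after-doc, while the descending sort has to collect all documents.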
for (int searchAfter : searchAfters) {
// sort by _doc with search after should trigger optimization
{
final Sort sort = new Sort(FIELD_DOC);
FieldDoc after = new FieldDoc(searchAfter, Float.NaN, new Integer[] {searchAfter});
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
int expNumHits =
(searchAfter >= (numDocs - numHits)) ? (numDocs - searchAfter - 1) : numHits;
assertEquals(expNumHits, topDocs.scoreDocs.length);
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
int expectedDocID = searchAfter + 1 + i;
assertEquals(expectedDocID, topDocs.scoreDocs[i].doc);
}
assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, topDocs.totalHits.relation);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
// sort by _doc + _score with search after should trigger optimization
{
final Sort sort = new Sort(FIELD_DOC, FIELD_SCORE);
FieldDoc after = new FieldDoc(searchAfter, Float.NaN, new Object[] {searchAfter, 1.0f});
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
int expNumHits =
(searchAfter >= (numDocs - numHits)) ? (numDocs - searchAfter - 1) : numHits;
assertEquals(expNumHits, topDocs.scoreDocs.length);
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
int expectedDocID = searchAfter + 1 + i;
assertEquals(expectedDocID, topDocs.scoreDocs[i].doc);
}
assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, topDocs.totalHits.relation);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
// sort by _doc desc should not trigger optimization
{
final Sort sort = new Sort(new SortField(null, SortField.Type.DOC, true));
FieldDoc after = new FieldDoc(searchAfter, Float.NaN, new Integer[] {searchAfter});
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
int expNumHits = (searchAfter < numHits) ? searchAfter : numHits;
assertEquals(expNumHits, topDocs.scoreDocs.length);
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
int expectedDocID = searchAfter - 1 - i;
assertEquals(expectedDocID, topDocs.scoreDocs[i].doc);
}
// assert that all documents were collected
assertEquals(numDocs, topDocs.totalHits.value);
}
}
reader.close();
dir.close();
}
public void testDocSortOptimizationWithAfterCollectsAllDocs() throws IOException {
final Directory dir = newDirectory();
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
final int numDocs = atLeast(TEST_NIGHTLY ? 50_000 : 5_000);
final boolean multipleSegments = random().nextBoolean();
final int numDocsInSegment = numDocs / 10 + random().nextInt(numDocs / 10);
for (int i = 1; i <= numDocs; ++i) {
final Document doc = new Document();
writer.addDocument(doc);
if (multipleSegments && (i % numDocsInSegment == 0)) {
writer.flush();
}
}
writer.flush();
IndexReader reader = DirectoryReader.open(writer);
IndexSearcher searcher = newSearcher(reader);
int visitedHits = 0;
ScoreDoc after = null;
while (visitedHits < numDocs) {
int batch = 1 + random().nextInt(500);
Query query = new MatchAllDocsQuery();
TopDocs topDocs = searcher.searchAfter(after, query, batch, new Sort(FIELD_DOC));
int expectedHits = Math.min(numDocs - visitedHits, batch);
assertEquals(expectedHits, topDocs.scoreDocs.length);
after = topDocs.scoreDocs[expectedHits - 1];
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
assertEquals(visitedHits, topDocs.scoreDocs[i].doc);
visitedHits++;
}
}
assertEquals(visitedHits, numDocs);
IOUtils.close(writer, reader, dir);
}
public void testDocSortOptimization() throws IOException {
final Directory dir = newDirectory();
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
final int numDocs = atLeast(100);
int seg = 1;
for (int i = 0; i < numDocs; ++i) {
final Document doc = new Document();
doc.add(new LongPoint("lf", i));
doc.add(new StoredField("slf", i));
doc.add(new StringField("tf", "seg" + seg, Field.Store.YES));
writer.addDocument(doc);
if ((i > 0) && (i % 50 == 0)) {
writer.flush();
seg++;
}
}
final IndexReader reader = DirectoryReader.open(writer);
writer.close();
// single-threaded so totalHits is deterministic
IndexSearcher searcher =
newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false);
final int numHits = 3;
final int totalHitsThreshold = 3;
final Sort sort = new Sort(FIELD_DOC);
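// The index has atLeast(100) docs, but since any doc id after the first numHits is
// non-competitive, the assertions below expect fewer than 10 collected hits.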
// sort by _doc should skip all non-competitive documents
{
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(numHits, topDocs.scoreDocs.length);
for (int i = 0; i < numHits; i++) {
assertEquals(i, topDocs.scoreDocs[i].doc);
}
assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, topDocs.totalHits.relation);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, 10);
}
// sort by _doc with a bool query should skip all non-competitive documents
{
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold);
int lowerRange = 40;
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(LongPoint.newRangeQuery("lf", lowerRange, Long.MAX_VALUE), BooleanClause.Occur.MUST);
bq.add(new TermQuery(new Term("tf", "seg1")), BooleanClause.Occur.MUST);
TopDocs topDocs = searcher.search(bq.build(), manager);
assertEquals(numHits, topDocs.scoreDocs.length);
for (int i = 0; i < numHits; i++) {
Document d = searcher.doc(topDocs.scoreDocs[i].doc);
assertEquals(Integer.toString(i + lowerRange), d.get("slf"));
assertEquals("seg1", d.get("tf"));
}
assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, topDocs.totalHits.relation);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, 10);
}
reader.close();
dir.close();
}
/**
* Test that sorting on _doc works correctly. This test goes through
* DefaultBulkScorer::scoreRange, where scorerIterator is a BitSetIterator. As a conjunction of
* this BitSetIterator with the DocComparator's iterator, we get a BitSetConjunctionDISI.
* BitSetConjunctionDISI advances based on the DocComparator's iterator, and doesn't consider
* that its BitSetIterator may have advanced past a certain doc.
*/
public void testDocSort() throws IOException {
final Directory dir = newDirectory();
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
final int numDocs = 4;
for (int i = 0; i < numDocs; ++i) {
final Document doc = new Document();
doc.add(new StringField("id", "id" + i, Field.Store.NO));
if (i < 2) {
doc.add(new LongPoint("lf", 1));
}
writer.addDocument(doc);
}
final IndexReader reader = DirectoryReader.open(writer);
writer.close();
IndexSearcher searcher = newSearcher(reader, random().nextBoolean(), random().nextBoolean());
searcher.setQueryCache(null);
final int numHits = 10;
final int totalHitsThreshold = 10;
final Sort sort = new Sort(FIELD_DOC);
{
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold);
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(LongPoint.newExactQuery("lf", 1), BooleanClause.Occur.MUST);
bq.add(new TermQuery(new Term("id", "id3")), BooleanClause.Occur.MUST_NOT);
TopDocs topDocs = searcher.search(bq.build(), manager);
assertEquals(2, topDocs.scoreDocs.length);
}
reader.close();
dir.close();
}
public void testPointValidation() throws IOException {
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new IntPoint("intField", 4));
doc.add(new NumericDocValuesField("intField", 4));
doc.add(new LongPoint("longField", 42));
doc.add(new NumericDocValuesField("longField", 42));
doc.add(new IntRange("intRange", new int[] {1}, new int[] {10}));
doc.add(new NumericDocValuesField("intRange", 4));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
writer.close();
IndexSearcher searcher = newSearcher(reader, random().nextBoolean(), random().nextBoolean());
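// Sorting with points requires the indexed points to match the sort type's encoding: INT expects
// 4-byte single-dimension points, LONG 8-byte ones, and IntRange indexes two dimensions, so each
// mismatch below must be rejected unless the optimization is disabled.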
SortField longSortOnIntField = new SortField("intField", SortField.Type.LONG);
assertThrows(
IllegalArgumentException.class,
() -> searcher.search(new MatchAllDocsQuery(), 1, new Sort(longSortOnIntField)));
// assert that when sort optimization is disabled we can use LONG sort on int field
longSortOnIntField.setOptimizeSortWithPoints(false);
searcher.search(new MatchAllDocsQuery(), 1, new Sort(longSortOnIntField));
SortField intSortOnLongField = new SortField("longField", SortField.Type.INT);
assertThrows(
IllegalArgumentException.class,
() -> searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnLongField)));
// assert that when sort optimization is disabled we can use INT sort on long field
intSortOnLongField.setOptimizeSortWithPoints(false);
searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnLongField));
SortField intSortOnIntRangeField = new SortField("intRange", SortField.Type.INT);
assertThrows(
IllegalArgumentException.class,
() -> searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnIntRangeField)));
// assert that when sort optimization is disabled we can use INT sort on intRange field
intSortOnIntRangeField.setOptimizeSortWithPoints(false);
searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnIntRangeField));
reader.close();
dir.close();
}
public void testMaxDocVisited() throws IOException {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
int numDocs = atLeast(10000);
long offset = 100 + random().nextInt(100);
long smallestValue = 50 + random().nextInt(50);
boolean flushed = false;
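// All regular values are >= offset; a single smaller value is indexed as the first document of
// the second segment and must still come back as the top hit once skipping has started.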
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
doc.add(new NumericDocValuesField("my_field", i + offset));
doc.add(new LongPoint("my_field", i + offset));
writer.addDocument(doc);
if (i >= 5000 && flushed == false) {
flushed = true;
writer.flush();
// Index the smallest value to the first slot of the second segment
doc = new Document();
doc.add(new NumericDocValuesField("my_field", smallestValue));
doc.add(new LongPoint("my_field", smallestValue));
writer.addDocument(doc);
}
}
IndexReader reader = DirectoryReader.open(writer);
writer.close();
IndexSearcher searcher = newSearcher(reader, random().nextBoolean(), random().nextBoolean());
SortField sortField = new SortField("my_field", SortField.Type.LONG);
TopFieldDocs topDocs =
searcher.search(new MatchAllDocsQuery(), 1 + random().nextInt(100), new Sort(sortField));
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[0];
assertEquals(smallestValue, ((Long) fieldDoc.fields[0]).intValue());
reader.close();
dir.close();
}
public void testRandomLong() throws IOException {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
List<Long> seqNos = new ArrayList<>();
int limit = TEST_NIGHTLY ? 10000 : 1000;
int iterations = limit + random().nextInt(limit);
long seqNoGenerator = random().nextInt(1000);
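// Generate seq_nos with occasional duplicates (ties) and occasional gaps so that both equal
// values and skipped ranges are exercised.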
for (long i = 0; i < iterations; i++) {
int copies = random().nextInt(100) <= 5 ? 1 : 1 + random().nextInt(5);
for (int j = 0; j < copies; j++) {
seqNos.add(seqNoGenerator);
}
seqNoGenerator++;
if (random().nextInt(100) <= 5) {
seqNoGenerator += random().nextInt(10);
}
}
Collections.shuffle(seqNos, random());
int pendingDocs = 0;
for (long seqNo : seqNos) {
Document doc = new Document();
doc.add(new NumericDocValuesField("seq_no", seqNo));
doc.add(new LongPoint("seq_no", seqNo));
writer.addDocument(doc);
pendingDocs++;
if (pendingDocs > 500 && random().nextInt(100) <= 5) {
pendingDocs = 0;
writer.flush();
}
}
writer.flush();
seqNos.sort(Long::compare);
IndexReader reader = DirectoryReader.open(writer);
writer.close();
IndexSearcher searcher = newSearcher(reader, random().nextBoolean(), random().nextBoolean());
SortField sortField = new SortField("seq_no", SortField.Type.LONG);
int visitedHits = 0;
ScoreDoc after = null;
while (visitedHits < seqNos.size()) {
int batch = 1 + random().nextInt(100);
Query query =
random().nextBoolean()
? new MatchAllDocsQuery()
: LongPoint.newRangeQuery("seq_no", 0, Long.MAX_VALUE);
TopDocs topDocs = searcher.searchAfter(after, query, batch, new Sort(sortField));
int expectedHits = Math.min(seqNos.size() - visitedHits, batch);
assertEquals(expectedHits, topDocs.scoreDocs.length);
after = topDocs.scoreDocs[expectedHits - 1];
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
long expectedSeqNo = seqNos.get(visitedHits);
assertEquals(expectedSeqNo, ((Long) fieldDoc.fields[0]).intValue());
visitedHits++;
}
}
reader.close();
dir.close();
}
// Test that sort on sorted numeric field without sort optimization and
// with sort optimization produce the same results
public void testSortOptimizationOnSortedNumericField() throws IOException {
final Directory dir = newDirectory();
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
final int numDocs = atLeast(5000);
for (int i = 0; i < numDocs; ++i) {
int value = random().nextInt();
int value2 = random().nextInt();
final Document doc = new Document();
doc.add(new SortedNumericDocValuesField("my_field", value));
doc.add(new SortedNumericDocValuesField("my_field", value2));
doc.add(new LongPoint("my_field", value));
doc.add(new LongPoint("my_field", value2));
writer.addDocument(doc);
}
final IndexReader reader = DirectoryReader.open(writer);
writer.close();
// single-threaded so totalHits is deterministic
IndexSearcher searcher = newSearcher(reader, true, true, false);
SortedNumericSelector.Type type =
RandomPicks.randomFrom(random(), SortedNumericSelector.Type.values());
boolean reverse = random().nextBoolean();
final SortField sortField =
new SortedNumericSortField("my_field", SortField.Type.LONG, reverse, type);
sortField.setOptimizeSortWithPoints(false);
final Sort sort = new Sort(sortField); // sort without sort optimization
final SortField sortField2 =
new SortedNumericSortField("my_field", SortField.Type.LONG, reverse, type);
final Sort sort2 = new Sort(sortField2); // sort with sort optimization
Query query = new MatchAllDocsQuery();
final int totalHitsThreshold = 3;
long expectedCollectedHits = 0;
long collectedHits = 0;
long collectedHits2 = 0;
int visitedHits = 0;
ScoreDoc after = null;
while (visitedHits < numDocs) {
int batch = 1 + random().nextInt(100);
int expectedHits = Math.min(numDocs - visitedHits, batch);
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, batch, (FieldDoc) after, totalHitsThreshold);
TopDocs topDocs = searcher.search(query, manager);
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
CollectorManager<TopFieldCollector, TopFieldDocs> manager2 =
TopFieldCollector.createSharedManager(sort2, batch, (FieldDoc) after, totalHitsThreshold);
TopDocs topDocs2 = searcher.search(query, manager2);
ScoreDoc[] scoreDocs2 = topDocs2.scoreDocs;
// assert that the resulting hits are the same
assertEquals(expectedHits, topDocs.scoreDocs.length);
assertEquals(topDocs.scoreDocs.length, topDocs2.scoreDocs.length);
for (int i = 0; i < scoreDocs.length; i++) {
FieldDoc fieldDoc = (FieldDoc) scoreDocs[i];
FieldDoc fieldDoc2 = (FieldDoc) scoreDocs2[i];
assertEquals(fieldDoc.fields[0], fieldDoc2.fields[0]);
assertEquals(fieldDoc.doc, fieldDoc2.doc);
visitedHits++;
}
expectedCollectedHits += numDocs;
collectedHits += topDocs.totalHits.value;
collectedHits2 += topDocs2.totalHits.value;
after = scoreDocs[expectedHits - 1];
}
assertEquals(visitedHits, numDocs);
assertEquals(expectedCollectedHits, collectedHits);
// assert that the second sort with optimization collected less or equal hits
assertTrue(collectedHits >= collectedHits2);
reader.close();
dir.close();
}
private void assertNonCompetitiveHitsAreSkipped(long collectedHits, long numDocs) {
if (collectedHits >= numDocs) {
fail(
"Expected some non-competitive hits are skipped; got collected_hits="
+ collectedHits
+ " num_docs="
+ numDocs);
}
}
public void testStringSortOptimization() throws IOException {
final Directory dir = newDirectory();
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
final int numDocs = atLeast(10000);
for (int i = 0; i < numDocs; ++i) {
final Document doc = new Document();
final BytesRef value = new BytesRef(Integer.toString(random().nextInt(1000)));
doc.add(new StringField("my_field", value, Store.NO));
doc.add(new SortedDocValuesField("my_field", value));
writer.addDocument(doc);
if (i % 2000 == 0) writer.flush(); // multiple segments
}
final DirectoryReader reader = DirectoryReader.open(writer);
writer.close();
doTestStringSortOptimization(reader);
reader.close();
dir.close();
}
public void testStringSortOptimizationWithMissingValues() throws IOException {
final Directory dir = newDirectory();
final IndexWriter writer =
new IndexWriter(dir, new IndexWriterConfig().setMergePolicy(newLogMergePolicy()));
final int numDocs = atLeast(10000);
// one segment with all values missing to start with
writer.addDocument(new Document());
for (int i = 0; i < numDocs - 2; ++i) {
if (i % 2000 == 0) writer.flush(); // multiple segments
final Document doc = new Document();
if (random().nextInt(2) == 0) {
final BytesRef value = new BytesRef(Integer.toString(random().nextInt(1000)));
doc.add(new StringField("my_field", value, Store.NO));
doc.add(new SortedDocValuesField("my_field", value));
}
writer.addDocument(doc);
}
writer.flush();
// and one final segment with all values missing to finish with
writer.addDocument(new Document());
final DirectoryReader reader = DirectoryReader.open(writer);
writer.close();
doTestStringSortOptimization(reader);
reader.close();
dir.close();
}
private void doTestStringSortOptimization(DirectoryReader reader) throws IOException {
final int numDocs = reader.numDocs();
final int numHits = 5;
{ // simple ascending sort
SortField sortField = new SortField("my_field", SortField.Type.STRING);
sortField.setMissingValue(SortField.STRING_LAST);
Sort sort = new Sort(sortField);
TopDocs topDocs = assertSort(reader, sort, numHits, null);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
{ // simple descending sort
SortField sortField = new SortField("my_field", SortField.Type.STRING, true);
sortField.setMissingValue(SortField.STRING_FIRST);
Sort sort = new Sort(sortField);
TopDocs topDocs = assertSort(reader, sort, numHits, null);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
{ // ascending sort that returns missing values first
SortField sortField = new SortField("my_field", SortField.Type.STRING);
sortField.setMissingValue(SortField.STRING_FIRST);
Sort sort = new Sort(sortField);
assertSort(reader, sort, numHits, null);
}
{ // descending sort that returns missing values last
SortField sortField = new SortField("my_field", SortField.Type.STRING, true);
sortField.setMissingValue(SortField.STRING_LAST);
Sort sort = new Sort(sortField);
assertSort(reader, sort, numHits, null);
}
{ // paging ascending sort with after
SortField sortField = new SortField("my_field", SortField.Type.STRING);
sortField.setMissingValue(SortField.STRING_LAST);
Sort sort = new Sort(sortField);
BytesRef afterValue = new BytesRef(random().nextBoolean() ? "23" : "230000000");
FieldDoc after = new FieldDoc(2, Float.NaN, new Object[] {afterValue});
TopDocs topDocs = assertSort(reader, sort, numHits, after);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
{ // paging descending sort with after
SortField sortField = new SortField("my_field", SortField.Type.STRING, true);
sortField.setMissingValue(SortField.STRING_FIRST);
Sort sort = new Sort(sortField);
BytesRef afterValue = new BytesRef(random().nextBoolean() ? "17" : "170000000");
FieldDoc after = new FieldDoc(2, Float.NaN, new Object[] {afterValue});
TopDocs topDocs = assertSort(reader, sort, numHits, after);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
{ // paging ascending sort with after that returns missing values first
SortField sortField = new SortField("my_field", SortField.Type.STRING);
sortField.setMissingValue(SortField.STRING_FIRST);
Sort sort = new Sort(sortField);
BytesRef afterValue = new BytesRef(random().nextBoolean() ? "23" : "230000000");
FieldDoc after = new FieldDoc(2, Float.NaN, new Object[] {afterValue});
TopDocs topDocs = assertSort(reader, sort, numHits, after);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
{ // paging descending sort with after that returns missing values first
SortField sortField = new SortField("my_field", SortField.Type.STRING, true);
sortField.setMissingValue(SortField.STRING_LAST);
Sort sort = new Sort(sortField);
BytesRef afterValue = new BytesRef(random().nextBoolean() ? "17" : "170000000");
FieldDoc after = new FieldDoc(2, Float.NaN, new Object[] {afterValue});
TopDocs topDocs = assertSort(reader, sort, numHits, after);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
{ // test that if there is the secondary sort on _score, hits are still skipped
SortField sortField = new SortField("my_field", SortField.Type.STRING);
sortField.setMissingValue(SortField.STRING_LAST);
Sort sort = new Sort(sortField, FIELD_SCORE);
TopDocs topDocs = assertSort(reader, sort, numHits, null);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
{ // test that if string field is a secondary sort, no optimization is run
SortField sortField = new SortField("my_field", SortField.Type.STRING);
sortField.setMissingValue(SortField.STRING_LAST);
Sort sort = new Sort(FIELD_SCORE, sortField);
TopDocs topDocs = assertSort(reader, sort, numHits, null);
assertEquals(
topDocs.totalHits.value,
numDocs); // assert that all documents were collected => optimization was not run
}
}
private TopDocs assertSort(DirectoryReader reader, Sort sort, int n, FieldDoc after)
throws IOException {
TopDocs topDocs = assertSearchHits(reader, sort, n, after);
SortField[] sortField2 = ArrayUtil.growExact(sort.getSort(), sort.getSort().length + 1);
// A secondary sort on reverse doc ID is the best way to catch bugs if the comparator filters
// too aggressively
sortField2[sortField2.length - 1] = new SortField(null, Type.DOC, true);
FieldDoc after2 = null;
if (after != null) {
Object[] afterFields2 = ArrayUtil.growExact(after.fields, after.fields.length + 1);
afterFields2[afterFields2.length - 1] = Integer.MAX_VALUE;
after2 = new FieldDoc(after.doc, after.score, afterFields2);
}
assertSearchHits(reader, new Sort(sortField2), n, after2);
return topDocs;
}
private TopDocs assertSearchHits(DirectoryReader reader, Sort sort, int n, FieldDoc after)
throws IOException {
// single-threaded so totalHits is deterministic
IndexSearcher searcher = newSearcher(reader, true, true, false);
Query query = new MatchAllDocsQuery();
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, n, after, n);
TopDocs topDocs = searcher.search(query, manager);
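// Run the same search on a reader that hides its terms and points, so no skipping is possible,
// and require the optimized and unoptimized top hits to be identical.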
IndexSearcher unoptimizedSearcher =
newSearcher(new NoIndexDirectoryReader(reader), true, true, false);
TopDocs unoptimizedTopDocs = unoptimizedSearcher.search(query, manager);
CheckHits.checkEqual(query, unoptimizedTopDocs.scoreDocs, topDocs.scoreDocs);
return topDocs;
}
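/** Wraps a reader so its leaves report no postings or points, disabling the sort optimization. */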
private static final class NoIndexDirectoryReader extends FilterDirectoryReader {
public NoIndexDirectoryReader(DirectoryReader in) throws IOException {
super(
in,
new SubReaderWrapper() {
@Override
public LeafReader wrap(LeafReader reader) {
return new NoIndexLeafReader(reader);
}
});
}
@Override
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public CacheHelper getReaderCacheHelper() {
return null;
}
}
private static final class NoIndexLeafReader extends FilterLeafReader {
NoIndexLeafReader(LeafReader in) {
super(in);
}
@Override
public CacheHelper getCoreCacheHelper() {
return null;
}
@Override
public CacheHelper getReaderCacheHelper() {
return null;
}
@Override
public Terms terms(String field) throws IOException {
return null;
}
@Override
public PointValues getPointValues(String field) throws IOException {
return null;
}
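// Rewrite each FieldInfo so the field looks unindexed: no index options, no points, no vectors.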
@Override
public FieldInfos getFieldInfos() {
FieldInfo[] newInfos = new FieldInfo[super.getFieldInfos().size()];
int i = 0;
for (FieldInfo fi : super.getFieldInfos()) {
FieldInfo noIndexFI =
new FieldInfo(
fi.name,
fi.number,
false, // storeTermVector
false, // omitNorms
false, // storePayloads
IndexOptions.NONE, // report the field as not indexed
fi.getDocValuesType(),
fi.getDocValuesGen(),
fi.attributes(),
0, // pointDimensionCount
0, // pointIndexDimensionCount
0, // pointNumBytes
0, // vectorDimension
VectorEncoding.FLOAT32,
VectorSimilarityFunction.DOT_PRODUCT,
fi.isSoftDeletesField());
newInfos[i] = noIndexFI;
i++;
}
return new FieldInfos(newInfos);
}
}
}