blob: 96e6ad941ebc7cd4df4bb1f401f23468d9b161ef [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
import static org.apache.lucene.search.SortField.FIELD_DOC;
import static org.apache.lucene.search.SortField.FIELD_SCORE;
/**
 * Tests that sorting on a numeric field that is additionally indexed with points can skip
 * non-competitive documents (early termination of collection), and that the optimization is
 * correctly disabled in every situation where it could change results: secondary numeric sorts,
 * competitive missing values, fields not indexed with points, or when
 * {@link SortField#setCanUsePoints()} was never called.
 */
public class TestFieldSortOptimizationSkipping extends LuceneTestCase {

  /** Sorting by a long field indexed with points should skip non-competitive docs. */
  public void testLongSortOptimization() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
    final int numDocs = atLeast(10000);
    for (int i = 0; i < numDocs; ++i) {
      final Document doc = new Document();
      // same value indexed both as doc values (for sorting) and as a point (for skipping)
      doc.add(new NumericDocValuesField("my_field", i));
      doc.add(new LongPoint("my_field", i));
      writer.addDocument(doc);
      if (i == 7000) writer.flush(); // two segments
    }
    final IndexReader reader = DirectoryReader.open(writer);
    writer.close();
    IndexSearcher searcher = new IndexSearcher(reader);
    final SortField sortField = new SortField("my_field", SortField.Type.LONG);
    sortField.setCanUsePoints(); // opt in to the points-based skipping optimization
    final Sort sort = new Sort(sortField);
    final int numHits = 3;
    final int totalHitsThreshold = 3;

    { // simple sort
      final TopFieldCollector collector =
          TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
      searcher.search(new MatchAllDocsQuery(), collector);
      TopDocs topDocs = collector.topDocs();
      assertEquals(numHits, topDocs.scoreDocs.length);
      for (int i = 0; i < numHits; i++) {
        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
        assertEquals(i, ((Long) fieldDoc.fields[0]).intValue());
      }
      assertTrue(collector.isEarlyTerminated());
      assertTrue(topDocs.totalHits.value < numDocs); // some docs were skipped
    }

    { // paging sort with after
      long afterValue = 2;
      FieldDoc after = new FieldDoc(2, Float.NaN, new Long[] {afterValue});
      final TopFieldCollector collector =
          TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
      searcher.search(new MatchAllDocsQuery(), collector);
      TopDocs topDocs = collector.topDocs();
      assertEquals(numHits, topDocs.scoreDocs.length);
      for (int i = 0; i < numHits; i++) {
        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
        assertEquals(afterValue + 1 + i, fieldDoc.fields[0]);
      }
      assertTrue(collector.isEarlyTerminated());
      assertTrue(topDocs.totalHits.value < numDocs); // some docs were skipped
    }

    { // test that if there is the secondary sort on _score, scores are filled correctly
      final TopFieldCollector collector =
          TopFieldCollector.create(new Sort(sortField, FIELD_SCORE), numHits, null, totalHitsThreshold);
      searcher.search(new MatchAllDocsQuery(), collector);
      TopDocs topDocs = collector.topDocs();
      assertEquals(numHits, topDocs.scoreDocs.length);
      for (int i = 0; i < numHits; i++) {
        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
        assertEquals(i, ((Long) fieldDoc.fields[0]).intValue());
        float score = (float) fieldDoc.fields[1];
        assertEquals(1.0, score, 0.001); // MatchAllDocsQuery scores every doc 1.0
      }
      assertTrue(collector.isEarlyTerminated());
      assertTrue(topDocs.totalHits.value < numDocs);
    }

    { // test that if numeric field is a secondary sort, no optimization is run
      final TopFieldCollector collector =
          TopFieldCollector.create(new Sort(FIELD_SCORE, sortField), numHits, null, totalHitsThreshold);
      searcher.search(new MatchAllDocsQuery(), collector);
      TopDocs topDocs = collector.topDocs();
      assertEquals(numHits, topDocs.scoreDocs.length);
      // all documents were collected => optimization was not run
      assertEquals(numDocs, topDocs.totalHits.value);
    }

    { // test that by default (if we don't set sortField.setCanUsePoints()), the optimization is not run
      final SortField sortField2 = new SortField("my_field", SortField.Type.LONG);
      final Sort sort2 = new Sort(sortField2);
      final TopFieldCollector collector =
          TopFieldCollector.create(sort2, numHits, null, totalHitsThreshold);
      searcher.search(new MatchAllDocsQuery(), collector);
      TopDocs topDocs = collector.topDocs();
      assertEquals(numHits, topDocs.scoreDocs.length);
      // all documents were collected => optimization was not run
      assertEquals(numDocs, topDocs.totalHits.value);
    }

    reader.close();
    dir.close();
  }

  /**
   * test that even if a field is not indexed with points, optimized sort still works as expected,
   * although no optimization will be run
   */
  public void testLongSortOptimizationOnFieldNotIndexedWithPoints() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
    final int numDocs = atLeast(100);
    // my_field is not indexed with points
    for (int i = 0; i < numDocs; ++i) {
      final Document doc = new Document();
      doc.add(new NumericDocValuesField("my_field", i));
      writer.addDocument(doc);
    }
    final IndexReader reader = DirectoryReader.open(writer);
    writer.close();
    IndexSearcher searcher = new IndexSearcher(reader);
    final SortField sortField = new SortField("my_field", SortField.Type.LONG);
    sortField.setCanUsePoints();
    final Sort sort = new Sort(sortField);
    final int numHits = 3;
    final int totalHitsThreshold = 3;
    final TopFieldCollector collector =
        TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
    searcher.search(new MatchAllDocsQuery(), collector);
    TopDocs topDocs = collector.topDocs();
    assertEquals(numHits, topDocs.scoreDocs.length); // sort still works and returns expected number of docs
    for (int i = 0; i < numHits; i++) {
      FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
      assertEquals(i, ((Long) fieldDoc.fields[0]).intValue()); // returns expected values
    }
    // all documents were collected => optimization was not run
    assertEquals(numDocs, topDocs.totalHits.value);
    reader.close();
    dir.close();
  }

  /** The optimization must not skip docs when the configured missing value could be competitive. */
  public void testSortOptimizationWithMissingValues() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
    final int numDocs = atLeast(10000);
    for (int i = 0; i < numDocs; ++i) {
      final Document doc = new Document();
      if ((i % 500) != 0) { // miss values on every 500th document
        doc.add(new NumericDocValuesField("my_field", i));
        doc.add(new LongPoint("my_field", i));
      }
      writer.addDocument(doc);
      if (i == 7000) writer.flush(); // two segments
    }
    final IndexReader reader = DirectoryReader.open(writer);
    writer.close();
    IndexSearcher searcher = new IndexSearcher(reader);
    final int numHits = 3;
    final int totalHitsThreshold = 3;

    { // test that optimization is not run when missing value setting of SortField is competitive
      final SortField sortField = new SortField("my_field", SortField.Type.LONG);
      sortField.setMissingValue(0L); // set a competitive missing value
      sortField.setCanUsePoints();
      final Sort sort = new Sort(sortField);
      final TopFieldCollector collector =
          TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
      searcher.search(new MatchAllDocsQuery(), collector);
      TopDocs topDocs = collector.topDocs();
      assertEquals(numHits, topDocs.scoreDocs.length);
      // all documents were collected => optimization was not run
      assertEquals(numDocs, topDocs.totalHits.value);
    }

    { // test that optimization is run when missing value setting of SortField is NOT competitive
      final SortField sortField = new SortField("my_field", SortField.Type.LONG);
      sortField.setMissingValue(100L); // set a NON competitive missing value
      sortField.setCanUsePoints();
      final Sort sort = new Sort(sortField);
      final TopFieldCollector collector =
          TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
      searcher.search(new MatchAllDocsQuery(), collector);
      TopDocs topDocs = collector.topDocs();
      assertEquals(numHits, topDocs.scoreDocs.length);
      assertTrue(topDocs.totalHits.value < numDocs); // some docs were skipped => optimization was run
    }

    reader.close();
    dir.close();
  }

  /** Sorting on a field where every doc holds the same value should still allow skipping. */
  public void testSortOptimizationEqualValues() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
    final int numDocs = atLeast(10000);
    for (int i = 1; i <= numDocs; ++i) {
      final Document doc = new Document();
      doc.add(new NumericDocValuesField("my_field1", 100)); // all docs have the same value of my_field1
      doc.add(new IntPoint("my_field1", 100));
      doc.add(new NumericDocValuesField("my_field2", numDocs - i)); // diff values for the field my_field2
      writer.addDocument(doc);
      if (i == 7000) writer.flush(); // two segments
    }
    final IndexReader reader = DirectoryReader.open(writer);
    writer.close();
    IndexSearcher searcher = new IndexSearcher(reader);
    final int numHits = 3;
    final int totalHitsThreshold = 3;

    { // test that sorting on a single field with equal values uses the optimization
      final SortField sortField = new SortField("my_field1", SortField.Type.INT);
      sortField.setCanUsePoints();
      final Sort sort = new Sort(sortField);
      final TopFieldCollector collector =
          TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
      searcher.search(new MatchAllDocsQuery(), collector);
      TopDocs topDocs = collector.topDocs();
      assertEquals(numHits, topDocs.scoreDocs.length);
      for (int i = 0; i < numHits; i++) {
        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
        assertEquals(100, fieldDoc.fields[0]);
      }
      assertTrue(topDocs.totalHits.value < numDocs); // some docs were skipped => optimization was run
    }

    { // test that sorting on a single field with equal values and after parameter uses the optimization
      final int afterValue = 100;
      final SortField sortField = new SortField("my_field1", SortField.Type.INT);
      sortField.setCanUsePoints();
      final Sort sort = new Sort(sortField);
      FieldDoc after = new FieldDoc(10, Float.NaN, new Integer[] {afterValue});
      final TopFieldCollector collector =
          TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
      searcher.search(new MatchAllDocsQuery(), collector);
      TopDocs topDocs = collector.topDocs();
      assertEquals(numHits, topDocs.scoreDocs.length);
      for (int i = 0; i < numHits; i++) {
        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
        assertEquals(100, fieldDoc.fields[0]);
      }
      assertTrue(topDocs.totalHits.value < numDocs); // some docs were skipped => optimization was run
    }

    { // test that sorting on main field with equal values + another field for tie breaks doesn't use optimization
      final SortField sortField1 = new SortField("my_field1", SortField.Type.INT);
      sortField1.setCanUsePoints();
      final SortField sortField2 = new SortField("my_field2", SortField.Type.INT);
      final Sort sort = new Sort(sortField1, sortField2);
      final TopFieldCollector collector =
          TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
      searcher.search(new MatchAllDocsQuery(), collector);
      TopDocs topDocs = collector.topDocs();
      assertEquals(numHits, topDocs.scoreDocs.length);
      for (int i = 0; i < numHits; i++) {
        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
        assertEquals(100, fieldDoc.fields[0]); // sort on 1st field as expected
        assertEquals(i, fieldDoc.fields[1]); // sort on 2nd field as expected
      }
      // all documents were collected => optimization was not run
      assertEquals(numDocs, topDocs.totalHits.value);
    }

    reader.close();
    dir.close();
  }

  /** Same skipping optimization, but for a float field indexed with float points. */
  public void testFloatSortOptimization() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
    final int numDocs = atLeast(10000);
    for (int i = 0; i < numDocs; ++i) {
      final Document doc = new Document();
      float f = 1f * i;
      doc.add(new FloatDocValuesField("my_field", f));
      doc.add(new FloatPoint("my_field", i));
      writer.addDocument(doc);
    }
    final IndexReader reader = DirectoryReader.open(writer);
    writer.close();
    IndexSearcher searcher = new IndexSearcher(reader);
    final SortField sortField = new SortField("my_field", SortField.Type.FLOAT);
    sortField.setCanUsePoints();
    final Sort sort = new Sort(sortField);
    final int numHits = 3;
    final int totalHitsThreshold = 3;

    { // simple sort
      final TopFieldCollector collector =
          TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
      searcher.search(new MatchAllDocsQuery(), collector);
      TopDocs topDocs = collector.topDocs();
      assertEquals(numHits, topDocs.scoreDocs.length);
      for (int i = 0; i < numHits; i++) {
        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
        assertEquals(1f * i, fieldDoc.fields[0]);
      }
      assertTrue(collector.isEarlyTerminated());
      assertTrue(topDocs.totalHits.value < numDocs);
    }

    reader.close();
    dir.close();
  }

  /** Sorting by _doc with a search-after doc should skip everything up to that doc. */
  public void testDocSortOptimizationWithAfter() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
    final int numDocs = atLeast(150);
    for (int i = 0; i < numDocs; ++i) {
      final Document doc = new Document();
      writer.addDocument(doc);
      if ((i > 0) && (i % 50 == 0)) {
        writer.flush(); // multiple segments
      }
    }
    final IndexReader reader = DirectoryReader.open(writer);
    writer.close();
    IndexSearcher searcher = newSearcher(reader);
    final int numHits = 10;
    final int totalHitsThreshold = 10;
    final int[] searchAfters = {3, 10, numDocs - 10};
    for (int searchAfter : searchAfters) {
      // sort by _doc with search after should trigger optimization
      {
        final Sort sort = new Sort(FIELD_DOC);
        FieldDoc after = new FieldDoc(searchAfter, Float.NaN, new Integer[] {searchAfter});
        final TopFieldCollector collector =
            TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
        searcher.search(new MatchAllDocsQuery(), collector);
        TopDocs topDocs = collector.topDocs();
        // fewer than numHits remain when "after" is close to the end of the index
        int expNumHits = (searchAfter >= (numDocs - numHits)) ? (numDocs - searchAfter - 1) : numHits;
        assertEquals(expNumHits, topDocs.scoreDocs.length);
        for (int i = 0; i < topDocs.scoreDocs.length; i++) {
          int expectedDocID = searchAfter + 1 + i;
          assertEquals(expectedDocID, topDocs.scoreDocs[i].doc);
        }
        assertTrue(collector.isEarlyTerminated());
        // check that very few docs were collected
        assertTrue(topDocs.totalHits.value < numDocs);
      }
      // sort by _doc + _score with search after should trigger optimization
      {
        final Sort sort = new Sort(FIELD_DOC, FIELD_SCORE);
        FieldDoc after = new FieldDoc(searchAfter, Float.NaN, new Object[] {searchAfter, 1.0f});
        final TopFieldCollector collector =
            TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
        searcher.search(new MatchAllDocsQuery(), collector);
        TopDocs topDocs = collector.topDocs();
        int expNumHits = (searchAfter >= (numDocs - numHits)) ? (numDocs - searchAfter - 1) : numHits;
        assertEquals(expNumHits, topDocs.scoreDocs.length);
        for (int i = 0; i < topDocs.scoreDocs.length; i++) {
          int expectedDocID = searchAfter + 1 + i;
          assertEquals(expectedDocID, topDocs.scoreDocs[i].doc);
        }
        assertTrue(collector.isEarlyTerminated());
        // assert that very few docs were collected
        assertTrue(topDocs.totalHits.value < numDocs);
      }
      // sort by _doc desc should not trigger optimization
      {
        final Sort sort = new Sort(new SortField(null, SortField.Type.DOC, true));
        FieldDoc after = new FieldDoc(searchAfter, Float.NaN, new Integer[] {searchAfter});
        final TopFieldCollector collector =
            TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
        searcher.search(new MatchAllDocsQuery(), collector);
        TopDocs topDocs = collector.topDocs();
        int expNumHits = (searchAfter < numHits) ? searchAfter : numHits;
        assertEquals(expNumHits, topDocs.scoreDocs.length);
        for (int i = 0; i < topDocs.scoreDocs.length; i++) {
          int expectedDocID = searchAfter - 1 - i;
          assertEquals(expectedDocID, topDocs.scoreDocs[i].doc);
        }
        // assert that all documents were collected
        assertEquals(numDocs, topDocs.totalHits.value);
      }
    }
    reader.close();
    dir.close();
  }

  /** Sorting by _doc alone should terminate after collecting numHits docs. */
  public void testDocSortOptimization() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
    final int numDocs = atLeast(100);
    int seg = 1;
    for (int i = 0; i < numDocs; ++i) {
      final Document doc = new Document();
      doc.add(new LongPoint("lf", i));
      doc.add(new StoredField("slf", i));
      doc.add(new StringField("tf", "seg" + seg, Field.Store.YES));
      writer.addDocument(doc);
      if ((i > 0) && (i % 50 == 0)) {
        writer.flush();
        seg++; // tag docs with the segment they end up in
      }
    }
    final IndexReader reader = DirectoryReader.open(writer);
    writer.close();
    IndexSearcher searcher = newSearcher(reader);
    final int numHits = 3;
    final int totalHitsThreshold = 3;
    final Sort sort = new Sort(FIELD_DOC);

    // sort by _doc should skip all non-competitive documents
    {
      final TopFieldCollector collector =
          TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
      searcher.search(new MatchAllDocsQuery(), collector);
      TopDocs topDocs = collector.topDocs();
      assertEquals(numHits, topDocs.scoreDocs.length);
      for (int i = 0; i < numHits; i++) {
        assertEquals(i, topDocs.scoreDocs[i].doc);
      }
      assertTrue(collector.isEarlyTerminated());
      assertTrue(topDocs.totalHits.value < 10); // assert that very few docs were collected
    }

    // sort by _doc with a bool query should skip all non-competitive documents
    {
      final TopFieldCollector collector =
          TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
      int lowerRange = 40;
      BooleanQuery.Builder bq = new BooleanQuery.Builder();
      bq.add(LongPoint.newRangeQuery("lf", lowerRange, Long.MAX_VALUE), BooleanClause.Occur.MUST);
      bq.add(new TermQuery(new Term("tf", "seg1")), BooleanClause.Occur.MUST);
      searcher.search(bq.build(), collector);
      TopDocs topDocs = collector.topDocs();
      assertEquals(numHits, topDocs.scoreDocs.length);
      for (int i = 0; i < numHits; i++) {
        Document d = searcher.doc(topDocs.scoreDocs[i].doc);
        assertEquals(Integer.toString(i + lowerRange), d.get("slf"));
        assertEquals("seg1", d.get("tf"));
      }
      assertTrue(collector.isEarlyTerminated());
      assertTrue(topDocs.totalHits.value < 10); // assert that very few docs were collected
    }

    reader.close();
    dir.close();
  }

  /**
   * Test that sorting on _doc works correctly.
   * This test goes through DefaultBulkSorter::scoreRange, where scorerIterator is BitSetIterator.
   * As a conjunction of this BitSetIterator with DocComparator's iterator, we get BitSetConjunctionDISI.
   * BitSetConjunctionDISI advances based on the DocComparator's iterator, and doesn't consider
   * that its BitSetIterator may have advanced passed a certain doc.
   */
  public void testDocSort() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
    final int numDocs = 4;
    for (int i = 0; i < numDocs; ++i) {
      final Document doc = new Document();
      doc.add(new StringField("id", "id" + i, Field.Store.NO));
      if (i < 2) {
        doc.add(new LongPoint("lf", 1));
      }
      writer.addDocument(doc);
    }
    final IndexReader reader = DirectoryReader.open(writer);
    writer.close();
    IndexSearcher searcher = newSearcher(reader);
    searcher.setQueryCache(null); // disable caching so the BitSet path is exercised directly
    final int numHits = 10;
    final int totalHitsThreshold = 10;
    final Sort sort = new Sort(FIELD_DOC);

    {
      final TopFieldCollector collector =
          TopFieldCollector.create(sort, numHits, null, totalHitsThreshold);
      BooleanQuery.Builder bq = new BooleanQuery.Builder();
      bq.add(LongPoint.newExactQuery("lf", 1), BooleanClause.Occur.MUST);
      bq.add(new TermQuery(new Term("id", "id3")), BooleanClause.Occur.MUST_NOT);
      searcher.search(bq.build(), collector);
      TopDocs topDocs = collector.topDocs();
      assertEquals(2, topDocs.scoreDocs.length);
    }

    reader.close();
    dir.close();
  }
}