blob: fec408559fb4185340644f61dad63cb5d07af20b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.comparators;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorable;
import java.io.IOException;
/**
* Comparator that sorts by asc _doc
*/
public class DocComparator extends FieldComparator<Integer> {
private final int[] docIDs;
private final boolean enableSkipping; // if skipping functionality should be enabled
private int bottom;
private int topValue;
private boolean topValueSet;
private boolean bottomValueSet;
private boolean hitsThresholdReached;
/**
* Creates a new comparator based on document ids for {@code numHits}
*/
public DocComparator(int numHits, boolean reverse, int sortPost) {
this.docIDs = new int[numHits];
// skipping functionality is enabled if we are sorting by _doc in asc order as a primary sort
this.enableSkipping = (reverse == false && sortPost == 0);
}
@Override
public int compare(int slot1, int slot2) {
// No overflow risk because docIDs are non-negative
return docIDs[slot1] - docIDs[slot2];
}
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) {
// TODO: can we "map" our docIDs to the current
// reader? saves having to then subtract on every
// compare call
return new DocLeafComparator(context);
}
@Override
public void setTopValue(Integer value) {
topValue = value;
topValueSet = true;
}
@Override
public Integer value(int slot) {
return Integer.valueOf(docIDs[slot]);
}
/**
* DocLeafComparator with skipping functionality.
* When sort by _doc asc, after collecting top N matches and enough hits, the comparator
* can skip all the following documents.
* When sort by _doc asc and "top" document is set after which search should start,
* the comparator provides an iterator that can quickly skip to the desired "top" document.
*/
private class DocLeafComparator implements LeafFieldComparator {
private final int docBase;
private final int minDoc;
private final int maxDoc;
private DocIdSetIterator competitiveIterator; // iterator that starts from topValue
public DocLeafComparator(LeafReaderContext context) {
this.docBase = context.docBase;
if (enableSkipping) {
// Skip docs before topValue, but include docs starting with topValue.
// Including topValue is necessary when doing sort on [_doc, other fields]
// in a distributed search where there are docs from different indices
// with the same docID.
this.minDoc = topValue;
this.maxDoc = context.reader().maxDoc();
this.competitiveIterator = DocIdSetIterator.all(maxDoc);
} else {
this.minDoc = -1;
this.maxDoc = -1;
this.competitiveIterator = null;
}
}
@Override
public void setBottom(int slot) {
bottom = docIDs[slot];
bottomValueSet = true;
updateIterator();
}
@Override
public int compareBottom(int doc) {
// No overflow risk because docIDs are non-negative
return bottom - (docBase + doc);
}
@Override
public int compareTop(int doc) {
int docValue = docBase + doc;
return Integer.compare(topValue, docValue);
}
@Override
public void copy(int slot, int doc) throws IOException {
docIDs[slot] = docBase + doc;
}
@Override
public void setScorer(Scorable scorer) throws IOException {
// update an iterator on a new segment
updateIterator();
}
@Override
public DocIdSetIterator competitiveIterator() {
if (enableSkipping == false) {
return null;
} else {
return new DocIdSetIterator() {
private int docID = competitiveIterator.docID();
@Override
public int nextDoc() throws IOException {
return advance(docID + 1);
}
@Override
public int docID() {
return docID;
}
@Override
public long cost() {
return competitiveIterator.cost();
}
@Override
public int advance(int target) throws IOException {
return docID = competitiveIterator.advance(target);
}
};
}
}
@Override
public void setHitsThresholdReached() {
hitsThresholdReached = true;
updateIterator();
}
private void updateIterator() {
if (enableSkipping == false || hitsThresholdReached == false) return;
if (bottomValueSet) {
// since we've collected top N matches, we can early terminate
// Currently early termination on _doc is also implemented in TopFieldCollector, but this will be removed
// once all bulk scores uses collectors' iterators
competitiveIterator = DocIdSetIterator.empty();
} else if (topValueSet) {
// skip to the desired top doc
if (docBase + maxDoc <= minDoc) {
competitiveIterator = DocIdSetIterator.empty(); // skip this segment
} else {
int segmentMinDoc = Math.max(competitiveIterator.docID(), minDoc - docBase);
competitiveIterator = new MinDocIterator(segmentMinDoc, maxDoc);
}
}
}
}
}