blob: adbbafc4ab2d32eab010fd5e71acedbd7a1a553c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.MaxScoreAccumulator.DocAndScore;
/**
* A {@link Collector} implementation that collects the top-scoring hits,
* returning them as a {@link TopDocs}. This is used by {@link IndexSearcher} to
* implement {@link TopDocs}-based search. Hits are sorted by score descending
* and then (when the scores are tied) docID ascending. When you create an
* instance of this collector you should know in advance whether documents are
* going to be collected in doc Id order or not.
*
* <p><b>NOTE</b>: The values {@link Float#NaN} and
* {@link Float#NEGATIVE_INFINITY} are not valid scores. This
* collector will not properly collect hits with such
* scores.
*/
public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
abstract static class ScorerLeafCollector implements LeafCollector {
Scorable scorer;
@Override
public void setScorer(Scorable scorer) throws IOException {
this.scorer = scorer;
}
}
private static class SimpleTopScoreDocCollector extends TopScoreDocCollector {
SimpleTopScoreDocCollector(int numHits, HitsThresholdChecker hitsThresholdChecker,
MaxScoreAccumulator minScoreAcc) {
super(numHits, hitsThresholdChecker, minScoreAcc);
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
// reset the minimum competitive score
docBase = context.docBase;
return new ScorerLeafCollector() {
@Override
public void setScorer(Scorable scorer) throws IOException {
super.setScorer(scorer);
minCompetitiveScore = 0f;
updateMinCompetitiveScore(scorer);
if (minScoreAcc != null) {
updateGlobalMinCompetitiveScore(scorer);
}
}
@Override
public void collect(int doc) throws IOException {
float score = scorer.score();
// This collector relies on the fact that scorers produce positive values:
assert score >= 0; // NOTE: false for NaN
totalHits++;
hitsThresholdChecker.incrementHitCount();
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
updateGlobalMinCompetitiveScore(scorer);
}
if (score <= pqTop.score) {
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
updateMinCompetitiveScore(scorer);
}
// Since docs are returned in-order (i.e., increasing doc Id), a document
// with equal score to pqTop.score cannot compete since HitQueue favors
// documents with lower doc Ids. Therefore reject those docs too.
return;
}
pqTop.doc = doc + docBase;
pqTop.score = score;
pqTop = pq.updateTop();
updateMinCompetitiveScore(scorer);
}
};
}
}
private static class PagingTopScoreDocCollector extends TopScoreDocCollector {
private final ScoreDoc after;
private int collectedHits;
PagingTopScoreDocCollector(int numHits, ScoreDoc after, HitsThresholdChecker hitsThresholdChecker,
MaxScoreAccumulator minScoreAcc) {
super(numHits, hitsThresholdChecker, minScoreAcc);
this.after = after;
this.collectedHits = 0;
}
@Override
protected int topDocsSize() {
return collectedHits < pq.size() ? collectedHits : pq.size();
}
@Override
protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
return results == null
? new TopDocs(new TotalHits(totalHits, totalHitsRelation), new ScoreDoc[0])
: new TopDocs(new TotalHits(totalHits, totalHitsRelation), results);
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
docBase = context.docBase;
final int afterDoc = after.doc - context.docBase;
return new ScorerLeafCollector() {
@Override
public void collect(int doc) throws IOException {
float score = scorer.score();
// This collector relies on the fact that scorers produce positive values:
assert score >= 0; // NOTE: false for NaN
totalHits++;
hitsThresholdChecker.incrementHitCount();
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
updateGlobalMinCompetitiveScore(scorer);
}
if (score > after.score || (score == after.score && doc <= afterDoc)) {
// hit was collected on a previous page
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
updateMinCompetitiveScore(scorer);
}
return;
}
if (score <= pqTop.score) {
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
updateMinCompetitiveScore(scorer);
}
// Since docs are returned in-order (i.e., increasing doc Id), a document
// with equal score to pqTop.score cannot compete since HitQueue favors
// documents with lower doc Ids. Therefore reject those docs too.
return;
}
collectedHits++;
pqTop.doc = doc + docBase;
pqTop.score = score;
pqTop = pq.updateTop();
updateMinCompetitiveScore(scorer);
}
};
}
}
/**
* Creates a new {@link TopScoreDocCollector} given the number of hits to
* collect and the number of hits to count accurately.
*
* <p><b>NOTE</b>: If the total hit count of the top docs is less than or exactly
* {@code totalHitsThreshold} then this value is accurate. On the other hand,
* if the {@link TopDocs#totalHits} value is greater than {@code totalHitsThreshold}
* then its value is a lower bound of the hit count. A value of {@link Integer#MAX_VALUE}
* will make the hit count accurate but will also likely make query processing slower.
* <p><b>NOTE</b>: The instances returned by this method
* pre-allocate a full array of length
* <code>numHits</code>, and fill the array with sentinel
* objects.
*/
public static TopScoreDocCollector create(int numHits, int totalHitsThreshold) {
return create(numHits, null, totalHitsThreshold);
}
/**
* Creates a new {@link TopScoreDocCollector} given the number of hits to
* collect, the bottom of the previous page, and the number of hits to count
* accurately.
*
* <p><b>NOTE</b>: If the total hit count of the top docs is less than or exactly
* {@code totalHitsThreshold} then this value is accurate. On the other hand,
* if the {@link TopDocs#totalHits} value is greater than {@code totalHitsThreshold}
* then its value is a lower bound of the hit count. A value of {@link Integer#MAX_VALUE}
* will make the hit count accurate but will also likely make query processing slower.
* <p><b>NOTE</b>: The instances returned by this method
* pre-allocate a full array of length
* <code>numHits</code>, and fill the array with sentinel
* objects.
*/
public static TopScoreDocCollector create(int numHits, ScoreDoc after, int totalHitsThreshold) {
return create(numHits, after, HitsThresholdChecker.create(Math.max(totalHitsThreshold, numHits)), null);
}
static TopScoreDocCollector create(int numHits, ScoreDoc after, HitsThresholdChecker hitsThresholdChecker,
MaxScoreAccumulator minScoreAcc) {
if (numHits <= 0) {
throw new IllegalArgumentException("numHits must be > 0; please use TotalHitCountCollector if you just need the total hit count");
}
if (hitsThresholdChecker == null) {
throw new IllegalArgumentException("hitsThresholdChecker must be non null");
}
if (after == null) {
return new SimpleTopScoreDocCollector(numHits, hitsThresholdChecker, minScoreAcc);
} else {
return new PagingTopScoreDocCollector(numHits, after, hitsThresholdChecker, minScoreAcc);
}
}
/**
* Create a CollectorManager which uses a shared hit counter to maintain number of hits
* and a shared {@link MaxScoreAccumulator} to propagate the minimum score accross segments
*/
public static CollectorManager<TopScoreDocCollector, TopDocs> createSharedManager(int numHits, FieldDoc after,
int totalHitsThreshold) {
return new CollectorManager<TopScoreDocCollector, TopDocs>() {
private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(Math.max(totalHitsThreshold, numHits));
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
@Override
public TopScoreDocCollector newCollector() throws IOException {
return TopScoreDocCollector.create(numHits, after, hitsThresholdChecker, minScoreAcc);
}
@Override
public TopDocs reduce(Collection<TopScoreDocCollector> collectors) throws IOException {
final TopDocs[] topDocs = new TopDocs[collectors.size()];
int i = 0;
for (TopScoreDocCollector collector : collectors) {
topDocs[i++] = collector.topDocs();
}
return TopDocs.merge(numHits, topDocs);
}
};
}
int docBase;
ScoreDoc pqTop;
final HitsThresholdChecker hitsThresholdChecker;
final MaxScoreAccumulator minScoreAcc;
float minCompetitiveScore;
// prevents instantiation
TopScoreDocCollector(int numHits, HitsThresholdChecker hitsThresholdChecker,
MaxScoreAccumulator minScoreAcc) {
super(new HitQueue(numHits, true));
assert hitsThresholdChecker != null;
// HitQueue implements getSentinelObject to return a ScoreDoc, so we know
// that at this point top() is already initialized.
pqTop = pq.top();
this.hitsThresholdChecker = hitsThresholdChecker;
this.minScoreAcc = minScoreAcc;
}
@Override
protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
if (results == null) {
return EMPTY_TOPDOCS;
}
return new TopDocs(new TotalHits(totalHits, totalHitsRelation), results);
}
@Override
public ScoreMode scoreMode() {
return hitsThresholdChecker.scoreMode();
}
protected void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException {
assert minScoreAcc != null;
DocAndScore maxMinScore = minScoreAcc.get();
if (maxMinScore != null) {
// since we tie-break on doc id and collect in doc id order we can require
// the next float if the global minimum score is set on a document id that is
// smaller than the ids in the current leaf
float score = docBase > maxMinScore.docID ? Math.nextUp(maxMinScore.score) : maxMinScore.score;
if (score > minCompetitiveScore) {
assert hitsThresholdChecker.isThresholdReached();
scorer.setMinCompetitiveScore(score);
minCompetitiveScore = score;
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
}
}
}
protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
if (hitsThresholdChecker.isThresholdReached()
&& pqTop != null
&& pqTop.score != Float.NEGATIVE_INFINITY) { // -Infinity is the score of sentinels
// since we tie-break on doc id and collect in doc id order, we can require
// the next float
float localMinScore = Math.nextUp(pqTop.score);
if (localMinScore > minCompetitiveScore) {
scorer.setMinCompetitiveScore(localMinScore);
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
minCompetitiveScore = localMinScore;
if (minScoreAcc != null) {
// we don't use the next float but we register the document
// id so that other leaves can require it if they are after
// the current maximum
minScoreAcc.accumulate(pqTop.doc, pqTop.score);
}
}
}
}
}