blob: b28124f88861574ff58d7158262289f9e6042935 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FutureObjects;
import org.apache.lucene.util.PriorityQueue;
/**
* {@link BulkScorer} that is used for pure disjunctions and disjunctions
* that have low values of {@link BooleanQuery.Builder#setMinimumNumberShouldMatch(int)}
* and dense clauses. This scorer scores documents by batches of 2048 docs.
*/
final class BooleanScorer extends BulkScorer {
static final int SHIFT = 11;
static final int SIZE = 1 << SHIFT;
static final int MASK = SIZE - 1;
static final int SET_SIZE = 1 << (SHIFT - 6);
static final int SET_MASK = SET_SIZE - 1;
static class Bucket {
double score;
int freq;
}
private class BulkScorerAndDoc {
final BulkScorer scorer;
final long cost;
int next;
BulkScorerAndDoc(BulkScorer scorer) {
this.scorer = scorer;
this.cost = scorer.cost();
this.next = -1;
}
void advance(int min) throws IOException {
score(orCollector, null, min, min);
}
void score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
next = scorer.score(collector, acceptDocs, min, max);
}
}
// See WANDScorer for an explanation
private static long cost(Collection<BulkScorer> scorers, int minShouldMatch) {
final PriorityQueue<BulkScorer> pq = new PriorityQueue<BulkScorer>(scorers.size() - minShouldMatch + 1) {
@Override
protected boolean lessThan(BulkScorer a, BulkScorer b) {
return a.cost() > b.cost();
}
};
for (BulkScorer scorer : scorers) {
pq.insertWithOverflow(scorer);
}
long cost = 0;
for (BulkScorer scorer = pq.pop(); scorer != null; scorer = pq.pop()) {
cost += scorer.cost();
}
return cost;
}
static final class HeadPriorityQueue extends PriorityQueue<BulkScorerAndDoc> {
public HeadPriorityQueue(int maxSize) {
super(maxSize);
}
@Override
protected boolean lessThan(BulkScorerAndDoc a, BulkScorerAndDoc b) {
return a.next < b.next;
}
}
static final class TailPriorityQueue extends PriorityQueue<BulkScorerAndDoc> {
public TailPriorityQueue(int maxSize) {
super(maxSize);
}
@Override
protected boolean lessThan(BulkScorerAndDoc a, BulkScorerAndDoc b) {
return a.cost < b.cost;
}
public BulkScorerAndDoc get(int i) {
FutureObjects.checkIndex(i, size());
return (BulkScorerAndDoc) getHeapArray()[1 + i];
}
}
final Bucket[] buckets = new Bucket[SIZE];
// This is basically an inlined FixedBitSet... seems to help with bound checks
final long[] matching = new long[SET_SIZE];
final BulkScorerAndDoc[] leads;
final HeadPriorityQueue head;
final TailPriorityQueue tail;
final ScoreAndDoc scoreAndDoc = new ScoreAndDoc();
final int minShouldMatch;
final long cost;
final class OrCollector implements LeafCollector {
Scorable scorer;
@Override
public void setScorer(Scorable scorer) {
this.scorer = scorer;
}
@Override
public void collect(int doc) throws IOException {
final int i = doc & MASK;
final int idx = i >>> 6;
matching[idx] |= 1L << i;
final Bucket bucket = buckets[i];
bucket.freq++;
bucket.score += scorer.score();
}
}
final OrCollector orCollector = new OrCollector();
BooleanScorer(BooleanWeight weight, Collection<BulkScorer> scorers, int minShouldMatch, boolean needsScores) {
if (minShouldMatch < 1 || minShouldMatch > scorers.size()) {
throw new IllegalArgumentException("minShouldMatch should be within 1..num_scorers. Got " + minShouldMatch);
}
if (scorers.size() <= 1) {
throw new IllegalArgumentException("This scorer can only be used with two scorers or more, got " + scorers.size());
}
for (int i = 0; i < buckets.length; i++) {
buckets[i] = new Bucket();
}
this.leads = new BulkScorerAndDoc[scorers.size()];
this.head = new HeadPriorityQueue(scorers.size() - minShouldMatch + 1);
this.tail = new TailPriorityQueue(minShouldMatch - 1);
this.minShouldMatch = minShouldMatch;
for (BulkScorer scorer : scorers) {
if (needsScores == false) {
// OrCollector calls score() all the time so we have to explicitly
// disable scoring in order to avoid decoding useless norms
scorer = BooleanWeight.disableScoring(scorer);
}
final BulkScorerAndDoc evicted = tail.insertWithOverflow(new BulkScorerAndDoc(scorer));
if (evicted != null) {
head.add(evicted);
}
}
this.cost = cost(scorers, minShouldMatch);
}
@Override
public long cost() {
return cost;
}
private void scoreDocument(LeafCollector collector, int base, int i) throws IOException {
final ScoreAndDoc scoreAndDoc = this.scoreAndDoc;
final Bucket bucket = buckets[i];
if (bucket.freq >= minShouldMatch) {
scoreAndDoc.score = (float) bucket.score;
final int doc = base | i;
scoreAndDoc.doc = doc;
collector.collect(doc);
}
bucket.freq = 0;
bucket.score = 0;
}
private void scoreMatches(LeafCollector collector, int base) throws IOException {
long matching[] = this.matching;
for (int idx = 0; idx < matching.length; idx++) {
long bits = matching[idx];
while (bits != 0L) {
int ntz = Long.numberOfTrailingZeros(bits);
int doc = idx << 6 | ntz;
scoreDocument(collector, base, doc);
bits ^= 1L << ntz;
}
}
}
private void scoreWindowIntoBitSetAndReplay(LeafCollector collector, Bits acceptDocs,
int base, int min, int max, BulkScorerAndDoc[] scorers, int numScorers) throws IOException {
for (int i = 0; i < numScorers; ++i) {
final BulkScorerAndDoc scorer = scorers[i];
assert scorer.next < max;
scorer.score(orCollector, acceptDocs, min, max);
}
scoreMatches(collector, base);
Arrays.fill(matching, 0L);
}
private BulkScorerAndDoc advance(int min) throws IOException {
assert tail.size() == minShouldMatch - 1;
final HeadPriorityQueue head = this.head;
final TailPriorityQueue tail = this.tail;
BulkScorerAndDoc headTop = head.top();
BulkScorerAndDoc tailTop = tail.top();
while (headTop.next < min) {
if (tailTop == null || headTop.cost <= tailTop.cost) {
headTop.advance(min);
headTop = head.updateTop();
} else {
// swap the top of head and tail
final BulkScorerAndDoc previousHeadTop = headTop;
tailTop.advance(min);
headTop = head.updateTop(tailTop);
tailTop = tail.updateTop(previousHeadTop);
}
}
return headTop;
}
private void scoreWindowMultipleScorers(LeafCollector collector, Bits acceptDocs, int windowBase, int windowMin, int windowMax, int maxFreq) throws IOException {
while (maxFreq < minShouldMatch && maxFreq + tail.size() >= minShouldMatch) {
// a match is still possible
final BulkScorerAndDoc candidate = tail.pop();
candidate.advance(windowMin);
if (candidate.next < windowMax) {
leads[maxFreq++] = candidate;
} else {
head.add(candidate);
}
}
if (maxFreq >= minShouldMatch) {
// There might be matches in other scorers from the tail too
for (int i = 0; i < tail.size(); ++i) {
leads[maxFreq++] = tail.get(i);
}
tail.clear();
scoreWindowIntoBitSetAndReplay(collector, acceptDocs, windowBase, windowMin, windowMax, leads, maxFreq);
}
// Push back scorers into head and tail
for (int i = 0; i < maxFreq; ++i) {
final BulkScorerAndDoc evicted = head.insertWithOverflow(leads[i]);
if (evicted != null) {
tail.add(evicted);
}
}
}
private void scoreWindowSingleScorer(BulkScorerAndDoc bulkScorer, LeafCollector collector,
Bits acceptDocs, int windowMin, int windowMax, int max) throws IOException {
assert tail.size() == 0;
final int nextWindowBase = head.top().next & ~MASK;
final int end = Math.max(windowMax, Math.min(max, nextWindowBase));
bulkScorer.score(collector, acceptDocs, windowMin, end);
// reset the scorer that should be used for the general case
collector.setScorer(scoreAndDoc);
}
private BulkScorerAndDoc scoreWindow(BulkScorerAndDoc top, LeafCollector collector,
Bits acceptDocs, int min, int max) throws IOException {
final int windowBase = top.next & ~MASK; // find the window that the next match belongs to
final int windowMin = Math.max(min, windowBase);
final int windowMax = Math.min(max, windowBase + SIZE);
// Fill 'leads' with all scorers from 'head' that are in the right window
leads[0] = head.pop();
int maxFreq = 1;
while (head.size() > 0 && head.top().next < windowMax) {
leads[maxFreq++] = head.pop();
}
if (minShouldMatch == 1 && maxFreq == 1) {
// special case: only one scorer can match in the current window,
// we can collect directly
final BulkScorerAndDoc bulkScorer = leads[0];
scoreWindowSingleScorer(bulkScorer, collector, acceptDocs, windowMin, windowMax, max);
return head.add(bulkScorer);
} else {
// general case, collect through a bit set first and then replay
scoreWindowMultipleScorers(collector, acceptDocs, windowBase, windowMin, windowMax, maxFreq);
return head.top();
}
}
@Override
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
scoreAndDoc.doc = -1;
collector.setScorer(scoreAndDoc);
BulkScorerAndDoc top = advance(min);
while (top.next < max) {
top = scoreWindow(top, collector, acceptDocs, min, max);
}
return top.next;
}
}