// blob: 303e2d19415c7c2de2e5bdc6cdd34cd5a3dc0a59 [file] [log] [blame]
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.TermPositions;
/**
 * Expert: shared scoring machinery for phrase queries.
 *
 * <p>A document matches when the terms of the phrase query occur in it at
 * "valid" positions. For an exact phrase query the terms must be adjacent;
 * for a sloppy phrase query some distance between them is tolerated.
 *
 * <p>For every document that contains all of the phrase terms this class
 * calls the abstract {@link #phraseFreq()} hook, which concrete scorers
 * implement to compute the phrase frequency in that document. A non zero
 * frequency means the document is a match.
 */
abstract class PhraseScorer extends Scorer {

  private Weight weight;
  protected byte[] norms;
  protected float value;

  /** True until the first call to {@link #nextDoc()} or {@link #advance(int)}. */
  private boolean firstTime = true;
  /** Cleared as soon as any term's positions report that they are exhausted. */
  private boolean more = true;

  protected PhraseQueue pq;
  /** Head and tail of the singly linked list of per-term phrase positions. */
  protected PhrasePositions first, last;

  /** Phrase frequency in the current doc, as last returned by {@link #phraseFreq()}. */
  private float freq;

  /**
   * Builds the scorer and links one {@link PhrasePositions} per term, in the
   * order given by {@code tps}.
   *
   * @param weight     weight this scorer belongs to; its value is cached in {@link #value}
   * @param tps        term positions, one entry per phrase term
   * @param offsets    offset of each term inside the phrase, parallel to {@code tps}
   * @param similarity similarity handed to the {@link Scorer} super class
   * @param norms      norms used by {@link #score()}, or {@code null} to skip normalization
   */
  PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets,
               Similarity similarity, byte[] norms) {
    super(similarity);
    this.weight = weight;
    this.norms = norms;
    this.value = weight.getValue();

    // Turn the term positions into a linked list of phrase positions.
    // A phrase position differs from a term position in that it is shifted by
    // the term's offset in the phrase (pp.pos = tp.pos - offset), so an exact
    // phrase match shows up as all PhrasePositions sharing the same position.
    final int termCount = tps.length;
    for (int idx = 0; idx < termCount; idx++) {
      final PhrasePositions node = new PhrasePositions(tps[idx], offsets[idx]);
      if (last == null) {
        first = node;           // very first node becomes the head
      } else {
        last.next = node;       // otherwise append behind the current tail
      }
      last = node;
    }

    pq = new PhraseQueue(termCount);   // starts out empty
    first.doc = -1;                    // value reported by docID() before iteration starts
  }

  /** Returns the doc of the head of the phrase-positions list. */
  @Override
  public int docID() {
    return first.doc;
  }

  /**
   * Moves to the next document with a non zero phrase frequency.
   *
   * @return the matching doc id, or {@code NO_MORE_DOCS} when there is none
   */
  @Override
  public int nextDoc() throws IOException {
    if (firstTime) {
      init();
      firstTime = false;
    } else if (more) {
      more = last.next();       // step past the current doc to resume scanning
    }
    return settleOnMatch();
  }

  /**
   * Runs {@link #doNext()} and, if it finds nothing, marks the head of the
   * list as exhausted. Returns the resulting head doc.
   */
  private int settleOnMatch() throws IOException {
    final boolean matched = doNext();
    if (!matched) {
      first.doc = NO_MORE_DOCS;
    }
    return first.doc;
  }

  /**
   * Searches for a match starting at the current state, i.e. without an
   * initial increment.
   *
   * @return {@code true} if positioned on a matching doc, {@code false} if exhausted
   */
  private boolean doNext() throws IOException {
    while (more) {
      // Keep skipping the head up to the tail's doc and rotating it to the
      // end until head and tail agree on a doc (or something runs out).
      while (more && first.doc < last.doc) {
        more = first.skipTo(last.doc);
        firstToLast();
      }
      if (!more) {
        return false;
      }

      // Every term is on the same doc; let the subclass check the positions.
      freq = phraseFreq();
      if (freq != 0.0f) {
        return true;
      }
      more = last.next();       // no phrase here, keep scanning
    }
    return false;
  }

  /**
   * Scores the current doc: {@code tf(freq) * value}, additionally multiplied
   * by the decoded norm of the doc when norms are present.
   */
  @Override
  public float score() throws IOException {
    float raw = getSimilarity().tf(freq) * value;
    if (norms == null) {
      return raw;
    }
    return raw * Similarity.decodeNorm(norms[first.doc]);
  }

  /**
   * Skips every term to {@code target}, re-sorts the list and then looks for
   * the next match from there. Also clears the first-time flag, so a later
   * {@link #nextDoc()} does not run the initial positioning again.
   *
   * @param target doc id passed to each term's {@code skipTo}
   * @return the matching doc id, or {@code NO_MORE_DOCS} when there is none
   */
  @Override
  public int advance(int target) throws IOException {
    firstTime = false;

    PhrasePositions cursor = first;
    while (more && cursor != null) {
      more = cursor.skipTo(target);
      cursor = cursor.next;
    }
    if (more) {
      sort();
    }
    return settleOnMatch();
  }

  /**
   * Phrase frequency in the current doc, as computed by {@link #phraseFreq()}.
   */
  public final float currentFreq() {
    return freq;
  }

  /**
   * For a document containing all the phrase query terms, compute the
   * frequency of the phrase in that document. A non zero frequency means a
   * match.
   * <br>Note that containing all phrase terms does not guarantee a match -
   * the terms also have to be found at matching locations.
   *
   * @return frequency of the phrase in the current doc, 0 if not found.
   */
  protected abstract float phraseFreq() throws IOException;

  /** Moves every term onto its first doc, then sorts the list if none ran out. */
  private void init() throws IOException {
    PhrasePositions cursor = first;
    while (more && cursor != null) {
      more = cursor.next();
      cursor = cursor.next;
    }
    if (more) {
      sort();
    }
  }

  /** Reloads the queue from the linked list and rebuilds the list in queue order. */
  private void sort() {
    pq.clear();
    PhrasePositions cursor = first;
    while (cursor != null) {
      pq.add(cursor);
      cursor = cursor.next;
    }
    pqToList();
  }

  /** Drains the queue, relinking the popped entries into a fresh list in pop order. */
  protected final void pqToList() {
    first = null;
    last = null;
    while (pq.top() != null) {
      final PhrasePositions popped = pq.pop();
      if (last == null) {
        first = popped;         // first entry popped becomes the head
      } else {
        last.next = popped;     // the rest are appended behind the tail
      }
      popped.next = null;
      last = popped;
    }
  }

  /** Unlinks the head of the list and re-attaches it as the new tail. */
  protected final void firstToLast() {
    final PhrasePositions moved = first;
    last.next = moved;
    first = moved.next;
    last = moved;
    moved.next = null;
  }

  @Override
  public String toString() {
    return "scorer(" + weight + ")";
  }
}