blob: c77feb1afdad48e13472fa4e07e88a2cff866e1f [file] [log] [blame]
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.search.similarities.Similarity;
/**
 * Expert: base scorer for phrase queries.
 *
 * <p>A document matches when the phrase-query terms occur in it at "valid"
 * positions. Which positions are valid depends on the kind of phrase query:
 * an exact phrase query requires the terms at adjacent locations, whereas a
 * sloppy phrase query tolerates some distance between them. For every
 * candidate document the abstract {@link #phraseFreq()} of the extending
 * class is called to compute the frequency of the phrase in that document;
 * a non-zero frequency means the document matches.
 *
 * <p>The per-term {@code PhrasePositions} are kept in a cyclic singly linked
 * list. {@link #min} and {@link #max} are two adjacent nodes of that ring
 * ({@code max.next == min} after construction), and {@code max.doc} is what
 * {@link #docID()} reports.
 */
abstract class PhraseScorer extends Scorer {

  /** Ring node that is advanced next; {@code null} if no postings were supplied. */
  PhrasePositions min;

  /** Ring node whose {@code doc} is the scorer's current doc; {@code null} if no postings were supplied. */
  PhrasePositions max;

  /** Phrase frequency in the current doc, as computed by {@link #phraseFreq()}. */
  protected float freq;

  /** Turns a (doc, phrase frequency) pair into the score returned by {@link #score()}. */
  final Similarity.SloppySimScorer docScorer;

  /**
   * Builds the cyclic list of phrase positions, one node per postings entry.
   *
   * @param weight    passed through to the {@code Scorer} super constructor
   * @param postings  one entry per phrase term; entry {@code i} becomes the
   *                  node created with ordinal {@code i}
   * @param docScorer used by {@link #score()}
   */
  PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
      Similarity.SloppySimScorer docScorer) {
    super(weight);
    this.docScorer = docScorer;

    final int count = postings.length;
    if (count == 0) {
      // Nothing to link: min and max stay null, so the iteration methods
      // below must not be called on such an instance.
      return;
    }

    // Convert the term postings to a list of phrase positions.
    // Note (from the original author): a phrase-position differs from a
    // term-position in that it reflects the phrase offset
    // (pp.pos = tp.pos - offset), so an exact phrase match is easy to spot:
    // all PhrasePositions then share exactly the same position.
    PhrasePositions head = null;
    PhrasePositions tail = null;
    for (int ord = 0; ord < count; ord++) {
      final PhraseQuery.PostingsAndFreq entry = postings[ord];
      final PhrasePositions node =
          new PhrasePositions(entry.postings, entry.position, ord, entry.terms);
      node.doc = -1; // value reported by docID() until the first advance
      if (tail == null) {
        head = node;
      } else {
        tail.next = node;
      }
      tail = node;
    }
    tail.next = head; // close the ring for easier manipulation

    min = head;
    max = tail;
  }

  /**
   * @return {@code max.doc}: -1 right after construction, {@code NO_MORE_DOCS}
   *         once {@link #advanceMin(int)} has failed.
   */
  @Override
  public int docID() {
    return max.doc;
  }

  /**
   * Moves to the next matching document by delegating to
   * {@link #advance(int)} with the current {@code max.doc} as target.
   * NOTE(review): this presumably relies on {@code PhrasePositions.skipTo}
   * moving past the current doc when the target equals it; confirm there.
   */
  @Override
  public int nextDoc() throws IOException {
    final int current = max.doc;
    return advance(current);
  }

  /** Scores the current doc ({@code max.doc}) from the stored phrase frequency. */
  @Override
  public float score() throws IOException {
    final int doc = max.doc;
    return docScorer.score(doc, freq);
  }

  /**
   * Skips {@link #min} to {@code target} and, on success, rotates both
   * {@link #min} and {@link #max} one step around the ring, so the node that
   * was just moved becomes the new {@code max}.
   *
   * @return {@code false} if the skip failed; {@code max.doc} is then set to
   *         {@code NO_MORE_DOCS} so later {@link #docID()} calls report it
   */
  private boolean advanceMin(int target) throws IOException {
    if (min.skipTo(target)) {
      min = min.next; // cyclic
      max = max.next; // cyclic
      return true;
    }
    max.doc = NO_MORE_DOCS; // for further calls to docID()
    return false;
  }

  /**
   * Advances the ring starting from {@code target} until a document with a
   * non-zero phrase frequency is reached. {@link #freq} is reset to 0 on
   * entry and holds that document's frequency on a successful return.
   *
   * @return the matching doc ({@code max.doc}), or {@code NO_MORE_DOCS} as
   *         soon as any {@link #advanceMin(int)} call fails
   */
  @Override
  public int advance(int target) throws IOException {
    freq = 0.0f;
    if (!advanceMin(target)) {
      return NO_MORE_DOCS;
    }

    for (;;) {
      // Keep pulling the lagging node up to max.doc until none lags behind.
      while (min.doc < max.doc) {
        if (!advanceMin(max.doc)) {
          return NO_MORE_DOCS;
        }
      }

      // Found a doc with all of the terms: check for the phrase itself.
      freq = phraseFreq();
      if (freq != 0.0f) {
        // found a match
        return max.doc;
      }

      // The terms are present but not as a phrase: force one more step
      // before re-aligning on the following candidate.
      if (!advanceMin(max.doc)) {
        return NO_MORE_DOCS;
      }
    }
  }

  /**
   * For a document containing all the phrase query terms, computes the
   * frequency of the phrase in that document. A non-zero frequency means a
   * match.
   * <br>Note that containing all phrase terms does not guarantee a match:
   * the terms also have to be found in matching locations.
   *
   * @return frequency of the phrase in the current doc, 0 if not found.
   */
  abstract float phraseFreq() throws IOException;

  /** @return {@code "scorer("} followed by the weight and a closing parenthesis. */
  @Override
  public String toString() {
    return "scorer(" + weight + ")";
  }
}