Index: lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java (revision 0)
+++ lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java (revision 0)
@@ -0,0 +1,249 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.BulkPostingsEnum;
+import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
+import org.apache.lucene.util.Bits;
+
+/** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
+ * This scorer is only used for the omitTF=true case.
+ */
+final class MatchOnlyTermScorer extends Scorer {
+ private final BulkPostingsEnum docsEnum;
+ private final byte[] norms;
+ private int doc;
+
+ private final int[] docDeltas;
+ private int docPointer;
+ private int docPointerMax;
+ private boolean first = true;
+
+ private final float rawScore;
+ private final BlockReader docDeltasReader;
+ private final Bits skipDocs;
+ private final int docFreq;
+ private int count;
+
+ /**
+ * Construct a <code>MatchOnlyTermScorer</code>.
+ *
+ * @param weight
+ * The weight of the <code>Term</code> in the query.
+ * @param td
+ * An iterator over the documents matching the <code>Term</code>.
+ * @param docDeltasReader
+ * The block reader over the document deltas of <code>td</code>.
+ * @param docFreq
+ * The number of documents matching the <code>Term</code>.
+ * @param skipDocs
+ * Bits marking deleted documents to skip, or null if none.
+ * @param similarity
+ * The <code>Similarity</code> implementation to be used for score
+ * computations.
+ * @param norms
+ * The field norms of the document fields for the <code>Term</code>.
+ */
+ MatchOnlyTermScorer(Weight weight, BulkPostingsEnum td, BlockReader docDeltasReader, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException {
+ super(similarity, weight);
+
+ assert td.getFreqsReader() == null;
+
+ this.docsEnum = td;
+ this.docFreq = docFreq;
+ this.docDeltasReader = docDeltasReader;
+ docDeltas = docDeltasReader.getBuffer();
+ reset();
+
+ this.skipDocs = skipDocs;
+ this.norms = norms;
+ rawScore = getSimilarity().tf(1f) * weight.getValue();
+ }
+
+ @Override
+ public void score(Collector c) throws IOException {
+ score(c, Integer.MAX_VALUE, nextDoc());
+ }
+
+ // firstDocID is ignored since nextDoc() sets 'doc'
+ @Override
+ protected boolean score(Collector c, int end, int firstDocID) throws IOException {
+ c.setScorer(this);
+ // nocommit -- this can leave scorer on a deleted doc...
+ while (doc < end) { // for docs in window
+ if (skipDocs == null || !skipDocs.get(doc)) {
+ c.collect(doc); // collect
+ }
+ if (count == docFreq) {
+ doc = NO_MORE_DOCS;
+ return false;
+ }
+ count++;
+ fillDocDeltas();
+ doc += docDeltas[docPointer];
+ }
+ return true;
+ }
+
+
+ @Override
+ public int docID() {
+ return first ? -1 : doc;
+ }
+
+ @Override
+ public float freq() {
+ return 1.0f;
+ }
+
+ /**
+ * Advances to the next document matching the query. <br>
+ * The iterator over the matching documents is buffered using
+ * {@link BulkPostingsEnum.BlockReader#fill()}.
+ *
+ * @return the next document matching the query, or NO_MORE_DOCS if there are no more documents.
+ */
+ @Override
+ public int nextDoc() throws IOException {
+ while(count < docFreq) {
+ fillDocDeltas();
+ count++;
+ doc += docDeltas[docPointer];
+ first = false;
+ assert doc >= 0 && (skipDocs == null || doc < skipDocs.length()) && doc != NO_MORE_DOCS: "doc=" + doc + " skipDocs=" + skipDocs + " skipDocs.length=" + (skipDocs==null? "n/a" : skipDocs.length());
+ if (skipDocs == null || !skipDocs.get(doc)) {
+ return doc;
+ }
+ }
+
+ return doc = NO_MORE_DOCS;
+ }
+
+ @Override
+ public float score() {
+ assert !first;
+ assert doc != NO_MORE_DOCS;
+
+ return norms == null ? rawScore : rawScore * getSimilarity().decodeNormValue(norms[doc]); // normalize for field
+ }
+
+ /**
+ * Advances to the first match beyond the current whose document number is
+ * greater than or equal to a given target. <br>
+ * The implementation scans the remaining doc deltas in the current block, then uses {@link BulkPostingsEnum#jump(int,int)}.
+ *
+ * @param target
+ * The target document number.
+ * @return the matching document or NO_MORE_DOCS if none exist.
+ */
+ @Override
+ public int advance(final int target) throws IOException {
+
+ // nocommit: should we, here, optimize .advance(target that isn't
+ // too far away) into scan? seems like simple win?
+
+ // first scan current doc deltas block
+ for (docPointer++; docPointer < docPointerMax && count < docFreq; docPointer++) {
+ assert first || docDeltas[docPointer] > 0;
+ doc += docDeltas[docPointer];
+ first = false;
+ count++;
+
+ if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
+ return doc;
+ }
+ }
+
+ if (count == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
+
+ // not found in current block, seek underlying stream
+ final BulkPostingsEnum.JumpResult jumpResult;
+ if (target - doc > docDeltas.length && // avoid useless jumps
+ (jumpResult = docsEnum.jump(target, count)) != null) {
+ count = jumpResult.count;
+ doc = jumpResult.docID;
+ first = false;
+ reset();
+ } else {
+ // seek did not jump -- just fill next buffer
+ docPointerMax = docDeltasReader.fill();
+ if (docPointerMax != 0) {
+ docPointer = 0;
+ assert first || docDeltas[0] > 0;
+ doc += docDeltas[0];
+ count++;
+ first = false;
+ } else {
+ return doc = NO_MORE_DOCS;
+ }
+ }
+
+ // now scan
+ return scan(target);
+ }
+
+ private int scan(final int target) throws IOException {
+ while(true) {
+ assert doc >= 0 && doc != NO_MORE_DOCS;
+ if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
+ return doc;
+ }
+
+ if (count >= docFreq) {
+ break;
+ }
+
+ if (++docPointer >= docPointerMax) {
+ docPointerMax = docDeltasReader.fill();
+ if (docPointerMax != 0) {
+ docPointer = 0;
+ } else {
+ return doc = NO_MORE_DOCS;
+ }
+ }
+
+ assert first || docDeltas[docPointer] > 0;
+ doc += docDeltas[docPointer];
+ count++;
+ }
+ return doc = NO_MORE_DOCS;
+ }
+
+ private void fillDocDeltas() throws IOException {
+ if (++docPointer >= docPointerMax) {
+ docPointerMax = docDeltasReader.fill();
+ assert docPointerMax != 0;
+ docPointer = 0;
+ }
+ }
+
+ private void reset() throws IOException {
+ docPointerMax = docDeltasReader.end();
+ docPointer = docDeltasReader.offset();
+ docPointer--;
+ }
+
+ /** Returns a string representation of this <code>MatchOnlyTermScorer</code>. */
+ @Override
+ public String toString() { return "scorer(" + weight + ")"; }
+
+}
Property changes on: lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Added: svn:keywords
+ Date Author Id Revision HeadURL
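
Note: with omitTF=true every match has an implicit freq of 1, so MatchOnlyTermScorer folds tf(1f) and the query weight into a single precomputed rawScore and only applies the per-document field norm at collect time. A minimal sketch of that shortcut, using made-up example values rather than the real Similarity API:

    // Sketch: constant-freq scoring, as in MatchOnlyTermScorer. tf(1f) and
    // weight.getValue() are per-query constants, so multiply them once.
    float tfOfOne = 1.0f;                  // stands in for similarity.tf(1f)
    float weightValue = 2.5f;              // stands in for weight.getValue()
    float rawScore = tfOfOne * weightValue;

    // Per document only the norm varies:
    //   score = rawScore * decodedNorm    (or just rawScore when norms == null)
    float decodedNorm = 0.5f;              // stands in for decodeNormValue(norms[doc])
    float score = rawScore * decodedNorm;  // 1.25f for these example values
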
Index: lucene/src/java/org/apache/lucene/search/TermQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TermQuery.java (revision 1049503)
+++ lucene/src/java/org/apache/lucene/search/TermQuery.java (working copy)
@@ -24,6 +24,7 @@
import org.apache.lucene.index.BulkPostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
import org.apache.lucene.search.Explanation.IDFExplanation;
import org.apache.lucene.util.ToStringUtils;
@@ -85,10 +86,17 @@
if (docs == null) {
return null;
}
-
// nocommit: we need this docfreq from TermState, MTQ knows it... but tosses it away.
- return new TermScorer(this, docs, reader.docFreq(term.field(), term.bytes()),
- reader.getDeletedDocs(), similarity, reader.norms(term.field()));
+ final int docFreq = reader.docFreq(term.field(), term.bytes());
+ final BlockReader docDeltas = docs.getDocDeltasReader();
+ final BlockReader frequencies = docs.getFreqsReader();
+ if (frequencies == null) {
+ return new MatchOnlyTermScorer(this, docs, docDeltas, docFreq,
+ reader.getDeletedDocs(), similarity, reader.norms(term.field()));
+ } else {
+ return new TermScorer(this, docs, docDeltas, frequencies, docFreq,
+ reader.getDeletedDocs(), similarity, reader.norms(term.field()));
+ }
}
@Override
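
Both scorer variants consume the BlockReaders extracted here the same way: doc IDs arrive as deltas packed into blocks that are decoded into a reusable buffer and refilled on demand. A self-contained sketch of that decode loop (BlockSource below is a simplified, hypothetical stand-in for BulkPostingsEnum.BlockReader, not the real interface):

    import java.io.IOException;

    // Sketch of the buffered delta-decode loop shared by TermScorer and
    // MatchOnlyTermScorer.
    class DeltaCursor {
      interface BlockSource {
        int[] getBuffer();             // reusable buffer the deltas decode into
        int fill() throws IOException; // decode the next block, return its length
      }

      private final BlockSource source;
      private final int[] deltas;
      private int pointer = -1;        // pre-decremented, like the scorers' reset()
      private int pointerMax;
      private int doc;

      DeltaCursor(BlockSource source) {
        this.source = source;
        this.deltas = source.getBuffer();
      }

      // Mirrors fillDocDeltas() followed by doc += docDeltas[docPointer].
      int nextDoc() throws IOException {
        if (++pointer >= pointerMax) { // block exhausted: refill
          pointerMax = source.fill();
          pointer = 0;
        }
        doc += deltas[pointer];        // each delta is the gap to the next doc ID
        return doc;
      }
    }
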
Index: lucene/src/java/org/apache/lucene/search/TermScorer.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TermScorer.java (revision 1049503)
+++ lucene/src/java/org/apache/lucene/search/TermScorer.java (working copy)
@@ -20,6 +20,7 @@
import java.io.IOException;
import org.apache.lucene.index.BulkPostingsEnum;
+import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
import org.apache.lucene.util.Bits;
// nocommit -- break out aligned & not cases?
@@ -28,9 +29,9 @@
/** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
*/
final class TermScorer extends Scorer {
- private BulkPostingsEnum docsEnum;
- private byte[] norms;
- private float weightValue;
+ private final BulkPostingsEnum docsEnum;
+ private final byte[] norms;
+ private final float weightValue;
private int doc;
private final int[] docDeltas;
@@ -43,9 +44,9 @@
private int freqPointerMax;
private static final int SCORE_CACHE_SIZE = 32;
- private float[] scoreCache = new float[SCORE_CACHE_SIZE];
- private final BulkPostingsEnum.BlockReader freqsReader;
- private final BulkPostingsEnum.BlockReader docDeltasReader;
+ private final float[] scoreCache = new float[SCORE_CACHE_SIZE];
+ private final BlockReader freqsReader;
+ private final BlockReader docDeltasReader;
private final Bits skipDocs;
private final int docFreq;
private int count;
@@ -63,27 +64,15 @@
* @param norms
* The field norms of the document fields for the <code>Term</code>.
*/
- TermScorer(Weight weight, BulkPostingsEnum td, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException {
+ TermScorer(Weight weight, BulkPostingsEnum td, BlockReader docDeltaReader, BlockReader freqReader, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException {
super(similarity, weight);
-
this.docsEnum = td;
this.docFreq = docFreq;
- docDeltasReader = td.getDocDeltasReader();
+ this.docDeltasReader = docDeltaReader;
docDeltas = docDeltasReader.getBuffer();
- docPointerMax = docDeltasReader.end();
- docPointer = docDeltasReader.offset();
- docPointer--;
-
- freqsReader = td.getFreqsReader();
- if (freqsReader != null) {
- freqs = freqsReader.getBuffer();
- freqPointerMax = freqsReader.end();
- freqPointer = freqsReader.offset();
- freqPointer--;
- } else {
- freqs = null;
- }
-
+ this.freqsReader = freqReader;
+ freqs = freqsReader.getBuffer();
+ reset();
this.skipDocs = skipDocs;
this.norms = norms;
this.weightValue = weight.getValue();
@@ -101,11 +90,9 @@
@Override
protected boolean score(Collector c, int end, int firstDocID) throws IOException {
c.setScorer(this);
- //System.out.println("ts.collect firstdocID=" + firstDocID + " term=" + term + " end=" + end + " doc=" + doc);
// nocommit -- this can leave scorer on a deleted doc...
while (doc < end) { // for docs in window
if (skipDocs == null || !skipDocs.get(doc)) {
- //System.out.println("ts.collect doc=" + doc + " skipDocs=" + skipDocs + " count=" + count + " vs dF=" + docFreq);
c.collect(doc); // collect
}
if (count == docFreq) {
@@ -113,40 +100,8 @@
return false;
}
count++;
- docPointer++;
-
- //System.out.println("dp=" + docPointer + " dpMax=" + docPointerMax + " count=" + count + " countMax=" + docFreq);
-
- if (docPointer >= docPointerMax) {
- docPointerMax = docDeltasReader.fill();
- //System.out.println(" refill! dpMax=" + docPointerMax + " reader=" + docDeltasReader);
- assert docPointerMax != 0;
- docPointer = 0;
-
- if (freqsReader != null) {
- freqPointer++;
- // NOTE: this code is intentionally dup'd
- // (specialized) w/ the else clause, for better CPU
- // branch prediction (assuming compiler doesn't
- // de-dup): for codecs that always bulk read same
- // number of docDeltas & freqs (standard, for,
- // pfor), this if will always be true. Other codecs
- // (simple9/16) will not be aligned:
- if (freqPointer >= freqPointerMax) {
- freqPointerMax = freqsReader.fill();
- assert freqPointerMax != 0;
- freqPointer = 0;
- }
- }
- } else if (freqsReader != null) {
- freqPointer++;
- if (freqPointer >= freqPointerMax) {
- freqPointerMax = freqsReader.fill();
- assert freqPointerMax != 0;
- freqPointer = 0;
- }
- }
-
+ fillDeltas();
+ fillFreq();
doc += docDeltas[docPointer];
}
return true;
@@ -159,11 +114,7 @@
@Override
public float freq() {
- if (freqsReader != null) {
- return freqs[freqPointer];
- } else {
- return 1.0f;
- }
+ return freqs[freqPointer];
}
/**
@@ -175,64 +126,25 @@
*/
@Override
public int nextDoc() throws IOException {
- //System.out.println("ts.nextDoc " + this + " count=" + count + " vs docFreq=" + docFreq);
while(count < docFreq) {
- docPointer++;
- if (docPointer >= docPointerMax) {
- //System.out.println("ts.nd refill docs");
- docPointerMax = docDeltasReader.fill();
- assert docPointerMax != 0;
- docPointer = 0;
- if (freqsReader != null) {
- // NOTE: this code is intentionally dup'd
- // (specialized) w/ the else clause, for better CPU
- // branch prediction (assuming compiler doesn't
- // de-dup): for codecs that always bulk read same
- // number of docDeltas & freqs (standard, for,
- // pfor), this if will always be true. Other codecs
- // (simple9/16) will not be aligned:
- freqPointer++;
- if (freqPointer >= freqPointerMax) {
- //System.out.println("ts.nd refill freqs");
- freqPointerMax = freqsReader.fill();
- assert freqPointerMax != 0;
- freqPointer = 0;
- }
- }
- } else {
- if (freqsReader != null) {
- freqPointer++;
- if (freqPointer >= freqPointerMax) {
- //System.out.println("ts.nd refill freqs");
- freqPointerMax = freqsReader.fill();
- assert freqPointerMax != 0;
- freqPointer = 0;
- }
- }
- }
+ fillDeltas();
+ fillFreq();
count++;
doc += docDeltas[docPointer];
first = false;
assert doc >= 0 && (skipDocs == null || doc < skipDocs.length()) && doc != NO_MORE_DOCS: "doc=" + doc + " skipDocs=" + skipDocs + " skipDocs.length=" + (skipDocs==null? "n/a" : skipDocs.length());
if (skipDocs == null || !skipDocs.get(doc)) {
- //System.out.println(" ret doc=" + doc + " freq=" + freq());
return doc;
}
}
- //System.out.println(" end");
return doc = NO_MORE_DOCS;
}
-
+
@Override
public float score() {
assert !first;
- final int freq;
- if (freqsReader == null) {
- freq = 1;
- } else {
- freq = freqs[freqPointer];
- }
+ final int freq = freqs[freqPointer];
assert freq > 0;
assert doc != NO_MORE_DOCS;
float raw = // compute tf(f)*weight
@@ -253,7 +165,7 @@
* @return the matching document or NO_MORE_DOCS if none exist.
*/
@Override
- public int advance(int target) throws IOException {
+ public int advance(final int target) throws IOException {
// nocommit: should we, here, optimize .advance(target that isn't
// too far away) into scan? seems like simple win?
@@ -264,11 +176,7 @@
doc += docDeltas[docPointer];
first = false;
count++;
- if (freqsReader != null && ++freqPointer >= freqPointerMax) {
- freqPointerMax = freqsReader.fill();
- assert freqPointerMax != 0;
- freqPointer = 0;
- }
+ fillFreq();
if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
return doc;
}
@@ -279,20 +187,13 @@
}
// not found in current block, seek underlying stream
- BulkPostingsEnum.JumpResult jumpResult;
+ final BulkPostingsEnum.JumpResult jumpResult;
if (target - doc > docDeltas.length && // avoid useless jumps
(jumpResult = docsEnum.jump(target, count)) != null) {
count = jumpResult.count;
doc = jumpResult.docID;
first = false;
- docPointer = docDeltasReader.offset();
- docPointerMax = docDeltasReader.end();
- docPointer--;
- if (freqsReader != null) {
- freqPointer = freqsReader.offset();
- freqPointerMax = freqsReader.end();
- freqPointer--;
- }
+ reset();
} else {
// seek did not jump -- just fill next buffer
docPointerMax = docDeltasReader.fill();
@@ -305,14 +206,14 @@
} else {
return doc = NO_MORE_DOCS;
}
- if (freqsReader != null && ++freqPointer >= freqPointerMax) {
- freqPointerMax = freqsReader.fill();
- assert freqPointerMax != 0;
- freqPointer = 0;
- }
+ fillFreq();
}
- // now scan
+ // now scan -- let the compiler inline this
+ return scan(target);
+ }
+
+ private int scan(final int target) throws IOException {
while(true) {
assert doc >= 0 && doc != NO_MORE_DOCS;
if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
@@ -332,12 +233,7 @@
}
}
- if (freqsReader != null && ++freqPointer >= freqPointerMax) {
- freqPointerMax = freqsReader.fill();
- assert freqPointerMax != 0;
- freqPointer = 0;
- }
-
+ fillFreq();
assert first || docDeltas[docPointer] > 0;
doc += docDeltas[docPointer];
count++;
@@ -348,5 +244,29 @@
/** Returns a string representation of this <code>TermScorer</code>. */
@Override
public String toString() { return "scorer(" + weight + ")"; }
-
+
+ private void fillFreq() throws IOException {
+ if (++freqPointer >= freqPointerMax) {
+ freqPointerMax = freqsReader.fill();
+ assert freqPointerMax != 0;
+ freqPointer = 0;
+ }
+ }
+
+ private void fillDeltas() throws IOException {
+ if (++docPointer >= docPointerMax) {
+ docPointerMax = docDeltasReader.fill();
+ assert docPointerMax != 0;
+ docPointer = 0;
+ }
+ }
+
+ private void reset() throws IOException {
+ docPointer = docDeltasReader.offset();
+ docPointerMax = docDeltasReader.end();
+ freqPointer = freqsReader.offset();
+ freqPointerMax = freqsReader.end();
+ --docPointer;
+ --freqPointer;
+ }
}
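
With freqsReader now guaranteed non-null in TermScorer (the null case moved to MatchOnlyTermScorer), the patch collapses the duplicated refill code into the fillDeltas()/fillFreq() pair, which advance the doc-delta and freq block streams in lockstep. A hedged sketch of that pairing (Block is the same kind of simplified stand-in as above; for codecs that emit doc deltas and freqs in equal-sized blocks, such as standard/FOR/PFOR, the two refill tests trigger together, which is the branch behavior the old duplicated code was written for):

    import java.io.IOException;

    // Sketch: paired block cursors behind fillDeltas() + fillFreq().
    class PairedBlockCursor {
      interface Block { int fill() throws IOException; int[] buffer(); }

      private final Block docDeltas, freqs;
      private int docPtr = -1, docMax;    // pre-decremented, as in reset()
      private int freqPtr = -1, freqMax;
      private int doc;

      PairedBlockCursor(Block docDeltas, Block freqs) {
        this.docDeltas = docDeltas;
        this.freqs = freqs;
      }

      // Consume one posting: refill either stream if its block is exhausted,
      // then decode the next doc ID. freq() reads the matching frequency.
      int next() throws IOException {
        if (++docPtr >= docMax) {    // fillDeltas()
          docMax = docDeltas.fill();
          docPtr = 0;
        }
        if (++freqPtr >= freqMax) {  // fillFreq()
          freqMax = freqs.fill();
          freqPtr = 0;
        }
        doc += docDeltas.buffer()[docPtr];
        return doc;
      }

      int freq() { return freqs.buffer()[freqPtr]; }
    }
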