| Index: lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java |
| =================================================================== |
| --- lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java (revision 0) |
| +++ lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java (revision 0) |
| @@ -0,0 +1,244 @@ |
| +package org.apache.lucene.search; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| + |
| +import org.apache.lucene.index.BulkPostingsEnum; |
| +import org.apache.lucene.index.BulkPostingsEnum.BlockReader; |
| +import org.apache.lucene.util.Bits; |
| + |
| +/** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>. |
| + * This scorer only makes sense for the omitTF=true case: the postings expose no freqs reader, so every match is treated as freq 1. |
| + */ |
| +final class MatchOnlyTermScorer extends Scorer { |
| + private final BulkPostingsEnum docsEnum; |
| + private final byte[] norms; |
| + private int doc; |
| + |
| + private final int[] docDeltas; |
| + private int docPointer; |
| + private int docPointerMax; |
| + private boolean first = true; |
| + |
| + private final float rawScore; |
| + private final BlockReader docDeltasReader; |
| + private final Bits skipDocs; |
| + private final int docFreq; |
| + private int count; |
| + |
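| + /** |
| + * Construct a <code>MatchOnlyTermScorer</code>. |
| + * |
| + * @param weight The weight of the <code>Term</code> in the query. |
| + * @param td An iterator over the documents matching the <code>Term</code>; |
| + * it must not expose a freqs reader. |
| + * @param docDeltasReader The block reader supplying the doc deltas of <code>td</code>. |
| + * @param docFreq The number of postings to iterate for the <code>Term</code>. |
| + * @param skipDocs The documents to skip (e.g. deleted docs), or null to skip none. |
| + * @param similarity The <code>Similarity</code> implementation to be used for |
| + * score computations. |
| + * @param norms The field norms of the document fields for the <code>Term</code>, or null. |
| + */ |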
| + MatchOnlyTermScorer(Weight weight, BulkPostingsEnum td, BlockReader docDeltasReader, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException { |
| + super(similarity, weight); |
| + |
| + assert td.getFreqsReader() == null; |
| + |
| + this.docsEnum = td; |
| + this.docFreq = docFreq; |
| + this.docDeltasReader = docDeltasReader; |
| + docDeltas = docDeltasReader.getBuffer(); |
| + reset(); |
| + |
| + this.skipDocs = skipDocs; |
| + this.norms = norms; |
| + rawScore = getSimilarity().tf(1f) * weight.getValue(); |
| + } |
| + |
| + @Override |
| + public void score(Collector c) throws IOException { |
| + score(c, Integer.MAX_VALUE, nextDoc()); |
| + } |
| + |
| + // firstDocID is ignored since nextDoc() sets 'doc' |
| + @Override |
| + protected boolean score(Collector c, int end, int firstDocID) throws IOException { |
| + c.setScorer(this); |
| + // nocommit -- this can leave scorer on a deleted doc... |
| + while (doc < end) { // for docs in window |
| + if (skipDocs == null || !skipDocs.get(doc)) { |
| + c.collect(doc); // collect |
| + } |
| + if (count == docFreq) { |
| + doc = NO_MORE_DOCS; |
| + return false; |
| + } |
| + count++; |
| + fillDocDeltas(); |
| + doc += docDeltas[docPointer]; |
| + } |
| + return true; |
| + } |
| + |
| + |
| + |
| + @Override |
| + public int docID() { |
| + return first ? -1 : doc; |
| + } |
| + |
| + @Override |
| + public float freq() { |
| + return 1.0f; |
| + } |
| + |
| + /** |
| + * Advances to the next document matching the query. <br> |
| + * The iterator over the matching documents is buffered: doc deltas are |
| + * read block-wise using {@link BlockReader#fill()}. |
| + * |
| + * @return the document matching the query or NO_MORE_DOCS if there are no more documents. |
| + */ |
| + @Override |
| + public int nextDoc() throws IOException { |
| + while(count < docFreq) { |
| + fillDocDeltas(); |
| + count++; |
| + doc += docDeltas[docPointer]; |
| + first = false; |
| + assert doc >= 0 && (skipDocs == null || doc < skipDocs.length()) && doc != NO_MORE_DOCS: "doc=" + doc + " skipDocs=" + skipDocs + " skipDocs.length=" + (skipDocs==null? "n/a" : skipDocs.length()); |
| + if (skipDocs == null || !skipDocs.get(doc)) { |
| + return doc; |
| + } |
| + } |
| + |
| + return doc = NO_MORE_DOCS; |
| + } |
| + |
| + @Override |
| + public float score() { |
| + assert !first; |
| + assert doc != NO_MORE_DOCS; |
| + |
| + return norms == null ? rawScore : rawScore * getSimilarity().decodeNormValue(norms[doc]); // normalize for field |
| + } |
| + |
| + /** |
| + * Advances to the first match beyond the current whose document number is |
| + * greater than or equal to a given target. <br> |
| + * The implementation scans the buffered doc deltas and only calls {@link BulkPostingsEnum#jump} for distant targets. |
| + * |
| + * @param target |
| + * The target document number. |
| + * @return the matching document or NO_MORE_DOCS if none exist. |
| + */ |
| + @Override |
| + public int advance(final int target) throws IOException { |
| + |
| + // nocommit: should we, here, optimize .advance(target that isn't |
| + // too far away) into scan? seems like simple win? |
| + |
| + // first scan current doc deltas block |
| + for (docPointer++; docPointer < docPointerMax && count < docFreq; docPointer++) { |
| + assert first || docDeltas[docPointer] > 0; |
| + doc += docDeltas[docPointer]; |
| + first = false; |
| + count++; |
| + |
| + if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) { |
| + return doc; |
| + } |
| + } |
| + |
| + if (count == docFreq) { |
| + return doc = NO_MORE_DOCS; |
| + } |
| + |
| + // not found in current block, seek underlying stream |
| + final BulkPostingsEnum.JumpResult jumpResult; |
| + if (target - doc > docDeltas.length && // avoid useless jumps |
| + (jumpResult = docsEnum.jump(target, count)) != null) { |
| + count = jumpResult.count; |
| + doc = jumpResult.docID; |
| + first = false; |
| + reset(); |
| + } else { |
| + // seek did not jump -- just fill next buffer |
| + docPointerMax = docDeltasReader.fill(); |
| + if (docPointerMax != 0) { |
| + docPointer = 0; |
| + assert first || docDeltas[0] > 0; |
| + doc += docDeltas[0]; |
| + count++; |
| + first = false; |
| + } else { |
| + return doc = NO_MORE_DOCS; |
| + } |
| + } |
| + |
| + // now scan |
| + return scan(target); |
| + } |
| + |
| + private int scan(final int target) throws IOException { |
| + while(true) { |
| + assert doc >= 0 && doc != NO_MORE_DOCS; |
| + if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) { |
| + return doc; |
| + } |
| + |
| + if (count >= docFreq) { |
| + break; |
| + } |
| + |
| + if (++docPointer >= docPointerMax) { |
| + docPointerMax = docDeltasReader.fill(); |
| + if (docPointerMax != 0) { |
| + docPointer = 0; |
| + } else { |
| + return doc = NO_MORE_DOCS; |
| + } |
| + } |
| + |
| + assert first || docDeltas[docPointer] > 0; |
| + doc += docDeltas[docPointer]; |
| + count++; |
| + } |
| + return doc = NO_MORE_DOCS; |
| + } |
| + |
| + private void fillDocDeltas() throws IOException { |
| + if (++docPointer >= docPointerMax) { |
| + docPointerMax = docDeltasReader.fill(); |
| + assert docPointerMax != 0; |
| + docPointer = 0; |
| + } |
| + } |
| + |
| + private void reset() throws IOException { |
| + docPointerMax = docDeltasReader.end(); |
| + docPointer = docDeltasReader.offset(); |
| + docPointer--; |
| + } |
| + |
| + /** Returns a string representation of this <code>MatchOnlyTermScorer</code>. */ |
| + @Override |
| + public String toString() { return "scorer(" + weight + ")"; } |
| + |
| +} |
| |
| Property changes on: lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java |
| ___________________________________________________________________ |
| Added: svn:eol-style |
| + native |
| Added: svn:keywords |
| + Date Author Id Revision HeadURL |
| |
| Index: lucene/src/java/org/apache/lucene/search/TermQuery.java |
| =================================================================== |
| --- lucene/src/java/org/apache/lucene/search/TermQuery.java (revision 1049503) |
| +++ lucene/src/java/org/apache/lucene/search/TermQuery.java (working copy) |
| @@ -24,6 +24,7 @@ |
| import org.apache.lucene.index.BulkPostingsEnum; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.IndexReader; |
| +import org.apache.lucene.index.BulkPostingsEnum.BlockReader; |
| import org.apache.lucene.search.Explanation.IDFExplanation; |
| import org.apache.lucene.util.ToStringUtils; |
| |
| @@ -85,10 +86,17 @@ |
| if (docs == null) { |
| return null; |
| } |
| - |
| // nocommit: we need this docfreq from TermState, MTQ knows it... but tosses it away. |
| - return new TermScorer(this, docs, reader.docFreq(term.field(), term.bytes()), |
| - reader.getDeletedDocs(), similarity, reader.norms(term.field())); |
| + final int docFreq = reader.docFreq(term.field(), term.bytes()); |
| + final BlockReader docDeltas = docs.getDocDeltasReader(); |
| + final BlockReader frequencies = docs.getFreqsReader(); |
| + if (frequencies == null) { |
| + return new MatchOnlyTermScorer(this, docs, docDeltas, docFreq, |
| + reader.getDeletedDocs(), similarity, reader.norms(term.field())); |
| + } else { |
| + return new TermScorer(this, docs, docDeltas, frequencies, docFreq, |
| + reader.getDeletedDocs(), similarity, reader.norms(term.field())); |
| + } |
| } |
| |
| @Override |
| Index: lucene/src/java/org/apache/lucene/search/TermScorer.java |
| =================================================================== |
| --- lucene/src/java/org/apache/lucene/search/TermScorer.java (revision 1049503) |
| +++ lucene/src/java/org/apache/lucene/search/TermScorer.java (working copy) |
| @@ -20,6 +20,7 @@ |
| import java.io.IOException; |
| |
| import org.apache.lucene.index.BulkPostingsEnum; |
| +import org.apache.lucene.index.BulkPostingsEnum.BlockReader; |
| import org.apache.lucene.util.Bits; |
| |
| // nocommit -- break out aligned & not cases? |
| @@ -28,9 +29,9 @@ |
| /** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>. |
| */ |
| final class TermScorer extends Scorer { |
| - private BulkPostingsEnum docsEnum; |
| - private byte[] norms; |
| - private float weightValue; |
| + private final BulkPostingsEnum docsEnum; |
| + private final byte[] norms; |
| + private final float weightValue; |
| private int doc; |
| |
| private final int[] docDeltas; |
| @@ -43,9 +44,9 @@ |
| private int freqPointerMax; |
| |
| private static final int SCORE_CACHE_SIZE = 32; |
| - private float[] scoreCache = new float[SCORE_CACHE_SIZE]; |
| - private final BulkPostingsEnum.BlockReader freqsReader; |
| - private final BulkPostingsEnum.BlockReader docDeltasReader; |
| + private final float[] scoreCache = new float[SCORE_CACHE_SIZE]; |
| + private final BlockReader freqsReader; |
| + private final BlockReader docDeltasReader; |
| private final Bits skipDocs; |
| private final int docFreq; |
| private int count; |
| @@ -63,27 +64,15 @@ |
| * @param norms |
| * The field norms of the document fields for the <code>Term</code>. |
| */ |
| - TermScorer(Weight weight, BulkPostingsEnum td, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException { |
| + TermScorer(Weight weight, BulkPostingsEnum td, BlockReader docDeltaReader, BlockReader freqReader, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException { |
| super(similarity, weight); |
| - |
| this.docsEnum = td; |
| this.docFreq = docFreq; |
| - docDeltasReader = td.getDocDeltasReader(); |
| + this.docDeltasReader = docDeltaReader; |
| docDeltas = docDeltasReader.getBuffer(); |
| - docPointerMax = docDeltasReader.end(); |
| - docPointer = docDeltasReader.offset(); |
| - docPointer--; |
| - |
| - freqsReader = td.getFreqsReader(); |
| - if (freqsReader != null) { |
| - freqs = freqsReader.getBuffer(); |
| - freqPointerMax = freqsReader.end(); |
| - freqPointer = freqsReader.offset(); |
| - freqPointer--; |
| - } else { |
| - freqs = null; |
| - } |
| - |
| + this.freqsReader = freqReader; |
| + freqs = freqsReader.getBuffer(); |
| + reset(); |
| this.skipDocs = skipDocs; |
| this.norms = norms; |
| this.weightValue = weight.getValue(); |
| @@ -101,11 +90,9 @@ |
| @Override |
| protected boolean score(Collector c, int end, int firstDocID) throws IOException { |
| c.setScorer(this); |
| - //System.out.println("ts.collect firstdocID=" + firstDocID + " term=" + term + " end=" + end + " doc=" + doc); |
| // nocommit -- this can leave scorer on a deleted doc... |
| while (doc < end) { // for docs in window |
| if (skipDocs == null || !skipDocs.get(doc)) { |
| - //System.out.println("ts.collect doc=" + doc + " skipDocs=" + skipDocs + " count=" + count + " vs dF=" + docFreq); |
| c.collect(doc); // collect |
| } |
| if (count == docFreq) { |
| @@ -113,40 +100,8 @@ |
| return false; |
| } |
| count++; |
| - docPointer++; |
| - |
| - //System.out.println("dp=" + docPointer + " dpMax=" + docPointerMax + " count=" + count + " countMax=" + docFreq); |
| - |
| - if (docPointer >= docPointerMax) { |
| - docPointerMax = docDeltasReader.fill(); |
| - //System.out.println(" refill! dpMax=" + docPointerMax + " reader=" + docDeltasReader); |
| - assert docPointerMax != 0; |
| - docPointer = 0; |
| - |
| - if (freqsReader != null) { |
| - freqPointer++; |
| - // NOTE: this code is intentionally dup'd |
| - // (specialized) w/ the else clause, for better CPU |
| - // branch prediction (assuming compiler doesn't |
| - // de-dup): for codecs that always bulk read same |
| - // number of docDeltas & freqs (standard, for, |
| - // pfor), this if will always be true. Other codecs |
| - // (simple9/16) will not be aligned: |
| - if (freqPointer >= freqPointerMax) { |
| - freqPointerMax = freqsReader.fill(); |
| - assert freqPointerMax != 0; |
| - freqPointer = 0; |
| - } |
| - } |
| - } else if (freqsReader != null) { |
| - freqPointer++; |
| - if (freqPointer >= freqPointerMax) { |
| - freqPointerMax = freqsReader.fill(); |
| - assert freqPointerMax != 0; |
| - freqPointer = 0; |
| - } |
| - } |
| - |
| + fillDeltas(); |
| + fillFreq(); |
| doc += docDeltas[docPointer]; |
| } |
| return true; |
| @@ -159,11 +114,7 @@ |
| |
| @Override |
| public float freq() { |
| - if (freqsReader != null) { |
| - return freqs[freqPointer]; |
| - } else { |
| - return 1.0f; |
| - } |
| + return freqs[freqPointer]; |
| } |
| |
| /** |
| @@ -175,64 +126,25 @@ |
| */ |
| @Override |
| public int nextDoc() throws IOException { |
| - //System.out.println("ts.nextDoc " + this + " count=" + count + " vs docFreq=" + docFreq); |
| while(count < docFreq) { |
| - docPointer++; |
| - if (docPointer >= docPointerMax) { |
| - //System.out.println("ts.nd refill docs"); |
| - docPointerMax = docDeltasReader.fill(); |
| - assert docPointerMax != 0; |
| - docPointer = 0; |
| - if (freqsReader != null) { |
| - // NOTE: this code is intentionally dup'd |
| - // (specialized) w/ the else clause, for better CPU |
| - // branch prediction (assuming compiler doesn't |
| - // de-dup): for codecs that always bulk read same |
| - // number of docDeltas & freqs (standard, for, |
| - // pfor), this if will always be true. Other codecs |
| - // (simple9/16) will not be aligned: |
| - freqPointer++; |
| - if (freqPointer >= freqPointerMax) { |
| - //System.out.println("ts.nd refill freqs"); |
| - freqPointerMax = freqsReader.fill(); |
| - assert freqPointerMax != 0; |
| - freqPointer = 0; |
| - } |
| - } |
| - } else { |
| - if (freqsReader != null) { |
| - freqPointer++; |
| - if (freqPointer >= freqPointerMax) { |
| - //System.out.println("ts.nd refill freqs"); |
| - freqPointerMax = freqsReader.fill(); |
| - assert freqPointerMax != 0; |
| - freqPointer = 0; |
| - } |
| - } |
| - } |
| + fillDeltas(); |
| + fillFreq(); |
| count++; |
| doc += docDeltas[docPointer]; |
| first = false; |
| assert doc >= 0 && (skipDocs == null || doc < skipDocs.length()) && doc != NO_MORE_DOCS: "doc=" + doc + " skipDocs=" + skipDocs + " skipDocs.length=" + (skipDocs==null? "n/a" : skipDocs.length()); |
| if (skipDocs == null || !skipDocs.get(doc)) { |
| - //System.out.println(" ret doc=" + doc + " freq=" + freq()); |
| return doc; |
| } |
| } |
| |
| - //System.out.println(" end"); |
| return doc = NO_MORE_DOCS; |
| } |
| - |
| + |
| @Override |
| public float score() { |
| assert !first; |
| - final int freq; |
| - if (freqsReader == null) { |
| - freq = 1; |
| - } else { |
| - freq = freqs[freqPointer]; |
| - } |
| + final int freq = freqs[freqPointer]; |
| assert freq > 0; |
| assert doc != NO_MORE_DOCS; |
| float raw = // compute tf(f)*weight |
| @@ -253,7 +165,7 @@ |
| * @return the matching document or NO_MORE_DOCS if none exist. |
| */ |
| @Override |
| - public int advance(int target) throws IOException { |
| + public int advance(final int target) throws IOException { |
| |
| // nocommit: should we, here, optimize .advance(target that isn't |
| // too far away) into scan? seems like simple win? |
| @@ -264,11 +176,7 @@ |
| doc += docDeltas[docPointer]; |
| first = false; |
| count++; |
| - if (freqsReader != null && ++freqPointer >= freqPointerMax) { |
| - freqPointerMax = freqsReader.fill(); |
| - assert freqPointerMax != 0; |
| - freqPointer = 0; |
| - } |
| + fillFreq(); |
| if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) { |
| return doc; |
| } |
| @@ -279,20 +187,13 @@ |
| } |
| |
| // not found in current block, seek underlying stream |
| - BulkPostingsEnum.JumpResult jumpResult; |
| + final BulkPostingsEnum.JumpResult jumpResult; |
| if (target - doc > docDeltas.length && // avoid useless jumps |
| (jumpResult = docsEnum.jump(target, count)) != null) { |
| count = jumpResult.count; |
| doc = jumpResult.docID; |
| first = false; |
| - docPointer = docDeltasReader.offset(); |
| - docPointerMax = docDeltasReader.end(); |
| - docPointer--; |
| - if (freqsReader != null) { |
| - freqPointer = freqsReader.offset(); |
| - freqPointerMax = freqsReader.end(); |
| - freqPointer--; |
| - } |
| + reset(); |
| } else { |
| // seek did not jump -- just fill next buffer |
| docPointerMax = docDeltasReader.fill(); |
| @@ -305,14 +206,14 @@ |
| } else { |
| return doc = NO_MORE_DOCS; |
| } |
| - if (freqsReader != null && ++freqPointer >= freqPointerMax) { |
| - freqPointerMax = freqsReader.fill(); |
| - assert freqPointerMax != 0; |
| - freqPointer = 0; |
| - } |
| + fillFreq(); |
| } |
| |
| - // now scan |
| + // now scan -- let the compiler inline this |
| + return scan(target); |
| + } |
| + |
| + private int scan(final int target) throws IOException { |
| while(true) { |
| assert doc >= 0 && doc != NO_MORE_DOCS; |
| if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) { |
| @@ -332,12 +233,7 @@ |
| } |
| } |
| |
| - if (freqsReader != null && ++freqPointer >= freqPointerMax) { |
| - freqPointerMax = freqsReader.fill(); |
| - assert freqPointerMax != 0; |
| - freqPointer = 0; |
| - } |
| - |
| + fillFreq(); |
| assert first || docDeltas[docPointer] > 0; |
| doc += docDeltas[docPointer]; |
| count++; |
| @@ -348,5 +244,29 @@ |
| /** Returns a string representation of this <code>TermScorer</code>. */ |
| @Override |
| public String toString() { return "scorer(" + weight + ")"; } |
| - |
| + |
| + private final void fillFreq() throws IOException { |
| + if (++freqPointer >= freqPointerMax) { |
| + freqPointerMax = freqsReader.fill(); |
| + assert freqPointerMax != 0; |
| + freqPointer = 0; |
| + } |
| + } |
| + |
| + private void fillDeltas() throws IOException { |
| + if (++docPointer >= docPointerMax) { |
| + docPointerMax = docDeltasReader.fill(); |
| + assert docPointerMax != 0; |
| + docPointer = 0; |
| + } |
| + } |
| + |
| + private final void reset() throws IOException { |
| + docPointer = docDeltasReader.offset(); |
| + docPointerMax = docDeltasReader.end(); |
| + freqPointer = freqsReader.offset(); |
| + freqPointerMax = freqsReader.end(); |
| + --docPointer; |
| + --freqPointer; |
| + } |
| } |