docs/attachments/LUCENE-2723/LUCENE-2723-termscorer.patch - lucene-jira-archive - Git at Google

 Index: lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java
 ===================================================================
 --- lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java	(revision 0)
 +++ lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java	(revision 0)
 @@ -0,0 +1,244 @@
 +package org.apache.lucene.search;
 +
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import java.io.IOException;
 +
 +import org.apache.lucene.index.BulkPostingsEnum;
 +import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
 +import org.apache.lucene.util.Bits;
 +
 +/** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
 + * This scorer only makes sense for the omitTF=true case
 + */
 +final class MatchOnlyTermScorer extends Scorer {
 +  private final BulkPostingsEnum docsEnum;
 +  private final byte[] norms;
 +  private int doc;
 +
 +  private final int[] docDeltas;
 +  private int docPointer;
 +  private int docPointerMax;
 +  private boolean first = true;
 +
 +  private final float rawScore;
 +  private final BlockReader docDeltasReader;
 +  private final Bits skipDocs;
 +  private final int docFreq;
 +  private int count;
 +
 +  /**
 +   * Construct a <code>MatchOnlyTermScorer</code>.
 +   *
 +   * @param weight
 +   *          The weight of the <code>Term</code> in the query.
 +   * @param td
 +   *          An iterator over the documents matching the <code>Term</code>.
 +   * @param similarity
 +   *          The <code>Similarity</code> implementation to be used for score
 +   *          computations.
 +   * @param norms
 +   *          The field norms of the document fields for the <code>Term</code>.
 +   */
 +  MatchOnlyTermScorer(Weight weight, BulkPostingsEnum td, BlockReader docDeltasReader, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException {
 +    super(similarity, weight);
 +
 +    assert td.getFreqsReader() == null;
 +
 +    this.docsEnum = td;
 +    this.docFreq = docFreq;
 +    this.docDeltasReader = docDeltasReader;
 +    docDeltas = docDeltasReader.getBuffer();
 +    reset();
 +
 +    this.skipDocs = skipDocs;
 +    this.norms = norms;
 +    rawScore = getSimilarity().tf(1f) * weight.getValue();
 +  }
 +
 +  @Override
 +  public void score(Collector c) throws IOException {
 +    score(c, Integer.MAX_VALUE, nextDoc());
 +  }
 +
 +  // firstDocID is ignored since nextDoc() sets 'doc'
 +  @Override
 +  protected boolean score(Collector c, int end, int firstDocID) throws IOException {
 +    c.setScorer(this);
 +    // nocommit -- this can leave scorer on a deleted doc...
 +    while (doc < end) {                           // for docs in window
 +      if (skipDocs == null || !skipDocs.get(doc)) {
 +        c.collect(doc);                      // collect
 +      }
 +      if (count == docFreq) {
 +        doc = NO_MORE_DOCS;
 +        return false;
 +      }
 +      count++;
 +      fillDocDeltas();
 +      doc += docDeltas[docPointer];
 +    }
 +    return true;
 +  }
 +
 +
 +
 +  @Override
 +  public int docID() {
 +    return first ? -1 : doc;
 +  }
 +
 +  @Override
 +  public float freq() {
 +    return 1.0f;
 +  }
 +
 +  /**
 +   * Advances to the next document matching the query. <br>
 +   * The doc deltas of the matching documents are read in blocks using
 +   * {@link BlockReader#fill()}.
 +   *
 +   * @return the document matching the query or NO_MORE_DOCS if there are no more documents.
 +   */
 +  @Override
 +  public int nextDoc() throws IOException {
 +    while(count < docFreq) {
 +      fillDocDeltas();
 +      count++;
 +      doc += docDeltas[docPointer];
 +      first = false;
 +      assert doc >= 0 && (skipDocs == null || doc < skipDocs.length()) && doc != NO_MORE_DOCS: "doc=" + doc + " skipDocs=" + skipDocs + " skipDocs.length=" + (skipDocs==null? "n/a" : skipDocs.length());
 +      if (skipDocs == null || !skipDocs.get(doc)) {
 +        return doc;
 +      }
 +    }
 +
 +    return doc = NO_MORE_DOCS;
 +  }
 +
 +  @Override
 +  public float score() {
 +    assert !first;
 +    assert doc != NO_MORE_DOCS;
 +
 +    return norms == null ? rawScore : rawScore * getSimilarity().decodeNormValue(norms[doc]); // normalize for field
 +  }
 +
 +  /**
 +   * Advances to the first match beyond the current whose document number is
 +   * greater than or equal to a given target. <br>
 +   * The implementation scans the current block, then uses {@link BulkPostingsEnum#jump}.
 +   *
 +   * @param target
 +   *          The target document number.
 +   * @return the matching document or NO_MORE_DOCS if none exist.
 +   */
 +  @Override
 +  public int advance(final int target) throws IOException {
 +
 +    // nocommit: should we, here, optimize .advance(target that isn't
 +    // too far away) into scan?  seems like simple win?
 +
 +    // first scan current doc deltas block
 +    for (docPointer++; docPointer < docPointerMax && count < docFreq; docPointer++) {
 +      assert first || docDeltas[docPointer] > 0;
 +      doc += docDeltas[docPointer];
 +      first = false;
 +      count++;
 +
 +      if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
 +        return doc;
 +      }
 +    }
 +
 +    if (count == docFreq) {
 +      return doc = NO_MORE_DOCS;
 +    }
 +
 +    // not found in current block, seek underlying stream
 +    final BulkPostingsEnum.JumpResult jumpResult;
 +    if (target - doc > docDeltas.length && // avoid useless jumps
 +        (jumpResult = docsEnum.jump(target, count)) != null) {
 +      count = jumpResult.count;
 +      doc = jumpResult.docID;
 +      first = false;
 +      reset();
 +    } else {
 +      // seek did not jump -- just fill next buffer
 +      docPointerMax = docDeltasReader.fill();
 +      if (docPointerMax != 0) {
 +        docPointer = 0;
 +        assert first || docDeltas[0] > 0;
 +        doc += docDeltas[0];
 +        count++;
 +        first = false;
 +      } else {
 +        return doc = NO_MORE_DOCS;
 +      }
 +    }
 +
 +    // now scan
 +    return scan(target);
 +  }
 +
 +  private int scan(final int target) throws IOException {
 +    while(true) {
 +      assert doc >= 0 && doc != NO_MORE_DOCS;
 +      if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
 +        return doc;
 +      }
 +
 +      if (count >= docFreq) {
 +        break;
 +      }
 +
 +      if (++docPointer >= docPointerMax) {
 +        docPointerMax = docDeltasReader.fill();
 +        if (docPointerMax != 0) {
 +          docPointer = 0;
 +        } else {
 +          return doc = NO_MORE_DOCS;
 +        }
 +      }
 +
 +      assert first || docDeltas[docPointer] > 0;
 +      doc += docDeltas[docPointer];
 +      count++;
 +    }
 +    return doc = NO_MORE_DOCS;
 +  }
 +
 +  private void fillDocDeltas() throws IOException {
 +    if (++docPointer >= docPointerMax) {
 +      docPointerMax = docDeltasReader.fill();
 +      assert docPointerMax != 0;
 +      docPointer = 0;
 +    }
 +  }
 +
 +  private void reset() throws IOException {
 +    docPointerMax = docDeltasReader.end();
 +    docPointer = docDeltasReader.offset();
 +    docPointer--;
 +  }
 +
 +  /** Returns a string representation of this <code>MatchOnlyTermScorer</code>. */
 +  @Override
 +  public String toString() { return "scorer(" + weight + ")"; }
 +
 +}

 Property changes on: lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java
 ___________________________________________________________________
 Added: svn:eol-style
    + native
 Added: svn:keywords
    + Date Author Id Revision HeadURL

 Index: lucene/src/java/org/apache/lucene/search/TermQuery.java
 ===================================================================
 --- lucene/src/java/org/apache/lucene/search/TermQuery.java	(revision 1049503)
 +++ lucene/src/java/org/apache/lucene/search/TermQuery.java	(working copy)
 @@ -24,6 +24,7 @@
  import org.apache.lucene.index.BulkPostingsEnum;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.index.IndexReader;
 +import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
  import org.apache.lucene.search.Explanation.IDFExplanation;
  import org.apache.lucene.util.ToStringUtils;

 @@ -85,10 +86,17 @@
        if (docs == null) {
          return null;
        }
 -
        // nocommit: we need this docfreq from TermState, MTQ knows it... but tosses it away.
 -      return new TermScorer(this, docs, reader.docFreq(term.field(), term.bytes()),
 -                            reader.getDeletedDocs(), similarity, reader.norms(term.field()));
 +      final int docFreq = reader.docFreq(term.field(), term.bytes());
 +      final BlockReader docDeltas = docs.getDocDeltasReader();
 +      final BlockReader frequencies = docs.getFreqsReader();
 +      if (frequencies == null) {
 +        return new MatchOnlyTermScorer(this, docs, docDeltas, docFreq,
 +            reader.getDeletedDocs(), similarity, reader.norms(term.field()));
 +      } else {
 +        return new TermScorer(this, docs, docDeltas, frequencies, docFreq,
 +            reader.getDeletedDocs(), similarity, reader.norms(term.field()));
 +      }
      }

      @Override
 Index: lucene/src/java/org/apache/lucene/search/TermScorer.java
 ===================================================================
 --- lucene/src/java/org/apache/lucene/search/TermScorer.java	(revision 1049503)
 +++ lucene/src/java/org/apache/lucene/search/TermScorer.java	(working copy)
 @@ -20,6 +20,7 @@
  import java.io.IOException;

  import org.apache.lucene.index.BulkPostingsEnum;
 +import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
  import org.apache.lucene.util.Bits;

  // nocommit -- break out aligned & not cases?
 @@ -28,9 +29,9 @@
  /** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
   */
  final class TermScorer extends Scorer {
 -  private BulkPostingsEnum docsEnum;
 -  private byte[] norms;
 -  private float weightValue;
 +  private final BulkPostingsEnum docsEnum;
 +  private final byte[] norms;
 +  private final float weightValue;
    private int doc;

    private final int[] docDeltas;
 @@ -43,9 +44,9 @@
    private int freqPointerMax;

    private static final int SCORE_CACHE_SIZE = 32;
 -  private float[] scoreCache = new float[SCORE_CACHE_SIZE];
 -  private final BulkPostingsEnum.BlockReader freqsReader;
 -  private final BulkPostingsEnum.BlockReader docDeltasReader;
 +  private final float[] scoreCache = new float[SCORE_CACHE_SIZE];
 +  private final BlockReader freqsReader;
 +  private final BlockReader docDeltasReader;
    private final Bits skipDocs;
    private final int docFreq;
    private int count;
 @@ -63,27 +64,15 @@
     * @param norms
     *          The field norms of the document fields for the <code>Term</code>.
     */
 -  TermScorer(Weight weight, BulkPostingsEnum td, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException {
 +  TermScorer(Weight weight, BulkPostingsEnum td, BlockReader docDeltaReader, BlockReader freqReader, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException {
      super(similarity, weight);
 -
      this.docsEnum = td;
      this.docFreq = docFreq;
 -    docDeltasReader = td.getDocDeltasReader();
 +    this.docDeltasReader = docDeltaReader;
      docDeltas = docDeltasReader.getBuffer();
 -    docPointerMax = docDeltasReader.end();
 -    docPointer = docDeltasReader.offset();
 -    docPointer--;
 -
 -    freqsReader = td.getFreqsReader();
 -    if (freqsReader != null) {
 -      freqs = freqsReader.getBuffer();
 -      freqPointerMax = freqsReader.end();
 -      freqPointer = freqsReader.offset();
 -      freqPointer--;
 -    } else {
 -      freqs = null;
 -    }
 -
 +    this.freqsReader = freqReader;
 +    freqs = freqsReader.getBuffer();
 +    reset();
      this.skipDocs = skipDocs;
      this.norms = norms;
      this.weightValue = weight.getValue();
 @@ -101,11 +90,9 @@
    @Override
    protected boolean score(Collector c, int end, int firstDocID) throws IOException {
      c.setScorer(this);
 -    //System.out.println("ts.collect firstdocID=" + firstDocID + " term=" + term + " end=" + end + " doc=" + doc);
      // nocommit -- this can leave scorer on a deleted doc...
      while (doc < end) {                           // for docs in window
        if (skipDocs == null || !skipDocs.get(doc)) {
 -        //System.out.println("ts.collect doc=" + doc + " skipDocs=" + skipDocs + " count=" + count + " vs dF=" + docFreq);
          c.collect(doc);                      // collect
        }
        if (count == docFreq) {
 @@ -113,40 +100,8 @@
          return false;
        }
        count++;
 -      docPointer++;
 -
 -      //System.out.println("dp=" + docPointer + " dpMax=" + docPointerMax + " count=" + count + " countMax=" + docFreq);
 -
 -      if (docPointer >= docPointerMax) {
 -        docPointerMax = docDeltasReader.fill();
 -        //System.out.println("    refill!  dpMax=" + docPointerMax + " reader=" + docDeltasReader);
 -        assert docPointerMax != 0;
 -        docPointer = 0;
 -
 -        if (freqsReader != null) {
 -          freqPointer++;
 -          // NOTE: this code is intentionally dup'd
 -          // (specialized) w/ the else clause, for better CPU
 -          // branch prediction (assuming compiler doesn't
 -          // de-dup): for codecs that always bulk read same
 -          // number of docDeltas & freqs (standard, for,
 -          // pfor), this if will always be true.  Other codecs
 -          // (simple9/16) will not be aligned:
 -          if (freqPointer >= freqPointerMax) {
 -            freqPointerMax = freqsReader.fill();
 -            assert freqPointerMax != 0;
 -            freqPointer = 0;
 -          }
 -        }
 -      } else if (freqsReader != null) {
 -        freqPointer++;
 -        if (freqPointer >= freqPointerMax) {
 -          freqPointerMax = freqsReader.fill();
 -          assert freqPointerMax != 0;
 -          freqPointer = 0;
 -        }
 -      }
 -
 +      fillDeltas();
 +      fillFreq();
        doc += docDeltas[docPointer];
      }
      return true;
 @@ -159,11 +114,7 @@

    @Override
    public float freq() {
 -    if (freqsReader != null) {
 -      return freqs[freqPointer];
 -    } else {
 -      return 1.0f;
 -    }
 +    return freqs[freqPointer];
    }

    /**
 @@ -175,64 +126,25 @@
     */
    @Override
    public int nextDoc() throws IOException {
 -    //System.out.println("ts.nextDoc " + this + " count=" + count + " vs docFreq=" + docFreq);
      while(count < docFreq) {
 -      docPointer++;
 -      if (docPointer >= docPointerMax) {
 -        //System.out.println("ts.nd refill docs");
 -        docPointerMax = docDeltasReader.fill();
 -        assert docPointerMax != 0;
 -        docPointer = 0;
 -        if (freqsReader != null) {
 -          // NOTE: this code is intentionally dup'd
 -          // (specialized) w/ the else clause, for better CPU
 -          // branch prediction (assuming compiler doesn't
 -          // de-dup): for codecs that always bulk read same
 -          // number of docDeltas & freqs (standard, for,
 -          // pfor), this if will always be true.  Other codecs
 -          // (simple9/16) will not be aligned:
 -          freqPointer++;
 -          if (freqPointer >= freqPointerMax) {
 -            //System.out.println("ts.nd refill freqs");
 -            freqPointerMax = freqsReader.fill();
 -            assert freqPointerMax != 0;
 -            freqPointer = 0;
 -          }
 -        }
 -      } else {
 -        if (freqsReader != null) {
 -          freqPointer++;
 -          if (freqPointer >= freqPointerMax) {
 -            //System.out.println("ts.nd refill freqs");
 -            freqPointerMax = freqsReader.fill();
 -            assert freqPointerMax != 0;
 -            freqPointer = 0;
 -          }
 -        }
 -      }
 +      fillDeltas();
 +      fillFreq();
        count++;
        doc += docDeltas[docPointer];
        first = false;
        assert doc >= 0 && (skipDocs == null || doc < skipDocs.length()) && doc != NO_MORE_DOCS: "doc=" + doc + " skipDocs=" + skipDocs + " skipDocs.length=" + (skipDocs==null? "n/a" : skipDocs.length());
        if (skipDocs == null || !skipDocs.get(doc)) {
 -        //System.out.println("  ret doc=" + doc + " freq=" + freq());
          return doc;
        }
      }

 -    //System.out.println("  end");
      return doc = NO_MORE_DOCS;
    }
 -
 +
    @Override
    public float score() {
      assert !first;
 -    final int freq;
 -    if (freqsReader == null) {
 -      freq = 1;
 -    } else {
 -      freq = freqs[freqPointer];
 -    }
 +    final int freq = freqs[freqPointer];
      assert freq > 0;
      assert doc != NO_MORE_DOCS;
      float raw =                                   // compute tf(f)*weight
 @@ -253,7 +165,7 @@
     * @return the matching document or NO_MORE_DOCS if none exist.
     */
    @Override
 -  public int advance(int target) throws IOException {
 +  public int advance(final int target) throws IOException {

      // nocommit: should we, here, optimize .advance(target that isn't
      // too far away) into scan?  seems like simple win?
 @@ -264,11 +176,7 @@
        doc += docDeltas[docPointer];
        first = false;
        count++;
 -      if (freqsReader != null && ++freqPointer >= freqPointerMax) {
 -        freqPointerMax = freqsReader.fill();
 -        assert freqPointerMax != 0;
 -        freqPointer = 0;
 -      }
 +      fillFreq();
        if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
          return doc;
        }
 @@ -279,20 +187,13 @@
      }

      // not found in current block, seek underlying stream
 -    BulkPostingsEnum.JumpResult jumpResult;
 +    final BulkPostingsEnum.JumpResult jumpResult;
      if (target - doc > docDeltas.length && // avoid useless jumps
          (jumpResult = docsEnum.jump(target, count)) != null) {
        count = jumpResult.count;
        doc = jumpResult.docID;
        first = false;
 -      docPointer = docDeltasReader.offset();
 -      docPointerMax = docDeltasReader.end();
 -      docPointer--;
 -      if (freqsReader != null) {
 -        freqPointer = freqsReader.offset();
 -        freqPointerMax = freqsReader.end();
 -        freqPointer--;
 -      }
 +      reset();
      } else {
        // seek did not jump -- just fill next buffer
        docPointerMax = docDeltasReader.fill();
 @@ -305,14 +206,14 @@
        } else {
          return doc = NO_MORE_DOCS;
        }
 -      if (freqsReader != null && ++freqPointer >= freqPointerMax) {
 -        freqPointerMax = freqsReader.fill();
 -        assert freqPointerMax != 0;
 -        freqPointer = 0;
 -      }
 +     fillFreq();
      }

 -    // now scan
 +    // now scan -- let the compiler inline this
 +    return scan(target);
 +  }
 +
 +  private int scan(final int target) throws IOException {
      while(true) {
        assert doc >= 0 && doc != NO_MORE_DOCS;
        if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
 @@ -332,12 +233,7 @@
          }
        }

 -      if (freqsReader != null && ++freqPointer >= freqPointerMax) {
 -        freqPointerMax = freqsReader.fill();
 -        assert freqPointerMax != 0;
 -        freqPointer = 0;
 -      }
 -
 +      fillFreq();
        assert first || docDeltas[docPointer] > 0;
        doc += docDeltas[docPointer];
        count++;
 @@ -348,5 +244,29 @@
    /** Returns a string representation of this <code>TermScorer</code>. */
    @Override
    public String toString() { return "scorer(" + weight + ")"; }
 -
 +
 +  private final void fillFreq() throws IOException {
 +    if (++freqPointer >= freqPointerMax) {
 +      freqPointerMax = freqsReader.fill();
 +      assert freqPointerMax != 0;
 +      freqPointer = 0;
 +    }
 +  }
 +
 +  private void fillDeltas() throws IOException {
 +    if (++docPointer >= docPointerMax) {
 +      docPointerMax = docDeltasReader.fill();
 +      assert docPointerMax != 0;
 +      docPointer = 0;
 +    }
 +  }
 +
 +  private final void reset() throws IOException {
 +    docPointer = docDeltasReader.offset();
 +    docPointerMax = docDeltasReader.end();
 +    freqPointer = freqsReader.offset();
 +    freqPointerMax = freqsReader.end();
 +    --docPointer;
 +    --freqPointer;
 +  }
  }
	Index: lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java
	===================================================================
	--- lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java (revision 0)
	+++ lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java (revision 0)
	@@ -0,0 +1,244 @@
	+package org.apache.lucene.search;
	+
	+/**
	+ * Licensed to the Apache Software Foundation (ASF) under one or more
	+ * contributor license agreements. See the NOTICE file distributed with
	+ * this work for additional information regarding copyright ownership.
	+ * The ASF licenses this file to You under the Apache License, Version 2.0
	+ * (the "License"); you may not use this file except in compliance with
	+ * the License. You may obtain a copy of the License at
	+ *
	+ * http://www.apache.org/licenses/LICENSE-2.0
	+ *
	+ * Unless required by applicable law or agreed to in writing, software
	+ * distributed under the License is distributed on an "AS IS" BASIS,
	+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	+ * See the License for the specific language governing permissions and
	+ * limitations under the License.
	+ */
	+
	+import java.io.IOException;
	+
	+import org.apache.lucene.index.BulkPostingsEnum;
	+import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
	+import org.apache.lucene.util.Bits;
	+
	+/** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
	+ * This scorer only makes sense for the omitTF=true case
	+ */
	+final class MatchOnlyTermScorer extends Scorer {
	+ private final BulkPostingsEnum docsEnum;
	+ private final byte[] norms;
	+ private int doc;
	+
	+ private final int[] docDeltas;
	+ private int docPointer;
	+ private int docPointerMax;
	+ private boolean first = true;
	+
	+ private final float rawScore;
	+ private final BlockReader docDeltasReader;
	+ private final Bits skipDocs;
	+ private final int docFreq;
	+ private int count;
	+
	+ /**
	+ * Construct a <code>MatchOnlyTermScorer</code>.
	+ *
	+ * @param weight
	+ * The weight of the <code>Term</code> in the query.
	+ * @param td
	+ * An iterator over the documents matching the <code>Term</code>.
	+ * @param similarity
	+ * The <code>Similarity</code> implementation to be used for score
	+ * computations.
	+ * @param norms
	+ * The field norms of the document fields for the <code>Term</code>.
	+ */
	+ MatchOnlyTermScorer(Weight weight, BulkPostingsEnum td, BlockReader docDeltasReader, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException {
	+ super(similarity, weight);
	+
	+ assert td.getFreqsReader() == null;
	+
	+ this.docsEnum = td;
	+ this.docFreq = docFreq;
	+ this.docDeltasReader = docDeltasReader;
	+ docDeltas = docDeltasReader.getBuffer();
	+ reset();
	+
	+ this.skipDocs = skipDocs;
	+ this.norms = norms;
	+ rawScore = getSimilarity().tf(1f) * weight.getValue();
	+ }
	+
	+ @Override
	+ public void score(Collector c) throws IOException {
	+ score(c, Integer.MAX_VALUE, nextDoc());
	+ }
	+
	+ // firstDocID is ignored since nextDoc() sets 'doc'
	+ @Override
	+ protected boolean score(Collector c, int end, int firstDocID) throws IOException {
	+ c.setScorer(this);
	+ // nocommit -- this can leave scorer on a deleted doc...
	+ while (doc < end) { // for docs in window
	+ if (skipDocs == null || !skipDocs.get(doc)) {
	+ c.collect(doc); // collect
	+ }
	+ if (count == docFreq) {
	+ doc = NO_MORE_DOCS;
	+ return false;
	+ }
	+ count++;
	+ fillDocDeltas();
	+ doc += docDeltas[docPointer];
	+ }
	+ return true;
	+ }
	+
	+
	+
	+ @Override
	+ public int docID() {
	+ return first ? -1 : doc;
	+ }
	+
	+ @Override
	+ public float freq() {
	+ return 1.0f;
	+ }
	+
	+ /**
	+ * Advances to the next document matching the query. <br>
	+ * The doc deltas of the matching documents are read in blocks using
	+ * {@link BlockReader#fill()}.
	+ *
	+ * @return the document matching the query or NO_MORE_DOCS if there are no more documents.
	+ */
	+ @Override
	+ public int nextDoc() throws IOException {
	+ while(count < docFreq) {
	+ fillDocDeltas();
	+ count++;
	+ doc += docDeltas[docPointer];
	+ first = false;
	+ assert doc >= 0 && (skipDocs == null || doc < skipDocs.length()) && doc != NO_MORE_DOCS: "doc=" + doc + " skipDocs=" + skipDocs + " skipDocs.length=" + (skipDocs==null? "n/a" : skipDocs.length());
	+ if (skipDocs == null || !skipDocs.get(doc)) {
	+ return doc;
	+ }
	+ }
	+
	+ return doc = NO_MORE_DOCS;
	+ }
	+
	+ @Override
	+ public float score() {
	+ assert !first;
	+ assert doc != NO_MORE_DOCS;
	+
	+ return norms == null ? rawScore : rawScore * getSimilarity().decodeNormValue(norms[doc]); // normalize for field
	+ }
	+
	+ /**
	+ * Advances to the first match beyond the current whose document number is
	+ * greater than or equal to a given target. <br>
	+ * The implementation scans the current block, then uses {@link BulkPostingsEnum#jump}.
	+ *
	+ * @param target
	+ * The target document number.
	+ * @return the matching document or NO_MORE_DOCS if none exist.
	+ */
	+ @Override
	+ public int advance(final int target) throws IOException {
	+
	+ // nocommit: should we, here, optimize .advance(target that isn't
	+ // too far away) into scan? seems like simple win?
	+
	+ // first scan current doc deltas block
	+ for (docPointer++; docPointer < docPointerMax && count < docFreq; docPointer++) {
	+ assert first || docDeltas[docPointer] > 0;
	+ doc += docDeltas[docPointer];
	+ first = false;
	+ count++;
	+
	+ if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
	+ return doc;
	+ }
	+ }
	+
	+ if (count == docFreq) {
	+ return doc = NO_MORE_DOCS;
	+ }
	+
	+ // not found in current block, seek underlying stream
	+ final BulkPostingsEnum.JumpResult jumpResult;
	+ if (target - doc > docDeltas.length && // avoid useless jumps
	+ (jumpResult = docsEnum.jump(target, count)) != null) {
	+ count = jumpResult.count;
	+ doc = jumpResult.docID;
	+ first = false;
	+ reset();
	+ } else {
	+ // seek did not jump -- just fill next buffer
	+ docPointerMax = docDeltasReader.fill();
	+ if (docPointerMax != 0) {
	+ docPointer = 0;
	+ assert first || docDeltas[0] > 0;
	+ doc += docDeltas[0];
	+ count++;
	+ first = false;
	+ } else {
	+ return doc = NO_MORE_DOCS;
	+ }
	+ }
	+
	+ // now scan
	+ return scan(target);
	+ }
	+
	+ private int scan(final int target) throws IOException {
	+ while(true) {
	+ assert doc >= 0 && doc != NO_MORE_DOCS;
	+ if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
	+ return doc;
	+ }
	+
	+ if (count >= docFreq) {
	+ break;
	+ }
	+
	+ if (++docPointer >= docPointerMax) {
	+ docPointerMax = docDeltasReader.fill();
	+ if (docPointerMax != 0) {
	+ docPointer = 0;
	+ } else {
	+ return doc = NO_MORE_DOCS;
	+ }
	+ }
	+
	+ assert first || docDeltas[docPointer] > 0;
	+ doc += docDeltas[docPointer];
	+ count++;
	+ }
	+ return doc = NO_MORE_DOCS;
	+ }
	+
	+ private void fillDocDeltas() throws IOException {
	+ if (++docPointer >= docPointerMax) {
	+ docPointerMax = docDeltasReader.fill();
	+ assert docPointerMax != 0;
	+ docPointer = 0;
	+ }
	+ }
	+
	+ private void reset() throws IOException {
	+ docPointerMax = docDeltasReader.end();
	+ docPointer = docDeltasReader.offset();
	+ docPointer--;
	+ }
	+
	+ /** Returns a string representation of this <code>MatchOnlyTermScorer</code>. */
	+ @Override
	+ public String toString() { return "scorer(" + weight + ")"; }
	+
	+}

	Property changes on: lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java
	___________________________________________________________________
	Added: svn:eol-style
	+ native
	Added: svn:keywords
	+ Date Author Id Revision HeadURL

	Index: lucene/src/java/org/apache/lucene/search/TermQuery.java
	===================================================================
	--- lucene/src/java/org/apache/lucene/search/TermQuery.java (revision 1049503)
	+++ lucene/src/java/org/apache/lucene/search/TermQuery.java (working copy)
	@@ -24,6 +24,7 @@
	import org.apache.lucene.index.BulkPostingsEnum;
	import org.apache.lucene.index.Term;
	import org.apache.lucene.index.IndexReader;
	+import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
	import org.apache.lucene.search.Explanation.IDFExplanation;
	import org.apache.lucene.util.ToStringUtils;

	@@ -85,10 +86,17 @@
	if (docs == null) {
	return null;
	}
	-
	// nocommit: we need this docfreq from TermState, MTQ knows it... but tosses it away.
	- return new TermScorer(this, docs, reader.docFreq(term.field(), term.bytes()),
	- reader.getDeletedDocs(), similarity, reader.norms(term.field()));
	+ final int docFreq = reader.docFreq(term.field(), term.bytes());
	+ final BlockReader docDeltas = docs.getDocDeltasReader();
	+ final BlockReader frequencies = docs.getFreqsReader();
	+ if (frequencies == null) {
	+ return new MatchOnlyTermScorer(this, docs, docDeltas, docFreq,
	+ reader.getDeletedDocs(), similarity, reader.norms(term.field()));
	+ } else {
	+ return new TermScorer(this, docs, docDeltas, frequencies, docFreq,
	+ reader.getDeletedDocs(), similarity, reader.norms(term.field()));
	+ }
	}

	@Override
	Index: lucene/src/java/org/apache/lucene/search/TermScorer.java
	===================================================================
	--- lucene/src/java/org/apache/lucene/search/TermScorer.java (revision 1049503)
	+++ lucene/src/java/org/apache/lucene/search/TermScorer.java (working copy)
	@@ -20,6 +20,7 @@
	import java.io.IOException;

	import org.apache.lucene.index.BulkPostingsEnum;
	+import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
	import org.apache.lucene.util.Bits;

	// nocommit -- break out aligned & not cases?
	@@ -28,9 +29,9 @@
	/** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
	*/
	final class TermScorer extends Scorer {
	- private BulkPostingsEnum docsEnum;
	- private byte[] norms;
	- private float weightValue;
	+ private final BulkPostingsEnum docsEnum;
	+ private final byte[] norms;
	+ private final float weightValue;
	private int doc;

	private final int[] docDeltas;
	@@ -43,9 +44,9 @@
	private int freqPointerMax;

	private static final int SCORE_CACHE_SIZE = 32;
	- private float[] scoreCache = new float[SCORE_CACHE_SIZE];
	- private final BulkPostingsEnum.BlockReader freqsReader;
	- private final BulkPostingsEnum.BlockReader docDeltasReader;
	+ private final float[] scoreCache = new float[SCORE_CACHE_SIZE];
	+ private final BlockReader freqsReader;
	+ private final BlockReader docDeltasReader;
	private final Bits skipDocs;
	private final int docFreq;
	private int count;
	@@ -63,27 +64,15 @@
	* @param norms
	* The field norms of the document fields for the <code>Term</code>.
	*/
	- TermScorer(Weight weight, BulkPostingsEnum td, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException {
	+ TermScorer(Weight weight, BulkPostingsEnum td, BlockReader docDeltaReader, BlockReader freqReader, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException {
	super(similarity, weight);
	-
	this.docsEnum = td;
	this.docFreq = docFreq;
	- docDeltasReader = td.getDocDeltasReader();
	+ this.docDeltasReader = docDeltaReader;
	docDeltas = docDeltasReader.getBuffer();
	- docPointerMax = docDeltasReader.end();
	- docPointer = docDeltasReader.offset();
	- docPointer--;
	-
	- freqsReader = td.getFreqsReader();
	- if (freqsReader != null) {
	- freqs = freqsReader.getBuffer();
	- freqPointerMax = freqsReader.end();
	- freqPointer = freqsReader.offset();
	- freqPointer--;
	- } else {
	- freqs = null;
	- }
	-
	+ this.freqsReader = freqReader;
	+ freqs = freqsReader.getBuffer();
	+ reset();
	this.skipDocs = skipDocs;
	this.norms = norms;
	this.weightValue = weight.getValue();
	@@ -101,11 +90,9 @@
	@Override
	protected boolean score(Collector c, int end, int firstDocID) throws IOException {
	c.setScorer(this);
	- //System.out.println("ts.collect firstdocID=" + firstDocID + " term=" + term + " end=" + end + " doc=" + doc);
	// nocommit -- this can leave scorer on a deleted doc...
	while (doc < end) { // for docs in window
	if (skipDocs == null || !skipDocs.get(doc)) {
	- //System.out.println("ts.collect doc=" + doc + " skipDocs=" + skipDocs + " count=" + count + " vs dF=" + docFreq);
	c.collect(doc); // collect
	}
	if (count == docFreq) {
	@@ -113,40 +100,8 @@
	return false;
	}
	count++;
	- docPointer++;
	-
	- //System.out.println("dp=" + docPointer + " dpMax=" + docPointerMax + " count=" + count + " countMax=" + docFreq);
	-
	- if (docPointer >= docPointerMax) {
	- docPointerMax = docDeltasReader.fill();
	- //System.out.println(" refill! dpMax=" + docPointerMax + " reader=" + docDeltasReader);
	- assert docPointerMax != 0;
	- docPointer = 0;
	-
	- if (freqsReader != null) {
	- freqPointer++;
	- // NOTE: this code is intentionally dup'd
	- // (specialized) w/ the else clause, for better CPU
	- // branch prediction (assuming compiler doesn't
	- // de-dup): for codecs that always bulk read same
	- // number of docDeltas & freqs (standard, for,
	- // pfor), this if will always be true. Other codecs
	- // (simple9/16) will not be aligned:
	- if (freqPointer >= freqPointerMax) {
	- freqPointerMax = freqsReader.fill();
	- assert freqPointerMax != 0;
	- freqPointer = 0;
	- }
	- }
	- } else if (freqsReader != null) {
	- freqPointer++;
	- if (freqPointer >= freqPointerMax) {
	- freqPointerMax = freqsReader.fill();
	- assert freqPointerMax != 0;
	- freqPointer = 0;
	- }
	- }
	-
	+ fillDeltas();
	+ fillFreq();
	doc += docDeltas[docPointer];
	}
	return true;
	@@ -159,11 +114,7 @@

	@Override
	public float freq() {
	- if (freqsReader != null) {
	- return freqs[freqPointer];
	- } else {
	- return 1.0f;
	- }
	+ return freqs[freqPointer];
	}

	/**
	@@ -175,64 +126,25 @@
	*/
	@Override
	public int nextDoc() throws IOException {
	- //System.out.println("ts.nextDoc " + this + " count=" + count + " vs docFreq=" + docFreq);
	while(count < docFreq) {
	- docPointer++;
	- if (docPointer >= docPointerMax) {
	- //System.out.println("ts.nd refill docs");
	- docPointerMax = docDeltasReader.fill();
	- assert docPointerMax != 0;
	- docPointer = 0;
	- if (freqsReader != null) {
	- // NOTE: this code is intentionally dup'd
	- // (specialized) w/ the else clause, for better CPU
	- // branch prediction (assuming compiler doesn't
	- // de-dup): for codecs that always bulk read same
	- // number of docDeltas & freqs (standard, for,
	- // pfor), this if will always be true. Other codecs
	- // (simple9/16) will not be aligned:
	- freqPointer++;
	- if (freqPointer >= freqPointerMax) {
	- //System.out.println("ts.nd refill freqs");
	- freqPointerMax = freqsReader.fill();
	- assert freqPointerMax != 0;
	- freqPointer = 0;
	- }
	- }
	- } else {
	- if (freqsReader != null) {
	- freqPointer++;
	- if (freqPointer >= freqPointerMax) {
	- //System.out.println("ts.nd refill freqs");
	- freqPointerMax = freqsReader.fill();
	- assert freqPointerMax != 0;
	- freqPointer = 0;
	- }
	- }
	- }
	+ fillDeltas();
	+ fillFreq();
	count++;
	doc += docDeltas[docPointer];
	first = false;
	assert doc >= 0 && (skipDocs == null || doc < skipDocs.length()) && doc != NO_MORE_DOCS: "doc=" + doc + " skipDocs=" + skipDocs + " skipDocs.length=" + (skipDocs==null? "n/a" : skipDocs.length());
	if (skipDocs == null || !skipDocs.get(doc)) {
	- //System.out.println(" ret doc=" + doc + " freq=" + freq());
	return doc;
	}
	}

	- //System.out.println(" end");
	return doc = NO_MORE_DOCS;
	}
	-
	+
	@Override
	public float score() {
	assert !first;
	- final int freq;
	- if (freqsReader == null) {
	- freq = 1;
	- } else {
	- freq = freqs[freqPointer];
	- }
	+ final int freq = freqs[freqPointer];
	assert freq > 0;
	assert doc != NO_MORE_DOCS;
	float raw = // compute tf(f)*weight
	@@ -253,7 +165,7 @@
	* @return the matching document or NO_MORE_DOCS if none exist.
	*/
	@Override
	- public int advance(int target) throws IOException {
	+ public int advance(final int target) throws IOException {

	// nocommit: should we, here, optimize .advance(target that isn't
	// too far away) into scan? seems like simple win?
	@@ -264,11 +176,7 @@
	doc += docDeltas[docPointer];
	first = false;
	count++;
	- if (freqsReader != null && ++freqPointer >= freqPointerMax) {
	- freqPointerMax = freqsReader.fill();
	- assert freqPointerMax != 0;
	- freqPointer = 0;
	- }
	+ fillFreq();
	if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
	return doc;
	}
	@@ -279,20 +187,13 @@
	}

	// not found in current block, seek underlying stream
	- BulkPostingsEnum.JumpResult jumpResult;
	+ final BulkPostingsEnum.JumpResult jumpResult;
	if (target - doc > docDeltas.length && // avoid useless jumps
	(jumpResult = docsEnum.jump(target, count)) != null) {
	count = jumpResult.count;
	doc = jumpResult.docID;
	first = false;
	- docPointer = docDeltasReader.offset();
	- docPointerMax = docDeltasReader.end();
	- docPointer--;
	- if (freqsReader != null) {
	- freqPointer = freqsReader.offset();
	- freqPointerMax = freqsReader.end();
	- freqPointer--;
	- }
	+ reset();
	} else {
	// seek did not jump -- just fill next buffer
	docPointerMax = docDeltasReader.fill();
	@@ -305,14 +206,14 @@
	} else {
	return doc = NO_MORE_DOCS;
	}
	- if (freqsReader != null && ++freqPointer >= freqPointerMax) {
	- freqPointerMax = freqsReader.fill();
	- assert freqPointerMax != 0;
	- freqPointer = 0;
	- }
	+ fillFreq();
	}

	- // now scan
	+ // now scan -- let the compiler inline this
	+ return scan(target);
	+ }
	+
	+ private int scan(final int target) throws IOException {
	while(true) {
	assert doc >= 0 && doc != NO_MORE_DOCS;
	if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
	@@ -332,12 +233,7 @@
	}
	}

	- if (freqsReader != null && ++freqPointer >= freqPointerMax) {
	- freqPointerMax = freqsReader.fill();
	- assert freqPointerMax != 0;
	- freqPointer = 0;
	- }
	-
	+ fillFreq();
	assert first || docDeltas[docPointer] > 0;
	doc += docDeltas[docPointer];
	count++;
	@@ -348,5 +244,29 @@
	/** Returns a string representation of this <code>TermScorer</code>. */
	@Override
	public String toString() { return "scorer(" + weight + ")"; }
	-
	+
	+ private final void fillFreq() throws IOException {
	+ if (++freqPointer >= freqPointerMax) {
	+ freqPointerMax = freqsReader.fill();
	+ assert freqPointerMax != 0;
	+ freqPointer = 0;
	+ }
	+ }
	+
	+ private void fillDeltas() throws IOException {
	+ if (++docPointer >= docPointerMax) {
	+ docPointerMax = docDeltasReader.fill();
	+ assert docPointerMax != 0;
	+ docPointer = 0;
	+ }
	+ }
	+
	+ private final void reset() throws IOException {
	+ docPointer = docDeltasReader.offset();
	+ docPointerMax = docDeltasReader.end();
	+ freqPointer = freqsReader.offset();
	+ freqPointerMax = freqsReader.end();
	+ --docPointer;
	+ --freqPointer;
	+ }
	}