docs/attachments/LUCENE-6880/LUCENE-6880.patch - lucene-jira-archive - Git at Google

 diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionScorer.java lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionScorer.java
 index 9b5c987..a8a7db1 100644
 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionScorer.java
 +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionScorer.java
 @@ -65,10 +65,10 @@ public class CompletionScorer extends BulkScorer {

    @Override
    public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
 -    if (!(collector instanceof TopSuggestDocsCollector)) {
 -      throw new IllegalArgumentException("collector is not of type TopSuggestDocsCollector");
 +    if (!(collector instanceof TopSuggestionsLeafCollector)) {
 +      throw new IllegalArgumentException("collector is not of type TopSuggestionsCollector");
      }
 -    suggester.lookup(this, acceptDocs, ((TopSuggestDocsCollector) collector));
 +    suggester.lookup(this, acceptDocs, ((TopSuggestionsLeafCollector) collector));
      return max;
    }

 @@ -103,4 +103,5 @@ public class CompletionScorer extends BulkScorer {
      }
      return weight * boost;
    }
 +
  }
 diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggester.java lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggester.java
 index db09482..c431c1c 100644
 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggester.java
 +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggester.java
 @@ -45,7 +45,7 @@ import static org.apache.lucene.search.suggest.document.NRTSuggester.PayLoadProc
   * <p>
   * NRTSuggester executes Top N search on a weighted FST specified by a {@link CompletionScorer}
   * <p>
 - * See {@link #lookup(CompletionScorer, Bits, TopSuggestDocsCollector)} for more implementation
 + * See {@link #lookup(CompletionScorer, Bits, TopSuggestionsLeafCollector)} for more implementation
   * details.
   * <p>
   * FST Format:
 @@ -115,7 +115,7 @@ public final class NRTSuggester implements Accountable {
    }

    /**
 -   * Collects at most {@link TopSuggestDocsCollector#getCountToCollect()} completions that
 +   * Collects at most {@link TopSuggestionsLeafCollector#getCountToCollect()} completions that
     * match the provided {@link CompletionScorer}.
     * <p>
     * The {@link CompletionScorer#automaton} is intersected with the {@link #fst}.
 @@ -124,9 +124,9 @@ public final class NRTSuggester implements Accountable {
     * the matched partial paths. Upon reaching a completed path, {@link CompletionScorer#accept(int, Bits)}
     * and {@link CompletionScorer#score(float, float)} is used on the document id, index weight
     * and query boost to filter and score the entry, before being collected via
 -   * {@link TopSuggestDocsCollector#collect(int, CharSequence, CharSequence, float)}
 +   * {@link TopSuggestionsLeafCollector#collect(int, CharSequence, CharSequence, float)}
     */
 -  public void lookup(final CompletionScorer scorer, Bits acceptDocs, final TopSuggestDocsCollector collector) throws IOException {
 +  public void lookup(final CompletionScorer scorer, Bits acceptDocs, final TopSuggestionsLeafCollector collector) throws IOException {
      final double liveDocsRatio = calculateLiveDocRatio(scorer.reader.numDocs(), scorer.reader.maxDoc());
      if (liveDocsRatio == -1) {
        return;
 @@ -144,6 +144,7 @@ public final class NRTSuggester implements Accountable {
      final int topN = collector.getCountToCollect() * prefixPaths.size();
      final int queueSize = getMaxTopNSearcherQueueSize(topN, scorer.reader.numDocs(), liveDocsRatio, scorer.filtered);
      Comparator<Pair<Long, BytesRef>> comparator = getComparator();
 +    //TODO: make TopNSearcher iterative such that topN value is not required upfront
      Util.TopNSearcher<Pair<Long, BytesRef>> searcher = new Util.TopNSearcher<Pair<Long, BytesRef>>(fst, topN, queueSize, comparator,
          new ScoringPathComparator(scorer)) {

 diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestIndexSearcher.java lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestIndexSearcher.java
 index 25566dc..08287d4 100644
 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestIndexSearcher.java
 +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestIndexSearcher.java
 @@ -52,19 +52,16 @@ public class SuggestIndexSearcher extends IndexSearcher {
     * <code>query</code>
     */
    public TopSuggestDocs suggest(CompletionQuery query, int n) throws IOException {
 -    TopSuggestDocsCollector collector = new TopSuggestDocsCollector(n);
 +    TopSuggestsCollector collector = new TopSuggestsCollector(n);
      suggest(query, collector);
 -    return collector.get();
 +    return ((TopSuggestDocs) collector.topDocs());
    }

    /**
     * Lower-level suggest API.
     * Collects completion hits through <code>collector</code> for <code>query</code>.
 -   *
 -   * <p>{@link TopSuggestDocsCollector#collect(int, CharSequence, CharSequence, float)}
 -   * is called for every matching completion hit.
     */
 -  public void suggest(CompletionQuery query, TopSuggestDocsCollector collector) throws IOException {
 +  public void suggest(CompletionQuery query, TopSuggestsCollector collector) throws IOException {
      // TODO use IndexSearcher.rewrite instead
      // have to implement equals() and hashCode() in CompletionQuerys and co
      query = (CompletionQuery) query.rewrite(getIndexReader());
 diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestScoreDocPriorityQueue.java lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestScoreDocPriorityQueue.java
 index 634383c..cd17732 100644
 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestScoreDocPriorityQueue.java
 +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestScoreDocPriorityQueue.java
 @@ -41,16 +41,4 @@ final class SuggestScoreDocPriorityQueue extends PriorityQueue<SuggestScoreDoc>
      }
      return a.score < b.score;
    }
 -
 -  /**
 -   * Returns the top N results in descending order.
 -   */
 -  public SuggestScoreDoc[] getResults() {
 -    int size = size();
 -    SuggestScoreDoc[] res = new SuggestScoreDoc[size];
 -    for (int i = size - 1; i >= 0; i--) {
 -      res[i] = pop();
 -    }
 -    return res;
 -  }
  }
 diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocs.java lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocs.java
 index 049f73a..7aefb45 100644
 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocs.java
 +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocs.java
 @@ -17,6 +17,9 @@ package org.apache.lucene.search.suggest.document;
   * limitations under the License.
   */

 +import java.util.ArrayList;
 +import java.util.List;
 +
  import org.apache.lucene.search.ScoreDoc;
  import org.apache.lucene.search.TopDocs;
  import org.apache.lucene.search.suggest.Lookup;
 @@ -38,34 +41,36 @@ public class TopSuggestDocs extends TopDocs {
     * {@link org.apache.lucene.search.ScoreDoc} with an
     * additional CharSequence key
     */
 -  public static class SuggestScoreDoc extends ScoreDoc implements Comparable<SuggestScoreDoc> {
 +  public static class SuggestScoreDoc extends ScoreDoc {
 +
 +    /**
 +     * Matched completion keys
 +     */
 +    public final List<CharSequence> keys = new ArrayList<>();

      /**
 -     * Matched completion key
 +     * Contexts for the completions
       */
 -    public final CharSequence key;
 +    public final List<CharSequence> contexts = new ArrayList<>();

      /**
 -     * Context for the completion
 +     * Scores for the completions
       */
 -    public final CharSequence context;
 +    public final List<Float> scores = new ArrayList<>();

      /**
       * Creates a SuggestScoreDoc instance
       *
       * @param doc   document id (hit)
       * @param key   matched completion
 +     * @param context context of the matched completion
       * @param score weight of the matched completion
       */
      public SuggestScoreDoc(int doc, CharSequence key, CharSequence context, float score) {
        super(doc, score);
 -      this.key = key;
 -      this.context = context;
 -    }
 -
 -    @Override
 -    public int compareTo(SuggestScoreDoc o) {
 -      return Lookup.CHARSEQUENCE_COMPARATOR.compare(key, o.key);
 +      this.keys.add(key);
 +      this.contexts.add(context);
 +      this.scores.add(score);
      }
    }

 @@ -96,20 +101,23 @@ public class TopSuggestDocs extends TopDocs {
     * NOTE: assumes every <code>shardHit</code> is already sorted by score
     */
    public static TopSuggestDocs merge(int topN, TopSuggestDocs[] shardHits) {
 -    SuggestScoreDocPriorityQueue priorityQueue = new SuggestScoreDocPriorityQueue(topN);
 +    SuggestScoreDocPriorityQueue queue = new SuggestScoreDocPriorityQueue(topN);
      for (TopSuggestDocs shardHit : shardHits) {
        for (SuggestScoreDoc scoreDoc : shardHit.scoreLookupDocs()) {
 -        if (scoreDoc == priorityQueue.insertWithOverflow(scoreDoc)) {
 +        if (scoreDoc == queue.insertWithOverflow(scoreDoc)) {
            break;
          }
        }
      }
 -    SuggestScoreDoc[] topNResults = priorityQueue.getResults();
 +    int size = queue.size() < topN ? queue.size() : topN;
 +    SuggestScoreDoc[] topNResults = new SuggestScoreDoc[size];
 +    for (int i = size - 1; i >= 0; i--) {
 +      topNResults[i] = queue.pop();
 +    }
      if (topNResults.length > 0) {
        return new TopSuggestDocs(topNResults.length, topNResults, topNResults[0].score);
      } else {
        return TopSuggestDocs.EMPTY;
      }
    }
 -
  }
 diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocsCollector.java lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocsCollector.java
 deleted file mode 100644
 index 1cb3277..0000000
 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocsCollector.java
 +++ /dev/null
 @@ -1,129 +0,0 @@
 -package org.apache.lucene.search.suggest.document;
 -
 -/*
 - * Licensed to the Apache Software Foundation (ASF) under one or more
 - * contributor license agreements.  See the NOTICE file distributed with
 - * this work for additional information regarding copyright ownership.
 - * The ASF licenses this file to You under the Apache License, Version 2.0
 - * (the "License"); you may not use this file except in compliance with
 - * the License.  You may obtain a copy of the License at
 - *
 - *     http://www.apache.org/licenses/LICENSE-2.0
 - *
 - * Unless required by applicable law or agreed to in writing, software
 - * distributed under the License is distributed on an "AS IS" BASIS,
 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 - * See the License for the specific language governing permissions and
 - * limitations under the License.
 - */
 -
 -import java.io.IOException;
 -
 -import org.apache.lucene.index.LeafReaderContext;
 -import org.apache.lucene.search.CollectionTerminatedException;
 -import org.apache.lucene.search.SimpleCollector;
 -
 -import static org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc;
 -
 -/**
 - * {@link org.apache.lucene.search.Collector} that collects completion and
 - * score, along with document id
 - * <p>
 - * Non scoring collector that collect completions in order of their
 - * pre-computed scores.
 - * <p>
 - * NOTE: One document can be collected multiple times if a document
 - * is matched for multiple unique completions for a given query
 - * <p>
 - * Subclasses should only override
 - * {@link TopSuggestDocsCollector#collect(int, CharSequence, CharSequence, float)}.
 - * <p>
 - * NOTE: {@link #setScorer(org.apache.lucene.search.Scorer)} and
 - * {@link #collect(int)} is not used
 - *
 - * @lucene.experimental
 - */
 -public class TopSuggestDocsCollector extends SimpleCollector {
 -
 -  private final SuggestScoreDocPriorityQueue priorityQueue;
 -  private final int num;
 -
 -  /**
 -   * Document base offset for the current Leaf
 -   */
 -  protected int docBase;
 -
 -  /**
 -   * Sole constructor
 -   *
 -   * Collects at most <code>num</code> completions
 -   * with corresponding document and weight
 -   */
 -  public TopSuggestDocsCollector(int num) {
 -    if (num <= 0) {
 -      throw new IllegalArgumentException("'num' must be > 0");
 -    }
 -    this.num = num;
 -    this.priorityQueue = new SuggestScoreDocPriorityQueue(num);
 -  }
 -
 -  /**
 -   * Returns the number of results to be collected
 -   */
 -  public int getCountToCollect() {
 -    return num;
 -  }
 -
 -  @Override
 -  protected void doSetNextReader(LeafReaderContext context) throws IOException {
 -    docBase = context.docBase;
 -  }
 -
 -  /**
 -   * Called for every matched completion,
 -   * similar to {@link org.apache.lucene.search.LeafCollector#collect(int)}
 -   * but for completions.
 -   *
 -   * NOTE: collection at the leaf level is guaranteed to be in
 -   * descending order of score
 -   */
 -  public void collect(int docID, CharSequence key, CharSequence context, float score) throws IOException {
 -    SuggestScoreDoc current = new SuggestScoreDoc(docBase + docID, key, context, score);
 -    if (current == priorityQueue.insertWithOverflow(current)) {
 -      // if the current SuggestScoreDoc has overflown from pq,
 -      // we can assume all of the successive collections from
 -      // this leaf will be overflown as well
 -      // TODO: reuse the overflow instance?
 -      throw new CollectionTerminatedException();
 -    }
 -  }
 -
 -  /**
 -   * Returns at most <code>num</code> Top scoring {@link org.apache.lucene.search.suggest.document.TopSuggestDocs}s
 -   */
 -  public TopSuggestDocs get() throws IOException {
 -    SuggestScoreDoc[] suggestScoreDocs = priorityQueue.getResults();
 -    if (suggestScoreDocs.length > 0) {
 -      return new TopSuggestDocs(suggestScoreDocs.length, suggestScoreDocs, suggestScoreDocs[0].score);
 -    } else {
 -      return TopSuggestDocs.EMPTY;
 -    }
 -  }
 -
 -  /**
 -   * Ignored
 -   */
 -  @Override
 -  public void collect(int doc) throws IOException {
 -    // {@link #collect(int, CharSequence, CharSequence, long)} is used
 -    // instead
 -  }
 -
 -  /**
 -   * Ignored
 -   */
 -  @Override
 -  public boolean needsScores() {
 -    return true;
 -  }
 -}
 diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestionDocumentsCollector.java lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestionDocumentsCollector.java
 new file mode 100644
 index 0000000..cd3f36b
 --- /dev/null
 +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestionDocumentsCollector.java
 @@ -0,0 +1,117 @@
 +package org.apache.lucene.search.suggest.document;
 +
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import java.io.IOException;
 +import java.util.LinkedHashMap;
 +import java.util.Map;
 +
 +import org.apache.lucene.index.LeafReaderContext;
 +import org.apache.lucene.search.CollectionTerminatedException;
 +import org.apache.lucene.search.LeafCollector;
 +import org.apache.lucene.search.TopDocs;
 +import org.apache.lucene.util.PriorityQueue;
 +
 +/**
 + * A {@link org.apache.lucene.search.TopDocsCollector} that collects
 + * <code>num</code> documents along with completion, pre-computed score
 + * and contexts using {@link TopSuggestionsLeafCollector}
 + * <p>
 + *   This collects <code>num</code> unique documents for each leaf and
 + *   uses a priority queue for the global top documents.
 + * <p>
 + *   NOTE: this collector uses a mutable map across its leaf collectors,
 + *   hence it is not thread-safe
 + * @lucene.experimental
 + */
 +public class TopSuggestionDocumentsCollector extends TopSuggestsCollector {
 +  private final Map<Integer, TopSuggestDocs.SuggestScoreDoc> scoreDocMap;
 +
 +  /**
 +   * Collects at most <code>num</code> documents
 +   * along with completions, pre-computed scores and
 +   * contexts
 +   */
 +  public TopSuggestionDocumentsCollector(int num) {
 +    this(num, new SuggestScoreDocPriorityQueue(num));
 +  }
 +
 +  /**
 +   * Sub-classes can pass in their pq implementation
 +   */
 +  protected TopSuggestionDocumentsCollector(int num, PriorityQueue<TopSuggestDocs.SuggestScoreDoc> pq) {
 +    super(num, pq);
 +    this.scoreDocMap = new LinkedHashMap<>(num);
 +  }
 +
 +  /**
 +   * {@inheritDoc}
 +   */
 +  @Override
 +  public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
 +    final LeafReaderContext ctx = context;
 +    updateResults();
 +    // NOTE: we set countToCollect to max doc, to increase the topN value set
 +    // in the underlying TopNSearcher, allowing it to collect more paths
 +    // that might belong to a document that has already been collected
 +    // (same document can have more than one path in the FST).
 +    // This can still lead to collecting less paths then needed, in case of having
 +    // more paths for a document than the number of documents in the segment.
 +    // This case has been worked around for the case of completions with contexts
 +    // as we can increase this value by the number of contexts that matched a query
 +    return new TopSuggestionsLeafCollector(context.reader().maxDoc()) {
 +      @Override
 +      public void collect(int docID, CharSequence key, CharSequence context, float score) throws IOException {
 +        if (scoreDocMap.containsKey(docID)) {
 +          TopSuggestDocs.SuggestScoreDoc suggestDoc = scoreDocMap.get(docID);
 +          suggestDoc.keys.add(key);
 +          suggestDoc.contexts.add(context);
 +          suggestDoc.scores.add(score);
 +        } else if (scoreDocMap.size() <= num) {
 +          scoreDocMap.put(docID, new TopSuggestDocs.SuggestScoreDoc(ctx.docBase + docID, key, context, score));
 +        } else {
 +          throw new CollectionTerminatedException();
 +        }
 +      }
 +    };
 +  }
 +
 +  private void updateResults() {
 +    // populate the pq with documents collected by the last leaf collector
 +    for (TopSuggestDocs.SuggestScoreDoc currentSuggestDoc : scoreDocMap.values()) {
 +      TopSuggestDocs.SuggestScoreDoc overflow = pq.insertWithOverflow(currentSuggestDoc);
 +      if (overflow == currentSuggestDoc) {
 +        break;
 +      } else if (overflow == null) {
 +        totalHits++;
 +      }
 +    }
 +    // clear the map for the next leaf collector to start populating
 +    scoreDocMap.clear();
 +  }
 +
 +  @Override
 +  public TopDocs topDocs(int start, int howMany) {
 +    updateResults();
 +    // we pass in topDocsSize() instead of howMany
 +    // as updateResults add suggest docs from the
 +    // last leaf collector to the pq
 +    return super.topDocs(start, topDocsSize());
 +  }
 +}
 +
 diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestionsLeafCollector.java lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestionsLeafCollector.java
 new file mode 100644
 index 0000000..49194db
 --- /dev/null
 +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestionsLeafCollector.java
 @@ -0,0 +1,83 @@
 +package org.apache.lucene.search.suggest.document;
 +
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import java.io.IOException;
 +
 +import org.apache.lucene.search.LeafCollector;
 +import org.apache.lucene.search.Scorer;
 +
 +/**
 + * {@link org.apache.lucene.search.LeafCollector} that collects completion
 + * and score, along with document id in order of their pre-computed scores
 + *
 + * <p>
 + * NOTE:
 + * <ul>
 + *   <li>
 + *    A document can be collected multiple times if a document is matched
 + *    for multiple unique completions
 + *   </li>
 + *   <li>
 + *     Documents are not collected in order of their pre-computed score
 + *     rather than their docIDs
 + *   </li>
 + * </ul>
 + * @lucene.experimental
 + */
 +public abstract class TopSuggestionsLeafCollector implements LeafCollector {
 +
 +  /**
 +   * Number of results to collect
 +   */
 +  protected final int countToCollect;
 +
 +  /**
 +   * Collects <code>countToCollect</code> completions
 +   */
 +  protected TopSuggestionsLeafCollector(int countToCollect) {
 +    this.countToCollect = countToCollect;
 +  }
 +
 +  /**
 +   * Returns the number of results to be collected
 +   */
 +  public int getCountToCollect() {
 +    return countToCollect;
 +  }
 +
 +  /**
 +   * Called for every matched completion,
 +   * similar to {@link org.apache.lucene.search.LeafCollector#collect(int)}
 +   * but for completions.
 +   *
 +   * NOTE: collection at the leaf level is guaranteed to be in
 +   * descending order of score
 +   */
 +  public abstract void collect(int docID, CharSequence key, CharSequence context, float score) throws IOException;
 +
 +  @Override
 +  public void setScorer(Scorer scorer) throws IOException {
 +    // no-op
 +  }
 +
 +  @Override
 +  public void collect(int doc) throws IOException {
 +    // no-op
 +  }
 +}
 diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestsCollector.java lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestsCollector.java
 new file mode 100644
 index 0000000..21b58fe
 --- /dev/null
 +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestsCollector.java
 @@ -0,0 +1,110 @@
 +package org.apache.lucene.search.suggest.document;
 +
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import java.io.IOException;
 +
 +import org.apache.lucene.index.LeafReaderContext;
 +import org.apache.lucene.search.CollectionTerminatedException;
 +import org.apache.lucene.search.LeafCollector;
 +import org.apache.lucene.search.TopDocs;
 +import org.apache.lucene.search.TopDocsCollector;
 +import org.apache.lucene.util.PriorityQueue;
 +
 +import static org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc;
 +
 +/**
 + * A {@link TopDocsCollector} that collects <code>num</code> completions,
 + * pre-computed score and contexts using {@link TopSuggestionsLeafCollector}
 + *
 + * NOTE: In case of matching a document multiple times (e.g. docs with multiple contexts),
 + * a single document will be collected more than once. To ensure collecting
 + * <code>num</code> unique documents, use {@link TopSuggestionDocumentsCollector}
 + * instead
 + *
 + * @lucene.experimental
 + */
 +public class TopSuggestsCollector extends TopDocsCollector<SuggestScoreDoc> {
 +
 +  /**
 +   * Number of completions to collect
 +   */
 +  protected final int num;
 +
 +  /**
 +   * Collects at most <code>num</code> completions
 +   * with corresponding document and weight
 +   */
 +  public TopSuggestsCollector(int num) {
 +    this(num, new SuggestScoreDocPriorityQueue(num));
 +  }
 +
 +  /**
 +   * Sub-classes can pass in their pq implementation
 +   */
 +  protected TopSuggestsCollector(int num, PriorityQueue<SuggestScoreDoc> pq) {
 +    super(pq);
 +    this.num = num;
 +  }
 +
 +  /**
 +   * Creates a {@link TopSuggestionsLeafCollector} for the given <code>context</code>
 +   */
 +  @Override
 +  public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
 +    final LeafReaderContext ctx = context;
 +    return new TopSuggestionsLeafCollector(num) {
 +      @Override
 +      public void collect(int docID, CharSequence key, CharSequence context, float score) throws IOException {
 +        SuggestScoreDoc current = new SuggestScoreDoc(ctx.docBase + docID, key, context, score);
 +        SuggestScoreDoc overflow = pq.insertWithOverflow(current);
 +        if (overflow == current) {
 +          // if the current SuggestScoreDoc has overflown from pq,
 +          // we can assume all of the successive collections from
 +          // this leaf will be overflown as well
 +          // TODO: reuse the overflow instance?
 +          throw new CollectionTerminatedException();
 +        } else if (overflow == null) {
 +          totalHits++;
 +        }
 +      }
 +    };
 +  }
 +
 +  @Override
 +  public TopDocs topDocs(int start, int howMany) {
 +    if (start != 0) {
 +      throw new IllegalArgumentException("start > 0 is not supported");
 +    }
 +    int size = topDocsSize();
 +    if (size == 0) {
 +      return TopSuggestDocs.EMPTY;
 +    }
 +    TopSuggestDocs.SuggestScoreDoc[] suggestScoreDocs = new TopSuggestDocs.SuggestScoreDoc[size];
 +    populateResults(suggestScoreDocs, howMany);
 +    return new TopSuggestDocs(totalHits, suggestScoreDocs, suggestScoreDocs[0].score);
 +  }
 +
 +  /**
 +   * Ignored
 +   */
 +  @Override
 +  public boolean needsScores() {
 +    return true;
 +  }
 +}
 diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextQuery.java lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextQuery.java
 index 20a76e6..9d0f10c 100644
 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextQuery.java
 +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextQuery.java
 @@ -475,6 +475,41 @@ public class TestContextQuery extends LuceneTestCase {
    }

    @Test
 +  public void testWithTopSuggestionDocCollector() throws Exception {
 +    Analyzer analyzer = new MockAnalyzer(random());
 +    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
 +    Document document = new Document();
 +
 +    document.add(new ContextSuggestField("suggest_field", "suggestion1", 4, "type1"));
 +    document.add(new ContextSuggestField("suggest_field", "suggestion2", 3, "type2"));
 +    document.add(new ContextSuggestField("suggest_field", "suggestion3", 2, "type3"));
 +    iw.addDocument(document);
 +
 +    document = new Document();
 +    document.add(new ContextSuggestField("suggest_field", "suggestion4", 1, "type4"));
 +    iw.addDocument(document);
 +
 +    if (rarely()) {
 +      iw.commit();
 +    }
 +
 +    DirectoryReader reader = iw.getReader();
 +    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
 +    ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
 +    TopSuggestionDocumentsCollector collector = new TopSuggestionDocumentsCollector(2);
 +    suggestIndexSearcher.suggest(query, collector);
 +    TopSuggestDocs suggestDocs = (TopSuggestDocs) collector.topDocs();
 +    assertSuggestions(suggestDocs,
 +        new Entry(Arrays.asList("suggestion1", "suggestion2", "suggestion3"),
 +            Arrays.asList("type1", "type2", "type3"),
 +            Arrays.asList(4f, 3f, 2f)),
 +        new Entry("suggestion4", "type4", 1));
 +
 +    reader.close();
 +    iw.close();
 +  }
 +
 +  @Test
    public void testRandomContextQueryScoring() throws Exception {
      Analyzer analyzer = new MockAnalyzer(random());
      try(RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
 @@ -508,11 +543,11 @@ public class TestContextQuery extends LuceneTestCase {
        ArrayUtil.introSort(expectedResults, new Comparator<Entry>() {
          @Override
          public int compare(Entry o1, Entry o2) {
 -          int cmp = Float.compare(o2.value, o1.value);
 +          int cmp = Float.compare(o2.scores.get(0), o1.scores.get(0));
            if (cmp != 0) {
              return cmp;
            } else {
 -            return o1.output.compareTo(o2.output);
 +            return o1.keys.get(0).compareTo(o2.keys.get(0));
            }
          }
        });
 diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestPrefixCompletionQuery.java lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestPrefixCompletionQuery.java
 index 20a2df1..e3ca124 100644
 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestPrefixCompletionQuery.java
 +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestPrefixCompletionQuery.java
 @@ -18,6 +18,7 @@ package org.apache.lucene.search.suggest.document;
   */

  import java.io.IOException;
 +import java.util.Arrays;
  import java.util.Objects;

  import org.apache.lucene.analysis.Analyzer;
 @@ -175,7 +176,7 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
      // the search should be admissible for a single segment
      TopSuggestDocs suggest = indexSearcher.suggest(query, num);
      assertTrue(suggest.totalHits >= 1);
 -    assertThat(suggest.scoreLookupDocs()[0].key.toString(), equalTo("abc_" + topScore));
 +    assertThat(suggest.scoreLookupDocs()[0].keys.get(0).toString(), equalTo("abc_" + topScore));
      assertThat(suggest.scoreLookupDocs()[0].score, equalTo((float) topScore));

      filter = new NumericRangeBitsProducer("filter_int_fld", 0, 0);
 @@ -309,5 +310,4 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
      reader.close();
      iw.close();
    }
 -
  }
 diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
 index 9699e5d..f1ff201 100644
 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
 +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
 @@ -21,6 +21,7 @@ import java.io.ByteArrayOutputStream;
  import java.io.IOException;
  import java.util.ArrayList;
  import java.util.Arrays;
 +import java.util.Collections;
  import java.util.HashMap;
  import java.util.List;
  import java.util.Map;
 @@ -444,7 +445,7 @@ public class TestSuggestField extends LuceneTestCase {
      TopSuggestDocs suggest = indexSearcher.suggest(query, num);
      assertEquals(num, suggest.totalHits);
      for (SuggestScoreDoc suggestScoreDoc : suggest.scoreLookupDocs()) {
 -      String key = suggestScoreDoc.key.toString();
 +      String key = suggestScoreDoc.keys.get(0).toString();
        assertTrue(key.startsWith("abc_"));
        String substring = key.substring(4);
        int fieldValue = Integer.parseInt(substring);
 @@ -489,8 +490,8 @@ public class TestSuggestField extends LuceneTestCase {
            assertTrue(topScore >= scoreDoc.score);
          }
          topScore = scoreDoc.score;
 -        assertThat((float) mappings.get(scoreDoc.key.toString()), equalTo(scoreDoc.score));
 -        assertNotNull(mappings.remove(scoreDoc.key.toString()));
 +        assertThat((float) mappings.get(scoreDoc.keys.get(0).toString()), equalTo(scoreDoc.score));
 +        assertNotNull(mappings.remove(scoreDoc.keys.get(0).toString()));
        }
      }

 @@ -615,18 +616,24 @@ public class TestSuggestField extends LuceneTestCase {
    }

    static class Entry {
 -    final String output;
 -    final float value;
 -    final String context;
 +    final List<String> keys;
 +    final List<Float> scores;
 +    final List<String> contexts;

 -    Entry(String output, float value) {
 -      this(output, null, value);
 +    Entry(String keys, float scores) {
 +      this(keys, null, scores);
      }

 -    Entry(String output, String context, float value) {
 -      this.output = output;
 -      this.value = value;
 -      this.context = context;
 +    Entry(String keys, String contexts, float scores) {
 +      this(Collections.singletonList(keys),
 +          Collections.singletonList(contexts),
 +          Collections.singletonList(scores));
 +    }
 +
 +    Entry(List<String> keys, List<String> contexts, List<Float> scores) {
 +      this.keys = keys;
 +      this.scores = scores;
 +      this.contexts = contexts;
      }
    }

 @@ -636,18 +643,28 @@ public class TestSuggestField extends LuceneTestCase {
      for (int i = 0; i < suggestScoreDocs.length; i++) {
        SuggestScoreDoc lookupDoc = suggestScoreDocs[i];
        String msg = "Expected: " + toString(expected[i]) + " Actual: " + toString(lookupDoc);
 -      assertThat(msg, lookupDoc.key.toString(), equalTo(expected[i].output));
 -      assertThat(msg, lookupDoc.score, equalTo(expected[i].value));
 -      assertThat(msg, lookupDoc.context, equalTo(expected[i].context));
 +      assertThat(lookupDoc.keys.size(), equalTo(expected[i].keys.size()));
 +      for (int keyIndex = 0; keyIndex < lookupDoc.keys.size(); keyIndex++) {
 +        assertThat(msg, lookupDoc.keys.get(keyIndex).toString(), equalTo(expected[i].keys.get(keyIndex)));
 +      }
 +      assertThat(lookupDoc.scores.size(), equalTo(expected[i].scores.size()));
 +      for (int scoreIndex = 0; scoreIndex < lookupDoc.keys.size(); scoreIndex++) {
 +        assertThat(msg, lookupDoc.scores.get(scoreIndex), equalTo(expected[i].scores.get(scoreIndex)));
 +      }
 +      assertThat(msg, lookupDoc.score, equalTo(expected[i].scores.get(0)));
 +      assertThat(lookupDoc.contexts.size(), equalTo(expected[i].contexts.size()));
 +      for (int contextIndex = 0; contextIndex < lookupDoc.contexts.size(); contextIndex++) {
 +        assertThat(msg, lookupDoc.contexts.get(contextIndex), equalTo(expected[i].contexts.get(contextIndex)));
 +      }
      }
    }

    private static String toString(Entry expected) {
 -    return "key:"+ expected.output+" score:"+expected.value+" context:"+expected.context;
 +    return "key:"+ expected.keys +" score:"+expected.scores +" context:"+expected.contexts;
    }

    private static String toString(SuggestScoreDoc actual) {
 -    return "key:"+ actual.key.toString()+" score:"+actual.score+" context:"+actual.context;
 +    return "key:"+ actual.keys.toString()+" score:"+actual.score+" context:"+actual.contexts.toString();
    }

    static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, String... suggestFields) {
 diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestTopSuggestionDocumentsCollector.java lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestTopSuggestionDocumentsCollector.java
 new file mode 100644
 index 0000000..d3cf35a
 --- /dev/null
 +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestTopSuggestionDocumentsCollector.java
 @@ -0,0 +1,69 @@
 +package org.apache.lucene.search.suggest.document;
 +
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import org.apache.lucene.analysis.Analyzer;
 +import org.apache.lucene.analysis.MockAnalyzer;
 +import org.apache.lucene.document.Document;
 +import org.apache.lucene.index.DirectoryReader;
 +import org.apache.lucene.index.RandomIndexWriter;
 +import org.apache.lucene.index.Term;
 +import org.apache.lucene.search.TopDocs;
 +import org.apache.lucene.store.Directory;
 +import org.apache.lucene.util.LuceneTestCase;
 +import org.apache.lucene.util.TestUtil;
 +import org.junit.Test;
 +
 +import static org.apache.lucene.search.suggest.document.TestSuggestField.iwcWithSuggestField;
 +import static org.hamcrest.core.IsEqual.equalTo;
 +
 +public class TestTopSuggestionDocumentsCollector extends LuceneTestCase {
 +
 +  @Test
 +  public void testEarlyTermination() throws Exception {
 +    try (Directory dir = newDirectory()) {
 +      Analyzer analyzer = new MockAnalyzer(random());
 +      int numSuggestions = atLeast(20);
 +      int numContexts = atLeast(5);
 +      try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
 +        for (int i = 0; i < numSuggestions; i++) {
 +          String suggestion = "sugg_" + TestUtil.randomSimpleString(random(), 10);
 +          Document document = new Document();
 +          for (int j = 0; j < numContexts; j++) {
 +            CharSequence context = TestUtil.randomSimpleString(random(), 10) + i;
 +            document.add(new ContextSuggestField("suggest_field", suggestion, i + 1, context));
 +          }
 +          iw.addDocument(document);
 +          if (rarely()) {
 +            iw.commit();
 +          }
 +        }
 +        try (DirectoryReader reader = iw.getReader()) {
 +          SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
 +          for (int suggestions = 1; suggestions <= numSuggestions; suggestions++) {
 +            TopSuggestionDocumentsCollector collector = new TopSuggestionDocumentsCollector(suggestions);
 +            CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg"));
 +            suggestIndexSearcher.suggest(query, collector);
 +            TopDocs topDocs = collector.topDocs();
 +            assertThat(topDocs.totalHits, equalTo(suggestions));
 +          }
 +        }
 +      }
 +    }
 +  }
 +}