blob: 23772e18f230d79916160f83768ac7a46a273a0d [file] [log] [blame]
package org.apache.lucene.index.sorter;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TotalHitCountCollector;
/**
* A {@link Collector} that early terminates collection of documents on a
* per-segment basis, if the segment was sorted according to the given
* {@link Sort}.
*
* <p>
* <b>NOTE:</b> the {@code Collector} detects sorted segments according to
* {@link SortingMergePolicy}, so it's best used in conjunction with it. Also,
* it collects up to a specified {@code numDocsToCollect} from each segment,
* and therefore is mostly suitable for use in conjunction with collectors such as
* {@link TopDocsCollector}, and not e.g. {@link TotalHitCountCollector}.
* <p>
* <b>NOTE</b>: If you wrap a {@code TopDocsCollector} that sorts in the same
* order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs}
* will be correct. However the total of {@link TopDocsCollector#getTotalHits()
* hit count} will be underestimated since not all matching documents will have
* been collected.
* <p>
* <b>NOTE</b>: This {@code Collector} uses {@link Sort#toString()} to detect
* whether a segment was sorted with the same {@code Sort}. This has
* two implications:
* <ul>
* <li>if a custom comparator is not implemented correctly and returns
* different identifiers for equivalent instances, this collector will not
* detect sorted segments,</li>
* <li>if you suddenly change the {@link IndexWriter}'s
* {@code SortingMergePolicy} to sort according to another criterion and if both
* the old and the new {@code Sort}s have the same identifier, this
* {@code Collector} will incorrectly detect sorted segments.</li>
* </ul>
*
* @lucene.experimental
*/
public class EarlyTerminatingSortingCollector extends Collector {
/** The wrapped Collector */
protected final Collector in;
/** Sort used to sort the search results */
protected final Sort sort;
/** Number of documents to collect in each segment */
protected final int numDocsToCollect;
/** Number of documents to collect in the current segment being processed */
protected int segmentTotalCollect;
/** True if the current segment being processed is sorted by {@link #sort} */
protected boolean segmentSorted;
private int numCollected;
/**
* Create a new {@link EarlyTerminatingSortingCollector} instance.
*
* @param in
* the collector to wrap
* @param sort
* the sort you are sorting the search results on
* @param numDocsToCollect
* the number of documents to collect on each segment. When wrapping
* a {@link TopDocsCollector}, this number should be the number of
* hits.
*/
public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect) {
if (numDocsToCollect <= 0) {
throw new IllegalStateException("numDocsToCollect must always be > 0, got " + segmentTotalCollect);
}
this.in = in;
this.sort = sort;
this.numDocsToCollect = numDocsToCollect;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
in.setScorer(scorer);
}
@Override
public void collect(int doc) throws IOException {
in.collect(doc);
if (++numCollected >= segmentTotalCollect) {
throw new CollectionTerminatedException();
}
}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
in.setNextReader(context);
segmentSorted = SortingMergePolicy.isSorted(context.reader(), sort);
segmentTotalCollect = segmentSorted ? numDocsToCollect : Integer.MAX_VALUE;
numCollected = 0;
}
@Override
public boolean acceptsDocsOutOfOrder() {
return !segmentSorted && in.acceptsDocsOutOfOrder();
}
}