blob: f334626f8a3f72f603e2b88cd7772d1e59831686 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.search.DocIdSetIterator; // javadocs
import org.apache.lucene.util.PriorityQueue;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
/** Utility class to help merging documents from sub-readers according to either simple
* concatenated (unsorted) order, or by a specified index-time sort, skipping
* deleted documents and remapping non-deleted documents. */
public abstract class DocIDMerger<T extends DocIDMerger.Sub> {
/** Represents one sub-reader being merged */
public static abstract class Sub {
/** Mapped doc ID */
public int mappedDocID;
final MergeState.DocMap docMap;
/** Sole constructor */
public Sub(MergeState.DocMap docMap) {
this.docMap = docMap;
}
/** Returns the next document ID from this sub reader, and {@link DocIdSetIterator#NO_MORE_DOCS} when done */
public abstract int nextDoc() throws IOException;
/**
* Like {@link #nextDoc()} but skips over unmapped docs and returns the next mapped doc ID, or
* {@link DocIdSetIterator#NO_MORE_DOCS} when exhausted. This method sets {@link #mappedDocID}
* as a side effect.
*/
public final int nextMappedDoc() throws IOException {
while (true) {
int doc = nextDoc();
if (doc == NO_MORE_DOCS) {
return this.mappedDocID = NO_MORE_DOCS;
}
int mappedDoc = docMap.get(doc);
if (mappedDoc != -1) {
return this.mappedDocID = mappedDoc;
}
}
}
}
/** Construct this from the provided subs, specifying the maximum sub count */
public static <T extends DocIDMerger.Sub> DocIDMerger<T> of(List<T> subs, int maxCount, boolean indexIsSorted) throws IOException {
if (indexIsSorted && maxCount > 1) {
return new SortedDocIDMerger<>(subs, maxCount);
} else {
return new SequentialDocIDMerger<>(subs);
}
}
/** Construct this from the provided subs */
public static <T extends DocIDMerger.Sub> DocIDMerger<T> of(List<T> subs, boolean indexIsSorted) throws IOException {
return of(subs, subs.size(), indexIsSorted);
}
/** Reuse API, currently only used by postings during merge */
public abstract void reset() throws IOException;
/** Returns null when done.
* <b>NOTE:</b> after the iterator has exhausted you should not call this
* method, as it may result in unpredicted behavior. */
public abstract T next() throws IOException;
private DocIDMerger() {}
private static class SequentialDocIDMerger<T extends DocIDMerger.Sub> extends DocIDMerger<T> {
private final List<T> subs;
private T current;
private int nextIndex;
private SequentialDocIDMerger(List<T> subs) throws IOException {
this.subs = subs;
reset();
}
@Override
public void reset() throws IOException {
if (subs.size() > 0) {
current = subs.get(0);
nextIndex = 1;
} else {
current = null;
nextIndex = 0;
}
}
@Override
public T next() throws IOException {
while (current.nextMappedDoc() == NO_MORE_DOCS) {
if (nextIndex == subs.size()) {
current = null;
return null;
}
current = subs.get(nextIndex);
nextIndex++;
}
return current;
}
}
private static class SortedDocIDMerger<T extends DocIDMerger.Sub> extends DocIDMerger<T> {
private final List<T> subs;
private T current;
private final PriorityQueue<T> queue;
private SortedDocIDMerger(List<T> subs, int maxCount) throws IOException {
if (maxCount <= 1) {
throw new IllegalArgumentException();
}
this.subs = subs;
queue = new PriorityQueue<T>(maxCount - 1) {
@Override
protected boolean lessThan(Sub a, Sub b) {
assert a.mappedDocID != b.mappedDocID;
return a.mappedDocID < b.mappedDocID;
}
};
reset();
}
@Override
public void reset() throws IOException {
// caller may not have fully consumed the queue:
queue.clear();
current = null;
boolean first = true;
for (T sub : subs) {
if (first) {
// by setting mappedDocID = -1, this entry is guaranteed to be the top of the queue
// so the first call to next() will advance it
sub.mappedDocID = -1;
current = sub;
first = false;
} else if (sub.nextMappedDoc() != NO_MORE_DOCS) {
queue.add(sub);
} // else all docs in this sub were deleted; do not add it to the queue!
}
}
@Override
public T next() throws IOException {
int nextDoc = current.nextMappedDoc();
if (nextDoc == NO_MORE_DOCS) {
if (queue.size() == 0) {
current = null;
} else {
current = queue.pop();
}
} else if (queue.size() > 0 && nextDoc > queue.top().mappedDocID) {
T newCurrent = queue.top();
queue.updateTop(current);
current = newCurrent;
}
return current;
}
}
}