blob: db7325fc7b35dfe5f1a1f09b1078b8a5fb4647b6 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
/**
 * Skeleton implementation of a {@link CompositeReader} that is backed by a
 * fixed array of sub-readers. Subclasses are responsible for correctly
 * refcounting and closing those sub-readers.
 *
 * <p>User code will most likely use {@link MultiReader} to build a
 * composite reader on a set of sub-readers (like several
 * {@link DirectoryReader}s).
 *
 * <p>For efficiency, documents are addressed in this API by
 * <i>document numbers</i>: non-negative integers, each naming a unique
 * document in the index. Document numbers are ephemeral -- they may change
 * as documents are added to and deleted from an index, so clients should
 * not rely on a given document keeping the same number between sessions.
 *
 * <p><a name="thread-safety"></a><p><b>NOTE</b>: {@link
 * IndexReader} instances are completely thread
 * safe, meaning multiple threads can call any of its methods,
 * concurrently. If your application requires external
 * synchronization, you should <b>not</b> synchronize on the
 * <code>IndexReader</code> instance; use your own
 * (non-Lucene) objects instead.
 *
 * @see MultiReader
 * @lucene.internal
 */
public abstract class BaseCompositeReader<R extends IndexReader> extends CompositeReader {

  /** The wrapped sub-readers, in the order used for docID resolution. */
  private final R[] subReaders;

  /** A comparator for sorting sub-readers */
  protected final Comparator<R> subReadersSorter;

  /**
   * Doc base of every sub-reader: {@code starts[i]} is the first composite
   * docID owned by {@code subReaders[i]}; the extra last slot holds
   * {@link #maxDoc}.
   */
  private final int[] starts;

  /** Sum of the sub-readers' {@code maxDoc()} values. */
  private final int maxDoc;

  /** Cached sum of the sub-readers' {@code numDocs()}; {@code -1} until first computed. */
  private int numDocs = -1;

  /**
   * Unmodifiable list view over {@link #subReaders}, handed out by
   * {@link #getSequentialSubReaders()}; internally the array is used
   * directly for efficiency.
   */
  private final List<R> subReadersList;

  /**
   * Constructs a {@code BaseCompositeReader} on the given subReaders.
   *
   * @param subReaders the wrapped sub-readers. This array is returned (as a list view) by
   *     {@link #getSequentialSubReaders} and used to resolve the correct
   *     subreader for docID-based methods. <b>Please note:</b> This array is <b>not</b>
   *     cloned and not protected for modification, the subclass is responsible
   *     to do this. When a sorter is supplied the array is sorted in place.
   * @param subReadersSorter a comparator for sorting sub readers. If not {@code null}, this
   *     comparator is used to sort the sub readers before they are used for resolving doc IDs.
   * @throws CorruptIndexException if this instance is a {@link DirectoryReader} and the
   *     sub-readers' total {@code maxDoc} exceeds {@link IndexWriter#getActualMaxDocs()}
   * @throws IllegalArgumentException if this instance is not a {@link DirectoryReader} and
   *     the sub-readers' total {@code maxDoc} exceeds {@link IndexWriter#getActualMaxDocs()}
   */
  protected BaseCompositeReader(R[] subReaders, Comparator<R> subReadersSorter) throws IOException {
    if (subReadersSorter != null) {
      Arrays.sort(subReaders, subReadersSorter);
    }
    this.subReaders = subReaders;
    this.subReadersSorter = subReadersSorter;
    this.subReadersList = Collections.unmodifiableList(Arrays.asList(subReaders));

    final int count = subReaders.length;
    this.starts = new int[count + 1];

    // Accumulate in a long so that an oversized total is detected below
    // instead of silently wrapping around.
    long docTotal = 0;
    int slot = 0;
    for (R sub : subReaders) {
      starts[slot++] = (int) docTotal; // doc base of this sub-reader
      docTotal += sub.maxDoc();
      sub.registerParentReader(this);
    }

    if (docTotal > IndexWriter.getActualMaxDocs()) {
      if (!(this instanceof DirectoryReader)) {
        // Caller is building a MultiReader and it has too many documents; this case is just illegal arguments:
        throw new IllegalArgumentException("Too many documents: composite IndexReaders cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + docTotal);
      }
      // A single index has too many documents and it is corrupt (IndexWriter prevents this as of LUCENE-6299)
      throw new CorruptIndexException("Too many documents: an index cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + docTotal, Arrays.toString(subReaders));
    }

    this.maxDoc = Math.toIntExact(docTotal);
    starts[count] = this.maxDoc; // sentinel entry after the last sub-reader
  }

  /**
   * Fetches the term vectors of {@code docID} from the sub-reader that owns
   * it, translating the composite docID into that sub-reader's local docID.
   */
  @Override
  public final Fields getTermVectors(int docID) throws IOException {
    ensureOpen();
    final int owner = readerIndex(docID);
    final int localDoc = docID - starts[owner];
    return subReaders[owner].getTermVectors(localDoc);
  }

  /**
   * Returns the sum of {@code numDocs()} over all sub-readers. The sum is
   * computed on first use and cached afterwards.
   */
  @Override
  public final int numDocs() {
    // ensureOpen() is intentionally not called here (it could affect performance).
    // The sum is computed lazily so that wrapping a reader that hides some
    // documents stays cheap at wrapping time; many uses of a reader never
    // call numDocs() at all. No synchronization is used: it is not crucial
    // to avoid asking the sub-readers more than once, since they likely
    // cache numDocs() themselves.
    final int cached = this.numDocs;
    if (cached != -1) {
      return cached;
    }
    int sum = 0;
    for (int i = 0; i < subReaders.length; i++) {
      sum += subReaders[i].numDocs();
    }
    assert sum >= 0;
    this.numDocs = sum;
    return sum;
  }

  /** Returns the total {@code maxDoc} computed at construction time. */
  @Override
  public final int maxDoc() {
    // ensureOpen() is intentionally not called here (it could affect performance).
    return maxDoc;
  }

  /**
   * Feeds the stored fields of {@code docID} to {@code visitor} by delegating
   * to the sub-reader that owns the document, using its local docID.
   */
  @Override
  public final void document(int docID, StoredFieldVisitor visitor) throws IOException {
    ensureOpen();
    final int owner = readerIndex(docID);
    final int localDoc = docID - starts[owner];
    subReaders[owner].document(localDoc, visitor);
  }

  /** Returns the sum of {@code docFreq(term)} over all sub-readers. */
  @Override
  public final int docFreq(Term term) throws IOException {
    ensureOpen();
    int sum = 0;
    for (R reader : subReaders) {
      final int freq = reader.docFreq(term);
      assert freq >= 0;
      assert freq <= reader.getDocCount(term.field());
      sum += freq;
    }
    return sum;
  }

  /** Returns the sum of {@code totalTermFreq(term)} over all sub-readers. */
  @Override
  public final long totalTermFreq(Term term) throws IOException {
    ensureOpen();
    long sum = 0;
    for (R reader : subReaders) {
      final long freq = reader.totalTermFreq(term);
      assert freq >= 0;
      assert freq <= reader.getSumTotalTermFreq(term.field());
      sum += freq;
    }
    return sum;
  }

  /** Returns the sum of {@code getSumDocFreq(field)} over all sub-readers. */
  @Override
  public final long getSumDocFreq(String field) throws IOException {
    ensureOpen();
    long sum = 0;
    for (int i = 0; i < subReaders.length; i++) {
      final R reader = subReaders[i];
      final long part = reader.getSumDocFreq(field);
      assert part >= 0;
      assert part <= reader.getSumTotalTermFreq(field);
      sum += part;
    }
    return sum;
  }

  /** Returns the sum of {@code getDocCount(field)} over all sub-readers. */
  @Override
  public final int getDocCount(String field) throws IOException {
    ensureOpen();
    int sum = 0;
    for (int i = 0; i < subReaders.length; i++) {
      final R reader = subReaders[i];
      final int part = reader.getDocCount(field);
      assert part >= 0;
      assert part <= reader.maxDoc();
      sum += part;
    }
    return sum;
  }

  /** Returns the sum of {@code getSumTotalTermFreq(field)} over all sub-readers. */
  @Override
  public final long getSumTotalTermFreq(String field) throws IOException {
    ensureOpen();
    long sum = 0;
    for (int i = 0; i < subReaders.length; i++) {
      final R reader = subReaders[i];
      final long part = reader.getSumTotalTermFreq(field);
      assert part >= 0;
      assert part >= reader.getSumDocFreq(field);
      sum += part;
    }
    return sum;
  }

  /**
   * Helper method for subclasses to get the index of the sub-reader that
   * owns a doc ID.
   *
   * @param docID a composite doc ID
   * @return the position of the owning sub-reader in the sub-reader array
   * @throws IllegalArgumentException if {@code docID} is negative or not less than
   *     {@link #maxDoc()}
   */
  protected final int readerIndex(int docID) {
    if (docID >= 0 && docID < maxDoc) {
      return ReaderUtil.subIndex(docID, this.starts);
    }
    throw new IllegalArgumentException("docID must be >= 0 and < maxDoc=" + maxDoc + " (got docID=" + docID + ")");
  }

  /**
   * Helper method for subclasses to get the docBase of the given sub-reader index.
   *
   * @param readerIndex position of the sub-reader in the sub-reader array
   * @return the first composite doc ID assigned to that sub-reader
   * @throws IllegalArgumentException if {@code readerIndex} is negative or not less than
   *     the number of sub-readers
   */
  protected final int readerBase(int readerIndex) {
    if (readerIndex >= 0 && readerIndex < subReaders.length) {
      return this.starts[readerIndex];
    }
    throw new IllegalArgumentException("readerIndex must be >= 0 and < getSequentialSubReaders().size()");
  }

  /** Returns the unmodifiable list view over the sub-reader array. */
  @Override
  protected final List<? extends R> getSequentialSubReaders() {
    return subReadersList;
  }
}