blob: 4f117b64c30e3fc45d0b7f99efd2eff0c3d62899 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Set;
/**
* A {@link CompositeReader} which reads multiple, parallel indexes. Each index added must have the
* same number of documents, and exactly the same number of leaves (with equal {@code maxDoc}), but
* typically each contains different fields. Deletions are taken from the first reader. Each
* document contains the union of the fields of all documents with the same document number. When
* searching, matches for a query term are from the first index added that has the field.
*
* <p>This is useful, e.g., with collections that have large fields which change rarely and small
* fields that change more frequently. The smaller fields may be re-indexed in a new index and both
* indexes may be searched together.
*
* <p><strong>Warning:</strong> It is up to you to make sure all indexes are created and modified
* the same way. For example, if you add documents to one index, you need to add the same documents
* in the same order to the other indexes. <em>Failure to do so will result in undefined
* behavior</em>. A good strategy to create suitable indexes with {@link IndexWriter} is to use
* {@link LogDocMergePolicy}, as this one does not reorder documents during merging (unlike {@code
* TieredMergePolicy}) and triggers merges by number of documents per segment. If you use different
* {@link MergePolicy}s it might happen that the segment structure of your index is no longer
* predictable.
*/
public class ParallelCompositeReader extends BaseCompositeReader<LeafReader> {

  /**
   * If {@code true}, {@link #doClose()} closes every tracked reader; otherwise it only decrements
   * their reference counts (which were incremented in the constructor).
   */
  private final boolean closeSubReaders;

  /**
   * Identity-based set of every reader released by {@link #doClose()}: the main readers, the
   * stored-field readers and the synthetic per-leaf readers created by this class.
   */
  private final Set<IndexReader> completeReaderSet =
      Collections.newSetFromMap(new IdentityHashMap<IndexReader, Boolean>());

  /**
   * Cache helper taken from the wrapped reader when exactly one reader is wrapped, {@code null}
   * otherwise.
   */
  private final CacheHelper cacheHelper;

  /**
   * Create a ParallelCompositeReader based on the provided readers; auto-closes the given readers
   * on {@link #close()}.
   *
   * @param readers the parallel readers to combine
   * @throws IllegalArgumentException if the readers do not have matching {@code maxDoc} or leaf
   *     structure
   */
  public ParallelCompositeReader(CompositeReader... readers) throws IOException {
    this(true, readers);
  }

  /**
   * Create a ParallelCompositeReader based on the provided readers. The same readers are used
   * for stored fields.
   *
   * @param closeSubReaders whether the given readers are closed (rather than decRef'ed) when this
   *     reader is closed
   * @param readers the parallel readers to combine
   * @throws IllegalArgumentException if the readers do not have matching {@code maxDoc} or leaf
   *     structure
   */
  public ParallelCompositeReader(boolean closeSubReaders, CompositeReader... readers)
      throws IOException {
    this(closeSubReaders, readers, readers);
  }

  /**
   * Expert: create a ParallelCompositeReader based on the provided readers and storedFieldReaders;
   * when a document is loaded, only storedFieldReaders will be used.
   *
   * @param closeSubReaders whether the given readers are closed (rather than decRef'ed) when this
   *     reader is closed
   * @param readers the parallel readers to combine
   * @param storedFieldReaders the readers used when loading documents
   * @throws IllegalArgumentException if {@code readers} is empty while {@code storedFieldReaders}
   *     is not, or if the readers do not have matching {@code maxDoc} or leaf structure
   */
  public ParallelCompositeReader(
      boolean closeSubReaders, CompositeReader[] readers, CompositeReader[] storedFieldReaders)
      throws IOException {
    super(prepareLeafReaders(readers, storedFieldReaders));
    this.closeSubReaders = closeSubReaders;
    Collections.addAll(completeReaderSet, readers);
    Collections.addAll(completeReaderSet, storedFieldReaders);
    // update ref-counts (like MultiReader):
    if (!closeSubReaders) {
      for (final IndexReader reader : completeReaderSet) {
        reader.incRef();
      }
    }
    // finally add our own synthetic readers, so we close or decRef them, too (it does not matter
    // what we do)
    completeReaderSet.addAll(getSequentialSubReaders());
    // ParallelReader instances can be short-lived, which would make caching trappy
    // so we do not cache on them, unless they wrap a single reader in which
    // case we delegate
    if (readers.length == 1
        && storedFieldReaders.length == 1
        && readers[0] == storedFieldReaders[0]) {
      cacheHelper = readers[0].getReaderCacheHelper();
    } else {
      cacheHelper = null;
    }
  }

  /**
   * Validates that all given readers are structurally compatible and builds one synthetic {@link
   * ParallelLeafReader} per leaf position, combining the leaves found at that position in every
   * reader.
   *
   * @param readers the main parallel readers
   * @param storedFieldsReaders the readers used for stored fields
   * @return one wrapped leaf per leaf of the first reader; empty if {@code readers} is empty
   * @throws IllegalArgumentException if {@code readers} is empty while {@code storedFieldsReaders}
   *     is not, or if validation of either array fails
   */
  private static LeafReader[] prepareLeafReaders(
      CompositeReader[] readers, CompositeReader[] storedFieldsReaders) throws IOException {
    if (readers.length == 0) {
      if (storedFieldsReaders.length > 0) {
        throw new IllegalArgumentException(
            "There must be at least one main reader if storedFieldsReaders are used.");
      }
      return new LeafReader[0];
    }

    // The first reader defines the reference structure every other reader must match.
    final List<? extends LeafReaderContext> firstLeaves = readers[0].leaves();
    final int maxDoc = readers[0].maxDoc();
    final int noLeaves = firstLeaves.size();
    final int[] leafMaxDoc = new int[noLeaves];
    for (int i = 0; i < noLeaves; i++) {
      leafMaxDoc[i] = firstLeaves.get(i).reader().maxDoc();
    }

    // check compatibility:
    validate(readers, maxDoc, leafMaxDoc);
    validate(storedFieldsReaders, maxDoc, leafMaxDoc);

    // flatten structure of each Composite to just LeafReader[]
    // and combine parallel structure with ParallelLeafReaders:
    final LeafReader[] wrappedLeaves = new LeafReader[noLeaves];
    for (int i = 0; i < wrappedLeaves.length; i++) {
      final LeafReader[] subs = new LeafReader[readers.length];
      for (int j = 0; j < readers.length; j++) {
        subs[j] = readers[j].leaves().get(i).reader();
      }
      final LeafReader[] storedSubs = new LeafReader[storedFieldsReaders.length];
      for (int j = 0; j < storedFieldsReaders.length; j++) {
        storedSubs[j] = storedFieldsReaders[j].leaves().get(i).reader();
      }
      // We pass true for closeSubs and we prevent touching of subreaders in doClose():
      // By this the synthetic throw-away readers used here are completely invisible to
      // ref-counting
      wrappedLeaves[i] =
          new ParallelLeafReader(true, subs, storedSubs) {
            @Override
            protected void doClose() {}
          };
    }
    return wrappedLeaves;
  }

  /**
   * Checks that every reader has the expected total {@code maxDoc}, the expected number of leaves,
   * and the expected {@code maxDoc} at each leaf position.
   *
   * @param readers the readers to check
   * @param maxDoc the required total {@code maxDoc}
   * @param leafMaxDoc the required {@code maxDoc} for each leaf position
   * @throws IllegalArgumentException on the first mismatch found
   */
  private static void validate(CompositeReader[] readers, int maxDoc, int[] leafMaxDoc) {
    for (final CompositeReader reader : readers) {
      final List<? extends LeafReaderContext> subs = reader.leaves();
      if (reader.maxDoc() != maxDoc) {
        throw new IllegalArgumentException(
            "All readers must have same maxDoc: " + maxDoc + "!=" + reader.maxDoc());
      }
      final int noSubs = subs.size();
      if (noSubs != leafMaxDoc.length) {
        throw new IllegalArgumentException("All readers must have same number of leaf readers");
      }
      for (int subIDX = 0; subIDX < noSubs; subIDX++) {
        final LeafReader r = subs.get(subIDX).reader();
        if (r.maxDoc() != leafMaxDoc[subIDX]) {
          throw new IllegalArgumentException(
              "All leaf readers must have same corresponding subReader maxDoc");
        }
      }
    }
  }

  /**
   * Returns the cache helper of the single wrapped reader, or {@code null} if this instance wraps
   * more than one distinct reader.
   */
  @Override
  public CacheHelper getReaderCacheHelper() {
    return cacheHelper;
  }

  /**
   * Releases every tracked reader, closing it or decrementing its reference count depending on
   * {@code closeSubReaders}. All readers are attempted even if some fail with an {@link
   * IOException}.
   *
   * @throws IOException the first {@link IOException} encountered; any later ones are attached to
   *     it as suppressed exceptions so that no failure is lost
   */
  @Override
  protected synchronized void doClose() throws IOException {
    IOException firstFailure = null;
    for (final IndexReader reader : completeReaderSet) {
      try {
        if (closeSubReaders) {
          reader.close();
        } else {
          reader.decRef();
        }
      } catch (IOException e) {
        if (firstFailure == null) {
          firstFailure = e;
        } else if (firstFailure != e) {
          // Keep later failures visible instead of dropping them; the identity check avoids
          // the IllegalArgumentException that self-suppression would raise.
          firstFailure.addSuppressed(e);
        }
      }
    }
    // throw the first exception (carrying any others as suppressed)
    if (firstFailure != null) {
      throw firstFailure;
    }
  }
}