package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.lucene.util.Bits;
/** An {@link AtomicReader} which reads multiple, parallel indexes. Each index
* added must have the same number of documents, but typically each contains
* different fields. Deletions are taken from the first reader.
* Each document contains the union of the fields of all documents
* with the same document number. When searching, matches for a
* query term are from the first index added that has the field.
*
* <p>This is useful, e.g., with collections that have large fields which
* change rarely and small fields that change more frequently. The smaller
* fields may be re-indexed in a new index and both indexes may be searched
* together.
*
* <p><strong>Warning:</strong> It is up to you to make sure all indexes
* are created and modified the same way. For example, if you add
* documents to one index, you need to add the same documents in the
* same order to the other indexes. <em>Failure to do so will result in
* undefined behavior</em>.
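*
* <p>A minimal usage sketch (assuming {@code bigDir} and {@code smallDir}
* are hypothetical {@link org.apache.lucene.store.Directory} instances
* holding two document-aligned parallel indexes):
*
* <pre>
* DirectoryReader big = DirectoryReader.open(bigDir);
* DirectoryReader small = DirectoryReader.open(smallDir);
* ParallelAtomicReader parallel = new ParallelAtomicReader(
*     SlowCompositeReaderWrapper.wrap(big),
*     SlowCompositeReaderWrapper.wrap(small));
* // a search now sees the union of both indexes' fields
* IndexSearcher searcher = new IndexSearcher(parallel);
* </pre>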
*/
public final class ParallelAtomicReader extends AtomicReader {
private final FieldInfos fieldInfos = new FieldInfos();
private final ParallelFields fields = new ParallelFields();
private final AtomicReader[] parallelReaders, storedFieldsReaders;
private final Set<AtomicReader> completeReaderSet =
Collections.newSetFromMap(new IdentityHashMap<AtomicReader,Boolean>());
private final boolean closeSubReaders;
private final int maxDoc, numDocs;
private final boolean hasDeletions;
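// maps each field name to the first parallel reader that declares it
// (and, for vector fields, the first reader that stores term vectors):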
private final SortedMap<String,AtomicReader> fieldToReader = new TreeMap<String,AtomicReader>();
private final SortedMap<String,AtomicReader> tvFieldToReader = new TreeMap<String,AtomicReader>();
/** Create a ParallelAtomicReader based on the provided
* readers; auto-closes the given readers on {@link #close()}. */
public ParallelAtomicReader(AtomicReader... readers) throws IOException {
this(true, readers);
}
/** Create a ParallelAtomicReader based on the provided
* readers; the given readers are closed on {@link #close()}
* only if closeSubReaders is true. */
public ParallelAtomicReader(boolean closeSubReaders, AtomicReader... readers) throws IOException {
this(closeSubReaders, readers, readers);
}
/** Expert: create a ParallelAtomicReader based on the provided
* readers and storedFieldsReaders; when a document is
* loaded, only the storedFieldsReaders will be used.
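*
* <p>A minimal sketch of the intent (assuming {@code main} and
* {@code stored} are hypothetical document-aligned AtomicReaders):
* postings, norms and doc values come from {@code main}, while
* {@link #document} consults only {@code stored}:
*
* <pre>
* ParallelAtomicReader r = new ParallelAtomicReader(true,
*     new AtomicReader[] {main}, new AtomicReader[] {stored});
* </pre>
*/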
public ParallelAtomicReader(boolean closeSubReaders, AtomicReader[] readers, AtomicReader[] storedFieldsReaders) throws IOException {
this.closeSubReaders = closeSubReaders;
if (readers.length == 0 && storedFieldsReaders.length > 0)
throw new IllegalArgumentException("There must be at least one main reader if storedFieldsReaders are used.");
this.parallelReaders = readers.clone();
this.storedFieldsReaders = storedFieldsReaders.clone();
if (parallelReaders.length > 0) {
final AtomicReader first = parallelReaders[0];
this.maxDoc = first.maxDoc();
this.numDocs = first.numDocs();
this.hasDeletions = first.hasDeletions();
} else {
this.maxDoc = this.numDocs = 0;
this.hasDeletions = false;
}
Collections.addAll(completeReaderSet, this.parallelReaders);
Collections.addAll(completeReaderSet, this.storedFieldsReaders);
// check compatibility:
for (AtomicReader reader : completeReaderSet) {
if (reader.maxDoc() != maxDoc) {
throw new IllegalArgumentException("All readers must have same maxDoc: "+maxDoc+"!="+reader.maxDoc());
}
}
// build FieldInfos and fieldToReader map:
for (final AtomicReader reader : this.parallelReaders) {
final FieldInfos readerFieldInfos = reader.getFieldInfos();
for (FieldInfo fieldInfo : readerFieldInfos) {
// NOTE: first reader having a given field "wins":
if (!fieldToReader.containsKey(fieldInfo.name)) {
fieldInfos.add(fieldInfo);
fieldToReader.put(fieldInfo.name, reader);
if (fieldInfo.storeTermVector) {
tvFieldToReader.put(fieldInfo.name, reader);
}
}
}
}
// build Fields instance
for (final AtomicReader reader : this.parallelReaders) {
final Fields readerFields = reader.fields();
if (readerFields != null) {
final FieldsEnum it = readerFields.iterator();
String name;
while ((name = it.next()) != null) {
// only add if the reader responsible for that field name is the current one:
if (fieldToReader.get(name) == reader) {
this.fields.addField(name, it.terms());
}
}
}
}
// do this last, so that any exceptions thrown above don't affect refcounts:
for (AtomicReader reader : completeReaderSet) {
if (!closeSubReaders) {
reader.incRef();
}
reader.registerParentReader(this);
}
}
@Override
public String toString() {
final StringBuilder buffer = new StringBuilder("ParallelAtomicReader(");
for (final Iterator<AtomicReader> iter = completeReaderSet.iterator(); iter.hasNext();) {
buffer.append(iter.next());
if (iter.hasNext()) buffer.append(", ");
}
return buffer.append(')').toString();
}
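// Iterates the field names of a ParallelFields map in sorted
// (TreeMap) order, exposing each field's Terms on request.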
private final class ParallelFieldsEnum extends FieldsEnum {
private String currentField;
private final Iterator<String> keys;
private final ParallelFields fields;
ParallelFieldsEnum(ParallelFields fields) {
this.fields = fields;
keys = fields.fields.keySet().iterator();
}
@Override
public String next() throws IOException {
if (keys.hasNext()) {
currentField = keys.next();
} else {
currentField = null;
}
return currentField;
}
@Override
public Terms terms() throws IOException {
return fields.terms(currentField);
}
}
// Single instance of this, per ParallelAtomicReader instance
private final class ParallelFields extends Fields {
final Map<String,Terms> fields = new TreeMap<String,Terms>();
ParallelFields() {
}
void addField(String fieldName, Terms terms) throws IOException {
fields.put(fieldName, terms);
}
@Override
public FieldsEnum iterator() throws IOException {
return new ParallelFieldsEnum(this);
}
@Override
public Terms terms(String field) throws IOException {
return fields.get(field);
}
@Override
public int size() throws IOException {
return fields.size();
}
}
@Override
public FieldInfos getFieldInfos() {
return fieldInfos;
}
@Override
public Bits getLiveDocs() {
ensureOpen();
return hasDeletions ? parallelReaders[0].getLiveDocs() : null;
}
@Override
public Fields fields() {
ensureOpen();
return fields;
}
@Override
public int numDocs() {
// Don't call ensureOpen() here (it could affect performance)
return numDocs;
}
@Override
public int maxDoc() {
// Don't call ensureOpen() here (it could affect performance)
return maxDoc;
}
@Override
public boolean hasDeletions() {
ensureOpen();
return hasDeletions;
}
@Override
public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
ensureOpen();
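// each storedFieldsReader contributes its stored fields for the
// same docID, so the visitor sees the union of all of them: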
for (final AtomicReader reader : storedFieldsReaders) {
reader.document(docID, visitor);
}
}
@Override
public Fields getTermVectors(int docID) throws IOException {
ensureOpen();
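// collect this document's term vectors from the reader that "owns"
// each vector field; stays null if no field has a vector here: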
ParallelFields fields = null;
for (Map.Entry<String,AtomicReader> ent : tvFieldToReader.entrySet()) {
String fieldName = ent.getKey();
Terms vector = ent.getValue().getTermVector(docID, fieldName);
if (vector != null) {
if (fields == null) {
fields = new ParallelFields();
}
fields.addField(fieldName, vector);
}
}
return fields;
}
@Override
protected synchronized void doClose() throws IOException {
IOException ioe = null;
for (AtomicReader reader : completeReaderSet) {
try {
if (closeSubReaders) {
reader.close();
} else {
reader.decRef();
}
} catch (IOException e) {
if (ioe == null) ioe = e;
}
}
// throw the first exception
if (ioe != null) throw ioe;
}
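// per-field values below are delegated to the reader that "won"
// the field when fieldToReader was built: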
@Override
public DocValues docValues(String field) throws IOException {
ensureOpen();
AtomicReader reader = fieldToReader.get(field);
return reader == null ? null : reader.docValues(field);
}
@Override
public DocValues normValues(String field) throws IOException {
ensureOpen();
AtomicReader reader = fieldToReader.get(field);
return reader == null ? null : reader.normValues(field);
}
}