| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.index; |
| |
| import java.io.IOException; |
| import java.util.Collections; |
| import java.util.IdentityHashMap; |
| import java.util.List; |
| import java.util.Set; |
| |
| /** |
| * A {@link CompositeReader} which reads multiple, parallel indexes. Each index added must have the |
| * same number of documents, and exactly the same number of leaves (with equal {@code maxDoc}), but |
| * typically each contains different fields. Deletions are taken from the first reader. Each |
| * document contains the union of the fields of all documents with the same document number. When |
| * searching, matches for a query term are from the first index added that has the field. |
| * |
| * <p>This is useful, e.g., with collections that have large fields which change rarely and small |
| * fields that change more frequently. The smaller fields may be re-indexed in a new index and both |
| * indexes may be searched together. |
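| * |
| * <p>A minimal usage sketch ({@code dir1} and {@code dir2} are assumed to be existing |
| * {@link org.apache.lucene.store.Directory} instances holding the two parallel indexes): |
| * |
| * <pre class="prettyprint"> |
| * DirectoryReader large = DirectoryReader.open(dir1); |
| * DirectoryReader small = DirectoryReader.open(dir2); |
| * ParallelCompositeReader parallel = new ParallelCompositeReader(large, small); |
| * IndexSearcher searcher = new IndexSearcher(parallel); |
| * // ... search as usual; hits expose the union of fields from both indexes ... |
| * parallel.close(); // also closes large and small, as closeSubReaders defaults to true |
| * </pre> |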
| * |
| * <p><strong>Warning:</strong> It is up to you to make sure all indexes are created and modified |
| * the same way. For example, if you add documents to one index, you need to add the same documents |
| * in the same order to the other indexes. <em>Failure to do so will result in undefined |
| * behavior</em>. A good strategy for creating suitable indexes with {@link IndexWriter} is to |
| * use {@link LogDocMergePolicy}: unlike {@code TieredMergePolicy}, it does not reorder documents |
| * during merging and it triggers merges by the number of documents per segment. If you use |
| * different {@link MergePolicy}s, the segment structure of your indexes may no longer be |
| * predictable. |
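| * |
| * <p>For example, a writer configured as follows keeps the segment structure predictable (a |
| * sketch; {@code dir} and {@code analyzer} are assumptions, not part of this class): |
| * |
| * <pre class="prettyprint"> |
| * IndexWriterConfig config = new IndexWriterConfig(analyzer); |
| * config.setMergePolicy(new LogDocMergePolicy()); |
| * try (IndexWriter writer = new IndexWriter(dir, config)) { |
| *   // add documents in exactly the same order as in the other parallel indexes |
| * } |
| * </pre> |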
| */ |
| public class ParallelCompositeReader extends BaseCompositeReader<LeafReader> { |
| private final boolean closeSubReaders; |
| private final Set<IndexReader> completeReaderSet = |
| Collections.newSetFromMap(new IdentityHashMap<IndexReader, Boolean>()); |
| private final CacheHelper cacheHelper; |
| |
| /** |
| * Create a ParallelCompositeReader based on the provided readers; auto-closes the given readers |
| * on {@link #close()}. |
| */ |
| public ParallelCompositeReader(CompositeReader... readers) throws IOException { |
| this(true, readers); |
| } |
| |
| /** |
| * Create a ParallelCompositeReader based on the provided readers; if {@code closeSubReaders} is |
| * {@code true}, the given readers are closed on {@link #close()}, otherwise only their reference |
| * counts are decremented. |
| */ |
| public ParallelCompositeReader(boolean closeSubReaders, CompositeReader... readers) |
| throws IOException { |
| this(closeSubReaders, readers, readers); |
| } |
| |
| /** |
| * Expert: create a ParallelCompositeReader based on the provided readers and storedFieldReaders; |
| * when a document is loaded, only the {@code storedFieldReaders} will be used to load its stored |
| * fields. |
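| * |
| * <p>For example, to match queries against both readers but load stored fields from only the |
| * first one (a sketch; {@code r1} and {@code r2} are assumed to be open composite readers): |
| * |
| * <pre class="prettyprint"> |
| * ParallelCompositeReader pcr = new ParallelCompositeReader( |
| *     false, new CompositeReader[] {r1, r2}, new CompositeReader[] {r1}); |
| * </pre> |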
| */ |
| public ParallelCompositeReader( |
| boolean closeSubReaders, CompositeReader[] readers, CompositeReader[] storedFieldReaders) |
| throws IOException { |
| super(prepareLeafReaders(readers, storedFieldReaders)); |
| this.closeSubReaders = closeSubReaders; |
| Collections.addAll(completeReaderSet, readers); |
| Collections.addAll(completeReaderSet, storedFieldReaders); |
| // update ref-counts (like MultiReader): |
| if (!closeSubReaders) { |
| for (final IndexReader reader : completeReaderSet) { |
| reader.incRef(); |
| } |
| } |
| // finally add our own synthetic readers, so we close or decRef them, too (it does not matter |
| // which, as their doClose() is a no-op) |
| completeReaderSet.addAll(getSequentialSubReaders()); |
| // ParallelReader instances can be short-lived, which would make caching trappy |
| // so we do not cache on them, unless they wrap a single reader in which |
| // case we delegate |
| if (readers.length == 1 |
| && storedFieldReaders.length == 1 |
| && readers[0] == storedFieldReaders[0]) { |
| cacheHelper = readers[0].getReaderCacheHelper(); |
| } else { |
| cacheHelper = null; |
| } |
| } |
| |
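| /** |
| * Flattens each composite reader to its leaves and combines the parallel leaves into synthetic |
| * {@link ParallelLeafReader}s, after validating that all readers share the same {@code maxDoc} |
| * and leaf structure. |
| */ |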
| private static LeafReader[] prepareLeafReaders( |
| CompositeReader[] readers, CompositeReader[] storedFieldsReaders) throws IOException { |
| if (readers.length == 0) { |
| if (storedFieldsReaders.length > 0) { |
| throw new IllegalArgumentException( |
| "There must be at least one main reader if storedFieldsReaders are used."); |
| } |
| return new LeafReader[0]; |
| } else { |
| final List<? extends LeafReaderContext> firstLeaves = readers[0].leaves(); |
| |
| // check compatibility: |
| final int maxDoc = readers[0].maxDoc(), noLeaves = firstLeaves.size(); |
| final int[] leafMaxDoc = new int[noLeaves]; |
| for (int i = 0; i < noLeaves; i++) { |
| final LeafReader r = firstLeaves.get(i).reader(); |
| leafMaxDoc[i] = r.maxDoc(); |
| } |
| validate(readers, maxDoc, leafMaxDoc); |
| validate(storedFieldsReaders, maxDoc, leafMaxDoc); |
| |
| // flatten structure of each Composite to just LeafReader[] |
| // and combine parallel structure with ParallelLeafReaders: |
| final LeafReader[] wrappedLeaves = new LeafReader[noLeaves]; |
| for (int i = 0; i < wrappedLeaves.length; i++) { |
| final LeafReader[] subs = new LeafReader[readers.length]; |
| for (int j = 0; j < readers.length; j++) { |
| subs[j] = readers[j].leaves().get(i).reader(); |
| } |
| final LeafReader[] storedSubs = new LeafReader[storedFieldsReaders.length]; |
| for (int j = 0; j < storedFieldsReaders.length; j++) { |
| storedSubs[j] = storedFieldsReaders[j].leaves().get(i).reader(); |
| } |
| // We pass true for closeSubs and suppress closing of the subreaders in doClose(); this |
| // keeps the synthetic throw-away readers used here completely invisible to ref-counting |
| wrappedLeaves[i] = |
| new ParallelLeafReader(true, subs, storedSubs) { |
| @Override |
| protected void doClose() {} |
| }; |
| } |
| return wrappedLeaves; |
| } |
| } |
| |
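| /** |
| * Verifies that every reader has the same total {@code maxDoc} and the same per-leaf |
| * {@code maxDoc}s as the first main reader. |
| */ |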
| private static void validate(CompositeReader[] readers, int maxDoc, int[] leafMaxDoc) { |
| for (int i = 0; i < readers.length; i++) { |
| final CompositeReader reader = readers[i]; |
| final List<? extends LeafReaderContext> subs = reader.leaves(); |
| if (reader.maxDoc() != maxDoc) { |
| throw new IllegalArgumentException( |
| "All readers must have same maxDoc: " + maxDoc + "!=" + reader.maxDoc()); |
| } |
| final int noSubs = subs.size(); |
| if (noSubs != leafMaxDoc.length) { |
| throw new IllegalArgumentException("All readers must have the same number of leaf readers"); |
| } |
| for (int subIDX = 0; subIDX < noSubs; subIDX++) { |
| final LeafReader r = subs.get(subIDX).reader(); |
| if (r.maxDoc() != leafMaxDoc[subIDX]) { |
| throw new IllegalArgumentException( |
| "All leaf readers must have the same maxDoc as the corresponding leaf of the first reader"); |
| } |
| } |
| } |
| } |
| |
| @Override |
| public CacheHelper getReaderCacheHelper() { |
| return cacheHelper; |
| } |
| |
| @Override |
| protected synchronized void doClose() throws IOException { |
| IOException ioe = null; |
| for (final IndexReader reader : completeReaderSet) { |
| try { |
| if (closeSubReaders) { |
| reader.close(); |
| } else { |
| reader.decRef(); |
| } |
| } catch (IOException e) { |
| if (ioe == null) ioe = e; |
| } |
| } |
| // throw the first exception |
| if (ioe != null) throw ioe; |
| } |
| } |