lucene/core/src/java/org/apache/lucene/index/ParallelCompositeReader.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.index;


 import java.io.IOException;
 import java.util.Collections;
 import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Set;

 /** A {@link CompositeReader} which reads multiple, parallel indexes.  Each
  * index added must have the same number of documents, and exactly the same
  * number of leaves (with equal {@code maxDoc}), but typically each contains
  * different fields. Deletions are taken from the first reader. Each document
  * contains the union of the fields of all documents with the same document
  * number.  When searching, matches for a query term are from the first index
  * added that has the field.
  *
  * <p>This is useful, e.g., with collections that have large fields which
  * change rarely and small fields that change more frequently.  The smaller
  * fields may be re-indexed in a new index and both indexes may be searched
  * together.
  *
  * <p><strong>Warning:</strong> It is up to you to make sure all indexes
  * are created and modified the same way. For example, if you add
  * documents to one index, you need to add the same documents in the
  * same order to the other indexes. <em>Failure to do so will result in
  * undefined behavior</em>.
  * A good strategy to create suitable indexes with {@link IndexWriter} is to use
  * {@link LogDocMergePolicy}, as this one does not reorder documents
  * during merging (unlike {@code TieredMergePolicy}, which may) and triggers merges
  * by number of documents per segment. If you use different {@link MergePolicy}s
  * it might happen that the segment structure of your index is no longer predictable.
  */
 public class ParallelCompositeReader extends BaseCompositeReader<LeafReader> {
   /**
    * If {@code true}, {@link #doClose()} closes every tracked reader; if {@code false},
    * the constructor increments the ref-count of the given readers and
    * {@link #doClose()} decrements it again.
    */
   private final boolean closeSubReaders;

   /**
    * Identity-based set of every reader released in {@link #doClose()}: the
    * {@code readers} and {@code storedFieldReaders} handed to the constructor plus
    * the synthetic per-leaf wrappers built by {@code prepareLeafReaders}.
    */
   private final Set<IndexReader> completeReaderSet =
     Collections.newSetFromMap(new IdentityHashMap<IndexReader,Boolean>());

   /**
    * Cache helper borrowed from the single wrapped reader, or {@code null} when this
    * instance combines more than one reader and therefore does not support caching.
    */
   private final CacheHelper cacheHelper;

   /**
    * Create a ParallelCompositeReader based on the provided
    * readers; auto-closes the given readers on {@link #close()}.
    *
    * @param readers the parallel readers to combine
    * @throws IOException propagated from the delegated constructor
    */
   public ParallelCompositeReader(CompositeReader... readers) throws IOException {
     this(true, readers);
   }

   /**
    * Create a ParallelCompositeReader based on the provided readers. The same
    * readers are used both for searching and for loading stored fields.
    *
    * @param closeSubReaders whether the given readers are closed ({@code true}) or only
    *        ref-count-decremented ({@code false}) when this reader is closed
    * @param readers the parallel readers to combine
    * @throws IOException propagated from the delegated constructor
    */
   public ParallelCompositeReader(boolean closeSubReaders, CompositeReader... readers) throws IOException {
     this(closeSubReaders, readers, readers);
   }

   /**
    * Expert: create a ParallelCompositeReader based on the provided
    * readers and storedFieldReaders; when a document is
    * loaded, only storedFieldReaders will be used.
    *
    * @param closeSubReaders whether the given readers are closed ({@code true}) or only
    *        ref-count-decremented ({@code false}) when this reader is closed
    * @param readers the main parallel readers
    * @param storedFieldReaders the readers used for loading documents
    * @throws IllegalArgumentException if {@code readers} is empty while
    *         {@code storedFieldReaders} is not, or if any reader differs from the first
    *         main reader in {@code maxDoc}, number of leaves, or per-leaf {@code maxDoc}
    * @throws IOException propagated from building the per-leaf wrappers
    */
   public ParallelCompositeReader(boolean closeSubReaders, CompositeReader[] readers, CompositeReader[] storedFieldReaders) throws IOException {
     super(prepareLeafReaders(readers, storedFieldReaders), null);
     this.closeSubReaders = closeSubReaders;
     Collections.addAll(completeReaderSet, readers);
     Collections.addAll(completeReaderSet, storedFieldReaders);
     // update ref-counts (like MultiReader); this must happen before the synthetic
     // wrappers are added below so that only the caller's readers are incRef'ed:
     if (!closeSubReaders) {
       for (final IndexReader reader : completeReaderSet) {
         reader.incRef();
       }
     }
     // finally add our own synthetic readers, so we close or decRef them, too
     // (it does not matter which, because their doClose() is a no-op)
     completeReaderSet.addAll(getSequentialSubReaders());
     // ParallelReader instances can be short-lived, which would make caching trappy
     // so we do not cache on them, unless they wrap a single reader in which
     // case we delegate
     if (readers.length == 1 && storedFieldReaders.length == 1 && readers[0] == storedFieldReaders[0]) {
       cacheHelper = readers[0].getReaderCacheHelper();
     } else {
       cacheHelper = null;
     }
   }

   /**
    * Validates the given readers against the first main reader and builds one
    * synthetic {@link ParallelLeafReader} per leaf position, each combining the
    * leaves found at that position in every given reader.
    *
    * @param readers the main parallel readers
    * @param storedFieldsReaders the readers used for loading documents
    * @return one wrapper per leaf of the first main reader; empty if there are no readers
    * @throws IllegalArgumentException if the readers are not structurally compatible
    */
   private static LeafReader[] prepareLeafReaders(CompositeReader[] readers, CompositeReader[] storedFieldsReaders) throws IOException {
     if (readers.length == 0) {
       if (storedFieldsReaders.length > 0) {
         throw new IllegalArgumentException("There must be at least one main reader if storedFieldsReaders are used.");
       }
       return new LeafReader[0];
     }

     final List<? extends LeafReaderContext> firstLeaves = readers[0].leaves();

     // check compatibility: the first main reader defines the expected shape
     final int maxDoc = readers[0].maxDoc();
     final int noLeaves = firstLeaves.size();
     final int[] leafMaxDoc = new int[noLeaves];
     for (int i = 0; i < noLeaves; i++) {
       leafMaxDoc[i] = firstLeaves.get(i).reader().maxDoc();
     }
     validate(readers, maxDoc, leafMaxDoc);
     validate(storedFieldsReaders, maxDoc, leafMaxDoc);

     // flatten structure of each Composite to just LeafReader[]
     // and combine parallel structure with ParallelLeafReaders:
     final LeafReader[] wrappedLeaves = new LeafReader[noLeaves];
     for (int i = 0; i < wrappedLeaves.length; i++) {
       final LeafReader[] subs = new LeafReader[readers.length];
       for (int j = 0; j < readers.length; j++) {
         subs[j] = readers[j].leaves().get(i).reader();
       }
       final LeafReader[] storedSubs = new LeafReader[storedFieldsReaders.length];
       for (int j = 0; j < storedFieldsReaders.length; j++) {
         storedSubs[j] = storedFieldsReaders[j].leaves().get(i).reader();
       }
       // We pass true for closeSubs and we prevent touching of subreaders in doClose():
       // By this the synthetic throw-away readers used here are completely invisible to ref-counting
       wrappedLeaves[i] = new ParallelLeafReader(true, subs, storedSubs) {
         @Override
         protected void doClose() {}
       };
     }
     return wrappedLeaves;
   }

   /**
    * Checks that every reader has the expected total {@code maxDoc}, the expected
    * number of leaves, and the expected {@code maxDoc} at each leaf position.
    *
    * @param readers the readers to check
    * @param maxDoc the required total {@code maxDoc}
    * @param leafMaxDoc the required {@code maxDoc} of each leaf, by position
    * @throws IllegalArgumentException on the first mismatch found
    */
   private static void validate(CompositeReader[] readers, int maxDoc, int[] leafMaxDoc) {
     for (int i = 0; i < readers.length; i++) {
       final CompositeReader reader = readers[i];
       final List<? extends LeafReaderContext> subs = reader.leaves();
       if (reader.maxDoc() != maxDoc) {
         throw new IllegalArgumentException("All readers must have same maxDoc: "+maxDoc+"!="+reader.maxDoc());
       }
       final int noSubs = subs.size();
       if (noSubs != leafMaxDoc.length) {
         throw new IllegalArgumentException("All readers must have same number of leaf readers");
       }
       for (int subIDX = 0; subIDX < noSubs; subIDX++) {
         final LeafReader r = subs.get(subIDX).reader();
         if (r.maxDoc() != leafMaxDoc[subIDX]) {
           throw new IllegalArgumentException("All leaf readers must have same corresponding subReader maxDoc");
         }
       }
     }
   }

   /**
    * Returns the cache helper of the single wrapped reader, or {@code null} if this
    * instance combines several readers.
    */
   @Override
   public CacheHelper getReaderCacheHelper() {
     return cacheHelper;
   }

   /**
    * Releases every tracked reader, closing it or decrementing its ref-count
    * depending on {@code closeSubReaders}. An {@link IOException} from one reader
    * does not stop the remaining readers from being released.
    *
    * @throws IOException the first failure encountered; any later failures are
    *         attached to it as suppressed exceptions
    */
   @Override
   protected synchronized void doClose() throws IOException {
     IOException firstFailure = null;
     for (final IndexReader reader : completeReaderSet) {
       try {
         if (closeSubReaders) {
           reader.close();
         } else {
           reader.decRef();
         }
       } catch (IOException e) {
         if (firstFailure == null) {
           firstFailure = e;
         } else if (e != firstFailure) {
           // keep later failures visible instead of dropping them; the identity
           // check avoids addSuppressed's self-suppression IllegalArgumentException
           firstFailure.addSuppressed(e);
         }
       }
     }
     // throw the first exception (carrying any later ones as suppressed)
     if (firstFailure != null) {
       throw firstFailure;
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.index;


	import java.io.IOException;
	import java.util.Collections;
	import java.util.IdentityHashMap;
	import java.util.List;
	import java.util.Set;

	/** A {@link CompositeReader} which reads multiple, parallel indexes. Each
	* index added must have the same number of documents, and exactly the same
	* number of leaves (with equal {@code maxDoc}), but typically each contains
	* different fields. Deletions are taken from the first reader. Each document
	* contains the union of the fields of all documents with the same document
	* number. When searching, matches for a query term are from the first index
	* added that has the field.
	*
	* <p>This is useful, e.g., with collections that have large fields which
	* change rarely and small fields that change more frequently. The smaller
	* fields may be re-indexed in a new index and both indexes may be searched
	* together.
	*
	* <p><strong>Warning:</strong> It is up to you to make sure all indexes
	* are created and modified the same way. For example, if you add
	* documents to one index, you need to add the same documents in the
	* same order to the other indexes. <em>Failure to do so will result in
	* undefined behavior</em>.
	* A good strategy to create suitable indexes with {@link IndexWriter} is to use
	* {@link LogDocMergePolicy}, as this one does not reorder documents
	* during merging (unlike {@code TieredMergePolicy}, which may) and triggers merges
	* by number of documents per segment. If you use different {@link MergePolicy}s
	* it might happen that the segment structure of your index is no longer predictable.
	*/
	public class ParallelCompositeReader extends BaseCompositeReader<LeafReader> {
		/**
		 * If {@code true}, {@link #doClose()} closes every tracked reader; if {@code false},
		 * the constructor increments the ref-count of the given readers and
		 * {@link #doClose()} decrements it again.
		 */
		private final boolean closeSubReaders;

		/**
		 * Identity-based set of every reader released in {@link #doClose()}: the
		 * {@code readers} and {@code storedFieldReaders} handed to the constructor plus
		 * the synthetic per-leaf wrappers built by {@code prepareLeafReaders}.
		 */
		private final Set<IndexReader> completeReaderSet =
			Collections.newSetFromMap(new IdentityHashMap<IndexReader,Boolean>());

		/**
		 * Cache helper borrowed from the single wrapped reader, or {@code null} when this
		 * instance combines more than one reader and therefore does not support caching.
		 */
		private final CacheHelper cacheHelper;

		/**
		 * Create a ParallelCompositeReader based on the provided
		 * readers; auto-closes the given readers on {@link #close()}.
		 *
		 * @param readers the parallel readers to combine
		 * @throws IOException propagated from the delegated constructor
		 */
		public ParallelCompositeReader(CompositeReader... readers) throws IOException {
			this(true, readers);
		}

		/**
		 * Create a ParallelCompositeReader based on the provided readers. The same
		 * readers are used both for searching and for loading stored fields.
		 *
		 * @param closeSubReaders whether the given readers are closed ({@code true}) or only
		 *        ref-count-decremented ({@code false}) when this reader is closed
		 * @param readers the parallel readers to combine
		 * @throws IOException propagated from the delegated constructor
		 */
		public ParallelCompositeReader(boolean closeSubReaders, CompositeReader... readers) throws IOException {
			this(closeSubReaders, readers, readers);
		}

		/**
		 * Expert: create a ParallelCompositeReader based on the provided
		 * readers and storedFieldReaders; when a document is
		 * loaded, only storedFieldReaders will be used.
		 *
		 * @param closeSubReaders whether the given readers are closed ({@code true}) or only
		 *        ref-count-decremented ({@code false}) when this reader is closed
		 * @param readers the main parallel readers
		 * @param storedFieldReaders the readers used for loading documents
		 * @throws IllegalArgumentException if {@code readers} is empty while
		 *         {@code storedFieldReaders} is not, or if any reader differs from the first
		 *         main reader in {@code maxDoc}, number of leaves, or per-leaf {@code maxDoc}
		 * @throws IOException propagated from building the per-leaf wrappers
		 */
		public ParallelCompositeReader(boolean closeSubReaders, CompositeReader[] readers, CompositeReader[] storedFieldReaders) throws IOException {
			super(prepareLeafReaders(readers, storedFieldReaders), null);
			this.closeSubReaders = closeSubReaders;
			Collections.addAll(completeReaderSet, readers);
			Collections.addAll(completeReaderSet, storedFieldReaders);
			// update ref-counts (like MultiReader); this must happen before the synthetic
			// wrappers are added below so that only the caller's readers are incRef'ed:
			if (!closeSubReaders) {
				for (final IndexReader reader : completeReaderSet) {
					reader.incRef();
				}
			}
			// finally add our own synthetic readers, so we close or decRef them, too
			// (it does not matter which, because their doClose() is a no-op)
			completeReaderSet.addAll(getSequentialSubReaders());
			// ParallelReader instances can be short-lived, which would make caching trappy
			// so we do not cache on them, unless they wrap a single reader in which
			// case we delegate
			if (readers.length == 1 && storedFieldReaders.length == 1 && readers[0] == storedFieldReaders[0]) {
				cacheHelper = readers[0].getReaderCacheHelper();
			} else {
				cacheHelper = null;
			}
		}

		/**
		 * Validates the given readers against the first main reader and builds one
		 * synthetic {@link ParallelLeafReader} per leaf position, each combining the
		 * leaves found at that position in every given reader.
		 *
		 * @param readers the main parallel readers
		 * @param storedFieldsReaders the readers used for loading documents
		 * @return one wrapper per leaf of the first main reader; empty if there are no readers
		 * @throws IllegalArgumentException if the readers are not structurally compatible
		 */
		private static LeafReader[] prepareLeafReaders(CompositeReader[] readers, CompositeReader[] storedFieldsReaders) throws IOException {
			if (readers.length == 0) {
				if (storedFieldsReaders.length > 0) {
					throw new IllegalArgumentException("There must be at least one main reader if storedFieldsReaders are used.");
				}
				return new LeafReader[0];
			}

			final List<? extends LeafReaderContext> firstLeaves = readers[0].leaves();

			// check compatibility: the first main reader defines the expected shape
			final int maxDoc = readers[0].maxDoc();
			final int noLeaves = firstLeaves.size();
			final int[] leafMaxDoc = new int[noLeaves];
			for (int i = 0; i < noLeaves; i++) {
				leafMaxDoc[i] = firstLeaves.get(i).reader().maxDoc();
			}
			validate(readers, maxDoc, leafMaxDoc);
			validate(storedFieldsReaders, maxDoc, leafMaxDoc);

			// flatten structure of each Composite to just LeafReader[]
			// and combine parallel structure with ParallelLeafReaders:
			final LeafReader[] wrappedLeaves = new LeafReader[noLeaves];
			for (int i = 0; i < wrappedLeaves.length; i++) {
				final LeafReader[] subs = new LeafReader[readers.length];
				for (int j = 0; j < readers.length; j++) {
					subs[j] = readers[j].leaves().get(i).reader();
				}
				final LeafReader[] storedSubs = new LeafReader[storedFieldsReaders.length];
				for (int j = 0; j < storedFieldsReaders.length; j++) {
					storedSubs[j] = storedFieldsReaders[j].leaves().get(i).reader();
				}
				// We pass true for closeSubs and we prevent touching of subreaders in doClose():
				// By this the synthetic throw-away readers used here are completely invisible to ref-counting
				wrappedLeaves[i] = new ParallelLeafReader(true, subs, storedSubs) {
					@Override
					protected void doClose() {}
				};
			}
			return wrappedLeaves;
		}

		/**
		 * Checks that every reader has the expected total {@code maxDoc}, the expected
		 * number of leaves, and the expected {@code maxDoc} at each leaf position.
		 *
		 * @param readers the readers to check
		 * @param maxDoc the required total {@code maxDoc}
		 * @param leafMaxDoc the required {@code maxDoc} of each leaf, by position
		 * @throws IllegalArgumentException on the first mismatch found
		 */
		private static void validate(CompositeReader[] readers, int maxDoc, int[] leafMaxDoc) {
			for (int i = 0; i < readers.length; i++) {
				final CompositeReader reader = readers[i];
				final List<? extends LeafReaderContext> subs = reader.leaves();
				if (reader.maxDoc() != maxDoc) {
					throw new IllegalArgumentException("All readers must have same maxDoc: "+maxDoc+"!="+reader.maxDoc());
				}
				final int noSubs = subs.size();
				if (noSubs != leafMaxDoc.length) {
					throw new IllegalArgumentException("All readers must have same number of leaf readers");
				}
				for (int subIDX = 0; subIDX < noSubs; subIDX++) {
					final LeafReader r = subs.get(subIDX).reader();
					if (r.maxDoc() != leafMaxDoc[subIDX]) {
						throw new IllegalArgumentException("All leaf readers must have same corresponding subReader maxDoc");
					}
				}
			}
		}

		/**
		 * Returns the cache helper of the single wrapped reader, or {@code null} if this
		 * instance combines several readers.
		 */
		@Override
		public CacheHelper getReaderCacheHelper() {
			return cacheHelper;
		}

		/**
		 * Releases every tracked reader, closing it or decrementing its ref-count
		 * depending on {@code closeSubReaders}. An {@link IOException} from one reader
		 * does not stop the remaining readers from being released.
		 *
		 * @throws IOException the first failure encountered; any later failures are
		 *         attached to it as suppressed exceptions
		 */
		@Override
		protected synchronized void doClose() throws IOException {
			IOException firstFailure = null;
			for (final IndexReader reader : completeReaderSet) {
				try {
					if (closeSubReaders) {
						reader.close();
					} else {
						reader.decRef();
					}
				} catch (IOException e) {
					if (firstFailure == null) {
						firstFailure = e;
					} else if (e != firstFailure) {
						// keep later failures visible instead of dropping them; the identity
						// check avoids addSuppressed's self-suppression IllegalArgumentException
						firstFailure.addSuppressed(e);
					}
				}
			}
			// throw the first exception (carrying any later ones as suppressed)
			if (firstFailure != null) {
				throw firstFailure;
			}
		}
	}