lucene/core/src/java/org/apache/lucene/index/ParallelCompositeReader.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.index;

 import java.io.IOException;
 import java.util.Collections;
 import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Set;

 /**
  * A {@link CompositeReader} which reads multiple, parallel indexes. Each index added must have the
  * same number of documents, and exactly the same number of leaves (with equal {@code maxDoc}), but
  * typically each contains different fields. Deletions are taken from the first reader. Each
  * document contains the union of the fields of all documents with the same document number. When
  * searching, matches for a query term are from the first index added that has the field.
  *
  * <p>This is useful, e.g., with collections that have large fields which change rarely and small
  * fields that change more frequently. The smaller fields may be re-indexed in a new index and both
  * indexes may be searched together.
  *
  * <p><strong>Warning:</strong> It is up to you to make sure all indexes are created and modified
  * the same way. For example, if you add documents to one index, you need to add the same documents
  * in the same order to the other indexes. <em>Failure to do so will result in undefined
  * behavior</em>. A good strategy to create suitable indexes with {@link IndexWriter} is to use
  * {@link LogDocMergePolicy}, as this one does not reorder documents during merging (like {@code
  * TieredMergePolicy}) and triggers merges by number of documents per segment. If you use different
  * {@link MergePolicy}s it might happen that the segment structure of your index is no longer
  * predictable.
  */
 public class ParallelCompositeReader extends BaseCompositeReader<LeafReader> {
   /**
    * If {@code true}, {@link #doClose()} closes the wrapped readers; otherwise it only decrements
    * the reference counts that the constructor incremented.
    */
   private final boolean closeSubReaders;

   /**
    * Identity-based set of every reader released by {@link #doClose()}: the main readers, the
    * stored fields readers and the synthetic per-leaf readers created by this class.
    */
   private final Set<IndexReader> completeReaderSet =
       Collections.newSetFromMap(new IdentityHashMap<IndexReader, Boolean>());

   /** Cache helper of the single wrapped reader, or {@code null} when caching is not delegated. */
   private final CacheHelper cacheHelper;

   /**
    * Create a ParallelCompositeReader based on the provided readers; auto-closes the given readers
    * on {@link #close()}.
    *
    * @param readers the parallel readers; also used as the stored fields readers
    * @throws IllegalArgumentException if the readers do not have the same {@code maxDoc} and leaf
    *     structure
    * @throws IOException if an I/O error occurs while setting up the reader
    */
   public ParallelCompositeReader(CompositeReader... readers) throws IOException {
     this(true, readers);
   }

   /**
    * Create a ParallelCompositeReader based on the provided readers.
    *
    * @param closeSubReaders if {@code true} the given readers are closed when this reader is
    *     closed; if {@code false} their reference counts are incremented now and decremented on
    *     close
    * @param readers the parallel readers; also used as the stored fields readers
    * @throws IllegalArgumentException if the readers do not have the same {@code maxDoc} and leaf
    *     structure
    * @throws IOException if an I/O error occurs while setting up the reader
    */
   public ParallelCompositeReader(boolean closeSubReaders, CompositeReader... readers)
       throws IOException {
     this(closeSubReaders, readers, readers);
   }

   /**
    * Expert: create a ParallelCompositeReader based on the provided readers and
    * storedFieldReaders; when a document is loaded, only {@code storedFieldReaders} will be used.
    *
    * @param closeSubReaders if {@code true} the given readers are closed when this reader is
    *     closed; if {@code false} their reference counts are incremented now and decremented on
    *     close
    * @param readers the main parallel readers
    * @param storedFieldReaders the readers used for stored fields; must be empty if {@code
    *     readers} is empty
    * @throws IllegalArgumentException if stored fields readers are given without any main reader,
    *     or if the readers do not have the same {@code maxDoc} and leaf structure
    * @throws IOException if an I/O error occurs while setting up the reader
    */
   public ParallelCompositeReader(
       boolean closeSubReaders, CompositeReader[] readers, CompositeReader[] storedFieldReaders)
       throws IOException {
     super(prepareLeafReaders(readers, storedFieldReaders));
     this.closeSubReaders = closeSubReaders;
     // The set is identity-based, so a reader passed in both arrays is only tracked once.
     Collections.addAll(completeReaderSet, readers);
     Collections.addAll(completeReaderSet, storedFieldReaders);
     // update ref-counts (like MultiReader):
     if (!closeSubReaders) {
       for (final IndexReader reader : completeReaderSet) {
         reader.incRef();
       }
     }
     // finally add our own synthetic readers, so we close or decRef them, too (it does not matter
     // what we do)
     completeReaderSet.addAll(getSequentialSubReaders());
     // ParallelReader instances can be short-lived, which would make caching trappy
     // so we do not cache on them, unless they wrap a single reader in which
     // case we delegate
     if (readers.length == 1
         && storedFieldReaders.length == 1
         && readers[0] == storedFieldReaders[0]) {
       cacheHelper = readers[0].getReaderCacheHelper();
     } else {
       cacheHelper = null;
     }
   }

   /**
    * Validates that all readers share the structure of {@code readers[0]} and builds one synthetic
    * {@link ParallelLeafReader} per leaf position.
    *
    * @return one wrapped leaf per leaf of the first reader; empty if {@code readers} is empty
    * @throws IllegalArgumentException if stored fields readers are given without any main reader,
    *     or if the readers are not structurally compatible
    */
   private static LeafReader[] prepareLeafReaders(
       CompositeReader[] readers, CompositeReader[] storedFieldsReaders) throws IOException {
     if (readers.length == 0) {
       if (storedFieldsReaders.length > 0) {
         throw new IllegalArgumentException(
             "There must be at least one main reader if storedFieldsReaders are used.");
       }
       return new LeafReader[0];
     }

     final List<? extends LeafReaderContext> firstLeaves = readers[0].leaves();

     // check compatibility against the structure of the first reader:
     final int maxDoc = readers[0].maxDoc();
     final int noLeaves = firstLeaves.size();
     final int[] leafMaxDoc = new int[noLeaves];
     for (int i = 0; i < noLeaves; i++) {
       leafMaxDoc[i] = firstLeaves.get(i).reader().maxDoc();
     }
     validate(readers, maxDoc, leafMaxDoc);
     validate(storedFieldsReaders, maxDoc, leafMaxDoc);

     // flatten structure of each Composite to just LeafReader[]
     // and combine parallel structure with ParallelLeafReaders:
     final LeafReader[] wrappedLeaves = new LeafReader[noLeaves];
     for (int i = 0; i < noLeaves; i++) {
       final LeafReader[] subs = leavesAt(readers, i);
       final LeafReader[] storedSubs = leavesAt(storedFieldsReaders, i);
       // We pass true for closeSubs and we prevent touching of subreaders in doClose():
       // By this the synthetic throw-away readers used here are completely invisible to
       // ref-counting
       wrappedLeaves[i] =
           new ParallelLeafReader(true, subs, storedSubs) {
             @Override
             protected void doClose() {}
           };
     }
     return wrappedLeaves;
   }

   /** Collects the leaf reader at position {@code leafIndex} from each of the given readers. */
   private static LeafReader[] leavesAt(CompositeReader[] readers, int leafIndex) {
     final LeafReader[] leaves = new LeafReader[readers.length];
     for (int j = 0; j < readers.length; j++) {
       leaves[j] = readers[j].leaves().get(leafIndex).reader();
     }
     return leaves;
   }

   /**
    * Checks that every reader has the expected total {@code maxDoc}, the expected number of leaves
    * and the expected {@code maxDoc} for each leaf.
    *
    * @param readers the readers to check
    * @param maxDoc the required {@code maxDoc} of each reader
    * @param leafMaxDoc the required {@code maxDoc} of each leaf, by leaf position
    * @throws IllegalArgumentException on the first mismatch found
    */
   private static void validate(CompositeReader[] readers, int maxDoc, int[] leafMaxDoc) {
     for (final CompositeReader reader : readers) {
       final List<? extends LeafReaderContext> subs = reader.leaves();
       if (reader.maxDoc() != maxDoc) {
         throw new IllegalArgumentException(
             "All readers must have same maxDoc: " + maxDoc + "!=" + reader.maxDoc());
       }
       final int noSubs = subs.size();
       if (noSubs != leafMaxDoc.length) {
         throw new IllegalArgumentException("All readers must have same number of leaf readers");
       }
       for (int subIDX = 0; subIDX < noSubs; subIDX++) {
         final LeafReader r = subs.get(subIDX).reader();
         if (r.maxDoc() != leafMaxDoc[subIDX]) {
           throw new IllegalArgumentException(
               "All leaf readers must have same corresponding subReader maxDoc");
         }
       }
     }
   }

   /**
    * Returns the cache helper of the wrapped reader when this instance wraps exactly one reader
    * that is also its only stored fields reader; otherwise returns {@code null}.
    */
   @Override
   public CacheHelper getReaderCacheHelper() {
     return cacheHelper;
   }

   /**
    * Releases every tracked reader, either by closing it or by decrementing its reference count,
    * depending on {@code closeSubReaders}. All readers are attempted even if some fail.
    *
    * @throws IOException the first failure encountered; any later failures are attached to it as
    *     suppressed exceptions
    */
   @Override
   protected synchronized void doClose() throws IOException {
     IOException firstFailure = null;
     for (final IndexReader reader : completeReaderSet) {
       try {
         if (closeSubReaders) {
           reader.close();
         } else {
           reader.decRef();
         }
       } catch (IOException e) {
         if (firstFailure == null) {
           firstFailure = e;
         } else if (e != firstFailure) {
           // keep later failures visible instead of dropping them; addSuppressed rejects
           // self-suppression, hence the identity check
           firstFailure.addSuppressed(e);
         }
       }
     }
     // throw the first exception
     if (firstFailure != null) {
       throw firstFailure;
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.index;

	import java.io.IOException;
	import java.util.Collections;
	import java.util.IdentityHashMap;
	import java.util.List;
	import java.util.Set;

	/**
	* A {@link CompositeReader} which reads multiple, parallel indexes. Each index added must have the
	* same number of documents, and exactly the same number of leaves (with equal {@code maxDoc}), but
	* typically each contains different fields. Deletions are taken from the first reader. Each
	* document contains the union of the fields of all documents with the same document number. When
	* searching, matches for a query term are from the first index added that has the field.
	*
	* <p>This is useful, e.g., with collections that have large fields which change rarely and small
	* fields that change more frequently. The smaller fields may be re-indexed in a new index and both
	* indexes may be searched together.
	*
	* <p><strong>Warning:</strong> It is up to you to make sure all indexes are created and modified
	* the same way. For example, if you add documents to one index, you need to add the same documents
	* in the same order to the other indexes. <em>Failure to do so will result in undefined
	* behavior</em>. A good strategy to create suitable indexes with {@link IndexWriter} is to use
	* {@link LogDocMergePolicy}, as this one does not reorder documents during merging (like {@code
	* TieredMergePolicy}) and triggers merges by number of documents per segment. If you use different
	* {@link MergePolicy}s it might happen that the segment structure of your index is no longer
	* predictable.
	*/
	public class ParallelCompositeReader extends BaseCompositeReader<LeafReader> {
	  /**
	   * If {@code true}, {@link #doClose()} closes the wrapped readers; otherwise it only decrements
	   * the reference counts that the constructor incremented.
	   */
	  private final boolean closeSubReaders;

	  /**
	   * Identity-based set of every reader released by {@link #doClose()}: the main readers, the
	   * stored fields readers and the synthetic per-leaf readers created by this class.
	   */
	  private final Set<IndexReader> completeReaderSet =
	      Collections.newSetFromMap(new IdentityHashMap<IndexReader, Boolean>());

	  /** Cache helper of the single wrapped reader, or {@code null} when caching is not delegated. */
	  private final CacheHelper cacheHelper;

	  /**
	   * Create a ParallelCompositeReader based on the provided readers; auto-closes the given readers
	   * on {@link #close()}.
	   *
	   * @param readers the parallel readers; also used as the stored fields readers
	   * @throws IllegalArgumentException if the readers do not have the same {@code maxDoc} and leaf
	   *     structure
	   * @throws IOException if an I/O error occurs while setting up the reader
	   */
	  public ParallelCompositeReader(CompositeReader... readers) throws IOException {
	    this(true, readers);
	  }

	  /**
	   * Create a ParallelCompositeReader based on the provided readers.
	   *
	   * @param closeSubReaders if {@code true} the given readers are closed when this reader is
	   *     closed; if {@code false} their reference counts are incremented now and decremented on
	   *     close
	   * @param readers the parallel readers; also used as the stored fields readers
	   * @throws IllegalArgumentException if the readers do not have the same {@code maxDoc} and leaf
	   *     structure
	   * @throws IOException if an I/O error occurs while setting up the reader
	   */
	  public ParallelCompositeReader(boolean closeSubReaders, CompositeReader... readers)
	      throws IOException {
	    this(closeSubReaders, readers, readers);
	  }

	  /**
	   * Expert: create a ParallelCompositeReader based on the provided readers and
	   * storedFieldReaders; when a document is loaded, only {@code storedFieldReaders} will be used.
	   *
	   * @param closeSubReaders if {@code true} the given readers are closed when this reader is
	   *     closed; if {@code false} their reference counts are incremented now and decremented on
	   *     close
	   * @param readers the main parallel readers
	   * @param storedFieldReaders the readers used for stored fields; must be empty if {@code
	   *     readers} is empty
	   * @throws IllegalArgumentException if stored fields readers are given without any main reader,
	   *     or if the readers do not have the same {@code maxDoc} and leaf structure
	   * @throws IOException if an I/O error occurs while setting up the reader
	   */
	  public ParallelCompositeReader(
	      boolean closeSubReaders, CompositeReader[] readers, CompositeReader[] storedFieldReaders)
	      throws IOException {
	    super(prepareLeafReaders(readers, storedFieldReaders));
	    this.closeSubReaders = closeSubReaders;
	    // The set is identity-based, so a reader passed in both arrays is only tracked once.
	    Collections.addAll(completeReaderSet, readers);
	    Collections.addAll(completeReaderSet, storedFieldReaders);
	    // update ref-counts (like MultiReader):
	    if (!closeSubReaders) {
	      for (final IndexReader reader : completeReaderSet) {
	        reader.incRef();
	      }
	    }
	    // finally add our own synthetic readers, so we close or decRef them, too (it does not matter
	    // what we do)
	    completeReaderSet.addAll(getSequentialSubReaders());
	    // ParallelReader instances can be short-lived, which would make caching trappy
	    // so we do not cache on them, unless they wrap a single reader in which
	    // case we delegate
	    if (readers.length == 1
	        && storedFieldReaders.length == 1
	        && readers[0] == storedFieldReaders[0]) {
	      cacheHelper = readers[0].getReaderCacheHelper();
	    } else {
	      cacheHelper = null;
	    }
	  }

	  /**
	   * Validates that all readers share the structure of {@code readers[0]} and builds one synthetic
	   * {@link ParallelLeafReader} per leaf position.
	   *
	   * @return one wrapped leaf per leaf of the first reader; empty if {@code readers} is empty
	   * @throws IllegalArgumentException if stored fields readers are given without any main reader,
	   *     or if the readers are not structurally compatible
	   */
	  private static LeafReader[] prepareLeafReaders(
	      CompositeReader[] readers, CompositeReader[] storedFieldsReaders) throws IOException {
	    if (readers.length == 0) {
	      if (storedFieldsReaders.length > 0) {
	        throw new IllegalArgumentException(
	            "There must be at least one main reader if storedFieldsReaders are used.");
	      }
	      return new LeafReader[0];
	    }

	    final List<? extends LeafReaderContext> firstLeaves = readers[0].leaves();

	    // check compatibility against the structure of the first reader:
	    final int maxDoc = readers[0].maxDoc();
	    final int noLeaves = firstLeaves.size();
	    final int[] leafMaxDoc = new int[noLeaves];
	    for (int i = 0; i < noLeaves; i++) {
	      leafMaxDoc[i] = firstLeaves.get(i).reader().maxDoc();
	    }
	    validate(readers, maxDoc, leafMaxDoc);
	    validate(storedFieldsReaders, maxDoc, leafMaxDoc);

	    // flatten structure of each Composite to just LeafReader[]
	    // and combine parallel structure with ParallelLeafReaders:
	    final LeafReader[] wrappedLeaves = new LeafReader[noLeaves];
	    for (int i = 0; i < noLeaves; i++) {
	      final LeafReader[] subs = leavesAt(readers, i);
	      final LeafReader[] storedSubs = leavesAt(storedFieldsReaders, i);
	      // We pass true for closeSubs and we prevent touching of subreaders in doClose():
	      // By this the synthetic throw-away readers used here are completely invisible to
	      // ref-counting
	      wrappedLeaves[i] =
	          new ParallelLeafReader(true, subs, storedSubs) {
	            @Override
	            protected void doClose() {}
	          };
	    }
	    return wrappedLeaves;
	  }

	  /** Collects the leaf reader at position {@code leafIndex} from each of the given readers. */
	  private static LeafReader[] leavesAt(CompositeReader[] readers, int leafIndex) {
	    final LeafReader[] leaves = new LeafReader[readers.length];
	    for (int j = 0; j < readers.length; j++) {
	      leaves[j] = readers[j].leaves().get(leafIndex).reader();
	    }
	    return leaves;
	  }

	  /**
	   * Checks that every reader has the expected total {@code maxDoc}, the expected number of leaves
	   * and the expected {@code maxDoc} for each leaf.
	   *
	   * @param readers the readers to check
	   * @param maxDoc the required {@code maxDoc} of each reader
	   * @param leafMaxDoc the required {@code maxDoc} of each leaf, by leaf position
	   * @throws IllegalArgumentException on the first mismatch found
	   */
	  private static void validate(CompositeReader[] readers, int maxDoc, int[] leafMaxDoc) {
	    for (final CompositeReader reader : readers) {
	      final List<? extends LeafReaderContext> subs = reader.leaves();
	      if (reader.maxDoc() != maxDoc) {
	        throw new IllegalArgumentException(
	            "All readers must have same maxDoc: " + maxDoc + "!=" + reader.maxDoc());
	      }
	      final int noSubs = subs.size();
	      if (noSubs != leafMaxDoc.length) {
	        throw new IllegalArgumentException("All readers must have same number of leaf readers");
	      }
	      for (int subIDX = 0; subIDX < noSubs; subIDX++) {
	        final LeafReader r = subs.get(subIDX).reader();
	        if (r.maxDoc() != leafMaxDoc[subIDX]) {
	          throw new IllegalArgumentException(
	              "All leaf readers must have same corresponding subReader maxDoc");
	        }
	      }
	    }
	  }

	  /**
	   * Returns the cache helper of the wrapped reader when this instance wraps exactly one reader
	   * that is also its only stored fields reader; otherwise returns {@code null}.
	   */
	  @Override
	  public CacheHelper getReaderCacheHelper() {
	    return cacheHelper;
	  }

	  /**
	   * Releases every tracked reader, either by closing it or by decrementing its reference count,
	   * depending on {@code closeSubReaders}. All readers are attempted even if some fail.
	   *
	   * @throws IOException the first failure encountered; any later failures are attached to it as
	   *     suppressed exceptions
	   */
	  @Override
	  protected synchronized void doClose() throws IOException {
	    IOException firstFailure = null;
	    for (final IndexReader reader : completeReaderSet) {
	      try {
	        if (closeSubReaders) {
	          reader.close();
	        } else {
	          reader.decRef();
	        }
	      } catch (IOException e) {
	        if (firstFailure == null) {
	          firstFailure = e;
	        } else if (e != firstFailure) {
	          // keep later failures visible instead of dropping them; addSuppressed rejects
	          // self-suppression, hence the identity check
	          firstFailure.addSuppressed(e);
	        }
	      }
	    }
	    // throw the first exception
	    if (firstFailure != null) {
	      throw firstFailure;
	    }
	  }
	}