lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.index;


 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;

 /** Base class for implementing {@link CompositeReader}s based on an array
  * of sub-readers. The implementing class has to add code for
  * correctly refcounting and closing the sub-readers.
  *
  * <p>User code will most likely use {@link MultiReader} to build a
  * composite reader on a set of sub-readers (like several
  * {@link DirectoryReader}s).
  *
  * <p> For efficiency, in this API documents are often referred to via
  * <i>document numbers</i>, non-negative integers which each name a unique
  * document in the index.  These document numbers are ephemeral -- they may change
  * as documents are added to and deleted from an index.  Clients should thus not
  * rely on a given document having the same number between sessions.
  *
  * <p><a name="thread-safety"></a><p><b>NOTE</b>: {@link
  * IndexReader} instances are completely thread
  * safe, meaning multiple threads can call any of its methods,
  * concurrently.  If your application requires external
  * synchronization, you should <b>not</b> synchronize on the
  * <code>IndexReader</code> instance; use your own
  * (non-Lucene) objects instead.
  * @see MultiReader
  * @lucene.internal
  */
 public abstract class BaseCompositeReader<R extends IndexReader> extends CompositeReader {
   /** The wrapped sub-readers, in the order used to resolve docIDs. */
   private final R[] subReaders;
   /** A comparator for sorting sub-readers */
   protected final Comparator<R> subReadersSorter;
   /** {@code starts[i]} is the first docID of sub-reader {@code i}; the extra last slot holds {@code maxDoc}. */
   private final int[] starts;
   private final int maxDoc;
   /** Cached sum of the sub-readers' numDocs; {@code -1} until first computed. */
   private int numDocs = -1;

   /** List view solely for {@link #getSequentialSubReaders()};
    * for efficiency the array is used internally. */
   private final List<R> subReadersList;

   /**
    * Constructs a {@code BaseCompositeReader} on the given subReaders and registers
    * this reader as the parent of each of them.
    * @param subReaders the wrapped sub-readers. This array is exposed (as an unmodifiable
    * list view) by {@link #getSequentialSubReaders} and used to resolve the correct
    * sub-reader for docID-based methods. <b>Please note:</b> This array is <b>not</b>
    * cloned and not protected against modification; the subclass is responsible
    * for doing this. If a sorter is given, the array is sorted in place.
    * @param subReadersSorter a comparator for sorting sub-readers. If not {@code null}, this
    * comparator is used to sort the sub-readers before using them for resolving doc IDs.
    * @throws CorruptIndexException if this is a {@link DirectoryReader} and the summed
    * maxDoc of the sub-readers exceeds {@code IndexWriter.getActualMaxDocs()}
    * @throws IllegalArgumentException if this is not a {@link DirectoryReader} and the
    * summed maxDoc of the sub-readers exceeds {@code IndexWriter.getActualMaxDocs()}
    */
   protected BaseCompositeReader(R[] subReaders, Comparator<R> subReadersSorter) throws IOException {
     if (subReadersSorter != null) {
       Arrays.sort(subReaders, subReadersSorter);
     }
     this.subReaders = subReaders;
     this.subReadersSorter = subReadersSorter;
     this.subReadersList = Collections.unmodifiableList(Arrays.asList(subReaders));

     final int readerCount = subReaders.length;
     this.starts = new int[readerCount + 1];

     // Accumulate in a long so that an oversized total is detected below rather than wrapping.
     long docTotal = 0;
     int slot = 0;
     for (IndexReader sub : subReaders) {
       starts[slot] = (int) docTotal; // docBase of this sub-reader
       docTotal += sub.maxDoc();
       sub.registerParentReader(this);
       slot++;
     }

     final int limit = IndexWriter.getActualMaxDocs();
     if (docTotal > limit) {
       if (this instanceof DirectoryReader) {
         // A single index has too many documents and it is corrupt (IndexWriter prevents this as of LUCENE-6299)
         throw new CorruptIndexException("Too many documents: an index cannot exceed " + limit + " but readers have total maxDoc=" + docTotal, Arrays.toString(subReaders));
       }
       // Caller is building a MultiReader and it has too many documents; this case is just illegal arguments:
       throw new IllegalArgumentException("Too many documents: composite IndexReaders cannot exceed " + limit + " but readers have total maxDoc=" + docTotal);
     }

     this.maxDoc = Math.toIntExact(docTotal);
     starts[readerCount] = this.maxDoc;
   }

   /** Looks up the sub-reader owning {@code docID} and returns its term vectors for the rebased docID. */
   @Override
   public final Fields getTermVectors(int docID) throws IOException {
     ensureOpen();
     final int sub = readerIndex(docID);
     final int localDoc = docID - starts[sub];
     return subReaders[sub].getTermVectors(localDoc);
   }

   /** Returns the sum of {@code numDocs()} over all sub-readers, computed on first use and then cached. */
   @Override
   public final int numDocs() {
     // Don't call ensureOpen() here (it could affect performance)
     // The sum is computed lazily so that creating a wrapper that hides some
     // documents isn't slow at wrapping time, but only on the first call to
     // numDocs(); plenty of reader use-cases never call numDocs() at all.
     // It is not crucial to avoid calling numDocs() more than once on the
     // sub readers, since they likely cache numDocs() anyway, hence the lack
     // of synchronization.
     final int cached = this.numDocs;
     if (cached != -1) {
       return cached;
     }
     int sum = 0;
     for (IndexReader sub : subReaders) {
       sum += sub.numDocs();
     }
     assert sum >= 0;
     this.numDocs = sum;
     return sum;
   }

   /** Returns the summed maxDoc of all sub-readers, as computed in the constructor. */
   @Override
   public final int maxDoc() {
     // Don't call ensureOpen() here (it could affect performance)
     return maxDoc;
   }

   /** Dispatches the stored-fields visit to the sub-reader owning {@code docID}, using the rebased docID. */
   @Override
   public final void document(int docID, StoredFieldVisitor visitor) throws IOException {
     ensureOpen();
     final int sub = readerIndex(docID);
     final int localDoc = docID - starts[sub];
     subReaders[sub].document(localDoc, visitor);
   }

   /** Returns the sum of {@code docFreq(term)} over all sub-readers. */
   @Override
   public final int docFreq(Term term) throws IOException {
     ensureOpen();
     int sum = 0;
     for (R reader : subReaders) {
       final int freq = reader.docFreq(term);
       assert freq >= 0;
       assert freq <= reader.getDocCount(term.field());
       sum += freq;
     }
     return sum;
   }

   /** Returns the sum of {@code totalTermFreq(term)} over all sub-readers. */
   @Override
   public final long totalTermFreq(Term term) throws IOException {
     ensureOpen();
     long sum = 0;
     for (R reader : subReaders) {
       final long freq = reader.totalTermFreq(term);
       assert freq >= 0;
       assert freq <= reader.getSumTotalTermFreq(term.field());
       sum += freq;
     }
     return sum;
   }

   /** Returns the sum of {@code getSumDocFreq(field)} over all sub-readers. */
   @Override
   public final long getSumDocFreq(String field) throws IOException {
     ensureOpen();
     long sum = 0;
     for (int i = 0; i < subReaders.length; i++) {
       final R reader = subReaders[i];
       final long part = reader.getSumDocFreq(field);
       assert part >= 0;
       assert part <= reader.getSumTotalTermFreq(field);
       sum += part;
     }
     return sum;
   }

   /** Returns the sum of {@code getDocCount(field)} over all sub-readers. */
   @Override
   public final int getDocCount(String field) throws IOException {
     ensureOpen();
     int sum = 0;
     for (int i = 0; i < subReaders.length; i++) {
       final R reader = subReaders[i];
       final int part = reader.getDocCount(field);
       assert part >= 0;
       assert part <= reader.maxDoc();
       sum += part;
     }
     return sum;
   }

   /** Returns the sum of {@code getSumTotalTermFreq(field)} over all sub-readers. */
   @Override
   public final long getSumTotalTermFreq(String field) throws IOException {
     ensureOpen();
     long sum = 0;
     for (int i = 0; i < subReaders.length; i++) {
       final R reader = subReaders[i];
       final long part = reader.getSumTotalTermFreq(field);
       assert part >= 0;
       assert part >= reader.getSumDocFreq(field);
       sum += part;
     }
     return sum;
   }

   /**
    * Helper method for subclasses to get the index of the sub-reader that holds a doc ID.
    * @throws IllegalArgumentException if {@code docID} is negative or not less than {@link #maxDoc()}
    */
   protected final int readerIndex(int docID) {
     if (docID >= 0 && docID < maxDoc) {
       return ReaderUtil.subIndex(docID, this.starts);
     }
     throw new IllegalArgumentException("docID must be >= 0 and < maxDoc=" + maxDoc + " (got docID=" + docID + ")");
   }

   /**
    * Helper method for subclasses to get the docBase of the given sub-reader index.
    * @throws IllegalArgumentException if {@code readerIndex} is negative or not less than the
    * number of sub-readers
    */
   protected final int readerBase(int readerIndex) {
     if (readerIndex >= 0 && readerIndex < subReaders.length) {
       return this.starts[readerIndex];
     }
     throw new IllegalArgumentException("readerIndex must be >= 0 and < getSequentialSubReaders().size()");
   }

   /** Returns the unmodifiable list view over the sub-reader array. */
   @Override
   protected final List<? extends R> getSequentialSubReaders() {
     return subReadersList;
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.index;


	import java.io.IOException;
	import java.util.Arrays;
	import java.util.Collections;
	import java.util.Comparator;
	import java.util.List;

	/** Base class for implementing {@link CompositeReader}s based on an array
	* of sub-readers. The implementing class has to add code for
	* correctly refcounting and closing the sub-readers.
	*
	* <p>User code will most likely use {@link MultiReader} to build a
	* composite reader on a set of sub-readers (like several
	* {@link DirectoryReader}s).
	*
	* <p> For efficiency, in this API documents are often referred to via
	* <i>document numbers</i>, non-negative integers which each name a unique
	* document in the index. These document numbers are ephemeral -- they may change
	* as documents are added to and deleted from an index. Clients should thus not
	* rely on a given document having the same number between sessions.
	*
	* <p><a name="thread-safety"></a><p><b>NOTE</b>: {@link
	* IndexReader} instances are completely thread
	* safe, meaning multiple threads can call any of its methods,
	* concurrently. If your application requires external
	* synchronization, you should <b>not</b> synchronize on the
	* <code>IndexReader</code> instance; use your own
	* (non-Lucene) objects instead.
	* @see MultiReader
	* @lucene.internal
	*/
	public abstract class BaseCompositeReader<R extends IndexReader> extends CompositeReader {
	private final R[] subReaders;
	/** A comparator for sorting sub-readers */
	protected final Comparator<R> subReadersSorter;
	private final int[] starts; // 1st docno for each reader
	private final int maxDoc;
	private int numDocs = -1; // computed lazily

	/** List view solely for {@link #getSequentialSubReaders()},
	* for effectiveness the array is used internally. */
	private final List<R> subReadersList;

	/**
	* Constructs a {@code BaseCompositeReader} on the given subReaders.
	* @param subReaders the wrapped sub-readers. This array is returned by
	* {@link #getSequentialSubReaders} and used to resolve the correct
	* subreader for docID-based methods. <b>Please note:</b> This array is <b>not</b>
	* cloned and not protected for modification, the subclass is responsible
	* to do this.
	* @param subReadersSorter – a comparator for sorting sub readers. If not {@code null}, this
	* comparator is used to sort sub readers, before using the for resolving doc IDs.
	*/
	protected BaseCompositeReader(R[] subReaders, Comparator<R> subReadersSorter) throws IOException {
	if (subReadersSorter != null) {
	Arrays.sort(subReaders, subReadersSorter);
	}
	this.subReaders = subReaders;
	this.subReadersSorter = subReadersSorter;
	this.subReadersList = Collections.unmodifiableList(Arrays.asList(subReaders));
	starts = new int[subReaders.length + 1]; // build starts array
	long maxDoc = 0;
	for (int i = 0; i < subReaders.length; i++) {
	starts[i] = (int) maxDoc;
	final IndexReader r = subReaders[i];
	maxDoc += r.maxDoc(); // compute maxDocs
	r.registerParentReader(this);
	}

	if (maxDoc > IndexWriter.getActualMaxDocs()) {
	if (this instanceof DirectoryReader) {
	// A single index has too many documents and it is corrupt (IndexWriter prevents this as of LUCENE-6299)
	throw new CorruptIndexException("Too many documents: an index cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + maxDoc, Arrays.toString(subReaders));
	} else {
	// Caller is building a MultiReader and it has too many documents; this case is just illegal arguments:
	throw new IllegalArgumentException("Too many documents: composite IndexReaders cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + maxDoc);
	}
	}

	this.maxDoc = Math.toIntExact(maxDoc);
	starts[subReaders.length] = this.maxDoc;
	}

	@Override
	public final Fields getTermVectors(int docID) throws IOException {
	ensureOpen();
	final int i = readerIndex(docID); // find subreader num
	return subReaders[i].getTermVectors(docID - starts[i]); // dispatch to subreader
	}

	@Override
	public final int numDocs() {
	// Don't call ensureOpen() here (it could affect performance)
	// We want to compute numDocs() lazily so that creating a wrapper that hides
	// some documents isn't slow at wrapping time, but on the first time that
	// numDocs() is called. This can help as there are lots of use-cases of a
	// reader that don't involve calling numDocs().
	// However it's not crucial to make sure that we don't call numDocs() more
	// than once on the sub readers, since they likely cache numDocs() anyway,
	// hence the lack of synchronization.
	int numDocs = this.numDocs;
	if (numDocs == -1) {
	numDocs = 0;
	for (IndexReader r : subReaders) {
	numDocs += r.numDocs();
	}
	assert numDocs >= 0;
	this.numDocs = numDocs;
	}
	return numDocs;
	}

	@Override
	public final int maxDoc() {
	// Don't call ensureOpen() here (it could affect performance)
	return maxDoc;
	}

	@Override
	public final void document(int docID, StoredFieldVisitor visitor) throws IOException {
	ensureOpen();
	final int i = readerIndex(docID); // find subreader num
	subReaders[i].document(docID - starts[i], visitor); // dispatch to subreader
	}

	@Override
	public final int docFreq(Term term) throws IOException {
	ensureOpen();
	int total = 0; // sum freqs in subreaders
	for (int i = 0; i < subReaders.length; i++) {
	int sub = subReaders[i].docFreq(term);
	assert sub >= 0;
	assert sub <= subReaders[i].getDocCount(term.field());
	total += sub;
	}
	return total;
	}

	@Override
	public final long totalTermFreq(Term term) throws IOException {
	ensureOpen();
	long total = 0; // sum freqs in subreaders
	for (int i = 0; i < subReaders.length; i++) {
	long sub = subReaders[i].totalTermFreq(term);
	assert sub >= 0;
	assert sub <= subReaders[i].getSumTotalTermFreq(term.field());
	total += sub;
	}
	return total;
	}

	@Override
	public final long getSumDocFreq(String field) throws IOException {
	ensureOpen();
	long total = 0; // sum doc freqs in subreaders
	for (R reader : subReaders) {
	long sub = reader.getSumDocFreq(field);
	assert sub >= 0;
	assert sub <= reader.getSumTotalTermFreq(field);
	total += sub;
	}
	return total;
	}

	@Override
	public final int getDocCount(String field) throws IOException {
	ensureOpen();
	int total = 0; // sum doc counts in subreaders
	for (R reader : subReaders) {
	int sub = reader.getDocCount(field);
	assert sub >= 0;
	assert sub <= reader.maxDoc();
	total += sub;
	}
	return total;
	}

	@Override
	public final long getSumTotalTermFreq(String field) throws IOException {
	ensureOpen();
	long total = 0; // sum doc total term freqs in subreaders
	for (R reader : subReaders) {
	long sub = reader.getSumTotalTermFreq(field);
	assert sub >= 0;
	assert sub >= reader.getSumDocFreq(field);
	total += sub;
	}
	return total;
	}

	/** Helper method for subclasses to get the corresponding reader for a doc ID */
	protected final int readerIndex(int docID) {
	if (docID < 0 \|\| docID >= maxDoc) {
	throw new IllegalArgumentException("docID must be >= 0 and < maxDoc=" + maxDoc + " (got docID=" + docID + ")");
	}
	return ReaderUtil.subIndex(docID, this.starts);
	}

	/** Helper method for subclasses to get the docBase of the given sub-reader index. */
	protected final int readerBase(int readerIndex) {
	if (readerIndex < 0 \|\| readerIndex >= subReaders.length) {
	throw new IllegalArgumentException("readerIndex must be >= 0 and < getSequentialSubReaders().size()");
	}
	return this.starts[readerIndex];
	}

	@Override
	protected final List<? extends R> getSequentialSubReaders() {
	return subReadersList;
	}
	}