| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.index; |
| |
| import java.io.IOException; |
| |
| import org.apache.lucene.util.Bits; |
| |
| /** {@code LeafReader} is an abstract class, providing an interface for accessing an |
| index. Search of an index is done entirely through this abstract interface, |
| so that any subclass which implements it is searchable. IndexReaders implemented |
| by this subclass do not consist of several sub-readers, |
| they are atomic. They support retrieval of stored fields, doc values, terms, |
| and postings. |
| |
| <p>For efficiency, in this API documents are often referred to via |
| <i>document numbers</i>, non-negative integers which each name a unique |
| document in the index. These document numbers are ephemeral -- they may change |
| as documents are added to and deleted from an index. Clients should thus not |
| rely on a given document having the same number between sessions. |
| |
| <p> |
| <a name="thread-safety"></a><p><b>NOTE</b>: {@link |
| IndexReader} instances are completely thread |
| safe, meaning multiple threads can call any of its methods, |
| concurrently. If your application requires external |
| synchronization, you should <b>not</b> synchronize on the |
| <code>IndexReader</code> instance; use your own |
| (non-Lucene) objects instead. |
| */ |
| public abstract class LeafReader extends IndexReader { |
| |
| private final LeafReaderContext readerContext = new LeafReaderContext(this); |
| |
| /** Sole constructor. (For invocation by subclass |
| * constructors, typically implicit.) */ |
| protected LeafReader() { |
| super(); |
| } |
| |
| @Override |
| public final LeafReaderContext getContext() { |
| ensureOpen(); |
| return readerContext; |
| } |
| |
| /** |
| * Optional method: Return a {@link IndexReader.CacheHelper} that can be used to cache |
| * based on the content of this leaf regardless of deletions. Two readers |
| * that have the same data but different sets of deleted documents or doc |
| * values updates may be considered equal. Consider using |
| * {@link #getReaderCacheHelper} if you need deletions or dv updates to be |
| * taken into account. |
| * <p>A return value of {@code null} indicates that this reader is not suited |
| * for caching, which is typically the case for short-lived wrappers that |
| * alter the content of the wrapped leaf reader. |
| * @lucene.experimental |
| */ |
| public abstract CacheHelper getCoreCacheHelper(); |
| |
| @Override |
| public final int docFreq(Term term) throws IOException { |
| final Terms terms = terms(term.field()); |
| if (terms == null) { |
| return 0; |
| } |
| final TermsEnum termsEnum = terms.iterator(); |
| if (termsEnum.seekExact(term.bytes())) { |
| return termsEnum.docFreq(); |
| } else { |
| return 0; |
| } |
| } |
| |
| /** Returns the number of documents containing the term |
| * <code>t</code>. This method returns 0 if the term or |
| * field does not exists. This method does not take into |
| * account deleted documents that have not yet been merged |
| * away. */ |
| @Override |
| public final long totalTermFreq(Term term) throws IOException { |
| final Terms terms = terms(term.field()); |
| if (terms == null) { |
| return 0; |
| } |
| final TermsEnum termsEnum = terms.iterator(); |
| if (termsEnum.seekExact(term.bytes())) { |
| return termsEnum.totalTermFreq(); |
| } else { |
| return 0; |
| } |
| } |
| |
| @Override |
| public final long getSumDocFreq(String field) throws IOException { |
| final Terms terms = terms(field); |
| if (terms == null) { |
| return 0; |
| } |
| return terms.getSumDocFreq(); |
| } |
| |
| @Override |
| public final int getDocCount(String field) throws IOException { |
| final Terms terms = terms(field); |
| if (terms == null) { |
| return 0; |
| } |
| return terms.getDocCount(); |
| } |
| |
| @Override |
| public final long getSumTotalTermFreq(String field) throws IOException { |
| final Terms terms = terms(field); |
| if (terms == null) { |
| return 0; |
| } |
| return terms.getSumTotalTermFreq(); |
| } |
| |
| /** Returns the {@link Terms} index for this field, or null if it has none. */ |
| public abstract Terms terms(String field) throws IOException; |
| |
| /** Returns {@link PostingsEnum} for the specified term. |
| * This will return null if either the field or |
| * term does not exist. |
| * <p><b>NOTE:</b> The returned {@link PostingsEnum} may contain deleted docs. |
| * @see TermsEnum#postings(PostingsEnum) */ |
| public final PostingsEnum postings(Term term, int flags) throws IOException { |
| assert term.field() != null; |
| assert term.bytes() != null; |
| final Terms terms = terms(term.field()); |
| if (terms != null) { |
| final TermsEnum termsEnum = terms.iterator(); |
| if (termsEnum.seekExact(term.bytes())) { |
| return termsEnum.postings(null, flags); |
| } |
| } |
| return null; |
| } |
| |
| /** Returns {@link PostingsEnum} for the specified term |
| * with {@link PostingsEnum#FREQS}. |
| * <p> |
| * Use this method if you only require documents and frequencies, |
| * and do not need any proximity data. |
| * This method is equivalent to |
| * {@link #postings(Term, int) postings(term, PostingsEnum.FREQS)} |
| * <p><b>NOTE:</b> The returned {@link PostingsEnum} may contain deleted docs. |
| * @see #postings(Term, int) |
| */ |
| public final PostingsEnum postings(Term term) throws IOException { |
| return postings(term, PostingsEnum.FREQS); |
| } |
| |
| /** Returns {@link NumericDocValues} for this field, or |
| * null if no numeric doc values were indexed for |
| * this field. The returned instance should only be |
| * used by a single thread. */ |
| public abstract NumericDocValues getNumericDocValues(String field) throws IOException; |
| |
| /** Returns {@link BinaryDocValues} for this field, or |
| * null if no binary doc values were indexed for |
| * this field. The returned instance should only be |
| * used by a single thread. */ |
| public abstract BinaryDocValues getBinaryDocValues(String field) throws IOException; |
| |
| /** Returns {@link SortedDocValues} for this field, or |
| * null if no {@link SortedDocValues} were indexed for |
| * this field. The returned instance should only be |
| * used by a single thread. */ |
| public abstract SortedDocValues getSortedDocValues(String field) throws IOException; |
| |
| /** Returns {@link SortedNumericDocValues} for this field, or |
| * null if no {@link SortedNumericDocValues} were indexed for |
| * this field. The returned instance should only be |
| * used by a single thread. */ |
| public abstract SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException; |
| |
| /** Returns {@link SortedSetDocValues} for this field, or |
| * null if no {@link SortedSetDocValues} were indexed for |
| * this field. The returned instance should only be |
| * used by a single thread. */ |
| public abstract SortedSetDocValues getSortedSetDocValues(String field) throws IOException; |
| |
| /** Returns {@link NumericDocValues} representing norms |
| * for this field, or null if no {@link NumericDocValues} |
| * were indexed. The returned instance should only be |
| * used by a single thread. */ |
| public abstract NumericDocValues getNormValues(String field) throws IOException; |
| |
| /** |
| * Get the {@link FieldInfos} describing all fields in |
| * this reader. |
| * |
| * Note: Implementations should cache the FieldInfos |
| * instance returned by this method such that subsequent |
| * calls to this method return the same instance. |
| * @lucene.experimental |
| */ |
| public abstract FieldInfos getFieldInfos(); |
| |
| /** Returns the {@link Bits} representing live (not |
| * deleted) docs. A set bit indicates the doc ID has not |
| * been deleted. If this method returns null it means |
| * there are no deleted documents (all documents are |
| * live). |
| * |
| * The returned instance has been safely published for |
| * use by multiple threads without additional |
| * synchronization. |
| */ |
| public abstract Bits getLiveDocs(); |
| |
| /** Returns the {@link PointValues} used for numeric or |
| * spatial searches for the given field, or null if there |
| * are no point fields. */ |
| public abstract PointValues getPointValues(String field) throws IOException; |
| |
| /** |
| * Checks consistency of this reader. |
| * <p> |
| * Note that this may be costly in terms of I/O, e.g. |
| * may involve computing a checksum value against large data files. |
| * @lucene.internal |
| */ |
| public abstract void checkIntegrity() throws IOException; |
| |
| /** |
| * Return metadata about this leaf. |
| * @lucene.experimental */ |
| public abstract LeafMetaData getMetaData(); |
| } |