| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.index; |
| |
| import java.io.IOException; |
| import org.apache.lucene.util.Bits; |
| |
| /** |
| * {@code LeafReader} is an abstract class, providing an interface for accessing an index. Search of |
| * an index is done entirely through this abstract interface, so that any subclass which implements |
| * it is searchable. IndexReaders implemented by this subclass do not consist of several |
| * sub-readers, they are atomic. They support retrieval of stored fields, doc values, terms, and |
| * postings. |
| * |
| * <p>For efficiency, in this API documents are often referred to via <i>document numbers</i>, |
| * non-negative integers which each name a unique document in the index. These document numbers are |
| * ephemeral -- they may change as documents are added to and deleted from an index. Clients should |
| * thus not rely on a given document having the same number between sessions. |
| * |
| * <p><a id="thread-safety"></a> |
| * |
| * <p><b>NOTE</b>: {@link IndexReader} instances are completely thread safe, meaning multiple |
| * threads can call any of its methods, concurrently. If your application requires external |
| * synchronization, you should <b>not</b> synchronize on the <code>IndexReader</code> instance; use |
| * your own (non-Lucene) objects instead. |
| */ |
| public abstract class LeafReader extends IndexReader { |
| |
| private final LeafReaderContext readerContext = new LeafReaderContext(this); |
| |
| /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */ |
| protected LeafReader() { |
| super(); |
| } |
| |
| @Override |
| public final LeafReaderContext getContext() { |
| ensureOpen(); |
| return readerContext; |
| } |
| |
| /** |
| * Optional method: Return a {@link IndexReader.CacheHelper} that can be used to cache based on |
| * the content of this leaf regardless of deletions. Two readers that have the same data but |
| * different sets of deleted documents or doc values updates may be considered equal. Consider |
| * using {@link #getReaderCacheHelper} if you need deletions or dv updates to be taken into |
| * account. |
| * |
| * <p>A return value of {@code null} indicates that this reader is not suited for caching, which |
| * is typically the case for short-lived wrappers that alter the content of the wrapped leaf |
| * reader. |
| * |
| * @lucene.experimental |
| */ |
| public abstract CacheHelper getCoreCacheHelper(); |
| |
| @Override |
| public final int docFreq(Term term) throws IOException { |
| final Terms terms = terms(term.field()); |
| if (terms == null) { |
| return 0; |
| } |
| final TermsEnum termsEnum = terms.iterator(); |
| if (termsEnum.seekExact(term.bytes())) { |
| return termsEnum.docFreq(); |
| } else { |
| return 0; |
| } |
| } |
| |
| /** |
| * Returns the number of documents containing the term <code>t</code>. This method returns 0 if |
| * the term or field does not exists. This method does not take into account deleted documents |
| * that have not yet been merged away. |
| */ |
| @Override |
| public final long totalTermFreq(Term term) throws IOException { |
| final Terms terms = terms(term.field()); |
| if (terms == null) { |
| return 0; |
| } |
| final TermsEnum termsEnum = terms.iterator(); |
| if (termsEnum.seekExact(term.bytes())) { |
| return termsEnum.totalTermFreq(); |
| } else { |
| return 0; |
| } |
| } |
| |
| @Override |
| public final long getSumDocFreq(String field) throws IOException { |
| final Terms terms = terms(field); |
| if (terms == null) { |
| return 0; |
| } |
| return terms.getSumDocFreq(); |
| } |
| |
| @Override |
| public final int getDocCount(String field) throws IOException { |
| final Terms terms = terms(field); |
| if (terms == null) { |
| return 0; |
| } |
| return terms.getDocCount(); |
| } |
| |
| @Override |
| public final long getSumTotalTermFreq(String field) throws IOException { |
| final Terms terms = terms(field); |
| if (terms == null) { |
| return 0; |
| } |
| return terms.getSumTotalTermFreq(); |
| } |
| |
| /** Returns the {@link Terms} index for this field, or null if it has none. */ |
| public abstract Terms terms(String field) throws IOException; |
| |
| /** |
| * Returns {@link PostingsEnum} for the specified term. This will return null if either the field |
| * or term does not exist. |
| * |
| * <p><b>NOTE:</b> The returned {@link PostingsEnum} may contain deleted docs. |
| * |
| * @see TermsEnum#postings(PostingsEnum) |
| */ |
| public final PostingsEnum postings(Term term, int flags) throws IOException { |
| assert term.field() != null; |
| assert term.bytes() != null; |
| final Terms terms = terms(term.field()); |
| if (terms != null) { |
| final TermsEnum termsEnum = terms.iterator(); |
| if (termsEnum.seekExact(term.bytes())) { |
| return termsEnum.postings(null, flags); |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Returns {@link PostingsEnum} for the specified term with {@link PostingsEnum#FREQS}. |
| * |
| * <p>Use this method if you only require documents and frequencies, and do not need any proximity |
| * data. This method is equivalent to {@link #postings(Term, int) postings(term, |
| * PostingsEnum.FREQS)} |
| * |
| * <p><b>NOTE:</b> The returned {@link PostingsEnum} may contain deleted docs. |
| * |
| * @see #postings(Term, int) |
| */ |
| public final PostingsEnum postings(Term term) throws IOException { |
| return postings(term, PostingsEnum.FREQS); |
| } |
| |
| /** |
| * Returns {@link NumericDocValues} for this field, or null if no numeric doc values were indexed |
| * for this field. The returned instance should only be used by a single thread. |
| */ |
| public abstract NumericDocValues getNumericDocValues(String field) throws IOException; |
| |
| /** |
| * Returns {@link BinaryDocValues} for this field, or null if no binary doc values were indexed |
| * for this field. The returned instance should only be used by a single thread. |
| */ |
| public abstract BinaryDocValues getBinaryDocValues(String field) throws IOException; |
| |
| /** |
| * Returns {@link SortedDocValues} for this field, or null if no {@link SortedDocValues} were |
| * indexed for this field. The returned instance should only be used by a single thread. |
| */ |
| public abstract SortedDocValues getSortedDocValues(String field) throws IOException; |
| |
| /** |
| * Returns {@link SortedNumericDocValues} for this field, or null if no {@link |
| * SortedNumericDocValues} were indexed for this field. The returned instance should only be used |
| * by a single thread. |
| */ |
| public abstract SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException; |
| |
| /** |
| * Returns {@link SortedSetDocValues} for this field, or null if no {@link SortedSetDocValues} |
| * were indexed for this field. The returned instance should only be used by a single thread. |
| */ |
| public abstract SortedSetDocValues getSortedSetDocValues(String field) throws IOException; |
| |
| /** |
| * Returns {@link NumericDocValues} representing norms for this field, or null if no {@link |
| * NumericDocValues} were indexed. The returned instance should only be used by a single thread. |
| */ |
| public abstract NumericDocValues getNormValues(String field) throws IOException; |
| |
| /** |
| * Returns {@link VectorValues} for this field, or null if no {@link VectorValues} were indexed. |
| * The returned instance should only be used by a single thread. |
| */ |
| public abstract VectorValues getVectorValues(String field) throws IOException; |
| |
| /** |
| * Get the {@link FieldInfos} describing all fields in this reader. |
| * |
| * <p>Note: Implementations should cache the FieldInfos instance returned by this method such that |
| * subsequent calls to this method return the same instance. |
| * |
| * @lucene.experimental |
| */ |
| public abstract FieldInfos getFieldInfos(); |
| |
| /** |
| * Returns the {@link Bits} representing live (not deleted) docs. A set bit indicates the doc ID |
| * has not been deleted. If this method returns null it means there are no deleted documents (all |
| * documents are live). |
| * |
| * <p>The returned instance has been safely published for use by multiple threads without |
| * additional synchronization. |
| */ |
| public abstract Bits getLiveDocs(); |
| |
| /** |
| * Returns the {@link PointValues} used for numeric or spatial searches for the given field, or |
| * null if there are no point fields. |
| */ |
| public abstract PointValues getPointValues(String field) throws IOException; |
| |
| /** |
| * Checks consistency of this reader. |
| * |
| * <p>Note that this may be costly in terms of I/O, e.g. may involve computing a checksum value |
| * against large data files. |
| * |
| * @lucene.internal |
| */ |
| public abstract void checkIntegrity() throws IOException; |
| |
| /** |
| * Return metadata about this leaf. |
| * |
| * @lucene.experimental |
| */ |
| public abstract LeafMetaData getMetaData(); |
| } |