/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| package org.apache.lucene.index; |
| |
| import java.io.IOException; |
| import org.apache.lucene.util.AttributeSource; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.BytesRefIterator; |
| |
| /** |
| * Iterator to seek ({@link #seekCeil(BytesRef)}, {@link #seekExact(BytesRef)}) or step through |
| * ({@link #next} terms to obtain frequency information ({@link #docFreq}), {@link PostingsEnum} or |
| * {@link PostingsEnum} for the current term ({@link #postings}. |
| * |
| * <p>Term enumerations are always ordered by BytesRef.compareTo, which is Unicode sort order if the |
| * terms are UTF-8 bytes. Each term in the enumeration is greater than the one before it. |
| * |
| * <p>The TermsEnum is unpositioned when you first obtain it and you must first successfully call |
| * {@link #next} or one of the <code>seek</code> methods. |
| * |
| * @lucene.experimental |
| */ |
| public abstract class TermsEnum implements BytesRefIterator { |
| |
| /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */ |
| protected TermsEnum() {} |
| |
| /** Returns the related attributes. */ |
| public abstract AttributeSource attributes(); |
| |
| /** Represents returned result from {@link #seekCeil}. */ |
| public enum SeekStatus { |
| /** The term was not found, and the end of iteration was hit. */ |
| END, |
| /** The precise term was found. */ |
| FOUND, |
| /** A different term was found after the requested term */ |
| NOT_FOUND |
| }; |
| |
| /** |
| * Attempts to seek to the exact term, returning true if the term is found. If this returns false, |
| * the enum is unpositioned. For some codecs, seekExact may be substantially faster than {@link |
| * #seekCeil}. |
| * |
| * <p> |
| * |
| * @return true if the term is found; return false if the enum is unpositioned. |
| */ |
| public abstract boolean seekExact(BytesRef text) throws IOException; |
| |
| /** |
| * Seeks to the specified term, if it exists, or to the next (ceiling) term. Returns SeekStatus to |
| * indicate whether exact term was found, a different term was found, or EOF was hit. The target |
| * term may be before or after the current term. If this returns SeekStatus.END, the enum is |
| * unpositioned. |
| */ |
| public abstract SeekStatus seekCeil(BytesRef text) throws IOException; |
| |
| /** |
| * Seeks to the specified term by ordinal (position) as previously returned by {@link #ord}. The |
| * target ord may be before or after the current ord, and must be within bounds. |
| */ |
| public abstract void seekExact(long ord) throws IOException; |
| |
| /** |
| * Expert: Seeks a specific position by {@link TermState} previously obtained from {@link |
| * #termState()}. Callers should maintain the {@link TermState} to use this method. Low-level |
| * implementations may position the TermsEnum without re-seeking the term dictionary. |
| * |
| * <p>Seeking by {@link TermState} should only be used iff the state was obtained from the same |
| * {@link TermsEnum} instance. |
| * |
| * <p>NOTE: Using this method with an incompatible {@link TermState} might leave this {@link |
| * TermsEnum} in undefined state. On a segment level {@link TermState} instances are compatible |
| * only iff the source and the target {@link TermsEnum} operate on the same field. If operating on |
| * segment level, TermState instances must not be used across segments. |
| * |
| * <p>NOTE: A seek by {@link TermState} might not restore the {@link AttributeSource}'s state. |
| * {@link AttributeSource} states must be maintained separately if this method is used. |
| * |
| * @param term the term the TermState corresponds to |
| * @param state the {@link TermState} |
| */ |
| public abstract void seekExact(BytesRef term, TermState state) throws IOException; |
| |
| /** Returns current term. Do not call this when the enum is unpositioned. */ |
| public abstract BytesRef term() throws IOException; |
| |
| /** |
| * Returns ordinal position for current term. This is an optional method (the codec may throw |
| * {@link UnsupportedOperationException}). Do not call this when the enum is unpositioned. |
| */ |
| public abstract long ord() throws IOException; |
| |
| /** |
| * Returns the number of documents containing the current term. Do not call this when the enum is |
| * unpositioned. {@link SeekStatus#END}. |
| */ |
| public abstract int docFreq() throws IOException; |
| |
| /** |
| * Returns the total number of occurrences of this term across all documents (the sum of the |
| * freq() for each doc that has this term). Note that, like other term measures, this measure does |
| * not take deleted documents into account. |
| */ |
| public abstract long totalTermFreq() throws IOException; |
| |
| /** |
| * Get {@link PostingsEnum} for the current term. Do not call this when the enum is unpositioned. |
| * This method will not return null. |
| * |
| * <p><b>NOTE</b>: the returned iterator may return deleted documents, so deleted documents have |
| * to be checked on top of the {@link PostingsEnum}. |
| * |
| * <p>Use this method if you only require documents and frequencies, and do not need any proximity |
| * data. This method is equivalent to {@link #postings(PostingsEnum, int) postings(reuse, |
| * PostingsEnum.FREQS)} |
| * |
| * @param reuse pass a prior PostingsEnum for possible reuse |
| * @see #postings(PostingsEnum, int) |
| */ |
| public final PostingsEnum postings(PostingsEnum reuse) throws IOException { |
| return postings(reuse, PostingsEnum.FREQS); |
| } |
| |
| /** |
| * Get {@link PostingsEnum} for the current term, with control over whether freqs, positions, |
| * offsets or payloads are required. Do not call this when the enum is unpositioned. This method |
| * will not return null. |
| * |
| * <p><b>NOTE</b>: the returned iterator may return deleted documents, so deleted documents have |
| * to be checked on top of the {@link PostingsEnum}. |
| * |
| * @param reuse pass a prior PostingsEnum for possible reuse |
| * @param flags specifies which optional per-document values you require; see {@link |
| * PostingsEnum#FREQS} |
| */ |
| public abstract PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException; |
| |
| /** |
| * Return a {@link ImpactsEnum}. |
| * |
| * @see #postings(PostingsEnum, int) |
| */ |
| public abstract ImpactsEnum impacts(int flags) throws IOException; |
| |
| /** |
| * Expert: Returns the TermsEnums internal state to position the TermsEnum without re-seeking the |
| * term dictionary. |
| * |
| * <p>NOTE: A seek by {@link TermState} might not capture the {@link AttributeSource}'s state. |
| * Callers must maintain the {@link AttributeSource} states separately |
| * |
| * @see TermState |
| * @see #seekExact(BytesRef, TermState) |
| */ |
| public abstract TermState termState() throws IOException; |
| |
| /** |
| * An empty TermsEnum for quickly returning an empty instance e.g. in {@link |
| * org.apache.lucene.search.MultiTermQuery} |
| * |
| * <p><em>Please note:</em> This enum should be unmodifiable, but it is currently possible to add |
| * Attributes to it. This should not be a problem, as the enum is always empty and the existence |
| * of unused Attributes does not matter. |
| */ |
| public static final TermsEnum EMPTY = |
| new TermsEnum() { |
| |
| private AttributeSource atts = null; |
| |
| @Override |
| public SeekStatus seekCeil(BytesRef term) { |
| return SeekStatus.END; |
| } |
| |
| @Override |
| public void seekExact(long ord) {} |
| |
| @Override |
| public BytesRef term() { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| |
| @Override |
| public int docFreq() { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| |
| @Override |
| public long totalTermFreq() { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| |
| @Override |
| public long ord() { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| |
| @Override |
| public PostingsEnum postings(PostingsEnum reuse, int flags) { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| |
| @Override |
| public ImpactsEnum impacts(int flags) throws IOException { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| |
| @Override |
| public BytesRef next() { |
| return null; |
| } |
| |
| @Override // make it synchronized here, to prevent double lazy init |
| public synchronized AttributeSource attributes() { |
| if (atts == null) { |
| atts = new AttributeSource(); |
| } |
| return atts; |
| } |
| |
| @Override |
| public boolean seekExact(BytesRef text) throws IOException { |
| return seekCeil(text) == SeekStatus.FOUND; |
| } |
| |
| @Override |
| public TermState termState() { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| |
| @Override |
| public void seekExact(BytesRef term, TermState state) { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| }; |
| } |