| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.index; |
| |
| |
| import java.io.IOException; |
| |
| import org.apache.lucene.util.AttributeSource; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.BytesRefIterator; |
| |
| /** Iterator to seek ({@link #seekCeil(BytesRef)}, {@link |
| * #seekExact(BytesRef)}) or step through ({@link |
| * #next} terms to obtain frequency information ({@link |
| * #docFreq}), {@link PostingsEnum} or {@link |
| * PostingsEnum} for the current term ({@link |
| * #postings}. |
| * |
| * <p>Term enumerations are always ordered by |
| * BytesRef.compareTo, which is Unicode sort |
| * order if the terms are UTF-8 bytes. Each term in the |
| * enumeration is greater than the one before it.</p> |
| * |
| * <p>The TermsEnum is unpositioned when you first obtain it |
| * and you must first successfully call {@link #next} or one |
| * of the <code>seek</code> methods. |
| * |
| * @lucene.experimental */ |
| public abstract class TermsEnum implements BytesRefIterator { |
| |
| /** Sole constructor. (For invocation by subclass |
| * constructors, typically implicit.) */ |
| protected TermsEnum() { |
| } |
| |
| /** Returns the related attributes. */ |
| public abstract AttributeSource attributes(); |
| |
| /** Represents returned result from {@link #seekCeil}. */ |
| public enum SeekStatus { |
| /** The term was not found, and the end of iteration was hit. */ |
| END, |
| /** The precise term was found. */ |
| FOUND, |
| /** A different term was found after the requested term */ |
| NOT_FOUND |
| }; |
| |
| /** |
| * Attempts to seek to the exact term, returning true if the term is found. If this returns false, the enum is |
| * unpositioned. For some codecs, seekExact may be substantially faster than {@link #seekCeil}. |
| * @return true if the term is found; return false if the enum is unpositioned. |
| */ |
| public abstract boolean seekExact(BytesRef text) throws IOException; |
| |
| /** Seeks to the specified term, if it exists, or to the |
| * next (ceiling) term. Returns SeekStatus to |
| * indicate whether exact term was found, a different |
| * term was found, or EOF was hit. The target term may |
| * be before or after the current term. If this returns |
| * SeekStatus.END, the enum is unpositioned. */ |
| public abstract SeekStatus seekCeil(BytesRef text) throws IOException; |
| |
| /** Seeks to the specified term by ordinal (position) as |
| * previously returned by {@link #ord}. The target ord |
| * may be before or after the current ord, and must be |
| * within bounds. */ |
| public abstract void seekExact(long ord) throws IOException; |
| |
| /** |
| * Expert: Seeks a specific position by {@link TermState} previously obtained |
| * from {@link #termState()}. Callers should maintain the {@link TermState} to |
| * use this method. Low-level implementations may position the TermsEnum |
| * without re-seeking the term dictionary. |
| * <p> |
| * Seeking by {@link TermState} should only be used iff the state was obtained |
| * from the same {@link TermsEnum} instance. |
| * <p> |
| * NOTE: Using this method with an incompatible {@link TermState} might leave |
| * this {@link TermsEnum} in undefined state. On a segment level |
| * {@link TermState} instances are compatible only iff the source and the |
| * target {@link TermsEnum} operate on the same field. If operating on segment |
| * level, TermState instances must not be used across segments. |
| * <p> |
| * NOTE: A seek by {@link TermState} might not restore the |
| * {@link AttributeSource}'s state. {@link AttributeSource} states must be |
| * maintained separately if this method is used. |
| * @param term the term the TermState corresponds to |
| * @param state the {@link TermState} |
| * */ |
| public abstract void seekExact(BytesRef term, TermState state) throws IOException; |
| |
| /** Returns current term. Do not call this when the enum |
| * is unpositioned. */ |
| public abstract BytesRef term() throws IOException; |
| |
| /** Returns ordinal position for current term. This is an |
| * optional method (the codec may throw {@link |
| * UnsupportedOperationException}). Do not call this |
| * when the enum is unpositioned. */ |
| public abstract long ord() throws IOException; |
| |
| /** Returns the number of documents containing the current |
| * term. Do not call this when the enum is unpositioned. |
| * {@link SeekStatus#END}.*/ |
| public abstract int docFreq() throws IOException; |
| |
| /** Returns the total number of occurrences of this term |
| * across all documents (the sum of the freq() for each |
| * doc that has this term). Note that, like |
| * other term measures, this measure does not take |
| * deleted documents into account. */ |
| public abstract long totalTermFreq() throws IOException; |
| |
| /** Get {@link PostingsEnum} for the current term. Do not |
| * call this when the enum is unpositioned. This method |
| * will not return null. |
| * <p> |
| * <b>NOTE</b>: the returned iterator may return deleted documents, so |
| * deleted documents have to be checked on top of the {@link PostingsEnum}. |
| * <p> |
| * Use this method if you only require documents and frequencies, |
| * and do not need any proximity data. |
| * This method is equivalent to |
| * {@link #postings(PostingsEnum, int) postings(reuse, PostingsEnum.FREQS)} |
| * |
| * @param reuse pass a prior PostingsEnum for possible reuse |
| * @see #postings(PostingsEnum, int) |
| */ |
| public final PostingsEnum postings(PostingsEnum reuse) throws IOException { |
| return postings(reuse, PostingsEnum.FREQS); |
| } |
| |
| /** Get {@link PostingsEnum} for the current term, with |
| * control over whether freqs, positions, offsets or payloads |
| * are required. Do not call this when the enum is |
| * unpositioned. This method will not return null. |
| * <p> |
| * <b>NOTE</b>: the returned iterator may return deleted documents, so |
| * deleted documents have to be checked on top of the {@link PostingsEnum}. |
| * |
| * @param reuse pass a prior PostingsEnum for possible reuse |
| * @param flags specifies which optional per-document values |
| * you require; see {@link PostingsEnum#FREQS} |
| */ |
| public abstract PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException; |
| |
| /** |
| * Return a {@link ImpactsEnum}. |
| * @see #postings(PostingsEnum, int) |
| */ |
| public abstract ImpactsEnum impacts(int flags) throws IOException; |
| |
| /** |
| * Expert: Returns the TermsEnums internal state to position the TermsEnum |
| * without re-seeking the term dictionary. |
| * <p> |
| * NOTE: A seek by {@link TermState} might not capture the |
| * {@link AttributeSource}'s state. Callers must maintain the |
| * {@link AttributeSource} states separately |
| * |
| * @see TermState |
| * @see #seekExact(BytesRef, TermState) |
| */ |
| public abstract TermState termState() throws IOException; |
| |
| /** An empty TermsEnum for quickly returning an empty instance e.g. |
| * in {@link org.apache.lucene.search.MultiTermQuery} |
| * <p><em>Please note:</em> This enum should be unmodifiable, |
| * but it is currently possible to add Attributes to it. |
| * This should not be a problem, as the enum is always empty and |
| * the existence of unused Attributes does not matter. |
| */ |
| public static final TermsEnum EMPTY = |
| new TermsEnum() { |
| |
| private AttributeSource atts = null; |
| |
| @Override |
| public SeekStatus seekCeil(BytesRef term) { |
| return SeekStatus.END; |
| } |
| |
| @Override |
| public void seekExact(long ord) {} |
| |
| @Override |
| public BytesRef term() { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| |
| @Override |
| public int docFreq() { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| |
| @Override |
| public long totalTermFreq() { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| |
| @Override |
| public long ord() { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| |
| @Override |
| public PostingsEnum postings(PostingsEnum reuse, int flags) { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| |
| @Override |
| public ImpactsEnum impacts(int flags) throws IOException { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| |
| @Override |
| public BytesRef next() { |
| return null; |
| } |
| |
| @Override // make it synchronized here, to prevent double lazy init |
| public synchronized AttributeSource attributes() { |
| if (atts == null) { |
| atts = new AttributeSource(); |
| } |
| return atts; |
| } |
| |
| @Override |
| public boolean seekExact(BytesRef text) throws IOException { |
| return seekCeil(text) == SeekStatus.FOUND; |
| } |
| |
| @Override |
| public TermState termState() { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| |
| @Override |
| public void seekExact(BytesRef term, TermState state) { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| }; |
| } |