// blob: de0234be9088ef3099208ad2e4bbf679a7a83691 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
/**
* Iterator to seek ({@link #seekCeil(BytesRef)}, {@link #seekExact(BytesRef)}) or step through
* ({@link #next}) terms to obtain frequency information ({@link #docFreq}) or a {@link
* PostingsEnum} for the current term ({@link #postings(PostingsEnum, int)}).
*
* <p>Term enumerations are always ordered by BytesRef.compareTo, which is Unicode sort order if the
* terms are UTF-8 bytes. Each term in the enumeration is greater than the one before it.
*
* <p>The TermsEnum is unpositioned when you first obtain it and you must first successfully call
* {@link #next} or one of the <code>seek</code> methods.
*
* @lucene.experimental
*/
public abstract class TermsEnum implements BytesRefIterator {

  /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
  protected TermsEnum() {}

  /** Returns the related attributes. */
  public abstract AttributeSource attributes();

  /** Represents returned result from {@link #seekCeil}. */
  public enum SeekStatus {
    /** The term was not found, and the end of iteration was hit. */
    END,
    /** The precise term was found. */
    FOUND,
    /** A different term was found after the requested term. */
    NOT_FOUND
  }

  /**
   * Attempts to seek to the exact term, returning true if the term is found. If this returns false,
   * the enum is unpositioned. For some codecs, seekExact may be substantially faster than {@link
   * #seekCeil}.
   *
   * @param text the term to seek to
   * @return true if the term is found; false otherwise, in which case the enum is unpositioned.
   */
  public abstract boolean seekExact(BytesRef text) throws IOException;

  /**
   * Seeks to the specified term, if it exists, or to the next (ceiling) term. Returns SeekStatus to
   * indicate whether exact term was found, a different term was found, or EOF was hit. The target
   * term may be before or after the current term. If this returns SeekStatus.END, the enum is
   * unpositioned.
   *
   * @param text the term to seek to
   * @return {@link SeekStatus#FOUND} if the exact term was found, {@link SeekStatus#NOT_FOUND} if
   *     a different term was found after it, or {@link SeekStatus#END} if the end of iteration was
   *     hit
   */
  public abstract SeekStatus seekCeil(BytesRef text) throws IOException;

  /**
   * Seeks to the specified term by ordinal (position) as previously returned by {@link #ord}. The
   * target ord may be before or after the current ord, and must be within bounds.
   *
   * @param ord the ordinal of the term to seek to
   */
  public abstract void seekExact(long ord) throws IOException;

  /**
   * Expert: Seeks a specific position by {@link TermState} previously obtained from {@link
   * #termState()}. Callers should maintain the {@link TermState} to use this method. Low-level
   * implementations may position the TermsEnum without re-seeking the term dictionary.
   *
   * <p>Seeking by {@link TermState} should only be used iff the state was obtained from the same
   * {@link TermsEnum} instance.
   *
   * <p>NOTE: Using this method with an incompatible {@link TermState} might leave this {@link
   * TermsEnum} in undefined state. On a segment level {@link TermState} instances are compatible
   * only iff the source and the target {@link TermsEnum} operate on the same field. If operating on
   * segment level, TermState instances must not be used across segments.
   *
   * <p>NOTE: A seek by {@link TermState} might not restore the {@link AttributeSource}'s state.
   * {@link AttributeSource} states must be maintained separately if this method is used.
   *
   * @param term the term the TermState corresponds to
   * @param state the {@link TermState}
   */
  public abstract void seekExact(BytesRef term, TermState state) throws IOException;

  /** Returns current term. Do not call this when the enum is unpositioned. */
  public abstract BytesRef term() throws IOException;

  /**
   * Returns ordinal position for current term. This is an optional method (the codec may throw
   * {@link UnsupportedOperationException}). Do not call this when the enum is unpositioned.
   */
  public abstract long ord() throws IOException;

  /**
   * Returns the number of documents containing the current term. Do not call this when the enum is
   * unpositioned (for example after {@link #seekCeil} returned {@link SeekStatus#END}).
   */
  public abstract int docFreq() throws IOException;

  /**
   * Returns the total number of occurrences of this term across all documents (the sum of the
   * freq() for each doc that has this term). Note that, like other term measures, this measure does
   * not take deleted documents into account.
   */
  public abstract long totalTermFreq() throws IOException;

  /**
   * Get {@link PostingsEnum} for the current term. Do not call this when the enum is unpositioned.
   * This method will not return null.
   *
   * <p><b>NOTE</b>: the returned iterator may return deleted documents, so deleted documents have
   * to be checked on top of the {@link PostingsEnum}.
   *
   * <p>Use this method if you only require documents and frequencies, and do not need any proximity
   * data. This method is equivalent to {@link #postings(PostingsEnum, int) postings(reuse,
   * PostingsEnum.FREQS)}.
   *
   * @param reuse pass a prior PostingsEnum for possible reuse
   * @see #postings(PostingsEnum, int)
   */
  public final PostingsEnum postings(PostingsEnum reuse) throws IOException {
    return postings(reuse, PostingsEnum.FREQS);
  }

  /**
   * Get {@link PostingsEnum} for the current term, with control over whether freqs, positions,
   * offsets or payloads are required. Do not call this when the enum is unpositioned. This method
   * will not return null.
   *
   * <p><b>NOTE</b>: the returned iterator may return deleted documents, so deleted documents have
   * to be checked on top of the {@link PostingsEnum}.
   *
   * @param reuse pass a prior PostingsEnum for possible reuse
   * @param flags specifies which optional per-document values you require; see {@link
   *     PostingsEnum#FREQS}
   */
  public abstract PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException;

  /**
   * Returns an {@link ImpactsEnum}.
   *
   * @see #postings(PostingsEnum, int)
   */
  public abstract ImpactsEnum impacts(int flags) throws IOException;

  /**
   * Expert: Returns the TermsEnum's internal state to position the TermsEnum without re-seeking the
   * term dictionary.
   *
   * <p>NOTE: A seek by {@link TermState} might not capture the {@link AttributeSource}'s state.
   * Callers must maintain the {@link AttributeSource} states separately.
   *
   * @see TermState
   * @see #seekExact(BytesRef, TermState)
   */
  public abstract TermState termState() throws IOException;

  /**
   * An empty TermsEnum for quickly returning an empty instance e.g. in {@link
   * org.apache.lucene.search.MultiTermQuery}
   *
   * <p><em>Please note:</em> This enum should be unmodifiable, but it is currently possible to add
   * Attributes to it. This should not be a problem, as the enum is always empty and the existence
   * of unused Attributes does not matter.
   */
  public static final TermsEnum EMPTY =
      new TermsEnum() {

        // Created on first call to attributes(); see the synchronized accessor below.
        private AttributeSource atts;

        /**
         * Builds the exception thrown by every accessor that requires a positioned enum; this enum
         * can never be positioned because it has no terms.
         */
        private IllegalStateException neverCalled() {
          return new IllegalStateException("this method should never be called");
        }

        @Override
        public SeekStatus seekCeil(BytesRef term) {
          // There are no terms, so every seek runs off the end.
          return SeekStatus.END;
        }

        @Override
        public boolean seekExact(BytesRef text) {
          // seekCeil always reports END here, so this is always false.
          return seekCeil(text) == SeekStatus.FOUND;
        }

        @Override
        public void seekExact(long ord) {}

        @Override
        public void seekExact(BytesRef term, TermState state) {
          throw neverCalled();
        }

        @Override
        public BytesRef next() {
          // null signals that iteration is exhausted.
          return null;
        }

        @Override
        public BytesRef term() {
          throw neverCalled();
        }

        @Override
        public long ord() {
          throw neverCalled();
        }

        @Override
        public int docFreq() {
          throw neverCalled();
        }

        @Override
        public long totalTermFreq() {
          throw neverCalled();
        }

        @Override
        public PostingsEnum postings(PostingsEnum reuse, int flags) {
          throw neverCalled();
        }

        @Override
        public ImpactsEnum impacts(int flags) {
          throw neverCalled();
        }

        @Override
        public TermState termState() {
          throw neverCalled();
        }

        @Override // make it synchronized here, to prevent double lazy init
        public synchronized AttributeSource attributes() {
          if (atts == null) {
            atts = new AttributeSource();
          }
          return atts;
        }
      };
}