blob: e1bd0e31239f222c09128ee13f3b8b6da18eb508 [file] [log] [blame]
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
/** Iterator to seek ({@link #seekCeil(BytesRef)}, {@link
* #seekExact(BytesRef,boolean)}) or step through ({@link
* #next} terms to obtain frequency information ({@link
* #docFreq}), {@link DocsEnum} or {@link
* DocsAndPositionsEnum} for the current term ({@link
* #docs}.
*
* <p>Term enumerations are always ordered by
* {@link #getComparator}. Each term in the enumeration is
* greater than the one before it.</p>
*
* <p>The TermsEnum is unpositioned when you first obtain it
* and you must first successfully call {@link #next} or one
* of the <code>seek</code> methods.
*
* @lucene.experimental */
public abstract class TermsEnum {
private AttributeSource atts = null;
/** Returns the related attributes. */
public AttributeSource attributes() {
if (atts == null) atts = new AttributeSource();
return atts;
}
/** Represents returned result from {@link #seekCeil}.
* If status is FOUND, then the precise term was found.
* If status is NOT_FOUND, then a different term was
* found. If the status is END, the end of the iteration
* was hit. */
public static enum SeekStatus {END, FOUND, NOT_FOUND};
/** Attemps to seek to the exact term, returning
* true if the term is found. If this returns false, the
* enum is unpositioned. For some codecs, seekExact may
* be substantially faster than {@link #seekCeil}. */
public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
return seekCeil(text, useCache) == SeekStatus.FOUND;
}
/** Expert: just like {@link #seekCeil(BytesRef)} but allows
* you to control whether the implementation should
* attempt to use its term cache (if it uses one). */
public abstract SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException;
/** Seeks to the specified term, if it exists, or to the
* next (ceiling) term. Returns SeekStatus to
* indicate whether exact term was found, a different
* term was found, or EOF was hit. The target term may
* be before or after the current term. If this returns
* SeekStatus.END, the enum is unpositioned. */
public final SeekStatus seekCeil(BytesRef text) throws IOException {
return seekCeil(text, true);
}
/** Seeks to the specified term by ordinal (position) as
* previously returned by {@link #ord}. The target ord
* may be before or after the current ord, and must be
* within bounds. */
public abstract void seekExact(long ord) throws IOException;
/**
* Expert: Seeks a specific position by {@link TermState} previously obtained
* from {@link #termState()}. Callers should maintain the {@link TermState} to
* use this method. Low-level implementations may position the TermsEnum
* without re-seeking the term dictionary.
* <p>
* Seeking by {@link TermState} should only be used iff the enum the state was
* obtained from and the enum the state is used for seeking are obtained from
* the same {@link IndexReader}.
* <p>
* NOTE: Using this method with an incompatible {@link TermState} might leave
* this {@link TermsEnum} in undefined state. On a segment level
* {@link TermState} instances are compatible only iff the source and the
* target {@link TermsEnum} operate on the same field. If operating on segment
* level, TermState instances must not be used across segments.
* <p>
* NOTE: A seek by {@link TermState} might not restore the
* {@link AttributeSource}'s state. {@link AttributeSource} states must be
* maintained separately if this method is used.
* @param term the term the TermState corresponds to
* @param state the {@link TermState}
* */
public void seekExact(BytesRef term, TermState state) throws IOException {
if (!seekExact(term, true)) {
throw new IllegalArgumentException("term=" + term + " does not exist");
}
}
/** Increments the enumeration to the next term.
* Returns the resulting term, or null if the end was
* hit (which means the enum is unpositioned). The
* returned BytesRef may be re-used across calls to next. */
public abstract BytesRef next() throws IOException;
/** Returns current term. Do not call this when the enum
* is unpositioned. */
public abstract BytesRef term() throws IOException;
/** Returns ordinal position for current term. This is an
* optional method (the codec may throw {@link
* UnsupportedOperationException}). Do not call this
* when the enum is unpositioned. */
public abstract long ord() throws IOException;
/** Returns the number of documents containing the current
* term. Do not call this when the enum is unpositioned.
* {@link SeekStatus#END}.*/
public abstract int docFreq() throws IOException;
/** Returns the total number of occurrences of this term
* across all documents (the sum of the freq() for each
* doc that has this term). This will be -1 if the
* codec doesn't support this measure. Note that, like
* other term measures, this measure does not take
* deleted documents into account. */
public abstract long totalTermFreq() throws IOException;
/** Get {@link DocsEnum} for the current term. Do not
* call this when the enum is unpositioned. This method
* will not return null.
*
* @param liveDocs set bits are documents that should not
* be returned
* @param reuse pass a prior DocsEnum for possible reuse */
public abstract DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException;
/** Get {@link DocsAndPositionsEnum} for the current term.
* Do not call this when the enum is unpositioned.
* This method will only return null if positions were
* not indexed into the postings by this codec. */
public abstract DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException;
/**
* Expert: Returns the TermsEnums internal state to position the TermsEnum
* without re-seeking the term dictionary.
* <p>
* NOTE: A seek by {@link TermState} might not capture the
* {@link AttributeSource}'s state. Callers must maintain the
* {@link AttributeSource} states separately
*
* @see TermState
* @see #seekExact(BytesRef, TermState)
*/
public TermState termState() throws IOException {
return new TermState() {
@Override
public void copyFrom(TermState other) {
}
};
}
/** Return the {@link BytesRef} Comparator used to sort
* terms provided by the iterator. This may return
* null if there are no terms. Callers may invoke this
* method many times, so it's best to cache a single
* instance & reuse it. */
public abstract Comparator<BytesRef> getComparator() throws IOException;
/** An empty TermsEnum for quickly returning an empty instance e.g.
* in {@link org.apache.lucene.search.MultiTermQuery}
* <p><em>Please note:</em> This enum should be unmodifiable,
* but it is currently possible to add Attributes to it.
* This should not be a problem, as the enum is always empty and
* the existence of unused Attributes does not matter.
*/
public static final TermsEnum EMPTY = new TermsEnum() {
@Override
public SeekStatus seekCeil(BytesRef term, boolean useCache) { return SeekStatus.END; }
@Override
public void seekExact(long ord) {}
@Override
public BytesRef term() {
throw new IllegalStateException("this method should never be called");
}
@Override
public Comparator<BytesRef> getComparator() {
return null;
}
@Override
public int docFreq() {
throw new IllegalStateException("this method should never be called");
}
@Override
public long totalTermFreq() {
throw new IllegalStateException("this method should never be called");
}
@Override
public long ord() {
throw new IllegalStateException("this method should never be called");
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) {
throw new IllegalStateException("this method should never be called");
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) {
throw new IllegalStateException("this method should never be called");
}
@Override
public BytesRef next() {
return null;
}
@Override // make it synchronized here, to prevent double lazy init
public synchronized AttributeSource attributes() {
return super.attributes();
}
@Override
public TermState termState() throws IOException {
throw new IllegalStateException("this method should never be called");
}
@Override
public void seekExact(BytesRef term, TermState state) throws IOException {
throw new IllegalStateException("this method should never be called");
}
};
}