// blob: 3f84fc11fea6005e87eb288026123b067c8deee1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
/**
 * Iterator to seek ({@link #seekCeil(BytesRef)}, {@link #seekExact(BytesRef)}) or step through
 * ({@link #next}) terms to obtain frequency information ({@link #docFreq}) or a {@link
 * PostingsEnum} for the current term ({@link #postings(PostingsEnum, int)}).
 *
 * <p>Term enumerations are always ordered by BytesRef.compareTo, which is Unicode sort order if
 * the terms are UTF-8 bytes. Each term in the enumeration is greater than the one before it.
 *
 * <p>The TermsEnum is unpositioned when you first obtain it and you must first successfully call
 * {@link #next} or one of the <code>seek</code> methods.
 *
 * @lucene.experimental
 */
public abstract class TermsEnum implements BytesRefIterator {

  /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
  protected TermsEnum() {}

  /** Returns the related attributes. */
  public abstract AttributeSource attributes();

  /** Represents returned result from {@link #seekCeil}. */
  public enum SeekStatus {
    /** The term was not found, and the end of iteration was hit. */
    END,
    /** The precise term was found. */
    FOUND,
    /** A different term was found after the requested term. */
    NOT_FOUND
  }

  /**
   * Attempts to seek to the exact term, returning true if the term is found. If this returns
   * false, the enum is unpositioned. For some codecs, seekExact may be substantially faster than
   * {@link #seekCeil}.
   *
   * @param text the term to seek to
   * @return true if the term is found; false otherwise, in which case the enum is unpositioned.
   */
  public abstract boolean seekExact(BytesRef text) throws IOException;

  /**
   * Seeks to the specified term, if it exists, or to the next (ceiling) term. Returns SeekStatus
   * to indicate whether exact term was found, a different term was found, or EOF was hit. The
   * target term may be before or after the current term. If this returns SeekStatus.END, the enum
   * is unpositioned.
   *
   * @param text the term to seek to
   * @return {@link SeekStatus#FOUND} if the exact term was found, {@link SeekStatus#NOT_FOUND} if
   *     a different (ceiling) term was found, or {@link SeekStatus#END} if EOF was hit
   */
  public abstract SeekStatus seekCeil(BytesRef text) throws IOException;

  /**
   * Seeks to the specified term by ordinal (position) as previously returned by {@link #ord}. The
   * target ord may be before or after the current ord, and must be within bounds.
   *
   * @param ord the ordinal to seek to
   */
  public abstract void seekExact(long ord) throws IOException;

  /**
   * Expert: Seeks a specific position by {@link TermState} previously obtained from {@link
   * #termState()}. Callers should maintain the {@link TermState} to use this method. Low-level
   * implementations may position the TermsEnum without re-seeking the term dictionary.
   *
   * <p>Seeking by {@link TermState} should only be used iff the state was obtained from the same
   * {@link TermsEnum} instance.
   *
   * <p>NOTE: Using this method with an incompatible {@link TermState} might leave this {@link
   * TermsEnum} in undefined state. On a segment level {@link TermState} instances are compatible
   * only iff the source and the target {@link TermsEnum} operate on the same field. If operating
   * on segment level, TermState instances must not be used across segments.
   *
   * <p>NOTE: A seek by {@link TermState} might not restore the {@link AttributeSource}'s state.
   * {@link AttributeSource} states must be maintained separately if this method is used.
   *
   * @param term the term the TermState corresponds to
   * @param state the {@link TermState}
   */
  public abstract void seekExact(BytesRef term, TermState state) throws IOException;

  /** Returns current term. Do not call this when the enum is unpositioned. */
  public abstract BytesRef term() throws IOException;

  /**
   * Returns ordinal position for current term. This is an optional method (the codec may throw
   * {@link UnsupportedOperationException}). Do not call this when the enum is unpositioned.
   */
  public abstract long ord() throws IOException;

  /**
   * Returns the number of documents containing the current term. Do not call this when the enum
   * is unpositioned (for example after {@link #seekCeil} returned {@link SeekStatus#END}).
   */
  public abstract int docFreq() throws IOException;

  /**
   * Returns the total number of occurrences of this term across all documents (the sum of the
   * freq() for each doc that has this term). Note that, like other term measures, this measure
   * does not take deleted documents into account.
   */
  public abstract long totalTermFreq() throws IOException;

  /**
   * Get {@link PostingsEnum} for the current term. Do not call this when the enum is
   * unpositioned. This method will not return null.
   *
   * <p><b>NOTE</b>: the returned iterator may return deleted documents, so deleted documents have
   * to be checked on top of the {@link PostingsEnum}.
   *
   * <p>Use this method if you only require documents and frequencies, and do not need any
   * proximity data. This method is equivalent to {@link #postings(PostingsEnum, int)
   * postings(reuse, PostingsEnum.FREQS)}.
   *
   * @param reuse pass a prior PostingsEnum for possible reuse
   * @see #postings(PostingsEnum, int)
   */
  public final PostingsEnum postings(PostingsEnum reuse) throws IOException {
    return postings(reuse, PostingsEnum.FREQS);
  }

  /**
   * Get {@link PostingsEnum} for the current term, with control over whether freqs, positions,
   * offsets or payloads are required. Do not call this when the enum is unpositioned. This method
   * will not return null.
   *
   * <p><b>NOTE</b>: the returned iterator may return deleted documents, so deleted documents have
   * to be checked on top of the {@link PostingsEnum}.
   *
   * @param reuse pass a prior PostingsEnum for possible reuse
   * @param flags specifies which optional per-document values you require; see {@link
   *     PostingsEnum#FREQS}
   */
  public abstract PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException;

  /**
   * Return a {@link ImpactsEnum}.
   *
   * @see #postings(PostingsEnum, int)
   */
  public abstract ImpactsEnum impacts(int flags) throws IOException;

  /**
   * Expert: Returns the TermsEnum's internal state to position the TermsEnum without re-seeking
   * the term dictionary.
   *
   * <p>NOTE: A seek by {@link TermState} might not capture the {@link AttributeSource}'s state.
   * Callers must maintain the {@link AttributeSource} states separately.
   *
   * @see TermState
   * @see #seekExact(BytesRef, TermState)
   */
  public abstract TermState termState() throws IOException;

  /**
   * An empty TermsEnum for quickly returning an empty instance e.g. in {@link
   * org.apache.lucene.search.MultiTermQuery}
   *
   * <p><em>Please note:</em> This enum should be unmodifiable, but it is currently possible to
   * add Attributes to it. This should not be a problem, as the enum is always empty and the
   * existence of unused Attributes does not matter.
   */
  public static final TermsEnum EMPTY =
      new TermsEnum() {

        // Created lazily on the first attributes() call; guarded by the synchronized accessor.
        private AttributeSource atts;

        @Override
        public SeekStatus seekCeil(BytesRef term) {
          // There are no terms, so every seek runs straight off the end.
          return SeekStatus.END;
        }

        @Override
        public void seekExact(long ord) {}

        @Override
        public BytesRef term() {
          throw new IllegalStateException("this method should never be called");
        }

        @Override
        public int docFreq() {
          throw new IllegalStateException("this method should never be called");
        }

        @Override
        public long totalTermFreq() {
          throw new IllegalStateException("this method should never be called");
        }

        @Override
        public long ord() {
          throw new IllegalStateException("this method should never be called");
        }

        @Override
        public PostingsEnum postings(PostingsEnum reuse, int flags) {
          throw new IllegalStateException("this method should never be called");
        }

        @Override
        public ImpactsEnum impacts(int flags) {
          throw new IllegalStateException("this method should never be called");
        }

        @Override
        public BytesRef next() {
          // Nothing to iterate over.
          return null;
        }

        @Override // make it synchronized here, to prevent double lazy init
        public synchronized AttributeSource attributes() {
          if (atts == null) {
            atts = new AttributeSource();
          }
          return atts;
        }

        @Override
        public boolean seekExact(BytesRef text) {
          // seekCeil above always reports END, so this is always false.
          return seekCeil(text) == SeekStatus.FOUND;
        }

        @Override
        public TermState termState() {
          throw new IllegalStateException("this method should never be called");
        }

        @Override
        public void seekExact(BytesRef term, TermState state) {
          throw new IllegalStateException("this method should never be called");
        }
      };
}