blob: 5f0da7518334c5e36454581b760427d100111fab [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.uniformsplit;
import java.io.IOException;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.automaton.CompiledAutomaton;
/**
* {@link Terms} based on the Uniform Split technique.
* <p>
* The {@link IndexDictionary index dictionary} is lazy loaded only when
* {@link TermsEnum#seekCeil} or {@link TermsEnum#seekExact} are called
* (it is not loaded for a direct terms enumeration).
*
* @see UniformSplitTermsWriter
* @lucene.experimental
*/
public class UniformSplitTerms extends Terms implements Accountable {
private static final long BASE_RAM_USAGE = RamUsageEstimator.shallowSizeOfInstance(UniformSplitTerms.class);
protected final IndexInput blockInput;
protected final FieldMetadata fieldMetadata;
protected final PostingsReaderBase postingsReader;
protected final BlockDecoder blockDecoder;
protected final IndexDictionary.BrowserSupplier dictionaryBrowserSupplier;
/**
* @param blockDecoder Optional block decoder, may be null if none. It can be used for decompression or decryption.
*/
protected UniformSplitTerms(IndexInput blockInput, FieldMetadata fieldMetadata,
PostingsReaderBase postingsReader, BlockDecoder blockDecoder,
IndexDictionary.BrowserSupplier dictionaryBrowserSupplier) {
assert fieldMetadata != null;
assert fieldMetadata.getFieldInfo() != null;
assert fieldMetadata.getLastTerm() != null;
assert dictionaryBrowserSupplier != null;
this.blockInput = blockInput;
this.fieldMetadata = fieldMetadata;
this.postingsReader = postingsReader;
this.blockDecoder = blockDecoder;
this.dictionaryBrowserSupplier = dictionaryBrowserSupplier;
}
@Override
public TermsEnum iterator() throws IOException {
return new BlockReader(dictionaryBrowserSupplier, blockInput, postingsReader, fieldMetadata, blockDecoder);
}
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
checkIntersectAutomatonType(compiled);
return new IntersectBlockReader(compiled, startTerm, dictionaryBrowserSupplier, blockInput, postingsReader, fieldMetadata, blockDecoder);
}
protected void checkIntersectAutomatonType(CompiledAutomaton automaton) {
// This check is consistent with other impls and precondition stated in javadoc.
if (automaton.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
}
}
@Override
public BytesRef getMax() {
return fieldMetadata.getLastTerm();
}
@Override
public long size() {
return fieldMetadata.getNumTerms();
}
@Override
public long getSumTotalTermFreq() {
return fieldMetadata.getSumTotalTermFreq();
}
@Override
public long getSumDocFreq() {
return fieldMetadata.getSumDocFreq();
}
@Override
public int getDocCount() {
return fieldMetadata.getDocCount();
}
@Override
public boolean hasFreqs() {
return fieldMetadata.getFieldInfo().getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
}
@Override
public boolean hasOffsets() {
return fieldMetadata.getFieldInfo().getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
}
@Override
public boolean hasPositions() {
return fieldMetadata.getFieldInfo().getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
}
@Override
public boolean hasPayloads() {
return fieldMetadata.getFieldInfo().hasPayloads();
}
@Override
public long ramBytesUsed() {
return ramBytesUsedWithoutDictionary() + getDictionaryRamBytesUsed();
}
public long ramBytesUsedWithoutDictionary() {
return BASE_RAM_USAGE + fieldMetadata.ramBytesUsed();
}
public long getDictionaryRamBytesUsed() {
return dictionaryBrowserSupplier.ramBytesUsed();
}
}