/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| |
| package org.apache.lucene.codecs.uniformsplit.sharedterms; |
| |
| import java.io.IOException; |
| |
| import org.apache.lucene.codecs.FieldsConsumer; |
| import org.apache.lucene.codecs.FieldsProducer; |
| import org.apache.lucene.codecs.PostingsFormat; |
| import org.apache.lucene.codecs.PostingsReaderBase; |
| import org.apache.lucene.codecs.PostingsWriterBase; |
| import org.apache.lucene.codecs.uniformsplit.BlockDecoder; |
| import org.apache.lucene.codecs.uniformsplit.BlockEncoder; |
| import org.apache.lucene.codecs.uniformsplit.UniformSplitPostingsFormat; |
| import org.apache.lucene.codecs.uniformsplit.UniformSplitTermsWriter; |
| import org.apache.lucene.index.SegmentReadState; |
| import org.apache.lucene.index.SegmentWriteState; |
| |
| /** |
| * {@link PostingsFormat} based on the Uniform Split technique and supporting |
| * Shared Terms. |
| * <p> |
| * Shared Terms means the terms of all fields are stored in the same block file, |
| * with multiple fields associated to one term (one block line). In the same way, |
| * the dictionary trie is also shared between all fields. This highly reduces |
| * the memory required by the field dictionary compared to having one separate |
| * dictionary per field. |
| * |
| * @lucene.experimental |
| */ |
| public class STUniformSplitPostingsFormat extends UniformSplitPostingsFormat { |
| |
| /** |
| * Extension of the file containing the terms dictionary (the FST "trie"). |
| */ |
| public static final String TERMS_DICTIONARY_EXTENSION = "stustd"; |
| /** |
| * Extension of the file containing the terms blocks for each field and the fields metadata. |
| */ |
| public static final String TERMS_BLOCKS_EXTENSION = "stustb"; |
| |
| public static final int VERSION_CURRENT = UniformSplitPostingsFormat.VERSION_CURRENT; |
| |
| public static final String NAME = "SharedTermsUniformSplit"; |
| |
| /** |
| * Creates a {@link STUniformSplitPostingsFormat} with default settings. |
| */ |
| public STUniformSplitPostingsFormat() { |
| this(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES, |
| null, null, false); |
| } |
| |
| /** |
| * @see UniformSplitPostingsFormat#UniformSplitPostingsFormat(int, int, BlockEncoder, BlockDecoder, boolean) |
| */ |
| public STUniformSplitPostingsFormat(int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder, |
| boolean dictionaryOnHeap) { |
| this(NAME, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder, dictionaryOnHeap); |
| } |
| |
| protected STUniformSplitPostingsFormat(String name, int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, |
| BlockDecoder blockDecoder, boolean dictionaryOnHeap) { |
| super(name, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder, dictionaryOnHeap); |
| } |
| |
| @Override |
| protected FieldsConsumer createUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state, |
| int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder) throws IOException { |
| return new STUniformSplitTermsWriter(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder); |
| } |
| |
| @Override |
| protected FieldsProducer createUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, |
| BlockDecoder blockDecoder) throws IOException { |
| return new STUniformSplitTermsReader(postingsReader, state, blockDecoder, dictionaryOnHeap); |
| } |
| } |