blob: c79545dbfd463cf91801605f1815220109133c24 [file] [log] [blame]
package org.apache.lucene.index.codecs.pulsing;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
import org.apache.lucene.index.codecs.PostingsReaderBase;
import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
import org.apache.lucene.index.codecs.BlockTermsReader;
import org.apache.lucene.index.codecs.BlockTermsWriter;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.standard.StandardCodec;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
/** This codec "inlines" the postings for terms that have
* low docFreq. It wraps another codec, which is used for
* writing the non-inlined terms.
*
 * Currently it only inlines terms whose docFreq is at most the
 * configured cutoff, and otherwise uses the normal "standard" codec.
* @lucene.experimental */
public class PulsingCodec extends Codec {

  // Terms with docFreq <= freqCutoff have their postings inlined
  // ("pulsed") into the terms dictionary instead of being written to
  // the wrapped postings files.
  private final int freqCutoff;

  /**
   * Creates a PulsingCodec.
   *
   * @param freqCutoff terms whose docFreq is at most this value are
   *        inlined into the terms dictionary; all other terms are
   *        written through the wrapped standard postings format
   */
  public PulsingCodec(int freqCutoff) {
    name = "Pulsing";
    this.freqCutoff = freqCutoff;
  }

  @Override
  public String toString() {
    return name + "(freqCutoff=" + freqCutoff + ")";
  }

  /**
   * Returns the write-side chain for a segment: standard postings
   * writer, wrapped by the pulsing writer, fronted by the block terms
   * dict and variable-gap terms index.
   *
   * @throws IOException if any component fails to open; partially
   *         opened components are closed before the exception propagates
   */
  @Override
  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    // We wrap StandardPostingsWriter, but any PostingsWriterBase
    // implementation would work:
    PostingsWriterBase docsWriter = new StandardPostingsWriter(state);

    // Terms that have <= freqCutoff number of docs are
    // "pulsed" (inlined):
    PostingsWriterBase pulsingWriter = new PulsingPostingsWriterImpl(freqCutoff, docsWriter);

    // Terms dict index
    TermsIndexWriterBase indexWriter;
    boolean success = false;
    try {
      indexWriter = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNTermSelector(state.termIndexInterval));
      success = true;
    } finally {
      if (!success) {
        // Suppress secondary close() exceptions so the original
        // exception from the index-writer ctor propagates:
        IOUtils.closeSafely(true, pulsingWriter);
      }
    }

    // Terms dict
    success = false;
    try {
      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, pulsingWriter);
      success = true;
      return ret;
    } finally {
      if (!success) {
        IOUtils.closeSafely(true, pulsingWriter, indexWriter);
      }
    }
  }

  /**
   * Returns the read-side chain for a segment, mirroring the structure
   * produced by {@link #fieldsConsumer}.
   *
   * @throws IOException if any component fails to open; partially
   *         opened components are closed before the exception propagates
   */
  @Override
  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
    // We wrap StandardPostingsReader, but any PostingsReaderBase
    // implementation would work:
    PostingsReaderBase docsReader = new StandardPostingsReader(state.dir, state.segmentInfo, state.readBufferSize, state.codecId);
    PostingsReaderBase pulsingReader = new PulsingPostingsReaderImpl(docsReader);

    // Terms dict index reader
    TermsIndexReaderBase indexReader;
    boolean success = false;
    try {
      indexReader = new VariableGapTermsIndexReader(state.dir,
                                                    state.fieldInfos,
                                                    state.segmentInfo.name,
                                                    state.termsIndexDivisor,
                                                    state.codecId);
      success = true;
    } finally {
      if (!success) {
        // Match fieldsConsumer's cleanup style: suppress secondary
        // close() exceptions so the original exception propagates
        // (a raw close() here could mask the primary failure):
        IOUtils.closeSafely(true, pulsingReader);
      }
    }

    // Terms dict reader
    success = false;
    try {
      FieldsProducer ret = new BlockTermsReader(indexReader,
                                                state.dir, state.fieldInfos, state.segmentInfo.name,
                                                pulsingReader,
                                                state.readBufferSize,
                                                StandardCodec.TERMS_CACHE_SIZE,
                                                state.codecId);
      success = true;
      return ret;
    } finally {
      if (!success) {
        // closeSafely closes both even if the first close throws:
        IOUtils.closeSafely(true, pulsingReader, indexReader);
      }
    }
  }

  /**
   * Adds all files written by this codec for the given segment to
   * {@code files}. All three layers (postings, terms dict, terms
   * index) contribute files.
   */
  @Override
  public void files(Directory dir, SegmentInfo segmentInfo, String id, Set<String> files) throws IOException {
    StandardPostingsReader.files(dir, segmentInfo, id, files);
    BlockTermsReader.files(dir, segmentInfo, id, files);
    VariableGapTermsIndexReader.files(dir, segmentInfo, id, files);
  }

  /** Adds the file extensions this codec uses (same as the standard codec's). */
  @Override
  public void getExtensions(Set<String> extensions) {
    StandardCodec.getStandardExtensions(extensions);
  }
}