| package org.apache.lucene.index.codecs.pulsing; |
| |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.IOException; |
| import java.util.Set; |
| |
| import org.apache.lucene.index.SegmentInfo; |
| import org.apache.lucene.index.SegmentWriteState; |
| import org.apache.lucene.index.SegmentReadState; |
| import org.apache.lucene.index.codecs.Codec; |
| import org.apache.lucene.index.codecs.PostingsWriterBase; |
| import org.apache.lucene.index.codecs.standard.StandardPostingsWriter; |
| import org.apache.lucene.index.codecs.PostingsReaderBase; |
| import org.apache.lucene.index.codecs.standard.StandardPostingsReader; |
| import org.apache.lucene.index.codecs.FieldsConsumer; |
| import org.apache.lucene.index.codecs.FieldsProducer; |
| import org.apache.lucene.index.codecs.VariableGapTermsIndexReader; |
| import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter; |
| import org.apache.lucene.index.codecs.BlockTermsReader; |
| import org.apache.lucene.index.codecs.BlockTermsWriter; |
| import org.apache.lucene.index.codecs.TermsIndexReaderBase; |
| import org.apache.lucene.index.codecs.TermsIndexWriterBase; |
| import org.apache.lucene.index.codecs.standard.StandardCodec; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.IOUtils; |
| |
| /** This codec "inlines" the postings for terms that have |
| * low docFreq. It wraps another codec, which is used for |
| * writing the non-inlined terms. |
| * |
 * Currently it only inlines docFreq=1 terms, and
| * otherwise uses the normal "standard" codec. |
| * @lucene.experimental */ |
| |
| public class PulsingCodec extends Codec { |
| |
| private final int freqCutoff; |
| |
| /** Terms with freq <= freqCutoff are inlined into terms |
| * dict. */ |
| public PulsingCodec(int freqCutoff) { |
| name = "Pulsing"; |
| this.freqCutoff = freqCutoff; |
| } |
| |
| @Override |
| public String toString() { |
| return name + "(freqCutoff=" + freqCutoff + ")"; |
| } |
| |
| @Override |
| public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { |
| // We wrap StandardPostingsWriter, but any StandardPostingsWriter |
| // will work: |
| PostingsWriterBase docsWriter = new StandardPostingsWriter(state); |
| |
| // Terms that have <= freqCutoff number of docs are |
| // "pulsed" (inlined): |
| PostingsWriterBase pulsingWriter = new PulsingPostingsWriterImpl(freqCutoff, docsWriter); |
| |
| // Terms dict index |
| TermsIndexWriterBase indexWriter; |
| boolean success = false; |
| try { |
| indexWriter = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNTermSelector(state.termIndexInterval)); |
| success = true; |
| } finally { |
| if (!success) { |
| IOUtils.closeSafely(true, pulsingWriter); |
| } |
| } |
| |
| // Terms dict |
| success = false; |
| try { |
| FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, pulsingWriter); |
| success = true; |
| return ret; |
| } finally { |
| if (!success) { |
| IOUtils.closeSafely(true, pulsingWriter, indexWriter); |
| } |
| } |
| } |
| |
| @Override |
| public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { |
| |
| // We wrap StandardPostingsReader, but any StandardPostingsReader |
| // will work: |
| PostingsReaderBase docsReader = new StandardPostingsReader(state.dir, state.segmentInfo, state.readBufferSize, state.codecId); |
| PostingsReaderBase pulsingReader = new PulsingPostingsReaderImpl(docsReader); |
| |
| // Terms dict index reader |
| TermsIndexReaderBase indexReader; |
| |
| boolean success = false; |
| try { |
| indexReader = new VariableGapTermsIndexReader(state.dir, |
| state.fieldInfos, |
| state.segmentInfo.name, |
| state.termsIndexDivisor, |
| state.codecId); |
| success = true; |
| } finally { |
| if (!success) { |
| pulsingReader.close(); |
| } |
| } |
| |
| // Terms dict reader |
| success = false; |
| try { |
| FieldsProducer ret = new BlockTermsReader(indexReader, |
| state.dir, state.fieldInfos, state.segmentInfo.name, |
| pulsingReader, |
| state.readBufferSize, |
| StandardCodec.TERMS_CACHE_SIZE, |
| state.codecId); |
| success = true; |
| return ret; |
| } finally { |
| if (!success) { |
| try { |
| pulsingReader.close(); |
| } finally { |
| indexReader.close(); |
| } |
| } |
| } |
| } |
| |
| @Override |
| public void files(Directory dir, SegmentInfo segmentInfo, String id, Set<String> files) throws IOException { |
| StandardPostingsReader.files(dir, segmentInfo, id, files); |
| BlockTermsReader.files(dir, segmentInfo, id, files); |
| VariableGapTermsIndexReader.files(dir, segmentInfo, id, files); |
| } |
| |
| @Override |
| public void getExtensions(Set<String> extensions) { |
| StandardCodec.getStandardExtensions(extensions); |
| } |
| } |