lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsFormat.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.codecs.compressing;


 import java.io.IOException;

 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.TermVectorsReader;
 import org.apache.lucene.codecs.TermVectorsWriter;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;

 /**
  * A {@link TermVectorsFormat} that compresses chunks of documents together in
  * order to improve the compression ratio.
  * @lucene.experimental
  */
 public class CompressingTermVectorsFormat extends TermVectorsFormat {

   /** Name of the format; forwarded to the reader and the writer. */
   private final String formatName;
   /** Suffix appended to files created by this format; forwarded to the reader and the writer. */
   private final String segmentSuffix;
   /** Compression algorithm; forwarded to the reader and the writer. */
   private final CompressionMode compressionMode;
   /** Minimum byte size of a chunk of documents; only forwarded to the writer. */
   private final int chunkSize;
   /** Number of chunks to store in an index block; only forwarded to the writer. */
   private final int blockSize;
   /** Maximum number of documents in a single chunk; only forwarded to the writer. */
   private final int maxDocsPerChunk;

   /**
    * Create a new {@link CompressingTermVectorsFormat}.
    * <p>
    * <code>formatName</code> is the name of the format. This name will be used
    * in the file formats to perform
    * {@link CodecUtil#checkIndexHeader codec header checks}.
    * <p>
    * The <code>compressionMode</code> parameter allows you to choose between
    * compression algorithms that have various compression and decompression
    * speeds so that you can pick the one that best fits your indexing and
    * searching throughput. You should never instantiate two
    * {@link CompressingTermVectorsFormat}s that have the same name but
    * different {@link CompressionMode}s.
    * <p>
    * <code>chunkSize</code> is the minimum byte size of a chunk of documents.
    * Higher values of <code>chunkSize</code> should improve the compression
    * ratio but will require more memory at indexing time and might make document
    * loading a little slower (depending on the size of your OS cache compared
    * to the size of your index).
    *
    * @param formatName the name of this {@link TermVectorsFormat}
    * @param segmentSuffix a suffix to append to files created by this format
    * @param compressionMode the {@link CompressionMode} to use
    * @param chunkSize the minimum number of bytes of a single chunk of documents; must be &gt;= 1
    * @param maxDocsPerChunk the maximum number of documents in a single chunk; must be &gt;= 1
    * @param blockSize the number of chunks to store in an index block; must be &gt;= 1
    * @throws IllegalArgumentException if <code>chunkSize</code>, <code>maxDocsPerChunk</code>
    *         or <code>blockSize</code> is less than 1
    * @see CompressionMode
    */
   public CompressingTermVectorsFormat(String formatName, String segmentSuffix,
       CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk, int blockSize) {
     this.formatName = formatName;
     this.segmentSuffix = segmentSuffix;
     this.compressionMode = compressionMode;
     if (chunkSize < 1) {
       throw new IllegalArgumentException("chunkSize must be >= 1");
     }
     this.chunkSize = chunkSize;
     // Reject non-positive limits up front, in line with the chunkSize and blockSize checks,
     // instead of handing a meaningless per-chunk document limit to the writer.
     if (maxDocsPerChunk < 1) {
       throw new IllegalArgumentException("maxDocsPerChunk must be >= 1");
     }
     this.maxDocsPerChunk = maxDocsPerChunk;
     if (blockSize < 1) {
       throw new IllegalArgumentException("blockSize must be >= 1");
     }
     this.blockSize = blockSize;
   }

   /**
    * Creates a {@link CompressingTermVectorsReader} for the given segment, configured with
    * this format's name, segment suffix and compression mode. The chunk and block sizing
    * parameters are not passed to the reader.
    *
    * @throws IOException propagated from the reader's constructor
    */
   @Override
   public final TermVectorsReader vectorsReader(Directory directory,
       SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context)
       throws IOException {
     return new CompressingTermVectorsReader(directory, segmentInfo, segmentSuffix,
         fieldInfos, context, formatName, compressionMode);
   }

   /**
    * Creates a {@link CompressingTermVectorsWriter} for the given segment, configured with
    * every parameter this format was constructed with.
    *
    * @throws IOException propagated from the writer's constructor
    */
   @Override
   public final TermVectorsWriter vectorsWriter(
       Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException {
     return new CompressingTermVectorsWriter(
         directory,
         segmentInfo,
         segmentSuffix,
         context,
         formatName,
         compressionMode,
         chunkSize,
         maxDocsPerChunk,
         blockSize);
   }

   /**
    * Returns the simple class name followed by the compression mode, chunk size, maximum
    * documents per chunk and block size. The format name and segment suffix are not included.
    */
   @Override
   public String toString() {
     return getClass().getSimpleName()
         + "(compressionMode="
         + compressionMode
         + ", chunkSize="
         + chunkSize
         + ", maxDocsPerChunk="
         + maxDocsPerChunk
         + ", blockSize="
         + blockSize
         + ")";
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.codecs.compressing;


	import java.io.IOException;

	import org.apache.lucene.codecs.CodecUtil;
	import org.apache.lucene.codecs.StoredFieldsFormat;
	import org.apache.lucene.codecs.TermVectorsFormat;
	import org.apache.lucene.codecs.TermVectorsReader;
	import org.apache.lucene.codecs.TermVectorsWriter;
	import org.apache.lucene.index.FieldInfos;
	import org.apache.lucene.index.SegmentInfo;
	import org.apache.lucene.store.Directory;
	import org.apache.lucene.store.IOContext;

	/**
	* A {@link TermVectorsFormat} that compresses chunks of documents together in
	* order to improve the compression ratio.
	* @lucene.experimental
	*/
	public class CompressingTermVectorsFormat extends TermVectorsFormat {

	  /** Name of the format; forwarded to the reader and the writer. */
	  private final String formatName;
	  /** Suffix appended to files created by this format; forwarded to the reader and the writer. */
	  private final String segmentSuffix;
	  /** Compression algorithm; forwarded to the reader and the writer. */
	  private final CompressionMode compressionMode;
	  /** Minimum byte size of a chunk of documents; only forwarded to the writer. */
	  private final int chunkSize;
	  /** Number of chunks to store in an index block; only forwarded to the writer. */
	  private final int blockSize;
	  /** Maximum number of documents in a single chunk; only forwarded to the writer. */
	  private final int maxDocsPerChunk;

	  /**
	   * Create a new {@link CompressingTermVectorsFormat}.
	   * <p>
	   * <code>formatName</code> is the name of the format. This name will be used
	   * in the file formats to perform
	   * {@link CodecUtil#checkIndexHeader codec header checks}.
	   * <p>
	   * The <code>compressionMode</code> parameter allows you to choose between
	   * compression algorithms that have various compression and decompression
	   * speeds so that you can pick the one that best fits your indexing and
	   * searching throughput. You should never instantiate two
	   * {@link CompressingTermVectorsFormat}s that have the same name but
	   * different {@link CompressionMode}s.
	   * <p>
	   * <code>chunkSize</code> is the minimum byte size of a chunk of documents.
	   * Higher values of <code>chunkSize</code> should improve the compression
	   * ratio but will require more memory at indexing time and might make document
	   * loading a little slower (depending on the size of your OS cache compared
	   * to the size of your index).
	   *
	   * @param formatName the name of this {@link TermVectorsFormat}
	   * @param segmentSuffix a suffix to append to files created by this format
	   * @param compressionMode the {@link CompressionMode} to use
	   * @param chunkSize the minimum number of bytes of a single chunk of documents; must be &gt;= 1
	   * @param maxDocsPerChunk the maximum number of documents in a single chunk; must be &gt;= 1
	   * @param blockSize the number of chunks to store in an index block; must be &gt;= 1
	   * @throws IllegalArgumentException if <code>chunkSize</code>, <code>maxDocsPerChunk</code>
	   *         or <code>blockSize</code> is less than 1
	   * @see CompressionMode
	   */
	  public CompressingTermVectorsFormat(String formatName, String segmentSuffix,
	      CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk, int blockSize) {
	    this.formatName = formatName;
	    this.segmentSuffix = segmentSuffix;
	    this.compressionMode = compressionMode;
	    if (chunkSize < 1) {
	      throw new IllegalArgumentException("chunkSize must be >= 1");
	    }
	    this.chunkSize = chunkSize;
	    // Reject non-positive limits up front, in line with the chunkSize and blockSize checks,
	    // instead of handing a meaningless per-chunk document limit to the writer.
	    if (maxDocsPerChunk < 1) {
	      throw new IllegalArgumentException("maxDocsPerChunk must be >= 1");
	    }
	    this.maxDocsPerChunk = maxDocsPerChunk;
	    if (blockSize < 1) {
	      throw new IllegalArgumentException("blockSize must be >= 1");
	    }
	    this.blockSize = blockSize;
	  }

	  /**
	   * Creates a {@link CompressingTermVectorsReader} for the given segment, configured with
	   * this format's name, segment suffix and compression mode. The chunk and block sizing
	   * parameters are not passed to the reader.
	   *
	   * @throws IOException propagated from the reader's constructor
	   */
	  @Override
	  public final TermVectorsReader vectorsReader(Directory directory,
	      SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context)
	      throws IOException {
	    return new CompressingTermVectorsReader(directory, segmentInfo, segmentSuffix,
	        fieldInfos, context, formatName, compressionMode);
	  }

	  /**
	   * Creates a {@link CompressingTermVectorsWriter} for the given segment, configured with
	   * every parameter this format was constructed with.
	   *
	   * @throws IOException propagated from the writer's constructor
	   */
	  @Override
	  public final TermVectorsWriter vectorsWriter(
	      Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException {
	    return new CompressingTermVectorsWriter(
	        directory,
	        segmentInfo,
	        segmentSuffix,
	        context,
	        formatName,
	        compressionMode,
	        chunkSize,
	        maxDocsPerChunk,
	        blockSize);
	  }

	  /**
	   * Returns the simple class name followed by the compression mode, chunk size, maximum
	   * documents per chunk and block size. The format name and segment suffix are not included.
	   */
	  @Override
	  public String toString() {
	    return getClass().getSimpleName()
	        + "(compressionMode="
	        + compressionMode
	        + ", chunkSize="
	        + chunkSize
	        + ", maxDocsPerChunk="
	        + maxDocsPerChunk
	        + ", blockSize="
	        + blockSize
	        + ")";
	  }

	}