| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.backward_codecs.lucene87; |
| |
| import java.io.IOException; |
| import java.util.zip.DataFormatException; |
| import java.util.zip.Deflater; |
| import java.util.zip.Inflater; |
| import org.apache.lucene.codecs.compressing.CompressionMode; |
| import org.apache.lucene.codecs.compressing.Compressor; |
| import org.apache.lucene.codecs.compressing.Decompressor; |
| import org.apache.lucene.index.CorruptIndexException; |
| import org.apache.lucene.store.DataInput; |
| import org.apache.lucene.store.DataOutput; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| |
| /** |
| * A compression mode that trades speed for compression ratio. Although compression and |
| * decompression might be slow, this compression mode should provide a good compression ratio. This |
| * mode might be interesting if/when your index size is much bigger than your OS cache. |
| * |
| * @lucene.internal |
| */ |
| public final class DeflateWithPresetDictCompressionMode extends CompressionMode { |
| |
| // Shoot for 10 sub blocks |
| private static final int NUM_SUB_BLOCKS = 10; |
| // And a dictionary whose size is about 6x smaller than sub blocks |
| private static final int DICT_SIZE_FACTOR = 6; |
| |
| /** Sole constructor. */ |
| public DeflateWithPresetDictCompressionMode() {} |
| |
| @Override |
| public Compressor newCompressor() { |
| // notes: |
| // 3 is the highest level that doesn't have lazy match evaluation |
| // 6 is the default, higher than that is just a waste of cpu |
| return new DeflateWithPresetDictCompressor(6); |
| } |
| |
| @Override |
| public Decompressor newDecompressor() { |
| return new DeflateWithPresetDictDecompressor(); |
| } |
| |
| @Override |
| public String toString() { |
| return "BEST_COMPRESSION"; |
| } |
| |
| private static final class DeflateWithPresetDictDecompressor extends Decompressor { |
| |
| byte[] compressed; |
| |
| DeflateWithPresetDictDecompressor() { |
| compressed = new byte[0]; |
| } |
| |
| private void doDecompress(DataInput in, Inflater decompressor, BytesRef bytes) |
| throws IOException { |
| final int compressedLength = in.readVInt(); |
| if (compressedLength == 0) { |
| return; |
| } |
| // pad with extra "dummy byte": see javadocs for using Inflater(true) |
| // we do it for compliance, but it's unnecessary for years in zlib. |
| final int paddedLength = compressedLength + 1; |
| compressed = ArrayUtil.grow(compressed, paddedLength); |
| in.readBytes(compressed, 0, compressedLength); |
| compressed[compressedLength] = 0; // explicitly set dummy byte to 0 |
| |
| // extra "dummy byte" |
| decompressor.setInput(compressed, 0, paddedLength); |
| try { |
| bytes.length += |
| decompressor.inflate(bytes.bytes, bytes.length, bytes.bytes.length - bytes.length); |
| } catch (DataFormatException e) { |
| throw new IOException(e); |
| } |
| if (decompressor.finished() == false) { |
| throw new CorruptIndexException( |
| "Invalid decoder state: needsInput=" |
| + decompressor.needsInput() |
| + ", needsDict=" |
| + decompressor.needsDictionary(), |
| in); |
| } |
| } |
| |
| @Override |
| public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) |
| throws IOException { |
| assert offset + length <= originalLength; |
| if (length == 0) { |
| bytes.length = 0; |
| return; |
| } |
| final int dictLength = in.readVInt(); |
| final int blockLength = in.readVInt(); |
| bytes.bytes = ArrayUtil.grow(bytes.bytes, dictLength); |
| bytes.offset = bytes.length = 0; |
| |
| final Inflater decompressor = new Inflater(true); |
| try { |
| // Read the dictionary |
| doDecompress(in, decompressor, bytes); |
| if (dictLength != bytes.length) { |
| throw new CorruptIndexException("Unexpected dict length", in); |
| } |
| |
| int offsetInBlock = dictLength; |
| int offsetInBytesRef = offset; |
| |
| // Skip unneeded blocks |
| while (offsetInBlock + blockLength < offset) { |
| final int compressedLength = in.readVInt(); |
| in.skipBytes(compressedLength); |
| offsetInBlock += blockLength; |
| offsetInBytesRef -= blockLength; |
| } |
| |
| // Read blocks that intersect with the interval we need |
| while (offsetInBlock < offset + length) { |
| bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + blockLength); |
| decompressor.reset(); |
| decompressor.setDictionary(bytes.bytes, 0, dictLength); |
| doDecompress(in, decompressor, bytes); |
| offsetInBlock += blockLength; |
| } |
| |
| bytes.offset = offsetInBytesRef; |
| bytes.length = length; |
| assert bytes.isValid(); |
| } finally { |
| decompressor.end(); |
| } |
| } |
| |
| @Override |
| public Decompressor clone() { |
| return new DeflateWithPresetDictDecompressor(); |
| } |
| } |
| |
| private static class DeflateWithPresetDictCompressor extends Compressor { |
| |
| final Deflater compressor; |
| final BugfixDeflater_JDK8252739 deflaterBugfix; |
| byte[] compressed; |
| boolean closed; |
| |
| DeflateWithPresetDictCompressor(int level) { |
| compressor = new Deflater(level, true); |
| deflaterBugfix = BugfixDeflater_JDK8252739.createBugfix(compressor); |
| compressed = new byte[64]; |
| } |
| |
| private void doCompress(byte[] bytes, int off, int len, DataOutput out) throws IOException { |
| if (len == 0) { |
| out.writeVInt(0); |
| return; |
| } |
| compressor.setInput(bytes, off, len); |
| compressor.finish(); |
| if (compressor.needsInput()) { |
| throw new IllegalStateException(); |
| } |
| |
| int totalCount = 0; |
| for (; ; ) { |
| final int count = |
| compressor.deflate(compressed, totalCount, compressed.length - totalCount); |
| totalCount += count; |
| assert totalCount <= compressed.length; |
| if (compressor.finished()) { |
| break; |
| } else { |
| compressed = ArrayUtil.grow(compressed); |
| } |
| } |
| |
| out.writeVInt(totalCount); |
| out.writeBytes(compressed, totalCount); |
| } |
| |
| @Override |
| public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException { |
| final int dictLength = len / (NUM_SUB_BLOCKS * DICT_SIZE_FACTOR); |
| final int blockLength = (len - dictLength + NUM_SUB_BLOCKS - 1) / NUM_SUB_BLOCKS; |
| out.writeVInt(dictLength); |
| out.writeVInt(blockLength); |
| final int end = off + len; |
| |
| // Compress the dictionary first |
| compressor.reset(); |
| doCompress(bytes, off, dictLength, out); |
| |
| // And then sub blocks |
| for (int start = off + dictLength; start < end; start += blockLength) { |
| compressor.reset(); |
| deflaterBugfix.setDictionary(bytes, off, dictLength); |
| doCompress(bytes, start, Math.min(blockLength, off + len - start), out); |
| } |
| } |
| |
| @Override |
| public void close() throws IOException { |
| if (closed == false) { |
| compressor.end(); |
| closed = true; |
| } |
| } |
| } |
| } |