blob: e5a3c4e70bf5d88af02518925b629a31d0bb27f9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.compressing;
import java.io.IOException;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.Inflater;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.compress.LZ4;
/**
* A compression mode. Tells how much effort should be spent on compression and
* decompression of stored fields.
* @lucene.experimental
*/
public abstract class CompressionMode {
/**
* A compression mode that trades compression ratio for speed. Although the
* compression ratio might remain high, compression and decompression are
* very fast. Use this mode with indices that have a high update rate but
* should be able to load documents from disk quickly.
*/
public static final CompressionMode FAST = new CompressionMode() {
@Override
public Compressor newCompressor() {
return new LZ4FastCompressor();
}
@Override
public Decompressor newDecompressor() {
return LZ4_DECOMPRESSOR;
}
@Override
public String toString() {
return "FAST";
}
};
/**
* A compression mode that trades speed for compression ratio. Although
* compression and decompression might be slow, this compression mode should
* provide a good compression ratio. This mode might be interesting if/when
* your index size is much bigger than your OS cache.
*/
public static final CompressionMode HIGH_COMPRESSION = new CompressionMode() {
@Override
public Compressor newCompressor() {
// notes:
// 3 is the highest level that doesn't have lazy match evaluation
// 6 is the default, higher than that is just a waste of cpu
return new DeflateCompressor(6);
}
@Override
public Decompressor newDecompressor() {
return new DeflateDecompressor();
}
@Override
public String toString() {
return "HIGH_COMPRESSION";
}
};
/**
* This compression mode is similar to {@link #FAST} but it spends more time
* compressing in order to improve the compression ratio. This compression
* mode is best used with indices that have a low update rate but should be
* able to load documents from disk quickly.
*/
public static final CompressionMode FAST_DECOMPRESSION = new CompressionMode() {
@Override
public Compressor newCompressor() {
return new LZ4HighCompressor();
}
@Override
public Decompressor newDecompressor() {
return LZ4_DECOMPRESSOR;
}
@Override
public String toString() {
return "FAST_DECOMPRESSION";
}
};
/** Sole constructor. */
protected CompressionMode() {}
/**
* Create a new {@link Compressor} instance.
*/
public abstract Compressor newCompressor();
/**
* Create a new {@link Decompressor} instance.
*/
public abstract Decompressor newDecompressor();
private static final Decompressor LZ4_DECOMPRESSOR = new Decompressor() {
@Override
public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException {
assert offset + length <= originalLength;
// add 7 padding bytes, this is not necessary but can help decompression run faster
if (bytes.bytes.length < originalLength + 7) {
bytes.bytes = new byte[ArrayUtil.oversize(originalLength + 7, 1)];
}
final int decompressedLength = LZ4.decompress(in, offset + length, bytes.bytes, 0);
if (decompressedLength > originalLength) {
throw new CorruptIndexException("Corrupted: lengths mismatch: " + decompressedLength + " > " + originalLength, in);
}
bytes.offset = offset;
bytes.length = length;
}
@Override
public Decompressor clone() {
return this;
}
};
private static final class LZ4FastCompressor extends Compressor {
private final LZ4.FastCompressionHashTable ht;
LZ4FastCompressor() {
ht = new LZ4.FastCompressionHashTable();
}
@Override
public void compress(byte[] bytes, int off, int len, DataOutput out)
throws IOException {
LZ4.compress(bytes, off, len, out, ht);
}
@Override
public void close() throws IOException {
// no-op
}
}
private static final class LZ4HighCompressor extends Compressor {
private final LZ4.HighCompressionHashTable ht;
LZ4HighCompressor() {
ht = new LZ4.HighCompressionHashTable();
}
@Override
public void compress(byte[] bytes, int off, int len, DataOutput out)
throws IOException {
LZ4.compress(bytes, off, len, out, ht);
}
@Override
public void close() throws IOException {
// no-op
}
}
private static final class DeflateDecompressor extends Decompressor {
byte[] compressed;
DeflateDecompressor() {
compressed = new byte[0];
}
@Override
public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException {
assert offset + length <= originalLength;
if (length == 0) {
bytes.length = 0;
return;
}
final int compressedLength = in.readVInt();
// pad with extra "dummy byte": see javadocs for using Inflater(true)
// we do it for compliance, but it's unnecessary for years in zlib.
final int paddedLength = compressedLength + 1;
compressed = ArrayUtil.grow(compressed, paddedLength);
in.readBytes(compressed, 0, compressedLength);
compressed[compressedLength] = 0; // explicitly set dummy byte to 0
final Inflater decompressor = new Inflater(true);
try {
// extra "dummy byte"
decompressor.setInput(compressed, 0, paddedLength);
bytes.offset = bytes.length = 0;
bytes.bytes = ArrayUtil.grow(bytes.bytes, originalLength);
try {
bytes.length = decompressor.inflate(bytes.bytes, bytes.length, originalLength);
} catch (DataFormatException e) {
throw new IOException(e);
}
if (!decompressor.finished()) {
throw new CorruptIndexException("Invalid decoder state: needsInput=" + decompressor.needsInput()
+ ", needsDict=" + decompressor.needsDictionary(), in);
}
} finally {
decompressor.end();
}
if (bytes.length != originalLength) {
throw new CorruptIndexException("Lengths mismatch: " + bytes.length + " != " + originalLength, in);
}
bytes.offset = offset;
bytes.length = length;
}
@Override
public Decompressor clone() {
return new DeflateDecompressor();
}
}
private static class DeflateCompressor extends Compressor {
final Deflater compressor;
byte[] compressed;
boolean closed;
DeflateCompressor(int level) {
compressor = new Deflater(level, true);
compressed = new byte[64];
}
@Override
public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
compressor.reset();
compressor.setInput(bytes, off, len);
compressor.finish();
if (compressor.needsInput()) {
// no output
assert len == 0 : len;
out.writeVInt(0);
return;
}
int totalCount = 0;
for (;;) {
final int count = compressor.deflate(compressed, totalCount, compressed.length - totalCount);
totalCount += count;
assert totalCount <= compressed.length;
if (compressor.finished()) {
break;
} else {
compressed = ArrayUtil.grow(compressed);
}
}
out.writeVInt(totalCount);
out.writeBytes(compressed, totalCount);
}
@Override
public void close() throws IOException {
if (closed == false) {
compressor.end();
closed = true;
}
}
}
}