/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.backward_codecs.lucene87;
import java.io.IOException;
import org.apache.lucene.codecs.compressing.CompressionMode;
import org.apache.lucene.codecs.compressing.Compressor;
import org.apache.lucene.codecs.compressing.Decompressor;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.compress.LZ4;
* A compression mode that compromises on the compression ratio to provide fast compression and
* decompression.
* @lucene.internal
public final class LZ4WithPresetDictCompressionMode extends CompressionMode {
// Shoot for 10 sub blocks
private static final int NUM_SUB_BLOCKS = 10;
// And a dictionary whose size is about 16x smaller than sub blocks
private static final int DICT_SIZE_FACTOR = 16;
/** Sole constructor. */
public LZ4WithPresetDictCompressionMode() {}
public Compressor newCompressor() {
return new LZ4WithPresetDictCompressor();
public Decompressor newDecompressor() {
return new LZ4WithPresetDictDecompressor();
public String toString() {
return "BEST_SPEED";
private static final class LZ4WithPresetDictDecompressor extends Decompressor {
private int[] compressedLengths;
private byte[] buffer;
LZ4WithPresetDictDecompressor() {
compressedLengths = new int[0];
buffer = new byte[0];
private int readCompressedLengths(
DataInput in, int originalLength, int dictLength, int blockLength) throws IOException {
in.readVInt(); // compressed length of the dictionary, unused
int totalLength = dictLength;
int i = 0;
while (totalLength < originalLength) {
compressedLengths = ArrayUtil.grow(compressedLengths, i + 1);
compressedLengths[i++] = in.readVInt();
totalLength += blockLength;
return i;
public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes)
throws IOException {
assert offset + length <= originalLength;
if (length == 0) {
bytes.length = 0;
final int dictLength = in.readVInt();
final int blockLength = in.readVInt();
final int numBlocks = readCompressedLengths(in, originalLength, dictLength, blockLength);
buffer = ArrayUtil.grow(buffer, dictLength + blockLength);
bytes.length = 0;
// Read the dictionary
if (LZ4.decompress(in, dictLength, buffer, 0) != dictLength) {
throw new CorruptIndexException("Illegal dict length", in);
int offsetInBlock = dictLength;
int offsetInBytesRef = offset;
if (offset >= dictLength) {
offsetInBytesRef -= dictLength;
// Skip unneeded blocks
int numBytesToSkip = 0;
for (int i = 0; i < numBlocks && offsetInBlock + blockLength < offset; ++i) {
int compressedBlockLength = compressedLengths[i];
numBytesToSkip += compressedBlockLength;
offsetInBlock += blockLength;
offsetInBytesRef -= blockLength;
} else {
// The dictionary contains some bytes we need, copy its content to the BytesRef
bytes.bytes = ArrayUtil.grow(bytes.bytes, dictLength);
System.arraycopy(buffer, 0, bytes.bytes, 0, dictLength);
bytes.length = dictLength;
// Read blocks that intersect with the interval we need
while (offsetInBlock < offset + length) {
final int bytesToDecompress = Math.min(blockLength, offset + length - offsetInBlock);
LZ4.decompress(in, bytesToDecompress, buffer, dictLength);
bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + bytesToDecompress);
System.arraycopy(buffer, dictLength, bytes.bytes, bytes.length, bytesToDecompress);
bytes.length += bytesToDecompress;
offsetInBlock += blockLength;
bytes.offset = offsetInBytesRef;
bytes.length = length;
assert bytes.isValid();
public Decompressor clone() {
return new LZ4WithPresetDictDecompressor();
private static class LZ4WithPresetDictCompressor extends Compressor {
final ByteBuffersDataOutput compressed;
final LZ4.FastCompressionHashTable hashTable;
byte[] buffer;
LZ4WithPresetDictCompressor() {
compressed = ByteBuffersDataOutput.newResettableInstance();
hashTable = new LZ4.FastCompressionHashTable();
buffer = BytesRef.EMPTY_BYTES;
private void doCompress(byte[] bytes, int dictLen, int len, DataOutput out) throws IOException {
long prevCompressedSize = compressed.size();
LZ4.compressWithDictionary(bytes, 0, dictLen, len, compressed, hashTable);
// Write the number of compressed bytes
out.writeVInt(Math.toIntExact(compressed.size() - prevCompressedSize));
public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
final int dictLength = len / (NUM_SUB_BLOCKS * DICT_SIZE_FACTOR);
final int blockLength = (len - dictLength + NUM_SUB_BLOCKS - 1) / NUM_SUB_BLOCKS;
buffer = ArrayUtil.grow(buffer, dictLength + blockLength);
final int end = off + len;
// Compress the dictionary first
System.arraycopy(bytes, off, buffer, 0, dictLength);
doCompress(buffer, 0, dictLength, out);
// And then sub blocks
for (int start = off + dictLength; start < end; start += blockLength) {
int l = Math.min(blockLength, off + len - start);
System.arraycopy(bytes, start, buffer, dictLength, l);
doCompress(buffer, dictLength, l, out);
// We only wrote lengths so far, now write compressed data
public void close() throws IOException {
// no-op