blob: 92ff881d2f5d4ba5313ed80f9432372e90b73c4c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.compressors.lz77support;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.junit.Test;
import static java.nio.charset.StandardCharsets.*;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
public class LZ77CompressorTest {
private static final byte[] BLA, SAM, ONE_TO_TEN;
static {
/*
* Example from "An Explanation of the Deflate Algorithm" by "Antaeus Feldspar".
* @see "http://zlib.net/feldspar.html"
*/
BLA = "Blah blah blah blah blah!".getBytes(US_ASCII);
/*
* Example from Wikipedia article about LZSS.
* Note the example uses indices instead of offsets.
* @see "https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Storer%E2%80%93Szymanski"
*/
SAM = ("I am Sam\n"
+ "\n"
+ "Sam I am\n"
+ "\n"
+ "That Sam-I-am!\n"
+ "That Sam-I-am!\n"
+ "I do not like\n"
+ "that Sam-I-am!\n"
+ "\n"
+ "Do you like green eggs and ham?\n"
+ "\n"
+ "I do not like them, Sam-I-am.\n"
+ "I do not like green eggs and ham.").getBytes(US_ASCII);
ONE_TO_TEN = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
}
private List<LZ77Compressor.Block> compress(final Parameters params, final byte[]... chunks) throws IOException {
final List<LZ77Compressor.Block> blocks = new ArrayList<>();
final LZ77Compressor c = new LZ77Compressor(params, block -> {
//System.err.println(block);
if (block instanceof LZ77Compressor.LiteralBlock) {
// replace with a real copy of data so tests
// can see the results as they've been when
// the callback has been called
final LZ77Compressor.LiteralBlock b = (LZ77Compressor.LiteralBlock) block;
final int len = b.getLength();
block = new LZ77Compressor.LiteralBlock(
Arrays.copyOfRange(b.getData(), b.getOffset(), b.getOffset() + len),
0, len);
}
blocks.add(block);
});
for (final byte[] chunk : chunks) {
c.compress(chunk);
}
c.finish();
return blocks;
}
@Test
public void nonCompressableWithLengthSmallerThanLiteralMax() throws IOException {
final List<LZ77Compressor.Block> blocks = compress(newParameters(128), ONE_TO_TEN);
assertSize(2, blocks);
assertLiteralBlock(ONE_TO_TEN, blocks.get(0));
}
@Test
public void nonCompressableWithLengthGreaterThanLiteralMaxButLessThanTwiceWindowSize() throws IOException {
final List<LZ77Compressor.Block> blocks = compress(newParameters(8), ONE_TO_TEN);
assertSize(3, blocks);
assertLiteralBlock(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 }, blocks.get(0));
assertLiteralBlock(new byte[] { 9, 10 }, blocks.get(1));
}
@Test
public void nonCompressableWithLengthThatForcesWindowSlide() throws IOException {
final List<LZ77Compressor.Block> blocks = compress(newParameters(4), ONE_TO_TEN);
assertSize(4, blocks);
assertLiteralBlock(new byte[] { 1, 2, 3, 4, }, blocks.get(0));
assertLiteralBlock(new byte[] { 5, 6, 7, 8 }, blocks.get(1));
assertLiteralBlock(new byte[] { 9, 10 }, blocks.get(2));
}
@Test
public void nonCompressableSentAsSingleBytes() throws IOException {
final List<LZ77Compressor.Block> blocks = compress(newParameters(8), stagger(ONE_TO_TEN));
assertSize(3, blocks);
assertLiteralBlock(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 }, blocks.get(0));
assertLiteralBlock(new byte[] { 9, 10 }, blocks.get(1));
}
@Test
public void blaExampleWithFullArrayAvailableForCompression()
throws IOException {
final List<LZ77Compressor.Block> blocks = compress(newParameters(128), BLA);
assertSize(4, blocks);
assertLiteralBlock("Blah b", blocks.get(0));
assertBackReference(5, 18, blocks.get(1));
assertLiteralBlock("!", blocks.get(2));
}
@Test
public void blaExampleWithShorterBackReferenceLength() throws IOException {
final List<LZ77Compressor.Block> blocks = compress(newParameters(128, 3, 5, 0, 0), BLA);
assertSize(7, blocks);
assertLiteralBlock("Blah b", blocks.get(0));
assertBackReference(5, 5, blocks.get(1));
assertBackReference(5, 5, blocks.get(2));
assertBackReference(5, 5, blocks.get(3));
assertBackReference(5, 3, blocks.get(4));
assertLiteralBlock("!", blocks.get(5));
}
@Test
public void blaExampleSmallerWindowSize() throws IOException {
final List<LZ77Compressor.Block> blocks = compress(newParameters(8), BLA);
assertSize(6, blocks);
assertLiteralBlock("Blah b", blocks.get(0));
assertBackReference(5, 7, blocks.get(1));
assertBackReference(5, 3, blocks.get(2));
assertBackReference(5, 7, blocks.get(3));
assertLiteralBlock("h!", blocks.get(4));
}
@Test
public void blaExampleWithSingleByteWrites() throws IOException {
final List<LZ77Compressor.Block> blocks = compress(newParameters(128), stagger(BLA));
assertEquals(9, blocks.size());
assertLiteralBlock("Blah b", blocks.get(0));
assertBackReference(5, 3, blocks.get(1));
assertBackReference(5, 3, blocks.get(2));
assertBackReference(5, 3, blocks.get(3));
assertBackReference(5, 3, blocks.get(4));
assertBackReference(5, 3, blocks.get(5));
assertBackReference(5, 3, blocks.get(6));
assertLiteralBlock("!", blocks.get(7));
}
@Test
public void samIAmExampleWithFullArrayAvailableForCompression() throws IOException {
final List<LZ77Compressor.Block> blocks = compress(newParameters(1024), SAM);
assertEquals(21, blocks.size());
assertLiteralBlock("I am Sam\n\n", blocks.get(0));
assertBackReference(5, 3, blocks.get(1));
assertLiteralBlock(" ", blocks.get(2));
assertBackReference(14, 4, blocks.get(3));
assertLiteralBlock("\n\nThat", blocks.get(4));
assertBackReference(20, 4, blocks.get(5));
assertLiteralBlock("-I-am!", blocks.get(6));
assertBackReference(15, 16, blocks.get(7));
assertLiteralBlock("I do not like\nt", blocks.get(8));
assertBackReference(29, 14, blocks.get(9));
assertLiteralBlock("\nDo you", blocks.get(10));
assertBackReference(28, 5, blocks.get(11));
assertLiteralBlock(" green eggs and ham?\n", blocks.get(12));
assertBackReference(63, 14, blocks.get(13));
assertLiteralBlock(" them,", blocks.get(14));
assertBackReference(64, 9, blocks.get(15));
assertLiteralBlock(".", blocks.get(16));
assertBackReference(30, 15, blocks.get(17));
assertBackReference(65, 18, blocks.get(18));
assertLiteralBlock(".", blocks.get(19));
}
@Test
public void blaExampleWithPrefill() throws IOException {
final List<LZ77Compressor.Block> blocks = new ArrayList<>();
final LZ77Compressor c = new LZ77Compressor(newParameters(128), block -> {
//System.err.println(block);
if (block instanceof LZ77Compressor.LiteralBlock) {
// replace with a real copy of data so tests
// can see the results as they've been when
// the callback has been called
final LZ77Compressor.LiteralBlock b = (LZ77Compressor.LiteralBlock) block;
final int len = b.getLength();
block = new LZ77Compressor.LiteralBlock(
Arrays.copyOfRange(b.getData(), b.getOffset(), b.getOffset() + len),
0, len);
}
blocks.add(block);
});
c.prefill(Arrays.copyOfRange(BLA, 0, 6));
c.compress(Arrays.copyOfRange(BLA, 6, BLA.length));
c.finish();
assertSize(3, blocks);
assertBackReference(5, 18, blocks.get(0));
assertLiteralBlock("!", blocks.get(1));
}
@Test
public void blaExampleWithShortPrefill() throws IOException {
final List<LZ77Compressor.Block> blocks = new ArrayList<>();
final LZ77Compressor c = new LZ77Compressor(newParameters(128), block -> {
//System.err.println(block);
if (block instanceof LZ77Compressor.LiteralBlock) {
// replace with a real copy of data so tests
// can see the results as they've been when
// the callback has been called
final LZ77Compressor.LiteralBlock b = (LZ77Compressor.LiteralBlock) block;
final int len = b.getLength();
block = new LZ77Compressor.LiteralBlock(
Arrays.copyOfRange(b.getData(), b.getOffset(), b.getOffset() + len),
0, len);
}
blocks.add(block);
});
c.prefill(Arrays.copyOfRange(BLA, 0, 2));
c.compress(Arrays.copyOfRange(BLA, 2, BLA.length));
c.finish();
assertSize(4, blocks);
assertLiteralBlock("ah b", blocks.get(0));
assertBackReference(5, 18, blocks.get(1));
assertLiteralBlock("!", blocks.get(2));
}
@Test
public void blaExampleWithPrefillBiggerThanWindowSize() throws IOException {
final List<LZ77Compressor.Block> blocks = new ArrayList<>();
final LZ77Compressor c = new LZ77Compressor(newParameters(4), block -> {
//System.err.println(block);
if (block instanceof LZ77Compressor.LiteralBlock) {
// replace with a real copy of data so tests
// can see the results as they've been when
// the callback has been called
final LZ77Compressor.LiteralBlock b = (LZ77Compressor.LiteralBlock) block;
final int len = b.getLength();
block = new LZ77Compressor.LiteralBlock(
Arrays.copyOfRange(b.getData(), b.getOffset(), b.getOffset() + len),
0, len);
}
blocks.add(block);
});
c.prefill(Arrays.copyOfRange(BLA, 0, 6));
c.compress(Arrays.copyOfRange(BLA, 6, BLA.length));
c.finish();
assertSize(6, blocks);
assertLiteralBlock("lah ", blocks.get(0));
assertLiteralBlock("blah", blocks.get(1));
assertLiteralBlock(" bla", blocks.get(2));
assertLiteralBlock("h bl", blocks.get(3));
assertLiteralBlock("ah!", blocks.get(4));
}
@Test(expected = IllegalStateException.class)
public void cantPrefillTwice() {
final LZ77Compressor c = new LZ77Compressor(newParameters(128), block -> {
});
c.prefill(Arrays.copyOfRange(BLA, 0, 2));
c.prefill(Arrays.copyOfRange(BLA, 2, 4));
}
@Test(expected = IllegalStateException.class)
public void cantPrefillAfterCompress() throws IOException {
final LZ77Compressor c = new LZ77Compressor(newParameters(128), block -> {
});
c.compress(Arrays.copyOfRange(BLA, 0, 2));
c.prefill(Arrays.copyOfRange(BLA, 2, 4));
}
private static final void assertSize(final int expectedSize, final List<LZ77Compressor.Block> blocks) {
assertEquals(expectedSize, blocks.size());
assertEquals(LZ77Compressor.Block.BlockType.EOD, blocks.get(expectedSize - 1).getType());
}
private static final void assertLiteralBlock(final String expectedContent, final LZ77Compressor.Block block) {
assertLiteralBlock(expectedContent.getBytes(US_ASCII), block);
}
private static final void assertLiteralBlock(final byte[] expectedContent, final LZ77Compressor.Block block) {
assertEquals(LZ77Compressor.LiteralBlock.class, block.getClass());
assertArrayEquals(expectedContent, ((LZ77Compressor.LiteralBlock) block).getData());
}
private static final void assertBackReference(final int expectedOffset, final int expectedLength, final LZ77Compressor.Block block) {
assertEquals(LZ77Compressor.BackReference.class, block.getClass());
final LZ77Compressor.BackReference b = (LZ77Compressor.BackReference) block;
assertEquals(expectedOffset, b.getOffset());
assertEquals(expectedLength, b.getLength());
}
private static final byte[][] stagger(final byte[] data) {
final byte[][] r = new byte[data.length][1];
for (int i = 0; i < data.length; i++) {
r[i][0] = data[i];
}
return r;
}
private static Parameters newParameters(final int windowSize) {
return Parameters.builder(windowSize).build();
}
private static Parameters newParameters(final int windowSize, final int minBackReferenceLength, final int maxBackReferenceLength,
final int maxOffset, final int maxLiteralLength) {
return Parameters.builder(windowSize)
.withMinBackReferenceLength(minBackReferenceLength)
.withMaxBackReferenceLength(maxBackReferenceLength)
.withMaxOffset(maxOffset)
.withMaxLiteralLength(maxLiteralLength)
.tunedForCompressionRatio()
.build();
}
}