/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.compressors.lz77support;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
/**
* Helper class for compression algorithms that use the ideas of LZ77.
*
* <p>Most LZ77 derived algorithms split input data into blocks of
* uncompressed data (called literal blocks) and back-references
* (pairs of offsets and lengths) that state "add <code>length</code>
* bytes that are the same as those already written starting
 * <code>offset</code> bytes before the current position". The details
* of how those blocks and back-references are encoded are quite
* different between the algorithms and some algorithms perform
* additional steps (Huffman encoding in the case of DEFLATE for
* example).</p>
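 *
 * <p>For example, the eight byte input {@code a b c a b c a b} could
 * be encoded as a literal block holding {@code a b c} followed by a
 * back-reference with offset 3 and length 5 - the length of a
 * back-reference may exceed its offset, in which case the referenced
 * bytes simply repeat.</p>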
*
* <p>This class attempts to extract the core logic - finding
* back-references - so it can be re-used. It follows the algorithm
 * explained in section 4 of RFC 1951 (DEFLATE); the "lazy match"
 * optimization is only performed when enabled via
 * {@link Parameters}. The three-byte hash
* function used in this class is the same as the one used by zlib and
* InfoZIP's ZIP implementation of DEFLATE. The whole class is
* strongly inspired by InfoZIP's implementation.</p>
*
 * <p>LZ77 is used loosely here (as well as in many other places that
 * talk about it :-); LZSS would likely be closer to the truth, but
 * LZ77 has become the synonym for a whole family of algorithms.</p>
*
* <p>The API consists of a compressor that is fed <code>byte</code>s
* and emits {@link Block}s to a registered callback where the blocks
* represent either {@link LiteralBlock literal blocks}, {@link
* BackReference back-references} or {@link EOD end of data
 * markers}. In order to ensure the callback receives all information,
 * the {@link #finish} method must be used once all data has been fed
 * into the compressor - see the usage sketch below.</p>
*
* <p>Several parameters influence the outcome of the "compression":</p>
* <dl>
*
* <dt><code>windowSize</code></dt> <dd>the size of the sliding
* window, must be a power of two - this determines the maximum
 * offset a back-reference can take. The compressor maintains a
 * buffer of twice the size of <code>windowSize</code> - real world
 * values are in the area of 32k.</dd>
*
* <dt><code>minBackReferenceLength</code></dt>
* <dd>Minimal length of a back-reference found. A true minimum of 3 is
* hard-coded inside of this implementation but bigger lengths can be
* configured.</dd>
*
* <dt><code>maxBackReferenceLength</code></dt>
* <dd>Maximal length of a back-reference found.</dd>
*
* <dt><code>maxOffset</code></dt>
* <dd>Maximal offset of a back-reference.</dd>
*
* <dt><code>maxLiteralLength</code></dt>
* <dd>Maximal length of a literal block.</dd>
* </dl>
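 *
 * <p>A minimal usage sketch, assuming {@code Parameters.builder(int)}
 * of the {@link Parameters} class of this package is used for
 * configuration; {@code encodeBlock} stands for a hypothetical method
 * that writes a block in the concrete target format:</p>
 *
 * <pre>{@code
 * LZ77Compressor compressor = new LZ77Compressor(
 *     Parameters.builder(32 * 1024).build(), // 32k window, must be a power of two
 *     block -> encodeBlock(block));
 * compressor.compress(data);
 * compressor.finish(); // flushes a pending literal block and emits EOD
 * }</pre>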
*
* @see "https://tools.ietf.org/html/rfc1951#section-4"
* @since 1.14
* @NotThreadSafe
*/
public class LZ77Compressor {
/**
* Base class representing blocks the compressor may emit.
*
* <p>This class is not supposed to be subclassed by classes
* outside of Commons Compress so it is considered internal and
 * changes that would break subclasses may get introduced with
* future releases.</p>
*/
public abstract static class Block {
/** Enumeration of the block types the compressor may emit. */
public enum BlockType {
LITERAL, BACK_REFERENCE, EOD
}
public abstract BlockType getType();
}
/**
* Represents a literal block of data.
*
* <p>For performance reasons this encapsulates the real data, not
 * a copy of it. Don't modify the data; process it immediately
 * inside of {@link Callback#accept} as it will get overwritten
 * sooner or later.</p>
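 *
 * <p>A callback that needs to keep the bytes around beyond the
 * {@code accept} call could, for example, copy the relevant range via
 * {@code java.util.Arrays.copyOfRange} while the block is being
 * processed:</p>
 *
 * <pre>{@code
 * LiteralBlock lb = (LiteralBlock) block; // block as passed to accept
 * byte[] copy = Arrays.copyOfRange(lb.getData(), lb.getOffset(),
 *     lb.getOffset() + lb.getLength());
 * // copy stays valid after accept returns, lb.getData() may not
 * }</pre>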
*/
public static final class LiteralBlock extends Block {
private final byte[] data;
private final int offset, length;
public LiteralBlock(final byte[] data, final int offset, final int length) {
this.data = data;
this.offset = offset;
this.length = length;
}
/**
* The literal data.
*
 * <p>This returns a live view of the actual data in order to
 * avoid copying; modify the array at your own risk.</p>
* @return the data
*/
public byte[] getData() {
return data;
}
/**
* Offset into data where the literal block starts.
* @return the offset
*/
public int getOffset() {
return offset;
}
/**
* Length of literal block.
* @return the length
*/
public int getLength() {
return length;
}
@Override
public BlockType getType() {
return BlockType.LITERAL;
}
@Override
public String toString() {
return "LiteralBlock starting at " + offset + " with length " + length;
}
}
/**
* Represents a back-reference.
*/
public static final class BackReference extends Block {
private final int offset, length;
public BackReference(final int offset, final int length) {
this.offset = offset;
this.length = length;
}
/**
* Provides the offset of the back-reference.
* @return the offset
*/
public int getOffset() {
return offset;
}
/**
* Provides the length of the back-reference.
* @return the length
*/
public int getLength() {
return length;
}
@Override
public BlockType getType() {
return BlockType.BACK_REFERENCE;
}
@Override
public String toString() {
return "BackReference with offset " + offset + " and length " + length;
}
}
/** A simple "we are done" marker. */
public static final class EOD extends Block {
@Override
public BlockType getType() {
return BlockType.EOD;
}
}
private static final Block THE_EOD = new EOD();
/**
* Callback invoked while the compressor processes data.
*
* <p>The callback is invoked on the same thread that receives the
* bytes to compress and may be invoked multiple times during the
* execution of {@link #compress} or {@link #finish}.</p>
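 *
 * <p>A sketch of a callback that dispatches on the block type -
 * {@code out} stands for a hypothetical target the encoded form is
 * written to:</p>
 *
 * <pre>{@code
 * Callback cb = block -> {
 *     switch (block.getType()) {
 *     case LITERAL:
 *         LiteralBlock lb = (LiteralBlock) block;
 *         out.write(lb.getData(), lb.getOffset(), lb.getLength());
 *         break;
 *     case BACK_REFERENCE:
 *         // encode offset and length of ((BackReference) block)
 *         break;
 *     case EOD:
 *         // write whatever trailer the target format requires
 *         break;
 *     }
 * };
 * }</pre>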
*/
public interface Callback {
/**
* Consumes a block.
* @param b the block to consume
* @throws IOException in case of an error
*/
void accept(Block b) throws IOException;
}
static final int NUMBER_OF_BYTES_IN_HASH = 3;
private static final int NO_MATCH = -1;
private final Parameters params;
private final Callback callback;
// the sliding window, twice as big as "windowSize" parameter
private final byte[] window;
// the head of hash-chain - indexed by hash-code, points to the
// location inside of window of the latest sequence of bytes with
// the given hash.
private final int[] head;
// for each window-location points to the latest earlier location
// with the same hash. Only stores values for the latest
// "windowSize" elements, the index is "window location modulo
// windowSize".
private final int[] prev;
// bit mask used when indexing into prev
private final int wMask;
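// Together head and prev form hash chains: to find earlier
// occurrences of the three bytes at the current position one follows
// head[hash], prev[head[hash] & wMask], prev[... & wMask], ... until
// NO_MATCH or a position that is too far away is reached (see
// longestMatch).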
private boolean initialized = false;
// the position inside of window that shall be encoded right now
private int currentPosition;
// the number of bytes available to compress including the one at
// currentPosition
private int lookahead = 0;
// the hash of the three bytes starting at the current position
private int insertHash = 0;
// the position inside of the window where the current literal
// block starts (in case we are inside of a literal block).
private int blockStart = 0;
// position of the current match
private int matchStart = NO_MATCH;
// number of insertString calls missed for up to the last three
// bytes of the last match; they can only be performed once more
// data has been read
private int missedInserts = 0;
/**
* Initializes a compressor with parameters and a callback.
* @param params the parameters
* @param callback the callback
* @throws NullPointerException if either parameter is <code>null</code>
*/
public LZ77Compressor(final Parameters params, final Callback callback) {
Objects.requireNonNull(params, "params");
Objects.requireNonNull(callback, "callback");
this.params = params;
this.callback = callback;
final int wSize = params.getWindowSize();
window = new byte[wSize * 2];
wMask = wSize - 1;
head = new int[HASH_SIZE];
Arrays.fill(head, NO_MATCH);
prev = new int[wSize];
}
/**
* Feeds bytes into the compressor which in turn may emit zero or
* more blocks to the callback during the execution of this
* method.
* @param data the data to compress - must not be null
* @throws IOException if the callback throws an exception
*/
public void compress(final byte[] data) throws IOException {
compress(data, 0, data.length);
}
/**
* Feeds bytes into the compressor which in turn may emit zero or
* more blocks to the callback during the execution of this
* method.
* @param data the data to compress - must not be null
* @param off the start offset of the data
* @param len the number of bytes to compress
* @throws IOException if the callback throws an exception
*/
public void compress(final byte[] data, int off, int len) throws IOException {
final int wSize = params.getWindowSize();
while (len > wSize) { // chop into windowSize sized chunks
doCompress(data, off, wSize);
off += wSize;
len -= wSize;
}
if (len > 0) {
doCompress(data, off, len);
}
}
/**
* Tells the compressor to process all remaining data and signal
* end of data to the callback.
*
* <p>The compressor will in turn emit at least one block ({@link
* EOD}) but potentially multiple blocks to the callback during
* the execution of this method.</p>
* @throws IOException if the callback throws an exception
*/
public void finish() throws IOException {
if (blockStart != currentPosition || lookahead > 0) {
currentPosition += lookahead;
flushLiteralBlock();
}
callback.accept(THE_EOD);
}
/**
* Adds some initial data to fill the window with.
*
* <p>This is used if the stream has been cut into blocks and
* back-references of one block may refer to data of the previous
* block(s). One such example is the LZ4 frame format using block
* dependency.</p>
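 *
 * <p>A sketch of how a format with block dependency might use this
 * (variable names are only illustrative):</p>
 *
 * <pre>{@code
 * LZ77Compressor compressor = new LZ77Compressor(params, callback);
 * compressor.prefill(previousBlock); // back-references may now point into previousBlock
 * compressor.compress(currentBlock);
 * compressor.finish();
 * }</pre>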
*
* @param data the data to fill the window with.
* @throws IllegalStateException if the compressor has already started to accept data
*/
public void prefill(final byte[] data) {
if (currentPosition != 0 || lookahead != 0) {
throw new IllegalStateException("The compressor has already started to accept data, can't prefill anymore");
}
// don't need more than windowSize for back-references
final int len = Math.min(params.getWindowSize(), data.length);
System.arraycopy(data, data.length - len, window, 0, len);
if (len >= NUMBER_OF_BYTES_IN_HASH) {
initialize();
final int stop = len - NUMBER_OF_BYTES_IN_HASH + 1;
for (int i = 0; i < stop; i++) {
insertString(i);
}
missedInserts = NUMBER_OF_BYTES_IN_HASH - 1;
} else { // not enough data to hash anything
missedInserts = len;
}
blockStart = currentPosition = len;
}
// we use a 15 bit hash code as calculated in nextHash
private static final int HASH_SIZE = 1 << 15;
private static final int HASH_MASK = HASH_SIZE - 1;
private static final int H_SHIFT = 5;
/**
* Assumes we are calculating the hash for three consecutive bytes
* as a rolling hash, i.e. for bytes ABCD if H is the hash of ABC
* the new hash for BCD is nextHash(H, D).
*
 * <p>The hash is shifted by five bits on each update so all
 * effects of A have been shifted out after the third update.</p>
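 *
 * <p>Starting from a hash of 0 this makes the hash of the three bytes
 * {@code b0 b1 b2} (taken as unsigned values) effectively
 * {@code ((b0 << 10) ^ (b1 << 5) ^ b2) & HASH_MASK}; the masking of
 * the intermediate steps doesn't change the 15 bit result.</p>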
*/
private int nextHash(final int oldHash, final byte nextByte) {
final int nextVal = nextByte & 0xFF;
return ((oldHash << H_SHIFT) ^ nextVal) & HASH_MASK;
}
// performs the actual algorithm with the pre-condition len <= windowSize
private void doCompress(final byte[] data, final int off, final int len) throws IOException {
final int spaceLeft = window.length - currentPosition - lookahead;
if (len > spaceLeft) {
slide();
}
System.arraycopy(data, off, window, currentPosition + lookahead, len);
lookahead += len;
if (!initialized && lookahead >= params.getMinBackReferenceLength()) {
initialize();
}
if (initialized) {
compress();
}
}
private void slide() throws IOException {
final int wSize = params.getWindowSize();
if (blockStart != currentPosition && blockStart < wSize) {
flushLiteralBlock();
blockStart = currentPosition;
}
System.arraycopy(window, wSize, window, 0, wSize);
currentPosition -= wSize;
matchStart -= wSize;
blockStart -= wSize;
for (int i = 0; i < HASH_SIZE; i++) {
final int h = head[i];
head[i] = h >= wSize ? h - wSize : NO_MATCH;
}
for (int i = 0; i < wSize; i++) {
final int p = prev[i];
prev[i] = p >= wSize ? p - wSize : NO_MATCH;
}
}
private void initialize() {
for (int i = 0; i < NUMBER_OF_BYTES_IN_HASH - 1; i++) {
insertHash = nextHash(insertHash, window[i]);
}
initialized = true;
}
private void compress() throws IOException {
final int minMatch = params.getMinBackReferenceLength();
final boolean lazy = params.getLazyMatching();
final int lazyThreshold = params.getLazyMatchingThreshold();
while (lookahead >= minMatch) {
catchUpMissedInserts();
int matchLength = 0;
final int hashHead = insertString(currentPosition);
if (hashHead != NO_MATCH && currentPosition - hashHead <= params.getMaxOffset()) {
// sets matchStart as a side effect
matchLength = longestMatch(hashHead);
if (lazy && matchLength <= lazyThreshold && lookahead > minMatch) {
// try to find a longer match using the next position
matchLength = longestMatchForNextPosition(matchLength);
}
}
if (matchLength >= minMatch) {
if (blockStart != currentPosition) {
// emit preceding literal block
flushLiteralBlock();
blockStart = NO_MATCH;
}
flushBackReference(matchLength);
insertStringsInMatch(matchLength);
lookahead -= matchLength;
currentPosition += matchLength;
blockStart = currentPosition;
} else {
// no match, append to current or start a new literal
lookahead--;
currentPosition++;
if (currentPosition - blockStart >= params.getMaxLiteralLength()) {
flushLiteralBlock();
blockStart = currentPosition;
}
}
}
}
/**
* Inserts the current three byte sequence into the dictionary and
* returns the previous head of the hash-chain.
*
* <p>Updates <code>insertHash</code> and <code>prev</code> as a
* side effect.</p>
*/
private int insertString(final int pos) {
insertHash = nextHash(insertHash, window[pos - 1 + NUMBER_OF_BYTES_IN_HASH]);
final int hashHead = head[insertHash];
prev[pos & wMask] = hashHead;
head[insertHash] = pos;
return hashHead;
}
private int longestMatchForNextPosition(final int prevMatchLength) {
// save a bunch of values to restore them if the next match isn't better than the current one
final int prevMatchStart = matchStart;
final int prevInsertHash = insertHash;
lookahead--;
currentPosition++;
final int hashHead = insertString(currentPosition);
final int prevHashHead = prev[currentPosition & wMask];
int matchLength = longestMatch(hashHead);
if (matchLength <= prevMatchLength) {
// use the first match, as the next one isn't any better
matchLength = prevMatchLength;
matchStart = prevMatchStart;
// restore modified values
head[insertHash] = prevHashHead;
insertHash = prevInsertHash;
currentPosition--;
lookahead++;
}
return matchLength;
}
private void insertStringsInMatch(final int matchLength) {
// inserts strings contained in the current match
// insertString hashes the byte two positions after pos, which may not yet be available -> missedInserts
final int stop = Math.min(matchLength - 1, lookahead - NUMBER_OF_BYTES_IN_HASH);
// currentPosition has been inserted already
for (int i = 1; i <= stop; i++) {
insertString(currentPosition + i);
}
missedInserts = matchLength - stop - 1;
}
private void catchUpMissedInserts() {
while (missedInserts > 0) {
insertString(currentPosition - missedInserts--);
}
}
private void flushBackReference(final int matchLength) throws IOException {
callback.accept(new BackReference(currentPosition - matchStart, matchLength));
}
private void flushLiteralBlock() throws IOException {
callback.accept(new LiteralBlock(window, blockStart, currentPosition - blockStart));
}
/**
 * Searches the hash chain for real matches and returns the length
 * of the longest match found that isn't too far away (with respect
 * to maxOffset); the result is smaller than minBackReferenceLength
 * if no match was found.
*
* <p>Sets matchStart to the index of the start position of the
* longest match as a side effect.</p>
*/
private int longestMatch(int matchHead) {
final int minLength = params.getMinBackReferenceLength();
int longestMatchLength = minLength - 1;
final int maxPossibleLength = Math.min(params.getMaxBackReferenceLength(), lookahead);
final int minIndex = Math.max(0, currentPosition - params.getMaxOffset());
final int niceBackReferenceLength = Math.min(maxPossibleLength, params.getNiceBackReferenceLength());
final int maxCandidates = params.getMaxCandidates();
for (int candidates = 0; candidates < maxCandidates && matchHead >= minIndex; candidates++) {
int currentLength = 0;
for (int i = 0; i < maxPossibleLength; i++) {
if (window[matchHead + i] != window[currentPosition + i]) {
break;
}
currentLength++;
}
if (currentLength > longestMatchLength) {
longestMatchLength = currentLength;
matchStart = matchHead;
if (currentLength >= niceBackReferenceLength) {
// no need to search any further
break;
}
}
matchHead = prev[matchHead & wMask];
}
return longestMatchLength; // < minLength if no matches have been found, will be ignored in compress()
}
}