| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| */ |
| |
| /* |
| * This package is based on the work done by Keiron Liddle, Aftex Software |
| * <keiron@aftexsw.com> to whom the Ant project is very grateful for his |
| * great code. |
| */ |
| package org.apache.hadoop.io.compress.bzip2; |
| |
| import java.io.BufferedInputStream; |
| import java.io.InputStream; |
| import java.io.IOException; |
| |
| import org.apache.hadoop.io.compress.SplittableCompressionCodec.READ_MODE; |
| |
| |
| /** |
| * An input stream that decompresses from the BZip2 format (without the file |
| * header chars) to be read as any other stream. |
| * |
| * <p> |
| * The decompression requires large amounts of memory. Thus you should call the |
| * {@link #close() close()} method as soon as possible, to force |
| * <tt>CBZip2InputStream</tt> to release the allocated memory. See |
| * {@link CBZip2OutputStream CBZip2OutputStream} for information about memory |
| * usage. |
| * </p> |
| * |
 * <p>
 * <tt>CBZip2InputStream</tt> reads bytes from the compressed source stream via
 * the single byte {@link java.io.InputStream#read() read()} method exclusively.
 * Thus you should consider using a buffered source stream.
 * </p>
| * |
 * <p>
 * This Ant code was enhanced so that it can decompress blocks of bzip2 data.
 * The current position in the stream is an important statistic for Hadoop. For
 * example, LineRecordReader depends solely on the current position in the
 * stream to know about the progress. The notion of position becomes complicated
 * for compressed files: Hadoop splitting is done in terms of the compressed
 * file, but a compressed file inflates to a much larger amount of data. We
 * handle this problem in the following way.
 * </p>
 *
 * <p>
 * At object creation time, we find the next block start delimiter. Once such a
 * marker is found, the stream stops there (any compressed data read in the
 * process is discarded) and the position is reported as the beginning of the
 * block start delimiter. At this point we are ready for the actual reading
 * (i.e. decompression) of data.
 * </p>
 *
 * <p>
 * The subsequent read calls give out data. The position is updated when the
 * caller of this class has read off the current block + 1 bytes. In between,
 * while a block is being read, the position is not updated (the position can
 * only be updated on block boundaries).
 * </p>
| * |
| * <p> |
| * Instances of this class are not threadsafe. |
| * </p> |
| */ |
| public class CBZip2InputStream extends InputStream implements BZip2Constants { |
| |
| |
| public static final long BLOCK_DELIMITER = 0X314159265359L;// start of block |
| public static final long EOS_DELIMITER = 0X177245385090L;// end of bzip2 stream |
| private static final int DELIMITER_BIT_LENGTH = 48; |
| READ_MODE readMode = READ_MODE.CONTINUOUS; |
| // The variable records the current advertised position of the stream. |
| private long reportedBytesReadFromCompressedStream = 0L; |
| // The following variable keep record of compressed bytes read. |
| private long bytesReadFromCompressedStream = 0L; |
| private boolean lazyInitialization = false; |
| private byte array[] = new byte[1]; |
| |
| /** |
| * Index of the last char in the block, so the block size == last + 1. |
| */ |
| private int last; |
| |
| /** |
| * Index in zptr[] of original string after sorting. |
| */ |
| private int origPtr; |
| |
| /** |
| * always: in the range 0 .. 9. The current block size is 100000 * this |
| * number. |
| */ |
| private int blockSize100k; |
| |
| private boolean blockRandomised = false; |
| |
| private long bsBuff; |
| private long bsLive; |
| private final CRC crc = new CRC(); |
| |
| private int nInUse; |
| |
| private BufferedInputStream in; |
| |
| private int currentChar = -1; |
| |
/**
 * The states of the state machine that tracks what the decoder has to do
 * next.
 */
public enum STATE {
  EOF,
  START_BLOCK_STATE,
  RAND_PART_A_STATE,
  RAND_PART_B_STATE,
  RAND_PART_C_STATE,
  NO_RAND_PART_A_STATE,
  NO_RAND_PART_B_STATE,
  NO_RAND_PART_C_STATE,
  NO_PROCESS_STATE
}
| |
| private STATE currentState = STATE.START_BLOCK_STATE; |
| |
| private int storedBlockCRC, storedCombinedCRC; |
| private int computedBlockCRC, computedCombinedCRC; |
| |
| private boolean skipResult = false;// used by skipToNextMarker |
| private boolean skipDecompression = false; |
| |
| // Variables used by setup* methods exclusively |
| |
| private int su_count; |
| private int su_ch2; |
| private int su_chPrev; |
| private int su_i2; |
| private int su_j2; |
| private int su_rNToGo; |
| private int su_rTPos; |
| private int su_tPos; |
| private char su_z; |
| |
| /** |
| * All memory intensive stuff. This field is initialized by initBlock(). |
| */ |
| private CBZip2InputStream.Data data; |
| |
| /** |
| * This method reports the processed bytes so far. Please note that this |
| * statistic is only updated on block boundaries and only when the stream is |
| * initiated in BYBLOCK mode. |
| */ |
| public long getProcessedByteCount() { |
| return reportedBytesReadFromCompressedStream; |
| } |
| |
| /** |
| * This method keeps track of raw processed compressed |
| * bytes. |
| * |
| * @param count count is the number of bytes to be |
| * added to raw processed bytes |
| */ |
| |
| protected void updateProcessedByteCount(int count) { |
| this.bytesReadFromCompressedStream += count; |
| } |
| |
| /** |
| * This method is called by the client of this |
| * class in case there are any corrections in |
| * the stream position. One common example is |
| * when client of this code removes starting BZ |
| * characters from the compressed stream. |
| * |
| * @param count count bytes are added to the reported bytes |
| * |
| */ |
| public void updateReportedByteCount(int count) { |
| this.reportedBytesReadFromCompressedStream += count; |
| this.updateProcessedByteCount(count); |
| } |
| |
| /** |
| * This method reads a Byte from the compressed stream. Whenever we need to |
| * read from the underlying compressed stream, this method should be called |
| * instead of directly calling the read method of the underlying compressed |
| * stream. This method does important record keeping to have the statistic |
| * that how many bytes have been read off the compressed stream. |
| */ |
| private int readAByte(InputStream inStream) throws IOException { |
| int read = inStream.read(); |
| if (read >= 0) { |
| this.updateProcessedByteCount(1); |
| } |
| return read; |
| } |
| |
| /** |
| * This method tries to find the marker (passed to it as the first parameter) |
| * in the stream. It can find bit patterns of length <= 63 bits. Specifically |
| * this method is used in CBZip2InputStream to find the end of block (EOB) |
| * delimiter in the stream, starting from the current position of the stream. |
| * If marker is found, the stream position will be at the byte containing |
| * the starting bit of the marker. |
| * |
| * @param marker The bit pattern to be found in the stream |
| * @param markerBitLength No of bits in the marker |
| * @return true if the marker was found otherwise false |
| * |
| * @throws IOException |
| * @throws IllegalArgumentException if marketBitLength is greater than 63 |
| */ |
| public boolean skipToNextMarker(long marker, int markerBitLength) |
| throws IOException, IllegalArgumentException { |
| try { |
| if (markerBitLength > 63) { |
| throw new IllegalArgumentException( |
| "skipToNextMarker can not find patterns greater than 63 bits"); |
| } |
| // pick next marketBitLength bits in the stream |
| long bytes = 0; |
| bytes = this.bsR(markerBitLength); |
| if (bytes == -1) { |
| this.reportedBytesReadFromCompressedStream = |
| this.bytesReadFromCompressedStream; |
| return false; |
| } |
| while (true) { |
| if (bytes == marker) { |
| // Report the byte position where the marker starts |
| long markerBytesRead = (markerBitLength + this.bsLive + 7) / 8; |
| this.reportedBytesReadFromCompressedStream = |
| this.bytesReadFromCompressedStream - markerBytesRead; |
| return true; |
| } else { |
| bytes = bytes << 1; |
| bytes = bytes & ((1L << markerBitLength) - 1); |
| int oneBit = (int) this.bsR(1); |
| if (oneBit != -1) { |
| bytes = bytes | oneBit; |
| } else { |
| this.reportedBytesReadFromCompressedStream = |
| this.bytesReadFromCompressedStream; |
| return false; |
| } |
| } |
| } |
| } catch (IOException ex) { |
| this.reportedBytesReadFromCompressedStream = |
| this.bytesReadFromCompressedStream; |
| return false; |
| } |
| } |
| |
| protected void reportCRCError() throws IOException { |
| throw new IOException("crc error"); |
| } |
| |
| private void makeMaps() { |
| final boolean[] inUse = this.data.inUse; |
| final byte[] seqToUnseq = this.data.seqToUnseq; |
| |
| int nInUseShadow = 0; |
| |
| for (int i = 0; i < 256; i++) { |
| if (inUse[i]) |
| seqToUnseq[nInUseShadow++] = (byte) i; |
| } |
| |
| this.nInUse = nInUseShadow; |
| } |
| |
/**
 * Creates a CBZip2InputStream that decompresses the bytes read from the
 * given stream, using the given reading mode.
 *
 * <p>
 * BZip2 headers start with the magic <tt>"Bz"</tt>, but this constructor
 * expects the next byte of the stream to be the first one <em>after</em>
 * that magic, so callers have to skip the first two bytes themselves. In
 * CONTINUOUS mode the stream header is checked and a malformed header
 * results in an exception.
 * </p>
 *
 * @param in the compressed source stream
 * @param readMode the reading mode (CONTINUOUS or BYBLOCK)
 * @throws IOException
 *             if the stream content is malformed or an I/O error occurs.
 * @throws NullPointerException
 *             if <tt>in == null</tt>
 */
public CBZip2InputStream(final InputStream in, READ_MODE readMode)
    throws IOException {
  this(in, readMode, false);
}
| |
/**
 * Shared constructor.
 *
 * <p>
 * In CONTINUOUS mode the stream header is read right away, unless the
 * source reports no available bytes; in that case the header is read by the
 * first read call. In BYBLOCK mode the stream is advanced to the next block
 * start marker and, unless <tt>skipDecompression</tt> is set, the block
 * found there is prepared for reading.
 * </p>
 *
 * @param in the compressed source stream
 * @param readMode the reading mode (CONTINUOUS or BYBLOCK)
 * @param skipDecompression if true (BYBLOCK mode), only locate the marker
 *            and leave the decoding of the block to the first read call
 * @throws IOException if the stream content is malformed or an I/O error
 *             occurs
 */
private CBZip2InputStream(final InputStream in, READ_MODE readMode, boolean skipDecompression)
    throws IOException {

  super();
  // Start with the largest block size ('9'); init() replaces it with the
  // value from the stream header when a header is read.
  this.blockSize100k = '9' - '0';
  this.in = new BufferedInputStream(in, 1024 * 9); // 9 KB read buffer
  this.readMode = readMode;
  this.skipDecompression = skipDecompression;
  if (readMode == READ_MODE.CONTINUOUS) {
    currentState = STATE.START_BLOCK_STATE;
    // Nothing available yet: postpone reading the header to the first read.
    lazyInitialization = in.available() == 0;
    if (!lazyInitialization) {
      init();
    }
  } else if (readMode == READ_MODE.BYBLOCK) {
    this.currentState = STATE.NO_PROCESS_STATE;
    skipResult = this.skipToNextMarker(CBZip2InputStream.BLOCK_DELIMITER, DELIMITER_BIT_LENGTH);
    if (!skipDecompression) {
      changeStateToProcessABlock();
    }
  }
}
| |
| /** |
| * Returns the number of bytes between the current stream position |
| * and the immediate next BZip2 block marker. |
| * |
| * @param in |
| * The InputStream |
| * |
| * @return long Number of bytes between current stream position and the |
| * next BZip2 block start marker. |
| * @throws IOException |
| * |
| */ |
| public static long numberOfBytesTillNextMarker(final InputStream in) throws IOException{ |
| CBZip2InputStream anObject = new CBZip2InputStream(in, READ_MODE.BYBLOCK, true); |
| return anObject.getProcessedByteCount(); |
| } |
| |
/**
 * Creates a CBZip2InputStream that reads the given stream in CONTINUOUS
 * mode.
 *
 * @param in the compressed source stream, positioned after the "Bz" magic
 * @throws IOException if the stream content is malformed or an I/O error
 *             occurs
 */
public CBZip2InputStream(final InputStream in) throws IOException {
  this(in, READ_MODE.CONTINUOUS);
}
| |
| private void changeStateToProcessABlock() throws IOException { |
| if (skipResult == true) { |
| initBlock(); |
| setupBlock(); |
| } else { |
| this.currentState = STATE.EOF; |
| } |
| } |
| |
| |
| @Override |
| public int read() throws IOException { |
| |
| if (this.in != null) { |
| int result = this.read(array, 0, 1); |
| int value = 0XFF & array[0]; |
| return (result > 0 ? value : result); |
| |
| } else { |
| throw new IOException("stream closed"); |
| } |
| } |
| |
| /** |
| * In CONTINOUS reading mode, this read method starts from the |
| * start of the compressed stream and end at the end of file by |
| * emitting un-compressed data. In this mode stream positioning |
| * is not announced and should be ignored. |
| * |
| * In BYBLOCK reading mode, this read method informs about the end |
| * of a BZip2 block by returning EOB. At this event, the compressed |
| * stream position is also announced. This announcement tells that |
| * how much of the compressed stream has been de-compressed and read |
| * out of this class. In between EOB events, the stream position is |
| * not updated. |
| * |
| * |
| * @throws IOException |
| * if the stream content is malformed or an I/O error occurs. |
| * |
| * @return int The return value greater than 0 are the bytes read. A value |
| * of -1 means end of stream while -2 represents end of block |
| */ |
| |
| |
| @Override |
| public int read(final byte[] dest, final int offs, final int len) |
| throws IOException { |
| if (offs < 0) { |
| throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); |
| } |
| if (len < 0) { |
| throw new IndexOutOfBoundsException("len(" + len + ") < 0."); |
| } |
| if (offs + len > dest.length) { |
| throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" |
| + len + ") > dest.length(" + dest.length + ")."); |
| } |
| if (this.in == null) { |
| throw new IOException("stream closed"); |
| } |
| |
| if(lazyInitialization){ |
| this.init(); |
| this.lazyInitialization = false; |
| } |
| |
| if(skipDecompression){ |
| changeStateToProcessABlock(); |
| skipDecompression = false; |
| } |
| |
| final int hi = offs + len; |
| int destOffs = offs; |
| int b = 0; |
| |
| |
| |
| for (; ((destOffs < hi) && ((b = read0())) >= 0);) { |
| dest[destOffs++] = (byte) b; |
| |
| } |
| |
| int result = destOffs - offs; |
| if (result == 0) { |
| //report 'end of block' or 'end of stream' |
| result = b; |
| |
| skipResult = this.skipToNextMarker(CBZip2InputStream.BLOCK_DELIMITER, DELIMITER_BIT_LENGTH); |
| |
| changeStateToProcessABlock(); |
| } |
| return result; |
| } |
| |
| private int read0() throws IOException { |
| final int retChar = this.currentChar; |
| |
| switch (this.currentState) { |
| case EOF: |
| return END_OF_STREAM;// return -1 |
| |
| case NO_PROCESS_STATE: |
| return END_OF_BLOCK;// return -2 |
| |
| case START_BLOCK_STATE: |
| throw new IllegalStateException(); |
| |
| case RAND_PART_A_STATE: |
| throw new IllegalStateException(); |
| |
| case RAND_PART_B_STATE: |
| setupRandPartB(); |
| break; |
| |
| case RAND_PART_C_STATE: |
| setupRandPartC(); |
| break; |
| |
| case NO_RAND_PART_A_STATE: |
| throw new IllegalStateException(); |
| |
| case NO_RAND_PART_B_STATE: |
| setupNoRandPartB(); |
| break; |
| |
| case NO_RAND_PART_C_STATE: |
| setupNoRandPartC(); |
| break; |
| |
| default: |
| throw new IllegalStateException(); |
| } |
| |
| return retChar; |
| } |
| |
| private void init() throws IOException { |
| int magic2 = this.readAByte(in); |
| if (magic2 != 'h') { |
| throw new IOException("Stream is not BZip2 formatted: expected 'h'" |
| + " as first byte but got '" + (char) magic2 + "'"); |
| } |
| |
| int blockSize = this.readAByte(in); |
| if ((blockSize < '1') || (blockSize > '9')) { |
| throw new IOException("Stream is not BZip2 formatted: illegal " |
| + "blocksize " + (char) blockSize); |
| } |
| |
| this.blockSize100k = blockSize - '0'; |
| |
| initBlock(); |
| setupBlock(); |
| } |
| |
| private void initBlock() throws IOException { |
| if (this.readMode == READ_MODE.BYBLOCK) { |
| // this.checkBlockIntegrity(); |
| this.storedBlockCRC = bsGetInt(); |
| this.blockRandomised = bsR(1) == 1; |
| |
| /** |
| * Allocate data here instead in constructor, so we do not allocate |
| * it if the input file is empty. |
| */ |
| if (this.data == null) { |
| this.data = new Data(this.blockSize100k); |
| } |
| |
| // currBlockNo++; |
| getAndMoveToFrontDecode(); |
| |
| this.crc.initialiseCRC(); |
| this.currentState = STATE.START_BLOCK_STATE; |
| return; |
| } |
| |
| char magic0 = bsGetUByte(); |
| char magic1 = bsGetUByte(); |
| char magic2 = bsGetUByte(); |
| char magic3 = bsGetUByte(); |
| char magic4 = bsGetUByte(); |
| char magic5 = bsGetUByte(); |
| |
| if (magic0 == 0x17 && magic1 == 0x72 && magic2 == 0x45 |
| && magic3 == 0x38 && magic4 == 0x50 && magic5 == 0x90) { |
| complete(); // end of file |
| } else if (magic0 != 0x31 || // '1' |
| magic1 != 0x41 || // ')' |
| magic2 != 0x59 || // 'Y' |
| magic3 != 0x26 || // '&' |
| magic4 != 0x53 || // 'S' |
| magic5 != 0x59 // 'Y' |
| ) { |
| this.currentState = STATE.EOF; |
| throw new IOException("bad block header"); |
| } else { |
| this.storedBlockCRC = bsGetInt(); |
| this.blockRandomised = bsR(1) == 1; |
| |
| /** |
| * Allocate data here instead in constructor, so we do not allocate |
| * it if the input file is empty. |
| */ |
| if (this.data == null) { |
| this.data = new Data(this.blockSize100k); |
| } |
| |
| // currBlockNo++; |
| getAndMoveToFrontDecode(); |
| |
| this.crc.initialiseCRC(); |
| this.currentState = STATE.START_BLOCK_STATE; |
| } |
| } |
| |
| private void endBlock() throws IOException { |
| this.computedBlockCRC = this.crc.getFinalCRC(); |
| |
| // A bad CRC is considered a fatal error. |
| if (this.storedBlockCRC != this.computedBlockCRC) { |
| // make next blocks readable without error |
| // (repair feature, not yet documented, not tested) |
| this.computedCombinedCRC = (this.storedCombinedCRC << 1) |
| | (this.storedCombinedCRC >>> 31); |
| this.computedCombinedCRC ^= this.storedBlockCRC; |
| |
| reportCRCError(); |
| } |
| |
| this.computedCombinedCRC = (this.computedCombinedCRC << 1) |
| | (this.computedCombinedCRC >>> 31); |
| this.computedCombinedCRC ^= this.computedBlockCRC; |
| } |
| |
| private void complete() throws IOException { |
| this.storedCombinedCRC = bsGetInt(); |
| this.currentState = STATE.EOF; |
| this.data = null; |
| |
| if (this.storedCombinedCRC != this.computedCombinedCRC) { |
| reportCRCError(); |
| } |
| } |
| |
| @Override |
| public void close() throws IOException { |
| InputStream inShadow = this.in; |
| if (inShadow != null) { |
| try { |
| if (inShadow != System.in) { |
| inShadow.close(); |
| } |
| } finally { |
| this.data = null; |
| this.in = null; |
| } |
| } |
| } |
| |
| private long bsR(final long n) throws IOException { |
| long bsLiveShadow = this.bsLive; |
| long bsBuffShadow = this.bsBuff; |
| |
| if (bsLiveShadow < n) { |
| final InputStream inShadow = this.in; |
| do { |
| int thech = readAByte(inShadow); |
| |
| if (thech < 0) { |
| throw new IOException("unexpected end of stream"); |
| } |
| |
| bsBuffShadow = (bsBuffShadow << 8) | thech; |
| bsLiveShadow += 8; |
| } while (bsLiveShadow < n); |
| |
| this.bsBuff = bsBuffShadow; |
| } |
| |
| this.bsLive = bsLiveShadow - n; |
| return (bsBuffShadow >> (bsLiveShadow - n)) & ((1L << n) - 1); |
| } |
| |
| private boolean bsGetBit() throws IOException { |
| long bsLiveShadow = this.bsLive; |
| long bsBuffShadow = this.bsBuff; |
| |
| if (bsLiveShadow < 1) { |
| int thech = this.readAByte(in); |
| |
| if (thech < 0) { |
| throw new IOException("unexpected end of stream"); |
| } |
| |
| bsBuffShadow = (bsBuffShadow << 8) | thech; |
| bsLiveShadow += 8; |
| this.bsBuff = bsBuffShadow; |
| } |
| |
| this.bsLive = bsLiveShadow - 1; |
| return ((bsBuffShadow >> (bsLiveShadow - 1)) & 1) != 0; |
| } |
| |
| private char bsGetUByte() throws IOException { |
| return (char) bsR(8); |
| } |
| |
| private int bsGetInt() throws IOException { |
| return (int) ((((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8)); |
| } |
| |
| /** |
| * Called by createHuffmanDecodingTables() exclusively. |
| */ |
| private static void hbCreateDecodeTables(final int[] limit, |
| final int[] base, final int[] perm, final char[] length, |
| final int minLen, final int maxLen, final int alphaSize) { |
| for (int i = minLen, pp = 0; i <= maxLen; i++) { |
| for (int j = 0; j < alphaSize; j++) { |
| if (length[j] == i) { |
| perm[pp++] = j; |
| } |
| } |
| } |
| |
| for (int i = MAX_CODE_LEN; --i > 0;) { |
| base[i] = 0; |
| limit[i] = 0; |
| } |
| |
| for (int i = 0; i < alphaSize; i++) { |
| base[length[i] + 1]++; |
| } |
| |
| for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) { |
| b += base[i]; |
| base[i] = b; |
| } |
| |
| for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) { |
| final int nb = base[i + 1]; |
| vec += nb - b; |
| b = nb; |
| limit[i] = vec - 1; |
| vec <<= 1; |
| } |
| |
| for (int i = minLen + 1; i <= maxLen; i++) { |
| base[i] = ((limit[i - 1] + 1) << 1) - base[i]; |
| } |
| } |
| |
| private void recvDecodingTables() throws IOException { |
| final Data dataShadow = this.data; |
| final boolean[] inUse = dataShadow.inUse; |
| final byte[] pos = dataShadow.recvDecodingTables_pos; |
| final byte[] selector = dataShadow.selector; |
| final byte[] selectorMtf = dataShadow.selectorMtf; |
| |
| int inUse16 = 0; |
| |
| /* Receive the mapping table */ |
| for (int i = 0; i < 16; i++) { |
| if (bsGetBit()) { |
| inUse16 |= 1 << i; |
| } |
| } |
| |
| for (int i = 256; --i >= 0;) { |
| inUse[i] = false; |
| } |
| |
| for (int i = 0; i < 16; i++) { |
| if ((inUse16 & (1 << i)) != 0) { |
| final int i16 = i << 4; |
| for (int j = 0; j < 16; j++) { |
| if (bsGetBit()) { |
| inUse[i16 + j] = true; |
| } |
| } |
| } |
| } |
| |
| makeMaps(); |
| final int alphaSize = this.nInUse + 2; |
| |
| /* Now the selectors */ |
| final int nGroups = (int) bsR(3); |
| final int nSelectors = (int) bsR(15); |
| |
| for (int i = 0; i < nSelectors; i++) { |
| int j = 0; |
| while (bsGetBit()) { |
| j++; |
| } |
| selectorMtf[i] = (byte) j; |
| } |
| |
| /* Undo the MTF values for the selectors. */ |
| for (int v = nGroups; --v >= 0;) { |
| pos[v] = (byte) v; |
| } |
| |
| for (int i = 0; i < nSelectors; i++) { |
| int v = selectorMtf[i] & 0xff; |
| final byte tmp = pos[v]; |
| while (v > 0) { |
| // nearly all times v is zero, 4 in most other cases |
| pos[v] = pos[v - 1]; |
| v--; |
| } |
| pos[0] = tmp; |
| selector[i] = tmp; |
| } |
| |
| final char[][] len = dataShadow.temp_charArray2d; |
| |
| /* Now the coding tables */ |
| for (int t = 0; t < nGroups; t++) { |
| int curr = (int) bsR(5); |
| final char[] len_t = len[t]; |
| for (int i = 0; i < alphaSize; i++) { |
| while (bsGetBit()) { |
| curr += bsGetBit() ? -1 : 1; |
| } |
| len_t[i] = (char) curr; |
| } |
| } |
| |
| // finally create the Huffman tables |
| createHuffmanDecodingTables(alphaSize, nGroups); |
| } |
| |
| /** |
| * Called by recvDecodingTables() exclusively. |
| */ |
| private void createHuffmanDecodingTables(final int alphaSize, |
| final int nGroups) { |
| final Data dataShadow = this.data; |
| final char[][] len = dataShadow.temp_charArray2d; |
| final int[] minLens = dataShadow.minLens; |
| final int[][] limit = dataShadow.limit; |
| final int[][] base = dataShadow.base; |
| final int[][] perm = dataShadow.perm; |
| |
| for (int t = 0; t < nGroups; t++) { |
| int minLen = 32; |
| int maxLen = 0; |
| final char[] len_t = len[t]; |
| for (int i = alphaSize; --i >= 0;) { |
| final char lent = len_t[i]; |
| if (lent > maxLen) { |
| maxLen = lent; |
| } |
| if (lent < minLen) { |
| minLen = lent; |
| } |
| } |
| hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, |
| maxLen, alphaSize); |
| minLens[t] = minLen; |
| } |
| } |
| |
| private void getAndMoveToFrontDecode() throws IOException { |
| this.origPtr = (int) bsR(24); |
| recvDecodingTables(); |
| |
| final InputStream inShadow = this.in; |
| final Data dataShadow = this.data; |
| final byte[] ll8 = dataShadow.ll8; |
| final int[] unzftab = dataShadow.unzftab; |
| final byte[] selector = dataShadow.selector; |
| final byte[] seqToUnseq = dataShadow.seqToUnseq; |
| final char[] yy = dataShadow.getAndMoveToFrontDecode_yy; |
| final int[] minLens = dataShadow.minLens; |
| final int[][] limit = dataShadow.limit; |
| final int[][] base = dataShadow.base; |
| final int[][] perm = dataShadow.perm; |
| final int limitLast = this.blockSize100k * 100000; |
| |
| /* |
| * Setting up the unzftab entries here is not strictly necessary, but it |
| * does save having to do it later in a separate pass, and so saves a |
| * block's worth of cache misses. |
| */ |
| for (int i = 256; --i >= 0;) { |
| yy[i] = (char) i; |
| unzftab[i] = 0; |
| } |
| |
| int groupNo = 0; |
| int groupPos = G_SIZE - 1; |
| final int eob = this.nInUse + 1; |
| int nextSym = getAndMoveToFrontDecode0(0); |
| int bsBuffShadow = (int) this.bsBuff; |
| int bsLiveShadow = (int) this.bsLive; |
| int lastShadow = -1; |
| int zt = selector[groupNo] & 0xff; |
| int[] base_zt = base[zt]; |
| int[] limit_zt = limit[zt]; |
| int[] perm_zt = perm[zt]; |
| int minLens_zt = minLens[zt]; |
| |
| while (nextSym != eob) { |
| if ((nextSym == RUNA) || (nextSym == RUNB)) { |
| int s = -1; |
| |
| for (int n = 1; true; n <<= 1) { |
| if (nextSym == RUNA) { |
| s += n; |
| } else if (nextSym == RUNB) { |
| s += n << 1; |
| } else { |
| break; |
| } |
| |
| if (groupPos == 0) { |
| groupPos = G_SIZE - 1; |
| zt = selector[++groupNo] & 0xff; |
| base_zt = base[zt]; |
| limit_zt = limit[zt]; |
| perm_zt = perm[zt]; |
| minLens_zt = minLens[zt]; |
| } else { |
| groupPos--; |
| } |
| |
| int zn = minLens_zt; |
| |
| while (bsLiveShadow < zn) { |
| final int thech = readAByte(inShadow); |
| if (thech >= 0) { |
| bsBuffShadow = (bsBuffShadow << 8) | thech; |
| bsLiveShadow += 8; |
| continue; |
| } else { |
| throw new IOException("unexpected end of stream"); |
| } |
| } |
| long zvec = (bsBuffShadow >> (bsLiveShadow - zn)) |
| & ((1 << zn) - 1); |
| bsLiveShadow -= zn; |
| |
| while (zvec > limit_zt[zn]) { |
| zn++; |
| while (bsLiveShadow < 1) { |
| final int thech = readAByte(inShadow); |
| if (thech >= 0) { |
| bsBuffShadow = (bsBuffShadow << 8) | thech; |
| bsLiveShadow += 8; |
| continue; |
| } else { |
| throw new IOException( |
| "unexpected end of stream"); |
| } |
| } |
| bsLiveShadow--; |
| zvec = (zvec << 1) |
| | ((bsBuffShadow >> bsLiveShadow) & 1); |
| } |
| nextSym = perm_zt[(int) (zvec - base_zt[zn])]; |
| } |
| |
| final byte ch = seqToUnseq[yy[0]]; |
| unzftab[ch & 0xff] += s + 1; |
| |
| while (s-- >= 0) { |
| ll8[++lastShadow] = ch; |
| } |
| |
| if (lastShadow >= limitLast) { |
| throw new IOException("block overrun"); |
| } |
| } else { |
| if (++lastShadow >= limitLast) { |
| throw new IOException("block overrun"); |
| } |
| |
| final char tmp = yy[nextSym - 1]; |
| unzftab[seqToUnseq[tmp] & 0xff]++; |
| ll8[lastShadow] = seqToUnseq[tmp]; |
| |
| /* |
| * This loop is hammered during decompression, hence avoid |
| * native method call overhead of System.arraycopy for very |
| * small ranges to copy. |
| */ |
| if (nextSym <= 16) { |
| for (int j = nextSym - 1; j > 0;) { |
| yy[j] = yy[--j]; |
| } |
| } else { |
| System.arraycopy(yy, 0, yy, 1, nextSym - 1); |
| } |
| |
| yy[0] = tmp; |
| |
| if (groupPos == 0) { |
| groupPos = G_SIZE - 1; |
| zt = selector[++groupNo] & 0xff; |
| base_zt = base[zt]; |
| limit_zt = limit[zt]; |
| perm_zt = perm[zt]; |
| minLens_zt = minLens[zt]; |
| } else { |
| groupPos--; |
| } |
| |
| int zn = minLens_zt; |
| |
| while (bsLiveShadow < zn) { |
| final int thech = readAByte(inShadow); |
| if (thech >= 0) { |
| bsBuffShadow = (bsBuffShadow << 8) | thech; |
| bsLiveShadow += 8; |
| continue; |
| } else { |
| throw new IOException("unexpected end of stream"); |
| } |
| } |
| int zvec = (bsBuffShadow >> (bsLiveShadow - zn)) |
| & ((1 << zn) - 1); |
| bsLiveShadow -= zn; |
| |
| while (zvec > limit_zt[zn]) { |
| zn++; |
| while (bsLiveShadow < 1) { |
| final int thech = readAByte(inShadow); |
| if (thech >= 0) { |
| bsBuffShadow = (bsBuffShadow << 8) | thech; |
| bsLiveShadow += 8; |
| continue; |
| } else { |
| throw new IOException("unexpected end of stream"); |
| } |
| } |
| bsLiveShadow--; |
| zvec = ((zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1)); |
| } |
| nextSym = perm_zt[zvec - base_zt[zn]]; |
| } |
| } |
| |
| this.last = lastShadow; |
| this.bsLive = bsLiveShadow; |
| this.bsBuff = bsBuffShadow; |
| } |
| |
| private int getAndMoveToFrontDecode0(final int groupNo) throws IOException { |
| final InputStream inShadow = this.in; |
| final Data dataShadow = this.data; |
| final int zt = dataShadow.selector[groupNo] & 0xff; |
| final int[] limit_zt = dataShadow.limit[zt]; |
| int zn = dataShadow.minLens[zt]; |
| int zvec = (int) bsR(zn); |
| int bsLiveShadow = (int) this.bsLive; |
| int bsBuffShadow = (int) this.bsBuff; |
| |
| while (zvec > limit_zt[zn]) { |
| zn++; |
| while (bsLiveShadow < 1) { |
| final int thech = readAByte(inShadow); |
| |
| if (thech >= 0) { |
| bsBuffShadow = (bsBuffShadow << 8) | thech; |
| bsLiveShadow += 8; |
| continue; |
| } else { |
| throw new IOException("unexpected end of stream"); |
| } |
| } |
| bsLiveShadow--; |
| zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1); |
| } |
| |
| this.bsLive = bsLiveShadow; |
| this.bsBuff = bsBuffShadow; |
| |
| return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]]; |
| } |
| |
| private void setupBlock() throws IOException { |
| if (this.data == null) { |
| return; |
| } |
| |
| final int[] cftab = this.data.cftab; |
| final int[] tt = this.data.initTT(this.last + 1); |
| final byte[] ll8 = this.data.ll8; |
| cftab[0] = 0; |
| System.arraycopy(this.data.unzftab, 0, cftab, 1, 256); |
| |
| for (int i = 1, c = cftab[0]; i <= 256; i++) { |
| c += cftab[i]; |
| cftab[i] = c; |
| } |
| |
| for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { |
| tt[cftab[ll8[i] & 0xff]++] = i; |
| } |
| |
| if ((this.origPtr < 0) || (this.origPtr >= tt.length)) { |
| throw new IOException("stream corrupted"); |
| } |
| |
| this.su_tPos = tt[this.origPtr]; |
| this.su_count = 0; |
| this.su_i2 = 0; |
| this.su_ch2 = 256; /* not a char and not EOF */ |
| |
| if (this.blockRandomised) { |
| this.su_rNToGo = 0; |
| this.su_rTPos = 0; |
| setupRandPartA(); |
| } else { |
| setupNoRandPartA(); |
| } |
| } |
| |
| private void setupRandPartA() throws IOException { |
| if (this.su_i2 <= this.last) { |
| this.su_chPrev = this.su_ch2; |
| int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; |
| this.su_tPos = this.data.tt[this.su_tPos]; |
| if (this.su_rNToGo == 0) { |
| this.su_rNToGo = BZip2Constants.rNums[this.su_rTPos] - 1; |
| if (++this.su_rTPos == 512) { |
| this.su_rTPos = 0; |
| } |
| } else { |
| this.su_rNToGo--; |
| } |
| this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0; |
| this.su_i2++; |
| this.currentChar = su_ch2Shadow; |
| this.currentState = STATE.RAND_PART_B_STATE; |
| this.crc.updateCRC(su_ch2Shadow); |
| } else { |
| endBlock(); |
| if (readMode == READ_MODE.CONTINUOUS) { |
| initBlock(); |
| setupBlock(); |
| } else if (readMode == READ_MODE.BYBLOCK) { |
| this.currentState = STATE.NO_PROCESS_STATE; |
| } |
| } |
| } |
| |
| private void setupNoRandPartA() throws IOException { |
| if (this.su_i2 <= this.last) { |
| this.su_chPrev = this.su_ch2; |
| int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; |
| this.su_ch2 = su_ch2Shadow; |
| this.su_tPos = this.data.tt[this.su_tPos]; |
| this.su_i2++; |
| this.currentChar = su_ch2Shadow; |
| this.currentState = STATE.NO_RAND_PART_B_STATE; |
| this.crc.updateCRC(su_ch2Shadow); |
| } else { |
| this.currentState = STATE.NO_RAND_PART_A_STATE; |
| endBlock(); |
| if (readMode == READ_MODE.CONTINUOUS) { |
| initBlock(); |
| setupBlock(); |
| } else if (readMode == READ_MODE.BYBLOCK) { |
| this.currentState = STATE.NO_PROCESS_STATE; |
| } |
| } |
| } |
| |
| private void setupRandPartB() throws IOException { |
| if (this.su_ch2 != this.su_chPrev) { |
| this.currentState = STATE.RAND_PART_A_STATE; |
| this.su_count = 1; |
| setupRandPartA(); |
| } else if (++this.su_count >= 4) { |
| this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); |
| this.su_tPos = this.data.tt[this.su_tPos]; |
| if (this.su_rNToGo == 0) { |
| this.su_rNToGo = BZip2Constants.rNums[this.su_rTPos] - 1; |
| if (++this.su_rTPos == 512) { |
| this.su_rTPos = 0; |
| } |
| } else { |
| this.su_rNToGo--; |
| } |
| this.su_j2 = 0; |
| this.currentState = STATE.RAND_PART_C_STATE; |
| if (this.su_rNToGo == 1) { |
| this.su_z ^= 1; |
| } |
| setupRandPartC(); |
| } else { |
| this.currentState = STATE.RAND_PART_A_STATE; |
| setupRandPartA(); |
| } |
| } |
| |
| private void setupRandPartC() throws IOException { |
| if (this.su_j2 < this.su_z) { |
| this.currentChar = this.su_ch2; |
| this.crc.updateCRC(this.su_ch2); |
| this.su_j2++; |
| } else { |
| this.currentState = STATE.RAND_PART_A_STATE; |
| this.su_i2++; |
| this.su_count = 0; |
| setupRandPartA(); |
| } |
| } |
| |
| private void setupNoRandPartB() throws IOException { |
| if (this.su_ch2 != this.su_chPrev) { |
| this.su_count = 1; |
| setupNoRandPartA(); |
| } else if (++this.su_count >= 4) { |
| this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); |
| this.su_tPos = this.data.tt[this.su_tPos]; |
| this.su_j2 = 0; |
| setupNoRandPartC(); |
| } else { |
| setupNoRandPartA(); |
| } |
| } |
| |
| private void setupNoRandPartC() throws IOException { |
| if (this.su_j2 < this.su_z) { |
| int su_ch2Shadow = this.su_ch2; |
| this.currentChar = su_ch2Shadow; |
| this.crc.updateCRC(su_ch2Shadow); |
| this.su_j2++; |
| this.currentState = STATE.NO_RAND_PART_C_STATE; |
| } else { |
| this.su_i2++; |
| this.su_count = 0; |
| setupNoRandPartA(); |
| } |
| } |
| |
| private static final class Data extends Object { |
| |
| // (with blockSize 900k) |
| final boolean[] inUse = new boolean[256]; // 256 byte |
| |
| final byte[] seqToUnseq = new byte[256]; // 256 byte |
| final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte |
| final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte |
| |
| /** |
| * Freq table collected to save a pass over the data during |
| * decompression. |
| */ |
| final int[] unzftab = new int[256]; // 1024 byte |
| |
| final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte |
| final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte |
| final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte |
| final int[] minLens = new int[N_GROUPS]; // 24 byte |
| |
| final int[] cftab = new int[257]; // 1028 byte |
| final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte |
| final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096 |
| // byte |
| final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte |
| // --------------- |
| // 60798 byte |
| |
| int[] tt; // 3600000 byte |
| byte[] ll8; // 900000 byte |
| |
| // --------------- |
| // 4560782 byte |
| // =============== |
| |
| Data(int blockSize100k) { |
| super(); |
| |
| this.ll8 = new byte[blockSize100k * BZip2Constants.baseBlockSize]; |
| } |
| |
| /** |
| * Initializes the {@link #tt} array. |
| * |
| * This method is called when the required length of the array is known. |
| * I don't initialize it at construction time to avoid unneccessary |
| * memory allocation when compressing small files. |
| */ |
| final int[] initTT(int length) { |
| int[] ttShadow = this.tt; |
| |
| // tt.length should always be >= length, but theoretically |
| // it can happen, if the compressor mixed small and large |
| // blocks. Normally only the last block will be smaller |
| // than others. |
| if ((ttShadow == null) || (ttShadow.length < length)) { |
| this.tt = ttShadow = new int[length]; |
| } |
| |
| return ttShadow; |
| } |
| |
| } |
| } |