blob: 3029cb6fa35d5545f5985c109c9bdb29b89e4db2 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.microsoft.chm;
import java.math.BigInteger;
import org.apache.tika.exception.TikaException;
import org.apache.tika.parser.microsoft.chm.ChmCommons.IntelState;
import org.apache.tika.parser.microsoft.chm.ChmCommons.LzxState;
/**
* Decompresses a chm block. Depending on the chm block type, chooses the most
* relevant decompression method. A chm block type can be one of the following:
* <ul>
* <li>UNDEFINED - no action taken, i.e. the block is skipped</li>
* <li>VERBATIM</li>
* <li>ALIGNED_OFFSET</li>
* <li>UNCOMPRESSED - the simplest one</li>
* </ul>
* In addition there are unknown types (4-7). Currently such a type is replaced
* by the type of the previous chm block. We need to invent a more appropriate
* way to handle such types.
*/
public class ChmLzxBlock {
// Index of this block as passed to the constructor (validated to be non-negative).
private int block_number;
// Target length of the decompressed block; also used to size the content buffer.
private long block_length;
// LZX decoder state: a fresh one for the first block, otherwise a clone of the previous block's state.
private ChmLzxState state;
// Buffer that receives the decompressed bytes.
private byte[] content = null;
// Reader over the compressed data segment (optionally carrying the previous block's content).
private ChmSection chmSection = null;
// Number of bytes of content that have been produced so far.
private int contentLength = 0;
// Block type of the previous block (-1 when there is none); used as a fallback
// when this block announces a type above 3 ("bad" blocks).
private int previousBlockType = -1;
/**
 * Creates a block and immediately decompresses {@code dataSegment} into the content buffer.
 *
 * @param blockNumber index of the block; must not be negative
 * @param dataSegment compressed data; must be non-null and non-empty
 * @param blockLength length of the decompressed block; must be greater than zero
 * @param prevBlock   previously decoded block, or {@code null} for the first block; its LZX
 *                    state is cloned and, when that state's block length exceeds its
 *                    remaining count, its content is handed to the section as previous content
 * @throws TikaException if a parameter is invalid or decompression fails
 */
public ChmLzxBlock(int blockNumber, byte[] dataSegment, long blockLength, ChmLzxBlock prevBlock)
        throws TikaException {
    if (!validateConstructorParams(blockNumber, dataSegment, blockLength)) {
        throw new TikaException("Check your chm lzx block parameters");
    }
    setBlockNumber(blockNumber);
    if (prevBlock != null && prevBlock.getState().getBlockLength() >
            prevBlock.getState().getBlockRemaining()) {
        setChmSection(new ChmSection(dataSegment, prevBlock.getContent()));
    } else {
        setChmSection(new ChmSection(dataSegment));
    }
    setBlockLength(blockLength);
    // The decoder state must carry over the context of the previous block.
    checkLzxBlock(prevBlock);
    // The block length was just set from blockLength, so a single allocation covers
    // both branches of the former (tautological) size comparison.
    setContent((int) getBlockLength());
    if (prevBlock != null && prevBlock.getState() != null) {
        previousBlockType = prevBlock.getState().getBlockType();
    }
    extractContent();
}
/**
 * @return the number of bytes of content produced so far
 */
protected int getContentLength() {
    return this.contentLength;
}
/**
 * Records how many bytes of content have been produced.
 *
 * @param contentLength the new content length
 */
protected void setContentLength(int contentLength) {
    this.contentLength = contentLength;
}
/**
 * @return the section wrapping the compressed data of this block
 */
private ChmSection getChmSection() {
    return this.chmSection;
}
/**
 * Sets the section wrapping the compressed data of this block.
 *
 * @param chmSection the section to read from
 */
private void setChmSection(ChmSection chmSection) {
    this.chmSection = chmSection;
}
/**
 * Guards against a missing decoder state.
 *
 * @throws TikaException if {@link #getState()} returns {@code null}
 */
private void assertStateNotNull() throws TikaException {
    if (getState() != null) {
        return;
    }
    throw new ChmParsingException("state is null");
}
/**
 * Decompresses the section data into the content buffer.
 * <p>
 * Loops until the content buffer is full or an iteration makes no progress.
 * Whenever the current LZX block is exhausted (block remaining == 0) a new
 * LZX block header is read: on the very first header an optional 32-bit
 * Intel file size, then a 3-bit block type and a 24-bit block length. The
 * decoding tables for that block type are then built and the matching
 * decompress method is called.
 *
 * @throws TikaException if the state is missing or the data cannot be decoded
 */
private void extractContent() throws TikaException {
assertStateNotNull();
if (getChmSection().getData() != null) {
boolean continueLoop = true;
while (continueLoop && getContentLength() < getBlockLength()) {
if (getState() != null && getState().getBlockRemaining() == 0) {
// First header of the stream: one flag bit, followed (if set) by the Intel file size.
if (getState().getHadStarted() == LzxState.NOT_STARTED_DECODING) {
getState().setHadStarted(LzxState.STARTED_DECODING);
if (getChmSection().getSyncBits(1) == 1) {
int intelSizeTemp = (getChmSection().getSyncBits(16) << 16) +
getChmSection().getSyncBits(16);
// A negative (overflowed) size is treated as "no Intel file size".
if (intelSizeTemp >= 0) {
getState().setIntelFileSize(intelSizeTemp);
} else {
getState().setIntelFileSize(0);
}
}
}
// Block header: 3 bits of type, then 16 + 8 bits of length.
getState().setBlockType(getChmSection().getSyncBits(3));
getState().setBlockLength((getChmSection().getSyncBits(16) << 8) +
getChmSection().getSyncBits(8));
getState().setBlockRemaining(getState().getBlockLength());
// ----------------------------------------
// Trying to handle 4 - 7 block types: fall back to the previous
// block's type when that type is in the range 0..2
// ----------------------------------------
if (getState().getBlockType() > 3) {
if (previousBlockType >= 0 && previousBlockType < 3) {
getState().setBlockType(previousBlockType);
}
}
switch (getState().getBlockType()) {
case ChmCommons.ALIGNED_OFFSET:
createAlignedTreeTable();
//fall through: aligned blocks also need the main and length trees
case ChmCommons.VERBATIM:
/* Creates mainTreeTable */
createMainTreeTable();
createLengthTreeTable();
// A non-zero code length for symbol 0xe8 switches Intel E8 handling on.
if (getState().getMainTreeLengtsTable()[0xe8] != 0) {
getState().setIntelState(IntelState.STARTED);
}
break;
case ChmCommons.UNCOMPRESSED:
getState().setIntelState(IntelState.STARTED);
if (getChmSection().getTotal() > 16) {
getChmSection().setSwath(getChmSection().getSwath() - 1);
}
// Three 4-byte values, byte-order reversed, become R0, R1 and R2.
getState().setR0((new BigInteger(getChmSection()
.reverseByteOrder(getChmSection().unmarshalBytes(4)))
.longValue()));
getState().setR1((new BigInteger(getChmSection()
.reverseByteOrder(getChmSection().unmarshalBytes(4)))
.longValue()));
getState().setR2((new BigInteger(getChmSection()
.reverseByteOrder(getChmSection().unmarshalBytes(4)))
.longValue()));
break;
default:
break;
}
} //end of if BlockRemaining == 0
// tempLen is the content length to reach in this pass; whatever part of the
// LZX block does not fit into this chm block stays in blockRemaining.
int tempLen;
if (getContentLength() + getState().getBlockRemaining() > getBlockLength()) {
getState().setBlockRemaining(
getContentLength() + getState().getBlockRemaining() -
(int) getBlockLength());
tempLen = (int) getBlockLength();
} else {
tempLen = getContentLength() + getState().getBlockRemaining();
getState().setBlockRemaining(0);
}
int lastLength = getContentLength();
// The previous block's content, when present, is passed as the match source
// for offsets reaching before the start of this block; otherwise the raw data is passed.
switch (getState().getBlockType()) {
case ChmCommons.ALIGNED_OFFSET:
// if(prevblock.lzxState.length>prevblock.lzxState.remaining)
decompressAlignedBlock(tempLen, getChmSection().getPrevContent() == null ?
getChmSection().getData() :
getChmSection().getPrevContent());// prevcontext
break;
case ChmCommons.VERBATIM:
decompressVerbatimBlock(tempLen, getChmSection().getPrevContent() == null ?
getChmSection().getData() : getChmSection().getPrevContent());
break;
case ChmCommons.UNCOMPRESSED:
decompressUncompressedBlock(tempLen,
getChmSection().getPrevContent() == null ?
getChmSection().getData() :
getChmSection().getPrevContent());
break;
}
getState().increaseFramesRead();
// E8 post-processing only runs when an Intel file size was announced
// and fewer than 32768 frames have been read.
if ((getState().getFramesRead() < 32768) && getState().getIntelFileSize() != 0) {
intelE8Decoding();
}
// Stop when an iteration produced no new content, to avoid looping forever.
continueLoop = getContentLength() > lastLength;
}
}
}
/**
 * Post-processes the decoded content for Intel E8 (x86 CALL) address translation
 * and subtracts the block length from the state's block-remaining counter.
 * <p>
 * NOTE(review): {@code content[i]} is a signed {@code byte} (-128..127) while
 * {@code 0xe8} is the {@code int} 232, so {@code content[i] != 0xe8} is always
 * true. As written, the loop only advances {@code i} and the translation code
 * below the {@code continue} never executes. Masking the byte with
 * {@code 0xFF} would enable it, but the translation code should be validated
 * first (see the notes inside the loop).
 */
protected void intelE8Decoding() {
// Nothing to translate for very short blocks or when Intel mode was never started.
// NOTE(review): LZX_PRETREE_TABLEBITS is used here as a minimum-size threshold;
// presumably a stand-in for a small constant - confirm the intended value.
if (getBlockLength() <= ChmConstants.LZX_PRETREE_TABLEBITS ||
(getState().getIntelState() == IntelState.NOT_STARTED)) {
getState().setBlockRemaining(getState().getBlockRemaining() - (int) getBlockLength());
} else {
// NOTE(review): curpos is seeded from blockRemaining; confirm this is the
// intended "current position" for the relative/absolute conversion.
long curpos = getState().getBlockRemaining();
getState().setBlockRemaining(getState().getBlockRemaining() - (int) getBlockLength());
int i = 0;
// The last 10 bytes of the block are never examined.
while (i < getBlockLength() - 10) {
// Always true for a signed byte - see the method comment.
if (content[i] != 0xe8) {
i++;
continue;
}
// Assemble a big-endian array from the four bytes at i..i+3 (i.e. read them little-endian).
// NOTE(review): this range starts at the opcode byte itself, and curpos is not
// advanced for skipped bytes - verify against the LZX specification before enabling.
byte[] b = new byte[4];
b[0] = getContent()[i + 3];
b[1] = getContent()[i + 2];
b[2] = getContent()[i + 1];
b[3] = getContent()[i + 0];
long absoff = (new BigInteger(b)).longValue();
if ((absoff >= -curpos) && (absoff < getState().getIntelFileSize())) {
long reloff = (absoff >= 0) ? absoff - curpos :
absoff + getState().getIntelFileSize();
// Store the translated offset back, least significant byte first.
getContent()[i + 0] = (byte) reloff;
getContent()[i + 1] = (byte) (reloff >>> 8);
getContent()[i + 2] = (byte) (reloff >>> 16);
getContent()[i + 3] = (byte) (reloff >>> 24);
}
i += 4;
curpos += 5;
}
}
}
/**
 * Reads the pre-tree code lengths from the section.
 *
 * @return an array of {@code LZX_PRETREE_MAXSYMBOLS} lengths, each read as
 *         {@code LZX_PRETREE_NUM_ELEMENTS_BITS} bits
 */
private short[] createPreLenTable() {
    final int symbolCount = ChmConstants.LZX_PRETREE_MAXSYMBOLS;
    final short[] lengths = new short[symbolCount];
    int idx = 0;
    while (idx < symbolCount) {
        int bits = getChmSection().getSyncBits(ChmConstants.LZX_PRETREE_NUM_ELEMENTS_BITS);
        lengths[idx] = (short) bits;
        idx++;
    }
    return lengths;
}
/**
 * Reads a pre-tree, uses it to decode the length-tree code lengths and then
 * builds the length-tree decoding table in the state.
 *
 * @throws TikaException if a table is missing or cannot be built
 */
private void createLengthTreeTable() throws TikaException {
    // Read the pre-tree code lengths
    final short[] preLengths = createPreLenTable();
    if (preLengths == null) {
        throw new ChmParsingException("pretreetable is null");
    }

    final int preTreeSize = (1 << ChmConstants.LZX_PRETREE_TABLEBITS) +
            (ChmConstants.LZX_PRETREE_MAXSYMBOLS << 1);
    final short[] preTree = createTreeTable2(preLengths, preTreeSize,
            ChmConstants.LZX_PRETREE_TABLEBITS, ChmConstants.LZX_PRETREE_MAXSYMBOLS);
    if (preTree == null) {
        throw new ChmParsingException("pretreetable is null");
    }

    // Decode the length-tree code lengths, then build the length tree itself
    createLengthTreeLenTable(0, ChmConstants.LZX_NUM_SECONDARY_LENGTHS, preTree, preLengths);

    final int lengthTreeSize = (1 << ChmConstants.LZX_LENGTH_TABLEBITS) +
            (ChmConstants.LZX_LENGTH_MAXSYMBOLS << 1);
    final short[] lengthTree = createTreeTable2(getState().getLengthTreeLengtsTable(),
            lengthTreeSize, ChmConstants.LZX_LENGTH_TABLEBITS,
            ChmConstants.LZX_NUM_SECONDARY_LENGTHS);
    getState().setLengthTreeTable(lengthTree);
}
/**
 * Copies the bytes of an UNCOMPRESSED LZX block from the section into the content
 * buffer, filling positions {@code [getContentLength(), len)}.
 * <p>
 * The caller ({@code extractContent}) has already split the LZX block between this
 * chm block and the next one: {@code len} is the content length to reach, and the
 * state's block-remaining counter already holds what is left for the next chm block.
 * The copy is therefore driven by {@code len} only, exactly like the verbatim and
 * aligned decoders. Deriving the size from block-remaining again (as was done before)
 * double-counted that adjustment and copied nothing whenever the LZX block fitted
 * into the current chm block.
 *
 * @param len         content length to reach after the copy
 * @param prevcontent unused; kept for signature parity with the other decoders
 */
private void decompressUncompressedBlock(int len, byte[] prevcontent) {
    for (int i = getContentLength(); i < len; i++) {
        content[i] = getChmSection().getByte();
    }
    setContentLength(len);
}
/**
 * Decodes an ALIGNED_OFFSET LZX block into positions
 * {@code [getContentLength(), len)} of the content buffer.
 * <p>
 * Each main-tree symbol below {@code LZX_NUM_CHARS} is a literal byte. Any
 * other symbol encodes a match: its low bits give the match length (extended
 * through the length tree when they equal {@code LZX_NUM_PRIMARY_LENGTHS}) and
 * the remaining bits select the match offset, either one of the three recent
 * offsets R0..R2 or a new offset built from extra bits and the aligned tree.
 *
 * @param len         content length to reach
 * @param prevcontent source for match data located before the start of this block
 * @throws TikaException if required tables are missing or the data is invalid
 */
private void decompressAlignedBlock(int len, byte[] prevcontent) throws TikaException {
// NOTE: the message mentions only the section, but a missing state or main tree also ends up here.
if ((getChmSection() == null) || (getState() == null) ||
(getState().getMainTreeTable() == null)) {
throw new ChmParsingException("chm section is null");
}
short s;
int x, i, border;
int matchlen = 0, matchfooter = 0, extra, rundest, runsrc;
int matchoffset = 0;
for (i = getContentLength(); i < len; i++) {
/* new code */
//read huffman tree from main tree
border = getChmSection().peekBits(ChmConstants.LZX_MAINTREE_TABLEBITS);
if (border >= getState().mainTreeTable.length) {
throw new ChmParsingException("error decompressing aligned block.");
}
//break;
/* end new code */
// Direct table lookup; entries >= the number of main-tree elements are
// internal nodes that must be followed bit by bit.
s = getState().mainTreeTable[getChmSection()
.peekBits(ChmConstants.LZX_MAINTREE_TABLEBITS)];
if (s >= getState().getMainTreeElements()) {
x = ChmConstants.LZX_MAINTREE_TABLEBITS;
do {
x++;
s <<= 1;
s += getChmSection().checkBit(x);
} while ((s = getState().mainTreeTable[s]) >= getState().getMainTreeElements());
}
//System.out.printf("%d,", s);
//?getChmSection().getSyncBits(getState().mainTreeTable[s]);
// Consume the bits of the decoded symbol.
getChmSection().getSyncBits(getState().getMainTreeLengtsTable()[s]);
if (s < ChmConstants.LZX_NUM_CHARS) {
// literal byte
content[i] = (byte) s;
} else {
// match: decode length, then offset
s -= ChmConstants.LZX_NUM_CHARS;
matchlen = s & ChmConstants.LZX_NUM_PRIMARY_LENGTHS;
if (matchlen == ChmConstants.LZX_NUM_PRIMARY_LENGTHS) {
matchfooter = getState().lengthTreeTable[getChmSection().peekBits(
ChmConstants.LZX_LENGTH_TABLEBITS)];//.LZX_MAINTREE_TABLEBITS)];
// NOTE(review): the entry test uses LZX_LENGTH_MAXSYMBOLS while the loop
// below (and the verbatim decoder) use LZX_NUM_SECONDARY_LENGTHS - confirm.
if (matchfooter >=
ChmConstants.LZX_LENGTH_MAXSYMBOLS/*?LZX_LENGTH_TABLEBITS*/) {
x = ChmConstants.LZX_LENGTH_TABLEBITS;
do {
x++;
matchfooter <<= 1;
matchfooter += getChmSection().checkBit(x);
} while ((matchfooter = getState().lengthTreeTable[matchfooter]) >=
ChmConstants.LZX_NUM_SECONDARY_LENGTHS);
}
getChmSection().getSyncBits(getState().lengthTreeLengtsTable[matchfooter]);
matchlen += matchfooter;
}
matchlen += ChmConstants.LZX_MIN_MATCH;
matchoffset = s >>> 3;
if (matchoffset > 2) {
// New offset: position base plus extra bits; with 3 or more extra bits
// the lowest 3 bits come from the aligned tree.
extra = ChmConstants.EXTRA_BITS[matchoffset];
matchoffset = (ChmConstants.POSITION_BASE[matchoffset] - 2);
if (extra > 3) {
extra -= 3;
long verbatim_bits = getChmSection().getSyncBits(extra);
matchoffset += (verbatim_bits << 3);
//READ HUFF SYM in Aligned Tree
int aligned_bits =
getChmSection().peekBits(ChmConstants.LZX_NUM_PRIMARY_LENGTHS);
int t = getState().getAlignedTreeTable()[aligned_bits];
// NOTE(review): aligned-tree entries are compared against the number of
// MAIN tree elements here and below - confirm this is the intended bound.
if (t >= getState().getMainTreeElements()) {
x = ChmConstants.LZX_ALIGNED_TABLEBITS; //?LZX_MAINTREE_TABLEBITS;
// ?LZX_ALIGNED_TABLEBITS
do {
x++;
t <<= 1;
t += getChmSection().checkBit(x);
} while ((t = getState().getAlignedTreeTable()[t]) >=
getState().getMainTreeElements());
}
getChmSection().getSyncBits(getState().getAlignedLenTable()[t]);
matchoffset += t;
} else if (extra == 3) {
int g = getChmSection().peekBits(ChmConstants.LZX_NUM_PRIMARY_LENGTHS);
int t = getState().getAlignedTreeTable()[g];
if (t >= getState().getMainTreeElements()) {
x = ChmConstants.LZX_ALIGNED_TABLEBITS; //?LZX_MAINTREE_TABLEBITS;
do {
x++;
t <<= 1;
t += getChmSection().checkBit(x);
} while ((t = getState().getAlignedTreeTable()[t]) >=
getState().getMainTreeElements());
}
getChmSection().getSyncBits(getState().getAlignedLenTable()[t]);
matchoffset += t;
} else if (extra > 0) {
long l = getChmSection().getSyncBits(extra);
matchoffset += l;
} else {
matchoffset = 1;
}
// Push the new offset onto the recent-offset list.
getState().setR2(getState().getR1());
getState().setR1(getState().getR0());
getState().setR0(matchoffset);
} else if (matchoffset == 0) {
// repeat most recent offset
matchoffset = (int) getState().getR0();
} else if (matchoffset == 1) {
// repeat second most recent offset and swap it to the front
matchoffset = (int) getState().getR1();
getState().setR1(getState().getR0());
getState().setR0(matchoffset);
} else /** match_offset == 2 */ {
matchoffset = (int) getState().getR2();
getState().setR2(getState().getR0());
getState().setR0(matchoffset);
}
rundest = i;
runsrc = rundest - matchoffset;
// Advance the loop index past the match; stop if the match would run past len.
i += (matchlen - 1);
if (i > len) {
break;
}
if (runsrc < 0) {
// The match starts before this block: read from the tail of prevcontent.
if (matchlen + runsrc <= 0) {
// entirely inside prevcontent
runsrc = prevcontent.length + runsrc;
while (matchlen-- > 0) content[rundest++] = prevcontent[runsrc++];
} else {
// starts in prevcontent and continues at the beginning of content
runsrc = prevcontent.length + runsrc;
while (runsrc < prevcontent.length)
content[rundest++] = prevcontent[runsrc++];
matchlen = matchlen + runsrc - prevcontent.length;
runsrc = 0;
while (matchlen-- > 0) content[rundest++] = content[runsrc++];
}
} else {
/* copies any wrapped around source data */
// NOTE: runsrc is >= 0 in this branch, so this loop never executes.
while ((runsrc < 0) && (matchlen-- > 0)) {
content[rundest++] = content[(int) (runsrc + getBlockLength())];
runsrc++;
}
/* copies match data - no worries about destination wraps */
while (matchlen-- > 0) content[rundest++] = content[runsrc++];
}
}
}
setContentLength(len);
}
/**
 * Guards against a missing table.
 *
 * @param array the table to check
 * @throws TikaException if {@code array} is {@code null}
 */
private void assertShortArrayNotNull(short[] array) throws TikaException {
    if (array != null) {
        return;
    }
    throw new ChmParsingException("short[] is null");
}
/**
 * Decodes a VERBATIM LZX block into positions {@code [getContentLength(), len)}
 * of the content buffer.
 * <p>
 * Each main-tree symbol below {@code LZX_NUM_CHARS} is a literal byte. Any other
 * symbol encodes a match: its low bits give the match length (extended through the
 * length tree when they equal {@code LZX_NUM_PRIMARY_LENGTHS}) and the remaining
 * bits select the match offset, either one of the recent offsets R0..R2 or a new
 * offset built from the position base plus extra bits.
 *
 * @param len         content length to reach
 * @param prevcontent source for match data located before the start of this block
 * @throws TikaException if the main tree is missing or a match copy cannot be
 *                       carried out within the buffers
 */
private void decompressVerbatimBlock(int len, byte[] prevcontent) throws TikaException {
    short s;
    int x, i;
    int matchlen = 0, matchfooter = 0, extra, rundest, runsrc;
    int matchoffset = 0;
    for (i = getContentLength(); i < len; i++) {
        int f = getChmSection().peekBits(ChmConstants.LZX_MAINTREE_TABLEBITS);
        assertShortArrayNotNull(getState().getMainTreeTable());
        // Direct table lookup; large entries are internal nodes followed bit by bit.
        s = getState().getMainTreeTable()[f];
        if (s >= ChmConstants.LZX_MAIN_MAXSYMBOLS) {
            x = ChmConstants.LZX_MAINTREE_TABLEBITS;
            do {
                x++;
                s <<= 1;
                s += getChmSection().checkBit(x);
            } while ((s = getState().getMainTreeTable()[s]) >=
                    ChmConstants.LZX_MAIN_MAXSYMBOLS);
        }
        // Consume the bits of the decoded symbol.
        getChmSection().getSyncBits(getState().getMainTreeLengtsTable()[s]);
        if (s < ChmConstants.LZX_NUM_CHARS) {
            // literal byte
            content[i] = (byte) s;
        } else {
            // match: decode length, then offset
            s -= ChmConstants.LZX_NUM_CHARS;
            matchlen = s & ChmConstants.LZX_NUM_PRIMARY_LENGTHS;
            if (matchlen == ChmConstants.LZX_NUM_PRIMARY_LENGTHS) {
                matchfooter = getState().getLengthTreeTable()[getChmSection()
                        .peekBits(ChmConstants.LZX_LENGTH_TABLEBITS)];
                if (matchfooter >= ChmConstants.LZX_NUM_SECONDARY_LENGTHS) {
                    x = ChmConstants.LZX_LENGTH_TABLEBITS;
                    do {
                        x++;
                        matchfooter <<= 1;
                        matchfooter += getChmSection().checkBit(x);
                    } while ((matchfooter = getState().getLengthTreeTable()[matchfooter]) >=
                            ChmConstants.LZX_NUM_SECONDARY_LENGTHS);
                }
                getChmSection().getSyncBits(getState().getLengthTreeLengtsTable()[matchfooter]);
                matchlen += matchfooter;
            }
            matchlen += ChmConstants.LZX_MIN_MATCH;
            matchoffset = s >>> 3;
            if (matchoffset > 2) {
                if (matchoffset != 3) {
                    // further bits are needed to retrieve the offset
                    extra = ChmConstants.EXTRA_BITS[matchoffset];
                    long l = getChmSection().getSyncBits(extra);
                    matchoffset = (int) (ChmConstants.POSITION_BASE[matchoffset] - 2 + l);
                } else {
                    matchoffset = 1;
                }
                // Push the new offset onto the recent-offset list.
                getState().setR2(getState().getR1());
                getState().setR1(getState().getR0());
                getState().setR0(matchoffset);
            } else if (matchoffset == 0) {
                matchoffset = (int) getState().getR0();
            } else if (matchoffset == 1) {
                matchoffset = (int) getState().getR1();
                getState().setR1(getState().getR0());
                getState().setR0(matchoffset);
            } else /* match_offset == 2 */ {
                matchoffset = (int) getState().getR2();
                getState().setR2(getState().getR0());
                getState().setR0(matchoffset);
            }
            rundest = i;
            runsrc = rundest - matchoffset;
            // Advance the loop index past the match; stop if it would run past len.
            i += (matchlen - 1);
            if (i > len) {
                break;
            }
            if (runsrc < 0) {
                // The match starts before this block: read from the tail of prevcontent.
                if (matchlen + runsrc <= 0) {
                    runsrc = prevcontent.length + runsrc;
                    while ((matchlen-- > 0) && (prevcontent != null) && ((runsrc + 1) > 0)) {
                        if ((rundest < content.length) && (runsrc < content.length)) {
                            content[rundest++] = prevcontent[runsrc++];
                        }
                    }
                } else {
                    runsrc = prevcontent.length + runsrc;
                    while (runsrc < prevcontent.length) {
                        if ((rundest < content.length) && (runsrc < content.length)) {
                            content[rundest++] = prevcontent[runsrc++];
                        } else {
                            // Neither index can advance any more; without this the loop
                            // would spin forever on corrupt input.
                            throw new ChmParsingException(
                                    "match copy exceeds the content buffer");
                        }
                    }
                    matchlen = matchlen + runsrc - prevcontent.length;
                    runsrc = 0;
                    while (matchlen-- > 0) {
                        content[rundest++] = content[runsrc++];
                    }
                }
            } else {
                /* copies any wrapped source data (runsrc is >= 0 here, so this never runs) */
                while ((runsrc < 0) && (matchlen-- > 0)) {
                    content[rundest++] = content[(int) (runsrc + getBlockLength())];
                    runsrc++;
                }
                /* copies match data - no worries about destination wraps */
                while (matchlen-- > 0) {
                    if ((rundest < content.length) && (runsrc < content.length)) {
                        content[rundest++] = content[runsrc++];
                    }
                }
            }
        }
    }
    setContentLength(len);
}
/**
 * Decodes the code lengths of the length tree into the state's length-tree
 * lengths table, for entries {@code [offset, tablelen)}.
 * <p>
 * Each pre-tree symbol {@code z} is interpreted as follows: values below 17 are a
 * delta against the current entry (modulo 17); 17 is a run of 4 + (4 bits) zeros;
 * 18 is a run of 20 + (5 bits) zeros; 19 is a run of 4 + (1 bit) copies of a
 * delta-coded value taken from the next symbol.
 *
 * @param offset       index of the first entry to fill
 * @param tablelen     index one past the last entry to fill
 * @param pretreetable decoding table of the pre-tree
 * @param prelentable  code lengths of the pre-tree symbols
 * @throws TikaException if a required table or the section is missing
 */
private void createLengthTreeLenTable(int offset, int tablelen, short[] pretreetable,
short[] prelentable) throws TikaException {
// NOTE: prelentable is tested twice in this condition; the second test is redundant.
if (prelentable == null || getChmSection() == null || pretreetable == null ||
prelentable == null) {
throw new ChmParsingException("is null");
}
int i = offset; // represents offset
int z, y, x;// local counters
while (i < tablelen) {
//Read HUFF sym to z
z = pretreetable[getChmSection().peekBits(ChmConstants.LZX_PRETREE_TABLEBITS)];
if (z >= ChmConstants.LZX_PRETREE_NUM_ELEMENTS) { // 1 bug, should be
// 20
x = ChmConstants.LZX_PRETREE_TABLEBITS;
do {
x++;
z <<= 1;
z += getChmSection().checkBit(x);
} while ((z = pretreetable[z]) >= ChmConstants.LZX_PRETREE_NUM_ELEMENTS);
}
// Consume the bits of the decoded pre-tree symbol.
getChmSection().getSyncBits(prelentable[z]);
if (z < 17) {
// delta against the existing length, wrapped into 0..16
z = getState().getLengthTreeLengtsTable()[i] - z;
if (z < 0) {
z = z + 17;
}
getState().getLengthTreeLengtsTable()[i] = (short) z;
i++;
} else if (z == 17) {
// short run of zeros; writes past the end of the table are silently dropped
y = getChmSection().getSyncBits(4);
y += 4;
for (int j = 0; j < y; j++)
if (i < getState().getLengthTreeLengtsTable().length) {
getState().getLengthTreeLengtsTable()[i++] = 0;
}
} else if (z == 18) {
// long run of zeros; deliberately unchecked, so an overrun raises an
// ArrayIndexOutOfBoundsException
y = getChmSection().getSyncBits(5);
y += 20;
for (int j = 0; j < y; j++)
//no tolerate //if (i < getState().getLengthTreeLengtsTable().length)
getState().getLengthTreeLengtsTable()[i++] = 0;
} else if (z == 19) {
// run of identical values: decode one more symbol for the value
y = getChmSection().getSyncBits(1);
y += 4;
z = pretreetable[getChmSection().peekBits(ChmConstants.LZX_PRETREE_TABLEBITS)];
if (z >= ChmConstants.LZX_PRETREE_NUM_ELEMENTS) { // 20
x = ChmConstants.LZX_PRETREE_TABLEBITS;// 6
do {
x++;
z <<= 1;
z += getChmSection().checkBit(x);
} while ((z = pretreetable[z]) >=
ChmConstants.LZX_PRETREE_NUM_ELEMENTS);//LZX_MAINTREE_TABLEBITS);
}
getChmSection().getSyncBits(prelentable[z]);
z = getState().getLengthTreeLengtsTable()[i] - z;
if (z < 0) {
z = z + 17;
}
for (int j = 0; j < y; j++)
getState().getLengthTreeLengtsTable()[i++] = (short) z;
}
}
}
/**
 * Builds the main-tree decoding table in the state.
 * <p>
 * The main-tree code lengths are decoded in two parts, each preceded by its own
 * pre-tree: first the entries {@code [0, LZX_NUM_CHARS)}, then the entries from
 * {@code LZX_NUM_CHARS} up to the end of the lengths table.
 *
 * @throws TikaException if a table cannot be built
 */
private void createMainTreeTable() throws TikaException {
    final int preTreeSize = (1 << ChmConstants.LZX_PRETREE_TABLEBITS) +
            (ChmConstants.LZX_PRETREE_MAXSYMBOLS << 1);

    // First part: pre-tree, then lengths of the literal symbols
    short[] preLengths = createPreLenTable();
    short[] preTree = createTreeTable2(preLengths, preTreeSize,
            ChmConstants.LZX_PRETREE_TABLEBITS, ChmConstants.LZX_PRETREE_MAXSYMBOLS);
    createMainTreeLenTable(0, ChmConstants.LZX_NUM_CHARS, preTree, preLengths);

    // Second part: a new pre-tree, then lengths of the remaining symbols
    preLengths = createPreLenTable();
    preTree = createTreeTable2(preLengths, preTreeSize,
            ChmConstants.LZX_PRETREE_TABLEBITS, ChmConstants.LZX_PRETREE_MAXSYMBOLS);
    createMainTreeLenTable(ChmConstants.LZX_NUM_CHARS, getState().mainTreeLengtsTable.length,
            preTree, preLengths);

    final int mainTreeSize = (1 << ChmConstants.LZX_MAINTREE_TABLEBITS) +
            (ChmConstants.LZX_MAINTREE_MAXSYMBOLS << 1);
    final short[] mainTree = createTreeTable2(getState().mainTreeLengtsTable, mainTreeSize,
            ChmConstants.LZX_MAINTREE_TABLEBITS, getState().getMainTreeElements());
    getState().setMainTreeTable(mainTree);
}
/**
 * Decodes the code lengths of the main tree into the state's main-tree lengths
 * table, for entries {@code [offset, tablelen)}.
 * <p>
 * Each pre-tree symbol {@code z} is interpreted as follows: values below 17 are a
 * delta against the current entry (modulo 17); 17 is a run of 4 + (4 bits) zeros;
 * 18 is a run of 20 + (5 bits) zeros; 19 is a run of 4 + (1 bit) copies of a
 * delta-coded value taken from the next symbol.
 *
 * @param offset       index of the first entry to fill
 * @param tablelen     index one past the last entry to fill
 * @param pretreetable decoding table of the pre-tree
 * @param prelentable  code lengths of the pre-tree symbols
 * @throws TikaException if the pre-tree is missing or a zero run overruns the table
 */
private void createMainTreeLenTable(int offset, int tablelen, short[] pretreetable,
short[] prelentable) throws TikaException {
if (pretreetable == null) {
throw new ChmParsingException("pretreetable is null");
}
int i = offset;
int z, y, x;
while (i < tablelen) {
// Direct table lookup; large entries are internal nodes followed bit by bit.
int f = getChmSection().peekBits(ChmConstants.LZX_PRETREE_TABLEBITS);
z = pretreetable[f];
if (z >= ChmConstants.LZX_PRETREE_MAXSYMBOLS) {
x = ChmConstants.LZX_PRETREE_TABLEBITS;
do {
x++;
z <<= 1;
z += getChmSection().checkBit(x);
} while ((z = pretreetable[z]) >= ChmConstants.LZX_PRETREE_MAXSYMBOLS);
}
// Consume the bits of the decoded pre-tree symbol.
getChmSection().getSyncBits(prelentable[z]);
if (z < 17) {
// delta against the existing length, wrapped into 0..16
z = getState().getMainTreeLengtsTable()[i] - z;
if (z < 0) {
z = z + 17;
}
getState().mainTreeLengtsTable[i] = (short) z;
i++;
} else if (z == 17) {
// short run of zeros; an overrun is reported as a parsing error
y = getChmSection().getSyncBits(4);
y += 4;
for (int j = 0; j < y; j++) {
assertInRange(getState().getMainTreeLengtsTable(), i);
getState().mainTreeLengtsTable[i++] = 0;
}
} else if (z == 18) {
// long run of zeros; an overrun is reported as a parsing error
y = getChmSection().getSyncBits(5);
y += 20;
for (int j = 0; j < y; j++) {
assertInRange(getState().getMainTreeLengtsTable(), i);
getState().mainTreeLengtsTable[i++] = 0;
}
} else if (z == 19) {
// run of identical values: decode one more symbol for the value
y = getChmSection().getSyncBits(1);
y += 4;
z = pretreetable[getChmSection().peekBits(ChmConstants.LZX_PRETREE_TABLEBITS)];
if (z >= ChmConstants.LZX_PRETREE_MAXSYMBOLS) {
x = ChmConstants.LZX_PRETREE_TABLEBITS;
do {
x++;
z <<= 1;
z += getChmSection().checkBit(x);
} while ((z = pretreetable[z]) >= ChmConstants.LZX_PRETREE_MAXSYMBOLS);
}
getChmSection().getSyncBits(prelentable[z]);
z = getState().mainTreeLengtsTable[i] - z;
if (z < 0) {
z = z + 17;
}
// Unlike the zero runs above, writes past the end of the table are silently dropped here.
for (int j = 0; j < y; j++)
if (i < getState().getMainTreeLengtsTable().length) {
getState().mainTreeLengtsTable[i++] = (short) z;
}
}
}
}
/**
 * Verifies that {@code index} is below the length of {@code array}.
 *
 * @param array the table about to be written
 * @param index the index about to be used
 * @throws ChmParsingException if {@code index} is not smaller than the array length
 */
private void assertInRange(short[] array, int index) throws ChmParsingException {
    final int size = array.length;
    if (index < size) {
        return;
    }
    throw new ChmParsingException(index + " is bigger than " + size);
}
/**
 * Reads the code lengths of the aligned-offset tree from the section.
 *
 * @return an array of {@code LZX_ALIGNED_NUM_ELEMENTS} lengths
 */
private short[] createAlignedLenTable() {
    // The value of LZX_BLOCKTYPE_UNCOMPRESSED is used as the bit width of each entry.
    final int bitsPerEntry = ChmConstants.LZX_BLOCKTYPE_UNCOMPRESSED;
    final short[] lengths = new short[ChmConstants.LZX_ALIGNED_NUM_ELEMENTS];
    for (int idx = 0; idx < lengths.length; idx++) {
        lengths[idx] = (short) getChmSection().getSyncBits(bitsPerEntry);
    }
    return lengths;
}
/**
 * Reads the aligned-offset code lengths and builds the aligned-offset decoding
 * table, storing both in the state.
 *
 * @throws ChmParsingException if the decoding table overflows
 */
private void createAlignedTreeTable() throws ChmParsingException {
    getState().setAlignedLenTable(createAlignedLenTable());

    final int tableSize = (1 << ChmConstants.LZX_NUM_PRIMARY_LENGTHS) +
            (ChmConstants.LZX_ALIGNED_MAXSYMBOLS << 1);
    final short[] alignedTree = createTreeTable2(getState().getAlignedLenTable(), tableSize,
            ChmConstants.LZX_NUM_PRIMARY_LENGTHS, ChmConstants.LZX_ALIGNED_MAXSYMBOLS);
    getState().setAlignedTreeTable(alignedTree);
}
/**
 * Builds a Huffman decoding table from a table of code lengths.
 * <p>
 * Codes of up to {@code bits} bits are mapped directly: every table slot whose
 * index starts with the code holds the symbol. Longer codes (up to 16 bits) are
 * stored as a binary tree whose nodes are allocated in the table area behind the
 * direct-mapped part.
 *
 * @param lentable  code length of each symbol
 * @param tablelen  size of the decoding table to allocate
 * @param bits      number of bits covered by the direct lookup
 * @param maxsymbol number of symbols to consider
 * @return the decoding table
 * @throws ChmParsingException if the code lengths overflow the table
 */
private short[] createTreeTable2(short[] lentable, int tablelen, int bits, int maxsymbol)
        throws ChmParsingException {
    final short[] table = new short[tablelen];
    /* the current position in the decode table */
    int pos = 0;
    long tableMask = (1 << bits);
    long bitMask = (tableMask >> 1);
    long nextSymbol = bitMask;
    int bitNum;

    /* fills entries for short codes for a direct mapping */
    for (bitNum = 1; bitNum <= bits; bitNum++) {
        for (short sym = 0; sym < maxsymbol; sym++) {
            if (lentable.length > sym && lentable[sym] == bitNum) {
                int leaf = pos;
                if ((pos += bitMask) > tableMask) {
                    /* table overflow */
                    throw new ChmParsingException("Table overflow");
                }
                for (long fill = bitMask; fill > 0; fill--) {
                    table[leaf++] = sym;
                }
            }
        }
        bitMask >>= 1;
    }

    /* if there are any codes longer than nbits */
    if (pos != tableMask) {
        /* clears the remainder of the table */
        for (int leaf = pos; leaf < tableMask; leaf++) {
            table[leaf] = 0;
        }
        /* gives ourselves room for codes to grow by up to 16 more bits */
        pos <<= 16;
        tableMask <<= 16;
        bitMask = 1 << 15;
        for (; bitNum <= 16; bitNum++) {
            for (short sym = 0; sym < maxsymbol; sym++) {
                if ((lentable.length > sym) && (lentable[sym] == bitNum)) {
                    int leaf = pos >> 16;
                    for (long fill = 0; fill < bitNum - bits; fill++) {
                        /* if this path hasn't been taken yet, 'allocate' two entries */
                        if (table[leaf] == 0) {
                            if (((nextSymbol << 1) + 1) < table.length) {
                                table[(int) (nextSymbol << 1)] = 0;
                                table[(int) (nextSymbol << 1) + 1] = 0;
                                table[leaf] = (short) nextSymbol++;
                            }
                        }
                        /* follows the path and selects either left or right for next bit */
                        leaf = table[leaf] << 1;
                        if (((pos >> (15 - fill)) & 1) != 0) {
                            leaf++;
                        }
                    }
                    table[leaf] = sym;
                    if ((pos += bitMask) > tableMask) {
                        /* table overflow */
                        throw new ChmParsingException("Table overflow");
                    }
                }
            }
            bitMask >>= 1;
        }
    }
    /* the table is returned whether or not it is completely filled */
    return table;
}
/**
 * @return the buffer holding the decompressed content (not a copy)
 */
public byte[] getContent() {
    return this.content;
}
/**
 * Allocates a new, zero-filled content buffer.
 *
 * @param contentLength size of the buffer in bytes
 */
private void setContent(int contentLength) {
    this.content = new byte[contentLength];
}
/**
 * Returns a copy of part of the decompressed content.
 *
 * @param startOffset first index to copy
 * @param endOffset   end index handed to {@code ChmCommons.copyOfRange}
 * @return the copied range, or a one-byte array when there is no content
 * @throws TikaException if the range cannot be copied
 */
public byte[] getContent(int startOffset, int endOffset) throws TikaException {
    final byte[] data = getContent();
    if (data == null) {
        return new byte[1];
    }
    return ChmCommons.copyOfRange(data, startOffset, endOffset);
}
/**
 * Returns a copy of the decompressed content from {@code start} to the end.
 *
 * @param start first index to copy
 * @return the copied range, or a one-byte array when there is no content
 * @throws TikaException if the range cannot be copied
 */
public byte[] getContent(int start) throws TikaException {
    final byte[] data = getContent();
    if (data == null) {
        return new byte[1];
    }
    return ChmCommons.copyOfRange(data, start, data.length);
}
/**
 * Initializes the decoder state of this block: a fresh state for the first block,
 * otherwise a clone of the previous block's state.
 *
 * @param chmPrevLzxBlock the previously decoded block, or {@code null} for the first block
 * @throws TikaException if there is no previous block and the block length does not
 *                       fit the state, or if the previous block has no state
 */
private void checkLzxBlock(ChmLzxBlock chmPrevLzxBlock) throws TikaException {
    if (chmPrevLzxBlock == null) {
        // Previously this case fell through to the clone branch and failed with a
        // NullPointerException.
        if (getBlockLength() >= Integer.MAX_VALUE) {
            throw new ChmParsingException("block length is too large: " + getBlockLength());
        }
        setState(new ChmLzxState((int) getBlockLength()));
        return;
    }
    ChmLzxState prevState = chmPrevLzxBlock.getState();
    if (prevState == null) {
        throw new ChmParsingException("previous block has no lzx state");
    }
    //use clone to avoid changing a cached or to be cached block
    setState(prevState.clone());
}
/**
 * Validates the constructor arguments.
 *
 * @param blockNumber must not be negative
 * @param dataSegment must be non-null and non-empty
 * @param blockLength must be greater than zero
 * @return {@code true} when all three arguments are acceptable
 * @throws TikaException describing the first argument that is not acceptable
 */
private boolean validateConstructorParams(int blockNumber, byte[] dataSegment, long blockLength)
        throws TikaException {
    if (blockNumber < 0) {
        throw new ChmParsingException("block number should be possitive");
    }
    if (dataSegment == null || dataSegment.length == 0) {
        throw new ChmParsingException("data segment should not be null");
    }
    if (blockLength <= 0) {
        throw new ChmParsingException("block length should be more than zero");
    }
    return true;
}
/**
 * @return the index of this block
 */
public int getBlockNumber() {
    return this.block_number;
}
/**
 * Sets the index of this block.
 *
 * @param block_number the block index
 */
private void setBlockNumber(int block_number) {
    this.block_number = block_number;
}
/**
 * @return the length of the decompressed block
 */
private long getBlockLength() {
    return this.block_length;
}
/**
 * Sets the length of the decompressed block.
 *
 * @param block_length the block length
 */
private void setBlockLength(long block_length) {
    this.block_length = block_length;
}
/**
 * @return the LZX decoder state of this block
 */
public ChmLzxState getState() {
    return this.state;
}
/**
 * Sets the LZX decoder state of this block.
 *
 * @param state the decoder state
 */
private void setState(ChmLzxState state) {
    this.state = state;
}
}