blob: 32b2ea62f4bfc147a5df2058b51e0de7793d03df [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.microsoft.chm;
import static java.nio.charset.StandardCharsets.UTF_8;
import org.apache.tika.exception.TikaException;
/**
 * ::DataSpace/Storage/&lt;SectionName&gt;/ControlData
 * <p>
 * This file contains $20 bytes of information on the compression. The
 * information is partially known:
 * <pre>
 * 0000: DWORD 6 (unknown)
 * 0004: ASCII 'LZXC' Compression type identifier
 * 0008: DWORD 2 (Possibly numeric code for LZX)
 * 000C: DWORD The Huffman reset interval in $8000-byte blocks
 * 0010: DWORD The window size in $8000-byte blocks
 * 0014: DWORD unknown (sometimes 2, sometimes 1, sometimes 0)
 * 0018: DWORD 0 (unknown)
 * 001C: DWORD 0 (unknown)
 * </pre>
 */
public class ChmLzxcControlData implements ChmAccessor<ChmLzxcControlData> {
    private static final long serialVersionUID = -7897854774939631565L;

    /* Parsed fields; the trailing comment is the byte offset inside the block. */
    private long size; /* 0 */
    private byte[] signature; /* 4 */
    private long version; /* 8 */
    private long resetInterval; /* c */
    private long windowSize; /* 10 */
    private long windowsPerReset; /* 14 */
    private long unknown_18; /* 18 */

    /* Read cursor used only while parsing. */
    private int dataRemained;
    private int currentPlace = 0;

    /**
     * Creates an instance whose signature is pre-set to the bytes of
     * {@code ChmConstants.LZXC}; all numeric fields start at zero.
     */
    public ChmLzxcControlData() {
        signature = ChmConstants.LZXC.getBytes(UTF_8);
    }

    /**
     * Intentionally empty entry point, kept only so the public surface of the
     * class does not change.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
    }

    /**
     * Returns the number of bytes not yet consumed by the current parse.
     *
     * @return dataRemained
     */
    private int getDataRemained() {
        return dataRemained;
    }

    /**
     * Sets the number of bytes not yet consumed by the current parse.
     *
     * @param dataRemained remaining byte count
     */
    private void setDataRemained(int dataRemained) {
        this.dataRemained = dataRemained;
    }

    /**
     * Returns the offset of the next byte to read.
     *
     * @return currentPlace
     */
    private int getCurrentPlace() {
        return currentPlace;
    }

    /**
     * Sets the offset of the next byte to read.
     *
     * @param currentPlace offset into the data array
     */
    private void setCurrentPlace(int currentPlace) {
        this.currentPlace = currentPlace;
    }

    /**
     * Returns the value of the first DWORD of the control data (offset 0x00).
     *
     * @return size
     */
    public long getSize() {
        return size;
    }

    /**
     * Sets the value of the first DWORD of the control data.
     *
     * @param size value to store
     */
    protected void setSize(long size) {
        this.size = size;
    }

    /**
     * Returns the signature bytes of the control data block. The internal
     * array is returned as-is, not a copy.
     *
     * @return signature
     */
    public byte[] getSignature() {
        return signature;
    }

    /**
     * Sets the signature bytes of the control data block. The array is stored
     * as-is, not copied.
     *
     * @param signature signature bytes
     */
    protected void setSignature(byte[] signature) {
        this.signature = signature;
    }

    /**
     * Returns the version of the control data block (offset 0x08).
     *
     * @return version
     */
    public long getVersion() {
        return version;
    }

    /**
     * Sets the version of the control data block.
     *
     * @param version value to store
     */
    protected void setVersion(long version) {
        this.version = version;
    }

    /**
     * Returns the reset interval (offset 0x0C).
     *
     * @return resetInterval
     */
    public long getResetInterval() {
        return resetInterval;
    }

    /**
     * Sets the reset interval.
     *
     * @param resetInterval value to store
     */
    protected void setResetInterval(long resetInterval) {
        this.resetInterval = resetInterval;
    }

    /**
     * Returns the window size. After a successful {@link #parse} of a
     * version 2 block this is the raw value multiplied by
     * {@code ChmConstants.CHM_WINDOW_SIZE_BLOCK}.
     *
     * @return windowSize
     */
    public long getWindowSize() {
        return windowSize;
    }

    /**
     * Sets the window size.
     *
     * @param windowSize value to store
     */
    protected void setWindowSize(long windowSize) {
        this.windowSize = windowSize;
    }

    /**
     * Returns the windows-per-reset value (offset 0x14).
     *
     * @return windowsPerReset
     */
    public long getWindowsPerReset() {
        return windowsPerReset;
    }

    /**
     * Sets the windows-per-reset value.
     *
     * @param windowsPerReset value to store
     */
    protected void setWindowsPerReset(long windowsPerReset) {
        this.windowsPerReset = windowsPerReset;
    }

    /**
     * Returns the unknown DWORD located at offset 0x18.
     *
     * @return unknown_18
     */
    public long getUnknown_18() {
        return unknown_18;
    }

    /**
     * Sets the unknown DWORD located at offset 0x18.
     *
     * @param unknown_18 value to store
     */
    protected void setUnknown_18(long unknown_18) {
        this.unknown_18 = unknown_18;
    }

    /**
     * Reads a little-endian unsigned 32-bit integer at this instance's read
     * cursor and advances the cursor by four bytes.
     *
     * @param data buffer being parsed
     * @return the value, in the range 0 .. 0xFFFFFFFF
     * @throws ChmParsingException if fewer than four bytes remain
     */
    private long unmarshalUInt32(byte[] data) throws ChmParsingException {
        assert (data != null && data.length > 0);
        if (4 > getDataRemained()) {
            throw new ChmParsingException("4 > dataLenght");
        }
        int pos = getCurrentPlace();
        // Java bytes are signed: mask each one before shifting so a byte >= 0x80
        // does not sign-extend into the higher bits, and assemble in long so the
        // top bit of the DWORD does not make the result negative.
        long value = (data[pos] & 0xFFL)
                | (data[pos + 1] & 0xFFL) << 8
                | (data[pos + 2] & 0xFFL) << 16
                | (data[pos + 3] & 0xFFL) << 24;
        setDataRemained(getDataRemained() - 4);
        setCurrentPlace(pos + 4);
        return value;
    }

    /**
     * Reads {@code count} raw bytes at this instance's read cursor into a new
     * signature array and advances the cursor by {@code count}.
     *
     * @param data  buffer being parsed
     * @param count number of bytes to read
     * @throws TikaException if the arguments are rejected by {@code ChmAssert}
     *                       or fewer than {@code count} bytes remain
     */
    private void unmarshalCharArray(byte[] data, int count) throws TikaException {
        ChmAssert.assertByteArrayNotNull(data);
        ChmAssert.assertPositiveInt(count);
        if (count > getDataRemained()) {
            throw new ChmParsingException("not enough data left to read the signature");
        }
        // Copy from the cursor (not a fixed offset) into a fresh array so the
        // read cannot overrun a shorter, caller-supplied signature array.
        byte[] parsed = new byte[count];
        System.arraycopy(data, getCurrentPlace(), parsed, 0, count);
        setSignature(parsed);
        setCurrentPlace(getCurrentPlace() + count);
        setDataRemained(getDataRemained() - count);
    }

    /**
     * Returns textual representation of ChmLzxcControlData
     */
    @Override
    public String toString() {
        String eol = System.lineSeparator();
        StringBuilder sb = new StringBuilder();
        sb.append("size(unknown):=")
                .append(getSize())
                .append(", ");
        sb.append("signature(Compression type identifier):=")
                .append(new String(getSignature(), UTF_8))
                .append(", ");
        sb.append("version(Possibly numeric code for LZX):=")
                .append(getVersion())
                .append(eol);
        sb.append("resetInterval(The Huffman reset interval):=")
                .append(getResetInterval())
                .append(", ");
        sb.append("windowSize:=")
                .append(getWindowSize())
                .append(", ");
        sb.append("windowsPerReset(unknown (sometimes 2, sometimes 1, sometimes 0):=")
                .append(getWindowsPerReset())
                .append(", ");
        sb.append("unknown_18:=")
                .append(getUnknown_18())
                .append(eol);
        return sb.toString();
    }

    /**
     * Parses an LZXC control data block from {@code data} into
     * {@code chmLzxcControlData}.
     * <p>
     * Fields are read in order from offset 0: size, signature, version, reset
     * interval, window size, windows per reset and, when the buffer is at
     * least {@code ChmConstants.CHM_LZXC_V2_LEN} bytes long, the DWORD at
     * offset 0x18 (otherwise it is set to 0). For version 2 the window size is
     * multiplied by {@code ChmConstants.CHM_WINDOW_SIZE_BLOCK}. All reads use
     * the target's own cursor, which is reset first, so the target may be any
     * instance and may be parsed into more than once.
     *
     * @param data               raw control data bytes
     * @param chmLzxcControlData instance that receives the parsed values
     * @throws TikaException if {@code data} is null or too short, the target
     *                       is rejected by {@code ChmAssert}, the window size
     *                       is 0 or 1, the reset interval is 0, or the
     *                       signature is not {@code ChmConstants.LZXC}
     */
    public void parse(byte[] data, ChmLzxcControlData chmLzxcControlData) throws TikaException {
        if (data == null || (data.length < ChmConstants.CHM_LZXC_MIN_LEN)) {
            throw new ChmParsingException("we want at least 0x18 bytes");
        }
        ChmAssert.assertChmAccessorNotNull(chmLzxcControlData);

        ChmLzxcControlData target = chmLzxcControlData;
        // Always start at the beginning of the buffer, even on a re-parse.
        target.setCurrentPlace(0);
        target.setDataRemained(data.length);

        target.setSize(target.unmarshalUInt32(data));
        target.unmarshalCharArray(data, ChmConstants.CHM_SIGNATURE_LEN);
        target.setVersion(target.unmarshalUInt32(data));
        target.setResetInterval(target.unmarshalUInt32(data));
        target.setWindowSize(target.unmarshalUInt32(data));
        target.setWindowsPerReset(target.unmarshalUInt32(data));

        if (data.length >= ChmConstants.CHM_LZXC_V2_LEN) {
            target.setUnknown_18(target.unmarshalUInt32(data));
        } else {
            target.setUnknown_18(0);
        }

        if (target.getVersion() == 2) {
            // Scale the value that was just parsed into the target, not this.windowSize.
            target.setWindowSize(target.getWindowSize() * ChmConstants.CHM_WINDOW_SIZE_BLOCK);
        }

        if (target.getWindowSize() == 0 || target.getResetInterval() == 0) {
            throw new ChmParsingException("window size / resetInterval should be more than zero");
        }
        if (target.getWindowSize() == 1) {
            throw new ChmParsingException("window size / resetInterval should be more than 1");
        }

        /* checks a signature */
        if (!new String(target.getSignature(), UTF_8).equals(ChmConstants.LZXC)) {
            throw new ChmParsingException("the signature does not seem to be correct");
        }
    }
}