blob: 87538c4400ec9dafbbc50e31094b45d1a9983a8f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.microsoft.chm;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.tika.exception.TikaException;
/**
 * Holds the directory listing entries of a chm file.
 * <p>
 * The listing is built once, in the constructor, by walking the chain of
 * directory chunks and decoding every entry of each PMGL chunk. While
 * decoding, the list indices of the first entries whose names contain
 * {@code ChmConstants.CONTROL_DATA} and {@code ChmConstants.RESET_TABLE}
 * are recorded.
 */
public class ChmDirectoryListingSet {
    private static final Logger LOG = LoggerFactory.getLogger(ChmDirectoryListingSet.class);
    private List<DirectoryListingEntry> dlel;
    /* raw bytes being parsed; released (set to null) once enumeration finishes */
    private byte[] data;
    /* read cursor inside the directory chunk currently being decoded */
    private int placeHolder = -1;
    private long dataOffset = -1;
    private int controlDataIndex = -1;
    private int resetTableIndex = -1;
    private boolean isNotControlDataFound = true;
    private boolean isNotResetTableFound = true;
    /* header of the directory chunk currently being decoded */
    private ChmPmglHeader PMGLheader;

    /**
     * Constructs chm directory listing set
     *
     * @param data          byte[] the directory chunks are read from
     * @param chmItsHeader  itsf header, supplies the directory and data offsets
     * @param chmItspHeader itsp header, supplies header length, block length
     *                      and the index of the first directory block
     * @throws TikaException if {@code data} is rejected or a chunk cannot be read
     */
    public ChmDirectoryListingSet(byte[] data, ChmItsfHeader chmItsHeader,
                                  ChmItspHeader chmItspHeader) throws TikaException {
        setDirectoryListingEntryList(new ArrayList<>());
        ChmCommons.assertByteArrayNotNull(data);
        setData(data);
        enumerateChmDirectoryListingList(chmItsHeader, chmItspHeader);
    }

    /**
     * Checks whether {@code data} begins with the characters of {@code prefix},
     * comparing one byte against one char.
     *
     * @param data   bytes to test; {@code null} or data shorter than the prefix
     *               yields {@code false}
     * @param prefix expected leading characters
     * @return {@code true} if every leading byte equals the matching prefix char
     */
    public static final boolean startsWith(byte[] data, String prefix) {
        // a chunk shorter than the marker cannot start with it
        if (data == null || data.length < prefix.length()) {
            return false;
        }
        for (int i = 0; i < prefix.length(); i++) {
            if (data[i] != prefix.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("list:=")
                .append(getDirectoryListingEntryList().toString())
                .append(System.lineSeparator());
        sb.append("number of list items:=")
                .append(getDirectoryListingEntryList().size());
        return sb.toString();
    }

    /**
     * Returns control data index that located in List
     *
     * @return control data index, or -1 if no such entry was found
     */
    public int getControlDataIndex() {
        return controlDataIndex;
    }

    /**
     * Sets control data index
     *
     * @param controlDataIndex index of the control data entry in the list
     */
    protected void setControlDataIndex(int controlDataIndex) {
        this.controlDataIndex = controlDataIndex;
    }

    /**
     * Return index of reset table
     *
     * @return reset table index, or -1 if no such entry was found
     */
    public int getResetTableIndex() {
        return resetTableIndex;
    }

    /**
     * Sets reset table index
     *
     * @param resetTableIndex index of the reset table entry in the list
     */
    protected void setResetTableIndex(int resetTableIndex) {
        this.resetTableIndex = resetTableIndex;
    }

    /**
     * Sets place holder
     *
     * @param placeHolder new position of the read cursor
     */
    private void setPlaceHolder(int placeHolder) {
        this.placeHolder = placeHolder;
    }

    /**
     * Enumerates chm directory listing entries
     * <p>
     * Starts at the block given by the itsp header and follows each chunk's
     * "next block" link until it becomes negative. A link back to an already
     * visited block aborts the walk. Parsing problems are logged and end the
     * enumeration; the entries collected so far are kept. The raw data
     * reference is always released afterwards.
     *
     * @param chmItsHeader  chm itsf PMGLheader
     * @param chmItspHeader chm itsp PMGLheader
     */
    private void enumerateChmDirectoryListingList(ChmItsfHeader chmItsHeader,
                                                  ChmItspHeader chmItspHeader)
            throws TikaException {
        try {
            int startPmgl = chmItspHeader.getIndex_head();
            int blockLen = (int) chmItspHeader.getBlock_len();
            int dir_offset = (int) (chmItsHeader.getDirOffset() + chmItspHeader.getHeader_len());
            setDataOffset(chmItsHeader.getDataOffset());
            /* loops over all pmgls */
            Set<Integer> processed = new HashSet<>();
            for (int i = startPmgl; i >= 0; ) {
                if (blockLen < 0) {
                    throw new ChmParsingException("Bad directory block length: " + blockLen);
                }
                int start = i * blockLen + dir_offset;
                byte[] dir_chunk = ChmCommons.copyOfRange(getData(), start, start + blockLen);
                PMGLheader = new ChmPmglHeader();
                PMGLheader.parse(dir_chunk, PMGLheader);
                enumerateOneSegment(dir_chunk);
                int nextBlock = PMGLheader.getBlockNext();
                processed.add(i);
                if (processed.contains(nextBlock)) {
                    throw new ChmParsingException("already processed block; avoiding cycle");
                }
                i = nextBlock;
            }
        } catch (ChmParsingException e) {
            LOG.warn("Chm parse exception", e);
        } finally {
            setData(null);
        }
    }

    /**
     * Checks control data
     * <p>
     * Records the index the entry is about to get in the list, the first time
     * an entry name contains {@code ChmConstants.CONTROL_DATA}.
     *
     * @param dle chm directory listing entry
     */
    private void checkControlData(DirectoryListingEntry dle) {
        if (isNotControlDataFound && dle.getName().contains(ChmConstants.CONTROL_DATA)) {
            setControlDataIndex(getDirectoryListingEntryList().size());
            isNotControlDataFound = false;
        }
    }

    /**
     * Checks reset table
     * <p>
     * Records the index the entry is about to get in the list, the first time
     * an entry name contains {@code ChmConstants.RESET_TABLE}.
     *
     * @param dle chm directory listing entry
     */
    private void checkResetTable(DirectoryListingEntry dle) {
        if (isNotResetTableFound && dle.getName().contains(ChmConstants.RESET_TABLE)) {
            setResetTableIndex(getDirectoryListingEntryList().size());
            isNotResetTableFound = false;
        }
    }

    /**
     * Enumerates chm directory listing entries in single chm segment
     * <p>
     * PMGI chunks are skipped. For a PMGL chunk each entry is decoded as:
     * encoded name length, name bytes (UTF-8), one type byte (0 means
     * uncompressed), encoded offset, encoded length.
     *
     * @param dir_chunk bytes of one directory chunk; {@code null} is ignored
     * @throws ChmParsingException if the chunk has an unknown marker or an
     *                             entry is malformed or truncated
     */
    private void enumerateOneSegment(byte[] dir_chunk) throws ChmParsingException, TikaException {
        if (dir_chunk == null) {
            return;
        }
        if (startsWith(dir_chunk, ChmConstants.CHM_PMGI_MARKER)) {
            return; //skip PMGI
        }
        if (!startsWith(dir_chunk, ChmConstants.PMGL)) {
            throw new ChmParsingException("Bad dir entry block.");
        }
        placeHolder = ChmConstants.CHM_PMGL_LEN;
        // the explicit length check keeps the cursor inside the chunk even if
        // the header reports a negative amount of free space
        while (placeHolder > 0 && placeHolder < dir_chunk.length
                && placeHolder < dir_chunk.length - PMGLheader.getFreeSpace()) {
            // The name length uses the same variable-length encoding as offset
            // and length. The former inline decoder compared a signed byte with
            // 0x80 (never true) and masked the whole sum with 0x7f, so names of
            // 128 bytes or more were decoded with a wrong length.
            int strlen = getEncint(dir_chunk);
            // the name has to fit into what is left of the chunk
            if (strlen < 0 || strlen > dir_chunk.length - placeHolder) {
                throw new ChmParsingException("Bad data of a string length.");
            }
            DirectoryListingEntry dle = new DirectoryListingEntry();
            dle.setNameLength(strlen);
            dle.setName(new String(ChmCommons
                    .copyOfRange(dir_chunk, placeHolder, (placeHolder + dle.getNameLength())),
                    UTF_8));
            checkControlData(dle);
            checkResetTable(dle);
            setPlaceHolder(placeHolder + dle.getNameLength());
            /* Sets entry type */
            if (placeHolder < dir_chunk.length && dir_chunk[placeHolder] == 0) {
                dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED);
            } else {
                dle.setEntryType(ChmCommons.EntryType.COMPRESSED);
            }
            setPlaceHolder(placeHolder + 1);
            dle.setOffset(getEncint(dir_chunk));
            dle.setLength(getEncint(dir_chunk));
            getDirectoryListingEntryList().add(dle);
        }
    }

    /**
     * Returns encoded integer
     * <p>
     * Reads a variable-length integer at the current cursor position: every
     * byte contributes its low 7 bits, most significant group first, and a
     * byte with the high bit set means another byte follows. The cursor is
     * advanced past the bytes consumed. If the cursor is already at or beyond
     * the end of the chunk, 0 is returned and the cursor is not moved.
     *
     * @param data_chunk chunk to read from
     * @return the low 32 bits of the decoded value
     * @throws ChmParsingException if the chunk ends in the middle of a value
     */
    private int getEncint(byte[] data_chunk) throws ChmParsingException {
        BigInteger bi = BigInteger.ZERO;
        if (placeHolder < data_chunk.length) {
            byte ob;
            do {
                // a continuation bit on the last byte of the chunk is malformed
                if (placeHolder >= data_chunk.length) {
                    throw new ChmParsingException(
                            "Truncated encoded integer in directory chunk.");
                }
                ob = data_chunk[placeHolder];
                bi = bi.shiftLeft(7).add(BigInteger.valueOf(ob & 0x7f));
                setPlaceHolder(placeHolder + 1);
            } while (ob < 0); // negative byte == high bit set == more to come
        }
        return bi.intValue();
    }

    /**
     * Returns chm directory listing entry list
     *
     * @return List<DirectoryListingEntry>
     */
    public List<DirectoryListingEntry> getDirectoryListingEntryList() {
        return dlel;
    }

    /**
     * Sets chm directory listing entry list
     *
     * @param dlel chm directory listing entry list
     */
    public void setDirectoryListingEntryList(List<DirectoryListingEntry> dlel) {
        this.dlel = dlel;
    }

    /**
     * Returns data
     *
     * @return the raw bytes being parsed, or {@code null} once enumeration is done
     */
    private byte[] getData() {
        return data;
    }

    /**
     * Sets data
     *
     * @param data raw bytes to parse
     */
    private void setData(byte[] data) {
        this.data = data;
    }

    /**
     * Returns data offset
     *
     * @return dataOffset as taken from the itsf header, or -1 if not set
     */
    public long getDataOffset() {
        return dataOffset;
    }

    /**
     * Sets data offset
     *
     * @param dataOffset data offset taken from the itsf header
     */
    private void setDataOffset(long dataOffset) {
        this.dataOffset = dataOffset;
    }
}