blob: 395c348169f4d80fbcc9d2add8844367099ae9d9 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.jbig2;
import java.io.EOFException;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import javax.imageio.stream.ImageInputStream;
import org.apache.pdfbox.jbig2.io.SubInputStream;
import org.apache.pdfbox.jbig2.util.log.Logger;
import org.apache.pdfbox.jbig2.util.log.LoggerFactory;
/**
 * This class represents the document structure with its pages and global segments.
 * <p>
 * The whole stream is mapped once during construction: an optional file header is parsed, every
 * segment header is read and each segment is attached either to the page it is associated with or
 * to the global segments.
 */
class JBIG2Document {
    private static final Logger log = LoggerFactory.getLogger(JBIG2Document.class);

    /** ID string in file header, see ISO/IEC 14492:2001, D.4.1 */
    private static final int[] FILE_HEADER_ID = {
        0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A
    };

    /** Segment type of the end-of-file segment; mapping stops after a segment of this type. */
    private static final int END_OF_FILE_SEGMENT_TYPE = 51;

    /** File organisation type: random-access. */
    public static final int RANDOM = 0;

    /** File organisation type: sequential. */
    public static final int SEQUENTIAL = 1;

    /**
     * This map contains all pages of this document. The key is the number of the page.
     */
    private final Map<Integer, JBIG2Page> pages = new TreeMap<Integer, JBIG2Page>();

    /* BASIC INFORMATION ABOUT THE CURRENT JBIG2 DOCUMENT */

    /**
     * The length of the file header if it exists: 9 bytes, or 13 bytes when the number-of-pages
     * field is present.
     */
    private short fileHeaderLength = 9;

    /**
     * According to D.4.2 - File header bit 0
     * <p>
     * This flag contains information about the file organization:
     * <ul>
     * <li>{@code 1} for sequential</li>
     * <li>{@code 0} for random-access</li>
     * </ul>
     * You can use the constants {@link #RANDOM} and {@link JBIG2Document#SEQUENTIAL}. If the stream
     * has no file header the value stays {@link #SEQUENTIAL}.
     */
    private short organisationType = SEQUENTIAL;

    /**
     * According to D.4.2 - Bit 1<br>
     * <ul>
     * <li>{@code true} if amount of pages is unknown, amount of pages field is not present</li>
     * <li>{@code false} if there is a field in the file header where the amount of pages can be
     * read</li>
     * </ul>
     */
    private boolean amountOfPagesUnknown = true;

    /**
     * According to D.4.3 - Number of pages field (4 bytes). Only present if
     * {@link #amountOfPagesUnknown} is {@code false}.
     */
    private int amountOfPages;

    /** Defines whether extended Template is used. */
    private boolean gbUseExtTemplate;

    /**
     * This is the source data stream wrapped into a {@link SubInputStream}.
     */
    private final SubInputStream subInputStream;

    /*
     * Whether the stream is embedded in another file format (file header missing) or stems from a
     * native jbig2 file (file header present) is not stored in a flag; it is detected while
     * mapping the stream, see isFileHeaderPresent().
     */

    /**
     * Holds a load of segments, that aren't associated with a page.
     */
    private JBIG2Globals globalSegments;

    /**
     * Creates a document from the given stream without predefined global segments.
     *
     * @param input - The stream containing the JBIG2 data. Must not be {@code null}.
     * @throws IOException if the stream cannot be read while mapping it.
     * @throws IllegalArgumentException if {@code input} is {@code null}.
     */
    protected JBIG2Document(ImageInputStream input) throws IOException {
        this(input, null);
    }

    /**
     * Creates a document from the given stream and immediately maps all of its segments.
     *
     * @param input - The stream containing the JBIG2 data. Must not be {@code null}.
     * @param globals - Already known global segments, or {@code null}; in the latter case an empty
     *            {@link JBIG2Globals} is created while mapping.
     * @throws IOException if the stream cannot be read while mapping it.
     * @throws IllegalArgumentException if {@code input} is {@code null}.
     */
    protected JBIG2Document(ImageInputStream input, JBIG2Globals globals) throws IOException {
        if (input == null) {
            throw new IllegalArgumentException("imageInputStream must not be null");
        }

        this.subInputStream = new SubInputStream(input, 0, Long.MAX_VALUE);
        this.globalSegments = globals;

        mapStream();
    }

    /**
     * Retrieves the segment with the given segment number considering only segments that aren't
     * associated with a page.
     *
     * @param segmentNr - The number of the wanted segment.
     * @return The requested {@link SegmentHeader}, or {@code null} (after logging an error) if this
     *         document has no global segments.
     */
    SegmentHeader getGlobalSegment(int segmentNr) {
        if (null != globalSegments) {
            return globalSegments.getSegment(segmentNr);
        }

        if (log.isErrorEnabled()) {
            log.error("Segment not found. Returning null.");
        }

        return null;
    }

    /**
     * Retrieves a {@link JBIG2Page} specified by the given page number.
     *
     * @param pageNumber - The page number of the wanted {@link JBIG2Page}.
     *
     * @return The requested {@link JBIG2Page}, or {@code null} if no page with that number has been
     *         mapped.
     */
    protected JBIG2Page getPage(int pageNumber) {
        return pages.get(pageNumber);
    }

    /**
     * Retrieves the amount of pages in this JBIG2 document.
     * <p>
     * If the file header did not provide a (non-zero) page count, the number of pages found while
     * mapping the stream is returned. Should no page be known yet, the stream is mapped again
     * first.
     *
     * @return The amount of pages in this JBIG2 document.
     * @throws IOException if the stream cannot be read while mapping it.
     */
    protected int getAmountOfPages() throws IOException {
        if (!amountOfPagesUnknown && amountOfPages != 0) {
            return amountOfPages;
        }

        if (pages.isEmpty()) {
            mapStream();
        }

        return pages.size();
    }

    /**
     * This method maps the stream and stores all segments.
     * <p>
     * Segments associated with a page are added to that page (pages are created on demand), all
     * other segments are added to the global segments.
     *
     * @throws IOException if the stream cannot be read.
     */
    private void mapStream() throws IOException {
        final List<SegmentHeader> segments = new LinkedList<SegmentHeader>();

        long offset = 0;
        int segmentType = 0;

        /*
         * Parse the file header if there is one.
         */
        if (isFileHeaderPresent()) {
            parseFileHeader();
            offset += fileHeaderLength;
        }

        if (globalSegments == null) {
            globalSegments = new JBIG2Globals();
        }

        /*
         * Walk through the segment headers until the end-of-file segment (segment type 51) has
         * been read or the end of the stream is reached.
         */
        while (segmentType != END_OF_FILE_SEGMENT_TYPE && !reachedEndOfStream(offset)) {
            final SegmentHeader segment = new SegmentHeader(this, subInputStream, offset,
                    organisationType);

            final int associatedPage = segment.getPageAssociation();
            segmentType = segment.getSegmentType();

            if (associatedPage != 0) {
                JBIG2Page page = getPage(associatedPage);
                if (page == null) {
                    page = new JBIG2Page(this, associatedPage);
                    pages.put(associatedPage, page);
                }
                page.add(segment);
            } else {
                // Page association 0: the segment does not belong to a page.
                globalSegments.addSegment(segment.getSegmentNr(), segment);
            }
            segments.add(segment);

            if (JBIG2ImageReader.DEBUG && log.isDebugEnabled()) {
                log.debug(segment.toString());
            }

            offset = subInputStream.getStreamPosition();

            // Sequential organization skips data part and sets the offset
            if (organisationType == SEQUENTIAL) {
                offset += segment.getSegmentDataLength();
            }
        }

        /*
         * Random organization: segment headers are finished. Data part starts and the offset can
         * be set.
         */
        determineRandomDataOffsets(segments, offset);
    }

    /**
     * Checks whether the stream starts with the file header ID string.
     * <p>
     * The check always looks at the very beginning of the stream - the same place
     * {@link #parseFileHeader()} reads from - so the result does not depend on where the stream
     * is currently positioned. The stream position is restored afterwards.
     *
     * @return {@code true} if the first bytes of the stream equal the file header ID string,
     *         {@code false} otherwise.
     * @throws IOException if the stream cannot be read.
     */
    private boolean isFileHeaderPresent() throws IOException {
        final SubInputStream input = subInputStream;
        input.mark();

        try {
            input.seek(0);

            for (int magicByte : FILE_HEADER_ID) {
                if (magicByte != input.read()) {
                    return false;
                }
            }

            return true;
        } finally {
            // Go back to the position marked above, whatever the outcome of the check.
            input.reset();
        }
    }

    /**
     * Determines the start of the data parts and sets the offset. Only has an effect if the
     * organisation type is {@link #RANDOM}; the data parts are then laid out one after another in
     * the order of the given segments.
     *
     * @param segments - The segment headers in the order in which they were read.
     * @param offset - The stream offset at which the data part of the first segment starts.
     */
    private void determineRandomDataOffsets(List<SegmentHeader> segments, long offset) {
        if (organisationType != RANDOM) {
            return;
        }

        long dataOffset = offset;
        for (SegmentHeader s : segments) {
            s.setSegmentDataStartOffset(dataOffset);
            dataOffset += s.getSegmentDataLength();
        }
    }

    /**
     * This method reads the file header at the beginning of the stream and sets variables for
     * information about organization type and length etc.
     *
     * @throws IOException if the stream cannot be read.
     */
    private void parseFileHeader() throws IOException {
        subInputStream.seek(0);

        /* D.4.1 - ID string, read will be skipped */
        subInputStream.skipBytes(8);

        /*
         * D.4.2 Header flag (1 byte)
         */

        // Bit 3-7 are reserved and must be 0
        subInputStream.readBits(5);

        // Bit 2 - Indicates if extended templates are used
        gbUseExtTemplate = subInputStream.readBit() == 1;

        // Bit 1 - Indicates if amount of pages are unknown
        amountOfPagesUnknown = subInputStream.readBit() == 1;

        // Bit 0 - Indicates file organisation type
        organisationType = (short) subInputStream.readBit();

        /*
         * D.4.3 Number of pages (field is only present if amount of pages are 'NOT unknown')
         */
        if (!amountOfPagesUnknown) {
            amountOfPages = (int) subInputStream.readUnsignedInt();
            // ID string (8) + flags (1) + number of pages (4)
            fileHeaderLength = 13;
        }
    }

    /**
     * This method checks, if the stream is at its end to avoid {@link EOFException}s. To do so it
     * seeks to the given offset and tries to read 32 bits.
     *
     * @param offset - The stream position to probe.
     * @return {@code true} if end of stream reached, {@code false} if there are more bytes to read
     * @throws IOException if the stream cannot be read for another reason than reaching its end.
     */
    private boolean reachedEndOfStream(long offset) throws IOException {
        try {
            subInputStream.seek(offset);
            subInputStream.readBits(32);
            return false;
        } catch (EOFException e) {
            return true;
        } catch (IndexOutOfBoundsException e) {
            return true;
        }
    }

    /**
     * @return The segments that aren't associated with a page.
     */
    protected JBIG2Globals getGlobalSegments() {
        return globalSegments;
    }

    /**
     * @return {@code true} if the amount of pages is not given by the file header (or there is no
     *         file header), {@code false} otherwise.
     */
    protected boolean isAmountOfPagesUnknown() {
        return amountOfPagesUnknown;
    }

    /**
     * @return {@code true} if the file header states that extended templates are used.
     */
    boolean isGbUseExtTemplate() {
        return gbUseExtTemplate;
    }
}