// pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java - pdfbox - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.pdfbox.pdfparser;

 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
 import java.security.GeneralSecurityException;
 import java.security.KeyStore;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.Map.Entry;
 import java.util.Set;

 import org.apache.logging.log4j.Logger;
 import org.apache.logging.log4j.LogManager;
 import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSDocument;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSNull;
 import org.apache.pdfbox.cos.COSNumber;
 import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.cos.COSObjectKey;
 import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.cos.ICOSParser;
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.RandomAccessRead;
 import org.apache.pdfbox.io.RandomAccessReadView;
 import org.apache.pdfbox.io.RandomAccessStreamCache.StreamCacheCreateFunction;
 import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
 import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
 import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial;
 import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
 import org.apache.pdfbox.pdmodel.encryption.PDEncryption;
 import org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy;
 import org.apache.pdfbox.pdmodel.encryption.PublicKeyDecryptionMaterial;
 import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
 import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
 import org.apache.pdfbox.util.StringUtil;

 /**
  * COS-Parser which first reads startxref and xref tables in order to know valid objects and parse only these objects.
  *
  * This class is a much enhanced version of <code>QuickParser</code> presented in
  * <a href="https://issues.apache.org/jira/browse/PDFBOX-1104">PDFBOX-1104</a> by Jeremy Villalobos.
  */
 public class COSParser extends BaseParser implements ICOSParser
 {
     /** Prefix of the header line of a PDF file. */
     private static final String PDF_HEADER = "%PDF-";
     /** Prefix of the header line of a FDF file. */
     private static final String FDF_HEADER = "%FDF-";

     // NOTE(review): presumably the versions assumed if the header doesn't provide one; the usage isn't
     // visible in this part of the file - confirm
     private static final String PDF_DEFAULT_VERSION = "1.4";
     private static final String FDF_DEFAULT_VERSION = "1.0";

     /** The characters of the keyword "xref". */
     private static final char[] XREF_TABLE = { 'x', 'r', 'e', 'f' };
     /** The characters of the keyword "startxref", see {@link #getStartxrefOffset()}. */
     private static final char[] STARTXREF = { 's','t','a','r','t','x','r','e','f' };

     // NOTE(review): E, N, D, ... are declared outside of this class; presumably the byte values of the
     // corresponding ASCII letters so that the arrays spell "endstream" and "endobj" - confirm
     private static final byte[] ENDSTREAM = { E, N, D, S, T, R, E, A, M };

     private static final byte[] ENDOBJ = { E, N, D, O, B, J };

     // NOTE(review): the usage of this value is not visible in this part of the file
     private static final long MINIMUM_SEARCH_OFFSET = 6;

     /** First character of the keyword "xref"; used to tell a cross reference table from a cross reference stream. */
     private static final int X = 'x';

     // NOTE(review): presumably a buffer used when reading stream data; the usage is not visible here - confirm
     private static final int STRMBUFLEN = 2048;
     private final byte[] strmBuf = new byte[ STRMBUFLEN ];

     /** Access permission provided by the security handler, if there is any. */
     private AccessPermission accessPermission;
     /** Key store passed to the constructor, meant for public key security. */
     private InputStream keyStoreInputStream = null;
     /** Password passed to the constructor. */
     @SuppressWarnings({"squid:S2068"})
     private String password = "";
     /** Key alias passed to the constructor, meant for public key security. */
     private String keyAlias = null;

     /**
      * Name of the system property defining the number of trailing bytes of the file which are searched for the
      * %%EOF marker, see {@link #setEOFLookupRange(int)}.
      * Useful if there are additional characters after %%EOF within the PDF.
      */
     public static final String SYSPROP_EOFLOOKUPRANGE =
             "org.apache.pdfbox.pdfparser.nonSequentialPDFParser.eofLookupRange";

     /**
      * Default value for how many trailing bytes to read for EOF marker.
      */
     private static final int DEFAULT_TRAIL_BYTECOUNT = 2048;
     /**
      * EOF-marker.
      */
     protected static final char[] EOF_MARKER = { '%', '%', 'E', 'O', 'F' };
     /**
      * obj-marker.
      */
     protected static final char[] OBJ_MARKER = { 'o', 'b', 'j' };

     /**
      * file length.
      */
     protected long fileLen;

     /**
      * Is the parser using its auto healing capacity?
      */
     private boolean isLenient = true;

     /** As soon as this flag is set, {@link #setLenient(boolean)} refuses to change the leniency. */
     protected boolean initialParseDone = false;

     /** Set as soon as the trailer was rebuilt by the brute force parser. */
     private boolean trailerWasRebuild = false;

     // NOTE(review): presumably created on demand by getBruteForceParser(), which is not visible here - confirm
     private BruteForceParser bruteForceParser = null;
     /** Encryption information; taken over from the brute force parser if the trailer had to be rebuilt. */
     private PDEncryption encryption = null;

     /**
      * Intermediate cache. Contains all objects of already read compressed object streams. Objects are removed after
      * dereferencing them.
      */
     private final Map<Long, Map<COSObjectKey, COSBase>> decompressedObjects = new HashMap<>();

     /**
      * The security handler.
      */
     protected SecurityHandler<? extends ProtectionPolicy> securityHandler = null;

     /**
      *  How many trailing bytes to read for EOF marker, see {@link #setEOFLookupRange(int)}.
      */
     private int readTrailBytes = DEFAULT_TRAIL_BYTECOUNT;

     private static final Logger LOG = LogManager.getLogger(COSParser.class);

     /**
      * Collects all Xref/trailer objects and resolves them into single
      * object using startxref reference.
      */
     protected XrefTrailerResolver xrefTrailerResolver = new XrefTrailerResolver();

     /**
      * Creates a parser for the given source without providing any decryption material.
      *
      * <p>Delegates to {@link #COSParser(RandomAccessRead, String, InputStream, String)} passing {@code null} as
      * password, key store and key alias.</p>
      *
      * @param source input representing the pdf.
      *
      * @throws IOException if something went wrong
      */
     public COSParser(RandomAccessRead source) throws IOException
     {
         this(source, null, null, null);
     }

     /**
      * Constructor for encrypted pdfs.
      *
      * <p>Delegates to
      * {@link #COSParser(RandomAccessRead, String, InputStream, String, StreamCacheCreateFunction)} passing
      * {@code null} as function to create the stream cache.</p>
      *
      * @param source input representing the pdf.
      * @param password password to be used for decryption.
      * @param keyStore key store to be used for decryption when using public key security
      * @param keyAlias alias to be used for decryption when using public key security
      *
      * @throws IOException if the source data could not be read
      */
     public COSParser(RandomAccessRead source, String password, InputStream keyStore,
             String keyAlias) throws IOException
     {
         this(source, password, keyStore, keyAlias, null);
     }

     /**
      * Constructor for encrypted pdfs which allows to provide a function to create the stream cache.
      *
      * <p>The given decryption material and the length of the source are stored, afterwards the parser is
      * initialized, see {@code init}.</p>
      *
      * @param source input representing the pdf.
      * @param password password to be used for decryption.
      * @param keyStore key store to be used for decryption when using public key security
      * @param keyAlias alias to be used for decryption when using public key security
      * @param streamCacheCreateFunction a function to create an instance of the stream cache
      *
      * @throws IOException if the source data could not be read
      */
     public COSParser(RandomAccessRead source, String password, InputStream keyStore,
             String keyAlias, StreamCacheCreateFunction streamCacheCreateFunction) throws IOException
     {
         super(source);
         // keep the decryption material as it was passed in
         this.keyStoreInputStream = keyStore;
         this.keyAlias = keyAlias;
         this.password = password;
         // the length of the input is needed to locate the trailing part of the file
         this.fileLen = source.length();
         init(streamCacheCreateFunction);
     }

     /**
      * Applies the system property {@link #SYSPROP_EOFLOOKUPRANGE}, if it is defined, and creates the document
      * to be filled by this parser.
      *
      * @param streamCacheCreateFunction a function to create an instance of the stream cache
      */
     private void init(StreamCacheCreateFunction streamCacheCreateFunction)
     {
         final String configuredRange = System.getProperty(SYSPROP_EOFLOOKUPRANGE);
         if (configuredRange != null)
         {
             try
             {
                 final int lookupRange = Integer.parseInt(configuredRange);
                 setEOFLookupRange(lookupRange);
             }
             catch (NumberFormatException nfe)
             {
                 // a malformed value is reported only, the current lookup range remains in place
                 LOG.warn(
                         "System property " + SYSPROP_EOFLOOKUPRANGE + " does not contain an integer value, but: '{}'",
                         configuredRange);
             }
         }
         document = new COSDocument(streamCacheCreateFunction, this);
     }

     /**
      * Defines the number of trailing bytes of the PDF file which are searched for the EOF marker and the
      * 'startxref' marker. {@link #DEFAULT_TRAIL_BYTECOUNT} is used as long as no other value is set.
      *
      * <p>Values lower than 16 are ignored. However for practical use cases this value should not be lower than
      * 1000; even 2000 was found to not be enough in some cases where some trailing garbage like HTML snippets
      * followed the EOF marker.</p>
      *
      * <p>
      * In case system property {@link #SYSPROP_EOFLOOKUPRANGE} is defined this value will be set on initialization but
      * can be overwritten later.
      * </p>
      *
      * @param byteCount number of trailing bytes
      */
     public void setEOFLookupRange(int byteCount)
     {
         if (byteCount < 16)
         {
             // too small, keep the current value
             return;
         }
         readTrailBytes = byteCount;
     }

     /**
      * Provides the trailer dictionary of the document.
      *
      * <p>The trailer is read following the startxref reference. In lenient mode the trailer is rebuilt by the
      * brute force parser if the startxref value can't be found, if reading the cross reference information fails
      * or if the trailer doesn't contain a Root entry.</p>
      *
      * @return a COSDictionary containing the trailer information
      * @throws IOException if something went wrong
      */
     protected COSDictionary retrieveTrailer() throws IOException
     {
         COSDictionary trailer = null;
         boolean rebuildTrailer = false;
         try
         {
             // locate and parse startxref
             // TODO FDF files don't have a startxref value, so that rebuildTrailer is triggered
             final long startXRefOffset = getStartxrefOffset();
             if (startXRefOffset < 0)
             {
                 rebuildTrailer = isLenient();
             }
             else
             {
                 trailer = parseXref(startXRefOffset);
             }
         }
         catch (IOException exception)
         {
             if (!isLenient())
             {
                 throw exception;
             }
             rebuildTrailer = true;
         }
         // a trailer without a Root object can't be used
         final boolean rootIsMissing = trailer != null && trailer.getItem(COSName.ROOT) == null;
         if (rootIsMissing)
         {
             rebuildTrailer = isLenient();
         }
         if (!rebuildTrailer)
         {
             // prepare decryption if necessary
             prepareDecryption();
             // access the field directly, the getter would create an instance of BruteForceParser
             if (bruteForceParser != null && bruteForceParser.bfSearchTriggered())
             {
                 getBruteForceParser().bfSearchForObjStreams(xrefTrailerResolver, securityHandler);
             }
         }
         else
         {
             trailer = getBruteForceParser().rebuildTrailer(xrefTrailerResolver, null);
             trailerWasRebuild = true;
             // take over the encryption information found by the BruteForceParser
             encryption = getBruteForceParser().getEncryption();
             if (encryption != null)
             {
                 securityHandler = encryption.getSecurityHandler();
                 accessPermission = securityHandler.getCurrentAccessPermission();
             }
         }
         if (resetTrailerResolver())
         {
             // the resolver isn't needed any longer
             xrefTrailerResolver.reset();
             xrefTrailerResolver = null;
         }
         return trailer;
     }

     /**
      * Indicates whether the xref trailer resolver should be reset or not. Should be overwritten if the xref trailer
      * resolver is needed after the initial parsing.
      *
      * <p>If this method returns true, {@link #retrieveTrailer()} resets the resolver and sets the field
      * {@link #xrefTrailerResolver} to {@code null}.</p>
      *
      * @return true if the xref trailer resolver should be reset, which is the default
      */
     protected boolean resetTrailerResolver()
     {
         return true;
     }

     /**
      * Parses the whole chain of cross reference tables and/or cross reference streams.
      *
      * <p>The chain starts at the offset read from the startxref entry and is followed using the /Prev
      * references. For a cross reference table the stream referenced by /XRefStm (hybrid reference file) is
      * read as well. All offsets are checked using {@code checkXRefOffset} and replaced if a corrected value is
      * provided. Finally the resolved trailer and cross reference table are transferred to the document.</p>
      *
      * @param startXRefOffset offset of the startxref keyword as provided by {@link #getStartxrefOffset()}
      * @return the trailer dictionary
      * @throws IOException if something went wrong, e.g. if the /Prev references form a loop
      */
     private COSDictionary parseXref(long startXRefOffset) throws IOException
     {
         source.seek(startXRefOffset);
         long startXrefOffset = Math.max(0, parseStartXref());
         // check the startxref offset
         long fixedOffset = checkXRefOffset(startXrefOffset);
         if (fixedOffset > -1)
         {
             startXrefOffset = fixedOffset;
         }
         document.setStartXref(startXrefOffset);
         long prev = startXrefOffset;
         // ---- parse whole chain of xref tables/object streams using PREV reference
         Set<Long> prevSet = new HashSet<>();
         COSDictionary trailer = null;
         while (prev > 0)
         {
             // save expected position for loop detection
             prevSet.add(prev);
             // seek to xref table
             source.seek(prev);
             // skip white spaces
             skipSpaces();
             // save current position as well due to skipped spaces
             prevSet.add(source.getPosition());
             // -- parse xref
             if (source.peek() == X)
             {
                 // xref table and trailer
                 // use existing parser to parse xref table
                 if (!parseXrefTable(prev) || !parseTrailer())
                 {
                     throw new IOException("Expected trailer object at offset "
                             + source.getPosition());
                 }
                 trailer = xrefTrailerResolver.getCurrentTrailer();
                 // check for a XRef stream, it may contain some object ids of compressed objects
                 if(trailer.containsKey(COSName.XREF_STM))
                 {
                     // the offset is read as a long value, an int would truncate offsets beyond 2GB
                     long streamOffset = trailer.getLong(COSName.XREF_STM);
                     // check the xref stream reference
                     fixedOffset = checkXRefOffset(streamOffset);
                     if (fixedOffset > -1 && fixedOffset != streamOffset)
                     {
                         LOG.warn("/XRefStm offset {} is incorrect, corrected to {}", streamOffset,
                                 fixedOffset);
                         streamOffset = fixedOffset;
                         trailer.setLong(COSName.XREF_STM, streamOffset);
                     }
                     if (streamOffset > 0)
                     {
                         source.seek(streamOffset);
                         skipSpaces();
                         try
                         {
                             // the stream supplements the table, so offset and trailer of the table are kept
                             parseXrefObjStream(prev, false);
                             document.setHasHybridXRef();
                         }
                         catch (IOException ex)
                         {
                             if (isLenient)
                             {
                                 LOG.error("Failed to parse /XRefStm at offset {}", streamOffset,
                                         ex);
                             }
                             else
                             {
                                 throw ex;
                             }
                         }
                     }
                     else
                     {
                         if(isLenient)
                         {
                             LOG.error("Skipped XRef stream due to a corrupt offset:{}",
                                     streamOffset);
                         }
                         else
                         {
                             throw new IOException("Skipped XRef stream due to a corrupt offset:"+streamOffset);
                         }
                     }
                 }
                 prev = trailer.getLong(COSName.PREV);
             }
             else
             {
                 // parse xref stream
                 prev = parseXrefObjStream(prev, true);
                 trailer = xrefTrailerResolver.getCurrentTrailer();
             }
             if (prev > 0)
             {
                 // check the xref table reference
                 fixedOffset = checkXRefOffset(prev);
                 if (fixedOffset > -1 && fixedOffset != prev)
                 {
                     prev = fixedOffset;
                     trailer.setLong(COSName.PREV, prev);
                 }
             }
             if (prevSet.contains(prev))
             {
                 throw new IOException("/Prev loop at offset " + prev);
             }
         }
         // ---- build valid xrefs out of the xref chain
         xrefTrailerResolver.setStartxref(startXrefOffset);
         trailer = xrefTrailerResolver.getTrailer();
         document.setTrailer(trailer);
         document.setIsXRefStream(XRefType.STREAM == xrefTrailerResolver.getXrefType());
         // check the offsets of all referenced objects
         if (isLenient)
         {
             checkXrefOffsets();
         }
         // copy xref table
         document.addXRefTable(xrefTrailerResolver.getXrefTable());

         // remember the highest XRef object number to avoid it being reused in incremental saving
         Optional<Long> maxValue = document.getXrefTable().keySet().stream() //
                 .map(COSObjectKey::getNumber) //
                 .reduce(Long::max);
         // 0 is used if the xref table is empty
         document.setHighestXRefObjectNumber(maxValue.orElse(0L));

         return trailer;
     }

     /**
      * Parses a cross reference stream. The current position of the source is expected to be the start of the
      * indirect object holding the stream.
      *
      * @param objByteOffset the offset passed to the xref trailer resolver for a standalone stream
      * @param isStandalone true for a standalone cross reference stream, false if the stream supplements a cross
      * reference table (hybrid reference); in the latter case the offset and the trailer of the table are kept
      *
      * @return value of PREV item in dictionary or <code>-1</code> if no such item exists
      * @throws IOException if something went wrong
      */
     private long parseXrefObjStream(long objByteOffset, boolean isStandalone) throws IOException
     {
         // consume the head of the indirect object: object number, generation number and 'obj'
         readObjectNumber();
         readGenerationNumber();
         readExpectedString(OBJ_MARKER, true);

         final COSDictionary streamDictionary = parseCOSDictionary(false);
         try (COSStream crossReferenceStream = parseCOSStream(streamDictionary))
         {
             if (isStandalone)
             {
                 // start a new section, the stream dictionary acts as its trailer
                 xrefTrailerResolver.nextXrefObj(objByteOffset, XRefType.STREAM);
                 xrefTrailerResolver.setTrailer(crossReferenceStream);
             }
             new PDFXrefStreamParser(crossReferenceStream, document).parse(xrefTrailerResolver);
         }

         return streamDictionary.getLong(COSName.PREV);
     }

     /**
      * Determines the offset of the last <code>startxref</code> keyword. Only the trailing bytes of the file are
      * taken into account, their number is {@link #DEFAULT_TRAIL_BYTECOUNT} or the value set via
      * {@link #setEOFLookupRange(int)}. The keyword has to precede the last '%%EOF' marker; in lenient mode the
      * marker may be missing, in that case the whole trailing part is searched.
      *
      * <p>The source is positioned at the start of the file afterwards.</p>
      *
      * @return the offset of StartXref
      * @throws IOException If something went wrong.
      */
     private long getStartxrefOffset() throws IOException
     {
         byte[] trailingBytes;
         long trailingStart;
         try
         {
             // never read more than the file provides
             final int bytesToRead = (int) Math.min(fileLen, readTrailBytes);
             trailingBytes = new byte[bytesToRead];
             trailingStart = fileLen - bytesToRead;
             source.seek(trailingStart);
             int filled = 0;
             while (filled < bytesToRead)
             {
                 final int chunk = source.read(trailingBytes, filled, bytesToRead - filled);
                 // this should never happen, but the check avoids getting stuck in the loop
                 if (chunk < 1)
                 {
                     throw new IOException(
                             "No more bytes to read for trailing buffer, but expected: "
                                     + (bytesToRead - filled));
                 }
                 filled += chunk;
             }
         }
         finally
         {
             source.seek(0);
         }
         // the search for 'startxref' ends at the last '%%EOF'
         int searchEnd = lastIndexOf(EOF_MARKER, trailingBytes, trailingBytes.length);
         if (searchEnd < 0)
         {
             if (!isLenient)
             {
                 throw new IOException("Missing end of file marker '" + new String(EOF_MARKER) + "'");
             }
             // in lenient mode the '%%EOF' isn't needed
             searchEnd = trailingBytes.length;
             LOG.debug("Missing end of file marker '{}'", new String(EOF_MARKER));
         }
         final int startxrefIndex = lastIndexOf(STARTXREF, trailingBytes, searchEnd);
         if (startxrefIndex < 0)
         {
             throw new IOException("Missing 'startxref' marker.");
         }
         return trailingStart + startxrefIndex;
     }

     /**
      * Searches the last appearance of the pattern within the buffer. Only occurrences which are completely
      * located in front of <code>endOff</code> are taken into account.
      *
      * <p>Every possible start position is checked, so that an occurrence is found even if it is directly
      * followed by characters which are part of the pattern as well, e.g. '%%EOF' within '%%EOFF'.</p>
      *
      * @param pattern pattern to search for
      * @param buf buffer to search pattern in
      * @param endOff offset (exclusive) where lookup starts at; values beyond the end of the buffer are treated
      * like the length of the buffer
      *
      * @return start offset of pattern within buffer or <code>-1</code> if pattern could not be found or if the
      * pattern is empty
      */
     protected int lastIndexOf(final char[] pattern, final byte[] buf, final int endOff)
     {
         final int patternLength = pattern.length;
         if (patternLength == 0)
         {
             // there is nothing to search for
             return -1;
         }
         // never read behind the end of the buffer
         final int searchEnd = Math.min(endOff, buf.length);
         // the patterns are short, so checking each candidate position from right to left is cheap
         for (int start = searchEnd - patternLength; start >= 0; start--)
         {
             int matched = 0;
             while (matched < patternLength && buf[start + matched] == pattern[matched])
             {
                 matched++;
             }
             if (matched == patternLength)
             {
                 // whole pattern matched
                 return start;
             }
         }
         return -1;
     }

     /**
      * Indicates whether the parser is lenient, i.e. whether it uses its auto healing capacity to handle
      * malformed PDFs.
      *
      * @return true if parser is lenient
      */
     public boolean isLenient()
     {
         return this.isLenient;
     }

     /**
      * Changes the leniency of the parser.
      *
      * The leniency can't be changed any more as soon as the initial parsing is done.
      *
      * @param lenient try to handle malformed PDFs.
      *
      * @throws IllegalArgumentException if the initial parsing is already done
      */
     protected void setLenient(boolean lenient)
     {
         if (!initialParseDone)
         {
             this.isLenient = lenient;
             return;
         }
         throw new IllegalArgumentException("Cannot change leniency after parsing");
     }

     /**
      * Parses the object the given indirect object refers to. The parsed object, if any, is marked as indirect
      * and gets the key of the given object. Afterwards the source is repositioned to where it was before,
      * provided that position was greater than 0.
      *
      * @param obj the indirect object to be dereferenced
      * @return the parsed object, may be null
      * @throws IOException if something went wrong when parsing the object
      */
     @Override
     public COSBase dereferenceCOSObject(COSObject obj) throws IOException
     {
         final long positionBeforeParsing = source.getPosition();
         final COSObjectKey objectKey = obj.getKey();
         final COSBase dereferenced = parseObjectDynamically(objectKey, false);
         if (dereferenced != null)
         {
             dereferenced.setDirect(false);
             dereferenced.setKey(objectKey);
         }
         if (positionBeforeParsing > 0)
         {
             // parsing moved the position of the source
             source.seek(positionBeforeParsing);
         }
         return dereferenced;
     }

     /**
      * Creates a view of the source of this parser by delegating to the source itself.
      *
      * @param startPosition start position passed to the source
      * @param streamLength length passed to the source
      * @return the view created by the source
      * @throws IOException if the source fails to create the view
      */
     @Override
     public RandomAccessReadView createRandomAccessReadView(long startPosition, long streamLength)
             throws IOException
     {
         return source.createView(startPosition, streamLength);
     }

     /**
      * Parses the object with the given key, unless the object pool of the document already provides a value.
      *
      * @param objKey key of object to be parsed
      * @param requireExistingNotCompressedObj if <code>true</code> the object to be parsed must be defined in xref
      * (comment: null objects may be missing from xref) and it must not be a compressed object within object stream
      * (this is used to circumvent being stuck in a loop in a malicious PDF)
      *
      * @return the parsed object, may be null
      *
      * @throws IOException If an IO error occurs.
      */
     protected synchronized COSBase parseObjectDynamically(COSObjectKey objKey,
             boolean requireExistingNotCompressedObj) throws IOException
     {
         final COSObject pooledObject = document.getObjectFromPool(objKey);
         if (!pooledObject.isObjectNull())
         {
             // nothing to parse, the pooled object already holds a value
             return pooledObject.getObject();
         }
         final Long xrefValue = getObjectOffset(objKey, requireExistingNotCompressedObj);
         COSBase parsed = null;
         if (xrefValue != null)
         {
             // a positive value is an offset within the file, any other value is the negated number of the
             // object stream containing the object; that stream wasn't parsed so far as the object is missing
             parsed = xrefValue > 0 //
                     ? parseFileObject(xrefValue, objKey) //
                     : parseObjectStreamObject(-xrefValue, objKey);
         }
         final boolean resolved = parsed != null && !(parsed instanceof COSNull);
         if (!resolved)
         {
             // not defined object -> NULL object (Spec. 1.7, chap. 3.2.9)
             // or some other issue with dereferencing
             // remove parser to avoid endless recursion
             pooledObject.setToNull();
         }
         return parsed;
     }

     /**
      * Provides the xref value of the given object: a positive value is the offset of the object within the
      * file, any other value is the negated number of the object stream containing the object. In lenient mode
      * a value missing in the xref table is taken from the brute force parser and added to the xref table.
      *
      * @param objKey key of the object
      * @param requireExistingNotCompressedObj if <code>true</code> a positive offset is required
      * @return the xref value or null if there isn't any
      * @throws IOException if a positive offset is required but not available
      */
     private Long getObjectOffset(COSObjectKey objKey, boolean requireExistingNotCompressedObj)
             throws IOException
     {
         Long xrefValue = document.getXrefTable().get(objKey);

         if (isLenient && xrefValue == null)
         {
             // maybe something is wrong with the xref table -> use the result of the brute force search
             xrefValue = getBruteForceParser().getBFCOSObjectOffsets().get(objKey);
             if (xrefValue != null)
             {
                 LOG.debug("Set missing offset {} for object {}", xrefValue, objKey);
                 document.getXrefTable().put(objKey, xrefValue);
             }
         }

         // test to circumvent loops with broken documents
         final boolean isFileOffset = xrefValue != null && xrefValue > 0;
         if (requireExistingNotCompressedObj && !isFileOffset)
         {
             throw new IOException("Object must be defined and must not be compressed object: "
                     + objKey.getNumber() + ":" + objKey.getGeneration());
         }
         return xrefValue;
     }

     /**
      * Parses the indirect object located at the given offset of the file.
      *
      * <p>The object number and the generation number read from the file have to match the given key. If the
      * object is followed by the stream keyword, the stream is parsed as well. If a security handler is set, the
      * parsed object is decrypted. A missing 'endobj' is reported as a warning in lenient mode, otherwise it
      * leads to an exception.</p>
      *
      * @param objOffset offset of the object within the file
      * @param objKey the expected key of the object
      * @return the parsed object, which is a COSStream if stream data follows the dictionary
      * @throws IOException if the object at the given offset has another key, if a stream isn't preceded by a
      * dictionary, if 'endobj' is missing in non-lenient mode or if reading fails
      */
     private COSBase parseFileObject(Long objOffset, final COSObjectKey objKey)
             throws IOException
     {
         // jump to the object start
         source.seek(objOffset);

         // an indirect object starts with the object number/generation number
         final long readObjNr = readObjectNumber();
         final int readObjGen = readGenerationNumber();
         readExpectedString(OBJ_MARKER, true);

         // consistency check
         if (readObjNr != objKey.getNumber() || readObjGen != objKey.getGeneration())
         {
             throw new IOException("XREF for " + objKey.getNumber() + ":"
                     + objKey.getGeneration() + " points to wrong object: " + readObjNr
                     + ":" + readObjGen + " at offset " + objOffset);
         }

         skipSpaces();
         COSBase parsedObject = parseDirObject();
         if (parsedObject != null)
         {
             // the object was read as part of an indirect object
             parsedObject.setDirect(false);
             parsedObject.setKey(objKey);
         }
         // the token following the object decides whether stream data follows
         String endObjectKey = readString();

         if (endObjectKey.equals(STREAM_STRING))
         {
             // go back to the start of the stream keyword so that the stream parser reads it itself
             source.rewind(endObjectKey.getBytes(StandardCharsets.ISO_8859_1).length);
             if (parsedObject instanceof COSDictionary)
             {
                 COSStream stream = parseCOSStream((COSDictionary) parsedObject);

                 if (securityHandler != null)
                 {
                     securityHandler.decryptStream(stream, objKey.getNumber(), objKey.getGeneration());
                 }
                 parsedObject = stream;
             }
             else
             {
                 // this is not legal
                 // the combination of a dict and the stream/endstream
                 // forms a complete stream object
                 throw new IOException("Stream not preceded by dictionary (offset: "
                         + objOffset + ").");
             }
             skipSpaces();
             endObjectKey = readLine();

             // we have case with a second 'endstream' before endobj
             if (!endObjectKey.startsWith(ENDOBJ_STRING) && endObjectKey.startsWith(ENDSTREAM_STRING))
             {
                 // 9 is the length of 'endstream'; keep whatever follows on the same line
                 endObjectKey = endObjectKey.substring(9).trim();
                 if (endObjectKey.isEmpty())
                 {
                     // no other characters in extra endstream line
                     // read next line
                     endObjectKey = readLine();
                 }
             }
         }
         else if (securityHandler != null)
         {
             // no stream, decrypt the object itself
             securityHandler.decrypt(parsedObject, objKey.getNumber(), objKey.getGeneration());
         }

         if (!endObjectKey.startsWith(ENDOBJ_STRING))
         {
             if (isLenient)
             {
                 LOG.warn("Object ({}:{}) at offset {} does not end with 'endobj' but with '{}'",
                         readObjNr, readObjGen, objOffset, endObjectKey);
             }
             else
             {
                 throw new IOException("Object (" + readObjNr + ":" + readObjGen
                         + ") at offset " + objOffset
                         + " does not end with 'endobj' but with '" + endObjectKey + "'");
             }
         }
         return parsedObject;
     }

     /**
      * Provides the object with the given key from the object stream with the given number.
      *
      * <p>If the object isn't part of the intermediate cache, the whole object stream is parsed. The requested
      * object is returned and all the other objects of the stream are added to the cache unless the cache
      * already contains an object for the same key. An object taken from the cache is removed from it.</p>
      *
      * @param objstmObjNr the number of the object stream
      * @param key the key of the object to be parsed
      * @return the parsed object, or null if it isn't available
      * @throws IOException if something went wrong when parsing the object
      */
     protected COSBase parseObjectStreamObject(long objstmObjNr, COSObjectKey key) throws IOException
     {
         final Map<COSObjectKey, COSBase> cachedObjects = decompressedObjects
                 .computeIfAbsent(objstmObjNr, nr -> new HashMap<>());
         // was the compressed object stream already read?
         final COSBase cachedObject = cachedObjects.remove(key);
         if (cachedObject != null)
         {
             return cachedObject;
         }
         final COSBase objectStream = document.getObjectFromPool(getObjectKey(objstmObjNr, 0))
                 .getObject();
         if (!(objectStream instanceof COSStream))
         {
             // there is no stream to read the object from
             return null;
         }
         COSBase requestedObject = null;
         try
         {
             final Map<COSObjectKey, COSBase> parsedObjects = new PDFObjectStreamParser(
                     (COSStream) objectStream, document).parseAllObjects();
             requestedObject = parsedObjects.remove(key);
             // keep the remaining objects for subsequent requests
             parsedObjects.forEach(cachedObjects::putIfAbsent);
         }
         catch (IOException ex)
         {
             if (!isLenient)
             {
                 throw ex;
             }
             LOG.error("object stream {} could not be parsed due to an exception",
                     objstmObjNr, ex);
         }
         return requestedObject;
     }

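     /**
      * Returns the length value defined directly by the given object or by the object it refers to.
      *
      * @param lengthBaseObj the length entry, either a number or a reference to a number, may be null
      * @return the length, or null if no length object was given or if the referenced object is the
      * null object
      * @throws IOException if the referenced object wasn't read or if the length has an unexpected type
      */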
     private COSNumber getLength(final COSBase lengthBaseObj) throws IOException
     {
         // no length entry at all, the caller has to decide how to deal with it
         if (lengthBaseObj == null)
         {
             return null;
         }
         // maybe length was given directly
         if (lengthBaseObj instanceof COSNumber)
         {
             return (COSNumber) lengthBaseObj;
         }
         // length in referenced object
         if (lengthBaseObj instanceof COSObject)
         {
             COSObject lengthObj = (COSObject) lengthBaseObj;
             COSBase length = lengthObj.getObject();
             if (length == null)
             {
                 throw new IOException("Length object content was not read.");
             }
             if (COSNull.NULL == length)
             {
                 // the message takes exactly one argument (the object key), a second placeholder
                 // would be written to the log unresolved
                 LOG.warn("Length object ({}) not found", lengthObj.getKey());
                 return null;
             }
             if (length instanceof COSNumber)
             {
                 return (COSNumber) length;
             }
             throw new IOException("Wrong type of referenced length object " + lengthObj + ": "
                     + length.getClass().getSimpleName());
         }
         // neither a number nor a reference
         throw new IOException(
                 "Wrong type of length object: " + lengthBaseObj.getClass().getSimpleName());
     }

     /**
      * This will read a COSStream from the input stream using length attribute within dictionary. If
      * length attribute is an indirect reference it is first resolved to get the stream length. This
      * means we copy stream data without testing for 'endstream' or 'endobj' and thus it is no
      * problem if these keywords occur within stream. We require 'endstream' to be found after
      * stream data is read.
      *
      * @param dic dictionary that goes with this stream.
      *
      * @return parsed pdf stream.
      *
      * @throws IOException if an error occurred reading the stream, like problems with reading
      * length attribute, stream does not end with 'endstream' after data read, stream too short etc.
      */
     protected COSStream parseCOSStream(COSDictionary dic) throws IOException
     {
         // read 'stream'; this was already tested in parseObjectsDynamically()
         readString();

         skipWhiteSpaces();

         /*
          * This needs to be dic.getItem because when we are parsing, the underlying object might still be null.
          */
         COSNumber streamLengthObj = getLength(dic.getItem(COSName.LENGTH));
         if (streamLengthObj == null)
         {
             // a missing length is only tolerated in lenient mode
             if (!isLenient)
             {
                 throw new IOException("Missing length for stream.");
             }
             LOG.warn(
                     "The stream doesn't provide any stream length, using fallback readUntilEnd, at offset {}",
                     source.getPosition());
         }

         long streamStartPosition = source.getPosition();
         long streamLength;
         if (streamLengthObj != null && validateStreamLength(streamLengthObj.longValue()))
         {
             streamLength = streamLengthObj.longValue();
             // skip stream; the long value has to be used here as well, intValue() would
             // truncate a length beyond Integer.MAX_VALUE and the seek would end up somewhere
             // within the stream data instead of in front of 'endstream'
             source.seek(source.getPosition() + streamLength);
         }
         else
         {
             // no usable length, search for the end of the stream
             streamLength = readUntilEndStream(new EndstreamFilterStream());
         }
         String endStream = readString();
         final int endStreamKeywordLength = ENDSTREAM_STRING.length();
         if (endStream.equals("endobj") && isLenient)
         {
             LOG.warn("stream ends with 'endobj' instead of 'endstream' at offset {}",
                     source.getPosition());
             // avoid follow-up warning about missing endobj
             source.rewind(ENDOBJ.length);
         }
         else if (endStream.length() > endStreamKeywordLength && isLenient
                 && endStream.startsWith(ENDSTREAM_STRING))
         {
             LOG.warn("stream ends with '{}' instead of 'endstream' at offset {}", endStream,
                     source.getPosition());
             // unread the "extra" bytes following the keyword
             source.rewind(endStream.substring(endStreamKeywordLength)
                     .getBytes(StandardCharsets.ISO_8859_1).length);
         }
         else if (!endStream.equals(ENDSTREAM_STRING))
         {
             throw new IOException(
                     "Error reading stream, expected='endstream' actual='"
                     + endStream + "' at offset " + source.getPosition());
         }
         return document.createCOSStream(dic, streamStartPosition, streamLength);
     }

     /**
      * This method will read through the current stream object until
      * we find the keyword "endstream" meaning we're at the end of this
      * object. Some pdf files, however, forget to write some endstream tags
      * and just close off objects with an "endobj" tag so we have to handle
      * this case as well.
      *
      * This method is optimized using buffered IO and reduced number of
      * byte compare operations.
      *
      * @param out  stream we write out to.
      *
      * @throws IOException if something went wrong
      */
     private long readUntilEndStream(final EndstreamFilterStream out) throws IOException
     {
         // number of bytes available in strmBuf for the current iteration
         int bufSize;
         // number of leading bytes of the current keyword which are matched so far
         int charMatchCount = 0;
         // keyword currently searched for, temporarily switched to ENDOBJ further below
         byte[] keyw = ENDSTREAM;

         // last character position of shortest keyword ('endobj')
         final int quickTestOffset = 5;

         // read next chunk into buffer; already matched chars are added to beginning of buffer
         while ( ( bufSize = source.read( strmBuf, charMatchCount, STRMBUFLEN - charMatchCount ) ) > 0 )
         {
             // the bytes carried over from the previous chunk belong to the buffer content as well
             bufSize += charMatchCount;

             // start behind the carried over bytes, they were compared already
             int bIdx = charMatchCount;
             int quickTestIdx;

             // iterate over buffer, trying to find keyword match
             for ( int maxQuicktestIdx = bufSize - quickTestOffset; bIdx < bufSize; bIdx++ )
             {
                 // reduce compare operations by first test last character we would have to
                 // match if current one matches; if it is not a character from keywords
                 // we can move behind the test character; this shortcut is inspired by the
                 // Boyer-Moore string search algorithm and can reduce parsing time by approx. 20%
                 quickTestIdx = bIdx + quickTestOffset;
                 // the shortcut is only taken if no match is in progress and the test index
                 // stays below maxQuicktestIdx
                 if (charMatchCount == 0 && quickTestIdx < maxQuicktestIdx)
                 {
                     final byte ch = strmBuf[quickTestIdx];
                     // only bytes within 'a'..'t' are treated as possible keyword characters
                     if ( ( ch > 't' ) || ( ch < 'a' ) )
                     {
                         // last character we would have to match if current character would match
                         // is not a character from keywords -> jump behind and start over
                         bIdx = quickTestIdx;
                         continue;
                     }
                 }

                 // could be negative - but we only compare to ASCII
                 final byte ch = strmBuf[bIdx];

                 if ( ch == keyw[ charMatchCount ] )
                 {
                     if ( ++charMatchCount == keyw.length )
                     {
                         // match found
                         // move the index behind the keyword so that the content length
                         // calculation below excludes the whole keyword
                         bIdx++;
                         break;
                     }
                 }
                 else
                 {
                     // three bytes are matched (presumably the prefix shared by both keywords)
                     // and the current byte is the fourth byte of ENDOBJ
                     if ( ( charMatchCount == 3 ) && ( ch == ENDOBJ[ charMatchCount ] ) )
                     {
                         // maybe ENDSTREAM is missing but we could have ENDOBJ
                         keyw = ENDOBJ;
                         charMatchCount++;
                     }
                     else
                     {
                         // no match; incrementing match start by 1 would be dumb since we already know
                         // matched chars depending on current char read we may already have beginning
                         // of a new match: 'e': first char matched; 'n': if we are at match position
                         // idx 7 we already read 'e' thus 2 chars matched for each other char we have
                         // to start matching first keyword char beginning with next read position
                         charMatchCount = ( ch == E ) ? 1 : ( ( ch == N ) && ( charMatchCount == 7 ) ) ? 2 : 0;
                         // search again for 'endstream'
                         keyw = ENDSTREAM;
                     }
                 }
             }

             // number of bytes in front of the (partially or completely) matched keyword
             int contentBytes = Math.max( 0, bIdx - charMatchCount );

             // write buffer content until first matched char to output stream
             if ( contentBytes > 0 )
             {
                 out.filter(strmBuf, 0, contentBytes);
             }
             if ( charMatchCount == keyw.length )
             {
                 // keyword matched; unread matched keyword (endstream/endobj) and following buffered content
                 source.rewind( bufSize - contentBytes );
                 break;
             }
             else
             {
                 // copy matched chars at start of buffer
                 // so that the next chunk is read behind them and the match can be continued
                 System.arraycopy( keyw, 0, strmBuf, 0, charMatchCount );
             }
         }
         // this writes a lonely CR or drops trailing CR LF and LF
         // the returned value is the length calculated by the filter stream
         return out.calculateLength();
     }

     /**
      * Check if the given stream length is plausible, i.e. if the stream data starting at the
      * current position is followed by the keyword 'endstream' after the given number of bytes.
      * The current position of the source is restored before returning.
      *
      * @param streamLength the length of the stream to be checked
      * @return true if the length is valid, false if the expected end is out of range or isn't
      * followed by 'endstream'
      * @throws IOException if something went wrong when reading from the source
      */
     private boolean validateStreamLength(long streamLength) throws IOException
     {
         long originOffset = source.getPosition();
         long expectedEndOfStream = originOffset + streamLength;
         // a negative length as well as a length overflowing the addition leads to an expected
         // end in front of the start of the stream; such a value must not be passed to seek()
         // but has to be treated like any other length pointing outside of the stream data
         boolean endIsOutOfRange = streamLength < 0 || expectedEndOfStream < originOffset
                 || expectedEndOfStream > fileLen;
         if (endIsOutOfRange)
         {
             LOG.warn(
                     "The end of the stream is out of range, using workaround to read the stream, stream start position: {}, length: {}, expected end position: {}",
                     originOffset, streamLength, expectedEndOfStream);
             return false;
         }
         // jump to the expected end and check for the keyword, white spaces in between are ignored
         source.seek(expectedEndOfStream);
         skipSpaces();
         boolean streamLengthIsValid = isString(ENDSTREAM);
         if (!streamLengthIsValid)
         {
             LOG.warn(
                     "The end of the stream doesn't point to the correct offset, using workaround to read the stream, stream start position: {}, length: {}, expected end position: {}",
                     originOffset, streamLength, expectedEndOfStream);
         }
         // go back to the start of the stream data
         source.seek(originOffset);
         return streamLengthIsValid;
     }

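     /**
      * Check if the cross reference table/stream can be found at the given offset.
      *
      * @param startXRefOffset the offset at which the cross reference table/stream is expected
      * @return the given offset if it is valid or if the parser isn't lenient, otherwise the
      * result of the search for a fixed offset, or -1 if no valid offset can be determined
      * @throws IOException if something went wrong
      */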
     private long checkXRefOffset(long startXRefOffset) throws IOException
     {
         // repair mode is available in lenient mode only
         if (isLenient)
         {
             source.seek(startXRefOffset);
             skipSpaces();
             if (!isString(XREF_TABLE))
             {
                 if (startXRefOffset <= 0)
                 {
                     // can't find a valid offset
                     return -1;
                 }
                 // no xref table, it has to be a xref stream or the offset needs to be fixed
                 return checkXRefStreamOffset(startXRefOffset) ? startXRefOffset
                         : calculateXRefFixedOffset(startXRefOffset);
             }
         }
         // either nothing was checked or a xref table was found at the given offset
         return startXRefOffset;
     }

     /**
      * Check if the cross reference stream can be found at the current offset.
      *
      * @param startXRefOffset the expected start offset of the XRef stream
      * @return true if a xref stream was found at the given offset or if no check is performed, false otherwise
      * @throws IOException if something went wrong
      */
     private boolean checkXRefStreamOffset(long startXRefOffset) throws IOException
     {
         // nothing is checked in non-lenient mode or for an offset of 0
         if (!isLenient || startXRefOffset == 0)
         {
             return true;
         }
         // the byte in front of the offset has to be a whitespace
         source.seek(startXRefOffset - 1);
         final int precedingValue = source.read();
         if (!isWhitespace(precedingValue))
         {
             return false;
         }
         // the object number has to follow, i.e. the next non space character has to be a digit
         skipSpaces();
         if (!isDigit())
         {
             return false;
         }
         boolean xrefStreamFound = false;
         try
         {
             // read the object header "<number> <generation> obj"
             readObjectNumber();
             readGenerationNumber();
             readExpectedString(OBJ_MARKER, true);
             // the type of the dictionary is checked to avoid false positives
             final COSDictionary streamDictionary = parseCOSDictionary(false);
             source.seek(startXRefOffset);
             xrefStreamFound = "XRef".equals(streamDictionary.getNameAsString(COSName.TYPE));
         }
         catch (IOException exception)
         {
             // there wasn't an object of a xref stream
             LOG.debug("No Xref stream at given location {}", startXRefOffset, exception);
             source.seek(startXRefOffset);
         }
         return xrefStreamFound;
     }

     /**
      * Try to find a fixed offset for the given xref table/stream.
      *
      * @param objectOffset the given offset where to look at
      * @return the fixed offset
      *
      * @throws IOException if something went wrong
      */
     private long calculateXRefFixedOffset(long objectOffset) throws IOException
     {
         if (objectOffset >= 0)
         {
             // look up the given offset among the xref tables/streams found by a brute force search
             final long fixedOffset = getBruteForceParser().bfSearchForXRef(objectOffset);
             if (fixedOffset > -1)
             {
                 LOG.debug("Fixed reference for xref table/stream {} -> {}", objectOffset,
                         fixedOffset);
                 return fixedOffset;
             }
             LOG.error("Can't find the object xref table/stream at offset {}", objectOffset);
         }
         else
         {
             LOG.error("Invalid object offset {} when searching for a xref table/stream",
                     objectOffset);
         }
         // 0 signals that no fixed offset is available
         return 0;
     }

     /**
      * Check all non-negative offsets of the given xref table by trying to find the object at its
      * offset. Entries whose object key had to be corrected are replaced within the given map.
      *
      * @param xrefOffset the xref table to be checked, may be null
      * @return false if at least one object couldn't be found at its offset, true otherwise
      * @throws IOException if something went wrong
      */
     private boolean validateXrefOffsets(Map<COSObjectKey, Long> xrefOffset) throws IOException
     {
         if (xrefOffset == null)
         {
             return true;
         }
         // keys confirmed as they are
         final Set<COSObjectKey> confirmedKeys = new HashSet<>();
         // original key -> key found at the offset of the original key
         final Map<COSObjectKey, COSObjectKey> replacedKeys = new HashMap<>();
         for (Entry<COSObjectKey, Long> xrefEntry : xrefOffset.entrySet())
         {
             final Long offset = xrefEntry.getValue();
             // a negative offset number represents an object number itself
             // see type 2 entry in xref stream
             if (offset == null || offset < 0)
             {
                 continue;
             }
             final COSObjectKey expectedKey = xrefEntry.getKey();
             final COSObjectKey actualKey = findObjectKey(expectedKey, offset, xrefOffset);
             if (actualKey == null)
             {
                 LOG.debug(
                         "Stop checking xref offsets as at least one ({}) couldn't be dereferenced",
                         expectedKey);
                 return false;
             }
             // the very same instance is returned if the key didn't need to be fixed
             if (actualKey == expectedKey)
             {
                 confirmedKeys.add(expectedKey);
             }
             else
             {
                 // the key was fixed - the map is updated later, after the iteration
                 replacedKeys.put(expectedKey, actualKey);
             }
         }
         // collect the offsets of the fixed keys before any entry is removed
         final Map<COSObjectKey, Long> replacedPointers = new HashMap<>();
         for (Entry<COSObjectKey, COSObjectKey> replacement : replacedKeys.entrySet())
         {
             final COSObjectKey fixedKey = replacement.getValue();
             // only replace entries, if the original entry does not point to a valid object
             if (!confirmedKeys.contains(fixedKey))
             {
                 replacedPointers.put(fixedKey, xrefOffset.get(replacement.getKey()));
             }
         }
         // remove old invalid, as some might not be replaced
         for (COSObjectKey invalidKey : replacedKeys.keySet())
         {
             xrefOffset.remove(invalidKey);
         }
         xrefOffset.putAll(replacedPointers);
         return true;
     }

     /**
      * Check the XRef table by dereferencing all objects and fixing the offset if necessary.
      *
      * @throws IOException if something went wrong.
      */
     private void checkXrefOffsets() throws IOException
     {
         final Map<COSObjectKey, Long> xrefTable = xrefTrailerResolver.getXrefTable();
         if (validateXrefOffsets(xrefTable))
         {
             // all offsets are fine, nothing to repair
             return;
         }
         final Map<COSObjectKey, Long> bruteForceOffsets = getBruteForceParser()
                 .getBFCOSObjectOffsets();
         if (bruteForceOffsets.isEmpty())
         {
             // the brute force search didn't find anything, keep the table as it is
             return;
         }
         LOG.debug("Replaced read xref table with the results of a brute force search");
         xrefTable.clear();
         xrefTable.putAll(bruteForceOffsets);
     }

     /**
      * Check if the given object can be found at the given offset. Returns the provided object key if everything is ok.
      * If the generation number differs it will be fixed and a new object key is returned.
      *
      * @param objectKey the key of object we are looking for
      * @param offset the offset where to look
      * @param xrefOffset a map with all known xref entries
      * @return returns the found/fixed object key
      *
      * @throws IOException if something went wrong
      */
     private COSObjectKey findObjectKey(COSObjectKey objectKey, long offset,
             Map<COSObjectKey, Long> xrefOffset) throws IOException
     {
         // there can't be any object at the very beginning of a pdf
         if (offset < MINIMUM_SEARCH_OFFSET)
         {
             return null;
         }
         try
         {
             source.seek(offset);
             skipWhiteSpaces();
             if (source.getPosition() == offset)
             {
                 // no whitespace was skipped at the offset itself, so have a look at the
                 // character in front of it
                 source.seek(offset - 1);
                 if (source.getPosition() < offset)
                 {
                     if (isDigit())
                     {
                         // walk backwards until the first character which isn't a digit
                         long position = source.getPosition() - 1;
                         source.seek(position);
                         while (isDigit())
                         {
                             position--;
                             source.seek(position);
                         }
                         // read the key starting in front of the given offset
                         final long precedingNumber = readObjectNumber();
                         final int precedingGeneration = readGenerationNumber();
                         final COSObjectKey precedingKey = new COSObjectKey(precedingNumber,
                                 precedingGeneration);
                         final Long knownOffset = xrefOffset.get(precedingKey);
                         // the found object number belongs to another uncompressed object at the
                         // same or nearby offset, something has to be wrong
                         if (knownOffset != null && knownOffset > 0
                                 && Math.abs(offset - knownOffset) < 10)
                         {
                             LOG.debug("Found the object {} instead of {} at offset {} - ignoring",
                                     precedingKey, objectKey, offset);
                             return null;
                         }
                         // something seems to be wrong but it's hard to determine what exactly
                         // -> simply continue at the given offset
                         source.seek(offset);
                     }
                     else
                     {
                         // anything else but a digit may be some garbage of the previous object
                         // -> just ignore it
                         source.read();
                     }
                 }
             }
             // try to read the given object/generation number
             final long foundObjectNumber = readObjectNumber();
             if (objectKey.getNumber() != foundObjectNumber)
             {
                 LOG.warn("found wrong object number. expected [{}] found [{}]",
                         objectKey.getNumber(), foundObjectNumber);
                 if (!isLenient)
                 {
                     return null;
                 }
                 // lenient mode: go on with the object number which was actually found
                 objectKey = new COSObjectKey(foundObjectNumber, objectKey.getGeneration());
             }

             final int foundGeneration = readGenerationNumber();
             // finally try to read the object marker
             readExpectedString(OBJ_MARKER, true);
             final int expectedGeneration = objectKey.getGeneration();
             if (foundGeneration == expectedGeneration)
             {
                 return objectKey;
             }
             if (isLenient && foundGeneration > expectedGeneration)
             {
                 // a higher generation number is accepted and leads to a fixed key
                 return new COSObjectKey(objectKey.getNumber(), foundGeneration);
             }
         }
         catch (IOException exception)
         {
             // Swallow the exception, obviously there isn't any valid object number
             LOG.debug("No valid object at given location {} - ignoring", offset, exception);
         }
         return null;
     }

     /**
      * Returns the brute force parser, the instance is created on first use.
      *
      * @return the brute force parser working on the source and the document of this parser
      * @throws IOException if the brute force parser can't be created
      */
     private BruteForceParser getBruteForceParser() throws IOException
     {
         BruteForceParser parser = bruteForceParser;
         if (parser == null)
         {
             parser = new BruteForceParser(source, document);
             bruteForceParser = parser;
         }
         return parser;
     }

     /**
      * Check if all entries of the pages dictionary are present. Those which can't be dereferenced are removed.
      *
      * @param root the root dictionary of the pdf
      * @throws java.io.IOException if the page tree root is null
      */
     protected void checkPages(COSDictionary root) throws IOException
     {
         // the page tree is only checked (and fixed) if the trailer was rebuilt
         final COSDictionary rebuiltPages = trailerWasRebuild
                 ? root.getCOSDictionary(COSName.PAGES) : null;
         if (rebuiltPages != null)
         {
             // check if all page objects are dereferenced
             checkPagesDictionary(rebuiltPages, new HashSet<>());
         }
         // a root without a pages dictionary is never accepted
         final boolean pagesMissing = root.getCOSDictionary(COSName.PAGES) == null;
         if (pagesMissing)
         {
             throw new IOException("Page tree root must be a dictionary");
         }
     }

     /**
      * Check the kids of the given pages dictionary recursively. Kids which aren't indirect
      * objects, which were already visited as pages node or which can't be dereferenced are
      * removed from the kids array. The count entry of the dictionary is set to the number of
      * pages found.
      *
      * @param pagesDict the pages dictionary to be checked
      * @param set the pages nodes visited so far
      * @return the number of pages found below the given dictionary
      */
     private int checkPagesDictionary(COSDictionary pagesDict, Set<COSObject> set)
     {
         int pageCount = 0;
         final COSArray kids = pagesDict.getCOSArray(COSName.KIDS);
         if (kids != null)
         {
             // the loop runs over the list view while entries are removed from the array itself
             for (COSBase kid : kids.toList())
             {
                 if (!(kid instanceof COSObject) || set.contains(kid))
                 {
                     kids.remove(kid);
                     continue;
                 }
                 final COSObject kidReference = (COSObject) kid;
                 final COSBase kidValue = kidReference.getObject();
                 // object wasn't dereferenced -> remove it
                 if (kidValue == null || kidValue.equals(COSNull.NULL))
                 {
                     LOG.warn("Removed null object {} from pages dictionary", kid);
                     kids.remove(kid);
                     continue;
                 }
                 if (!(kidValue instanceof COSDictionary))
                 {
                     // anything else but a dictionary is neither counted nor removed
                     continue;
                 }
                 final COSDictionary kidDictionary = (COSDictionary) kidValue;
                 final COSName kidType = kidDictionary.getCOSName(COSName.TYPE);
                 if (COSName.PAGES.equals(kidType))
                 {
                     // process nested pages dictionaries
                     set.add(kidReference);
                     pageCount += checkPagesDictionary(kidDictionary, set);
                 }
                 else if (COSName.PAGE.equals(kidType))
                 {
                     // count pages
                     pageCount++;
                 }
             }
         }
         // fix counter
         pagesDict.setInt(COSName.COUNT, pageCount);
         return pageCount;
     }

     /**
      * This will parse the startxref section from the stream and read the offset it contains.
      *
      * @return the startxref value or -1 on parsing error
      * @throws IOException If an IO error occurs.
      */
     private long parseStartXref() throws IOException
     {
         if (!isString(STARTXREF))
         {
             // the keyword is missing at the current position
             return -1;
         }
         // consume the keyword and the spaces following it
         readString();
         skipSpaces();
         // This integer is the byte offset of the first object referenced by the xref or xref stream
         return readLong();
     }

     /**
      * Checks if the given string can be found at the current offset.
      *
      * @param string the bytes of the string to look for
      * @return true if the bytes are in place, false if not
      * @throws IOException if something went wrong
      */
     private boolean isString(byte[] string) throws IOException
     {
         final long startOffset = source.getPosition();
         // read as long as the bytes are matching, the first mismatch stops the comparison
         int matchedBytes = 0;
         while (matchedBytes < string.length && source.read() == string[matchedBytes])
         {
             matchedBytes++;
         }
         // the check must not change the position of the source
         source.seek(startOffset);
         return matchedBytes == string.length;
     }

     /**
      * Checks if the given string can be found at the current offset.
      *
      * @param string the bytes of the string to look for
      * @return true if the bytes are in place, false if not
      * @throws IOException if something went wrong
      */
     protected boolean isString(char[] string) throws IOException
     {
         final long startOffset = source.getPosition();
         boolean allMatching = true;
         // compare one character after another until the first mismatch
         for (int index = 0; allMatching && index < string.length; index++)
         {
             allMatching = source.read() == string[index];
         }
         // the check must not change the position of the source
         source.seek(startOffset);
         return allMatching;
     }

     /**
      * This will parse the trailer from the stream and add it to the state.
      *
      * @return false on parsing error
      * @throws IOException If an IO error occurs.
      */
     private boolean parseTrailer() throws IOException
     {
         final String trailerKeyword = "trailer";
         // parse the last trailer.
         final long trailerOffset = source.getPosition();
         // PDFBOX-1739 skip extra xref entries in RegisSTAR documents
         if (isLenient)
         {
             for (int next = source.peek(); next != 't' && isDigit(next); next = source.peek())
             {
                 // warn only the first time, i.e. as long as nothing was skipped
                 if (source.getPosition() == trailerOffset)
                 {
                     LOG.warn("Expected trailer object at offset {}, keep trying", trailerOffset);
                 }
                 readLine();
             }
         }
         if (source.peek() != 't')
         {
             return false;
         }
         // read "trailer"
         final long keywordOffset = source.getPosition();
         final String keywordLine = readLine();
         if (!keywordLine.trim().equals(trailerKeyword))
         {
             // in some cases the EOL is missing and the trailer immediately
             // continues with "<<" or with a blank character
             // even if this does not comply with PDF reference we want to support as many PDFs as possible
             // Acrobat reader can also deal with this.
             if (!keywordLine.startsWith(trailerKeyword))
             {
                 return false;
             }
             // we can't just unread a portion of the read data as we don't know if the EOL
             // consist of 1 or 2 bytes, so jump back right after "trailer"
             source.seek(keywordOffset + trailerKeyword.length());
         }

         // in some cases the EOL is missing and the trailer continues with " <<"
         // even if this does not comply with PDF reference we want to support as many PDFs as possible
         // Acrobat reader can also deal with this.
         skipSpaces();

         final COSDictionary trailerDictionary = parseCOSDictionary(true);
         xrefTrailerResolver.setTrailer(trailerDictionary);

         skipSpaces();
         return true;
     }

     /**
      * Parse the header of a pdf.
      *
      * @return true if a PDF header was found
      * @throws IOException if something went wrong
      */
     protected boolean parsePDFHeader() throws IOException
     {
         // the generic header parsing is used with the marker and the default version of a pdf
         final boolean pdfHeaderFound = parseHeader(PDF_HEADER, PDF_DEFAULT_VERSION);
         return pdfHeaderFound;
     }

     /**
      * Parse the header of a fdf.
      *
      * @return true if a FDF header was found
      * @throws IOException if something went wrong
      */
     protected boolean parseFDFHeader() throws IOException
     {
         // the generic header parsing is used with the marker and the default version of a fdf
         final boolean fdfHeaderFound = parseHeader(FDF_HEADER, FDF_DEFAULT_VERSION);
         return fdfHeaderFound;
     }

     /**
      * Search for the line containing the given header marker, extract the version following the
      * marker and set it as version of the document. The source is reset to position 0 afterwards.
      *
      * @param headerMarker the marker of the header to look for
      * @param defaultVersion the version to be used if the marker isn't followed by a version
      * @return true if a header was found, false if not
      * @throws IOException if something went wrong or if the version can't be parsed in
      * non-lenient mode
      */
     private boolean parseHeader(String headerMarker, String defaultVersion) throws IOException
     {
         // read first line
         String headerLine = readLine();
         // some pdf-documents are broken and the pdf-version is in one of the following lines
         if (!headerLine.contains(headerMarker))
         {
             boolean keepSearching;
             do
             {
                 headerLine = readLine();
                 // if a line starts with a digit, it has to be the first one with data in it
                 final boolean startsWithDigit = !headerLine.isEmpty()
                         && Character.isDigit(headerLine.charAt(0));
                 keepSearching = !headerLine.contains(headerMarker) && !startsWithDigit;
             }
             while (keepSearching);
         }

         // sometimes there is some garbage in the header before the header
         // actually starts, so lets try to find the header first.
         final int markerIndex = headerLine.indexOf(headerMarker);
         if (markerIndex < 0)
         {
             // nothing found
             source.seek(0);
             return false;
         }
         if (markerIndex > 0)
         {
             // trim off any leading characters
             headerLine = headerLine.substring(markerIndex);
         }

         // the version is expected to consist of the 3 characters following the marker
         final int versionEnd = headerMarker.length() + 3;
         // This is used if there is garbage after the header on the same line
         if (headerLine.startsWith(headerMarker) && !headerLine.matches(headerMarker + "\\d.\\d"))
         {
             if (headerLine.length() >= versionEnd)
             {
                 // cut off the garbage and unread it including an end of line character
                 final String trailingGarbage = headerLine.substring(versionEnd) + "\n";
                 headerLine = headerLine.substring(0, versionEnd);
                 source.rewind(trailingGarbage.getBytes(StandardCharsets.ISO_8859_1).length);
             }
             else
             {
                 // No version number at all, use the given default
                 headerLine = headerMarker + defaultVersion;
                 LOG.debug("No version found, set to {} as default.", defaultVersion);
             }
         }
         float parsedVersion = -1;
         try
         {
             final String[] markerAndVersion = headerLine.split("-");
             if (markerAndVersion.length == 2)
             {
                 parsedVersion = Float.parseFloat(markerAndVersion[1]);
             }
         }
         catch (NumberFormatException exception)
         {
             LOG.debug("Can't parse the header version.", exception);
         }
         if (parsedVersion < 0)
         {
             // only the lenient mode falls back to a fixed version
             if (!isLenient)
             {
                 throw new IOException("Error getting header version: " + headerLine);
             }
             parsedVersion = 1.7f;
         }
         document.setVersion(parsedVersion);
         // rewind
         source.seek(0);
         return true;
     }

     /**
      * Reads a cross reference table from the current position of the source and hands the
      * entries it finds over to the xref trailer resolver.
      * <p>
      * A table is made up of one or more subsections. Each subsection starts with a line holding
      * the number of its first object and the number of entries, followed by the entries
      * themselves. Entries whose last token is "n" and whose offset is greater than zero are
      * registered with the resolver; entries whose third token is "f" are skipped.
      *
      * @param startByteOffset the offset of the table, passed on to the xref trailer resolver
      * when the new xref section is announced
      * @return false if the "xref" keyword is missing, the table is directly followed by the
      * trailer or a subsection header can't be parsed, true otherwise
      * @throws IOException If an IO error occurs or a table entry is corrupt.
      */
     protected boolean parseXrefTable(long startByteOffset) throws IOException
     {
         if (source.peek() != 'x')
         {
             return false;
         }
         String keyword = readString();
         if (!keyword.trim().equals("xref"))
         {
             return false;
         }

         // look at the token following the keyword and push it back afterwards
         String lookAhead = readString();
         byte[] lookAheadBytes = lookAhead.getBytes(StandardCharsets.ISO_8859_1);
         source.rewind(lookAheadBytes.length);

         // announce the start of a new xref section
         xrefTrailerResolver.nextXrefObj(startByteOffset, XRefType.TABLE);

         if (lookAhead.startsWith("trailer"))
         {
             LOG.warn("skipping empty xref table");
             return false;
         }

         // one pass per subsection, a further subsection is expected as long as a digit follows
         do
         {
             // subsection header: "<first object number> <number of entries>"
             String sectionLine = readLine();
             String[] sectionTokens = StringUtil.splitOnSpace(sectionLine);
             if (sectionTokens.length != 2)
             {
                 LOG.warn("Unexpected XRefTable Entry: {}", sectionLine);
                 return false;
             }

             long objectNumber;
             try
             {
                 objectNumber = Long.parseLong(sectionTokens[0]);
             }
             catch (NumberFormatException exception)
             {
                 LOG.warn("XRefTable: invalid ID for the first object: {}", sectionLine);
                 return false;
             }

             int entryCount;
             try
             {
                 entryCount = Integer.parseInt(sectionTokens[1]);
             }
             catch (NumberFormatException exception)
             {
                 LOG.warn("XRefTable: invalid number of objects: {}", sectionLine);
                 return false;
             }

             skipSpaces();
             for (int remaining = entryCount; remaining > 0; remaining--)
             {
                 // stop early at the end of the data, at a delimiter or at the trailer keyword
                 if (source.isEOF() || isEndOfName(source.peek()) || source.peek() == 't')
                 {
                     break;
                 }
                 String entryLine = readLine();
                 String[] entryTokens = StringUtil.splitOnSpace(entryLine);
                 if (entryTokens.length < 3)
                 {
                     LOG.warn("invalid xref line: {}", entryLine);
                     break;
                 }
                 // the entry type is taken from the last token to support the corrupt
                 // table as reported in PDFBOX-474 (XXXX XXX XX n)
                 String lastToken = entryTokens[entryTokens.length - 1];
                 if (lastToken.equals("n"))
                 {
                     try
                     {
                         long entryOffset = Long.parseLong(entryTokens[0]);
                         // entries with an offset of 0 aren't registered
                         if (entryOffset > 0)
                         {
                             int generation = Integer.parseInt(entryTokens[1]);
                             xrefTrailerResolver.setXRef(
                                     new COSObjectKey(objectNumber, generation), entryOffset);
                         }
                     }
                     catch (IllegalArgumentException e)
                     {
                         throw new IOException(e);
                     }
                 }
                 else if (!entryTokens[2].equals("f"))
                 {
                     throw new IOException("Corrupt XRefTable Entry - ObjID:" + objectNumber);
                 }
                 objectNumber++;
                 skipSpaces();
             }
             skipSpaces();
         }
         while (isDigit());
         return true;
     }

     /**
      * Returns the encryption held by this parser. The document must be parsed before this is
      * called.
      *
      * @return The encryption of the document that was parsed.
      *
      * @throws IOException If no document is available, i.e. nothing has been parsed yet.
      */
     protected PDEncryption getEncryption() throws IOException
     {
         if (document != null)
         {
             return encryption;
         }
         throw new IOException("You must parse the document first before calling getEncryption()");
     }

     /**
      * Returns the access permission held by this parser. The document must be parsed before this
      * is called.
      *
      * @return The access permission of the document that was parsed.
      *
      * @throws IOException If no document is available, i.e. nothing has been parsed yet.
      */
     protected AccessPermission getAccessPermission() throws IOException
     {
         if (document != null)
         {
             return accessPermission;
         }
         throw new IOException(
                 "You must parse the document first before calling getAccessPermission()");
     }

     /**
      * Sets up the decryption of the document, if that hasn't been done yet.
      * <p>
      * Nothing happens if the encryption is already set or if the document has no encryption
      * dictionary. Otherwise the decryption material is created, either from the keystore if a
      * keystore input stream is present or from the password alone, and the security handler of
      * the encryption is prepared with it. A present keystore input stream is closed afterwards,
      * whether the preparation succeeded or not.
      *
      * @throws InvalidPasswordException If the password is incorrect.
      * @throws IOException if something went wrong
      */
     protected void prepareDecryption() throws IOException
     {
         if (encryption != null)
         {
             // the encryption has already been set up
             return;
         }
         COSDictionary encryptDict = document.getEncryptionDictionary();
         if (encryptDict == null)
         {
             // no encryption dictionary, nothing to prepare
             return;
         }

         try
         {
             encryption = new PDEncryption(encryptDict);

             DecryptionMaterial material;
             if (keyStoreInputStream == null)
             {
                 material = new StandardDecryptionMaterial(password);
             }
             else
             {
                 KeyStore pkcs12Store = KeyStore.getInstance("PKCS12");
                 pkcs12Store.load(keyStoreInputStream, password.toCharArray());
                 material = new PublicKeyDecryptionMaterial(pkcs12Store, keyAlias, password);
             }

             securityHandler = encryption.getSecurityHandler();
             securityHandler.prepareForDecryption(encryption, document.getDocumentID(), material);
             accessPermission = securityHandler.getCurrentAccessPermission();
         }
         catch (GeneralSecurityException e)
         {
             // an IOException passes through untouched, only security failures are wrapped
             String exceptionName = e.getClass().getSimpleName();
             throw new IOException(
                     "Error (" + exceptionName + ") while creating security handler for decryption",
                     e);
         }
         finally
         {
             if (keyStoreInputStream != null)
             {
                 IOUtils.closeQuietly(keyStoreInputStream);
             }
         }
     }

 }