| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.pdfbox.cos; |
| |
| import java.io.File; |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.pdfbox.exceptions.COSVisitorException; |
| import org.apache.pdfbox.io.RandomAccess; |
| import org.apache.pdfbox.io.RandomAccessBuffer; |
| import org.apache.pdfbox.io.RandomAccessFile; |
| import org.apache.pdfbox.pdfparser.PDFObjectStreamParser; |
| import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface; |
| import org.apache.pdfbox.persistence.util.COSObjectKey; |
| |
| /** |
| * This is the in-memory representation of the PDF document. You need to call |
| * close() on this object when you are done using it!! |
| * |
| * @author <a href="ben@benlitchfield.com">Ben Litchfield</a> |
| * @version $Revision: 1.28 $ |
| */ |
| public class COSDocument extends COSBase |
| { |
| |
| /** |
| * Log instance. |
| */ |
| private static final Log log = LogFactory.getLog(COSDocument.class); |
| |
| private float version; |
| |
| /** |
| * Maps ObjectKeys to a COSObject. Note that references to these objects |
| * are also stored in COSDictionary objects that map a name to a specific object. |
| */ |
| private final Map<COSObjectKey, COSObject> objectPool = |
| new HashMap<COSObjectKey, COSObject>(); |
| |
| /** |
| * Maps object and generation ids to object byte offsets. |
| */ |
| private final Map<COSObjectKey, Integer> xrefTable = |
| new HashMap<COSObjectKey, Integer>(); |
| |
| /** |
| * Document trailer dictionary. |
| */ |
| private COSDictionary trailer; |
| |
| /** |
| * Document signature dictionary |
| */ |
| private COSDictionary signDictionary = null; |
| |
| /** |
| * Some doc |
| */ |
| private SignatureInterface signatureInterface; |
| |
| /** |
| * This file will store the streams in order to conserve memory. |
| */ |
| private final RandomAccess scratchFile; |
| |
| private final File tmpFile; |
| |
| private String headerString = "%PDF-1.4"; |
| |
| private boolean warnMissingClose = true; |
| |
| private int startXref; |
| |
| private boolean closed = false; |
| |
| /** |
| * Flag to skip malformed or otherwise unparseable input where possible. |
| */ |
| private final boolean forceParsing; |
| |
| /** |
| * Constructor that will use the given random access file for storage |
| * of the PDF streams. The client of this method is responsible for |
| * deleting the storage if necessary that this file will write to. The |
| * close method will close the file though. |
| * |
| * @param scratchFile the random access file to use for storage |
| * @param forceParsing flag to skip malformed or otherwise unparseable |
| * document content where possible |
| */ |
| public COSDocument(RandomAccess scratchFile, boolean forceParsing) { |
| this.scratchFile = scratchFile; |
| this.tmpFile = null; |
| this.forceParsing = forceParsing; |
| } |
| |
| /** |
| * Constructor that will use a temporary file in the given directory |
| * for storage of the PDF streams. The temporary file is automatically |
| * removed when this document gets closed. |
| * |
| * @param scratchDir directory for the temporary file, |
| * or <code>null</code> to use the system default |
| * @param forceParsing flag to skip malformed or otherwise unparseable |
| * document content where possible |
| */ |
| public COSDocument(File scratchDir, boolean forceParsing) |
| throws IOException { |
| this.tmpFile = File.createTempFile("pdfbox-", ".tmp", scratchDir); |
| this.scratchFile = new RandomAccessFile(tmpFile, "rw"); |
| this.forceParsing = forceParsing; |
| } |
| |
| /** |
| * Constructor. Uses memory to store stream. |
| * |
| * @throws IOException If there is an error creating the tmp file. |
| */ |
| public COSDocument() throws IOException { |
| this(new RandomAccessBuffer(), false); |
| } |
| |
| /** |
| * Constructor that will create a create a scratch file in the |
| * following directory. |
| * |
| * @param scratchDir The directory to store a scratch file. |
| * |
| * @throws IOException If there is an error creating the tmp file. |
| */ |
| public COSDocument(File scratchDir) throws IOException { |
| this(scratchDir, false); |
| } |
| |
| /** |
| * Constructor that will use the following random access file for storage |
| * of the PDF streams. The client of this method is responsible for deleting |
| * the storage if necessary that this file will write to. The close method |
| * will close the file though. |
| * |
| * @param file The random access file to use for storage. |
| */ |
| public COSDocument(RandomAccess file) { |
| this(file, false); |
| } |
| |
| /** |
| * This will get the scratch file for this document. |
| * |
| * @return The scratch file. |
| */ |
| public RandomAccess getScratchFile() |
| { |
| return scratchFile; |
| } |
| |
| /** |
| * This will get the first dictionary object by type. |
| * |
| * @param type The type of the object. |
| * |
| * @return This will return an object with the specified type. |
| * @throws IOException If there is an error getting the object |
| */ |
| public COSObject getObjectByType( String type ) throws IOException |
| { |
| return getObjectByType( COSName.getPDFName( type ) ); |
| } |
| |
| /** |
| * This will get the first dictionary object by type. |
| * |
| * @param type The type of the object. |
| * |
| * @return This will return an object with the specified type. |
| * @throws IOException If there is an error getting the object |
| */ |
| public COSObject getObjectByType( COSName type ) throws IOException |
| { |
| for( COSObject object : objectPool.values() ) |
| { |
| |
| COSBase realObject = object.getObject(); |
| if( realObject instanceof COSDictionary ) |
| { |
| try |
| { |
| COSDictionary dic = (COSDictionary)realObject; |
| COSName objectType = (COSName)dic.getItem( COSName.TYPE ); |
| if( objectType != null && objectType.equals( type ) ) |
| { |
| return object; |
| } |
| } |
| catch (ClassCastException e) |
| { |
| log.warn(e, e); |
| } |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * This will get all dictionary objects by type. |
| * |
| * @param type The type of the object. |
| * |
| * @return This will return an object with the specified type. |
| * @throws IOException If there is an error getting the object |
| */ |
| public List<COSObject> getObjectsByType( String type ) throws IOException |
| { |
| return getObjectsByType( COSName.getPDFName( type ) ); |
| } |
| |
| /** |
| * This will get a dictionary object by type. |
| * |
| * @param type The type of the object. |
| * |
| * @return This will return an object with the specified type. |
| * @throws IOException If there is an error getting the object |
| */ |
| public List<COSObject> getObjectsByType( COSName type ) throws IOException |
| { |
| List<COSObject> retval = new ArrayList<COSObject>(); |
| for( COSObject object : objectPool.values() ) |
| { |
| COSBase realObject = object.getObject(); |
| if( realObject instanceof COSDictionary ) |
| { |
| try |
| { |
| COSDictionary dic = (COSDictionary)realObject; |
| COSName objectType = (COSName)dic.getItem( COSName.TYPE ); |
| if( objectType != null && objectType.equals( type ) ) |
| { |
| retval.add( object ); |
| } |
| } |
| catch (ClassCastException e) |
| { |
| log.warn(e, e); |
| } |
| } |
| } |
| return retval; |
| } |
| |
| /** |
| * This will print contents to stdout. |
| */ |
| public void print() |
| { |
| for( COSObject object : objectPool.values() ) |
| { |
| System.out.println( object); |
| } |
| } |
| |
| /** |
| * This will set the version of this PDF document. |
| * |
| * @param versionValue The version of the PDF document. |
| */ |
| public void setVersion( float versionValue ) |
| { |
| version = versionValue; |
| } |
| |
| /** |
| * This will get the version of this PDF document. |
| * |
| * @return This documents version. |
| */ |
| public float getVersion() |
| { |
| return version; |
| } |
| |
| /** |
| * This will tell if this is an encrypted document. |
| * |
| * @return true If this document is encrypted. |
| */ |
| public boolean isEncrypted() |
| { |
| boolean encrypted = false; |
| if( trailer != null ) |
| { |
| encrypted = trailer.getDictionaryObject( COSName.ENCRYPT ) != null; |
| } |
| return encrypted; |
| } |
| |
| /** |
| * This will get the encryption dictionary if the document is encrypted or null |
| * if the document is not encrypted. |
| * |
| * @return The encryption dictionary. |
| */ |
| public COSDictionary getEncryptionDictionary() |
| { |
| return (COSDictionary)trailer.getDictionaryObject( COSName.ENCRYPT ); |
| } |
| |
| public SignatureInterface getSignatureInterface() { |
| return signatureInterface; |
| } |
| |
| /** |
| * This will set the encryption dictionary, this should only be called when |
| * encrypting the document. |
| * |
| * @param encDictionary The encryption dictionary. |
| */ |
| public void setEncryptionDictionary( COSDictionary encDictionary ) |
| { |
| trailer.setItem( COSName.ENCRYPT, encDictionary ); |
| } |
| |
| public COSDictionary getLastSignatureDictionary() throws IOException { |
| if (signDictionary == null) |
| { |
| COSObject documentCatalog = getCatalog(); |
| if (documentCatalog != null) |
| { |
| COSDictionary acroForm = (COSDictionary)documentCatalog.getDictionaryObject(COSName.ACRO_FORM); |
| if (acroForm !=null) |
| { |
| COSArray fields = (COSArray)acroForm.getDictionaryObject(COSName.FIELDS); |
| for ( Object object : fields ) |
| { |
| COSObject dict = (COSObject)object; |
| if(dict.getItem(COSName.FT).equals(COSName.SIG)) |
| { |
| COSBase dictionaryObject = dict.getDictionaryObject(COSName.V); |
| |
| if (dictionaryObject != null) |
| { |
| signDictionary = (COSDictionary)dictionaryObject; |
| } |
| } |
| } |
| } |
| } |
| } |
| return signDictionary; |
| } |
| |
| /** |
| * This will get the document ID. |
| * |
| * @return The document id. |
| */ |
| public COSArray getDocumentID() |
| { |
| return (COSArray) getTrailer().getItem(COSName.ID); |
| } |
| |
| /** |
| * This will set the document ID. |
| * |
| * @param id The document id. |
| */ |
| public void setDocumentID( COSArray id ) |
| { |
| getTrailer().setItem(COSName.ID, id); |
| } |
| |
| public void setSignatureInterface(SignatureInterface signatureInterface) { |
| this.signatureInterface = signatureInterface; |
| } |
| |
| /** |
| * This will get the document catalog. |
| * |
| * Maybe this should move to an object at PDFEdit level |
| * |
| * @return catalog is the root of all document activities |
| * |
| * @throws IOException If no catalog can be found. |
| */ |
| public COSObject getCatalog() throws IOException |
| { |
| COSObject catalog = getObjectByType( COSName.CATALOG ); |
| if( catalog == null ) |
| { |
| throw new IOException( "Catalog cannot be found" ); |
| } |
| return catalog; |
| } |
| |
| /** |
| * This will get a list of all available objects. |
| * |
| * @return A list of all objects. |
| */ |
| public List<COSObject> getObjects() |
| { |
| return new ArrayList<COSObject>(objectPool.values()); |
| } |
| |
| /** |
| * This will get the document trailer. |
| * |
| * @return the document trailer dict |
| */ |
| public COSDictionary getTrailer() |
| { |
| return trailer; |
| } |
| |
| /** |
| * // MIT added, maybe this should not be supported as trailer is a persistence construct. |
| * This will set the document trailer. |
| * |
| * @param newTrailer the document trailer dictionary |
| */ |
| public void setTrailer(COSDictionary newTrailer) |
| { |
| trailer = newTrailer; |
| } |
| |
| /** |
| * visitor pattern double dispatch method. |
| * |
| * @param visitor The object to notify when visiting this object. |
| * @return any object, depending on the visitor implementation, or null |
| * @throws COSVisitorException If an error occurs while visiting this object. |
| */ |
| @Override |
| public Object accept(ICOSVisitor visitor) throws COSVisitorException |
| { |
| return visitor.visitFromDocument( this ); |
| } |
| |
| /** |
| * This will close all storage and delete the tmp files. |
| * |
| * @throws IOException If there is an error close resources. |
| */ |
| public void close() throws IOException |
| { |
| if (!closed) { |
| scratchFile.close(); |
| if (tmpFile != null) { |
| tmpFile.delete(); |
| } |
| closed = true; |
| } |
| } |
| |
| /** |
| * Warn the user in the finalizer if he didn't close the PDF document. The method also |
| * closes the document just in case, to avoid abandoned temporary files. It's still a good |
| * idea for the user to close the PDF document at the earliest possible to conserve resources. |
| * @throws IOException if an error occurs while closing the temporary files |
| */ |
| @Override |
| protected void finalize() throws IOException |
| { |
| if (!closed) { |
| if (warnMissingClose) { |
| log.warn( "Warning: You did not close a PDF Document" ); |
| } |
| close(); |
| } |
| } |
| |
| /** |
| * Controls whether this instance shall issue a warning if the PDF document wasn't closed |
| * properly through a call to the {@link #close()} method. If the PDF document is held in |
| * a cache governed by soft references it is impossible to reliably close the document |
| * before the warning is raised. By default, the warning is enabled. |
| * @param warn true enables the warning, false disables it. |
| */ |
| public void setWarnMissingClose(boolean warn) |
| { |
| this.warnMissingClose = warn; |
| } |
| |
| /** |
| * @return Returns the headerString. |
| */ |
| public String getHeaderString() |
| { |
| return headerString; |
| } |
| /** |
| * @param header The headerString to set. |
| */ |
| public void setHeaderString(String header) |
| { |
| headerString = header; |
| } |
| |
| /** |
| * This method will search the list of objects for types of ObjStm. If it finds |
| * them then it will parse out all of the objects from the stream that is contains. |
| * |
| * @throws IOException If there is an error parsing the stream. |
| */ |
| public void dereferenceObjectStreams() throws IOException |
| { |
| for( COSObject objStream : getObjectsByType( COSName.OBJ_STM ) ) |
| { |
| COSStream stream = (COSStream)objStream.getObject(); |
| PDFObjectStreamParser parser = |
| new PDFObjectStreamParser(stream, this, forceParsing); |
| parser.parse(); |
| for( COSObject next : parser.getObjects() ) |
| { |
| COSObjectKey key = new COSObjectKey( next ); |
| if(objectPool.get(key) == null || objectPool.get(key).getObject() == null) |
| { |
| COSObject obj = getObjectFromPool(key); |
| obj.setObject(next.getObject()); |
| } |
| } |
| } |
| } |
| |
| /** |
| * This will get an object from the pool. |
| * |
| * @param key The object key. |
| * |
| * @return The object in the pool or a new one if it has not been parsed yet. |
| * |
| * @throws IOException If there is an error getting the proxy object. |
| */ |
| public COSObject getObjectFromPool(COSObjectKey key) throws IOException |
| { |
| COSObject obj = null; |
| if( key != null ) |
| { |
| obj = objectPool.get(key); |
| } |
| if (obj == null) |
| { |
| // this was a forward reference, make "proxy" object |
| obj = new COSObject(null); |
| if( key != null ) |
| { |
| obj.setObjectNumber( COSInteger.get( key.getNumber() ) ); |
| obj.setGenerationNumber( COSInteger.get( key.getGeneration() ) ); |
| objectPool.put(key, obj); |
| } |
| } |
| return obj; |
| } |
| |
| /** |
| * Removes an object from the object pool. |
| * @param key the object key |
| * @return the object that was removed or null if the object was not found |
| */ |
| public COSObject removeObject(COSObjectKey key) |
| { |
| COSObject obj = objectPool.remove(key); |
| return obj; |
| } |
| |
| /** |
| * Populate XRef HashMap with given values. |
| * Each entry maps ObjectKeys to byte offsets in the file. |
| * @param _xrefTable xref table entries to be added |
| */ |
| public void addXRefTable( Map<COSObjectKey, Integer> xrefTable ) |
| { |
| this.xrefTable.putAll( xrefTable ); |
| } |
| |
| /** |
| * Returns the xrefTable which is a mapping of ObjectKeys |
| * to byte offsets in the file. |
| * @return mapping of ObjectsKeys to byte offsets |
| */ |
| public Map<COSObjectKey, Integer> getXrefTable() |
| { |
| return xrefTable; |
| } |
| |
| /** |
| * This method set the startxref value of the document. This will only |
| * be needed for incremental updates. |
| * |
| * @param readInt |
| */ |
| public void setStartXref(int startXref) |
| { |
| this.startXref = startXref; |
| } |
| |
| /** |
| * Return the startXref Position of the parsed document. This will only be needed for incremental updates. |
| * |
| * @return a int with the old position of the startxref |
| */ |
| public int getStartXref() |
| { |
| return startXref; |
| } |
| } |