blob: 6f5299eade0ccf767310d57eb82bf6903714e361 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdmodel;
import java.awt.Point;
import java.awt.image.DataBuffer;
import java.awt.image.Raster;
import java.awt.image.WritableRaster;
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.ttf.TrueTypeFont;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSUpdateInfo;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdfwriter.COSWriter;
import org.apache.pdfbox.pdmodel.common.COSArrayList;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.PDEncryption;
import org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandlerFactory;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.ExternalSigningSupport;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureOptions;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SigningSupport;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField;
/**
* This is the in-memory representation of the PDF document.
* The #close() method must be called once the document is no longer needed.
*
* @author Ben Litchfield
*/
public class PDDocument implements Closeable
{
/**
* For signing: large reserve byte range used as placeholder in the saved PDF until the actual
* length of the PDF is known. You'll need to fetch (with
* {@link PDSignature#getByteRange()} ) and reassign this yourself (with
* {@link PDSignature#setByteRange(int[])} ) only if you call
* {@link #saveIncrementalForExternalSigning(java.io.OutputStream) saveIncrementalForExternalSigning()}
* twice.
*/
private static final int[] RESERVE_BYTE_RANGE = new int[] { 0, 1000000000, 1000000000, 1000000000 };
private static final Log LOG = LogFactory.getLog(PDDocument.class);
/**
* avoid concurrency issues with PDDeviceRGB
*/
static
{
try
{
WritableRaster raster = Raster.createBandedRaster(DataBuffer.TYPE_BYTE, 1, 1, 3, new Point(0, 0));
PDDeviceRGB.INSTANCE.toRGBImage(raster);
}
catch (IOException ex)
{
LOG.debug("voodoo error", ex);
}
}
private final COSDocument document;
// cached values
private PDDocumentInformation documentInformation;
private PDDocumentCatalog documentCatalog;
// the encryption will be cached here. When the document is decrypted then
// the COSDocument will not have an "Encrypt" dictionary anymore and this object must be used
private PDEncryption encryption;
// holds a flag which tells us if we should remove all security from this documents.
private boolean allSecurityToBeRemoved;
// keep tracking customized documentId for the trailer. If null, a new id will be generated
// this ID doesn't represent the actual documentId from the trailer
private Long documentId;
// the pdf to be read
private final RandomAccessRead pdfSource;
// the access permissions of the document
private AccessPermission accessPermission;
// fonts to subset before saving
private final Set<PDFont> fontsToSubset = new HashSet<>();
// fonts to close when closing document
private final Set<TrueTypeFont> fontsToClose = new HashSet<>();
// Signature interface
private SignatureInterface signInterface;
// helper class used to create external signature
private SigningSupport signingSupport;
// document-wide cached resources
private ResourceCache resourceCache = new DefaultResourceCache();
// to make sure only one signature is added
private boolean signatureAdded = false;
/**
* Creates an empty PDF document.
* You need to add at least one page for the document to be valid.
*/
public PDDocument()
{
this(MemoryUsageSetting.setupMainMemoryOnly());
}
/**
* Creates an empty PDF document.
* You need to add at least one page for the document to be valid.
*
* @param memUsageSetting defines how memory is used for buffering PDF streams
*/
public PDDocument(MemoryUsageSetting memUsageSetting)
{
document = new COSDocument(memUsageSetting);
pdfSource = null;
// First we need a trailer
COSDictionary trailer = new COSDictionary();
document.setTrailer(trailer);
// Next we need the root dictionary.
COSDictionary rootDictionary = new COSDictionary();
trailer.setItem(COSName.ROOT, rootDictionary);
rootDictionary.setItem(COSName.TYPE, COSName.CATALOG);
rootDictionary.setItem(COSName.VERSION, COSName.getPDFName("1.4"));
// next we need the pages tree structure
COSDictionary pages = new COSDictionary();
rootDictionary.setItem(COSName.PAGES, pages);
pages.setItem(COSName.TYPE, COSName.PAGES);
COSArray kidsArray = new COSArray();
pages.setItem(COSName.KIDS, kidsArray);
pages.setItem(COSName.COUNT, COSInteger.ZERO);
}
/**
* Constructor that uses an existing document. The COSDocument that is passed in must be valid.
*
* @param doc The COSDocument that this document wraps.
*/
public PDDocument(COSDocument doc)
{
this(doc, null);
}
/**
* Constructor that uses an existing document. The COSDocument that is passed in must be valid.
*
* @param doc The COSDocument that this document wraps.
* @param source input representing the pdf
*/
public PDDocument(COSDocument doc, RandomAccessRead source)
{
this(doc, source, null);
}
/**
* Constructor that uses an existing document. The COSDocument that is passed in must be valid.
*
* @param doc The COSDocument that this document wraps.
* @param source input representing the pdf
* @param permission he access permissions of the pdf
*
*/
public PDDocument(COSDocument doc, RandomAccessRead source, AccessPermission permission)
{
document = doc;
pdfSource = source;
accessPermission = permission;
}
/**
* This will add a page to the document. This is a convenience method, that will add the page to the root of the
* hierarchy and set the parent of the page to the root.
*
* @param page The page to add to the document.
*/
public void addPage(PDPage page)
{
getPages().add(page);
}
/**
* Add parameters of signature to be created externally using default signature options. See
* {@link #saveIncrementalForExternalSigning(OutputStream)} method description on external
* signature creation scenario details.
* <p>
* Only one signature may be added in a document. To sign several times,
* load document, add signature, save incremental and close again.
*
* @param sigObject is the PDSignatureField model
* @throws IOException if there is an error creating required fields
* @throws IllegalStateException if one attempts to add several signature
* fields.
*/
public void addSignature(PDSignature sigObject) throws IOException
{
addSignature(sigObject, new SignatureOptions());
}
/**
* Add parameters of signature to be created externally. See
* {@link #saveIncrementalForExternalSigning(OutputStream)} method description on external
* signature creation scenario details.
* <p>
* Only one signature may be added in a document. To sign several times,
* load document, add signature, save incremental and close again.
*
* @param sigObject is the PDSignatureField model
* @param options signature options
* @throws IOException if there is an error creating required fields
* @throws IllegalStateException if one attempts to add several signature
* fields.
*/
public void addSignature(PDSignature sigObject, SignatureOptions options) throws IOException
{
addSignature(sigObject, null, options);
}
/**
* Add a signature to be created using the instance of given interface.
* <p>
* Only one signature may be added in a document. To sign several times,
* load document, add signature, save incremental and close again.
*
* @param sigObject is the PDSignatureField model
* @param signatureInterface is an interface whose implementation provides
* signing capabilities. Can be null if external signing if used.
* @throws IOException if there is an error creating required fields
* @throws IllegalStateException if one attempts to add several signature
* fields.
*/
public void addSignature(PDSignature sigObject, SignatureInterface signatureInterface) throws IOException
{
addSignature(sigObject, signatureInterface, new SignatureOptions());
}
/**
* This will add a signature to the document. If the 0-based page number in the options
* parameter is smaller than 0 or larger than max, the nearest valid page number will be used
* (i.e. 0 or max) and no exception will be thrown.
* <p>
* Only one signature may be added in a document. To sign several times,
* load document, add signature, save incremental and close again.
*
* @param sigObject is the PDSignatureField model
* @param signatureInterface is an interface whose implementation provides
* signing capabilities. Can be null if external signing if used.
* @param options signature options
* @throws IOException if there is an error creating required fields
* @throws IllegalStateException if one attempts to add several signature
* fields.
*/
public void addSignature(PDSignature sigObject, SignatureInterface signatureInterface,
SignatureOptions options) throws IOException
{
if (signatureAdded)
{
throw new IllegalStateException("Only one signature may be added in a document");
}
signatureAdded = true;
// Reserve content
// We need to reserve some space for the signature. Some signatures including
// big certificate chain and we need enough space to store it.
int preferredSignatureSize = options.getPreferredSignatureSize();
if (preferredSignatureSize > 0)
{
sigObject.setContents(new byte[preferredSignatureSize]);
}
else
{
sigObject.setContents(new byte[SignatureOptions.DEFAULT_SIGNATURE_SIZE]);
}
// Reserve ByteRange, will be overwritten in COSWriter
sigObject.setByteRange(RESERVE_BYTE_RANGE);
signInterface = signatureInterface;
// Create SignatureForm for signature and append it to the document
// Get the first valid page
int pageCount = getNumberOfPages();
if (pageCount == 0)
{
throw new IllegalStateException("Cannot sign an empty document");
}
int startIndex = Math.min(Math.max(options.getPage(), 0), pageCount - 1);
PDPage page = getPage(startIndex);
// Get the AcroForm from the Root-Dictionary and append the annotation
PDDocumentCatalog catalog = getDocumentCatalog();
PDAcroForm acroForm = catalog.getAcroForm();
catalog.getCOSObject().setNeedToBeUpdated(true);
if (acroForm == null)
{
acroForm = new PDAcroForm(this);
catalog.setAcroForm(acroForm);
}
else
{
acroForm.getCOSObject().setNeedToBeUpdated(true);
}
PDSignatureField signatureField = null;
if (!(acroForm.getCOSObject().getDictionaryObject(COSName.FIELDS) instanceof COSArray))
{
acroForm.getCOSObject().setItem(COSName.FIELDS, new COSArray());
}
else
{
COSArray fieldArray = (COSArray) acroForm.getCOSObject().getDictionaryObject(COSName.FIELDS);
fieldArray.setNeedToBeUpdated(true);
signatureField = findSignatureField(acroForm.getFieldIterator(), sigObject);
}
if (signatureField == null)
{
signatureField = new PDSignatureField(acroForm);
// append the signature object
signatureField.setValue(sigObject);
// backward linking
signatureField.getWidgets().get(0).setPage(page);
}
else
{
sigObject.getCOSObject().setNeedToBeUpdated(true);
}
// TODO This "overwrites" the settings of the original signature field which might not be intended by the user
// better make it configurable (not all users need/want PDF/A but their own setting):
// to conform PDF/A-1 requirement:
// The /F key's Print flag bit shall be set to 1 and
// its Hidden, Invisible and NoView flag bits shall be set to 0
signatureField.getWidgets().get(0).setPrinted(true);
// Set the AcroForm Fields
List<PDField> acroFormFields = acroForm.getFields();
acroForm.getCOSObject().setDirect(true);
acroForm.setSignaturesExist(true);
acroForm.setAppendOnly(true);
boolean checkFields = checkSignatureField(acroForm.getFieldIterator(), signatureField);
if (checkFields)
{
signatureField.getCOSObject().setNeedToBeUpdated(true);
}
else
{
acroFormFields.add(signatureField);
}
// Get the object from the visual signature
COSDocument visualSignature = options.getVisualSignature();
// Distinction of case for visual and non-visual signature
if (visualSignature == null)
{
prepareNonVisibleSignature(signatureField);
return;
}
prepareVisibleSignature(signatureField, acroForm, visualSignature);
// Create Annotation / Field for signature
List<PDAnnotation> annotations = page.getAnnotations();
// Make /Annots a direct object to avoid problem if it is an existing indirect object:
// it would not be updated in incremental save, and if we'd set the /Annots array "to be updated"
// while keeping it indirect, Adobe Reader would claim that the document had been modified.
page.setAnnotations(annotations);
// Get the annotations of the page and append the signature-annotation to it
// take care that page and acroforms do not share the same array (if so, we don't need to add it twice)
if (!(annotations instanceof COSArrayList &&
acroFormFields instanceof COSArrayList &&
((COSArrayList<?>) annotations).toList().equals(((COSArrayList<?>) acroFormFields).toList()) &&
checkFields))
{
PDAnnotationWidget widget = signatureField.getWidgets().get(0);
// use check to prevent the annotation widget from appearing twice
if (checkSignatureAnnotation(annotations, widget))
{
widget.getCOSObject().setNeedToBeUpdated(true);
}
else
{
annotations.add(widget);
}
}
page.getCOSObject().setNeedToBeUpdated(true);
}
/**
* Search acroform fields for signature field with specific signature dictionary.
*
* @param fieldIterator iterator on all fields.
* @param sigObject signature object (the /V part).
* @return a signature field if found, or null if none was found.
*/
private PDSignatureField findSignatureField(Iterator<PDField> fieldIterator, PDSignature sigObject)
{
PDSignatureField signatureField = null;
while (fieldIterator.hasNext())
{
PDField pdField = fieldIterator.next();
if (pdField instanceof PDSignatureField)
{
PDSignature signature = ((PDSignatureField) pdField).getSignature();
if (signature != null && signature.getCOSObject().equals(sigObject.getCOSObject()))
{
signatureField = (PDSignatureField) pdField;
}
}
}
return signatureField;
}
/**
* Check if the field already exists in the field list.
*
* @param fieldIterator iterator on all fields.
* @param signatureField the signature field.
* @return true if the field already existed in the field list, false if not.
*/
private boolean checkSignatureField(Iterator<PDField> fieldIterator, PDSignatureField signatureField)
{
while (fieldIterator.hasNext())
{
PDField field = fieldIterator.next();
if (field instanceof PDSignatureField
&& field.getCOSObject().equals(signatureField.getCOSObject()))
{
return true;
}
}
return false;
}
/**
* Check if the widget already exists in the annotation list
*
* @param acroFormFields the list of AcroForm fields.
* @param signatureField the signature field.
* @return true if the widget already existed in the annotation list, false if not.
*/
private boolean checkSignatureAnnotation(List<PDAnnotation> annotations, PDAnnotationWidget widget)
{
for (PDAnnotation annotation : annotations)
{
if (annotation.getCOSObject().equals(widget.getCOSObject()))
{
return true;
}
}
return false;
}
private void prepareVisibleSignature(PDSignatureField signatureField, PDAcroForm acroForm,
COSDocument visualSignature)
{
// Obtain visual signature object
boolean annotFound = false;
boolean sigFieldFound = false;
// get all objects
List<COSObject> cosObjects = visualSignature.getXrefTable().keySet().stream() //
.map(visualSignature::getObjectFromPool) //
.collect(Collectors.toList());
for (COSObject cosObject : cosObjects)
{
COSBase base = cosObject.getObject();
if (base instanceof COSDictionary)
{
COSDictionary cosBaseDict = (COSDictionary) base;
// Search for signature annotation
if (!annotFound && COSName.ANNOT.equals(cosBaseDict.getCOSName(COSName.TYPE)))
{
assignSignatureRectangle(signatureField, cosBaseDict);
annotFound = true;
}
// Search for signature field
COSDictionary apDict = cosBaseDict.getCOSDictionary(COSName.AP);
if (apDict != null && !sigFieldFound
&& COSName.SIG.equals(cosBaseDict.getCOSName(COSName.FT)))
{
assignAppearanceDictionary(signatureField, apDict);
assignAcroFormDefaultResource(acroForm, cosBaseDict);
sigFieldFound = true;
}
if (annotFound && sigFieldFound)
{
break;
}
}
}
if (!annotFound || !sigFieldFound)
{
throw new IllegalArgumentException("Template is missing required objects");
}
}
private void assignSignatureRectangle(PDSignatureField signatureField, COSDictionary annotDict)
{
// Read and set the rectangle for visual signature
COSArray rectArray = (COSArray) annotDict.getDictionaryObject(COSName.RECT);
PDRectangle rect = new PDRectangle(rectArray);
PDRectangle existingRectangle = signatureField.getWidgets().get(0).getRectangle();
//in case of an existing field keep the original rect
if (existingRectangle == null || existingRectangle.getCOSArray().size() != 4)
{
signatureField.getWidgets().get(0).setRectangle(rect);
}
}
private void assignAppearanceDictionary(PDSignatureField signatureField, COSDictionary apDict)
{
// read and set Appearance Dictionary
PDAppearanceDictionary ap = new PDAppearanceDictionary(apDict);
apDict.setDirect(true);
signatureField.getWidgets().get(0).setAppearance(ap);
}
private void assignAcroFormDefaultResource(PDAcroForm acroForm, COSDictionary newDict)
{
// read and set/update AcroForm default resource dictionary /DR if available
COSBase newBase = newDict.getDictionaryObject(COSName.DR);
if (newBase instanceof COSDictionary)
{
COSDictionary newDR = (COSDictionary) newBase;
PDResources defaultResources = acroForm.getDefaultResources();
if (defaultResources == null)
{
acroForm.getCOSObject().setItem(COSName.DR, newDR);
newDR.setDirect(true);
newDR.setNeedToBeUpdated(true);
}
else
{
COSDictionary oldDR = defaultResources.getCOSObject();
COSBase newXObjectBase = newDR.getItem(COSName.XOBJECT);
COSBase oldXObjectBase = oldDR.getItem(COSName.XOBJECT);
if (newXObjectBase instanceof COSDictionary &&
oldXObjectBase instanceof COSDictionary)
{
((COSDictionary) oldXObjectBase).addAll((COSDictionary) newXObjectBase);
oldDR.setNeedToBeUpdated(true);
}
}
}
}
private void prepareNonVisibleSignature(PDSignatureField signatureField)
{
// "Signature fields that are not intended to be visible shall
// have an annotation rectangle that has zero height and width."
// Set rectangle for non-visual signature to rectangle array [ 0 0 0 0 ]
signatureField.getWidgets().get(0).setRectangle(new PDRectangle());
// The visual appearance must also exist for an invisible signature but may be empty.
PDAppearanceDictionary appearanceDictionary = new PDAppearanceDictionary();
PDAppearanceStream appearanceStream = new PDAppearanceStream(this);
appearanceStream.setBBox(new PDRectangle());
appearanceDictionary.setNormalAppearance(appearanceStream);
signatureField.getWidgets().get(0).setAppearance(appearanceDictionary);
}
/**
* Remove the page from the document.
*
* @param page The page to remove from the document.
*/
public void removePage(PDPage page)
{
getPages().remove(page);
}
/**
* Remove the page from the document.
*
* @param pageNumber 0 based index to page number.
*/
public void removePage(int pageNumber)
{
getPages().remove(pageNumber);
}
/**
* This will import and copy the contents from another location. Currently the content stream is
* stored in a scratch file. The scratch file is associated with the document. If you are adding
* a page to this document from another document and want to copy the contents to this
* document's scratch file then use this method otherwise just use the {@link #addPage addPage()}
* method.
* <p>
* Unlike {@link #addPage addPage()}, this method creates a new PDPage object. If your page has
* annotations, and if these link to pages not in the target document, then the target document
* might become huge. What you need to do is to delete page references of such annotations. See
* <a href="http://stackoverflow.com/a/35477351/535646">here</a> for how to do this.
* <p>
* Inherited (global) resources are ignored because these can contain resources not needed for
* this page which could bloat your document, see
* <a href="https://issues.apache.org/jira/browse/PDFBOX-28">PDFBOX-28</a> and related issues.
* If you need them, call <code>importedPage.setResources(page.getResources());</code>
* <p>
* This method should only be used to import a page from a loaded document, not from a generated
* document because these can contain unfinished parts, e.g. font subsetting information.
*
* @param page The page to import.
* @return The page that was imported.
*
* @throws IOException If there is an error copying the page.
*/
public PDPage importPage(PDPage page) throws IOException
{
PDPage importedPage = new PDPage(new COSDictionary(page.getCOSObject()), resourceCache);
PDStream dest = new PDStream(this, page.getContents(), COSName.FLATE_DECODE);
importedPage.setContents(dest);
addPage(importedPage);
importedPage.setCropBox(page.getCropBox());
importedPage.setMediaBox(page.getMediaBox());
importedPage.setRotation(page.getRotation());
if (page.getResources() != null && !page.getCOSObject().containsKey(COSName.RESOURCES))
{
LOG.warn("inherited resources of source document are not imported to destination page");
LOG.warn("call importedPage.setResources(page.getResources()) to do this");
}
return importedPage;
}
/**
* This will get the low level document.
*
* @return The document that this layer sits on top of.
*/
public COSDocument getDocument()
{
return document;
}
/**
* This will get the document info dictionary. If it doesn't exist, an empty document info
* dictionary is created in the document trailer.
* <p>
* In PDF 2.0 this is deprecated except for two entries, /CreationDate and /ModDate. For any other
* document level metadata, a metadata stream should be used instead, see
* {@link PDDocumentCatalog#getMetadata()}.
*
* @return The documents /Info dictionary, never null.
*/
public PDDocumentInformation getDocumentInformation()
{
if (documentInformation == null)
{
COSDictionary trailer = document.getTrailer();
COSDictionary infoDic = trailer.getCOSDictionary(COSName.INFO);
if (infoDic == null)
{
infoDic = new COSDictionary();
trailer.setItem(COSName.INFO, infoDic);
}
documentInformation = new PDDocumentInformation(infoDic);
}
return documentInformation;
}
/**
* This will set the document information for this document.
* <p>
* In PDF 2.0 this is deprecated except for two entries, /CreationDate and /ModDate. For any other
* document level metadata, a metadata stream should be used instead, see
* {@link PDDocumentCatalog#setMetadata(org.apache.pdfbox.pdmodel.common.PDMetadata) PDDocumentCatalog#setMetadata(PDMetadata)}.
*
* @param info The updated document information.
*/
public void setDocumentInformation(PDDocumentInformation info)
{
documentInformation = info;
document.getTrailer().setItem(COSName.INFO, info.getCOSObject());
}
/**
* This will get the document CATALOG. This is guaranteed to not return null.
*
* @return The documents /Root dictionary
*/
public PDDocumentCatalog getDocumentCatalog()
{
if (documentCatalog == null)
{
COSDictionary trailer = document.getTrailer();
COSBase dictionary = trailer.getDictionaryObject(COSName.ROOT);
if (dictionary instanceof COSDictionary)
{
documentCatalog = new PDDocumentCatalog(this, (COSDictionary) dictionary);
}
else
{
documentCatalog = new PDDocumentCatalog(this);
}
}
return documentCatalog;
}
/**
* This will tell if this document is encrypted or not.
*
* @return true If this document is encrypted.
*/
public boolean isEncrypted()
{
return document.isEncrypted();
}
/**
* This will get the encryption dictionary for this document. This will still return the parameters if the document
* was decrypted. As the encryption architecture in PDF documents is pluggable this returns an abstract class,
* but the only supported subclass at this time is a
* PDStandardEncryption object.
*
* @return The encryption dictionary(most likely a PDStandardEncryption object)
*/
public PDEncryption getEncryption()
{
if (encryption == null && isEncrypted())
{
encryption = new PDEncryption(document.getEncryptionDictionary());
}
return encryption;
}
/**
* This will set the encryption dictionary for this document.
*
* @param encryption The encryption dictionary(most likely a PDStandardEncryption object)
*/
public void setEncryptionDictionary(PDEncryption encryption)
{
this.encryption = encryption;
}
/**
* This will return the last signature from the field tree. Note that this may not be the
* last in time when empty signature fields are created first but signed after other fields.
*
* @return the last signature as <code>PDSignatureField</code>.
*/
public PDSignature getLastSignatureDictionary()
{
List<PDSignature> signatureDictionaries = getSignatureDictionaries();
int size = signatureDictionaries.size();
if (size > 0)
{
return signatureDictionaries.get(size - 1);
}
return null;
}
/**
* Retrieve all signature fields from the document.
*
* @return a <code>List</code> of <code>PDSignatureField</code>s
*/
public List<PDSignatureField> getSignatureFields()
{
List<PDSignatureField> fields = new ArrayList<>();
PDAcroForm acroForm = getDocumentCatalog().getAcroForm();
if (acroForm != null)
{
for (PDField field : acroForm.getFieldTree())
{
if (field instanceof PDSignatureField)
{
fields.add((PDSignatureField)field);
}
}
}
return fields;
}
/**
* Retrieve all signature dictionaries from the document.
*
* @return a <code>List</code> of <code>PDSignatureField</code>s
*/
public List<PDSignature> getSignatureDictionaries()
{
List<PDSignature> signatures = new ArrayList<>();
for (PDSignatureField field : getSignatureFields())
{
COSBase value = field.getCOSObject().getDictionaryObject(COSName.V);
if (value instanceof COSDictionary)
{
signatures.add(new PDSignature((COSDictionary)value));
}
}
return signatures;
}
/**
* For internal PDFBox use when creating PDF documents: register a TrueTypeFont to make sure it
* is closed when the PDDocument is closed to avoid memory leaks. Users don't have to call this
* method, it is done by the appropriate PDFont classes.
*
* @param ttf
*/
public void registerTrueTypeFontForClosing(TrueTypeFont ttf)
{
fontsToClose.add(ttf);
}
/**
* Returns the list of fonts which will be subset before the document is saved.
*/
Set<PDFont> getFontsToSubset()
{
return fontsToSubset;
}
/**
* Save the document to a file.
*
* @param fileName The file to save as.
*
* @throws IOException if the output could not be written
*/
public void save(String fileName) throws IOException
{
save(new File(fileName));
}
/**
* Save the document to a file.
*
* @param file The file to save as.
*
* @throws IOException if the output could not be written
*/
public void save(File file) throws IOException
{
save(new BufferedOutputStream(new FileOutputStream(file)));
}
/**
* This will save the document to an output stream.
*
* @param output The stream to write to. It will be closed when done. It is recommended to wrap
* it in a {@link java.io.BufferedOutputStream}, unless it is already buffered.
*
* @throws IOException if the output could not be written
*/
public void save(OutputStream output) throws IOException
{
if (document.isClosed())
{
throw new IOException("Cannot save a document which has been closed");
}
// subset designated fonts
for (PDFont font : fontsToSubset)
{
font.subset();
}
fontsToSubset.clear();
// save PDF
try (COSWriter writer = new COSWriter(output))
{
writer.write(this);
}
}
/**
* Save the PDF as an incremental update. This is only possible if the PDF was loaded from a
* file or a stream, not if the document was created in PDFBox itself. There must be a path of
* objects that have {@link COSUpdateInfo#isNeedToBeUpdated()} set, starting from the document
* catalog. For signatures this is taken care by PDFBox itself.
*<p>
* Other usages of this method are for experienced users only. You will usually never need it.
* It is useful only if you are required to keep the current revision and append the changes. A
* typical use case is changing a signed file without invalidating the signature.
*
* @param output stream to write to. It will be closed when done. It
* <i><b>must never</b></i> point to the source file or that one will be
* harmed!
* @throws IOException if the output could not be written
* @throws IllegalStateException if the document was not loaded from a file or a stream.
*/
public void saveIncremental(OutputStream output) throws IOException
{
if (pdfSource == null)
{
throw new IllegalStateException("document was not loaded from a file or a stream");
}
try (COSWriter writer = new COSWriter(output, pdfSource))
{
writer.write(this, signInterface);
}
}
/**
* Save the PDF as an incremental update. This is only possible if the PDF was loaded from a
* file or a stream, not if the document was created in PDFBox itself. This allows to include
* objects even if there is no path of objects that have
* {@link COSUpdateInfo#isNeedToBeUpdated()} set so the incremental update gets smaller. Only
* dictionaries are supported; if you need to update other objects classes, then add their
* parent dictionary.
* <p>
* This method is for experienced users only. You will usually never need it. It is useful only
* if you are required to keep the current revision and append the changes. A typical use case
* is changing a signed file without invalidating the signature. To know which objects are
* getting changed, you need to have some understanding of the PDF specification, and look at
* the saved file with an editor to verify that you are updating the correct objects. You should
* also inspect the page and document structures of the file with PDFDebugger.
*
* @param output stream to write to. It will be closed when done. It
* <i><b>must never</b></i> point to the source file or that one will be harmed!
* @param objectsToWrite objects that <b>must</b> be part of the incremental saving.
* @throws IOException if the output could not be written
* @throws IllegalStateException if the document was not loaded from a file or a stream.
*/
public void saveIncremental(OutputStream output, Set<COSDictionary> objectsToWrite) throws IOException
{
if (pdfSource == null)
{
throw new IllegalStateException("document was not loaded from a file or a stream");
}
try (COSWriter writer = new COSWriter(output, pdfSource, objectsToWrite))
{
writer.write(this, signInterface);
}
}
/**
* Save PDF incrementally without closing for external signature creation scenario. The general
* sequence is:
* <pre>
* PDDocument pdDocument = ...;
* OutputStream outputStream = ...;
* SignatureOptions signatureOptions = ...; // options to specify fine tuned signature options or null for defaults
* PDSignature pdSignature = ...;
*
* // add signature parameters to be used when creating signature dictionary
* pdDocument.addSignature(pdSignature, signatureOptions);
* // prepare PDF for signing and obtain helper class to be used
* ExternalSigningSupport externalSigningSupport = pdDocument.saveIncrementalForExternalSigning(outputStream);
* // get data to be signed
* InputStream dataToBeSigned = externalSigningSupport.getContent();
* // invoke signature service
* byte[] signature = sign(dataToBeSigned);
* // set resulted CMS signature
* externalSigningSupport.setSignature(signature);
*
* // last step is to close the document
* pdDocument.close();
* </pre>
* <p>
* Note that after calling this method, only {@code close()} method may invoked for
* {@code PDDocument} instance and only AFTER {@link ExternalSigningSupport} instance is used.
* </p>
*
* @param output stream to write the final PDF. It will be closed when the
* document is closed. It <i><b>must never</b></i> point to the source file
* or that one will be harmed!
* @return instance to be used for external signing and setting CMS signature
* @throws IOException if the output could not be written
* @throws IllegalStateException if the document was not loaded from a file or a stream or
* signature options were not set.
*/
public ExternalSigningSupport saveIncrementalForExternalSigning(OutputStream output) throws IOException
{
if (pdfSource == null)
{
throw new IllegalStateException("document was not loaded from a file or a stream");
}
// PDFBOX-3978: getLastSignatureDictionary() not helpful if signing into a template
// that is not the last signature. So give higher priority to signature with update flag.
PDSignature foundSignature = null;
for (PDSignature sig : getSignatureDictionaries())
{
foundSignature = sig;
if (sig.getCOSObject().isNeedToBeUpdated())
{
break;
}
}
if (foundSignature == null)
{
throw new IllegalStateException("document does not contain signature fields");
}
int[] byteRange = foundSignature.getByteRange();
if (!Arrays.equals(byteRange, RESERVE_BYTE_RANGE))
{
throw new IllegalStateException("signature reserve byte range has been changed "
+ "after addSignature(), please set the byte range that existed after addSignature()");
}
COSWriter writer = new COSWriter(output, pdfSource);
writer.write(this);
signingSupport = new SigningSupport(writer);
return signingSupport;
}
/**
* Returns the page at the given 0-based index.
* <p>
* This method is too slow to get all the pages from a large PDF document
* (1000 pages or more). For such documents, use the iterator of
* {@link PDDocument#getPages()} instead.
*
* @param pageIndex the 0-based page index
* @return the page at the given index.
*/
public PDPage getPage(int pageIndex) // todo: REPLACE most calls to this method with BELOW method
{
return getDocumentCatalog().getPages().get(pageIndex);
}
/**
* Returns the page tree.
*
* @return the page tree
*/
public PDPageTree getPages()
{
return getDocumentCatalog().getPages();
}
/**
* This will return the total page count of the PDF document.
*
* @return The total number of pages in the PDF document.
*/
public int getNumberOfPages()
{
return getDocumentCatalog().getPages().getCount();
}
/**
* This will close the underlying COSDocument object.
*
* @throws IOException If there is an error releasing resources.
*/
@Override
public void close() throws IOException
{
if (!document.isClosed())
{
// Make sure that:
// - first Exception is kept
// - all IO resources are closed
// - there's a way to see which errors occurred
IOException firstException = null;
// close resources and COSWriter
if (signingSupport != null)
{
firstException = IOUtils.closeAndLogException(signingSupport, LOG, "SigningSupport", firstException);
}
// close all intermediate I/O streams
firstException = IOUtils.closeAndLogException(document, LOG, "COSDocument", firstException);
// close the source PDF stream, if we read from one
if (pdfSource != null)
{
firstException = IOUtils.closeAndLogException(pdfSource, LOG, "RandomAccessRead pdfSource", firstException);
}
// close fonts
for (TrueTypeFont ttf : fontsToClose)
{
firstException = IOUtils.closeAndLogException(ttf, LOG, "TrueTypeFont", firstException);
}
// rethrow first exception to keep method contract
if (firstException != null)
{
throw firstException;
}
}
}
/**
* Protects the document with a protection policy. The document content will be really
* encrypted when it will be saved. This method only marks the document for encryption. It also
* calls {@link #setAllSecurityToBeRemoved(boolean)} with a false argument if it was set to true
* previously and logs a warning.
*
* @see org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy
* @see org.apache.pdfbox.pdmodel.encryption.PublicKeyProtectionPolicy
*
* @param policy The protection policy.
* @throws IOException if there isn't any suitable security handler.
*/
public void protect(ProtectionPolicy policy) throws IOException
{
if (isAllSecurityToBeRemoved())
{
LOG.warn("do not call setAllSecurityToBeRemoved(true) before calling protect(), "
+ "as protect() implies setAllSecurityToBeRemoved(false)");
setAllSecurityToBeRemoved(false);
}
if (!isEncrypted())
{
encryption = new PDEncryption();
}
SecurityHandler<ProtectionPolicy> securityHandler =
SecurityHandlerFactory.INSTANCE.newSecurityHandlerForPolicy(policy);
if (securityHandler == null)
{
throw new IOException("No security handler for policy " + policy);
}
getEncryption().setSecurityHandler(securityHandler);
}
/**
* Returns the access permissions granted when the document was decrypted. If the document was not decrypted this
* method returns the access permission for a document owner (ie can do everything). The returned object is in read
* only mode so that permissions cannot be changed. Methods providing access to content should rely on this object
* to verify if the current user is allowed to proceed.
*
* @return the access permissions for the current user on the document.
*/
public AccessPermission getCurrentAccessPermission()
{
if (accessPermission == null)
{
accessPermission = AccessPermission.getOwnerAccessPermission();
}
return accessPermission;
}
/**
* Indicates if all security is removed or not when writing the pdf.
*
* @return returns true if all security shall be removed otherwise false
*/
public boolean isAllSecurityToBeRemoved()
{
return allSecurityToBeRemoved;
}
/**
* Activates/Deactivates the removal of all security when writing the pdf.
*
* @param removeAllSecurity remove all security if set to true
*/
public void setAllSecurityToBeRemoved(boolean removeAllSecurity)
{
allSecurityToBeRemoved = removeAllSecurity;
}
/**
* Provides the document ID.
*
* @return the dcoument ID
*/
public Long getDocumentId()
{
return documentId;
}
/**
* Sets the document ID to the given value.
*
* @param docId the new document ID
*/
public void setDocumentId(Long docId)
{
documentId = docId;
}
/**
* Returns the PDF specification version this document conforms to.
*
* @return the PDF version (e.g. 1.4f)
*/
public float getVersion()
{
float headerVersionFloat = getDocument().getVersion();
// there may be a second version information in the document catalog starting with 1.4
if (headerVersionFloat >= 1.4f)
{
String catalogVersion = getDocumentCatalog().getVersion();
float catalogVersionFloat = -1;
if (catalogVersion != null)
{
try
{
catalogVersionFloat = Float.parseFloat(catalogVersion);
}
catch(NumberFormatException exception)
{
LOG.error("Can't extract the version number of the document catalog.", exception);
}
}
// the most recent version is the correct one
return Math.max(catalogVersionFloat, headerVersionFloat);
}
else
{
return headerVersionFloat;
}
}
/**
* Sets the PDF specification version for this document.
*
* @param newVersion the new PDF version (e.g. 1.4f)
*
*/
public void setVersion(float newVersion)
{
float currentVersion = getVersion();
// nothing to do?
if (Float.compare(newVersion,currentVersion) == 0)
{
return;
}
// the version can't be downgraded
if (newVersion < currentVersion)
{
LOG.error("It's not allowed to downgrade the version of a pdf.");
return;
}
// update the catalog version if the document version is >= 1.4
if (getDocument().getVersion() >= 1.4f)
{
getDocumentCatalog().setVersion(Float.toString(newVersion));
}
else
{
// versions < 1.4f have a version header only
getDocument().setVersion(newVersion);
}
}
/**
* Returns the resource cache associated with this document, or null if there is none.
*/
public ResourceCache getResourceCache()
{
return resourceCache;
}
/**
* Sets the resource cache associated with this document.
*
* @param resourceCache A resource cache, or null.
*/
public void setResourceCache(ResourceCache resourceCache)
{
this.resourceCache = resourceCache;
}
}