| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.pdfbox.pdmodel; |
| |
| import java.io.BufferedOutputStream; |
| import java.io.Closeable; |
| import java.io.File; |
| import java.io.FileOutputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.OutputStream; |
| import java.util.ArrayList; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Set; |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.pdfbox.cos.COSArray; |
| import org.apache.pdfbox.cos.COSBase; |
| import org.apache.pdfbox.cos.COSDictionary; |
| import org.apache.pdfbox.cos.COSDocument; |
| import org.apache.pdfbox.cos.COSInteger; |
| import org.apache.pdfbox.cos.COSName; |
| import org.apache.pdfbox.cos.COSObject; |
| import org.apache.pdfbox.io.IOUtils; |
| import org.apache.pdfbox.io.MemoryUsageSetting; |
| import org.apache.pdfbox.io.RandomAccessBuffer; |
| import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream; |
| import org.apache.pdfbox.io.RandomAccessRead; |
| import org.apache.pdfbox.io.ScratchFile; |
| import org.apache.pdfbox.pdfparser.PDFParser; |
| import org.apache.pdfbox.pdfwriter.COSWriter; |
| import org.apache.pdfbox.pdmodel.common.COSArrayList; |
| import org.apache.pdfbox.pdmodel.common.PDRectangle; |
| import org.apache.pdfbox.pdmodel.common.PDStream; |
| import org.apache.pdfbox.pdmodel.encryption.AccessPermission; |
| import org.apache.pdfbox.pdmodel.encryption.PDEncryption; |
| import org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy; |
| import org.apache.pdfbox.pdmodel.encryption.SecurityHandler; |
| import org.apache.pdfbox.pdmodel.encryption.SecurityHandlerFactory; |
| import org.apache.pdfbox.pdmodel.font.PDFont; |
| import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; |
| import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary; |
| import org.apache.pdfbox.pdmodel.interactive.digitalsignature.ExternalSigningSupport; |
| import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature; |
| import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface; |
| import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureOptions; |
| import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SigningSupport; |
| import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; |
| import org.apache.pdfbox.pdmodel.interactive.form.PDField; |
| import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField; |
| |
/**
 * This is the in-memory representation of the PDF document.
 * The {@link #close()} method must be called once the document is no longer needed.
 *
 * @author Ben Litchfield
 */
| public class PDDocument implements Closeable |
| { |
private static final Log LOG = LogFactory.getLog(PDDocument.class);

// the low level COS document this object sits on top of
private final COSDocument document;

// cached values, created on first access by their getters
private PDDocumentInformation documentInformation;
private PDDocumentCatalog documentCatalog;

// the encryption will be cached here. When the document is decrypted then
// the COSDocument will not have an "Encrypt" dictionary anymore and this object must be used
private PDEncryption encryption;

// holds a flag which tells us if we should remove all security from this document.
private boolean allSecurityToBeRemoved;

// keeps track of a customized documentId for the trailer. If null, a new id will be generated.
// This ID doesn't represent the actual documentId from the trailer.
private Long documentId;

// the pdf to be read; may be null, e.g. for a newly created document
private final RandomAccessRead pdfSource;

// the access permissions of the document
private AccessPermission accessPermission;

// fonts to subset before saving
private final Set<PDFont> fontsToSubset = new HashSet<PDFont>();

// Signature interface, set when a signature is added to the document
private SignatureInterface signInterface;

// helper class used to create external signature
private SigningSupport signingSupport;

// document-wide cached resources, handed to pages created by importPage
private ResourceCache resourceCache = new DefaultResourceCache();
| |
/**
 * Constructs a new, empty PDF document whose memory usage is set up for main memory only.
 * At least one page has to be added before the document is valid.
 */
public PDDocument()
{
    this(MemoryUsageSetting.setupMainMemoryOnly());
}
| |
| /** |
| * Creates an empty PDF document. |
| * You need to add at least one page for the document to be valid. |
| * |
| * @param memUsageSetting defines how memory is used for buffering PDF streams |
| */ |
| public PDDocument(MemoryUsageSetting memUsageSetting) |
| { |
| ScratchFile scratchFile = null; |
| try |
| { |
| scratchFile = new ScratchFile(memUsageSetting); |
| } |
| catch (IOException ioe) |
| { |
| LOG.warn("Error initializing scratch file: " + ioe.getMessage() + |
| ". Fall back to main memory usage only."); |
| try |
| { |
| scratchFile = new ScratchFile(MemoryUsageSetting.setupMainMemoryOnly()); |
| } |
| catch (IOException ioe2) {} |
| } |
| |
| document = new COSDocument(scratchFile); |
| pdfSource = null; |
| |
| // First we need a trailer |
| COSDictionary trailer = new COSDictionary(); |
| document.setTrailer(trailer); |
| |
| // Next we need the root dictionary. |
| COSDictionary rootDictionary = new COSDictionary(); |
| trailer.setItem(COSName.ROOT, rootDictionary); |
| rootDictionary.setItem(COSName.TYPE, COSName.CATALOG); |
| rootDictionary.setItem(COSName.VERSION, COSName.getPDFName("1.4")); |
| |
| // next we need the pages tree structure |
| COSDictionary pages = new COSDictionary(); |
| rootDictionary.setItem(COSName.PAGES, pages); |
| pages.setItem(COSName.TYPE, COSName.PAGES); |
| COSArray kidsArray = new COSArray(); |
| pages.setItem(COSName.KIDS, kidsArray); |
| pages.setItem(COSName.COUNT, COSInteger.ZERO); |
| } |
| |
| /** |
| * This will add a page to the document. This is a convenience method, that will add the page to the root of the |
| * hierarchy and set the parent of the page to the root. |
| * |
| * @param page The page to add to the document. |
| */ |
| public void addPage(PDPage page) |
| { |
| getPages().add(page); |
| } |
| |
| /** |
| * Add parameters of signature to be created externally using default signature options. See |
| * {@link #saveIncrementalForExternalSigning(OutputStream)} method description on external |
| * signature creation scenario details. |
| * |
| * @param sigObject is the PDSignatureField model |
| * @throws IOException if there is an error creating required fields |
| */ |
| public void addSignature(PDSignature sigObject) throws IOException |
| { |
| addSignature(sigObject, new SignatureOptions()); |
| } |
| |
| /** |
| * Add parameters of signature to be created externally. See |
| * {@link #saveIncrementalForExternalSigning(OutputStream)} method description on external |
| * signature creation scenario details. |
| * |
| * @param sigObject is the PDSignatureField model |
| * @param options signature options |
| * @throws IOException if there is an error creating required fields |
| */ |
| public void addSignature(PDSignature sigObject, SignatureOptions options) throws IOException |
| { |
| addSignature(sigObject, null, options); |
| } |
| |
| /** |
| * Add a signature to be created using the instance of given interface. |
| * |
| * @param sigObject is the PDSignatureField model |
| * @param signatureInterface is an interface which provides signing capabilities |
| * @throws IOException if there is an error creating required fields |
| */ |
| public void addSignature(PDSignature sigObject, SignatureInterface signatureInterface) throws IOException |
| { |
| addSignature(sigObject, signatureInterface, new SignatureOptions()); |
| } |
| |
| /** |
| * This will add a signature to the document. If the 0-based page number in the options |
| * parameter is smaller than 0 or larger than max, the nearest valid page number will be used |
| * (i.e. 0 or max) and no exception will be thrown. |
| * |
| * @param sigObject is the PDSignatureField model |
| * @param signatureInterface is an interface which provides signing capabilities |
| * @param options signature options |
| * @throws IOException if there is an error creating required fields |
| */ |
| public void addSignature(PDSignature sigObject, SignatureInterface signatureInterface, |
| SignatureOptions options) throws IOException |
| { |
| // Reserve content |
| // We need to reserve some space for the signature. Some signatures including |
| // big certificate chain and we need enough space to store it. |
| int preferredSignatureSize = options.getPreferredSignatureSize(); |
| if (preferredSignatureSize > 0) |
| { |
| sigObject.setContents(new byte[preferredSignatureSize]); |
| } |
| else |
| { |
| sigObject.setContents(new byte[SignatureOptions.DEFAULT_SIGNATURE_SIZE]); |
| } |
| |
| // Reserve ByteRange |
| sigObject.setByteRange(new int[] { 0, 1000000000, 1000000000, 1000000000 }); |
| |
| signInterface = signatureInterface; |
| |
| // Create SignatureForm for signature and append it to the document |
| |
| // Get the first valid page |
| int pageCount = getNumberOfPages(); |
| if (pageCount == 0) |
| { |
| throw new IllegalStateException("Cannot sign an empty document"); |
| } |
| |
| int startIndex = Math.min(Math.max(options.getPage(), 0), pageCount - 1); |
| PDPage page = getPage(startIndex); |
| |
| // Get the AcroForm from the Root-Dictionary and append the annotation |
| PDDocumentCatalog catalog = getDocumentCatalog(); |
| PDAcroForm acroForm = catalog.getAcroForm(); |
| catalog.getCOSObject().setNeedToBeUpdated(true); |
| |
| if (acroForm == null) |
| { |
| acroForm = new PDAcroForm(this); |
| catalog.setAcroForm(acroForm); |
| } |
| else |
| { |
| acroForm.getCOSObject().setNeedToBeUpdated(true); |
| } |
| |
| List<PDField> fields = acroForm.getFields(); |
| if (fields == null) |
| { |
| fields = new ArrayList<PDField>(); |
| acroForm.setFields(fields); |
| } |
| else |
| { |
| COSArray fieldArray = (COSArray) acroForm.getCOSObject().getDictionaryObject(COSName.FIELDS); |
| fieldArray.setNeedToBeUpdated(true); |
| } |
| PDSignatureField signatureField = findSignatureField(fields, sigObject); |
| if (signatureField == null) |
| { |
| signatureField = new PDSignatureField(acroForm); |
| // append the signature object |
| signatureField.setValue(sigObject); |
| // backward linking |
| signatureField.getWidgets().get(0).setPage(page); |
| } |
| // to conform PDF/A-1 requirement: |
| // The /F key's Print flag bit shall be set to 1 and |
| // its Hidden, Invisible and NoView flag bits shall be set to 0 |
| signatureField.getWidgets().get(0).setPrinted(true); |
| |
| // Set the AcroForm Fields |
| List<PDField> acroFormFields = acroForm.getFields(); |
| acroForm.getCOSObject().setDirect(true); |
| acroForm.setSignaturesExist(true); |
| acroForm.setAppendOnly(true); |
| |
| boolean checkFields = checkSignatureField(acroFormFields, signatureField); |
| |
| // Get the object from the visual signature |
| COSDocument visualSignature = options.getVisualSignature(); |
| |
| // Distinction of case for visual and non-visual signature |
| if (visualSignature == null) |
| { |
| prepareNonVisibleSignature(signatureField); |
| return; |
| } |
| |
| prepareVisibleSignature(signatureField, acroForm, visualSignature); |
| |
| // Create Annotation / Field for signature |
| List<PDAnnotation> annotations = page.getAnnotations(); |
| |
| // Make /Annots a direct object to avoid problem if it is an existing indirect object: |
| // it would not be updated in incremental save, and if we'd set the /Annots array "to be updated" |
| // while keeping it indirect, Adobe Reader would claim that the document had been modified. |
| page.setAnnotations(annotations); |
| |
| // Get the annotations of the page and append the signature-annotation to it |
| // take care that page and acroforms do not share the same array (if so, we don't need to add it twice) |
| if (!(annotations instanceof COSArrayList && |
| acroFormFields instanceof COSArrayList && |
| ((COSArrayList<?>) annotations).toList().equals(((COSArrayList<?>) acroFormFields).toList()) && |
| checkFields)) |
| { |
| annotations.add(signatureField.getWidgets().get(0)); |
| } |
| page.getCOSObject().setNeedToBeUpdated(true); |
| } |
| |
| // search acroform field list for signature field with specific signature dictionary |
| private PDSignatureField findSignatureField(List<PDField> fields, PDSignature sigObject) |
| { |
| PDSignatureField signatureField = null; |
| for (PDField pdField : fields) |
| { |
| if (pdField instanceof PDSignatureField) |
| { |
| PDSignature signature = ((PDSignatureField) pdField).getSignature(); |
| if (signature != null && signature.getCOSObject().equals(sigObject.getCOSObject())) |
| { |
| signatureField = (PDSignatureField) pdField; |
| } |
| } |
| } |
| return signatureField; |
| } |
| |
| // return true if the field already existed in the field list, in that case, it is marked for update |
| private boolean checkSignatureField(List<PDField> acroFormFields, PDSignatureField signatureField) |
| { |
| boolean checkFields = false; |
| for (PDField field : acroFormFields) |
| { |
| if (field instanceof PDSignatureField |
| && field.getCOSObject().equals(signatureField.getCOSObject())) |
| { |
| checkFields = true; |
| signatureField.getCOSObject().setNeedToBeUpdated(true); |
| break; |
| } |
| // fixme: this code does not check non-terminal fields, there could be a descendant signature |
| } |
| if (!checkFields) |
| { |
| acroFormFields.add(signatureField); |
| } |
| return checkFields; |
| } |
| |
| private void prepareVisibleSignature(PDSignatureField signatureField, PDAcroForm acroForm, |
| COSDocument visualSignature) |
| { |
| // Obtain visual signature object |
| boolean annotNotFound = true; |
| boolean sigFieldNotFound = true; |
| for (COSObject cosObject : visualSignature.getObjects()) |
| { |
| if (!annotNotFound && !sigFieldNotFound) |
| { |
| break; |
| } |
| |
| COSBase base = cosObject.getObject(); |
| if (base instanceof COSDictionary) |
| { |
| COSDictionary cosBaseDict = (COSDictionary) base; |
| |
| // Search for signature annotation |
| COSBase type = cosBaseDict.getDictionaryObject(COSName.TYPE); |
| if (annotNotFound && COSName.ANNOT.equals(type)) |
| { |
| assignSignatureRectangle(signatureField, cosBaseDict); |
| annotNotFound = false; |
| } |
| |
| // Search for signature field |
| COSBase fieldType = cosBaseDict.getDictionaryObject(COSName.FT); |
| COSBase apDict = cosBaseDict.getDictionaryObject(COSName.AP); |
| if (sigFieldNotFound && COSName.SIG.equals(fieldType) && apDict instanceof COSDictionary) |
| { |
| assignAppearanceDictionary(signatureField, (COSDictionary) apDict); |
| assignAcroFormDefaultResource(acroForm, cosBaseDict); |
| sigFieldNotFound = false; |
| } |
| } |
| } |
| |
| if (annotNotFound || sigFieldNotFound) |
| { |
| throw new IllegalArgumentException("Template is missing required objects"); |
| } |
| } |
| |
| private void assignSignatureRectangle(PDSignatureField signatureField, COSDictionary annotDict) |
| { |
| // Read and set the rectangle for visual signature |
| COSArray rectArray = (COSArray) annotDict.getDictionaryObject(COSName.RECT); |
| PDRectangle rect = new PDRectangle(rectArray); |
| signatureField.getWidgets().get(0).setRectangle(rect); |
| } |
| |
| private void assignAppearanceDictionary(PDSignatureField signatureField, COSDictionary apDict) |
| { |
| // read and set Appearance Dictionary |
| PDAppearanceDictionary ap = new PDAppearanceDictionary(apDict); |
| apDict.setDirect(true); |
| signatureField.getWidgets().get(0).setAppearance(ap); |
| } |
| |
| private void assignAcroFormDefaultResource(PDAcroForm acroForm, COSDictionary dict) |
| { |
| // read and set AcroForm default resource dictionary /DR if available |
| COSBase base = dict.getDictionaryObject(COSName.DR); |
| if (base instanceof COSDictionary) |
| { |
| COSDictionary dr = (COSDictionary) base; |
| dr.setDirect(true); |
| dr.setNeedToBeUpdated(true); |
| acroForm.getCOSObject().setItem(COSName.DR, dr); |
| } |
| } |
| |
| private void prepareNonVisibleSignature(PDSignatureField signatureField) |
| throws IOException |
| { |
| // "Signature fields that are not intended to be visible shall |
| // have an annotation rectangle that has zero height and width." |
| // Set rectangle for non-visual signature to rectangle array [ 0 0 0 0 ] |
| signatureField.getWidgets().get(0).setRectangle(new PDRectangle()); |
| } |
| |
| /** |
| * This will add a signature field to the document. |
| * |
| * @param sigFields are the PDSignatureFields that should be added to the document |
| * @param signatureInterface is a interface which provides signing capabilities |
| * @param options signature options |
| * @throws IOException if there is an error creating required fields |
| */ |
| public void addSignatureField(List<PDSignatureField> sigFields, SignatureInterface signatureInterface, |
| SignatureOptions options) throws IOException |
| { |
| PDDocumentCatalog catalog = getDocumentCatalog(); |
| catalog.getCOSObject().setNeedToBeUpdated(true); |
| |
| PDAcroForm acroForm = catalog.getAcroForm(); |
| if (acroForm == null) |
| { |
| acroForm = new PDAcroForm(this); |
| catalog.setAcroForm(acroForm); |
| } |
| COSDictionary acroFormDict = acroForm.getCOSObject(); |
| acroFormDict.setDirect(true); |
| acroFormDict.setNeedToBeUpdated(true); |
| if (!acroForm.isSignaturesExist()) |
| { |
| // 1 if at least one signature field is available |
| acroForm.setSignaturesExist(true); |
| } |
| |
| List<PDField> acroformFields = acroForm.getFields(); |
| |
| for (PDSignatureField sigField : sigFields) |
| { |
| sigField.getCOSObject().setNeedToBeUpdated(true); |
| |
| // Check if the field already exists |
| checkSignatureField(acroformFields, sigField); |
| |
| // Check if we need to add a signature |
| if (sigField.getSignature() != null) |
| { |
| sigField.getCOSObject().setNeedToBeUpdated(true); |
| if (options == null) |
| { |
| // TODO ?? |
| } |
| addSignature(sigField.getSignature(), signatureInterface, options); |
| } |
| } |
| } |
| |
| /** |
| * Remove the page from the document. |
| * |
| * @param page The page to remove from the document. |
| */ |
| public void removePage(PDPage page) |
| { |
| getPages().remove(page); |
| } |
| |
| /** |
| * Remove the page from the document. |
| * |
| * @param pageNumber 0 based index to page number. |
| */ |
| public void removePage(int pageNumber) |
| { |
| getPages().remove(pageNumber); |
| } |
| |
| /** |
| * This will import and copy the contents from another location. Currently the content stream is stored in a scratch |
| * file. The scratch file is associated with the document. If you are adding a page to this document from another |
| * document and want to copy the contents to this |
| * document's scratch file then use this method otherwise just use the {@link #addPage addPage} |
| * method. |
| * <p> |
| * Unlike {@link #addPage addPage}, this method creates a new PDPage object. If your page has |
| * annotations, and if these link to pages not in the target document, then the target document |
| * might become huge. What you need to do is to delete page references of such annotations. See |
| * <a href="http://stackoverflow.com/a/35477351/535646">here</a> for how to do this. |
| * <p> |
| * Inherited (global) resources are ignored. If you need them, call |
| * <code>importedPage.setRotation(page.getRotation());</code> |
| * |
| * @param page The page to import. |
| * @return The page that was imported. |
| * |
| * @throws IOException If there is an error copying the page. |
| */ |
| public PDPage importPage(PDPage page) throws IOException |
| { |
| PDPage importedPage = new PDPage(new COSDictionary(page.getCOSObject()), resourceCache); |
| InputStream in = null; |
| try |
| { |
| in = page.getContents(); |
| if (in != null) |
| { |
| PDStream dest = new PDStream(this, in, COSName.FLATE_DECODE); |
| importedPage.setContents(dest); |
| } |
| addPage(importedPage); |
| } |
| catch (IOException e) |
| { |
| IOUtils.closeQuietly(in); |
| } |
| importedPage.setCropBox(page.getCropBox()); |
| importedPage.setMediaBox(page.getMediaBox()); |
| importedPage.setRotation(page.getRotation()); |
| if (page.getResources() != null && !page.getCOSObject().containsKey(COSName.RESOURCES)) |
| { |
| LOG.warn("inherited resources of source document are not imported to destination page"); |
| LOG.warn("call importedPage.setResources(page.getResources()) to do this"); |
| } |
| return importedPage; |
| } |
| |
/**
 * Constructor that wraps an existing document, without a source to read from.
 * The COSDocument that is passed in must be valid.
 *
 * @param doc The COSDocument that this document wraps.
 */
public PDDocument(COSDocument doc)
{
    this(doc, (RandomAccessRead) null);
}
| |
/**
 * Constructor that wraps an existing document, without access permissions.
 * The COSDocument that is passed in must be valid.
 *
 * @param doc The COSDocument that this document wraps.
 * @param source the source which is used to read the pdf
 */
public PDDocument(COSDocument doc, RandomAccessRead source)
{
    this(doc, source, (AccessPermission) null);
}
| |
/**
 * Constructor that wraps an existing document. The COSDocument that is passed in must be valid.
 *
 * @param doc The COSDocument that this document wraps.
 * @param source the source which is used to read the pdf
 * @param permission the access permissions of the pdf
 */
public PDDocument(COSDocument doc, RandomAccessRead source, AccessPermission permission)
{
    this.document = doc;
    this.pdfSource = source;
    this.accessPermission = permission;
}
| |
| /** |
| * This will get the low level document. |
| * |
| * @return The document that this layer sits on top of. |
| */ |
| public COSDocument getDocument() |
| { |
| return document; |
| } |
| |
| /** |
| * This will get the document info dictionary. This is guaranteed to not return null. |
| * |
| * @return The documents /Info dictionary |
| */ |
| public PDDocumentInformation getDocumentInformation() |
| { |
| if (documentInformation == null) |
| { |
| COSDictionary trailer = document.getTrailer(); |
| COSDictionary infoDic = (COSDictionary) trailer.getDictionaryObject(COSName.INFO); |
| if (infoDic == null) |
| { |
| infoDic = new COSDictionary(); |
| trailer.setItem(COSName.INFO, infoDic); |
| } |
| documentInformation = new PDDocumentInformation(infoDic); |
| } |
| return documentInformation; |
| } |
| |
| /** |
| * This will set the document information for this document. |
| * |
| * @param info The updated document information. |
| */ |
| public void setDocumentInformation(PDDocumentInformation info) |
| { |
| documentInformation = info; |
| document.getTrailer().setItem(COSName.INFO, info.getCOSObject()); |
| } |
| |
| /** |
| * This will get the document CATALOG. This is guaranteed to not return null. |
| * |
| * @return The documents /Root dictionary |
| */ |
| public PDDocumentCatalog getDocumentCatalog() |
| { |
| if (documentCatalog == null) |
| { |
| COSDictionary trailer = document.getTrailer(); |
| COSBase dictionary = trailer.getDictionaryObject(COSName.ROOT); |
| if (dictionary instanceof COSDictionary) |
| { |
| documentCatalog = new PDDocumentCatalog(this, (COSDictionary) dictionary); |
| } |
| else |
| { |
| documentCatalog = new PDDocumentCatalog(this); |
| } |
| } |
| return documentCatalog; |
| } |
| |
| /** |
| * This will tell if this document is encrypted or not. |
| * |
| * @return true If this document is encrypted. |
| */ |
| public boolean isEncrypted() |
| { |
| return document.isEncrypted(); |
| } |
| |
| /** |
| * This will get the encryption dictionary for this document. This will still return the parameters if the document |
| * was decrypted. As the encryption architecture in PDF documents is plugable this returns an abstract class, |
| * but the only supported subclass at this time is a |
| * PDStandardEncryption object. |
| * |
| * @return The encryption dictionary(most likely a PDStandardEncryption object) |
| */ |
| public PDEncryption getEncryption() |
| { |
| if (encryption == null && isEncrypted()) |
| { |
| encryption = new PDEncryption(document.getEncryptionDictionary()); |
| } |
| return encryption; |
| } |
| |
| /** |
| * This will set the encryption dictionary for this document. |
| * |
| * @param encryption The encryption dictionary(most likely a PDStandardEncryption object) |
| * |
| * @throws IOException If there is an error determining which security handler to use. |
| */ |
| public void setEncryptionDictionary(PDEncryption encryption) throws IOException |
| { |
| this.encryption = encryption; |
| } |
| |
| /** |
| * This will return the last signature. |
| * |
| * @return the last signature as <code>PDSignatureField</code>. |
| * @throws IOException if no document catalog can be found. |
| */ |
| public PDSignature getLastSignatureDictionary() throws IOException |
| { |
| List<PDSignature> signatureDictionaries = getSignatureDictionaries(); |
| int size = signatureDictionaries.size(); |
| if (size > 0) |
| { |
| return signatureDictionaries.get(size - 1); |
| } |
| return null; |
| } |
| |
| /** |
| * Retrieve all signature fields from the document. |
| * |
| * @return a <code>List</code> of <code>PDSignatureField</code>s |
| * @throws IOException if no document catalog can be found. |
| */ |
| public List<PDSignatureField> getSignatureFields() throws IOException |
| { |
| List<PDSignatureField> fields = new ArrayList<PDSignatureField>(); |
| PDAcroForm acroForm = getDocumentCatalog().getAcroForm(); |
| if (acroForm != null) |
| { |
| for (PDField field : acroForm.getFieldTree()) |
| { |
| if (field instanceof PDSignatureField) |
| { |
| fields.add((PDSignatureField)field); |
| } |
| } |
| } |
| return fields; |
| } |
| |
| /** |
| * Retrieve all signature dictionaries from the document. |
| * |
| * @return a <code>List</code> of <code>PDSignatureField</code>s |
| * @throws IOException if no document catalog can be found. |
| */ |
| public List<PDSignature> getSignatureDictionaries() throws IOException |
| { |
| List<PDSignature> signatures = new ArrayList<PDSignature>(); |
| for (PDSignatureField field : getSignatureFields()) |
| { |
| COSBase value = field.getCOSObject().getDictionaryObject(COSName.V); |
| if (value != null) |
| { |
| signatures.add(new PDSignature((COSDictionary)value)); |
| } |
| } |
| return signatures; |
| } |
| |
| /** |
| * Returns the list of fonts which will be subset before the document is saved. |
| */ |
| Set<PDFont> getFontsToSubset() |
| { |
| return fontsToSubset; |
| } |
| |
| /** |
| * Parses a PDF. Unrestricted main memory will be used for buffering PDF streams. |
| * |
| * @param file file to be loaded |
| * |
| * @return loaded document |
| * |
| * @throws IOException in case of a file reading or parsing error |
| */ |
| public static PDDocument load(File file) throws IOException |
| { |
| return load(file, "", MemoryUsageSetting.setupMainMemoryOnly()); |
| } |
| |
| /** |
| * Parses a PDF. |
| * |
| * @param file file to be loaded |
| * @param memUsageSetting defines how memory is used for buffering PDF streams |
| * |
| * @return loaded document |
| * |
| * @throws IOException in case of a file reading or parsing error |
| */ |
| public static PDDocument load(File file, MemoryUsageSetting memUsageSetting) throws IOException |
| { |
| return load(file, "", null, null, memUsageSetting); |
| } |
| |
| /** |
| * Parses a PDF. Unrestricted main memory will be used for buffering PDF streams. |
| * |
| * @param file file to be loaded |
| * @param password password to be used for decryption |
| * |
| * @return loaded document |
| * |
| * @throws IOException in case of a file reading or parsing error |
| */ |
| public static PDDocument load(File file, String password) throws IOException |
| { |
| return load(file, password, null, null, MemoryUsageSetting.setupMainMemoryOnly()); |
| } |
| |
| /** |
| * Parses a PDF. |
| * |
| * @param file file to be loaded |
| * @param password password to be used for decryption |
| * @param memUsageSetting defines how memory is used for buffering PDF streams |
| * |
| * @return loaded document |
| * |
| * @throws IOException in case of a file reading or parsing error |
| */ |
| public static PDDocument load(File file, String password, MemoryUsageSetting memUsageSetting) throws IOException |
| { |
| return load(file, password, null, null, memUsageSetting); |
| } |
| |
| /** |
| * Parses a PDF. Unrestricted main memory will be used for buffering PDF streams. |
| * |
| * @param file file to be loaded |
| * @param password password to be used for decryption |
| * @param keyStore key store to be used for decryption when using public key security |
| * @param alias alias to be used for decryption when using public key security |
| * |
| * @return loaded document |
| * |
| * @throws IOException in case of a file reading or parsing error |
| */ |
| public static PDDocument load(File file, String password, InputStream keyStore, String alias) |
| throws IOException |
| { |
| return load(file, password, keyStore, alias, MemoryUsageSetting.setupMainMemoryOnly()); |
| } |
| |
| /** |
| * Parses a PDF. |
| * |
| * @param file file to be loaded |
| * @param password password to be used for decryption |
| * @param keyStore key store to be used for decryption when using public key security |
| * @param alias alias to be used for decryption when using public key security |
| * @param memUsageSetting defines how memory is used for buffering PDF streams |
| * |
| * @return loaded document |
| * |
| * @throws IOException in case of a file reading or parsing error |
| */ |
| public static PDDocument load(File file, String password, InputStream keyStore, String alias, |
| MemoryUsageSetting memUsageSetting) throws IOException |
| { |
| RandomAccessBufferedFileInputStream raFile = new RandomAccessBufferedFileInputStream(file); |
| try |
| { |
| ScratchFile scratchFile = new ScratchFile(memUsageSetting); |
| try |
| { |
| PDFParser parser = new PDFParser(raFile, password, keyStore, alias, scratchFile); |
| parser.parse(); |
| return parser.getPDDocument(); |
| } |
| catch (IOException ioe) |
| { |
| IOUtils.closeQuietly(scratchFile); |
| throw ioe; |
| } |
| } |
| catch (IOException ioe) |
| { |
| IOUtils.closeQuietly(raFile); |
| throw ioe; |
| } |
| } |
| |
| /** |
| * Parses a PDF. The given input stream is copied to the memory to enable random access to the pdf. |
| * Unrestricted main memory will be used for buffering PDF streams. |
| * |
| * @param input stream that contains the document. |
| * |
| * @return loaded document |
| * |
| * @throws IOException in case of a file reading or parsing error |
| */ |
| public static PDDocument load(InputStream input) throws IOException |
| { |
| return load(input, "", null, null, MemoryUsageSetting.setupMainMemoryOnly()); |
| } |
| |
| /** |
| * Parses a PDF. Depending on the memory settings parameter the given input |
| * stream is either copied to main memory or to a temporary file to enable |
| * random access to the pdf. |
| * |
| * @param input stream that contains the document. |
| * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams |
| * |
| * @return loaded document |
| * |
| * @throws IOException in case of a file reading or parsing error |
| */ |
| public static PDDocument load(InputStream input, MemoryUsageSetting memUsageSetting) throws IOException |
| { |
| return load(input, "", null, null, memUsageSetting); |
| } |
| |
| /** |
| * Parses a PDF. The given input stream is copied to the memory to enable random access to the pdf. |
| * Unrestricted main memory will be used for buffering PDF streams. |
| * |
| * @param input stream that contains the document. |
| * @param password password to be used for decryption |
| * |
| * @return loaded document |
| * |
| * @throws IOException in case of a file reading or parsing error |
| */ |
| public static PDDocument load(InputStream input, String password) |
| throws IOException |
| { |
| return load(input, password, null, null, MemoryUsageSetting.setupMainMemoryOnly()); |
| } |
| |
| /** |
| * Parses a PDF. The given input stream is copied to the memory to enable random access to the pdf. |
| * Unrestricted main memory will be used for buffering PDF streams. |
| * |
| * @param input stream that contains the document. |
| * @param password password to be used for decryption |
| * @param keyStore key store to be used for decryption when using public key security |
| * @param alias alias to be used for decryption when using public key security |
| * |
| * @return loaded document |
| * |
| * @throws IOException in case of a file reading or parsing error |
| */ |
| public static PDDocument load(InputStream input, String password, InputStream keyStore, String alias) |
| throws IOException |
| { |
| return load(input, password, keyStore, alias, MemoryUsageSetting.setupMainMemoryOnly()); |
| } |
| |
| /** |
| * Parses a PDF. Depending on the memory settings parameter the given input |
| * stream is either copied to main memory or to a temporary file to enable |
| * random access to the pdf. |
| * |
| * @param input stream that contains the document. |
| * @param password password to be used for decryption |
| * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams |
| * |
| * @return loaded document |
| * |
| * @throws IOException in case of a file reading or parsing error |
| */ |
| public static PDDocument load(InputStream input, String password, MemoryUsageSetting memUsageSetting) |
| throws IOException |
| { |
| return load(input, password, null, null, memUsageSetting); |
| } |
| |
| /** |
| * Parses a PDF. Depending on the memory settings parameter the given input |
| * stream is either copied to memory or to a temporary file to enable |
| * random access to the pdf. |
| * |
| * @param input stream that contains the document. |
| * @param password password to be used for decryption |
| * @param keyStore key store to be used for decryption when using public key security |
| * @param alias alias to be used for decryption when using public key security |
| * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams |
| * |
| * @return loaded document |
| * |
| * @throws IOException in case of a file reading or parsing error |
| */ |
| public static PDDocument load(InputStream input, String password, InputStream keyStore, |
| String alias, MemoryUsageSetting memUsageSetting) throws IOException |
| { |
| ScratchFile scratchFile = new ScratchFile(memUsageSetting); |
| try |
| { |
| RandomAccessRead source = scratchFile.createBuffer(input); |
| PDFParser parser = new PDFParser(source, password, keyStore, alias, scratchFile); |
| parser.parse(); |
| return parser.getPDDocument(); |
| } |
| catch (IOException ioe) |
| { |
| IOUtils.closeQuietly(scratchFile); |
| throw ioe; |
| } |
| } |
| |
| /** |
| * Parses a PDF. Unrestricted main memory will be used for buffering PDF streams. |
| * |
| * @param input byte array that contains the document. |
| * |
| * @return loaded document |
| * |
| * @throws IOException in case of a file reading or parsing error |
| */ |
| public static PDDocument load(byte[] input) throws IOException |
| { |
| return load(input, ""); |
| } |
| |
| /** |
| * Parses a PDF. Unrestricted main memory will be used for buffering PDF streams. |
| * |
| * @param input byte array that contains the document. |
| * @param password password to be used for decryption |
| * |
| * @return loaded document |
| * |
| * @throws IOException in case of a file reading or parsing error |
| */ |
| public static PDDocument load(byte[] input, String password) throws IOException |
| { |
| return load(input, password, null, null); |
| } |
| |
| /** |
| * Parses a PDF. Unrestricted main memory will be used for buffering PDF streams. |
| * |
| * @param input byte array that contains the document. |
| * @param password password to be used for decryption |
| * @param keyStore key store to be used for decryption when using public key security |
| * @param alias alias to be used for decryption when using public key security |
| * |
| * @return loaded document |
| * |
| * @throws IOException in case of a file reading or parsing error |
| */ |
| public static PDDocument load(byte[] input, String password, InputStream keyStore, |
| String alias) throws IOException |
| { |
| return load(input, password, keyStore, alias, MemoryUsageSetting.setupMainMemoryOnly()); |
| } |
| |
| /** |
| * Parses a PDF. |
| * |
| * @param input byte array that contains the document. |
| * @param password password to be used for decryption |
| * @param keyStore key store to be used for decryption when using public key security |
| * @param alias alias to be used for decryption when using public key security |
| * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams |
| * |
| * @return loaded document |
| * |
| * @throws IOException in case of a file reading or parsing error |
| */ |
| public static PDDocument load(byte[] input, String password, InputStream keyStore, |
| String alias, MemoryUsageSetting memUsageSetting) throws IOException |
| { |
| ScratchFile scratchFile = new ScratchFile(memUsageSetting); |
| RandomAccessRead source = new RandomAccessBuffer(input); |
| PDFParser parser = new PDFParser(source, password, keyStore, alias, scratchFile); |
| parser.parse(); |
| return parser.getPDDocument(); |
| } |
| |
| /** |
| * Save the document to a file. |
| * |
| * @param fileName The file to save as. |
| * |
| * @throws IOException if the output could not be written |
| */ |
| public void save(String fileName) throws IOException |
| { |
| save(new File(fileName)); |
| } |
| |
| /** |
| * Save the document to a file. |
| * |
| * @param file The file to save as. |
| * |
| * @throws IOException if the output could not be written |
| */ |
| public void save(File file) throws IOException |
| { |
| save(new BufferedOutputStream(new FileOutputStream(file))); |
| } |
| |
| /** |
| * This will save the document to an output stream. |
| * |
| * @param output The stream to write to. |
| * |
| * @throws IOException if the output could not be written |
| */ |
| public void save(OutputStream output) throws IOException |
| { |
| if (document.isClosed()) |
| { |
| throw new IOException("Cannot save a document which has been closed"); |
| } |
| |
| // subset designated fonts |
| for (PDFont font : fontsToSubset) |
| { |
| font.subset(); |
| } |
| fontsToSubset.clear(); |
| |
| // save PDF |
| COSWriter writer = new COSWriter(output); |
| try |
| { |
| writer.write(this); |
| writer.close(); |
| } |
| finally |
| { |
| writer.close(); |
| } |
| } |
| |
| /** |
| * Save the PDF as an incremental update. This is only possible if the PDF was loaded from a |
| * file or a stream, not if the document was created in PDFBox itself. |
| * |
| * @param output stream to write |
| * @throws IOException if the output could not be written |
| * @throws IllegalStateException if the document was not loaded from a file or a stream. |
| */ |
| public void saveIncremental(OutputStream output) throws IOException |
| { |
| COSWriter writer = null; |
| try |
| { |
| if (pdfSource == null) |
| { |
| throw new IllegalStateException("document was not loaded from a file or a stream"); |
| } |
| writer = new COSWriter(output, pdfSource); |
| writer.write(this, signInterface); |
| writer.close(); |
| } |
| finally |
| { |
| if (writer != null) |
| { |
| writer.close(); |
| } |
| } |
| } |
| |
| /** |
| * <p> |
| * <b>(This is a new feature for 2.0.3. The API for external signing might change based on feedback after release!)</b> |
| * <p> |
| * Save PDF incrementally without closing for external signature creation scenario. The general |
| * sequence is: |
| * <pre> |
| * PDDocument pdDocument = ...; |
| * OutputStream outputStream = ...; |
| * SignatureOptions signatureOptions = ...; // options to specify fine tuned signature options or null for defaults |
| * PDSignature pdSignature = ...; |
| * |
| * // add signature parameters to be used when creating signature dictionary |
| * pdDocument.addSignature(pdSignature, signatureOptions); |
| * // prepare PDF for signing and obtain helper class to be used |
| * ExternalSigningSupport externalSigningSupport = pdDocument.saveIncrementalForExternalSigning(outputStream); |
| * // get data to be signed |
| * InputStream dataToBeSigned = externalSigningSupport.getContent(); |
| * // invoke signature service |
| * byte[] signature = sign(dataToBeSigned); |
| * // set resulted CMS signature |
| * externalSigningSupport.setSignature(signature); |
| * |
| * // last step is to close the document |
| * pdDocument.close(); |
| * </pre> |
| * <p> |
| * Note that after calling this method, only {@code close()} method may invoked for |
| * {@code PDDocument} instance and only AFTER {@link ExternalSigningSupport} instance is used. |
| * </p> |
| * |
| * @param output stream to write final PDF |
| * @return instance to be used for external signing and setting CMS signature |
| * @throws IOException if the output could not be written |
| * @throws IllegalStateException if the document was not loaded from a file or a stream or |
| * signature optionss were not set. |
| */ |
| public ExternalSigningSupport saveIncrementalForExternalSigning(OutputStream output) throws IOException |
| { |
| if (pdfSource == null) |
| { |
| throw new IllegalStateException("document was not loaded from a file or a stream"); |
| } |
| COSWriter writer = new COSWriter(output, pdfSource); |
| writer.write(this); |
| signingSupport = new SigningSupport(writer); |
| return signingSupport; |
| } |
| |
| /** |
| * Returns the page at the given index. |
| * |
| * @param pageIndex the page index |
| * @return the page at the given index. |
| */ |
| public PDPage getPage(int pageIndex) // todo: REPLACE most calls to this method with BELOW method |
| { |
| return getDocumentCatalog().getPages().get(pageIndex); |
| } |
| |
| /** |
| * Returns the page tree. |
| * |
| * @return the page tree |
| */ |
| public PDPageTree getPages() |
| { |
| return getDocumentCatalog().getPages(); |
| } |
| |
| /** |
| * This will return the total page count of the PDF document. |
| * |
| * @return The total number of pages in the PDF document. |
| */ |
| public int getNumberOfPages() |
| { |
| return getDocumentCatalog().getPages().getCount(); |
| } |
| |
| /** |
| * This will close the underlying COSDocument object. |
| * |
| * @throws IOException If there is an error releasing resources. |
| */ |
| @Override |
| public void close() throws IOException |
| { |
| if (!document.isClosed()) |
| { |
| // close resources and COSWriter |
| if (signingSupport != null) |
| { |
| signingSupport.close(); |
| } |
| |
| // close all intermediate I/O streams |
| document.close(); |
| |
| // close the source PDF stream, if we read from one |
| if (pdfSource != null) |
| { |
| pdfSource.close(); |
| } |
| } |
| } |
| |
| /** |
| * Protects the document with a protection policy. The document content will be really |
| * encrypted when it will be saved. This method only marks the document for encryption. It also |
| * calls {@link #setAllSecurityToBeRemoved(boolean)} with a false argument if it was set to true |
| * previously and logs a warning. |
| * |
| * @see org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy |
| * @see org.apache.pdfbox.pdmodel.encryption.PublicKeyProtectionPolicy |
| * |
| * @param policy The protection policy. |
| * @throws IOException if there isn't any suitable security handler. |
| */ |
| public void protect(ProtectionPolicy policy) throws IOException |
| { |
| if (isAllSecurityToBeRemoved()) |
| { |
| LOG.warn("do not call setAllSecurityToBeRemoved(true) before calling protect(), " |
| + "as protect() implies setAllSecurityToBeRemoved(false)"); |
| setAllSecurityToBeRemoved(false); |
| } |
| |
| if (!isEncrypted()) |
| { |
| encryption = new PDEncryption(); |
| } |
| |
| SecurityHandler securityHandler = SecurityHandlerFactory.INSTANCE.newSecurityHandlerForPolicy(policy); |
| if (securityHandler == null) |
| { |
| throw new IOException("No security handler for policy " + policy); |
| } |
| |
| getEncryption().setSecurityHandler(securityHandler); |
| } |
| |
| /** |
| * Returns the access permissions granted when the document was decrypted. If the document was not decrypted this |
| * method returns the access permission for a document owner (ie can do everything). The returned object is in read |
| * only mode so that permissions cannot be changed. Methods providing access to content should rely on this object |
| * to verify if the current user is allowed to proceed. |
| * |
| * @return the access permissions for the current user on the document. |
| */ |
| public AccessPermission getCurrentAccessPermission() |
| { |
| if (accessPermission == null) |
| { |
| accessPermission = AccessPermission.getOwnerAccessPermission(); |
| } |
| return accessPermission; |
| } |
| |
| /** |
| * Indicates if all security is removed or not when writing the pdf. |
| * |
| * @return returns true if all security shall be removed otherwise false |
| */ |
| public boolean isAllSecurityToBeRemoved() |
| { |
| return allSecurityToBeRemoved; |
| } |
| |
| /** |
| * Activates/Deactivates the removal of all security when writing the pdf. |
| * |
| * @param removeAllSecurity remove all security if set to true |
| */ |
| public void setAllSecurityToBeRemoved(boolean removeAllSecurity) |
| { |
| allSecurityToBeRemoved = removeAllSecurity; |
| } |
| |
| /** |
| * Provides the document ID. |
| * |
| * @return the dcoument ID |
| */ |
| public Long getDocumentId() |
| { |
| return documentId; |
| } |
| |
| /** |
| * Sets the document ID to the given value. |
| * |
| * @param docId the new document ID |
| */ |
| public void setDocumentId(Long docId) |
| { |
| documentId = docId; |
| } |
| |
| /** |
| * Returns the PDF specification version this document conforms to. |
| * |
| * @return the PDF version (e.g. 1.4f) |
| */ |
| public float getVersion() |
| { |
| float headerVersionFloat = getDocument().getVersion(); |
| // there may be a second version information in the document catalog starting with 1.4 |
| if (headerVersionFloat >= 1.4f) |
| { |
| String catalogVersion = getDocumentCatalog().getVersion(); |
| float catalogVersionFloat = -1; |
| if (catalogVersion != null) |
| { |
| try |
| { |
| catalogVersionFloat = Float.parseFloat(catalogVersion); |
| } |
| catch(NumberFormatException exception) |
| { |
| LOG.error("Can't extract the version number of the document catalog.", exception); |
| } |
| } |
| // the most recent version is the correct one |
| return Math.max(catalogVersionFloat, headerVersionFloat); |
| } |
| else |
| { |
| return headerVersionFloat; |
| } |
| } |
| |
| /** |
| * Sets the PDF specification version for this document. |
| * |
| * @param newVersion the new PDF version (e.g. 1.4f) |
| * |
| */ |
| public void setVersion(float newVersion) |
| { |
| float currentVersion = getVersion(); |
| // nothing to do? |
| if (newVersion == currentVersion) |
| { |
| return; |
| } |
| // the version can't be downgraded |
| if (newVersion < currentVersion) |
| { |
| LOG.error("It's not allowed to downgrade the version of a pdf."); |
| return; |
| } |
| // update the catalog version if the document version is >= 1.4 |
| if (getDocument().getVersion() >= 1.4f) |
| { |
| getDocumentCatalog().setVersion(Float.toString(newVersion)); |
| } |
| else |
| { |
| // versions < 1.4f have a version header only |
| getDocument().setVersion(newVersion); |
| } |
| } |
| |
| /** |
| * Returns the resource cache associated with this document, or null if there is none. |
| */ |
| public ResourceCache getResourceCache() |
| { |
| return resourceCache; |
| } |
| |
/**
 * Sets the resource cache associated with this document, replacing any cache stored before.
 *
 * @param resourceCache A resource cache, or null.
 */
public void setResourceCache(ResourceCache resourceCache)
{
    this.resourceCache = resourceCache;
}
| } |