FOP-2645: Deduplicate PDF streams

git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop-pdf-images/trunk@1759382 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/java/org/apache/fop/render/pdf/pdfbox/AbstractPDFBoxHandler.java b/src/java/org/apache/fop/render/pdf/pdfbox/AbstractPDFBoxHandler.java
index b9e1f0e..baeb42b 100644
--- a/src/java/org/apache/fop/render/pdf/pdfbox/AbstractPDFBoxHandler.java
+++ b/src/java/org/apache/fop/render/pdf/pdfbox/AbstractPDFBoxHandler.java
@@ -91,7 +91,7 @@
             getEventProducer(eventBroadcaster).pdfXActive(this);
         }
 
-        Map<Object, Object> objectCache = getObjectCache(originalImageUri, userAgent);
+        Map<Object, Object> objectCachePerFile = getObjectCache(getImagePath(originalImageUri), userAgent);
 
         PDPage page = pddoc.getDocumentCatalog().getPages().get(selectedPage);
 
@@ -102,7 +102,9 @@
             targetPage.put("Resources", res);
         }
 
-        PDFBoxAdapter adapter = new PDFBoxAdapter(targetPage, objectCache, pageNumbers);
+        Map<Object, Object> objectCache = getObjectCache(getClass().getName(), userAgent);
+        PDFBoxAdapter adapter =
+                new PDFBoxAdapter(targetPage, objectCachePerFile, pageNumbers, objectCache);
         if (handler != null) {
             adapter.setCurrentMCID(handler.getPageParentTree().length());
         }
@@ -115,9 +117,8 @@
         return stream;
     }
 
-    private Map<Object, Object> getObjectCache(String originalImageUri, FOUserAgent userAgent) {
+    private Map<Object, Object> getObjectCache(String path, FOUserAgent userAgent) {
         SoftMapCache objectCache = userAgent.getPDFObjectCache();
-        String path = getImagePath(originalImageUri);
         if (objectCache.get(path) == null) {
             objectCache.put(path, new HashMap<Object, Object>());
         }
diff --git a/src/java/org/apache/fop/render/pdf/pdfbox/PDFBoxAdapter.java b/src/java/org/apache/fop/render/pdf/pdfbox/PDFBoxAdapter.java
index e3b1522..213fe5a 100644
--- a/src/java/org/apache/fop/render/pdf/pdfbox/PDFBoxAdapter.java
+++ b/src/java/org/apache/fop/render/pdf/pdfbox/PDFBoxAdapter.java
@@ -93,7 +93,8 @@
     private final PDFPage targetPage;
     private final PDFDocument pdfDoc;
 
-    private final Map clonedVersion;
+    private final Map<Object, Object> clonedVersion;
+    private final Map<Object, Object> objectCache;
     private Map<COSName, String> newXObj = new HashMap<COSName, String>();
     private Map<Integer, PDFArray> pageNumbers;
     private Collection<String> parentFonts = new ArrayList<String>();
@@ -103,14 +104,21 @@
     /**
      * Creates a new PDFBoxAdapter.
      * @param targetPage The target FOP PDF page object
-     * @param objectCache the object cache for reusing objects shared by multiple pages.
+     * @param objectCachePerFile the object cache for reusing objects shared by multiple pages.
      * @param pageNumbers references to page object numbers
      */
-    public PDFBoxAdapter(PDFPage targetPage, Map objectCache, Map<Integer, PDFArray> pageNumbers) {
+    public PDFBoxAdapter(PDFPage targetPage, Map<Object, Object> objectCachePerFile,
+                         Map<Integer, PDFArray> pageNumbers) {
+        this(targetPage, objectCachePerFile, pageNumbers, new HashMap<Object, Object>());
+    }
+
+    public PDFBoxAdapter(PDFPage targetPage, Map<Object, Object> objectCachePerFile,
+                         Map<Integer, PDFArray> pageNumbers, Map<Object, Object> objectCache) {
         this.targetPage = targetPage;
         this.pdfDoc = this.targetPage.getDocument();
-        this.clonedVersion = objectCache;
+        this.clonedVersion = objectCachePerFile;
         this.pageNumbers = pageNumbers;
+        this.objectCache = objectCache;
     }
 
     public PDFPage getTargetPage() {
@@ -239,7 +247,7 @@
         return obj;
     }
 
-    private Object readCOSString(COSString string, Object keyBase) {
+    private Object readCOSString(COSString string, Object keyBase) throws IOException {
         //retval = ((COSString)base).getString(); //this is unsafe for binary content
         byte[] bytes = string.getBytes();
         //Be on the safe side and use the byte array to avoid encoding problems
@@ -275,11 +283,16 @@
         return cacheClonedObject(keyBase, stream);
     }
 
-    protected Object getCachedClone(Object base) {
-        return clonedVersion.get(getBaseKey(base));
+    protected Object getCachedClone(Object base) throws IOException {
+        // Look in the per-file map first and fall back to the second cache on a miss.
+        // Both lookups use the same key derived from the source object.
+        final Object lookupKey = getBaseKey(base);
+        final Object perFileClone = clonedVersion.get(lookupKey);
+        // A null per-file result means a miss (or a stored null), so try objectCache.
+        return perFileClone != null ? perFileClone : objectCache.get(lookupKey);
     }
 
-    protected Object cacheClonedObject(Object base, Object cloned) {
+    protected Object cacheClonedObject(Object base, Object cloned) throws IOException {
         Object key = getBaseKey(base);
         if (key == null) {
             return cloned;
@@ -293,12 +306,22 @@
             }
         }
         clonedVersion.put(key, cloned);
+        if (key instanceof Integer) {
+            objectCache.put(key, cloned);
+        }
         return cloned;
     }
 
-    private Object getBaseKey(Object base) {
+    private Object getBaseKey(Object base) throws IOException {
         if (base instanceof COSObject) {
             COSObject obj = (COSObject)base;
+            COSBase o = obj.getObject();
+            if (o instanceof COSStream) {
+                Integer hash = getStreamHash((COSStream) o);
+                if (hash != null) {
+                    return hash;
+                }
+            }
             return obj.getObjectNumber() + " " + obj.getGenerationNumber();
         } else if (base instanceof COSDictionary) {
             return base;
@@ -307,6 +330,32 @@
         }
     }
 
+    /**
+     * Computes a content-based hash for a stream whose dictionary holds only simple values.
+     * <p>
+     * The hash combines the bytes returned by getFilteredStream() with the stream's string
+     * form and serves as a cache key for cloned streams. NOTE(review): the key is only a
+     * 32-bit value, so two different streams could map to the same key - confirm acceptable.
+     * @param o the stream to hash
+     * @return the hash, or null if any dictionary value is a COSObject or COSDictionary
+     * @throws IOException if the stream data cannot be read
+     */
+    private Integer getStreamHash(COSStream o) throws IOException {
+        for (COSBase x : o.getValues()) {
+            if (x instanceof COSObject || x instanceof COSDictionary) {
+                return null;
+            }
+        }
+        InputStream stream = o.getFilteredStream();
+        try {
+            byte[] b = IOUtils.toByteArray(stream);
+            return Arrays.deepHashCode(new Object[]{b, o.toString()});
+        } finally {
+            // Nothing else in this method closes the stream, so release it here.
+            stream.close();
+        }
+    }
+
     private void transferDict(COSDictionary orgDict, PDFStream targetDict, Set filter) throws IOException {
         transferDict(orgDict, targetDict, filter, false);
     }
diff --git a/test/java/org/apache/fop/render/pdf/PDFBoxAdapterTestCase.java b/test/java/org/apache/fop/render/pdf/PDFBoxAdapterTestCase.java
index 8d67f93..f956127 100644
--- a/test/java/org/apache/fop/render/pdf/PDFBoxAdapterTestCase.java
+++ b/test/java/org/apache/fop/render/pdf/PDFBoxAdapterTestCase.java
@@ -71,6 +71,7 @@
 import org.apache.fop.pdf.PDFGState;
 import org.apache.fop.pdf.PDFPage;
 import org.apache.fop.pdf.PDFResources;
+import org.apache.fop.pdf.PDFStream;
 import org.apache.fop.render.pdf.pdfbox.FOPPDFMultiByteFont;
 import org.apache.fop.render.pdf.pdfbox.FOPPDFSingleByteFont;
 import org.apache.fop.render.pdf.pdfbox.ImageConverterPDF2G2D;
@@ -511,4 +512,33 @@
         res.output(bos);
         Assert.assertTrue(bos.toString("UTF-8").contains("/ExtGState << /GS1"));
     }
+
+    /**
+     * Clones the first page of the LOOP test file and checks that the cache passed as the
+     * fourth constructor argument ends up holding 11 entries, the first seen being a PDFStream.
+     */
+    @Test
+    public void testPDFCache() throws IOException {
+        PDFDocument pdfdoc = new PDFDocument("");
+        PDFPage pdfpage = new PDFPage(new PDFResources(pdfdoc), 0, r, r, r, r);
+        pdfdoc.assignObjectNumber(pdfpage);
+        pdfpage.setDocument(pdfdoc);
+        Map<Object, Object> pdfCache = new HashMap<Object, Object>();
+        PDFBoxAdapter adapter = new PDFBoxAdapter(
+                pdfpage, new HashMap<Object, Object>(), new HashMap<Integer, PDFArray>(), pdfCache);
+        PDDocument doc = PDDocument.load(new File(LOOP));
+        try {
+            PDPage page = doc.getDocumentCatalog().getPages().get(0);
+            adapter.createStreamFromPDFBoxPage(
+                    doc, page, "key", new AffineTransform(), null, new Rectangle());
+        } finally {
+            // Close the source document even when cloning fails.
+            doc.close();
+        }
+
+        // JUnit's assertEquals takes (expected, actual); the reverse order garbles failure messages.
+        Assert.assertEquals(11, pdfCache.size());
+        Object item = pdfCache.values().iterator().next();
+        Assert.assertEquals(PDFStream.class, item.getClass());
+    }
 }