FOP-2645: Deduplicate PDF streams
git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop-pdf-images/trunk@1759382 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/java/org/apache/fop/render/pdf/pdfbox/AbstractPDFBoxHandler.java b/src/java/org/apache/fop/render/pdf/pdfbox/AbstractPDFBoxHandler.java
index b9e1f0e..baeb42b 100644
--- a/src/java/org/apache/fop/render/pdf/pdfbox/AbstractPDFBoxHandler.java
+++ b/src/java/org/apache/fop/render/pdf/pdfbox/AbstractPDFBoxHandler.java
@@ -91,7 +91,7 @@
getEventProducer(eventBroadcaster).pdfXActive(this);
}
- Map<Object, Object> objectCache = getObjectCache(originalImageUri, userAgent);
+ Map<Object, Object> objectCachePerFile = getObjectCache(getImagePath(originalImageUri), userAgent);
PDPage page = pddoc.getDocumentCatalog().getPages().get(selectedPage);
@@ -102,7 +102,9 @@
targetPage.put("Resources", res);
}
- PDFBoxAdapter adapter = new PDFBoxAdapter(targetPage, objectCache, pageNumbers);
+ Map<Object, Object> objectCache = getObjectCache(getClass().getName(), userAgent);
+ PDFBoxAdapter adapter =
+ new PDFBoxAdapter(targetPage, objectCachePerFile, pageNumbers, objectCache);
if (handler != null) {
adapter.setCurrentMCID(handler.getPageParentTree().length());
}
@@ -115,9 +117,8 @@
return stream;
}
- private Map<Object, Object> getObjectCache(String originalImageUri, FOUserAgent userAgent) {
+ private Map<Object, Object> getObjectCache(String path, FOUserAgent userAgent) {
SoftMapCache objectCache = userAgent.getPDFObjectCache();
- String path = getImagePath(originalImageUri);
if (objectCache.get(path) == null) {
objectCache.put(path, new HashMap<Object, Object>());
}
diff --git a/src/java/org/apache/fop/render/pdf/pdfbox/PDFBoxAdapter.java b/src/java/org/apache/fop/render/pdf/pdfbox/PDFBoxAdapter.java
index e3b1522..213fe5a 100644
--- a/src/java/org/apache/fop/render/pdf/pdfbox/PDFBoxAdapter.java
+++ b/src/java/org/apache/fop/render/pdf/pdfbox/PDFBoxAdapter.java
@@ -93,7 +93,8 @@
private final PDFPage targetPage;
private final PDFDocument pdfDoc;
- private final Map clonedVersion;
+ private final Map<Object, Object> clonedVersion;
+ private final Map<Object, Object> objectCache;
private Map<COSName, String> newXObj = new HashMap<COSName, String>();
private Map<Integer, PDFArray> pageNumbers;
private Collection<String> parentFonts = new ArrayList<String>();
@@ -103,14 +104,21 @@
/**
* Creates a new PDFBoxAdapter.
* @param targetPage The target FOP PDF page object
- * @param objectCache the object cache for reusing objects shared by multiple pages.
+ * @param objectCachePerFile the per-file object cache for reusing objects shared by multiple pages of the same source file.
* @param pageNumbers references to page object numbers
*/
- public PDFBoxAdapter(PDFPage targetPage, Map objectCache, Map<Integer, PDFArray> pageNumbers) {
+ public PDFBoxAdapter(PDFPage targetPage, Map<Object, Object> objectCachePerFile,
+ Map<Integer, PDFArray> pageNumbers) {
+ this(targetPage, objectCachePerFile, pageNumbers, new HashMap<Object, Object>());
+ }
+
+ public PDFBoxAdapter(PDFPage targetPage, Map<Object, Object> objectCachePerFile,
+ Map<Integer, PDFArray> pageNumbers, Map<Object, Object> objectCache) {
this.targetPage = targetPage;
this.pdfDoc = this.targetPage.getDocument();
- this.clonedVersion = objectCache;
+ this.clonedVersion = objectCachePerFile;
this.pageNumbers = pageNumbers;
+ this.objectCache = objectCache;
}
public PDFPage getTargetPage() {
@@ -239,7 +247,7 @@
return obj;
}
- private Object readCOSString(COSString string, Object keyBase) {
+ private Object readCOSString(COSString string, Object keyBase) throws IOException {
//retval = ((COSString)base).getString(); //this is unsafe for binary content
byte[] bytes = string.getBytes();
//Be on the safe side and use the byte array to avoid encoding problems
@@ -275,11 +283,16 @@
return cacheClonedObject(keyBase, stream);
}
- protected Object getCachedClone(Object base) {
- return clonedVersion.get(getBaseKey(base));
+    protected Object getCachedClone(Object base) throws IOException {
+        final Object cacheKey = getBaseKey(base);
+        final Object perFileHit = clonedVersion.get(cacheKey);
+        if (perFileHit != null) {
+            return perFileHit; // found in the per-file clone map
+        }
+        return objectCache.get(cacheKey); // otherwise consult the second cache map
}
- protected Object cacheClonedObject(Object base, Object cloned) {
+ protected Object cacheClonedObject(Object base, Object cloned) throws IOException {
Object key = getBaseKey(base);
if (key == null) {
return cloned;
@@ -293,12 +306,22 @@
}
}
clonedVersion.put(key, cloned);
+ if (key instanceof Integer) {
+ objectCache.put(key, cloned);
+ }
return cloned;
}
- private Object getBaseKey(Object base) {
+ private Object getBaseKey(Object base) throws IOException {
if (base instanceof COSObject) {
COSObject obj = (COSObject)base;
+ COSBase o = obj.getObject();
+ if (o instanceof COSStream) {
+ Integer hash = getStreamHash((COSStream) o);
+ if (hash != null) {
+ return hash;
+ }
+ }
return obj.getObjectNumber() + " " + obj.getGenerationNumber();
} else if (base instanceof COSDictionary) {
return base;
@@ -307,6 +330,34 @@
}
}
+    /**
+     * Computes a content-based key for a stream so that identical streams map to the same
+     * cache entry. Streams whose dictionary holds a {@link COSObject} or
+     * {@link COSDictionary} value are not hashed.
+     * NOTE(review): the key is only a 32-bit hash; two different streams that collide would
+     * share one cache entry - consider a stronger digest.
+     *
+     * @param o the stream to hash
+     * @return the hash of the bytes from {@code getFilteredStream()} combined with
+     *         {@code o.toString()}, or {@code null} if the stream is not hashed
+     * @throws IOException if the stream data cannot be read or closed
+     */
+    private Integer getStreamHash(COSStream o) throws IOException {
+        for (COSBase x : o.getValues()) {
+            if (x instanceof COSObject || x instanceof COSDictionary) {
+                return null;
+            }
+        }
+        InputStream stream = o.getFilteredStream();
+        try {
+            byte[] b = IOUtils.toByteArray(stream);
+            return Arrays.deepHashCode(new Object[]{b, o.toString()});
+        } finally {
+            // Always release the stream, even when reading fails.
+            stream.close();
+        }
+    }
+
private void transferDict(COSDictionary orgDict, PDFStream targetDict, Set filter) throws IOException {
transferDict(orgDict, targetDict, filter, false);
}
diff --git a/test/java/org/apache/fop/render/pdf/PDFBoxAdapterTestCase.java b/test/java/org/apache/fop/render/pdf/PDFBoxAdapterTestCase.java
index 8d67f93..f956127 100644
--- a/test/java/org/apache/fop/render/pdf/PDFBoxAdapterTestCase.java
+++ b/test/java/org/apache/fop/render/pdf/PDFBoxAdapterTestCase.java
@@ -71,6 +71,7 @@
import org.apache.fop.pdf.PDFGState;
import org.apache.fop.pdf.PDFPage;
import org.apache.fop.pdf.PDFResources;
+import org.apache.fop.pdf.PDFStream;
import org.apache.fop.render.pdf.pdfbox.FOPPDFMultiByteFont;
import org.apache.fop.render.pdf.pdfbox.FOPPDFSingleByteFont;
import org.apache.fop.render.pdf.pdfbox.ImageConverterPDF2G2D;
@@ -511,4 +512,35 @@
res.output(bos);
Assert.assertTrue(bos.toString("UTF-8").contains("/ExtGState << /GS1"));
}
+
+    /**
+     * Converts the first page of the LOOP test PDF and checks the entries the adapter
+     * stored in the cache map passed as its fourth constructor argument.
+     *
+     * @throws IOException if the PDF cannot be loaded or converted
+     */
+    @Test
+    public void testPDFCache() throws IOException {
+        PDFDocument pdfdoc = new PDFDocument("");
+        PDFPage pdfpage = new PDFPage(new PDFResources(pdfdoc), 0, r, r, r, r);
+        pdfdoc.assignObjectNumber(pdfpage);
+        pdfpage.setDocument(pdfdoc);
+        Map<Object, Object> pdfCache = new HashMap<Object, Object>();
+        PDFBoxAdapter adapter = new PDFBoxAdapter(
+                pdfpage, new HashMap<Object, Object>(), new HashMap<Integer, PDFArray>(), pdfCache);
+        PDDocument doc = PDDocument.load(new File(LOOP));
+        try {
+            PDPage page = doc.getDocumentCatalog().getPages().get(0);
+            adapter.createStreamFromPDFBoxPage(
+                    doc, page, "key", new AffineTransform(), null, new Rectangle());
+        } finally {
+            // Close the source document even if the conversion throws.
+            doc.close();
+        }
+
+        Object item = pdfCache.values().iterator().next();
+        // JUnit's assertEquals takes (expected, actual).
+        Assert.assertEquals(PDFStream.class, item.getClass());
+        Assert.assertEquals(11, pdfCache.size());
+    }
}