blob: cf1ccefb7e4424c3744c5db76154822f389e2def [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="The Apache PDFBox™ library is an open source Java tool for working with PDF documents. This project allows creation of new PDF documents, manipulation of existing documents and the ability to extract content from documents. Apache PDFBox also includes several command-line utilities. Apache PDFBox is published under the Apache License v2.0.">
<title>Apache PDFBox | PDFBox 3.0 Migration Guide</title>
<link href="/bootstrap/css/bootstrap.min.css" rel="stylesheet">
<link href="/css/prism.css" rel="stylesheet">
<link href="/css/styles.css" rel="stylesheet">
</head>
<body>
<header class="main-header">
<div class="main-header-logo">
<a href="/" aria-label="Navigation to the PDFBox home page">
<svg focusable="false" class="pdfbox-brand-toolbox" viewBox="0 0 744.09448819 1052.3622047" xmlns="http://www.w3.org/2000/svg" aria-labelledby="brandImageTitle brandImageDesc" role="img">
<title id="brandImageTitle">PDFBox Brand Logo</title>
<desc id="brandImageDesc">The PDFBox logo showing a toolbox.</desc>
<g transform="matrix(1.25 0 0 -1.25 -317.14 1018.08)" clip-path="url(#clipPath3375)">
<path d="M821.924 376.535L463.24 122.525l-203.83 76.86c23.89 6.02 46.87 15.197 68.335 27.29 60.063 33.835 105.686 88.46 128.282 153.59 2.634 4.66 8.11 6.92 13.265 5.47 4.667-1.31 8.01-5.41 8.353-10.247l-3.572-188.12 334.99 193.957c2.41 1 5.113 1.028 7.54.075 2.315-.907 4.21-2.64 5.32-4.865zm-1.307 97.91l.13-78.324c-.87-2.72-3.342-4.61-6.197-4.75-3.104-.14-5.924 1.8-6.893 4.75l1.29 79.54c.386 2.92 2.893 5.09 5.835 5.04 3.37-.04 6.022-2.89 5.835-6.25z"/>
<path d="M751.88 651.666c-.237.002-.48-.022-.723-.077l-363.512-25.15c-10.37-.73-19.583-6.76-24.588-15.87-6.943-12.64-4.677-28.26 5.864-37.72 2.85-2.56 6.22-4.49 8.97-7.16 2.19-2.12 3.93-4.65 5.12-7.45 23.7-26.86 30.3-64.76 17.08-98.05-4.75-11.96-11.97-22.77-21.2-31.73l-73.74 16.1 46.69 115.89c2.01 3.07.64 7.21-2.8 8.49-2.8 1.03-5.91-.34-7.03-3.1L286.49 448.8l-11.79 4.32c-2.097.28-4.21-.39-5.76-1.825-1.16-1.077-1.925-2.516-2.16-4.083l-11.53-189.68c1.66-1.83 4.03-2.85 6.5-2.798 2.533.05 4.913 1.22 6.5 3.197 6.22 22.46 11.216 45.23 14.96 68.22 5.137 31.55 7.91 63.43 8.3 95.39l136.152-15.28c2.396-1.09 5.032-1.56 7.66-1.35 2.75.22 5.396 1.16 7.663 2.73 41.62 19.22 83.375 38.15 125.26 56.79 41.615 18.52 83.36 36.75 125.23 54.68 1.935-.16 3.703-1.16 4.843-2.73 1.266-1.74 1.618-3.99.948-6.04-3.8-3.45-7.333-7.19-10.575-11.17-3.5-4.3-6.65-8.88-9.42-13.69.11-.34.22-.68.35-1.01.32-.85.7-1.68 1.25-2.41 2.42-3.16 7.04-3.55 9.96-.84 8.66 9.71 19.21 17.557 31.01 23.05 10.67 4.97 22.17 7.927 33.92 8.717 2.58 1.626 4.23 4.39 4.43 7.437.14 2.06-.41 4.103-1.56 5.815-3.63-.24-7.27.26-10.71 1.47-3.55 1.25-6.81 3.23-9.55 5.815l19.55 78.14c-1.4 1.684-3.13 3.07-5.08 4.06-2.28 1.16-4.81 1.76-7.37 1.746-53.05-4.71-106.21-8.08-159.433-10.11-53.34-2.035-106.73-2.72-160.1-2.056-3.79-.575-6.82 3.092-5.54 6.7.63 1.784 2.31 2.98 4.2 2.996l321.783 26.06c2.094.873 3.99 1.933 5.74 3.18 1.46 1.036 2.987 2.365 2.947 4.34-.034 1.75-1.484 3.09-3.154 3.11zm-23.61-56.02c.952-.32 1.776-.934 2.354-1.755.472-.67.76-1.45.833-2.27l-12.96-35.37c-2.706-3.39-5.87-6.4-9.404-8.92-3.324-2.37-6.945-4.3-10.77-5.73l-260.77-58.65-21.01 80.83 311.727 31.88z"/>
<path d="M786.68 627.94c.393-18.97 2.614-37.734 6.564-56.05 3.873-17.958 9.463-35.75 19.087-51.71 3.54-5.854 7.58-11.385 12.08-16.53 1.07-1.608 1.71-3.462 1.86-5.386.14-1.842-.16-3.69-.9-5.385-54.19-15.56-108.32-31.34-162.39-47.35-54.2-16.05-108.33-32.33-162.39-48.85-.23 1.43.04 2.9.78 4.15 1.07 1.81 2.96 2.99 5.06 3.13l271.54 104.43c-4.43 19.56-7.17 39.46-8.19 59.49-.99 19.68-.32 39.4 2.02 58.97.81 4.06 4.63 6.79 8.74 6.24 2.88-.39 5.27-2.4 6.15-5.16z"/>
</g>
</svg>
<svg focusable="false" class="pdfbox-brand-text" xmlns="http://www.w3.org/2000/svg" style="isolation:isolate" viewBox="0 0 109.81066666651577 30.943999999957384" aria-labelledby="brandTextTitle brandTextDesc" role="img">
<title id="brandTextTitle">PDFBox brand text</title>
<desc id="brandTextDesc">PDFBox, the brand text.</desc>
<path d="M0 .31h6.528q1.792 0 3.157.47 1.366.468 2.475 1.663 1.11 1.194 1.536 2.816.427 1.57.427 4.3 0 2-.256 3.45-.214 1.41-.982 2.64-.896 1.492-2.39 2.345-1.492.81-3.924.81H4.36v11.87H0V.305zm4.352 14.42h2.09q1.323 0 2.05-.383.724-.384 1.065-1.024.342-.683.384-1.622.09-.93.09-2.09 0-1.06-.08-2-.04-.98-.38-1.66-.3-.72-.98-1.11-.68-.43-1.96-.43H4.36v10.32z" fill-rule="evenodd"/>
<path d="M17.333.31h6.443q3.712 0 5.675 2.09 1.97 2.048 1.97 5.76v14.208q0 4.267-2.09 6.315-2.05 2.005-5.93 2.005h-6.06V.308zm4.352 26.282h2.006q1.84 0 2.61-.896.77-.94.77-2.9V8.16q0-1.792-.72-2.773-.72-.982-2.64-.982H21.7v22.187z" fill-rule="evenodd"/>
<path d="M35.583.31h12.97v4.095h-8.618v9.216h7.51v4.1h-7.51v12.97h-4.352V.31z"/>
<path d="M51.417.31h6.357q2.09 0 3.54.64 1.495.64 2.433 1.706.94 1.067 1.323 2.475.427 1.37.427 2.86V9.1q0 1.236-.214 2.09-.17.853-.554 1.493-.39.64-.94 1.152-.56.47-1.28.896 1.53.73 2.26 2.18.72 1.41.72 3.8v1.71q0 4.01-1.97 6.15-1.92 2.13-6.19 2.13H51.4V.31zm4.352 26.026h1.87q1.32 0 2.05-.384.77-.384 1.15-1.067.38-.682.47-1.62.08-.94.08-2.05 0-1.15-.13-2.004-.13-.85-.56-1.4-.386-.6-1.11-.89-.727-.3-1.92-.3h-1.92v9.73zm0-13.568h1.96q2.17 0 2.9-1.067.77-1.1.77-3.2 0-2.04-.86-3.07-.81-1.02-2.99-1.02h-1.79v8.37z" fill-rule="evenodd"/>
<path d="M69.027 16.31q0-1.323.17-2.433.17-1.11.64-1.962.768-1.408 2.22-2.262 1.45-.853 3.455-.853t3.456.853q1.45.854 2.22 2.262.468.853.64 1.962.17 1.11.17 2.432v7.12q0 1.32-.17 2.43-.172 1.11-.64 1.96-.77 1.4-2.22 2.26-1.45.85-3.456.85-2.005 0-3.456-.86-1.45-.854-2.22-2.26-.468-.855-.64-1.964-.17-1.11-.17-2.43V16.3zm4.352 7.807q0 1.238.55 1.878.6.597 1.58.597.98 0 1.53-.597.59-.64.59-1.878v-8.49q0-1.238-.6-1.835-.557-.64-1.538-.64-.98 0-1.58.64-.553.597-.553 1.835v8.49z" fill-rule="evenodd"/>
<path d="M88.316 19.637L83.24 9.057h4.607l2.688 6.143 2.688-6.144h4.608l-5.16 10.58 5.42 11.052h-4.61l-2.94-6.613-2.94 6.613h-4.61l5.34-11.05z"/>
<path d="M102.883 5.28h1.2q.784 0 1.168-.224.4-.24.4-.784 0-.464-.35-.672-.33-.224-.88-.224h-1.53V5.28zm-1.056-2.864h2.56q2.32 0 2.32 1.904 0 .48-.144.816-.128.336-.368.56-.24.224-.56.352-.304.112-.656.16l1.93 2.96h-1.28L103.7 6.24h-.817v2.928h-1.056V2.416zm6.832 3.376q0-.976-.37-1.84-.37-.864-.99-1.504-.63-.64-1.48-1.008-.85-.384-1.81-.384t-1.81.384q-.85.368-1.47 1.008t-1 1.504-.37 1.84q0 .976.364 1.84.37.864.992 1.504t1.47 1.024q.85.368 1.81.368.96 0 1.805-.368.85-.384 1.47-1.024.625-.64.99-1.504.37-.864.37-1.84zm-10.44 0q0-1.2.45-2.256.46-1.056 1.25-1.84t1.84-1.232Q102.82 0 104.02 0t2.255.464q1.056.448 1.84 1.232t1.232 1.84q.464 1.056.464 2.256 0 1.2-.46 2.256-.45 1.056-1.23 1.84t-1.84 1.248q-1.05.448-2.25.448t-2.25-.448q-1.053-.464-1.84-1.248t-1.25-1.84q-.45-1.056-.45-2.256z" fill-rule="evenodd"/>
</svg>
</a>
</div>
<nav class="wrapper">
<input type="checkbox" id="menu-toggle">
<label for="menu-toggle" class="label-toggle"></label>
<ul>
<li><a href="/blog">Blog</a></li>
</ul>
</nav>
</header>
<div class="container documentation">
<nav class="accordion-menu" role="navigation">
<h1>Documentation</h1>
<ul><li>
<a href="/3.0/migration.html" >
Migration
</a>
</li><li>
<a href="/3.0/getting-started.html" >
Getting Started
</a>
</li><li>
<a href="/3.0/dependencies.html" >
Dependencies
</a>
</li><li>
<a href="/3.0/commandline.html" >
Command-Line Tools
</a>
</li><li>
<a href="/3.0/faq.html" >
FAQ
</a>
</li><li>
<a href="https://javadoc.io/doc/org.apache.pdfbox/pdfbox/3.0.0-alpha3/index.html" >
API Docs&emsp;<small>via javadoc.io</small>
</a>
</li>
</ul>
</nav>
<section>
<h1 id="pdfbox-3.0-migration-guide" tabindex="-1">PDFBox 3.0 Migration Guide</h1>
<p class="alert alert-warning">Work in progress! This guide will be improved over time. If you believe there is
a missing topic, open an issue or help us with a contribution to improve the guide.
</p>
<p>This guide describes the updates in the Apache PDFBox 3.0 release. Use the information provided to upgrade your PDFBox 2.x applications
to PDFBox 3.0. It provides information about the new, deprecated and unsupported features in this release.</p>
<h2 id="java-versions" tabindex="-1">Java Versions</h2>
<p>PDFBox 3.0 requires at least Java 8. Testing has been done up to Java 19.</p>
<h2 id="dependency-updates" tabindex="-1">Dependency Updates</h2>
<p>Apache Xmpbox no longer depends on <code>javax.xml.bind.jaxb-api</code>. All test classes were updated to use JUnit 5.</p>
<p>All libraries on which PDFBox depends are updated to their latest stable versions:</p>
<ul>
<li>Bouncy Castle 1.75</li>
<li>Apache Commons Logging 1.2</li>
<li>picocli 4.7.4</li>
</ul>
<p>For test support the libraries are updated to</p>
<ul>
<li>JUnit 5.10.0</li>
<li>JAI Image Core 1.4.0</li>
<li>JAI JPEG2000 1.4.0</li>
<li>Apache JBIG ImageIO Plugin 3.0.4</li>
<li>Apache Commons IO 2.13</li>
<li>Apache Log4j 2.20.0</li>
</ul>
<h2 id="general-changes-for-pdfbox-3.0" tabindex="-1">General Changes for PDFBox 3.0</h2>
<p>This section explains the fundamental differences between PDFBox 3.0 and 2.x releases.</p>
<h3 id="deprecated-apis-and-components" tabindex="-1">Deprecated APIs and Components</h3>
<p>All deprecated APIs and components from PDFBox 2.x have been removed in PDFBox 3.0. Deprecated APIs in
<code>PDPageContentStream</code> have been kept but you are encouraged to replace them with the non-deprecated calls
as they are considered to be for <strong>internal use only</strong>.</p>
<h3 id="new-maven-module-for-io-classes" tabindex="-1">New maven module for IO-classes</h3>
<p>All basic classes used for io-operations were moved to a separate module for a shared usage.</p>
<pre><code> &lt;dependency&gt;
&lt;groupId&gt;org.apache.pdfbox&lt;/groupId&gt;
&lt;artifactId&gt;pdfbox-io&lt;/artifactId&gt;
&lt;/dependency&gt;
</code></pre>
<p>The whole code was overhauled including the following changes:</p>
<ul>
<li>switch to java.nio</li>
<li>add support for memory mapped files for reading</li>
<li>use the origin source when creating a new reader to process parts of it</li>
<li>read operations no longer use scratch files</li>
<li>provide an interface to implement an individual class to read a pdf</li>
<li>provide an interface to implement an individual cache holding streams when creating/writing a pdf</li>
</ul>
<h4 id="reader-implementations" tabindex="-1">Reader implementations</h4>
<p>PDFBox offers the following implementations of the interface <code>org.apache.pdfbox.io.RandomAccessRead</code> to be used as source to read a pdf:</p>
<ul>
<li><em><strong>org.apache.pdfbox.io.RandomAccessReadBuffer</strong></em></li>
</ul>
<p><code>RandomAccessReadBuffer</code> stores all the data in memory. It is backed by the given byte array or ByteBuffer. Using the constructor with an InputStream copies the data to the buffer. Internally the data is stored in a chunk of ByteBuffers with a default chunk size of 4KB.</p>
<ul>
<li><em><strong>org.apache.pdfbox.io.RandomAccessReadBufferedFile</strong></em></li>
</ul>
<p><code>RandomAccessReadBufferedFile</code> is backed by the given file. It has an in-memory cache using pages with a size of 4KB. The cache follows the FIFO principle. If the maximum of 1000 pages is reached the first added page is replaced with new data.</p>
<ul>
<li><em><strong>org.apache.pdfbox.io.RandomAccessReadMemoryMappedFile</strong></em></li>
</ul>
<p><code>RandomAccessReadMemoryMappedFile</code> uses the memory mapping feature of java. The whole file is mapped to memory and the maximum allowed file size is <em><strong>Integer.MAX_VALUE</strong></em>.</p>
<p class="alert alert-warning">There is a <a href="https://bugs.openjdk.java.net/browse/JDK-4715154">known issue</a> with locked files after closing the memory mapped file on windows. PDFBox implements its own unmapper as a workaround.</p>
<p><em><strong>Implementing your own reader</strong></em></p>
<p>If there is any need to implement a different reader one has to implement the interface <code>org.apache.pdfbox.io.RandomAccessRead</code>. It shall be done thread safe to avoid issues in multithreaded environments.</p>
<h4 id="writer-implementations" tabindex="-1">Writer implementations</h4>
<p>PDFBox offers the following implementation of the interface <code>org.apache.pdfbox.io.RandomAccess</code> to be used to write and read data.</p>
<ul>
<li><em><strong>org.apache.pdfbox.io.RandomAccessReadWriteBuffer</strong></em></li>
</ul>
<p><code>RandomAccessReadWriteBuffer</code> extends the class <code>RandomAccessReadBuffer</code> and stores all the data in memory as well. The implementation adds the ability to write data to the buffer which is automatically expanded by a new chunk.</p>
<h4 id="stream-cache" tabindex="-1">Stream cache</h4>
<p>PDFBox 3.0.x no longer uses a separate cache when reading a pdf, but still does for write operations. It introduces the interface <code>org.apache.pdfbox.io.RandomAccessStreamCache</code> to define a cache factory in a more flexible way.</p>
<p><em><strong>Provided implementations</strong></em></p>
<ul>
<li><em><strong>org.apache.pdfbox.io.RandomAccessStreamCacheImpl</strong></em></li>
</ul>
<p><code>RandomAccessStreamCacheImpl</code> is a simple default implementation using <code>RandomAccessReadWriteBuffer</code> as buffer.</p>
<ul>
<li><em><strong>org.apache.pdfbox.io.ScratchFile</strong></em></li>
</ul>
<p>The well known class <code>ScratchFile</code> is another implementation for a cache factory. It can be configured to use memory only, temp file only or a mix of both.</p>
<p><em><strong>org.apache.pdfbox.io.MemoryUsageSetting</strong></em></p>
<p>The MemoryUsageSetting parameter within the loadPDF methods was replaced by a parameter using the new functional interface <code>StreamCacheCreateFunction</code> to encapsulate the caching details within the IO package. <code>IOUtils</code> provides two variants of a possible cache for convenience. The memory only one uses <code>RandomAccessStreamCacheImpl</code> and the temporary file only uses <code>ScratchFile</code> as cache buffer factory. The newly introduced loader uses a memory only cache as default if the caller doesn't provide any cache.</p>
<p><em><strong>Implementing your own stream cache</strong></em></p>
<p>If there is any need to implement a different cache one has to implement the interface <code>org.apache.pdfbox.io.RandomAccessStreamCache</code>. It shall be done thread safe to avoid issues in multithreaded environments.</p>
<h3 id="use-loader-to-get-a-pdf-document" tabindex="-1">Use <strong>Loader</strong> to get a PDF document</h3>
<p>The new class <em><strong>org.apache.pdfbox.Loader</strong></em> is used for loading a PDF. It offers several methods to load a pdf using different kinds of sources. All load methods have been removed from <em><strong>org.apache.pdfbox.pdmodel.PDDocument</strong></em>. The same is true for loading an FDF document.</p>
<p>The most flexible way is to use an instance of RandomAccessRead such as the following sample:</p>
<pre><code> try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(&quot;yourfile.pdf&quot;)))
{
for (PDPage page : document.getPages())
{
....
}
}
</code></pre>
<p><em><strong>org.apache.pdfbox.Loader</strong></em> provides two other kinds of load methods for your convenience.</p>
<ul>
<li><em><strong>using a byte array as input</strong></em></li>
</ul>
<p>If a byte array is provided as source PDFBox uses <code>org.apache.pdfbox.io.RandomAccessReadBuffer</code> to hold the data. The byte buffer is backed by the given byte array.</p>
<ul>
<li><em><strong>using a file as input</strong></em></li>
</ul>
<p>If a file is provided as source PDFBox uses <code>org.apache.pdfbox.io.RandomAccessReadBufferedFile</code> to wrap the source data using the in-memory cache as described above.</p>
<h3 id="changes-when-saving-pdf" tabindex="-1">Changes when saving PDF</h3>
<h4 id="compressed-saving-by-default" tabindex="-1">Compressed saving by default</h4>
<p>When saving a PDF this will now be done in compressed mode by default. To override that (e.g. if you want to create a PDF/A-1b document) use <code>PDDocument.save</code> with <code>CompressParameters.NO_COMPRESSION</code>.</p>
<h4 id="don't-use-the-source-as-output" tabindex="-1">Don't use the source as output</h4>
<p>The input file must not be used as output for saving operations. It will corrupt the file and throw an exception as parts of the file are read the first time when saving it.</p>
<h3 id="reduced-memory-usage" tabindex="-1">Reduced memory usage</h3>
<h4 id="incremental-parsing" tabindex="-1">Incremental Parsing</h4>
<p>PDFBox now loads a PDF Document incrementally reducing the initial memory footprint (on demand parsing). This will also reduce the memory needed to
consume a PDF if only certain parts of the PDF are accessed. Note that, due to the nature of PDF, uses such as iterating over all pages,
accessing annotations, signing a PDF etc. might still load all parts of the PDF over time which might consume a significant amount of memory.</p>
<p>Do not try to access parts of the document after the PDDocument object has been closed, because this may lead to incorrect results, as shown in <a href="https://issues.apache.org/jira/browse/PDFBOX-5720">PDFBOX-5720</a>.</p>
<h4 id="improved-io-operations" tabindex="-1">Improved IO operations</h4>
<p>The introduction of the new io classes has a positive impact on the memory usage. Especially the re-usage of the source for reading parts of it instead of using intermediate streams reduces the memory footprint significantly.</p>
<h4 id="further-optimizations" tabindex="-1">Further optimizations</h4>
<p>There were a lot of changes and optimizations which have a more or less huge impact on the memory consumption.</p>
<h3 id="static-instances-for-standard-14-fonts-removed" tabindex="-1">Static instances for Standard 14 fonts removed</h3>
<p>The static instances of <code>PDType1Font</code> for the standard 14 fonts were removed as the underlying <code>COSDictionary</code> isn't supposed to be immutable which led to several issues.</p>
<p>A new constructor for <code>PDType1Font</code> was introduced to create a standard 14 font. The new Enum <code>Standard14Fonts.FontName</code> is the one and only parameter and defines the
name of the standard 14 font for which the instance of <code>PDType1Font</code> is created. That instance isn't a singleton anymore and has to be recreated if necessary or cached
by the user if suitable.</p>
<h3 id="changes-to-color-methods" tabindex="-1">Changes to color methods</h3>
<p>The <code>int</code> triple overloads of the <code>setStrokingColor</code> and <code>setNonStrokingColor</code> methods of <code>PDAbstractContentStream</code>, with inputs representing RGB colors defined in the 0-255
range, have been removed. While usages passing in <code>int</code> triples will compile (thanks to implicit casting of the <code>int</code> values to <code>float</code>), an <code>IllegalArgumentException</code> can be
thrown at runtime as the <code>float</code> overloads of these methods accept only values in the range 0-1.</p>
<p>To retain RGB colors defined as 0-255 integer triples, construct a <code>java.awt.Color</code> instance and use the relevant overload. Alternatively, convert values to the 0-1
range and define using <code>float</code> triples instead.</p>
<h3 id="changes-to-annotation-classes" tabindex="-1">Changes to annotation classes</h3>
<p>Instead of using the <code>PDAnnotationTextMarkup</code>, <code>PDAnnotationSquareCircle</code> or the <code>PDAnnotationMarkup</code> classes when creating certain annotations, use their subclasses <code>PDAnnotationCaret</code>, <code>PDAnnotationFreeText</code>, <code>PDAnnotationInk</code>, <code>PDAnnotationPolygon</code>, <code>PDAnnotationPolyline</code>, <code>PDAnnotationSound</code>, <code>PDAnnotationCircle</code>, <code>PDAnnotationSquare</code>, <code>PDAnnotationHighlight</code>, <code>PDAnnotationSquiggly</code>, <code>PDAnnotationStrikeout</code> and <code>PDAnnotationUnderline</code>.</p>
<h2 id="changes-in-common-functions" tabindex="-1">Changes in Common Functions</h2>
<h3 id="interactive-forms" tabindex="-1">Interactive Forms</h3>
<p>When accessing <code>AcroForms</code> using <code>PDDocumentCatalog.getAcroForm()</code> a number of fix ups are applied aligning PDFBox with most of the default behaviour
of Adobe Reader. If you'd like to bypass this use <code>PDDocumentCatalog.getAcroForm(null)</code>.</p>
<p>The fix ups include</p>
<ul>
<li>setting default font resources if they are not already part of the AcroForm</li>
<li>create form fields from orphaned widget annotations under certain conditions</li>
<li>create the normal appearance stream under certain conditions</li>
</ul>
<p>You can look up the details in the <code>org.apache.pdfbox.pdmodel.fixup</code> package of the source distribution and also create your own fix up(s).</p>
<h2 id="changes-in-pdfbox-app" tabindex="-1">Changes in PDFBox App</h2>
<p>The command line interface for the PDFBox App has been rewritten. As a result</p>
<ul>
<li>the individual commands have been changed</li>
<li>passing input and output files has been changed from using parameters to using options/flags to reduce the ambiguity</li>
<li>all commands now return an exit code</li>
<li>all commands now support passing <code>-h</code> or <code>--help</code> to display usage information</li>
<li>all commands now support passing <code>-V</code> or <code>--version</code> to display the version information</li>
</ul>
<h2 id="changes-in-pdfdebugger" tabindex="-1">Changes in PDFDebugger</h2>
<p>The following features were added to the PDFDebugger:</p>
<ul>
<li>text extraction of the selected page</li>
<li>detailed information about the glyph metrics used by text extraction
<ul>
<li>text stripper text position</li>
<li>text stripper beads</li>
<li>approximate text bounds</li>
<li>glyph bounds</li>
</ul>
</li>
<li>new tree view showing the cross reference table information for all indirect objects</li>
</ul>
</section>
<aside>
<h1>Table of Contents<a class="edit-link" href="https://github.com/apache/pdfbox-docs/edit/master/./content/3.0/migration.md" title="Edit this page">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 16 16"><path fill-rule="evenodd" d="M11.013 1.427a1.75 1.75 0 012.474 0l1.086 1.086a1.75 1.75 0 010 2.474l-8.61 8.61c-.21.21-.47.364-.756.445l-3.251.93a.75.75 0 01-.927-.928l.929-3.25a1.75 1.75 0 01.445-.758l8.61-8.61zm1.414 1.06a.25.25 0 00-.354 0L10.811 3.75l1.439 1.44 1.263-1.263a.25.25 0 000-.354l-1.086-1.086zM11.189 6.25L9.75 4.81l-6.286 6.287a.25.25 0 00-.064.108l-.558 1.953 1.953-.558a.249.249 0 00.108-.064l6.286-6.286z"/></svg>
</a></h1>
<nav class="toc">
<ol>
<li><a href="#java-versions">Java Versions</a>
</li>
<li><a href="#dependency-updates">Dependency Updates</a>
</li>
<li><a href="#general-changes-for-pdfbox-3.0">General Changes for PDFBox 3.0</a>
<ol>
<li><a href="#deprecated-apis-and-components">Deprecated APIs and Components</a>
</li>
<li><a href="#new-maven-module-for-io-classes">New maven module for IO-classes</a>
</li>
<li><a href="#use-loader-to-get-a-pdf-document">Use Loader to get a PDF document</a>
</li>
<li><a href="#changes-when-saving-pdf">Changes when saving PDF</a>
</li>
<li><a href="#reduced-memory-usage">Reduced memory usage</a>
</li>
<li><a href="#static-instances-for-standard-14-fonts-removed">Static instances for Standard 14 fonts removed</a>
</li>
<li><a href="#changes-to-color-methods">Changes to color methods</a>
</li>
<li><a href="#changes-to-annotation-classes">Changes to annotation classes</a>
</li>
</ol>
</li>
<li><a href="#changes-in-common-functions">Changes in Common Functions</a>
<ol>
<li><a href="#interactive-forms">Interactive Forms</a>
</li>
</ol>
</li>
<li><a href="#changes-in-pdfbox-app">Changes in PDFBox App</a>
</li>
<li><a href="#changes-in-pdfdebugger">Changes in PDFDebugger</a>
</li>
</ol>
</nav>
</aside>
</div>
<footer class="footer">
<div class="container">
<div class="row">
<div class="span3">
<!-- nothing in here on purpose -->
</div>
<div class="span9">
<p>Copyright © 2009&ndash;2024 <a href="https://www.apache.org/">The Apache Software Foundation</a>. Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
<br>Apache PDFBox, PDFBox, Apache, the Apache feather logo and the Apache PDFBox project logos are trademarks of The Apache Software Foundation.</p>
</div>
</div>
</div>
</footer>
</body>
</html>