<!DOCTYPE html>
<html lang="en">

<!--

     Licensed to the Apache Software Foundation (ASF) under one or more
     contributor license agreements.  See the NOTICE file distributed with
     this work for additional information regarding copyright ownership.
     The ASF licenses this file to You under the Apache License, Version 2.0
     (the "License"); you may not use this file except in compliance with
     the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

     Unless required by applicable law or agreed to in writing, software
     distributed under the License is distributed on an "AS IS" BASIS,
     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     See the License for the specific language governing permissions and
     limitations under the License.
     -->

<head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <meta name="description" content="The Apache PDFBox™ library is an open source Java tool for working with PDF documents. This project allows creation of new PDF documents, manipulation of existing documents and the ability to extract content from documents. Apache PDFBox also includes several command-line utilities. Apache PDFBox is published under the Apache License v2.0.">

    <title>Apache PDFBox |  Frequently Asked Questions (FAQ)</title>

    <link href="/bootstrap/css/bootstrap.min.css" rel="stylesheet">
    <link href="/css/prism.css" rel="stylesheet">
    <link href="/css/styles.css" rel="stylesheet">

    

    

    

</head>


<body>

  <header class="main-header">
    <div class="main-header-logo">
      <a href="/" aria-label="Navigation to the PDFBox home page">
        <svg focusable="false" class="pdfbox-brand-toolbox" viewBox="0 0 744.09448819 1052.3622047" xmlns="http://www.w3.org/2000/svg" aria-labelledby="brandImageTitle brandImageDesc" role="img">
  <title id="brandImageTitle">PDFBox Brand Logo</title>
  <desc id="brandImageDesc">The PDFBox logo showing a toolbox.</desc>
  <g transform="matrix(1.25 0 0 -1.25 -317.14 1018.08)" clip-path="url(#clipPath3375)">
    <path d="M821.924 376.535L463.24 122.525l-203.83 76.86c23.89 6.02 46.87 15.197 68.335 27.29 60.063 33.835 105.686 88.46 128.282 153.59 2.634 4.66 8.11 6.92 13.265 5.47 4.667-1.31 8.01-5.41 8.353-10.247l-3.572-188.12 334.99 193.957c2.41 1 5.113 1.028 7.54.075 2.315-.907 4.21-2.64 5.32-4.865zm-1.307 97.91l.13-78.324c-.87-2.72-3.342-4.61-6.197-4.75-3.104-.14-5.924 1.8-6.893 4.75l1.29 79.54c.386 2.92 2.893 5.09 5.835 5.04 3.37-.04 6.022-2.89 5.835-6.25z"/>
    <path d="M751.88 651.666c-.237.002-.48-.022-.723-.077l-363.512-25.15c-10.37-.73-19.583-6.76-24.588-15.87-6.943-12.64-4.677-28.26 5.864-37.72 2.85-2.56 6.22-4.49 8.97-7.16 2.19-2.12 3.93-4.65 5.12-7.45 23.7-26.86 30.3-64.76 17.08-98.05-4.75-11.96-11.97-22.77-21.2-31.73l-73.74 16.1 46.69 115.89c2.01 3.07.64 7.21-2.8 8.49-2.8 1.03-5.91-.34-7.03-3.1L286.49 448.8l-11.79 4.32c-2.097.28-4.21-.39-5.76-1.825-1.16-1.077-1.925-2.516-2.16-4.083l-11.53-189.68c1.66-1.83 4.03-2.85 6.5-2.798 2.533.05 4.913 1.22 6.5 3.197 6.22 22.46 11.216 45.23 14.96 68.22 5.137 31.55 7.91 63.43 8.3 95.39l136.152-15.28c2.396-1.09 5.032-1.56 7.66-1.35 2.75.22 5.396 1.16 7.663 2.73 41.62 19.22 83.375 38.15 125.26 56.79 41.615 18.52 83.36 36.75 125.23 54.68 1.935-.16 3.703-1.16 4.843-2.73 1.266-1.74 1.618-3.99.948-6.04-3.8-3.45-7.333-7.19-10.575-11.17-3.5-4.3-6.65-8.88-9.42-13.69.11-.34.22-.68.35-1.01.32-.85.7-1.68 1.25-2.41 2.42-3.16 7.04-3.55 9.96-.84 8.66 9.71 19.21 17.557 31.01 23.05 10.67 4.97 22.17 7.927 33.92 8.717 2.58 1.626 4.23 4.39 4.43 7.437.14 2.06-.41 4.103-1.56 5.815-3.63-.24-7.27.26-10.71 1.47-3.55 1.25-6.81 3.23-9.55 5.815l19.55 78.14c-1.4 1.684-3.13 3.07-5.08 4.06-2.28 1.16-4.81 1.76-7.37 1.746-53.05-4.71-106.21-8.08-159.433-10.11-53.34-2.035-106.73-2.72-160.1-2.056-3.79-.575-6.82 3.092-5.54 6.7.63 1.784 2.31 2.98 4.2 2.996l321.783 26.06c2.094.873 3.99 1.933 5.74 3.18 1.46 1.036 2.987 2.365 2.947 4.34-.034 1.75-1.484 3.09-3.154 3.11zm-23.61-56.02c.952-.32 1.776-.934 2.354-1.755.472-.67.76-1.45.833-2.27l-12.96-35.37c-2.706-3.39-5.87-6.4-9.404-8.92-3.324-2.37-6.945-4.3-10.77-5.73l-260.77-58.65-21.01 80.83 311.727 31.88z"/>
    <path d="M786.68 627.94c.393-18.97 2.614-37.734 6.564-56.05 3.873-17.958 9.463-35.75 19.087-51.71 3.54-5.854 7.58-11.385 12.08-16.53 1.07-1.608 1.71-3.462 1.86-5.386.14-1.842-.16-3.69-.9-5.385-54.19-15.56-108.32-31.34-162.39-47.35-54.2-16.05-108.33-32.33-162.39-48.85-.23 1.43.04 2.9.78 4.15 1.07 1.81 2.96 2.99 5.06 3.13l271.54 104.43c-4.43 19.56-7.17 39.46-8.19 59.49-.99 19.68-.32 39.4 2.02 58.97.81 4.06 4.63 6.79 8.74 6.24 2.88-.39 5.27-2.4 6.15-5.16z"/>
  </g>
</svg>

        <svg focusable="false" class="pdfbox-brand-text" xmlns="http://www.w3.org/2000/svg" style="isolation:isolate" viewBox="0 0 109.81066666651577 30.943999999957384" aria-labelledby="brandTextTitle brandTextDesc" role="img">
  <title id="brandTextTitle">PDFBox brand text</title>
  <desc id="brandTextDesc">PDFBox, the brand text.</desc>
  <path d="M0 .31h6.528q1.792 0 3.157.47 1.366.468 2.475 1.663 1.11 1.194 1.536 2.816.427 1.57.427 4.3 0 2-.256 3.45-.214 1.41-.982 2.64-.896 1.492-2.39 2.345-1.492.81-3.924.81H4.36v11.87H0V.305zm4.352 14.42h2.09q1.323 0 2.05-.383.724-.384 1.065-1.024.342-.683.384-1.622.09-.93.09-2.09 0-1.06-.08-2-.04-.98-.38-1.66-.3-.72-.98-1.11-.68-.43-1.96-.43H4.36v10.32z" fill-rule="evenodd"/>
  <path d="M17.333.31h6.443q3.712 0 5.675 2.09 1.97 2.048 1.97 5.76v14.208q0 4.267-2.09 6.315-2.05 2.005-5.93 2.005h-6.06V.308zm4.352 26.282h2.006q1.84 0 2.61-.896.77-.94.77-2.9V8.16q0-1.792-.72-2.773-.72-.982-2.64-.982H21.7v22.187z" fill-rule="evenodd"/>
  <path d="M35.583.31h12.97v4.095h-8.618v9.216h7.51v4.1h-7.51v12.97h-4.352V.31z"/>
  <path d="M51.417.31h6.357q2.09 0 3.54.64 1.495.64 2.433 1.706.94 1.067 1.323 2.475.427 1.37.427 2.86V9.1q0 1.236-.214 2.09-.17.853-.554 1.493-.39.64-.94 1.152-.56.47-1.28.896 1.53.73 2.26 2.18.72 1.41.72 3.8v1.71q0 4.01-1.97 6.15-1.92 2.13-6.19 2.13H51.4V.31zm4.352 26.026h1.87q1.32 0 2.05-.384.77-.384 1.15-1.067.38-.682.47-1.62.08-.94.08-2.05 0-1.15-.13-2.004-.13-.85-.56-1.4-.386-.6-1.11-.89-.727-.3-1.92-.3h-1.92v9.73zm0-13.568h1.96q2.17 0 2.9-1.067.77-1.1.77-3.2 0-2.04-.86-3.07-.81-1.02-2.99-1.02h-1.79v8.37z" fill-rule="evenodd"/>
  <path d="M69.027 16.31q0-1.323.17-2.433.17-1.11.64-1.962.768-1.408 2.22-2.262 1.45-.853 3.455-.853t3.456.853q1.45.854 2.22 2.262.468.853.64 1.962.17 1.11.17 2.432v7.12q0 1.32-.17 2.43-.172 1.11-.64 1.96-.77 1.4-2.22 2.26-1.45.85-3.456.85-2.005 0-3.456-.86-1.45-.854-2.22-2.26-.468-.855-.64-1.964-.17-1.11-.17-2.43V16.3zm4.352 7.807q0 1.238.55 1.878.6.597 1.58.597.98 0 1.53-.597.59-.64.59-1.878v-8.49q0-1.238-.6-1.835-.557-.64-1.538-.64-.98 0-1.58.64-.553.597-.553 1.835v8.49z" fill-rule="evenodd"/>
  <path d="M88.316 19.637L83.24 9.057h4.607l2.688 6.143 2.688-6.144h4.608l-5.16 10.58 5.42 11.052h-4.61l-2.94-6.613-2.94 6.613h-4.61l5.34-11.05z"/>
  <path d="M102.883 5.28h1.2q.784 0 1.168-.224.4-.24.4-.784 0-.464-.35-.672-.33-.224-.88-.224h-1.53V5.28zm-1.056-2.864h2.56q2.32 0 2.32 1.904 0 .48-.144.816-.128.336-.368.56-.24.224-.56.352-.304.112-.656.16l1.93 2.96h-1.28L103.7 6.24h-.817v2.928h-1.056V2.416zm6.832 3.376q0-.976-.37-1.84-.37-.864-.99-1.504-.63-.64-1.48-1.008-.85-.384-1.81-.384t-1.81.384q-.85.368-1.47 1.008t-1 1.504-.37 1.84q0 .976.364 1.84.37.864.992 1.504t1.47 1.024q.85.368 1.81.368.96 0 1.805-.368.85-.384 1.47-1.024.625-.64.99-1.504.37-.864.37-1.84zm-10.44 0q0-1.2.45-2.256.46-1.056 1.25-1.84t1.84-1.232Q102.82 0 104.02 0t2.255.464q1.056.448 1.84 1.232t1.232 1.84q.464 1.056.464 2.256 0 1.2-.46 2.256-.45 1.056-1.23 1.84t-1.84 1.248q-1.05.448-2.25.448t-2.25-.448q-1.053-.464-1.84-1.248t-1.25-1.84q-.45-1.056-.45-2.256z" fill-rule="evenodd"/>
</svg>

      </a>
    </div>
    <nav class="wrapper">
      <input type="checkbox" id="menu-toggle">
      <label for="menu-toggle" class="label-toggle"></label>
      <ul>
        <li><a href="/blog">Blog</a></li>
      </ul>
    </nav>
</header>


  

  <div class="container documentation">
    <nav class="accordion-menu" role="navigation">
      <h1>Documentation</h1>
      <ul><li>
            <a href="/3.0/migration.html" >
              Migration
            </a>
          </li><li>
            <a href="/3.0/getting-started.html" >
              Getting Started
            </a>
          </li><li>
            <a href="/3.0/dependencies.html" >
              Dependencies
            </a>
          </li><li>
            <a href="/3.0/commandline.html" >
              Command-Line Tools
            </a>
          </li><li>
            <a href="/3.0/faq.html" >
              FAQ
            </a>
          </li><li>
          <a href="https://javadoc.io/doc/org.apache.pdfbox/pdfbox/3.0.0-alpha3/index.html" >
            API Docs&emsp;<small>via javadoc.io</small>
          </a>
        </li>
      </ul>
    </nav>

    <section>
      <h1 id="frequently-asked-questions" tabindex="-1">Frequently Asked Questions</h1>
<h2 id="general-questions" tabindex="-1">General Questions</h2>
<p><a name="log4j"></a></p>
<h3 id="i-am-getting-the-below-log4j-warning-message%2C-how-do-i-remove-it%3F" tabindex="-1">I am getting the below Log4J warning message, how do I remove it?</h3>
<pre><code>log4j:WARN No appenders could be found for logger (org.apache.pdfbox.util.ResourceLoader).
log4j:WARN Please initialize the log4j system properly.
</code></pre>
<p>This message means that you need to configure the log4j logging system.
See the <a href="http://logging.apache.org/log4j/1.2/manual.html">log4j documentation</a> for more information.</p>
<p>PDFBox comes with a sample log4j configuration file.  To use it you set a system property like this</p>
<pre><code>java -Dlog4j.configuration=log4j.xml org.apache.pdfbox.ExtractText &lt;PDF-file&gt; &lt;output-text-file&gt;
</code></pre>
<p>If this is not working for you then you may have to specify the log4j config file using a URL path, like this:</p>
<pre><code>log4j.configuration=file:///&lt;path to config file&gt;
</code></pre>
<p><a name="threadsafe"></a></p>
<h3 id="is-pdfbox-thread-safe%3F" tabindex="-1">Is PDFBox thread safe?</h3>
<p>No! Only one thread may access a single document at a time. You can have multiple threads each accessing their own
PDDocument object.</p>
<p><a name="notclosed"></a></p>
<h3 id="why-do-i-get-a-%22warning%3A-you-did-not-close-the-pdf-document%22%3F" tabindex="-1">Why do I get a &quot;Warning: You did not close the PDF Document&quot;?</h3>
<p>You need to call close() on the PDDocument inside the finally block; if you
don't, the document will not be closed properly.  Also, you must close all
PDDocument objects that get created.  The following code creates <strong>two</strong>
PDDocument objects; one from the &quot;new PDDocument()&quot; and the second by the load method.</p>
<pre class="language-java"><code class="language-java"><span class="token class-name">PDDocument</span> doc <span class="token operator">=</span> <span class="token keyword">new</span> <span class="token class-name">PDDocument</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span><br><span class="token keyword">try</span><br><span class="token punctuation">{</span><br>   doc <span class="token operator">=</span> <span class="token class-name">PDDocument</span><span class="token punctuation">.</span><span class="token function">loadPDF</span><span class="token punctuation">(</span> <span class="token string">"my.pdf"</span> <span class="token punctuation">)</span><span class="token punctuation">;</span><br><span class="token punctuation">}</span><br><span class="token keyword">finally</span><br><span class="token punctuation">{</span><br>   <span class="token keyword">if</span><span class="token punctuation">(</span> doc <span class="token operator">!=</span> <span class="token keyword">null</span> <span class="token punctuation">)</span><br>   <span class="token punctuation">{</span><br>      doc<span class="token punctuation">.</span><span class="token function">close</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span><br>   <span class="token punctuation">}</span><br><span class="token punctuation">}</span></code></pre>
<h2 id="font-handling" tabindex="-1">Font Handling</h2>
<p><a name="fontencoding"></a></p>
<h3 id="i'm-getting-java.lang.illegalargumentexception%3A-...-is-not-available-in-this-font's-encoding%3A-winansiencoding" tabindex="-1">I'm getting java.lang.IllegalArgumentException: ... is not available in this font's encoding: WinAnsiEncoding</h3>
<p>Check whether the character is available in WinAnsiEncoding by looking at the <a href="https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf">PDF Specification</a> Appendix D.
If not, but if it is available in this font (in Windows, have a look with charmap.exe), then load the font with
PDType0Font.load(), see also in the EmbeddedFonts.java example in the source code download.</p>
<h2 id="pdf-creation" tabindex="-1">PDF Creation</h2>
<p><a name="layout"></a></p>
<h3 id="can-i-use-pdfbox-to-create-complex-layouts%3F" tabindex="-1">Can I use PDFBox to create complex layouts?</h3>
<p>I'd like to use PDFBox to create a complex layout containing several paragraphs, tables, images etc. Is PDFBox fit for that purpose?</p>
<p>PDFBox being a low level PDF library provides the APIs to create page content such as text, images etc. But at this point in time it doesn't provide a higher level API to do page layout, paragraph handling, automatic line wrapping or create tables and such.</p>
<p>But PDFBox is the foundation of some projects which might help in that case. This includes projects such as</p>
<ul>
<li><a href="http://dhorions.github.io/boxable/">Boxable</a></li>
<li><a href="https://github.com/errt/BoxTable">BoxTable</a></li>
<li><a href="https://github.com/vandeseer/easytable">easytable</a></li>
<li><a href="https://github.com/ralfstuckert/pdfbox-layout">pdfbox-layout</a></li>
<li><a href="https://github.com/GlenKPeterson/PdfLayoutManager">PdfLayoutManager</a></li>
<li><a href="https://github.com/phax/ph-pdf-layout">ph-pdf-layout</a></li>
</ul>
<p>You may also want to consider using <a href="https://xmlgraphics.apache.org/fop/">Apache FOP</a>, which allows you to create complex documents from XML data and templates.</p>
<p><a name="emptypage"></a></p>
<h3 id="i'm-creating-a-pdf-but-my-page-is-empty.-why%3F" tabindex="-1">I'm creating a PDF but my page is empty. Why?</h3>
<p>Make sure that you closed your content stream before saving.</p>
<h2 id="text-extraction" tabindex="-1">Text Extraction</h2>
<p><a name="textorder"></a></p>
<h3 id="why-does-the-extracted-text-appear-in-the-wrong-sequence%3F" tabindex="-1">Why does the extracted text appear in the wrong sequence?</h3>
<p>By default, text extraction is done in the same sequence as the text in the PDF page content stream.
PDF is a graphic format, not a text format, and unlike HTML, it has no requirement that text on a page
be rendered in a certain order. The order is the one that was determined by the software that created the PDF.
To get text sorted from left to right and top to bottom, use <code>setSortByPosition(true)</code>.</p>
<p><a name="notext"></a></p>
<h3 id="how-come-i-am-not-getting-any-text-from-the-pdf-document%3F" tabindex="-1">How come I am not getting any text from the PDF document?</h3>
<p>Text extraction from a pdf document is a complicated task and there are many factors
involved that affect the possibility and accuracy of text extraction.  It would be helpful
to the PDFBox team if you could try a couple things.</p>
<ul>
<li>Open the PDF in Acrobat and try to extract text from there.  If Acrobat can extract text then PDFBox
should be able to as well and it is a bug if it cannot.  If Acrobat cannot extract text then PDFBox 'probably' cannot either.</li>
<li>It might really be an image instead of text.  Some PDF documents are just images that have been scanned in.
You can tell by using the selection tool in Acrobat, if you can't select any text then it is probably an image.</li>
</ul>
<p><a name="gibberish"></a></p>
<h3 id="how-come-i-am-getting-gibberish(g38g43g36g51g5)-when-extracting-text%3F" tabindex="-1">How come I am getting gibberish(G38G43G36G51G5) when extracting text?</h3>
<p>This is because the characters in a PDF document can use a custom encoding
instead of unicode or ASCII.  When you see gibberish text then it
probably means that a meaningless internal encoding is being used.  The
only way to access the text is to use OCR.  This may be a future
enhancement.</p>
<p><a name="fontwidth"></a></p>
<h3 id="what-does-%22java.io.ioexception%3A-can't-handle-font-width%22-mean%3F" tabindex="-1">What does &quot;java.io.IOException: Can't handle font width&quot; mean?</h3>
<p>This probably means that the &quot;Resources&quot; directory is not in your classpath. The
Resources directory is included in the PDFBox jar so this is only a problem if you
are building PDFBox yourself and not using the binary.</p>
<p><a name="permission"></a></p>
<h3 id="why-do-i-get-%22you-do-not-have-permission-to-extract-text%22-on-some-documents%3F" tabindex="-1">Why do I get &quot;You do not have permission to extract text&quot; on some documents?</h3>
<p>PDF documents have certain security permissions that can be applied to them and two
passwords associated with them, a user password and an owner password. If the &quot;cannot extract text&quot;
permission bit is set then you need to decrypt the document with the owner password in order
to extract the text.</p>
<p><a name="partially"></a></p>
<h3 id="can't-we-just-extract-the-text-without-parsing-the-whole-document-or-extract-text-as-it-is-parsed%3F" tabindex="-1">Can't we just extract the text without parsing the whole document or extract text as it is parsed?</h3>
<p>Not really, for a couple reasons.</p>
<ul>
<li>If the document is encrypted then you need to parse at least until the encryption dictionary before
you can decrypt.</li>
<li>Sometimes the PDFont contains vital information needed for text extraction.</li>
<li>Text on a page does not have to be drawn in reading order. For example: if the page said &quot;Hello World&quot;,
the pdf could have been written such that &quot;World&quot; gets drawn and then the cursor moves to the left and
the word &quot;Hello&quot; is drawn.</li>
</ul>
<h2 id="pdf-rendering" tabindex="-1">PDF rendering</h2>
<p><a name="outofmemoryerror"></a></p>
<h3 id="i'm-getting-an-outofmemoryerror.-what-can-i-do%3F" tabindex="-1">I'm getting an OutOfMemoryError. What can I do?</h3>
<p>The memory footprint depends on the PDF itself and on the resolution you use for rendering. Some possible options:</p>
<ul>
<li>increase the <code>-Xmx</code> value when starting java</li>
<li>use a scratch file by loading files with this code <code>PDDocument.load(file, MemoryUsageSetting.setupTempFileOnly())</code></li>
<li>be careful not to hold your images after rendering them, e.g. avoid putting all images of a PDF into a <code>List</code></li>
<li>don't forget to close your <code>PDDocument</code> objects</li>
<li>decrease the scale when calling <code>PDFRenderer.renderImage()</code>, or the dpi value when calling <code>PDFRenderer.renderImageWithDPI()</code></li>
<li>disable the cache for <code>PDImageXObject</code> objects by calling <code>PDDocument.setResourceCache()</code> with a cache object that is derived from <code>DefaultResourceCache</code> and whose call <code>public void put(COSObject indirect, PDXObject xobject)</code> does nothing. Be aware that this will slow down rendering for PDF files that have an identical image in several pages (e.g. a company logo or a background). More about this can be read in <a href="https://issues.apache.org/jira/browse/PDFBOX-3700">PDFBOX-3700</a>.</li>
</ul>
<p><a name="textantialias"></a></p>
<h3 id="why-are-some-texts-in-poor-quality-and-not-antialiased%3F" tabindex="-1">Why are some texts in poor quality and not antialiased?</h3>
<p>This is because in some PDFs (e.g. the one in <a href="https://issues.apache.org/jira/browse/PDFBOX-2814">PDFBOX-2814</a>), text is not
rendered directly, but as a shaped clipping from a background. Java graphics does not support &quot;soft clipping&quot;
<a href="https://bugs.openjdk.java.net/browse/JDK-4212743">https://bugs.openjdk.java.net/browse/JDK-4212743</a>, and because of that, the edges are not looking smooth.
Soft clipping could be achieved with <a href="https://web.archive.org/web/20200814083145/https://community.oracle.com/blogs/campbell/2006/07/19/java-2d-trickery-soft-clipping">some extra steps</a>,
but these would cost additional time and memory space. You can get higher quality by rendering at a higher dpi and then downscaling the image.</p>
<p><a name="badraster"></a></p>
<h3 id="what-to-do-with-the-illegalargumentexception-%22numbers-of-source-raster-bands-and-source-color-space-components-do-not-match%22%3F" tabindex="-1">What to do with the IllegalArgumentException &quot;Numbers of source Raster bands and source color space components do not match&quot;?</h3>
<p>Sadly, this is a known bug in Java Imaging. Use the twelvemonkeys library as described in the <a href="dependencies.html">dependencies</a> page.</p>
<h3 id="does-pdfbox-support-complex-scripts%3F" tabindex="-1">Does PDFBox support complex scripts?</h3>
<p>Thanks to contributions we have supported Bengali and Latin ligatures since 3.0.0.
Starting with version 3.0.2 we also support Devanagari and Gujarati.
However there are some caveats: PDFBox will support only one language in a specific font, thus it is
best to use a font that is specific enough, e.g. the <a href="https://pagure.io/lohit">Lohit fonts</a>.
For example, the Mangal font is meant to be a Devanagari font, but PDFBox will choose Bengali
because that one is also claimed to be supported and is checked first.
It is not possible to deactivate the feature.
The features may be incomplete because we do not yet support all GSUB tables,
and we don't support GPOS at all.</p>

    </section>
    <aside>
      
      <h1>Table of Contents<a class="edit-link" href="https://github.com/apache/pdfbox-docs/edit/master/./content/3.0/faq.md" title="Edit this page">
        <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 16 16"><path fill-rule="evenodd" d="M11.013 1.427a1.75 1.75 0 012.474 0l1.086 1.086a1.75 1.75 0 010 2.474l-8.61 8.61c-.21.21-.47.364-.756.445l-3.251.93a.75.75 0 01-.927-.928l.929-3.25a1.75 1.75 0 01.445-.758l8.61-8.61zm1.414 1.06a.25.25 0 00-.354 0L10.811 3.75l1.439 1.44 1.263-1.263a.25.25 0 000-.354l-1.086-1.086zM11.189 6.25L9.75 4.81l-6.286 6.287a.25.25 0 00-.064.108l-.558 1.953 1.953-.558a.249.249 0 00.108-.064l6.286-6.286z"/></svg>
      </a></h1>
      
      <nav class="toc">
                <ol>
                    
                    <li><a href="#general-questions">General Questions</a>
            
                <ol>
                    
                    <li><a href="#i-am-getting-the-below-log4j-warning-message%2C-how-do-i-remove-it%3F">I am getting the below Log4J warning message, how do I remove it?</a>
            		</li>

                    <li><a href="#is-pdfbox-thread-safe%3F">Is PDFBox thread safe?</a>
            		</li>

                    <li><a href="#why-do-i-get-a-%22warning%3A-you-did-not-close-the-pdf-document%22%3F">Why do I get a "Warning: You did not close the PDF Document"?</a>
            		</li>
                </ol>
            		</li>

                    <li><a href="#font-handling">Font Handling</a>
            
                <ol>
                    
                    <li><a href="#i'm-getting-java.lang.illegalargumentexception%3A-...-is-not-available-in-this-font's-encoding%3A-winansiencoding">I'm getting java.lang.IllegalArgumentException: ... is not available in this font's encoding: WinAnsiEncoding</a>
            		</li>
                </ol>
            		</li>

                    <li><a href="#pdf-creation">PDF Creation</a>
            
                <ol>
                    
                    <li><a href="#can-i-use-pdfbox-to-create-complex-layouts%3F">Can I use PDFBox to create complex layouts?</a>
            		</li>

                    <li><a href="#i'm-creating-a-pdf-but-my-page-is-empty.-why%3F">I'm creating a PDF but my page is empty. Why?</a>
            		</li>
                </ol>
            		</li>

                    <li><a href="#text-extraction">Text Extraction</a>
            
                <ol>
                    
                    <li><a href="#why-does-the-extracted-text-appear-in-the-wrong-sequence%3F">Why does the extracted text appear in the wrong sequence?</a>
            		</li>

                    <li><a href="#how-come-i-am-not-getting-any-text-from-the-pdf-document%3F">How come I am not getting any text from the PDF document?</a>
            		</li>

                    <li><a href="#how-come-i-am-getting-gibberish(g38g43g36g51g5)-when-extracting-text%3F">How come I am getting gibberish(G38G43G36G51G5) when extracting text?</a>
            		</li>

                    <li><a href="#what-does-%22java.io.ioexception%3A-can't-handle-font-width%22-mean%3F">What does "java.io.IOException: Can't handle font width" mean?</a>
            		</li>

                    <li><a href="#why-do-i-get-%22you-do-not-have-permission-to-extract-text%22-on-some-documents%3F">Why do I get "You do not have permission to extract text" on some documents?</a>
            		</li>

                    <li><a href="#can't-we-just-extract-the-text-without-parsing-the-whole-document-or-extract-text-as-it-is-parsed%3F">Can't we just extract the text without parsing the whole document or extract text as it is parsed?</a>
            		</li>
                </ol>
            		</li>

                    <li><a href="#pdf-rendering">PDF rendering</a>
            
                <ol>
                    
                    <li><a href="#i'm-getting-an-outofmemoryerror.-what-can-i-do%3F">I'm getting an OutOfMemoryError. What can I do?</a>
            		</li>

                    <li><a href="#why-are-some-texts-in-poor-quality-and-not-antialiased%3F">Why are some texts in poor quality and not antialiased?</a>
            		</li>

                    <li><a href="#what-to-do-with-the-illegalargumentexception-%22numbers-of-source-raster-bands-and-source-color-space-components-do-not-match%22%3F">What to do with the IllegalArgumentException "Numbers of source Raster bands and source color space components do not match"?</a>
            		</li>

                    <li><a href="#does-pdfbox-support-complex-scripts%3F">Does PDFBox support complex scripts?</a>
            		</li>
                </ol>
            		</li>
                </ol>
            </nav>

    </aside>
  </div>

  <footer class="footer">
    <div class="container">
        <div class="row">
            <div class="span3">
                <!-- nothing in here on purpose -->
            </div>
            <div class="span9">
                <p>Copyright © 2009&ndash;2024 <a href="https://www.apache.org/">The Apache Software Foundation</a>. Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
                    <br>Apache PDFBox, PDFBox, Apache, the Apache feather logo and the Apache PDFBox project logos are trademarks of The Apache Software Foundation.</p>
            </div>
        </div>
    </div>
</footer>


</body>

</html>