blob: af71e5979767e0643266648c73d3a93281626d76 [file] [log] [blame]
<!doctype html>
<html class="no-js" lang="en" data-content_root="./">
<head><meta charset="utf-8"/>
<!-- The viewport is declared exactly once; a second, equivalent declaration would be redundant. -->
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="color-scheme" content="light dark">
<link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="Java Algorithms" href="algorithm.html" /><link rel="prev" title="Table" href="table.html" />
<!-- Generated with Sphinx 8.1.3 and Furo 2024.08.06 -->
<title>Reading/Writing IPC formats - arrow-java 18.1.0 documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="_static/styles/furo.css?v=354aac6f" />
<link rel="stylesheet" type="text/css" href="_static/styles/furo-extensions.css?v=302659d7" />
<style>
/* Default values of the code foreground/background custom properties. */
body {
--color-code-background: #f8f8f8;
--color-code-foreground: black;
}
/* The dark overrides below apply to every medium except print. */
@media not print {
/* Dark values when <body data-theme="dark"> is set explicitly. */
body[data-theme="dark"] {
--color-code-background: #202020;
--color-code-foreground: #d0d0d0;
}
/* Dark values when the user agent prefers dark and the body is not forced to data-theme="light". */
@media (prefers-color-scheme: dark) {
body:not([data-theme="light"]) {
--color-code-background: #202020;
--color-code-foreground: #d0d0d0;
}
}
}
</style></head>
<body>
<script>
  // Set <body data-theme="..."> from the stored "theme" value, defaulting to "auto".
  // Reading localStorage can throw (e.g. a SecurityError when storage access is
  // blocked), so the read is guarded and the "auto" default is kept in that case.
  (function () {
    var theme = "auto";
    try {
      theme = localStorage.getItem("theme") || "auto";
    } catch (err) {
      // Storage is unavailable: fall through with the "auto" default.
    }
    document.body.dataset.theme = theme;
  })();
</script>
<!-- Inline SVG sprite, hidden with display: none. Each <symbol> defines one icon that
     the page instantiates elsewhere through <svg><use href="#symbol-id"></use></svg>. -->
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
<!-- "Contents" icon; referenced by the "Toggle table of contents sidebar" labels. -->
<symbol id="svg-toc" viewBox="0 0 24 24">
<title>Contents</title>
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
<path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
</svg>
</symbol>
<!-- "Menu" icon (three horizontal lines); referenced by the mobile header's navigation label. -->
<symbol id="svg-menu" viewBox="0 0 24 24">
<title>Menu</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
<line x1="3" y1="12" x2="21" y2="12"></line>
<line x1="3" y1="6" x2="21" y2="6"></line>
<line x1="3" y1="18" x2="21" y2="18"></line>
</svg>
</symbol>
<!-- "Expand" chevron; referenced by the sidebar tree's "Toggle navigation of ..." labels. -->
<symbol id="svg-arrow-right" viewBox="0 0 24 24">
<title>Expand</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
<polyline points="9 18 15 12 9 6"></polyline>
</svg>
</symbol>
<!-- "Light mode" icon; referenced by the theme toggle's .theme-icon-when-light slot. -->
<symbol id="svg-sun" viewBox="0 0 24 24">
<title>Light mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
<circle cx="12" cy="12" r="5"></circle>
<line x1="12" y1="1" x2="12" y2="3"></line>
<line x1="12" y1="21" x2="12" y2="23"></line>
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
<line x1="1" y1="12" x2="3" y2="12"></line>
<line x1="21" y1="12" x2="23" y2="12"></line>
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
</svg>
</symbol>
<!-- "Dark mode" icon; referenced by the theme toggle's .theme-icon-when-dark slot. -->
<symbol id="svg-moon" viewBox="0 0 24 24">
<title>Dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
</svg>
</symbol>
<!-- "Auto, in light mode" icon: the moon path is drawn at 50% opacity. Referenced by
     the theme toggle's .theme-icon-when-auto-light slot. -->
<symbol id="svg-sun-with-moon" viewBox="0 0 24 24">
<title>Auto light/dark, in light mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1" stroke-linecap="round" stroke-linejoin="round"
class="icon-custom-derived-from-feather-sun-and-tabler-moon">
<path style="opacity: 50%" d="M 5.411 14.504 C 5.471 14.504 5.532 14.504 5.591 14.504 C 3.639 16.319 4.383 19.569 6.931 20.352 C 7.693 20.586 8.512 20.551 9.25 20.252 C 8.023 23.207 4.056 23.725 2.11 21.184 C 0.166 18.642 1.702 14.949 4.874 14.536 C 5.051 14.512 5.231 14.5 5.411 14.5 L 5.411 14.504 Z"/>
<line x1="14.5" y1="3.25" x2="14.5" y2="1.25"/>
<line x1="14.5" y1="15.85" x2="14.5" y2="17.85"/>
<line x1="10.044" y1="5.094" x2="8.63" y2="3.68"/>
<line x1="19" y1="14.05" x2="20.414" y2="15.464"/>
<line x1="8.2" y1="9.55" x2="6.2" y2="9.55"/>
<line x1="20.8" y1="9.55" x2="22.8" y2="9.55"/>
<line x1="10.044" y1="14.006" x2="8.63" y2="15.42"/>
<line x1="19" y1="5.05" x2="20.414" y2="3.636"/>
<circle cx="14.5" cy="9.55" r="3.6"/>
</svg>
</symbol>
<!-- "Auto, in dark mode" icon: the sun's rays and disc are drawn at 50% opacity.
     Referenced by the theme toggle's .theme-icon-when-auto-dark slot. -->
<symbol id="svg-moon-with-sun" viewBox="0 0 24 24">
<title>Auto light/dark, in dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1" stroke-linecap="round" stroke-linejoin="round"
class="icon-custom-derived-from-feather-sun-and-tabler-moon">
<path d="M 8.282 7.007 C 8.385 7.007 8.494 7.007 8.595 7.007 C 5.18 10.184 6.481 15.869 10.942 17.24 C 12.275 17.648 13.706 17.589 15 17.066 C 12.851 22.236 5.91 23.143 2.505 18.696 C -0.897 14.249 1.791 7.786 7.342 7.063 C 7.652 7.021 7.965 7 8.282 7 L 8.282 7.007 Z"/>
<line style="opacity: 50%" x1="18" y1="3.705" x2="18" y2="2.5"/>
<line style="opacity: 50%" x1="18" y1="11.295" x2="18" y2="12.5"/>
<line style="opacity: 50%" x1="15.316" y1="4.816" x2="14.464" y2="3.964"/>
<line style="opacity: 50%" x1="20.711" y1="10.212" x2="21.563" y2="11.063"/>
<line style="opacity: 50%" x1="14.205" y1="7.5" x2="13.001" y2="7.5"/>
<line style="opacity: 50%" x1="21.795" y1="7.5" x2="23" y2="7.5"/>
<line style="opacity: 50%" x1="15.316" y1="10.184" x2="14.464" y2="11.036"/>
<line style="opacity: 50%" x1="20.711" y1="4.789" x2="21.563" y2="3.937"/>
<circle style="opacity: 50%" cx="18" cy="7.5" r="2.169"/>
</svg>
</symbol>
<!-- Pencil icon (no <title>); not referenced in the part of the page visible here. -->
<symbol id="svg-pencil" viewBox="0 0 24 24">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-pencil-code">
<path d="M4 20h4l10.5 -10.5a2.828 2.828 0 1 0 -4 -4l-10.5 10.5v4" />
<path d="M13.5 6.5l4 4" />
<path d="M20 21l2 -2l-2 -2" />
<path d="M17 17l-2 2l2 2" />
</svg>
</symbol>
<!-- Eye icon (no <title>); referenced by the "View this page" link. -->
<symbol id="svg-eye" viewBox="0 0 24 24">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-eye-code">
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
<path d="M10 12a2 2 0 1 0 4 0a2 2 0 0 0 -4 0" />
<path
d="M11.11 17.958c-3.209 -.307 -5.91 -2.293 -8.11 -5.958c2.4 -4 5.4 -6 9 -6c3.6 0 6.6 2 9 6c-.21 .352 -.427 .688 -.647 1.008" />
<path d="M20 21l2 -2l-2 -2" />
<path d="M17 17l-2 2l2 2" />
</svg>
</symbol>
</svg>
<!-- State checkboxes for the two sidebars. Every <label for="__navigation"> or
     <label for="__toc"> on the page toggles the matching checkbox. -->
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
<!-- Overlay labels. The hidden text is a <span> because <label> only permits
     phrasing content. -->
<label class="overlay sidebar-overlay" for="__navigation">
<span class="visually-hidden">Hide navigation sidebar</span>
</label>
<label class="overlay toc-overlay" for="__toc">
<span class="visually-hidden">Hide table of contents sidebar</span>
</label>
<!-- Skip link targeting the <article id="furo-main-content"> element. -->
<a class="skip-to-content muted-link" href="#furo-main-content">Skip to content</a>
<div class="page">
<!-- Mobile header: navigation toggle, brand link, theme toggle and table-of-contents toggle.
     Hidden accessible names use <span>, since <label> and <button> only permit phrasing content. -->
<header class="mobile-header">
<div class="header-left">
<!-- Label bound to the #__navigation checkbox. -->
<label class="nav-overlay-icon" for="__navigation">
<span class="visually-hidden">Toggle site navigation sidebar</span>
<i class="icon"><svg><use href="#svg-menu"></use></svg></i>
</label>
</div>
<div class="header-center">
<a href="index.html"><div class="brand">arrow-java 18.1.0 documentation</div></a>
</div>
<div class="header-right">
<div class="theme-toggle-container theme-toggle-header">
<!-- Explicit type="button": without it a <button> defaults to type="submit". -->
<button class="theme-toggle" type="button">
<span class="visually-hidden">Toggle Light / Dark / Auto color theme</span>
<svg class="theme-icon-when-auto-light"><use href="#svg-sun-with-moon"></use></svg>
<svg class="theme-icon-when-auto-dark"><use href="#svg-moon-with-sun"></use></svg>
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
</button>
</div>
<!-- Label bound to the #__toc checkbox. -->
<label class="toc-overlay-icon toc-header-icon" for="__toc">
<span class="visually-hidden">Toggle table of contents sidebar</span>
<i class="icon"><svg><use href="#svg-toc"></use></svg></i>
</label>
</div>
</header>
<!-- Site navigation sidebar: brand link, search form, and the top-level navigation tree. -->
<aside class="sidebar-drawer">
<div class="sidebar-container">
<div class="sidebar-sticky"><a class="sidebar-brand" href="index.html">
<span class="sidebar-brand-text">arrow-java 18.1.0 documentation</span>
</a><form class="sidebar-search-container" method="get" action="search.html" role="search">
<!-- Submits a GET request to search.html with the query in "q"; the two hidden
     fields below are sent alongside it. -->
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
<input type="hidden" name="check_keywords" value="yes">
<input type="hidden" name="area" value="default">
</form>
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
<!-- Navigation tree. The entry carrying "current-page" is this page, so its link
     points at "#"; the last entry is the only external link. -->
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="quickstartguide.html">Quick Start Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="overview.html">High-Level Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="install.html">Installing Java Modules</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="developers/index.html">Java Development</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle navigation of Java Development</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="developers/building.html">Building Arrow Java</a></li>
<li class="toctree-l2"><a class="reference internal" href="developers/development.html">Development Guidelines</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="memory.html">Memory Management</a></li>
<li class="toctree-l1"><a class="reference internal" href="vector.html">ValueVector</a></li>
<li class="toctree-l1"><a class="reference internal" href="vector_schema_root.html">Tabular Data</a></li>
<li class="toctree-l1"><a class="reference internal" href="table.html">Table</a></li>
<li class="toctree-l1 current current-page"><a class="current reference internal" href="#">Reading/Writing IPC formats</a></li>
<li class="toctree-l1"><a class="reference internal" href="algorithm.html">Java Algorithms</a></li>
<li class="toctree-l1"><a class="reference internal" href="flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l1"><a class="reference internal" href="flight_sql.html">Arrow Flight SQL</a></li>
<li class="toctree-l1"><a class="reference internal" href="flight_sql_jdbc_driver.html">Arrow Flight SQL JDBC Driver</a></li>
<li class="toctree-l1"><a class="reference internal" href="dataset.html">Dataset</a></li>
<li class="toctree-l1"><a class="reference internal" href="substrait.html">Substrait</a></li>
<li class="toctree-l1"><a class="reference internal" href="cdata.html">C Data Interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="jdbc.html">Arrow JDBC Adapter</a></li>
<li class="toctree-l1"><a class="reference internal" href="reference/index.html">Reference (javadoc)</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/cookbook/java/">Cookbook</a></li>
</ul>
</div>
</div>
</div>
</div>
</aside>
<div class="main">
<div class="content">
<div class="article-container">
<!-- "Back to top" link: href="#" (an empty fragment) navigates to the top of the document. -->
<a href="#" class="back-to-top muted-link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
<path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
</svg>
<span>Back to top</span>
</a>
<!-- Icon row above the article: page-source link, theme toggle and table-of-contents toggle.
     Hidden accessible names use <span>, since <label> and <button> only permit phrasing content. -->
<div class="content-icon-container">
<div class="view-this-page">
<a class="muted-link" href="_sources/ipc.rst.txt" title="View this page">
<svg><use href="#svg-eye"></use></svg>
<span class="visually-hidden">View this page</span>
</a>
</div>
<div class="theme-toggle-container theme-toggle-content">
<!-- Explicit type="button": without it a <button> defaults to type="submit". -->
<button class="theme-toggle" type="button">
<span class="visually-hidden">Toggle Light / Dark / Auto color theme</span>
<svg class="theme-icon-when-auto-light"><use href="#svg-sun-with-moon"></use></svg>
<svg class="theme-icon-when-auto-dark"><use href="#svg-moon-with-sun"></use></svg>
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
</button>
</div>
<!-- Label bound to the #__toc checkbox. -->
<label class="toc-overlay-icon toc-content-icon" for="__toc">
<span class="visually-hidden">Toggle table of contents sidebar</span>
<i class="icon"><svg><use href="#svg-toc"></use></svg></i>
</label>
</div>
<article role="main" id="furo-main-content">
<section id="reading-writing-ipc-formats">
<h1>Reading/Writing IPC formats<a class="headerlink" href="#reading-writing-ipc-formats" title="Link to this heading">¶</a></h1>
<p>Arrow defines two types of binary formats for serializing record batches:</p>
<ul class="simple">
<li><p><strong>Streaming format</strong>: for sending an arbitrary number of record
batches. The format must be processed from start to end, and does not support
random access</p></li>
<li><p><strong>File or Random Access format</strong>: for serializing a fixed number of record
batches. It supports random access, and thus is very useful when used with
memory maps</p></li>
</ul>
<section id="writing-and-reading-streaming-format">
<h2>Writing and Reading Streaming Format<a class="headerlink" href="#writing-and-reading-streaming-format" title="Link to this heading">¶</a></h2>
<p>First, let’s populate a <code class="xref py py-class docutils literal notranslate"><span class="pre">VectorSchemaRoot</span></code> with a small batch of records:</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">BitVector</span><span class="w"> </span><span class="n">bitVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">BitVector</span><span class="p">(</span><span class="s">&quot;boolean&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">allocator</span><span class="p">);</span>
<span class="n">VarCharVector</span><span class="w"> </span><span class="n">varCharVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">VarCharVector</span><span class="p">(</span><span class="s">&quot;varchar&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">allocator</span><span class="p">);</span>
<span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="mi">10</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">bitVector</span><span class="p">.</span><span class="na">setSafe</span><span class="p">(</span><span class="n">i</span><span class="p">,</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">%</span><span class="w"> </span><span class="mi">2</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="mi">0</span><span class="w"> </span><span class="o">?</span><span class="w"> </span><span class="mi">0</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="mi">1</span><span class="p">);</span>
<span class="w"> </span><span class="n">varCharVector</span><span class="p">.</span><span class="na">setSafe</span><span class="p">(</span><span class="n">i</span><span class="p">,</span><span class="w"> </span><span class="p">(</span><span class="s">&quot;test&quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">i</span><span class="p">).</span><span class="na">getBytes</span><span class="p">(</span><span class="n">StandardCharsets</span><span class="p">.</span><span class="na">UTF_8</span><span class="p">));</span>
<span class="p">}</span>
<span class="n">bitVector</span><span class="p">.</span><span class="na">setValueCount</span><span class="p">(</span><span class="mi">10</span><span class="p">);</span>
<span class="n">varCharVector</span><span class="p">.</span><span class="na">setValueCount</span><span class="p">(</span><span class="mi">10</span><span class="p">);</span>
<span class="n">List</span><span class="o">&lt;</span><span class="n">Field</span><span class="o">&gt;</span><span class="w"> </span><span class="n">fields</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">Arrays</span><span class="p">.</span><span class="na">asList</span><span class="p">(</span><span class="n">bitVector</span><span class="p">.</span><span class="na">getField</span><span class="p">(),</span><span class="w"> </span><span class="n">varCharVector</span><span class="p">.</span><span class="na">getField</span><span class="p">());</span>
<span class="n">List</span><span class="o">&lt;</span><span class="n">FieldVector</span><span class="o">&gt;</span><span class="w"> </span><span class="n">vectors</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">Arrays</span><span class="p">.</span><span class="na">asList</span><span class="p">(</span><span class="n">bitVector</span><span class="p">,</span><span class="w"> </span><span class="n">varCharVector</span><span class="p">);</span>
<span class="n">VectorSchemaRoot</span><span class="w"> </span><span class="n">root</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="p">(</span><span class="n">fields</span><span class="p">,</span><span class="w"> </span><span class="n">vectors</span><span class="p">);</span>
</pre></div>
</div>
<p>Now, we can begin writing a stream containing some number of these batches. For this we use <code class="xref py py-class docutils literal notranslate"><span class="pre">ArrowStreamWriter</span></code>
(the DictionaryProvider, which is used for any vectors that are dictionary encoded, is optional and can be null):</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="k">try</span><span class="w"> </span><span class="p">(</span>
<span class="w"> </span><span class="n">ByteArrayOutputStream</span><span class="w"> </span><span class="n">out</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ByteArrayOutputStream</span><span class="p">();</span>
<span class="w"> </span><span class="n">ArrowStreamWriter</span><span class="w"> </span><span class="n">writer</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowStreamWriter</span><span class="p">(</span><span class="n">root</span><span class="p">,</span><span class="w"> </span><span class="cm">/*DictionaryProvider=*/</span><span class="kc">null</span><span class="p">,</span><span class="w"> </span><span class="n">Channels</span><span class="p">.</span><span class="na">newChannel</span><span class="p">(</span><span class="n">out</span><span class="p">));</span>
<span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// ... do write into the ArrowStreamWriter</span>
<span class="p">}</span>
</pre></div>
</div>
<p>Here we used an in-memory stream, but this could have been a socket or some other IO stream. Then we can do:</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">writer</span><span class="p">.</span><span class="na">start</span><span class="p">();</span>
<span class="c1">// write the first batch</span>
<span class="n">writer</span><span class="p">.</span><span class="na">writeBatch</span><span class="p">();</span>
<span class="c1">// write another four batches.</span>
<span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="mi">4</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// populate VectorSchemaRoot data and write the next batch</span>
<span class="w"> </span><span class="n">BitVector</span><span class="w"> </span><span class="n">childVector1</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">BitVector</span><span class="p">)</span><span class="n">root</span><span class="p">.</span><span class="na">getVector</span><span class="p">(</span><span class="mi">0</span><span class="p">);</span>
<span class="w"> </span><span class="n">VarCharVector</span><span class="w"> </span><span class="n">childVector2</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">VarCharVector</span><span class="p">)</span><span class="n">root</span><span class="p">.</span><span class="na">getVector</span><span class="p">(</span><span class="mi">1</span><span class="p">);</span>
<span class="w"> </span><span class="n">childVector1</span><span class="p">.</span><span class="na">reset</span><span class="p">();</span>
<span class="w"> </span><span class="n">childVector2</span><span class="p">.</span><span class="na">reset</span><span class="p">();</span>
<span class="w"> </span><span class="c1">// ... do some populate work here, could be different for each batch</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeBatch</span><span class="p">();</span>
<span class="p">}</span>
<span class="n">writer</span><span class="p">.</span><span class="na">end</span><span class="p">();</span>
</pre></div>
</div>
<p>Note that the <code class="xref py py-class docutils literal notranslate"><span class="pre">VectorSchemaRoot</span></code> in the writer is a container that can hold batches: batches flow through
<code class="xref py py-class docutils literal notranslate"><span class="pre">VectorSchemaRoot</span></code> as part of a pipeline, and later batches overwrite previous ones. We therefore need to populate the data
before each call to <code class="docutils literal notranslate"><span class="pre">writeBatch</span></code>.</p>
<p>Now the <code class="xref py py-class docutils literal notranslate"><span class="pre">ByteArrayOutputStream</span></code> contains the complete stream, which consists of 5 record batches.
We can read such a stream with <code class="xref py py-class docutils literal notranslate"><span class="pre">ArrowStreamReader</span></code>. Note that the <code class="xref py py-class docutils literal notranslate"><span class="pre">VectorSchemaRoot</span></code> within the reader
will be loaded with new values on every call to <code class="xref py py-class docutils literal notranslate"><span class="pre">loadNextBatch()</span></code>:</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">ArrowStreamReader</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowStreamReader</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ByteArrayInputStream</span><span class="p">(</span><span class="n">out</span><span class="p">.</span><span class="na">toByteArray</span><span class="p">()),</span><span class="w"> </span><span class="n">allocator</span><span class="p">))</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// This will be loaded with new values on every call to loadNextBatch</span>
<span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="w"> </span><span class="n">readRoot</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getVectorSchemaRoot</span><span class="p">();</span>
<span class="w"> </span><span class="n">Schema</span><span class="w"> </span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">readRoot</span><span class="p">.</span><span class="na">getSchema</span><span class="p">();</span>
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="mi">5</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">loadNextBatch</span><span class="p">();</span>
<span class="w"> </span><span class="c1">// ... do something with readRoot</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
<p>Here we also give a simple example with dictionary-encoded vectors:</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="c1">// create provider</span>
<span class="n">DictionaryProvider</span><span class="p">.</span><span class="na">MapDictionaryProvider</span><span class="w"> </span><span class="n">provider</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">DictionaryProvider</span><span class="p">.</span><span class="na">MapDictionaryProvider</span><span class="p">();</span>
<span class="k">try</span><span class="w"> </span><span class="p">(</span>
<span class="w"> </span><span class="kd">final</span><span class="w"> </span><span class="n">VarCharVector</span><span class="w"> </span><span class="n">dictVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">VarCharVector</span><span class="p">(</span><span class="s">&quot;dict&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">allocator</span><span class="p">);</span>
<span class="w"> </span><span class="kd">final</span><span class="w"> </span><span class="n">VarCharVector</span><span class="w"> </span><span class="n">vector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">VarCharVector</span><span class="p">(</span><span class="s">&quot;vector&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">allocator</span><span class="p">);</span>
<span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// create dictionary vector</span>
<span class="w"> </span><span class="n">dictVector</span><span class="p">.</span><span class="na">allocateNewSafe</span><span class="p">();</span>
<span class="w"> </span><span class="n">dictVector</span><span class="p">.</span><span class="na">setSafe</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;aa&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">dictVector</span><span class="p">.</span><span class="na">setSafe</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;bb&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">dictVector</span><span class="p">.</span><span class="na">setSafe</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;cc&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">dictVector</span><span class="p">.</span><span class="na">setValueCount</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="c1">// create dictionary</span>
<span class="w"> </span><span class="n">Dictionary</span><span class="w"> </span><span class="n">dictionary</span><span class="w"> </span><span class="o">=</span>
<span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Dictionary</span><span class="p">(</span><span class="n">dictVector</span><span class="p">,</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">DictionaryEncoding</span><span class="p">(</span><span class="mi">1L</span><span class="p">,</span><span class="w"> </span><span class="kc">false</span><span class="p">,</span><span class="w"> </span><span class="cm">/*indexType=*/</span><span class="kc">null</span><span class="p">));</span>
<span class="w"> </span><span class="n">provider</span><span class="p">.</span><span class="na">put</span><span class="p">(</span><span class="n">dictionary</span><span class="p">);</span>
<span class="w"> </span><span class="c1">// create original data vector</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">allocateNewSafe</span><span class="p">();</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">setSafe</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;bb&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">setSafe</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;bb&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">setSafe</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;cc&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">setSafe</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;aa&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">setValueCount</span><span class="p">(</span><span class="mi">4</span><span class="p">);</span>
<span class="w"> </span><span class="c1">// get the encoded vector</span>
<span class="w"> </span><span class="n">IntVector</span><span class="w"> </span><span class="n">encodedVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">IntVector</span><span class="p">)</span><span class="w"> </span><span class="n">DictionaryEncoder</span><span class="p">.</span><span class="na">encode</span><span class="p">(</span><span class="n">vector</span><span class="p">,</span><span class="w"> </span><span class="n">dictionary</span><span class="p">);</span>
<span class="w"> </span><span class="n">ByteArrayOutputStream</span><span class="w"> </span><span class="n">out</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ByteArrayOutputStream</span><span class="p">();</span>
<span class="w"> </span><span class="c1">// create VectorSchemaRoot</span>
<span class="w"> </span><span class="n">List</span><span class="o">&lt;</span><span class="n">Field</span><span class="o">&gt;</span><span class="w"> </span><span class="n">fields</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">Arrays</span><span class="p">.</span><span class="na">asList</span><span class="p">(</span><span class="n">encodedVector</span><span class="p">.</span><span class="na">getField</span><span class="p">());</span>
<span class="w"> </span><span class="n">List</span><span class="o">&lt;</span><span class="n">FieldVector</span><span class="o">&gt;</span><span class="w"> </span><span class="n">vectors</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">Arrays</span><span class="p">.</span><span class="na">asList</span><span class="p">(</span><span class="n">encodedVector</span><span class="p">);</span>
<span class="w"> </span><span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">VectorSchemaRoot</span><span class="w"> </span><span class="n">root</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="p">(</span><span class="n">fields</span><span class="p">,</span><span class="w"> </span><span class="n">vectors</span><span class="p">))</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// write data</span>
<span class="w"> </span><span class="n">ArrowStreamWriter</span><span class="w"> </span><span class="n">writer</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowStreamWriter</span><span class="p">(</span><span class="n">root</span><span class="p">,</span><span class="w"> </span><span class="n">provider</span><span class="p">,</span><span class="w"> </span><span class="n">Channels</span><span class="p">.</span><span class="na">newChannel</span><span class="p">(</span><span class="n">out</span><span class="p">));</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">start</span><span class="p">();</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeBatch</span><span class="p">();</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">end</span><span class="p">();</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="c1">// read data</span>
<span class="w"> </span><span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">ArrowStreamReader</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowStreamReader</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ByteArrayInputStream</span><span class="p">(</span><span class="n">out</span><span class="p">.</span><span class="na">toByteArray</span><span class="p">()),</span><span class="w"> </span><span class="n">allocator</span><span class="p">))</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">loadNextBatch</span><span class="p">();</span>
<span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="w"> </span><span class="n">readRoot</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getVectorSchemaRoot</span><span class="p">();</span>
<span class="w"> </span><span class="c1">// get the encoded vector</span>
<span class="w"> </span><span class="n">IntVector</span><span class="w"> </span><span class="n">intVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">IntVector</span><span class="p">)</span><span class="w"> </span><span class="n">readRoot</span><span class="p">.</span><span class="na">getVector</span><span class="p">(</span><span class="mi">0</span><span class="p">);</span>
<span class="w"> </span><span class="c1">// get dictionaries and decode the vector</span>
<span class="w"> </span><span class="n">Map</span><span class="o">&lt;</span><span class="n">Long</span><span class="p">,</span><span class="w"> </span><span class="n">Dictionary</span><span class="o">&gt;</span><span class="w"> </span><span class="n">dictionaryMap</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getDictionaryVectors</span><span class="p">();</span>
<span class="w"> </span><span class="kt">long</span><span class="w"> </span><span class="n">dictionaryId</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">intVector</span><span class="p">.</span><span class="na">getField</span><span class="p">().</span><span class="na">getDictionary</span><span class="p">().</span><span class="na">getId</span><span class="p">();</span>
<span class="w"> </span><span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">VarCharVector</span><span class="w"> </span><span class="n">varCharVector</span><span class="w"> </span><span class="o">=</span>
<span class="w"> </span><span class="p">(</span><span class="n">VarCharVector</span><span class="p">)</span><span class="w"> </span><span class="n">DictionaryEncoder</span><span class="p">.</span><span class="na">decode</span><span class="p">(</span><span class="n">intVector</span><span class="p">,</span><span class="w"> </span><span class="n">dictionaryMap</span><span class="p">.</span><span class="na">get</span><span class="p">(</span><span class="n">dictionaryId</span><span class="p">)))</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// ... use decoded vector</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
</section>
<section id="writing-and-reading-random-access-files">
<h2>Writing and Reading Random Access Files<a class="headerlink" href="#writing-and-reading-random-access-files" title="Link to this heading">¶</a></h2>
<p>The <code class="xref py py-class docutils literal notranslate"><span class="pre">ArrowFileWriter</span></code> has the same API as <code class="xref py py-class docutils literal notranslate"><span class="pre">ArrowStreamWriter</span></code>:</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="k">try</span><span class="w"> </span><span class="p">(</span>
<span class="w"> </span><span class="n">ByteArrayOutputStream</span><span class="w"> </span><span class="n">out</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ByteArrayOutputStream</span><span class="p">();</span>
<span class="w"> </span><span class="n">ArrowFileWriter</span><span class="w"> </span><span class="n">writer</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowFileWriter</span><span class="p">(</span><span class="n">root</span><span class="p">,</span><span class="w"> </span><span class="cm">/*DictionaryProvider=*/</span><span class="kc">null</span><span class="p">,</span><span class="w"> </span><span class="n">Channels</span><span class="p">.</span><span class="na">newChannel</span><span class="p">(</span><span class="n">out</span><span class="p">));</span>
<span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">start</span><span class="p">();</span>
<span class="w"> </span><span class="c1">// write the first batch</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeBatch</span><span class="p">();</span>
<span class="w"> </span><span class="c1">// write another four batches.</span>
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="mi">4</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// ... do populate work</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeBatch</span><span class="p">();</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">end</span><span class="p">();</span>
<span class="p">}</span>
</pre></div>
</div>
<p>The difference between <code class="xref py py-class docutils literal notranslate"><span class="pre">ArrowFileReader</span></code> and <code class="xref py py-class docutils literal notranslate"><span class="pre">ArrowStreamReader</span></code> is that the input source
must have a <code class="docutils literal notranslate"><span class="pre">seek</span></code> method for random access. Because we have access to the entire payload, we know the
number of record batches in the file, and can read any of them at random:</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">ArrowFileReader</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowFileReader</span><span class="p">(</span>
<span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ByteArrayReadableSeekableByteChannel</span><span class="p">(</span><span class="n">out</span><span class="p">.</span><span class="na">toByteArray</span><span class="p">()),</span><span class="w"> </span><span class="n">allocator</span><span class="p">))</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// read the 4th batch (index 3)</span>
<span class="w"> </span><span class="n">ArrowBlock</span><span class="w"> </span><span class="n">block</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getRecordBlocks</span><span class="p">().</span><span class="na">get</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">loadRecordBatch</span><span class="p">(</span><span class="n">block</span><span class="p">);</span>
<span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="w"> </span><span class="n">readBatch</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getVectorSchemaRoot</span><span class="p">();</span>
<span class="p">}</span>
</pre></div>
</div>
</section>
</section>
</article>
</div>
<footer>
<div class="related-pages">
<a class="next-page" href="algorithm.html">
<div class="page-info">
<div class="context">
<span>Next</span>
</div>
<div class="title">Java Algorithms</div>
</div>
<svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
</a>
<a class="prev-page" href="table.html">
<svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
<div class="page-info">
<div class="context">
<span>Previous</span>
</div>
<div class="title">Table</div>
</div>
</a>
</div>
<div class="bottom-of-page">
<div class="left-details">
<div class="copyright">
Copyright &#169; 2025, Apache Arrow Developers
</div>
Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
<a href="https://github.com/pradyunsg/furo">Furo</a>
</div>
<div class="right-details">
</div>
</div>
</footer>
</div>
<aside class="toc-drawer">
<div class="toc-sticky toc-scroll">
<div class="toc-title-container">
<span class="toc-title">
On this page
</span>
</div>
<div class="toc-tree-container">
<div class="toc-tree">
<ul>
<li><a class="reference internal" href="#">Reading/Writing IPC formats</a><ul>
<li><a class="reference internal" href="#writing-and-reading-streaming-format">Writing and Reading Streaming Format</a></li>
<li><a class="reference internal" href="#writing-and-reading-random-access-files">Writing and Reading Random Access Files</a></li>
</ul>
</li>
</ul>
</div>
</div>
</div>
</aside>
</div>
</div><script src="_static/documentation_options.js?v=c4c92189"></script>
<script src="_static/doctools.js?v=9bcbadda"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/scripts/furo.js?v=5fa4622c"></script>
</body>
</html>