blob: 6debbdfd63f70e0efb654c02c5c8e0306f009ec5 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" data-content_root="./">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Reading and writing data &#8212; Apache Arrow Java Cookbook documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
<link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=49eeb2a1" />
<script src="_static/documentation_options.js?v=5929fcd5"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<link rel="icon" href="_static/favicon.ico"/>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="Arrow Flight" href="flight.html" />
<link rel="prev" title="Working with Schema" href="schema.html" />
<link rel="stylesheet" href="_static/custom.css" type="text/css" />
<!-- Matomo -->
<script>
/* Matomo bootstrap: queue tracker commands on window._paq, then inject the tracker script with async loading. */
/* An already-existing queue on the page is reused rather than replaced. */
window._paq = window._paq || [];
var _paq = window._paq;
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* Cookie tracking is explicitly disabled to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function () {
  var baseUrl = "https://analytics.apache.org/";
  _paq.push(['setTrackerUrl', baseUrl + 'matomo.php']);
  _paq.push(['setSiteId', '20']);
  var trackerScript = document.createElement('script');
  var firstScript = document.getElementsByTagName('script')[0];
  trackerScript.async = true;
  trackerScript.src = baseUrl + 'matomo.js';
  /* Insert the tracker ahead of the first <script> element found in the document. */
  firstScript.parentNode.insertBefore(trackerScript, firstScript);
})();
</script>
<!-- End Matomo Code -->
</head><body>
<div class="document">
<div class="documentwrapper">
<div class="bodywrapper">
<div class="body" role="main">
<section id="reading-and-writing-data">
<span id="arrow-io"></span><h1><a class="toc-backref" href="#id5" role="doc-backlink">Reading and writing data</a><a class="headerlink" href="#reading-and-writing-data" title="Link to this heading"></a></h1>
<p>The <a class="reference external" href="https://arrow.apache.org/docs/java/ipc.html">Arrow IPC format</a> defines two types of binary formats
for serializing Arrow data: the streaming format and the file format (or random access format). Such files can
be directly memory-mapped when read.</p>
<nav class="contents" id="contents">
<p class="topic-title">Contents</p>
<ul class="simple">
<li><p><a class="reference internal" href="#reading-and-writing-data" id="id5">Reading and writing data</a></p>
<ul>
<li><p><a class="reference internal" href="#writing" id="id6">Writing</a></p>
<ul>
<li><p><a class="reference internal" href="#writing-random-access-files" id="id7">Writing Random Access Files</a></p>
<ul>
<li><p><a class="reference internal" href="#write-out-to-file" id="id8">Write - Out to File</a></p></li>
<li><p><a class="reference internal" href="#write-out-to-buffer" id="id9">Write - Out to Buffer</a></p></li>
</ul>
</li>
<li><p><a class="reference internal" href="#writing-streaming-format" id="id10">Writing Streaming Format</a></p>
<ul>
<li><p><a class="reference internal" href="#id1" id="id11">Write - Out to File</a></p></li>
<li><p><a class="reference internal" href="#id2" id="id12">Write - Out to Buffer</a></p></li>
</ul>
</li>
</ul>
</li>
<li><p><a class="reference internal" href="#reading" id="id13">Reading</a></p>
<ul>
<li><p><a class="reference internal" href="#reading-random-access-files" id="id14">Reading Random Access Files</a></p>
<ul>
<li><p><a class="reference internal" href="#read-from-file" id="id15">Read - From File</a></p></li>
<li><p><a class="reference internal" href="#read-from-buffer" id="id16">Read - From Buffer</a></p></li>
</ul>
</li>
<li><p><a class="reference internal" href="#reading-streaming-format" id="id17">Reading Streaming Format</a></p>
<ul>
<li><p><a class="reference internal" href="#id3" id="id18">Read - From File</a></p></li>
<li><p><a class="reference internal" href="#id4" id="id19">Read - From Buffer</a></p></li>
</ul>
</li>
<li><p><a class="reference internal" href="#reading-parquet-file" id="id20">Reading Parquet File</a></p></li>
<li><p><a class="reference internal" href="#handling-data-with-dictionaries" id="id21">Handling Data with Dictionaries</a></p></li>
</ul>
</li>
</ul>
</li>
</ul>
</nav>
<section id="writing">
<h2><a class="toc-backref" href="#id6" role="doc-backlink">Writing</a><a class="headerlink" href="#writing" title="Link to this heading"></a></h2>
<p>Writing the file format and writing the streaming format both use the same API.</p>
<section id="writing-random-access-files">
<h3><a class="toc-backref" href="#id7" role="doc-backlink">Writing Random Access Files</a><a class="headerlink" href="#writing-random-access-files" title="Link to this heading"></a></h3>
<section id="write-out-to-file">
<h4><a class="toc-backref" href="#id8" role="doc-backlink">Write - Out to File</a><a class="headerlink" href="#write-out-to-file" title="Link to this heading"></a></h4>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.BufferAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.RootAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.VarCharVector</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.IntVector</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Field</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.FieldType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.ArrowType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Schema</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.VectorSchemaRoot</span><span class="p">;</span>
<span class="kn">import static</span><span class="w"> </span><span class="nn">java.util.Arrays.asList</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ipc.ArrowFileWriter</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.File</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.FileOutputStream</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.IOException</span><span class="p">;</span>
<span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">BufferAllocator</span><span class="w"> </span><span class="n">allocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">Field</span><span class="w"> </span><span class="n">name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;name&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">FieldType</span><span class="p">.</span><span class="na">nullable</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Utf8</span><span class="p">()),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="w"> </span><span class="n">Field</span><span class="w"> </span><span class="n">age</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;age&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">FieldType</span><span class="p">.</span><span class="na">nullable</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Int</span><span class="p">(</span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="kc">true</span><span class="p">)),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="w"> </span><span class="n">Schema</span><span class="w"> </span><span class="n">schemaPerson</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Schema</span><span class="p">(</span><span class="n">asList</span><span class="p">(</span><span class="n">name</span><span class="p">,</span><span class="w"> </span><span class="n">age</span><span class="p">));</span>
<span class="w"> </span><span class="k">try</span><span class="p">(</span>
<span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="p">.</span><span class="na">create</span><span class="p">(</span><span class="n">schemaPerson</span><span class="p">,</span><span class="w"> </span><span class="n">allocator</span><span class="p">)</span>
<span class="w"> </span><span class="p">){</span>
<span class="w"> </span><span class="n">VarCharVector</span><span class="w"> </span><span class="n">nameVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">VarCharVector</span><span class="p">)</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">getVector</span><span class="p">(</span><span class="s">&quot;name&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">allocateNew</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;David&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Gladis&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Juan&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">IntVector</span><span class="w"> </span><span class="n">ageVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">IntVector</span><span class="p">)</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">getVector</span><span class="p">(</span><span class="s">&quot;age&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">allocateNew</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">20</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">30</span><span class="p">);</span>
<span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">setRowCount</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">File</span><span class="w"> </span><span class="n">file</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">File</span><span class="p">(</span><span class="s">&quot;randon_access_to_file.arrow&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="k">try</span><span class="w"> </span><span class="p">(</span>
<span class="w"> </span><span class="n">FileOutputStream</span><span class="w"> </span><span class="n">fileOutputStream</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FileOutputStream</span><span class="p">(</span><span class="n">file</span><span class="p">);</span>
<span class="w"> </span><span class="n">ArrowFileWriter</span><span class="w"> </span><span class="n">writer</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowFileWriter</span><span class="p">(</span><span class="n">vectorSchemaRoot</span><span class="p">,</span><span class="w"> </span><span class="kc">null</span><span class="p">,</span><span class="w"> </span><span class="n">fileOutputStream</span><span class="p">.</span><span class="na">getChannel</span><span class="p">())</span>
<span class="w"> </span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">start</span><span class="p">();</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeBatch</span><span class="p">();</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">end</span><span class="p">();</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="s">&quot;Record batches written: &quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">getRecordBlocks</span><span class="p">().</span><span class="na">size</span><span class="p">()</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="s">&quot;. Number of rows written: &quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">getRowCount</span><span class="p">());</span>
<span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="k">catch</span><span class="w"> </span><span class="p">(</span><span class="n">IOException</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">e</span><span class="p">.</span><span class="na">printStackTrace</span><span class="p">();</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>Record batches written: 1. Number of rows written: 3
</pre></div>
</div>
</section>
<section id="write-out-to-buffer">
<h4><a class="toc-backref" href="#id9" role="doc-backlink">Write - Out to Buffer</a><a class="headerlink" href="#write-out-to-buffer" title="Link to this heading"></a></h4>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.BufferAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.RootAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.VarCharVector</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.IntVector</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Field</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.FieldType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.ArrowType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Schema</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.VectorSchemaRoot</span><span class="p">;</span>
<span class="kn">import static</span><span class="w"> </span><span class="nn">java.util.Arrays.asList</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ipc.ArrowFileWriter</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.ByteArrayOutputStream</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.IOException</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.nio.channels.Channels</span><span class="p">;</span>
<span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">BufferAllocator</span><span class="w"> </span><span class="n">allocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">Field</span><span class="w"> </span><span class="n">name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;name&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">FieldType</span><span class="p">.</span><span class="na">nullable</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Utf8</span><span class="p">()),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="w"> </span><span class="n">Field</span><span class="w"> </span><span class="n">age</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;age&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">FieldType</span><span class="p">.</span><span class="na">nullable</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Int</span><span class="p">(</span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="kc">true</span><span class="p">)),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="w"> </span><span class="n">Schema</span><span class="w"> </span><span class="n">schemaPerson</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Schema</span><span class="p">(</span><span class="n">asList</span><span class="p">(</span><span class="n">name</span><span class="p">,</span><span class="w"> </span><span class="n">age</span><span class="p">));</span>
<span class="w"> </span><span class="k">try</span><span class="p">(</span>
<span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="p">.</span><span class="na">create</span><span class="p">(</span><span class="n">schemaPerson</span><span class="p">,</span><span class="w"> </span><span class="n">allocator</span><span class="p">)</span>
<span class="w"> </span><span class="p">){</span>
<span class="w"> </span><span class="n">VarCharVector</span><span class="w"> </span><span class="n">nameVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">VarCharVector</span><span class="p">)</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">getVector</span><span class="p">(</span><span class="s">&quot;name&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">allocateNew</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;David&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Gladis&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Juan&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">IntVector</span><span class="w"> </span><span class="n">ageVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">IntVector</span><span class="p">)</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">getVector</span><span class="p">(</span><span class="s">&quot;age&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">allocateNew</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">20</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">30</span><span class="p">);</span>
<span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">setRowCount</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="k">try</span><span class="w"> </span><span class="p">(</span>
<span class="w"> </span><span class="n">ByteArrayOutputStream</span><span class="w"> </span><span class="n">out</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ByteArrayOutputStream</span><span class="p">();</span>
<span class="w"> </span><span class="n">ArrowFileWriter</span><span class="w"> </span><span class="n">writer</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowFileWriter</span><span class="p">(</span><span class="n">vectorSchemaRoot</span><span class="p">,</span><span class="w"> </span><span class="kc">null</span><span class="p">,</span><span class="w"> </span><span class="n">Channels</span><span class="p">.</span><span class="na">newChannel</span><span class="p">(</span><span class="n">out</span><span class="p">))</span>
<span class="w"> </span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">start</span><span class="p">();</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeBatch</span><span class="p">();</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="s">&quot;Record batches written: &quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">getRecordBlocks</span><span class="p">().</span><span class="na">size</span><span class="p">()</span><span class="w"> </span><span class="o">+</span>
<span class="w"> </span><span class="s">&quot;. Number of rows written: &quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">getRowCount</span><span class="p">());</span>
<span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="k">catch</span><span class="w"> </span><span class="p">(</span><span class="n">IOException</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">e</span><span class="p">.</span><span class="na">printStackTrace</span><span class="p">();</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>Record batches written: 1. Number of rows written: 3
</pre></div>
</div>
</section>
</section>
<section id="writing-streaming-format">
<h3><a class="toc-backref" href="#id10" role="doc-backlink">Writing Streaming Format</a><a class="headerlink" href="#writing-streaming-format" title="Link to this heading"></a></h3>
<section id="id1">
<h4><a class="toc-backref" href="#id11" role="doc-backlink">Write - Out to File</a><a class="headerlink" href="#id1" title="Link to this heading"></a></h4>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.BufferAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.RootAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.VarCharVector</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.IntVector</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ipc.ArrowStreamWriter</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Field</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.FieldType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.ArrowType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Schema</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.VectorSchemaRoot</span><span class="p">;</span>
<span class="kn">import static</span><span class="w"> </span><span class="nn">java.util.Arrays.asList</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.File</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.FileOutputStream</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.IOException</span><span class="p">;</span>
<span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">BufferAllocator</span><span class="w"> </span><span class="n">rootAllocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">Field</span><span class="w"> </span><span class="n">name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;name&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">FieldType</span><span class="p">.</span><span class="na">nullable</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Utf8</span><span class="p">()),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="w"> </span><span class="n">Field</span><span class="w"> </span><span class="n">age</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;age&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">FieldType</span><span class="p">.</span><span class="na">nullable</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Int</span><span class="p">(</span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="kc">true</span><span class="p">)),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="w"> </span><span class="n">Schema</span><span class="w"> </span><span class="n">schemaPerson</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Schema</span><span class="p">(</span><span class="n">asList</span><span class="p">(</span><span class="n">name</span><span class="p">,</span><span class="w"> </span><span class="n">age</span><span class="p">));</span>
<span class="w"> </span><span class="k">try</span><span class="p">(</span>
<span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="p">.</span><span class="na">create</span><span class="p">(</span><span class="n">schemaPerson</span><span class="p">,</span><span class="w"> </span><span class="n">rootAllocator</span><span class="p">)</span>
<span class="w"> </span><span class="p">){</span>
<span class="w"> </span><span class="n">VarCharVector</span><span class="w"> </span><span class="n">nameVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">VarCharVector</span><span class="p">)</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">getVector</span><span class="p">(</span><span class="s">&quot;name&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">allocateNew</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;David&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Gladis&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Juan&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">IntVector</span><span class="w"> </span><span class="n">ageVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">IntVector</span><span class="p">)</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">getVector</span><span class="p">(</span><span class="s">&quot;age&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">allocateNew</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">20</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">30</span><span class="p">);</span>
<span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">setRowCount</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">File</span><span class="w"> </span><span class="n">file</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">File</span><span class="p">(</span><span class="s">&quot;streaming_to_file.arrow&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="k">try</span><span class="w"> </span><span class="p">(</span>
<span class="w"> </span><span class="n">FileOutputStream</span><span class="w"> </span><span class="n">fileOutputStream</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FileOutputStream</span><span class="p">(</span><span class="n">file</span><span class="p">);</span>
<span class="w"> </span><span class="n">ArrowStreamWriter</span><span class="w"> </span><span class="n">writer</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowStreamWriter</span><span class="p">(</span><span class="n">vectorSchemaRoot</span><span class="p">,</span><span class="w"> </span><span class="kc">null</span><span class="p">,</span><span class="w"> </span><span class="n">fileOutputStream</span><span class="p">.</span><span class="na">getChannel</span><span class="p">())</span>
<span class="w"> </span><span class="p">){</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">start</span><span class="p">();</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeBatch</span><span class="p">();</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="s">&quot;Number of rows written: &quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">getRowCount</span><span class="p">());</span>
<span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="k">catch</span><span class="w"> </span><span class="p">(</span><span class="n">IOException</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">e</span><span class="p">.</span><span class="na">printStackTrace</span><span class="p">();</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>Number of rows written: 3
</pre></div>
</div>
</section>
<section id="id2">
<h4><a class="toc-backref" href="#id12" role="doc-backlink">Write - Out to Buffer</a><a class="headerlink" href="#id2" title="Link to this heading"></a></h4>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.BufferAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.RootAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.VarCharVector</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.IntVector</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ipc.ArrowStreamWriter</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Field</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.FieldType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.ArrowType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Schema</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.VectorSchemaRoot</span><span class="p">;</span>
<span class="kn">import static</span><span class="w"> </span><span class="nn">java.util.Arrays.asList</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.ByteArrayOutputStream</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.IOException</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.nio.channels.Channels</span><span class="p">;</span>
<span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">BufferAllocator</span><span class="w"> </span><span class="n">rootAllocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">Field</span><span class="w"> </span><span class="n">name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;name&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">FieldType</span><span class="p">.</span><span class="na">nullable</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Utf8</span><span class="p">()),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="w"> </span><span class="n">Field</span><span class="w"> </span><span class="n">age</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;age&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">FieldType</span><span class="p">.</span><span class="na">nullable</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Int</span><span class="p">(</span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="kc">true</span><span class="p">)),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="w"> </span><span class="n">Schema</span><span class="w"> </span><span class="n">schemaPerson</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Schema</span><span class="p">(</span><span class="n">asList</span><span class="p">(</span><span class="n">name</span><span class="p">,</span><span class="w"> </span><span class="n">age</span><span class="p">));</span>
<span class="w"> </span><span class="k">try</span><span class="p">(</span>
<span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="p">.</span><span class="na">create</span><span class="p">(</span><span class="n">schemaPerson</span><span class="p">,</span><span class="w"> </span><span class="n">rootAllocator</span><span class="p">)</span>
<span class="w"> </span><span class="p">){</span>
<span class="w"> </span><span class="n">VarCharVector</span><span class="w"> </span><span class="n">nameVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">VarCharVector</span><span class="p">)</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">getVector</span><span class="p">(</span><span class="s">&quot;name&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">allocateNew</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;David&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Gladis&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Juan&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">IntVector</span><span class="w"> </span><span class="n">ageVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">IntVector</span><span class="p">)</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">getVector</span><span class="p">(</span><span class="s">&quot;age&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">allocateNew</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">20</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">30</span><span class="p">);</span>
<span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">setRowCount</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="k">try</span><span class="w"> </span><span class="p">(</span>
<span class="w"> </span><span class="n">ByteArrayOutputStream</span><span class="w"> </span><span class="n">out</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ByteArrayOutputStream</span><span class="p">();</span>
<span class="w"> </span><span class="n">ArrowStreamWriter</span><span class="w"> </span><span class="n">writer</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowStreamWriter</span><span class="p">(</span><span class="n">vectorSchemaRoot</span><span class="p">,</span><span class="w"> </span><span class="kc">null</span><span class="p">,</span><span class="w"> </span><span class="n">Channels</span><span class="p">.</span><span class="na">newChannel</span><span class="p">(</span><span class="n">out</span><span class="p">))</span>
<span class="w"> </span><span class="p">){</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">start</span><span class="p">();</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeBatch</span><span class="p">();</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="s">&quot;Number of rows written: &quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">getRowCount</span><span class="p">());</span>
<span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="k">catch</span><span class="w"> </span><span class="p">(</span><span class="n">IOException</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">e</span><span class="p">.</span><span class="na">printStackTrace</span><span class="p">();</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>Number of rows written: 3
</pre></div>
</div>
</section>
</section>
</section>
<section id="reading">
<h2><a class="toc-backref" href="#id13" role="doc-backlink">Reading</a><a class="headerlink" href="#reading" title="Link to this heading"></a></h2>
<p>The random access format and the streaming format are both read through the same API;
the difference is that random access files also offer access to any record batch by index.</p>
<section id="reading-random-access-files">
<h3><a class="toc-backref" href="#id14" role="doc-backlink">Reading Random Access Files</a><a class="headerlink" href="#reading-random-access-files" title="Link to this heading"></a></h3>
<section id="read-from-file">
<h4><a class="toc-backref" href="#id15" role="doc-backlink">Read - From File</a><a class="headerlink" href="#read-from-file" title="Link to this heading"></a></h4>
<p>We provide a path to auto-generated Arrow files for testing purposes; change it to point to your own files as needed.</p>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.BufferAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.RootAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ipc.ArrowFileReader</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ipc.message.ArrowBlock</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.VectorSchemaRoot</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.File</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.FileInputStream</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.IOException</span><span class="p">;</span>
<span class="n">File</span><span class="w"> </span><span class="n">file</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">File</span><span class="p">(</span><span class="s">&quot;./thirdpartydeps/arrowfiles/random_access.arrow&quot;</span><span class="p">);</span>
<span class="k">try</span><span class="p">(</span>
<span class="w"> </span><span class="n">BufferAllocator</span><span class="w"> </span><span class="n">rootAllocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">();</span>
<span class="w"> </span><span class="n">FileInputStream</span><span class="w"> </span><span class="n">fileInputStream</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FileInputStream</span><span class="p">(</span><span class="n">file</span><span class="p">);</span>
<span class="w"> </span><span class="n">ArrowFileReader</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowFileReader</span><span class="p">(</span><span class="n">fileInputStream</span><span class="p">.</span><span class="na">getChannel</span><span class="p">(),</span><span class="w"> </span><span class="n">rootAllocator</span><span class="p">)</span>
<span class="p">){</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="s">&quot;Record batches in file: &quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getRecordBlocks</span><span class="p">().</span><span class="na">size</span><span class="p">());</span>
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="n">ArrowBlock</span><span class="w"> </span><span class="n">arrowBlock</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getRecordBlocks</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">loadRecordBatch</span><span class="p">(</span><span class="n">arrowBlock</span><span class="p">);</span>
<span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="w"> </span><span class="n">vectorSchemaRootRecover</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getVectorSchemaRoot</span><span class="p">();</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">print</span><span class="p">(</span><span class="n">vectorSchemaRootRecover</span><span class="p">.</span><span class="na">contentToTSVString</span><span class="p">());</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span><span class="w"> </span><span class="k">catch</span><span class="w"> </span><span class="p">(</span><span class="n">IOException</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">e</span><span class="p">.</span><span class="na">printStackTrace</span><span class="p">();</span>
<span class="p">}</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>Record batches in file: 3
name age
David 10
Gladis 20
Juan 30
name age
Nidia 15
Alexa 20
Mara 15
name age
Raul 34
Jhon 29
Thomy 33
</pre></div>
</div>
</section>
<section id="read-from-buffer">
<h4><a class="toc-backref" href="#id16" role="doc-backlink">Read - From Buffer</a><a class="headerlink" href="#read-from-buffer" title="Link to this heading"></a></h4>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.BufferAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.RootAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ipc.ArrowFileReader</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ipc.SeekableReadChannel</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ipc.message.ArrowBlock</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.VectorSchemaRoot</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.IOException</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.nio.file.Files</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.nio.file.Path</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.nio.file.Paths</span><span class="p">;</span>
<span class="n">Path</span><span class="w"> </span><span class="n">path</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">Paths</span><span class="p">.</span><span class="na">get</span><span class="p">(</span><span class="s">&quot;./thirdpartydeps/arrowfiles/random_access.arrow&quot;</span><span class="p">);</span>
<span class="k">try</span><span class="p">(</span>
<span class="w"> </span><span class="n">BufferAllocator</span><span class="w"> </span><span class="n">rootAllocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">();</span>
<span class="w"> </span><span class="n">ArrowFileReader</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowFileReader</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">SeekableReadChannel</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ByteArrayReadableSeekableByteChannel</span><span class="p">(</span>
<span class="w"> </span><span class="n">Files</span><span class="p">.</span><span class="na">readAllBytes</span><span class="p">(</span><span class="n">path</span><span class="p">))),</span><span class="w"> </span><span class="n">rootAllocator</span><span class="p">)</span>
<span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="s">&quot;Record batches in file: &quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getRecordBlocks</span><span class="p">().</span><span class="na">size</span><span class="p">());</span>
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="n">ArrowBlock</span><span class="w"> </span><span class="n">arrowBlock</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getRecordBlocks</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">loadRecordBatch</span><span class="p">(</span><span class="n">arrowBlock</span><span class="p">);</span>
<span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="w"> </span><span class="n">vectorSchemaRootRecover</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getVectorSchemaRoot</span><span class="p">();</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">print</span><span class="p">(</span><span class="n">vectorSchemaRootRecover</span><span class="p">.</span><span class="na">contentToTSVString</span><span class="p">());</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span><span class="w"> </span><span class="k">catch</span><span class="w"> </span><span class="p">(</span><span class="n">IOException</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">e</span><span class="p">.</span><span class="na">printStackTrace</span><span class="p">();</span>
<span class="p">}</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>Record batches in file: 3
name age
David 10
Gladis 20
Juan 30
name age
Nidia 15
Alexa 20
Mara 15
name age
Raul 34
Jhon 29
Thomy 33
</pre></div>
</div>
</section>
</section>
<section id="reading-streaming-format">
<h3><a class="toc-backref" href="#id17" role="doc-backlink">Reading Streaming Format</a><a class="headerlink" href="#reading-streaming-format" title="Link to this heading"></a></h3>
<section id="id3">
<h4><a class="toc-backref" href="#id18" role="doc-backlink">Read - From File</a><a class="headerlink" href="#id3" title="Link to this heading"></a></h4>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.BufferAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.RootAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ipc.ArrowStreamReader</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.VectorSchemaRoot</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.File</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.FileInputStream</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.IOException</span><span class="p">;</span>
<span class="n">File</span><span class="w"> </span><span class="n">file</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">File</span><span class="p">(</span><span class="s">&quot;./thirdpartydeps/arrowfiles/streaming.arrow&quot;</span><span class="p">);</span>
<span class="k">try</span><span class="p">(</span>
<span class="w"> </span><span class="n">BufferAllocator</span><span class="w"> </span><span class="n">rootAllocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">();</span>
<span class="w"> </span><span class="n">FileInputStream</span><span class="w"> </span><span class="n">fileInputStreamForStream</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FileInputStream</span><span class="p">(</span><span class="n">file</span><span class="p">);</span>
<span class="w"> </span><span class="n">ArrowStreamReader</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowStreamReader</span><span class="p">(</span><span class="n">fileInputStreamForStream</span><span class="p">,</span><span class="w"> </span><span class="n">rootAllocator</span><span class="p">)</span>
<span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="k">while</span><span class="w"> </span><span class="p">(</span><span class="n">reader</span><span class="p">.</span><span class="na">loadNextBatch</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="w"> </span><span class="n">vectorSchemaRootRecover</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getVectorSchemaRoot</span><span class="p">();</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">print</span><span class="p">(</span><span class="n">vectorSchemaRootRecover</span><span class="p">.</span><span class="na">contentToTSVString</span><span class="p">());</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span><span class="w"> </span><span class="k">catch</span><span class="w"> </span><span class="p">(</span><span class="n">IOException</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">e</span><span class="p">.</span><span class="na">printStackTrace</span><span class="p">();</span>
<span class="p">}</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>name age
David 10
Gladis 20
Juan 30
name age
Nidia 15
Alexa 20
Mara 15
name age
Raul 34
Jhon 29
Thomy 33
</pre></div>
</div>
</section>
<section id="id4">
<h4><a class="toc-backref" href="#id19" role="doc-backlink">Read - From Buffer</a><a class="headerlink" href="#id4" title="Link to this heading"></a></h4>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.BufferAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.RootAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ipc.ArrowStreamReader</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.ByteArrayInputStream</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.IOException</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.nio.file.Files</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.nio.file.Path</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.nio.file.Paths</span><span class="p">;</span>
<span class="n">Path</span><span class="w"> </span><span class="n">path</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">Paths</span><span class="p">.</span><span class="na">get</span><span class="p">(</span><span class="s">&quot;./thirdpartydeps/arrowfiles/streaming.arrow&quot;</span><span class="p">);</span>
<span class="k">try</span><span class="p">(</span>
<span class="w"> </span><span class="n">BufferAllocator</span><span class="w"> </span><span class="n">rootAllocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">();</span>
<span class="w"> </span><span class="n">ArrowStreamReader</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowStreamReader</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ByteArrayInputStream</span><span class="p">(</span>
<span class="w"> </span><span class="n">Files</span><span class="p">.</span><span class="na">readAllBytes</span><span class="p">(</span><span class="n">path</span><span class="p">)),</span><span class="w"> </span><span class="n">rootAllocator</span><span class="p">)</span>
<span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="k">while</span><span class="p">(</span><span class="n">reader</span><span class="p">.</span><span class="na">loadNextBatch</span><span class="p">()){</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">print</span><span class="p">(</span><span class="n">reader</span><span class="p">.</span><span class="na">getVectorSchemaRoot</span><span class="p">().</span><span class="na">contentToTSVString</span><span class="p">());</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span><span class="w"> </span><span class="k">catch</span><span class="w"> </span><span class="p">(</span><span class="n">IOException</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">e</span><span class="p">.</span><span class="na">printStackTrace</span><span class="p">();</span>
<span class="p">}</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>name age
David 10
Gladis 20
Juan 30
name age
Nidia 15
Alexa 20
Mara 15
name age
Raul 34
Jhon 29
Thomy 33
</pre></div>
</div>
</section>
</section>
<section id="reading-parquet-file">
<h3><a class="toc-backref" href="#id20" role="doc-backlink">Reading Parquet File</a><a class="headerlink" href="#reading-parquet-file" title="Link to this heading"></a></h3>
<p>For recipes on reading Parquet files, see <a class="reference internal" href="dataset.html"><span class="doc">Dataset</span></a>.</p>
</section>
<section id="handling-data-with-dictionaries">
<h3><a class="toc-backref" href="#id21" role="doc-backlink">Handling Data with Dictionaries</a><a class="headerlink" href="#handling-data-with-dictionaries" title="Link to this heading"></a></h3>
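<p>Reading and writing dictionary-encoded data requires tracking the dictionaries separately from the encoded vectors: when writing, the dictionaries are supplied to the writer through a <code class="docutils literal notranslate"><span class="pre">DictionaryProvider</span></code>; when reading, each dictionary is looked up on the reader by the dictionary ID stored in the encoded field.</p>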
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.BufferAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.RootAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.FieldVector</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ValueVector</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.VarCharVector</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.VectorSchemaRoot</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.dictionary.Dictionary</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.dictionary.DictionaryEncoder</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.dictionary.DictionaryProvider</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ipc.ArrowFileReader</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ipc.ArrowFileWriter</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.ipc.message.ArrowBlock</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.Types</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.ArrowType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.DictionaryEncoding</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.FieldType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.File</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.FileInputStream</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.FileNotFoundException</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.FileOutputStream</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.io.IOException</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.nio.charset.StandardCharsets</span><span class="p">;</span>
<span class="n">DictionaryEncoding</span><span class="w"> </span><span class="n">dictionaryEncoding</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">DictionaryEncoding</span><span class="p">(</span>
<span class="w"> </span><span class="cm">/*id=*/</span><span class="mi">666L</span><span class="p">,</span><span class="w"> </span><span class="cm">/*ordered=*/</span><span class="kc">false</span><span class="p">,</span><span class="w"> </span><span class="cm">/*indexType=*/</span>
<span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Int</span><span class="p">(</span><span class="mi">8</span><span class="p">,</span><span class="w"> </span><span class="kc">true</span><span class="p">)</span>
<span class="p">);</span>
<span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">BufferAllocator</span><span class="w"> </span><span class="n">root</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">();</span>
<span class="w"> </span><span class="n">VarCharVector</span><span class="w"> </span><span class="n">countries</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">VarCharVector</span><span class="p">(</span><span class="s">&quot;country-dict&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">root</span><span class="p">);</span>
<span class="w"> </span><span class="n">VarCharVector</span><span class="w"> </span><span class="n">appUserCountriesUnencoded</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">VarCharVector</span><span class="p">(</span>
<span class="w"> </span><span class="s">&quot;app-use-country-dict&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FieldType</span><span class="p">(</span><span class="kc">true</span><span class="p">,</span><span class="w"> </span><span class="n">Types</span><span class="p">.</span><span class="na">MinorType</span><span class="p">.</span><span class="na">VARCHAR</span><span class="p">.</span><span class="na">getType</span><span class="p">(),</span><span class="w"> </span><span class="n">dictionaryEncoding</span><span class="p">),</span>
<span class="w"> </span><span class="n">root</span><span class="p">)</span>
<span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">countries</span><span class="p">.</span><span class="na">allocateNew</span><span class="p">(</span><span class="mi">10</span><span class="p">);</span>
<span class="w"> </span><span class="n">countries</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Andorra&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">(</span><span class="n">StandardCharsets</span><span class="p">.</span><span class="na">UTF_8</span><span class="p">));</span>
<span class="w"> </span><span class="n">countries</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Cuba&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">(</span><span class="n">StandardCharsets</span><span class="p">.</span><span class="na">UTF_8</span><span class="p">));</span>
<span class="w"> </span><span class="n">countries</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Grecia&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">(</span><span class="n">StandardCharsets</span><span class="p">.</span><span class="na">UTF_8</span><span class="p">));</span>
<span class="w"> </span><span class="n">countries</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Guinea&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">(</span><span class="n">StandardCharsets</span><span class="p">.</span><span class="na">UTF_8</span><span class="p">));</span>
<span class="w"> </span><span class="n">countries</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">4</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Islandia&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">(</span><span class="n">StandardCharsets</span><span class="p">.</span><span class="na">UTF_8</span><span class="p">));</span>
<span class="w"> </span><span class="n">countries</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Malta&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">(</span><span class="n">StandardCharsets</span><span class="p">.</span><span class="na">UTF_8</span><span class="p">));</span>
<span class="w"> </span><span class="n">countries</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">6</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Tailandia&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">(</span><span class="n">StandardCharsets</span><span class="p">.</span><span class="na">UTF_8</span><span class="p">));</span>
<span class="w"> </span><span class="n">countries</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">7</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Uganda&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">(</span><span class="n">StandardCharsets</span><span class="p">.</span><span class="na">UTF_8</span><span class="p">));</span>
<span class="w"> </span><span class="n">countries</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">8</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Yemen&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">(</span><span class="n">StandardCharsets</span><span class="p">.</span><span class="na">UTF_8</span><span class="p">));</span>
<span class="w"> </span><span class="n">countries</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">9</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Zambia&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">(</span><span class="n">StandardCharsets</span><span class="p">.</span><span class="na">UTF_8</span><span class="p">));</span>
<span class="w"> </span><span class="n">countries</span><span class="p">.</span><span class="na">setValueCount</span><span class="p">(</span><span class="mi">10</span><span class="p">);</span>
<span class="w"> </span><span class="n">Dictionary</span><span class="w"> </span><span class="n">countriesDictionary</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Dictionary</span><span class="p">(</span><span class="n">countries</span><span class="p">,</span><span class="w"> </span><span class="n">dictionaryEncoding</span><span class="p">);</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="s">&quot;Dictionary: &quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">countriesDictionary</span><span class="p">);</span>
<span class="w"> </span><span class="n">appUserCountriesUnencoded</span><span class="p">.</span><span class="na">allocateNew</span><span class="p">(</span><span class="mi">5</span><span class="p">);</span>
<span class="w"> </span><span class="n">appUserCountriesUnencoded</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Andorra&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">(</span><span class="n">StandardCharsets</span><span class="p">.</span><span class="na">UTF_8</span><span class="p">));</span>
<span class="w"> </span><span class="n">appUserCountriesUnencoded</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Guinea&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">(</span><span class="n">StandardCharsets</span><span class="p">.</span><span class="na">UTF_8</span><span class="p">));</span>
<span class="w"> </span><span class="n">appUserCountriesUnencoded</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Islandia&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">(</span><span class="n">StandardCharsets</span><span class="p">.</span><span class="na">UTF_8</span><span class="p">));</span>
<span class="w"> </span><span class="n">appUserCountriesUnencoded</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Malta&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">(</span><span class="n">StandardCharsets</span><span class="p">.</span><span class="na">UTF_8</span><span class="p">));</span>
<span class="w"> </span><span class="n">appUserCountriesUnencoded</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">4</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Uganda&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">(</span><span class="n">StandardCharsets</span><span class="p">.</span><span class="na">UTF_8</span><span class="p">));</span>
<span class="w"> </span><span class="n">appUserCountriesUnencoded</span><span class="p">.</span><span class="na">setValueCount</span><span class="p">(</span><span class="mi">5</span><span class="p">);</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="s">&quot;Unencoded data: &quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">appUserCountriesUnencoded</span><span class="p">);</span>
<span class="w"> </span><span class="n">File</span><span class="w"> </span><span class="n">file</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">File</span><span class="p">(</span><span class="s">&quot;random_access_file_with_dictionary.arrow&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="n">DictionaryProvider</span><span class="p">.</span><span class="na">MapDictionaryProvider</span><span class="w"> </span><span class="n">provider</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">DictionaryProvider</span><span class="p">.</span><span class="na">MapDictionaryProvider</span><span class="p">();</span>
<span class="w"> </span><span class="n">provider</span><span class="p">.</span><span class="na">put</span><span class="p">(</span><span class="n">countriesDictionary</span><span class="p">);</span>
<span class="w"> </span><span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">FieldVector</span><span class="w"> </span><span class="n">appUseCountryDictionaryEncoded</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">FieldVector</span><span class="p">)</span><span class="w"> </span><span class="n">DictionaryEncoder</span>
<span class="w"> </span><span class="p">.</span><span class="na">encode</span><span class="p">(</span><span class="n">appUserCountriesUnencoded</span><span class="p">,</span><span class="w"> </span><span class="n">countriesDictionary</span><span class="p">);</span>
<span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="p">.</span><span class="na">of</span><span class="p">(</span><span class="n">appUseCountryDictionaryEncoded</span><span class="p">);</span>
<span class="w"> </span><span class="n">FileOutputStream</span><span class="w"> </span><span class="n">fileOutputStream</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FileOutputStream</span><span class="p">(</span><span class="n">file</span><span class="p">);</span>
<span class="w"> </span><span class="n">ArrowFileWriter</span><span class="w"> </span><span class="n">writer</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowFileWriter</span><span class="p">(</span><span class="n">vectorSchemaRoot</span><span class="p">,</span><span class="w"> </span><span class="n">provider</span><span class="p">,</span><span class="w"> </span><span class="n">fileOutputStream</span><span class="p">.</span><span class="na">getChannel</span><span class="p">())</span>
<span class="w"> </span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="s">&quot;Dictionary-encoded data: &quot;</span><span class="w"> </span><span class="o">+</span><span class="n">appUseCountryDictionaryEncoded</span><span class="p">);</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="s">&quot;Dictionary-encoded ID: &quot;</span><span class="w"> </span><span class="o">+</span><span class="n">appUseCountryDictionaryEncoded</span><span class="p">.</span><span class="na">getField</span><span class="p">().</span><span class="na">getDictionary</span><span class="p">().</span><span class="na">getId</span><span class="p">());</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">start</span><span class="p">();</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeBatch</span><span class="p">();</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">end</span><span class="p">();</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="s">&quot;Record batches written: &quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">getRecordBlocks</span><span class="p">().</span><span class="na">size</span><span class="p">()</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="s">&quot;. Number of rows written: &quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">vectorSchemaRoot</span><span class="p">.</span><span class="na">getRowCount</span><span class="p">());</span>
<span class="w"> </span><span class="k">try</span><span class="p">(</span>
<span class="w"> </span><span class="n">BufferAllocator</span><span class="w"> </span><span class="n">rootAllocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">();</span>
<span class="w"> </span><span class="n">FileInputStream</span><span class="w"> </span><span class="n">fileInputStream</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FileInputStream</span><span class="p">(</span><span class="n">file</span><span class="p">);</span>
<span class="w"> </span><span class="n">ArrowFileReader</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowFileReader</span><span class="p">(</span><span class="n">fileInputStream</span><span class="p">.</span><span class="na">getChannel</span><span class="p">(),</span><span class="w"> </span><span class="n">rootAllocator</span><span class="p">)</span>
<span class="w"> </span><span class="p">){</span>
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="n">ArrowBlock</span><span class="w"> </span><span class="n">arrowBlock</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getRecordBlocks</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">loadRecordBatch</span><span class="p">(</span><span class="n">arrowBlock</span><span class="p">);</span>
<span class="w"> </span><span class="n">FieldVector</span><span class="w"> </span><span class="n">appUseCountryDictionaryEncodedRead</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getVectorSchemaRoot</span><span class="p">().</span><span class="na">getVector</span><span class="p">(</span><span class="s">&quot;app-use-country-dict&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="n">DictionaryEncoding</span><span class="w"> </span><span class="n">dictionaryEncodingRead</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">appUseCountryDictionaryEncodedRead</span><span class="p">.</span><span class="na">getField</span><span class="p">().</span><span class="na">getDictionary</span><span class="p">();</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="s">&quot;Dictionary-encoded ID recovered: &quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">dictionaryEncodingRead</span><span class="p">.</span><span class="na">getId</span><span class="p">());</span>
<span class="w"> </span><span class="n">Dictionary</span><span class="w"> </span><span class="n">appUseCountryDictionaryRead</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getDictionaryVectors</span><span class="p">().</span><span class="na">get</span><span class="p">(</span><span class="n">dictionaryEncodingRead</span><span class="p">.</span><span class="na">getId</span><span class="p">());</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="s">&quot;Dictionary-encoded data recovered: &quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">appUseCountryDictionaryEncodedRead</span><span class="p">);</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="s">&quot;Dictionary recovered: &quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">appUseCountryDictionaryRead</span><span class="p">);</span>
<span class="w"> </span><span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">ValueVector</span><span class="w"> </span><span class="n">readVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">DictionaryEncoder</span><span class="p">.</span><span class="na">decode</span><span class="p">(</span><span class="n">appUseCountryDictionaryEncodedRead</span><span class="p">,</span><span class="w"> </span><span class="n">appUseCountryDictionaryRead</span><span class="p">))</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="s">&quot;Decoded data: &quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">readVector</span><span class="p">);</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="k">catch</span><span class="w"> </span><span class="p">(</span><span class="n">FileNotFoundException</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">e</span><span class="p">.</span><span class="na">printStackTrace</span><span class="p">();</span>
<span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="k">catch</span><span class="w"> </span><span class="p">(</span><span class="n">IOException</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">e</span><span class="p">.</span><span class="na">printStackTrace</span><span class="p">();</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>Dictionary: Dictionary DictionaryEncoding[id=666,ordered=false,indexType=Int(8, true)] [Andorra, Cuba, Grecia, Guinea, Islandia, Malta, Tailandia, Uganda, Yemen, Zambia]
Unencoded data: [Andorra, Guinea, Islandia, Malta, Uganda]
Dictionary-encoded data: [0, 3, 4, 5, 7]
Dictionary-encoded ID: 666
Record batches written: 1. Number of rows written: 5
Dictionary-encoded ID recovered: 666
Dictionary-encoded data recovered: [0, 3, 4, 5, 7]
Dictionary recovered: Dictionary DictionaryEncoding[id=666,ordered=false,indexType=Int(8, true)] [Andorra, Cuba, Grecia, Guinea, Islandia, Malta, Tailandia, Uganda, Yemen, Zambia]
Decoded data: [Andorra, Guinea, Islandia, Malta, Uganda]
</pre></div>
</div>
</section>
</section>
</section>
</div>
</div>
</div>
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
<div class="sphinxsidebarwrapper">
<p class="logo">
<a href="index.html">
<img class="logo" src="_static/arrow-logo_vertical_black-txt_transparent-bg.svg" alt="Apache Arrow Java Cookbook home" />
</a>
</p>
<p>
<iframe src="https://ghbtns.com/github-btn.html?user=apache&amp;repo=arrow-cookbook&amp;type=none&amp;count=true&amp;size=large&amp;v=2"
title="GitHub button for apache/arrow-cookbook" allowtransparency="true" frameborder="0" scrolling="0" width="200" height="35"></iframe>
</p>
<h3>Navigation</h3>
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="create.html">Creating Arrow Objects</a></li>
<li class="toctree-l1"><a class="reference internal" href="schema.html">Working with Schema</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Reading and writing data</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#writing">Writing</a></li>
<li class="toctree-l2"><a class="reference internal" href="#reading">Reading</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="flight.html">Arrow Flight</a></li>
<li class="toctree-l1"><a class="reference internal" href="dataset.html">Dataset</a></li>
<li class="toctree-l1"><a class="reference internal" href="substrait.html">Substrait</a></li>
<li class="toctree-l1"><a class="reference internal" href="data.html">Data manipulation</a></li>
<li class="toctree-l1"><a class="reference internal" href="avro.html">Avro</a></li>
<li class="toctree-l1"><a class="reference internal" href="jdbc.html">Arrow JDBC Adapter</a></li>
</ul>
<hr />
<ul>
<li class="toctree-l1"><a href="https://arrow.apache.org/docs/java/index.html">User Guide</a></li>
<li class="toctree-l1"><a href="https://arrow.apache.org/docs/java/reference/index.html">API Reference</a></li>
</ul>
<div class="relations">
<h3>Related Topics</h3>
<ul>
<li><a href="index.html">Documentation overview</a><ul>
<li>Previous: <a href="schema.html" title="previous chapter">Working with Schema</a></li>
<li>Next: <a href="flight.html" title="next chapter">Arrow Flight</a></li>
</ul></li>
</ul>
</div>
<div id="searchbox" style="display: none" role="search">
<h3 id="searchlabel">Quick search</h3>
<div class="searchformwrapper">
<form class="search" action="search.html" method="get">
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
<input type="submit" value="Go" />
</form>
</div>
</div>
<script>document.getElementById('searchbox').style.display = "block"</script>
</div>
</div>
<div class="clearer"></div>
</div>
<div class="footer">
&#169;2022, Apache Software Foundation.
|
Powered by <a href="https://www.sphinx-doc.org/">Sphinx 7.2.6</a>
&amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
|
<a href="_sources/io.rst.txt"
rel="nofollow">Page source</a>
</div>
</body>
</html>