blob: 98e9b8b5eb8522103ad4364736c09f3186b0747f [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" data-content_root="./">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Working with Schema &#8212; Apache Arrow Java Cookbook documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=4f649999" />
<link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=39aeeac0" />
<script src="_static/documentation_options.js?v=5929fcd5"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<link rel="icon" href="_static/favicon.ico"/>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="Reading and writing data" href="io.html" />
<link rel="prev" title="Creating Arrow Objects" href="create.html" />
<link rel="stylesheet" href="_static/custom.css" type="text/css" />
<meta name="viewport" content="width=device-width, initial-scale=0.9, maximum-scale=0.9" />
<!-- Matomo -->
<script>
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '20']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head><body>
<div class="document">
<div class="documentwrapper">
<div class="bodywrapper">
<div class="body" role="main">
<section id="working-with-schema">
<h1><a class="toc-backref" href="#id1" role="doc-backlink">Working with Schema</a><a class="headerlink" href="#working-with-schema" title="Link to this heading"></a></h1>
<p>Let’s start talking about tabular data. Data often comes in the form of two-dimensional
sets of heterogeneous data (such as database tables, CSV files…). Arrow provides
several abstractions to handle such data conveniently and efficiently.</p>
<nav class="contents" id="contents">
<p class="topic-title">Contents</p>
<ul class="simple">
<li><p><a class="reference internal" href="#working-with-schema" id="id1">Working with Schema</a></p>
<ul>
<li><p><a class="reference internal" href="#creating-fields" id="id2">Creating Fields</a></p></li>
<li><p><a class="reference internal" href="#creating-the-schema" id="id3">Creating the Schema</a></p></li>
<li><p><a class="reference internal" href="#adding-metadata-to-fields-and-schemas" id="id4">Adding Metadata to Fields and Schemas</a></p></li>
<li><p><a class="reference internal" href="#creating-vectorschemaroot" id="id5">Creating VectorSchemaRoot</a></p></li>
</ul>
</li>
</ul>
</nav>
<section id="creating-fields">
<h2><a class="toc-backref" href="#id2" role="doc-backlink">Creating Fields</a><a class="headerlink" href="#creating-fields" title="Link to this heading"></a></h2>
<p>Fields are used to denote the particular columns of tabular data.</p>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.ArrowType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Field</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.FieldType</span><span class="p">;</span>
<span class="n">Field</span><span class="w"> </span><span class="n">name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;name&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">FieldType</span><span class="p">.</span><span class="na">nullable</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Utf8</span><span class="p">()),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">print</span><span class="p">(</span><span class="n">name</span><span class="p">);</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>name: Utf8
</pre></div>
</div>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.ArrowType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Field</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.FieldType</span><span class="p">;</span>
<span class="n">Field</span><span class="w"> </span><span class="n">age</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;age&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">FieldType</span><span class="p">.</span><span class="na">nullable</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Int</span><span class="p">(</span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="kc">true</span><span class="p">)),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">print</span><span class="p">(</span><span class="n">age</span><span class="p">);</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>age: Int(32, true)
</pre></div>
</div>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.ArrowType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Field</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.FieldType</span><span class="p">;</span>
<span class="n">FieldType</span><span class="w"> </span><span class="n">intType</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FieldType</span><span class="p">(</span><span class="kc">true</span><span class="p">,</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Int</span><span class="p">(</span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="kc">true</span><span class="p">),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">FieldType</span><span class="w"> </span><span class="n">listType</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FieldType</span><span class="p">(</span><span class="kc">true</span><span class="p">,</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">List</span><span class="p">(),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">Field</span><span class="w"> </span><span class="n">childField</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;intCol&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">intType</span><span class="p">,</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">List</span><span class="o">&lt;</span><span class="n">Field</span><span class="o">&gt;</span><span class="w"> </span><span class="n">childFields</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrayList</span><span class="o">&lt;&gt;</span><span class="p">();</span>
<span class="n">childFields</span><span class="p">.</span><span class="na">add</span><span class="p">(</span><span class="n">childField</span><span class="p">);</span>
<span class="n">Field</span><span class="w"> </span><span class="n">points</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;points&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">listType</span><span class="p">,</span><span class="w"> </span><span class="n">childFields</span><span class="p">);</span>
<span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">print</span><span class="p">(</span><span class="n">points</span><span class="p">);</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>points: List&lt;intCol: Int(32, true)&gt;
</pre></div>
</div>
</section>
<section id="creating-the-schema">
<h2><a class="toc-backref" href="#id3" role="doc-backlink">Creating the Schema</a><a class="headerlink" href="#creating-the-schema" title="Link to this heading"></a></h2>
<p>A schema describes a sequence of columns in tabular data, and consists
of a list of fields.</p>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Schema</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.ArrowType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Field</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.FieldType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.util.ArrayList</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.util.List</span><span class="p">;</span>
<span class="kn">import static</span><span class="w"> </span><span class="nn">java.util.Arrays.asList</span><span class="p">;</span>
<span class="n">Field</span><span class="w"> </span><span class="n">name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;name&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">FieldType</span><span class="p">.</span><span class="na">nullable</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Utf8</span><span class="p">()),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">Field</span><span class="w"> </span><span class="n">document</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;document&quot;</span><span class="p">,</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FieldType</span><span class="p">(</span><span class="kc">true</span><span class="p">,</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Utf8</span><span class="p">(),</span><span class="w"> </span><span class="kc">null</span><span class="p">),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">Field</span><span class="w"> </span><span class="n">age</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;age&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">FieldType</span><span class="p">.</span><span class="na">nullable</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Int</span><span class="p">(</span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="kc">true</span><span class="p">)),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">FieldType</span><span class="w"> </span><span class="n">intType</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FieldType</span><span class="p">(</span><span class="kc">true</span><span class="p">,</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Int</span><span class="p">(</span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="kc">true</span><span class="p">),</span><span class="w"> </span><span class="cm">/*dictionary=*/</span><span class="kc">null</span><span class="p">);</span>
<span class="n">FieldType</span><span class="w"> </span><span class="n">listType</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FieldType</span><span class="p">(</span><span class="kc">true</span><span class="p">,</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">List</span><span class="p">(),</span><span class="w"> </span><span class="cm">/*dictionary=*/</span><span class="kc">null</span><span class="p">);</span>
<span class="n">Field</span><span class="w"> </span><span class="n">childField</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;intCol&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">intType</span><span class="p">,</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">List</span><span class="o">&lt;</span><span class="n">Field</span><span class="o">&gt;</span><span class="w"> </span><span class="n">childFields</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrayList</span><span class="o">&lt;&gt;</span><span class="p">();</span>
<span class="n">childFields</span><span class="p">.</span><span class="na">add</span><span class="p">(</span><span class="n">childField</span><span class="p">);</span>
<span class="n">Field</span><span class="w"> </span><span class="n">points</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;points&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">listType</span><span class="p">,</span><span class="w"> </span><span class="n">childFields</span><span class="p">);</span>
<span class="n">Schema</span><span class="w"> </span><span class="n">schemaPerson</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Schema</span><span class="p">(</span><span class="n">asList</span><span class="p">(</span><span class="n">name</span><span class="p">,</span><span class="w"> </span><span class="n">document</span><span class="p">,</span><span class="w"> </span><span class="n">age</span><span class="p">,</span><span class="w"> </span><span class="n">points</span><span class="p">));</span>
<span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">print</span><span class="p">(</span><span class="n">schemaPerson</span><span class="p">);</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>Schema&lt;name: Utf8, document: Utf8, age: Int(32, true), points: List&lt;intCol: Int(32, true)&gt;&gt;
</pre></div>
</div>
</section>
<section id="adding-metadata-to-fields-and-schemas">
<h2><a class="toc-backref" href="#id4" role="doc-backlink">Adding Metadata to Fields and Schemas</a><a class="headerlink" href="#adding-metadata-to-fields-and-schemas" title="Link to this heading"></a></h2>
<p>In case we need to add metadata to our Field we could use:</p>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.ArrowType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Field</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.FieldType</span><span class="p">;</span>
<span class="n">Map</span><span class="o">&lt;</span><span class="n">String</span><span class="p">,</span><span class="w"> </span><span class="n">String</span><span class="o">&gt;</span><span class="w"> </span><span class="n">metadata</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">HashMap</span><span class="o">&lt;&gt;</span><span class="p">();</span>
<span class="n">metadata</span><span class="p">.</span><span class="na">put</span><span class="p">(</span><span class="s">&quot;A&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Id card&quot;</span><span class="p">);</span>
<span class="n">metadata</span><span class="p">.</span><span class="na">put</span><span class="p">(</span><span class="s">&quot;B&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Passport&quot;</span><span class="p">);</span>
<span class="n">metadata</span><span class="p">.</span><span class="na">put</span><span class="p">(</span><span class="s">&quot;C&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Visa&quot;</span><span class="p">);</span>
<span class="n">Field</span><span class="w"> </span><span class="n">document</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;document&quot;</span><span class="p">,</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FieldType</span><span class="p">(</span><span class="kc">true</span><span class="p">,</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Utf8</span><span class="p">(),</span><span class="w"> </span><span class="kc">null</span><span class="p">,</span><span class="w"> </span><span class="n">metadata</span><span class="p">),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">print</span><span class="p">(</span><span class="n">document</span><span class="p">.</span><span class="na">getMetadata</span><span class="p">());</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>{A=Id card, B=Passport, C=Visa}
</pre></div>
</div>
<p>In case we need to add metadata to our Schema we could use:</p>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Schema</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.ArrowType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Field</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.FieldType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.util.ArrayList</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.util.HashMap</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.util.List</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.util.Map</span><span class="p">;</span>
<span class="kn">import static</span><span class="w"> </span><span class="nn">java.util.Arrays.asList</span><span class="p">;</span>
<span class="n">Field</span><span class="w"> </span><span class="n">name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;name&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">FieldType</span><span class="p">.</span><span class="na">nullable</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Utf8</span><span class="p">()),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">Field</span><span class="w"> </span><span class="n">document</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;document&quot;</span><span class="p">,</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FieldType</span><span class="p">(</span><span class="kc">true</span><span class="p">,</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Utf8</span><span class="p">(),</span><span class="w"> </span><span class="kc">null</span><span class="p">),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">Field</span><span class="w"> </span><span class="n">age</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;age&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">FieldType</span><span class="p">.</span><span class="na">nullable</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Int</span><span class="p">(</span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="kc">true</span><span class="p">)),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">FieldType</span><span class="w"> </span><span class="n">intType</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FieldType</span><span class="p">(</span><span class="kc">true</span><span class="p">,</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Int</span><span class="p">(</span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="kc">true</span><span class="p">),</span><span class="w"> </span><span class="cm">/*dictionary=*/</span><span class="kc">null</span><span class="p">);</span>
<span class="n">FieldType</span><span class="w"> </span><span class="n">listType</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FieldType</span><span class="p">(</span><span class="kc">true</span><span class="p">,</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">List</span><span class="p">(),</span><span class="w"> </span><span class="cm">/*dictionary=*/</span><span class="kc">null</span><span class="p">);</span>
<span class="n">Field</span><span class="w"> </span><span class="n">childField</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;intCol&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">intType</span><span class="p">,</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">List</span><span class="o">&lt;</span><span class="n">Field</span><span class="o">&gt;</span><span class="w"> </span><span class="n">childFields</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrayList</span><span class="o">&lt;&gt;</span><span class="p">();</span>
<span class="n">childFields</span><span class="p">.</span><span class="na">add</span><span class="p">(</span><span class="n">childField</span><span class="p">);</span>
<span class="n">Field</span><span class="w"> </span><span class="n">points</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;points&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">listType</span><span class="p">,</span><span class="w"> </span><span class="n">childFields</span><span class="p">);</span>
<span class="n">Map</span><span class="o">&lt;</span><span class="n">String</span><span class="p">,</span><span class="w"> </span><span class="n">String</span><span class="o">&gt;</span><span class="w"> </span><span class="n">metadataSchema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">HashMap</span><span class="o">&lt;&gt;</span><span class="p">();</span>
<span class="n">metadataSchema</span><span class="p">.</span><span class="na">put</span><span class="p">(</span><span class="s">&quot;Key-1&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Value-1&quot;</span><span class="p">);</span>
<span class="n">Schema</span><span class="w"> </span><span class="n">schemaPerson</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Schema</span><span class="p">(</span><span class="n">asList</span><span class="p">(</span><span class="n">name</span><span class="p">,</span><span class="w"> </span><span class="n">document</span><span class="p">,</span><span class="w"> </span><span class="n">age</span><span class="p">,</span><span class="w"> </span><span class="n">points</span><span class="p">),</span><span class="w"> </span><span class="n">metadataSchema</span><span class="p">);</span>
<span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">print</span><span class="p">(</span><span class="n">schemaPerson</span><span class="p">);</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>Schema&lt;name: Utf8, document: Utf8, age: Int(32, true), points: List&lt;intCol: Int(32, true)&gt;&gt;(metadata: {Key-1=Value-1})
</pre></div>
</div>
</section>
<section id="creating-vectorschemaroot">
<h2><a class="toc-backref" href="#id5" role="doc-backlink">Creating VectorSchemaRoot</a><a class="headerlink" href="#creating-vectorschemaroot" title="Link to this heading"></a></h2>
<p><code class="docutils literal notranslate"><span class="pre">VectorSchemaRoot</span></code> is somewhat analogous to tables and record batches in the
other Arrow implementations in that they all are 2D datasets, but the usage is different.</p>
<p>Let’s populate a <code class="docutils literal notranslate"><span class="pre">VectorSchemaRoot</span></code> with a small batch of records:</p>
<div class="highlight-java notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.BufferAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.memory.RootAllocator</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.VarCharVector</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.VectorSchemaRoot</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.complex.ListVector</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.IntVector</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.complex.impl.UnionListWriter</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Schema</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.ArrowType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.Field</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">org.apache.arrow.vector.types.pojo.FieldType</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.util.ArrayList</span><span class="p">;</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">java.util.List</span><span class="p">;</span>
<span class="kn">import static</span><span class="w"> </span><span class="nn">java.util.Arrays.asList</span><span class="p">;</span>
<span class="n">Field</span><span class="w"> </span><span class="n">name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;name&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">FieldType</span><span class="p">.</span><span class="na">nullable</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Utf8</span><span class="p">()),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">Field</span><span class="w"> </span><span class="n">age</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;age&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">FieldType</span><span class="p">.</span><span class="na">nullable</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Int</span><span class="p">(</span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="kc">true</span><span class="p">)),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">FieldType</span><span class="w"> </span><span class="n">intType</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FieldType</span><span class="p">(</span><span class="kc">true</span><span class="p">,</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">Int</span><span class="p">(</span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="kc">true</span><span class="p">),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">FieldType</span><span class="w"> </span><span class="n">listType</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FieldType</span><span class="p">(</span><span class="kc">true</span><span class="p">,</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrowType</span><span class="p">.</span><span class="na">List</span><span class="p">(),</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">Field</span><span class="w"> </span><span class="n">childField</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;intCol&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">intType</span><span class="p">,</span><span class="w"> </span><span class="kc">null</span><span class="p">);</span>
<span class="n">List</span><span class="o">&lt;</span><span class="n">Field</span><span class="o">&gt;</span><span class="w"> </span><span class="n">childFields</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrayList</span><span class="o">&lt;&gt;</span><span class="p">();</span>
<span class="n">childFields</span><span class="p">.</span><span class="na">add</span><span class="p">(</span><span class="n">childField</span><span class="p">);</span>
<span class="n">Field</span><span class="w"> </span><span class="n">points</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Field</span><span class="p">(</span><span class="s">&quot;points&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">listType</span><span class="p">,</span><span class="w"> </span><span class="n">childFields</span><span class="p">);</span>
<span class="n">Schema</span><span class="w"> </span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Schema</span><span class="p">(</span><span class="n">asList</span><span class="p">(</span><span class="n">name</span><span class="p">,</span><span class="w"> </span><span class="n">age</span><span class="p">,</span><span class="w"> </span><span class="n">points</span><span class="p">));</span>
<span class="k">try</span><span class="p">(</span>
<span class="w"> </span><span class="n">BufferAllocator</span><span class="w"> </span><span class="n">allocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">();</span>
<span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="w"> </span><span class="n">root</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="p">.</span><span class="na">create</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">allocator</span><span class="p">)</span>
<span class="p">){</span>
<span class="w"> </span><span class="n">VarCharVector</span><span class="w"> </span><span class="n">nameVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">VarCharVector</span><span class="p">)</span><span class="w"> </span><span class="n">root</span><span class="p">.</span><span class="na">getVector</span><span class="p">(</span><span class="s">&quot;name&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">allocateNew</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;David&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Gladis&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;Juan&quot;</span><span class="p">.</span><span class="na">getBytes</span><span class="p">());</span>
<span class="w"> </span><span class="n">nameVector</span><span class="p">.</span><span class="na">setValueCount</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">IntVector</span><span class="w"> </span><span class="n">ageVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">IntVector</span><span class="p">)</span><span class="w"> </span><span class="n">root</span><span class="p">.</span><span class="na">getVector</span><span class="p">(</span><span class="s">&quot;age&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">allocateNew</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">20</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">30</span><span class="p">);</span>
<span class="w"> </span><span class="n">ageVector</span><span class="p">.</span><span class="na">setValueCount</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">ListVector</span><span class="w"> </span><span class="n">listVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">ListVector</span><span class="p">)</span><span class="w"> </span><span class="n">root</span><span class="p">.</span><span class="na">getVector</span><span class="p">(</span><span class="s">&quot;points&quot;</span><span class="p">);</span>
<span class="w"> </span><span class="n">UnionListWriter</span><span class="w"> </span><span class="n">listWriter</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">listVector</span><span class="p">.</span><span class="na">getWriter</span><span class="p">();</span>
<span class="w"> </span><span class="kt">int</span><span class="o">[]</span><span class="w"> </span><span class="n">data</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="kt">int</span><span class="o">[]</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="mi">4</span><span class="p">,</span><span class="w"> </span><span class="mi">8</span><span class="p">,</span><span class="w"> </span><span class="mi">12</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">20</span><span class="p">,</span><span class="w"> </span><span class="mi">30</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">15</span><span class="w"> </span><span class="p">};</span>
<span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">tmp_index</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span>
<span class="w"> </span><span class="k">for</span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="mi">3</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">listWriter</span><span class="p">.</span><span class="na">setPosition</span><span class="p">(</span><span class="n">i</span><span class="p">);</span>
<span class="w"> </span><span class="n">listWriter</span><span class="p">.</span><span class="na">startList</span><span class="p">();</span>
<span class="w"> </span><span class="k">for</span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">j</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">j</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="mi">3</span><span class="p">;</span><span class="w"> </span><span class="n">j</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">listWriter</span><span class="p">.</span><span class="na">writeInt</span><span class="p">(</span><span class="n">data</span><span class="o">[</span><span class="n">tmp_index</span><span class="o">]</span><span class="p">);</span>
<span class="w"> </span><span class="n">tmp_index</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">tmp_index</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="p">;</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="n">listWriter</span><span class="p">.</span><span class="na">setValueCount</span><span class="p">(</span><span class="mi">2</span><span class="p">);</span>
<span class="w"> </span><span class="n">listWriter</span><span class="p">.</span><span class="na">endList</span><span class="p">();</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="n">listVector</span><span class="p">.</span><span class="na">setValueCount</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">root</span><span class="p">.</span><span class="na">setRowCount</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">print</span><span class="p">(</span><span class="n">root</span><span class="p">.</span><span class="na">contentToTSVString</span><span class="p">());</span>
<span class="p">}</span><span class="w"> </span><span class="k">catch</span><span class="w"> </span><span class="p">(</span><span class="n">Exception</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">e</span><span class="p">.</span><span class="na">printStackTrace</span><span class="p">();</span>
<span class="p">}</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>name age points
David 10 [4,8,12]
Gladis 20 [10,20,30]
Juan 30 [5,10,15]
</pre></div>
</div>
</section>
</section>
</div>
</div>
</div>
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
<div class="sphinxsidebarwrapper">
<p class="logo">
<a href="index.html">
<img class="logo" src="_static/arrow-logo_vertical_black-txt_transparent-bg.svg" alt="Logo"/>
</a>
</p>
<p>
<iframe src="https://ghbtns.com/github-btn.html?user=apache&repo=arrow-cookbook&type=none&count=true&size=large&v=2"
allowtransparency="true" frameborder="0" scrolling="0" width="200px" height="35px"></iframe>
</p>
<h3>Navigation</h3>
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="create.html">Creating Arrow Objects</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Working with Schema</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#creating-fields">Creating Fields</a></li>
<li class="toctree-l2"><a class="reference internal" href="#creating-the-schema">Creating the Schema</a></li>
<li class="toctree-l2"><a class="reference internal" href="#adding-metadata-to-fields-and-schemas">Adding Metadata to Fields and Schemas</a></li>
<li class="toctree-l2"><a class="reference internal" href="#creating-vectorschemaroot">Creating VectorSchemaRoot</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="io.html">Reading and writing data</a></li>
<li class="toctree-l1"><a class="reference internal" href="flight.html">Arrow Flight</a></li>
<li class="toctree-l1"><a class="reference internal" href="dataset.html">Dataset</a></li>
<li class="toctree-l1"><a class="reference internal" href="substrait.html">Substrait</a></li>
<li class="toctree-l1"><a class="reference internal" href="data.html">Data manipulation</a></li>
<li class="toctree-l1"><a class="reference internal" href="avro.html">Avro</a></li>
<li class="toctree-l1"><a class="reference internal" href="jdbc.html">Arrow JDBC Adapter</a></li>
</ul>
<hr />
<ul>
<li class="toctree-l1"><a href="https://arrow.apache.org/docs/java/index.html">User Guide</a></li>
<li class="toctree-l1"><a href="https://arrow.apache.org/docs/java/reference/index.html">API Reference</a></li>
</ul>
<div class="relations">
<h3>Related Topics</h3>
<ul>
<li><a href="index.html">Documentation overview</a><ul>
<li>Previous: <a href="create.html" title="previous chapter">Creating Arrow Objects</a></li>
<li>Next: <a href="io.html" title="next chapter">Reading and writing data</a></li>
</ul></li>
</ul>
</div>
<div id="searchbox" style="display: none" role="search">
<h3 id="searchlabel">Quick search</h3>
<div class="searchformwrapper">
<form class="search" action="search.html" method="get">
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
<input type="submit" value="Go" />
</form>
</div>
</div>
<script>document.getElementById('searchbox').style.display = "block"</script>
</div>
</div>
<div class="clearer"></div>
</div>
<div class="footer">
&copy;2022, Apache Software Foundation.
|
Powered by <a href="http://sphinx-doc.org/">Sphinx 7.2.6</a>
&amp; <a href="https://github.com/bitprophet/alabaster">Alabaster 0.7.13</a>
|
<a href="_sources/schema.rst.txt"
rel="nofollow">Page source</a>
</div>
</body>
</html>