blob: 892a9c1f57ef8b26f324d9f2b5edfa26bb561cc3 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" data-content_root="" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<title>ValueVector &#8212; Apache Arrow v17.0.0.dev52</title>
<script data-cfasync="false">
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
document.documentElement.dataset.theme = localStorage.getItem("theme") || "light";
</script>
<!-- Loaded before other Sphinx assets -->
<link href="../_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../_static/design-style.1e8bd061cd6da7fc9cf755528e8ffc24.min.css" />
<link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
<script src="../_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/sphinx_highlight.js"></script>
<script src="../_static/clipboard.min.js"></script>
<script src="../_static/copybutton.js"></script>
<script src="../_static/design-tabs.js"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'java/vector';</script>
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.15.2';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = '/docs/_static/versions.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = 'dev/';
DOCUMENTATION_OPTIONS.show_version_warning_banner = true;
</script>
<link rel="canonical" href="https://arrow.apache.org/docs/java/vector.html" />
<link rel="icon" href="../_static/favicon.ico"/>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Tabular Data" href="vector_schema_root.html" />
<link rel="prev" title="Memory Management" href="memory.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<!-- Matomo -->
<script>
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '20']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>
Back to top
</button>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../index.html">
<img src="../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v17.0.0.dev52 - Home"/>
<script>
// Writes the dark-theme logo image (class "only-dark") from script, so this
// image exists only when scripts run.
document.write(`<img src="../_static/arrow-dark.png" class="logo__image only-dark" alt="Apache Arrow v17.0.0.dev52 - Home"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../format/index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links">
Implementations
</button>
<ul id="pst-nav-more-links" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../cpp/index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item current active">
<a class="nav-link dropdown-item nav-internal" href="index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
// Writes the navbar search button from script, so the button exists only
// when scripts run. It shows a magnifying-glass icon, the text "Search"
// and a Ctrl+K keyboard hint.
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<div class="navbar-item">
<script>
// Writes the navbar version switcher: a dropdown toggle button
// (pst-version-switcher-button-2) and an initially empty listbox container
// (pst-version-switcher-list-2), linked through aria-controls and
// aria-labelledby. As the comments inside the markup note, the button text
// and the list entries are filled in later by JavaScript.
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-2"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-2"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-2"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-2">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
// Writes the navbar light/dark theme button from script. The button holds
// one icon span per data-mode value: light (sun), dark (moon) and auto
// (half-stroke circle).
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
// Writes a second copy of the search button, this one inside the
// navbar-persistent--mobile container. The markup is the same as the
// navbar search button: icon, "Search" text and a Ctrl+K hint.
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../format/index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links-2">
Implementations
</button>
<ul id="pst-nav-more-links-2" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../cpp/index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item current active">
<a class="nav-link dropdown-item nav-internal" href="index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<script>
// Writes the sidebar copy of the version switcher: a dropdown toggle button
// (pst-version-switcher-button-3) and an initially empty listbox container
// (pst-version-switcher-list-3). The "-3" ids keep this copy distinct from
// the navbar one. As the comments inside the markup note, the button text
// and the list entries are filled in later by JavaScript.
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-3"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-3"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-3"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-3">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
// Writes the sidebar copy of the light/dark theme button from script. The
// button holds one icon span per data-mode value: light (sun), dark (moon)
// and auto (half-stroke circle).
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="quickstartguide.html">Quick Start Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="overview.html">High-Level Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="install.html">Installing Java Modules</a></li>
<li class="toctree-l1"><a class="reference internal" href="memory.html">Memory Management</a></li>
<li class="toctree-l1 current active"><a class="current reference internal" href="#">ValueVector</a></li>
<li class="toctree-l1"><a class="reference internal" href="vector_schema_root.html">Tabular Data</a></li>
<li class="toctree-l1"><a class="reference internal" href="table.html">Table</a></li>
<li class="toctree-l1"><a class="reference internal" href="ipc.html">Reading/Writing IPC formats</a></li>
<li class="toctree-l1"><a class="reference internal" href="algorithm.html">Java Algorithms</a></li>
<li class="toctree-l1"><a class="reference internal" href="flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l1"><a class="reference internal" href="flight_sql.html">Arrow Flight SQL</a></li>
<li class="toctree-l1"><a class="reference internal" href="flight_sql_jdbc_driver.html">Arrow Flight SQL JDBC Driver</a></li>
<li class="toctree-l1"><a class="reference internal" href="dataset.html">Dataset</a></li>
<li class="toctree-l1"><a class="reference internal" href="substrait.html">Substrait</a></li>
<li class="toctree-l1"><a class="reference internal" href="cdata.html">C Data Interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="jdbc.html">Arrow JDBC Adapter</a></li>
<li class="toctree-l1"><a class="reference internal" href="reference/index.html">Reference (javadoc)</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/cookbook/java/">Java cookbook</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="index.html" class="nav-link">Java Implementation</a></li>
<li class="breadcrumb-item active" aria-current="page">ValueVector</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="valuevector">
<h1>ValueVector<a class="headerlink" href="#valuevector" title="Permalink to this heading">#</a></h1>
<p>The <code class="xref py py-class docutils literal notranslate"><span class="pre">ValueVector</span></code> interface (which is called Array in the C++ implementation and
in <a class="reference internal" href="../format/Columnar.html"><span class="doc">the specification</span></a>) is an abstraction that is used to store a
sequence of values having the same type in an individual column. Internally, those values are
represented by one or several buffers, the number and meaning of which depend on the vector’s data type.</p>
<p>There are concrete subclasses of <code class="xref py py-class docutils literal notranslate"><span class="pre">ValueVector</span></code> for each primitive data type
and nested type described in the specification. A few of the vector names differ from
the type names used in the specification, and some of them are non-intuitive
(for example, BigInt is a 64-bit integer).</p>
<p>It is important that a vector is allocated before attempting to read from or write to it.
<code class="xref py py-class docutils literal notranslate"><span class="pre">ValueVector</span></code> “should” strive to guarantee this order of operation:
create &gt; allocate &gt; mutate &gt; set value count &gt; access &gt; clear (or allocate to start the process over).
We will go through a concrete example to demonstrate each operation in the next section.</p>
<section id="vector-life-cycle">
<h2>Vector Life Cycle<a class="headerlink" href="#vector-life-cycle" title="Permalink to this heading">#</a></h2>
<p>As discussed above, each vector goes through several steps in its life cycle,
and each step is triggered by a vector operation. In particular, we have the following vector operations:</p>
<p>1. <strong>Vector creation</strong>: we create a new vector object by, for example, the vector constructor.
The following code creates a new <code class="docutils literal notranslate"><span class="pre">IntVector</span></code> by the constructor:</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">RootAllocator</span><span class="w"> </span><span class="n">allocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">(</span><span class="n">Long</span><span class="p">.</span><span class="na">MAX_VALUE</span><span class="p">);</span>
<span class="p">...</span>
<span class="n">IntVector</span><span class="w"> </span><span class="n">vector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">IntVector</span><span class="p">(</span><span class="s">&quot;int vector&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">allocator</span><span class="p">);</span>
</pre></div>
</div>
<p>By now, a vector object is created. However, no underlying memory has been allocated, so we need the
following step.</p>
<p>2. <strong>Vector allocation</strong>: in this step, we allocate memory for the vector. For most vectors, we
have two options: 1) if we know the maximum vector capacity, we can specify it by calling the
<code class="docutils literal notranslate"><span class="pre">allocateNew(int)</span></code> method; 2) otherwise, we should call the <code class="docutils literal notranslate"><span class="pre">allocateNew()</span></code> method, and a default
capacity will be allocated for it. For our running example, we assume that the vector capacity never
exceeds 10:</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">vector</span><span class="p">.</span><span class="na">allocateNew</span><span class="p">(</span><span class="mi">10</span><span class="p">);</span>
</pre></div>
</div>
<p>3. <strong>Vector mutation</strong>: now we can populate the vector with values we desire. For all vectors, we can populate
vector values through vector writers (An example will be given in the next section). For primitive types,
we can also mutate the vector by the set methods. There are two classes of set methods: 1) if we can
be sure the vector has enough capacity, we can call the <code class="docutils literal notranslate"><span class="pre">set(index,</span> <span class="pre">value)</span></code> method. 2) if we are not sure
about the vector capacity, we should call the <code class="docutils literal notranslate"><span class="pre">setSafe(index,</span> <span class="pre">value)</span></code> method, which will automatically
take care of vector reallocation, if the capacity is not sufficient. For our running example, we know the
vector has enough capacity, so we can call</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">vector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="cm">/*index*/</span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="cm">/*value*/</span><span class="mi">25</span><span class="p">);</span>
</pre></div>
</div>
<p>4. <strong>Set value count</strong>: for this step, we set the value count of the vector by calling the
<code class="docutils literal notranslate"><span class="pre">setValueCount(int)</span></code> method:</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">vector</span><span class="p">.</span><span class="na">setValueCount</span><span class="p">(</span><span class="mi">10</span><span class="p">);</span>
</pre></div>
</div>
<p>After this step, the vector enters an immutable state. In other words, we should no longer mutate it.
(Unless we reuse the vector by allocating it again. This will be discussed shortly.)</p>
<p>5. <strong>Vector access</strong>: it is time to access vector values. Similarly, we have two options to access values:
1) get methods and 2) vector reader. Vector reader works for all types of vectors, while get methods are
only available for primitive vectors. A concrete example for vector reader will be given in the next section.
Below is an example of vector access by get method:</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="kt">int</span><span class="w"> </span><span class="n">value</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">get</span><span class="p">(</span><span class="mi">5</span><span class="p">);</span><span class="w"> </span><span class="c1">// value == 25</span>
</pre></div>
</div>
<p>6. <strong>Vector clear</strong>: when we are done with the vector, we should clear it to release its memory. This is done by
calling the <code class="docutils literal notranslate"><span class="pre">close()</span></code> method:</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">vector</span><span class="p">.</span><span class="na">close</span><span class="p">();</span>
</pre></div>
</div>
<p>Some points to note about the steps above:</p>
<ul class="simple">
<li><p>The steps are not necessarily performed in a linear sequence. Instead, they can be in a loop. For example,
when a vector enters the access step, we can also go back to the vector mutation step, and then set value
count, access vector, and so on.</p></li>
<li><p>We should try to make sure the above steps are carried out in order. Otherwise, the vector
may be in an undefined state, and some unexpected behavior may occur. However, this restriction
is not strict. That means it is possible to violate the order above and still get
correct results.</p></li>
<li><p>When mutating vector values through set methods, we should prefer <code class="docutils literal notranslate"><span class="pre">set(index,</span> <span class="pre">value)</span></code> methods to
<code class="docutils literal notranslate"><span class="pre">setSafe(index,</span> <span class="pre">value)</span></code> methods whenever possible, to avoid unnecessary performance overhead of handling
vector capacity.</p></li>
<li><p>All vectors implement the <code class="docutils literal notranslate"><span class="pre">AutoCloseable</span></code> interface, so they must be closed explicitly when they are
no longer used, to avoid resource leaks. To make sure of this, it is recommended to place vector-related operations
into a try-with-resources block.</p></li>
<li><p>For fixed width vectors (e.g. IntVector), we can set values at different indices in arbitrary orders.
For variable width vectors (e.g. VarCharVector), however, we must set values in non-decreasing order of the
indices. Otherwise, the values after the set position will become invalid. For example, suppose we use the
following statements to populate a variable width vector:</p></li>
</ul>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">VarCharVector</span><span class="w"> </span><span class="n">vector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">VarCharVector</span><span class="p">(</span><span class="s">&quot;vector&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">allocator</span><span class="p">);</span>
<span class="n">vector</span><span class="p">.</span><span class="na">allocateNew</span><span class="p">();</span>
<span class="n">vector</span><span class="p">.</span><span class="na">setSafe</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;zero&quot;</span><span class="p">);</span>
<span class="n">vector</span><span class="p">.</span><span class="na">setSafe</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;one&quot;</span><span class="p">);</span>
<span class="p">...</span>
<span class="n">vector</span><span class="p">.</span><span class="na">setSafe</span><span class="p">(</span><span class="mi">9</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;nine&quot;</span><span class="p">);</span>
</pre></div>
</div>
<p>Then we set the value at position 5 again:</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">vector</span><span class="p">.</span><span class="na">setSafe</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;5&quot;</span><span class="p">);</span>
</pre></div>
</div>
<p>After that, the values at positions 6, 7, 8, and 9 of the vector will become invalid.</p>
</section>
<section id="building-valuevector">
<h2>Building ValueVector<a class="headerlink" href="#building-valuevector" title="Permalink to this heading">#</a></h2>
<p>Note that the current implementation doesn’t enforce the rule that Arrow objects are immutable.
<code class="xref py py-class docutils literal notranslate"><span class="pre">ValueVector</span></code> instances can be created directly by using the new keyword, and there are
set/setSafe APIs and concrete subclasses of FieldWriter for populating values.</p>
<p>For example, the code below shows how to build a <code class="xref py py-class docutils literal notranslate"><span class="pre">BigIntVector</span></code>. In this case, we build a
vector of the range 0 to 7 where the element that should hold the fourth value is nulled:</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">BufferAllocator</span><span class="w"> </span><span class="n">allocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">(</span><span class="n">Long</span><span class="p">.</span><span class="na">MAX_VALUE</span><span class="p">);</span>
<span class="w"> </span><span class="n">BigIntVector</span><span class="w"> </span><span class="n">vector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">BigIntVector</span><span class="p">(</span><span class="s">&quot;vector&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">allocator</span><span class="p">))</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">allocateNew</span><span class="p">(</span><span class="mi">8</span><span class="p">);</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">);</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="p">);</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">setNull</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">4</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">);</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="mi">6</span><span class="p">);</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">6</span><span class="p">,</span><span class="w"> </span><span class="mi">7</span><span class="p">);</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">set</span><span class="p">(</span><span class="mi">7</span><span class="p">,</span><span class="w"> </span><span class="mi">8</span><span class="p">);</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">setValueCount</span><span class="p">(</span><span class="mi">8</span><span class="p">);</span><span class="w"> </span><span class="c1">// this finalizes the vector by convention.</span>
<span class="w"> </span><span class="p">...</span>
<span class="p">}</span>
</pre></div>
</div>
<p>The <code class="xref py py-class docutils literal notranslate"><span class="pre">BigIntVector</span></code> holds two ArrowBufs. The first buffer holds the null bitmap, which consists
here of a single byte with the bits 1|1|1|1|0|1|1|1 (the bit is 1 if the value is non-null).
The second buffer contains all the above values. As the fourth entry is null, the value at that position
in the buffer is undefined. Note that, unlike the set API, the setSafe API checks the value capacity before setting
values and reallocates the buffers if necessary.</p>
<p>Here is how to build a vector using a writer:</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">BigIntVector</span><span class="w"> </span><span class="n">vector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">BigIntVector</span><span class="p">(</span><span class="s">&quot;vector&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">allocator</span><span class="p">);</span>
<span class="w"> </span><span class="n">BigIntWriter</span><span class="w"> </span><span class="n">writer</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">BigIntWriterImpl</span><span class="p">(</span><span class="n">vector</span><span class="p">))</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">setPosition</span><span class="p">(</span><span class="mi">0</span><span class="p">);</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeBigInt</span><span class="p">(</span><span class="mi">1</span><span class="p">);</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">setPosition</span><span class="p">(</span><span class="mi">1</span><span class="p">);</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeBigInt</span><span class="p">(</span><span class="mi">2</span><span class="p">);</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">setPosition</span><span class="p">(</span><span class="mi">2</span><span class="p">);</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeBigInt</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span>
<span class="w"> </span><span class="c1">// writer.setPosition(3) is not called which means the fourth value is null.</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">setPosition</span><span class="p">(</span><span class="mi">4</span><span class="p">);</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeBigInt</span><span class="p">(</span><span class="mi">5</span><span class="p">);</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">setPosition</span><span class="p">(</span><span class="mi">5</span><span class="p">);</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeBigInt</span><span class="p">(</span><span class="mi">6</span><span class="p">);</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">setPosition</span><span class="p">(</span><span class="mi">6</span><span class="p">);</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeBigInt</span><span class="p">(</span><span class="mi">7</span><span class="p">);</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">setPosition</span><span class="p">(</span><span class="mi">7</span><span class="p">);</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeBigInt</span><span class="p">(</span><span class="mi">8</span><span class="p">);</span>
<span class="p">}</span>
</pre></div>
</div>
<p>Vector values can be accessed through the get API or through concrete subclasses of <code class="xref py py-class docutils literal notranslate"><span class="pre">FieldReader</span></code>. Note
that going through a writer/reader is not as efficient as direct access:</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="c1">// access via get API</span>
<span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">getValueCount</span><span class="p">();</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="n">vector</span><span class="p">.</span><span class="na">isNull</span><span class="p">(</span><span class="n">i</span><span class="p">))</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="n">vector</span><span class="p">.</span><span class="na">get</span><span class="p">(</span><span class="n">i</span><span class="p">));</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
<span class="c1">// access via reader</span>
<span class="n">BigIntReader</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">getReader</span><span class="p">();</span>
<span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">getValueCount</span><span class="p">();</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">setPosition</span><span class="p">(</span><span class="n">i</span><span class="p">);</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">reader</span><span class="p">.</span><span class="na">isSet</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="n">reader</span><span class="p">.</span><span class="na">readLong</span><span class="p">());</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
</section>
<section id="building-listvector">
<h2>Building ListVector<a class="headerlink" href="#building-listvector" title="Permalink to this heading">#</a></h2>
<p>A <code class="xref py py-class docutils literal notranslate"><span class="pre">ListVector</span></code> is a vector that holds a list of values for each index. Working with one you need to handle the same steps as mentioned above (create &gt; allocate &gt; mutate &gt; set value count &gt; access &gt; clear), but the details of how you accomplish this are slightly different since you need to both create the vector and set the list of values for each index.</p>
<p>For example, the code below shows how to build a <code class="xref py py-class docutils literal notranslate"><span class="pre">ListVector</span></code> of ints using the writer <code class="xref py py-class docutils literal notranslate"><span class="pre">UnionListWriter</span></code>. We build a vector from 0 to 9 and each index contains a list with values [[0, 0, 0, 0, 0], [0, 1, 2, 3, 4], [0, 2, 4, 6, 8], …, [0, 9, 18, 27, 36]]. List values can be added in any order so writing a list such as [3, 1, 2] would be just as valid.</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">BufferAllocator</span><span class="w"> </span><span class="n">allocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">(</span><span class="n">Long</span><span class="p">.</span><span class="na">MAX_VALUE</span><span class="p">);</span>
<span class="w"> </span><span class="n">ListVector</span><span class="w"> </span><span class="n">listVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ListVector</span><span class="p">.</span><span class="na">empty</span><span class="p">(</span><span class="s">&quot;vector&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">allocator</span><span class="p">))</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">UnionListWriter</span><span class="w"> </span><span class="n">writer</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">listVector</span><span class="p">.</span><span class="na">getWriter</span><span class="p">();</span>
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="mi">10</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">startList</span><span class="p">();</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">setPosition</span><span class="p">(</span><span class="n">i</span><span class="p">);</span>
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">j</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">j</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="mi">5</span><span class="p">;</span><span class="w"> </span><span class="n">j</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">writeInt</span><span class="p">(</span><span class="n">j</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">i</span><span class="p">);</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">setValueCount</span><span class="p">(</span><span class="mi">5</span><span class="p">);</span>
<span class="w"> </span><span class="n">writer</span><span class="p">.</span><span class="na">endList</span><span class="p">();</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="n">listVector</span><span class="p">.</span><span class="na">setValueCount</span><span class="p">(</span><span class="mi">10</span><span class="p">);</span>
<span class="p">}</span>
</pre></div>
</div>
<p><code class="xref py py-class docutils literal notranslate"><span class="pre">ListVector</span></code> values can be accessed either through the get API or through the reader class <code class="xref py py-class docutils literal notranslate"><span class="pre">UnionListReader</span></code>. To read all the values, first enumerate through the indexes, and then enumerate through the inner list values.</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="c1">// access via get API</span>
<span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="n">listVector</span><span class="p">.</span><span class="na">getValueCount</span><span class="p">();</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="o">!</span><span class="n">listVector</span><span class="p">.</span><span class="na">isNull</span><span class="p">(</span><span class="n">i</span><span class="p">))</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">ArrayList</span><span class="o">&lt;</span><span class="n">Integer</span><span class="o">&gt;</span><span class="w"> </span><span class="n">elements</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">ArrayList</span><span class="o">&lt;</span><span class="n">Integer</span><span class="o">&gt;</span><span class="p">)</span><span class="w"> </span><span class="n">listVector</span><span class="p">.</span><span class="na">getObject</span><span class="p">(</span><span class="n">i</span><span class="p">);</span>
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="n">Integer</span><span class="w"> </span><span class="n">element</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="n">elements</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="n">element</span><span class="p">);</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
<span class="c1">// access via reader</span>
<span class="n">UnionListReader</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">listVector</span><span class="p">.</span><span class="na">getReader</span><span class="p">();</span>
<span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="n">listVector</span><span class="p">.</span><span class="na">getValueCount</span><span class="p">();</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">setPosition</span><span class="p">(</span><span class="n">i</span><span class="p">);</span>
<span class="w"> </span><span class="k">while</span><span class="w"> </span><span class="p">(</span><span class="n">reader</span><span class="p">.</span><span class="na">next</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">IntReader</span><span class="w"> </span><span class="n">intReader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">reader</span><span class="p">();</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">intReader</span><span class="p">.</span><span class="na">isSet</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">System</span><span class="p">.</span><span class="na">out</span><span class="p">.</span><span class="na">println</span><span class="p">(</span><span class="n">intReader</span><span class="p">.</span><span class="na">readInteger</span><span class="p">());</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
</section>
<section id="dictionary-encoding">
<h2>Dictionary Encoding<a class="headerlink" href="#dictionary-encoding" title="Permalink to this heading">#</a></h2>
<p>Dictionary encoding is a form of compression where values of one type are replaced by values of a smaller type: an array of ints replacing an array of strings is a common example. The mapping between the original values and the replacements is held in a ‘dictionary’. Since the dictionary needs only one copy of each of the longer values, the combination of the dictionary and the array of smaller values may use less memory. The more repetitive the original data, the greater the savings.</p>
<p>A <code class="docutils literal notranslate"><span class="pre">FieldVector</span></code> can be dictionary encoded for performance or improved memory efficiency. Nearly any type of vector might be encoded if there are many values, but few unique values.</p>
<p>There are a few steps involved in the encoding process:</p>
<ol class="arabic simple">
<li><p>Create a regular, un-encoded vector and populate it</p></li>
<li><p>Create a dictionary vector of the same type as the un-encoded vector. This vector must have the same values, but each unique value in the un-encoded vector need appear here only once.</p></li>
<li><p>Create a <code class="docutils literal notranslate"><span class="pre">Dictionary</span></code>. It will contain the dictionary vector, plus a <code class="docutils literal notranslate"><span class="pre">DictionaryEncoding</span></code> object that holds the encoding’s metadata and settings values.</p></li>
<li><p>Create a <code class="docutils literal notranslate"><span class="pre">DictionaryEncoder</span></code>.</p></li>
<li><p>Call the encode() method on the <code class="docutils literal notranslate"><span class="pre">DictionaryEncoder</span></code> to produce an encoded version of the original vector.</p></li>
<li><p>(Optional) Call the decode() method on the encoded vector to re-create the original values.</p></li>
</ol>
<p>The encoded values will be integers. Depending on how many unique values you have, you can use <code class="docutils literal notranslate"><span class="pre">TinyIntVector</span></code>, <code class="docutils literal notranslate"><span class="pre">SmallIntVector</span></code>, <code class="docutils literal notranslate"><span class="pre">IntVector</span></code>, or <code class="docutils literal notranslate"><span class="pre">BigIntVector</span></code> to hold them. You specify the type when you create your <code class="docutils literal notranslate"><span class="pre">DictionaryEncoding</span></code> instance. You might wonder where those integers come from: the dictionary vector is a regular vector, so the value’s index position in that vector is used as its encoded value.</p>
<p>Another critical attribute in <code class="docutils literal notranslate"><span class="pre">DictionaryEncoding</span></code> is the id. It’s important to understand how the id is used, so we cover that later in this section.</p>
<p>The result of encoding will be a new vector (for example, an <code class="docutils literal notranslate"><span class="pre">IntVector</span></code>) that can act in place of the original vector (for example, a <code class="docutils literal notranslate"><span class="pre">VarCharVector</span></code>). When you write the data in Arrow format, it is both the new <code class="docutils literal notranslate"><span class="pre">IntVector</span></code> plus the dictionary that is written: you will need the dictionary later to retrieve the original values.</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="c1">// 1. create a vector for the un-encoded data and populate it</span>
<span class="n">VarCharVector</span><span class="w"> </span><span class="n">unencoded</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">VarCharVector</span><span class="p">(</span><span class="s">&quot;unencoded&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">allocator</span><span class="p">);</span>
<span class="c1">// now put some data in it before continuing</span>
<span class="c1">// 2. create a vector to hold the dictionary and populate it</span>
<span class="n">VarCharVector</span><span class="w"> </span><span class="n">dictionaryVector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">VarCharVector</span><span class="p">(</span><span class="s">&quot;dictionary&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">allocator</span><span class="p">);</span>
<span class="c1">// 3. create a dictionary object</span>
<span class="n">Dictionary</span><span class="w"> </span><span class="n">dictionary</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">Dictionary</span><span class="p">(</span><span class="n">dictionaryVector</span><span class="p">,</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">DictionaryEncoding</span><span class="p">(</span><span class="mi">1L</span><span class="p">,</span><span class="w"> </span><span class="kc">false</span><span class="p">,</span><span class="w"> </span><span class="kc">null</span><span class="p">));</span>
<span class="c1">// 4. create a dictionary encoder</span>
<span class="n">DictionaryEncoder</span><span class="w"> </span><span class="n">encoder</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">DictionaryEncoder</span><span class="p">(</span><span class="n">dictionary</span><span class="p">,</span><span class="w"> </span><span class="n">allocator</span><span class="p">);</span>
<span class="c1">// 5. encode the data</span>
<span class="n">IntVector</span><span class="w"> </span><span class="n">encoded</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">IntVector</span><span class="p">)</span><span class="w"> </span><span class="n">encoder</span><span class="p">.</span><span class="na">encode</span><span class="p">(</span><span class="n">unencoded</span><span class="p">);</span>
<span class="c1">// 6. re-create an un-encoded version from the encoded vector</span>
<span class="n">VarCharVector</span><span class="w"> </span><span class="n">decoded</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">VarCharVector</span><span class="p">)</span><span class="w"> </span><span class="n">encoder</span><span class="p">.</span><span class="na">decode</span><span class="p">(</span><span class="n">encoded</span><span class="p">);</span>
</pre></div>
</div>
<p>One thing we haven’t discussed is how to create the dictionary vector from the original un-encoded values. That is left to the library user since a custom method will likely be more efficient than a general utility. Since the dictionary vector is just a normal vector, you can populate its values with the standard APIs.</p>
<p>Finally, you can package a number of dictionaries together, which is useful if you’re working with a <code class="docutils literal notranslate"><span class="pre">VectorSchemaRoot</span></code> with several dictionary-encoded vectors. This is done using an object called a <code class="docutils literal notranslate"><span class="pre">DictionaryProvider</span></code>, as shown in the example below. Note that we don’t put the dictionary vectors in the same <code class="docutils literal notranslate"><span class="pre">VectorSchemaRoot</span></code> as the data vectors, as they will generally have fewer values.</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">DictionaryProvider</span><span class="p">.</span><span class="na">MapDictionaryProvider</span><span class="w"> </span><span class="n">provider</span><span class="w"> </span><span class="o">=</span>
<span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">DictionaryProvider</span><span class="p">.</span><span class="na">MapDictionaryProvider</span><span class="p">();</span>
<span class="n">provider</span><span class="p">.</span><span class="na">put</span><span class="p">(</span><span class="n">dictionary</span><span class="p">);</span>
</pre></div>
</div>
<p>The <code class="docutils literal notranslate"><span class="pre">DictionaryProvider</span></code> is simply a map of identifiers to <code class="docutils literal notranslate"><span class="pre">Dictionary</span></code> objects, where each identifier is a long value. In the encoding example above, the identifier is the first argument to the <code class="docutils literal notranslate"><span class="pre">DictionaryEncoding</span></code> constructor.</p>
<p>This is where the <code class="docutils literal notranslate"><span class="pre">DictionaryEncoding</span></code>’s ‘id’ attribute comes in. This value is used to connect dictionaries to instances of <code class="docutils literal notranslate"><span class="pre">VectorSchemaRoot</span></code>, using a <code class="docutils literal notranslate"><span class="pre">DictionaryProvider</span></code>. Here’s how that works:</p>
<ul class="simple">
<li><p>The <code class="docutils literal notranslate"><span class="pre">VectorSchemaRoot</span></code> has a <code class="docutils literal notranslate"><span class="pre">Schema</span></code> object containing a list of <code class="docutils literal notranslate"><span class="pre">Field</span></code> objects.</p></li>
<li><p>Each field has an attribute called ‘dictionary’, but it holds a <code class="docutils literal notranslate"><span class="pre">DictionaryEncoding</span></code> rather than a <code class="docutils literal notranslate"><span class="pre">Dictionary</span></code>.</p></li>
<li><p>As mentioned, the <code class="docutils literal notranslate"><span class="pre">DictionaryProvider</span></code> holds dictionaries indexed by a long value. This value is the id from your <code class="docutils literal notranslate"><span class="pre">DictionaryEncoding</span></code>.</p></li>
<li><p>To retrieve the dictionary for a vector in a <code class="docutils literal notranslate"><span class="pre">VectorSchemaRoot</span></code>, you get the field associated with the vector, get its dictionary attribute, and use that object’s id to look up the correct dictionary in the provider.</p></li>
</ul>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="c1">// create the encoded vector, the Dictionary and DictionaryProvider as discussed above</span>
<span class="c1">// Create a VectorSchemaRoot with one encoded vector</span>
<span class="n">VectorSchemaRoot</span><span class="w"> </span><span class="n">vsr</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">VectorSchemaRoot</span><span class="p">(</span><span class="n">List</span><span class="p">.</span><span class="na">of</span><span class="p">(</span><span class="n">encoded</span><span class="p">));</span>
<span class="c1">// now we want to decode our vector, so we retrieve its dictionary from the provider</span>
<span class="n">Field</span><span class="w"> </span><span class="n">f</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">vsr</span><span class="p">.</span><span class="na">getField</span><span class="p">(</span><span class="n">encoded</span><span class="p">.</span><span class="na">getName</span><span class="p">());</span>
<span class="n">DictionaryEncoding</span><span class="w"> </span><span class="n">encoding</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">f</span><span class="p">.</span><span class="na">getDictionary</span><span class="p">();</span>
<span class="n">Dictionary</span><span class="w"> </span><span class="n">dictionary</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">provider</span><span class="p">.</span><span class="na">lookup</span><span class="p">(</span><span class="n">encoding</span><span class="p">.</span><span class="na">getId</span><span class="p">());</span>
</pre></div>
</div>
<p>As you can see, a <code class="docutils literal notranslate"><span class="pre">DictionaryProvider</span></code> is handy for managing the dictionaries associated with a <code class="docutils literal notranslate"><span class="pre">VectorSchemaRoot</span></code>. More importantly, it helps package the dictionaries for a <code class="docutils literal notranslate"><span class="pre">VectorSchemaRoot</span></code> when it’s written. The classes <code class="docutils literal notranslate"><span class="pre">ArrowFileWriter</span></code> and <code class="docutils literal notranslate"><span class="pre">ArrowStreamWriter</span></code> both accept an optional <code class="docutils literal notranslate"><span class="pre">DictionaryProvider</span></code> argument for that purpose. You can find example code for writing dictionaries in the documentation for <a class="reference internal" href="ipc.html"><span class="doc">Reading/Writing IPC formats</span></a>. <code class="docutils literal notranslate"><span class="pre">ArrowReader</span></code> and its subclasses also implement the <code class="docutils literal notranslate"><span class="pre">DictionaryProvider</span></code> interface, so you can retrieve the actual dictionaries when reading a file.</p>
</section>
<section id="slicing">
<h2>Slicing<a class="headerlink" href="#slicing" title="Permalink to this heading">#</a></h2>
<p>Similar to the C++ implementation, it is possible to make zero-copy slices of vectors to obtain a vector
referring to some logical sub-sequence of the data through <code class="xref py py-class docutils literal notranslate"><span class="pre">TransferPair</span></code>:</p>
<div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">IntVector</span><span class="w"> </span><span class="n">vector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">IntVector</span><span class="p">(</span><span class="s">&quot;intVector&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">allocator</span><span class="p">);</span>
<span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="mi">10</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">setSafe</span><span class="p">(</span><span class="n">i</span><span class="p">,</span><span class="w"> </span><span class="n">i</span><span class="p">);</span>
<span class="p">}</span>
<span class="n">vector</span><span class="p">.</span><span class="na">setValueCount</span><span class="p">(</span><span class="mi">10</span><span class="p">);</span>
<span class="n">TransferPair</span><span class="w"> </span><span class="n">tp</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">vector</span><span class="p">.</span><span class="na">getTransferPair</span><span class="p">(</span><span class="n">allocator</span><span class="p">);</span>
<span class="n">tp</span><span class="p">.</span><span class="na">splitAndTransfer</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">);</span>
<span class="n">IntVector</span><span class="w"> </span><span class="n">sliced</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">IntVector</span><span class="p">)</span><span class="w"> </span><span class="n">tp</span><span class="p">.</span><span class="na">getTo</span><span class="p">();</span>
<span class="c1">// In this case, the vector values are [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] and the sliced values are [0, 1, 2, 3, 4].</span>
</pre></div>
</div>
</section>
</section>
</article>
<footer class="prev-next-footer">
<div class="prev-next-area">
<a class="left-prev"
href="memory.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Memory Management</p>
</div>
</a>
<a class="right-next"
href="vector_schema_root.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">Tabular Data</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#vector-life-cycle">Vector Life Cycle</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#building-valuevector">Building ValueVector</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#building-listvector">Building ListVector</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dictionary-encoding">Dictionary Encoding</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#slicing">Slicing</a></li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
<div class="tocsection editthispage">
<a href="https://github.com/apache/arrow/edit/main/docs/source/java/vector.rst">
<i class="fa-solid fa-pencil"></i>
Edit on GitHub
</a>
</div>
</div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script src="../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright 2016-2024 Apache Software Foundation.
Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 6.2.0.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
</p></div>
</div>
</div>
</footer>
</body>
</html>