<!-- blob: 9c7355fffe46e1b9a6d5230f7af659aabb1e7de8 [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en" data-content_root="" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<title>Acero User’s Guide &#8212; Apache Arrow v17.0.0.dev59</title>
<script data-cfasync="false">
// Copy the stored "mode" and "theme" values onto <html> as data-mode and
// data-theme. Defaults: mode = "" and theme = "light".
(function () {
  var root = document.documentElement;
  var mode = "";
  var theme = "light";
  try {
    // Accessing localStorage can throw (e.g. SecurityError when the browser
    // blocks storage for this origin). Fall back to the defaults instead of
    // aborting the script and leaving the attributes unset.
    mode = localStorage.getItem("mode") || mode;
    theme = localStorage.getItem("theme") || theme;
  } catch (err) {
    // Storage unavailable: keep the defaults assigned above.
  }
  root.dataset.mode = mode;
  root.dataset.theme = theme;
})();
</script>
<!-- Loaded before other Sphinx assets -->
<link href="../../_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../../_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../../_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="../../_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../_static/design-style.1e8bd061cd6da7fc9cf755528e8ffc24.min.css" />
<link rel="stylesheet" type="text/css" href="../../_static/theme_overrides.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
<script src="../../_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
<script src="../../_static/doctools.js"></script>
<script src="../../_static/sphinx_highlight.js"></script>
<script src="../../_static/clipboard.min.js"></script>
<script src="../../_static/copybutton.js"></script>
<script src="../../_static/design-tabs.js"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'cpp/acero/user_guide';</script>
<script>
// Theme-related settings stored on the global DOCUMENTATION_OPTIONS object.
// NOTE(review): the object itself is presumably created by
// documentation_options.js, loaded earlier in <head> — confirm.
// Version string recorded for the theme.
DOCUMENTATION_OPTIONS.theme_version = '0.15.2';
// Site-absolute URL of a versions.json file; judging by the option name it
// feeds the version switcher — confirm against the theme documentation.
DOCUMENTATION_OPTIONS.theme_switcher_json_url = '/docs/_static/versions.json';
// Value used to match this build ('dev/') against the switcher entries
// (assumed from the option name — confirm).
DOCUMENTATION_OPTIONS.theme_switcher_version_match = 'dev/';
// Flag enabling the version warning banner (semantics defined by the theme
// script, not visible here).
DOCUMENTATION_OPTIONS.show_version_warning_banner = true;
</script>
<link rel="canonical" href="https://arrow.apache.org/docs/cpp/acero/user_guide.html" />
<link rel="icon" href="../../_static/favicon.ico"/>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
<link rel="next" title="Using Acero with Substrait" href="substrait.html" />
<link rel="prev" title="Acero Overview" href="overview.html" />
<meta name="docsearch:language" content="en"/>
<!-- Matomo -->
<script>
// Matomo command queue; reuse window._paq if it already exists.
var _paq = window._paq = window._paq || [];
// Tracker methods such as "setCustomDimension" belong before "trackPageView".
// Cookie tracking is explicitly disabled to avoid privacy issues.
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function () {
  var baseUrl = "https://analytics.apache.org/";
  _paq.push(['setTrackerUrl', baseUrl + 'matomo.php']);
  _paq.push(['setSiteId', '20']);

  // Build the async loader for matomo.js and insert it ahead of the first
  // <script> element in the document.
  var loader = document.createElement('script');
  var firstScript = document.getElementsByTagName('script')[0];
  loader.async = true;
  loader.src = baseUrl + 'matomo.js';
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>
Back to top
</button>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="../../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../../index.html">
<img src="../../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v17.0.0.dev59 - Home"/>
<script>document.write(`<img src="../../_static/arrow-dark.png" class="logo__image only-dark" alt="Apache Arrow v17.0.0.dev59 - Home"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../../format/index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links">
Implementations
</button>
<ul id="pst-nav-more-links" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item current active">
<a class="nav-link dropdown-item nav-internal" href="../index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<div class="navbar-item">
<script>
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-2"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-2"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-2"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-2">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../../format/index.html">
Specifications
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../developers/index.html">
Development
</a>
</li>
<li class="nav-item dropdown">
<button class="btn dropdown-toggle nav-item" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-controls="pst-nav-more-links-2">
Implementations
</button>
<ul id="pst-nav-more-links-2" class="dropdown-menu">
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../c_glib/index.html">
C/GLib
</a>
</li>
<li class="nav-item current active">
<a class="nav-link dropdown-item nav-internal" href="../index.html">
C++
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/csharp/README.md">
C#
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://pkg.go.dev/github.com/apache/arrow/go/v17">
Go
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../java/index.html">
Java
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../js/index.html">
JavaScript
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/julia/">
Julia
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/matlab/README.md">
MATLAB
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/nanoarrow/">
nanoarrow
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../python/index.html">
Python
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../r/index.html">
R
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://github.com/apache/arrow/blob/main/ruby/README.md">
Ruby
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-internal" href="../../status.html">
Implementation Status
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/cpp/">
C++ cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/java/">
Java cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/py/">
Python cookbook
</a>
</li>
<li class="nav-item">
<a class="nav-link dropdown-item nav-external" href="https://arrow.apache.org/cookbook/r/">
R cookbook
</a>
</li>
</ul>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<script>
document.write(`
<div class="version-switcher__container dropdown">
<button id="pst-version-switcher-button-3"
type="button"
class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-3"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-3"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-3">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
`);
</script></div>
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://twitter.com/ApacheArrow" title="X" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-x-twitter fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">X</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1 has-children"><a class="reference internal" href="../getting_started.html">Getting Started</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-1"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../build_system.html">Using Arrow C++ in your own project</a></li>
<li class="toctree-l2"><a class="reference internal" href="../conventions.html">Conventions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/basic_arrow.html">Basic Arrow Data Structures</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/io_tutorial.html">Arrow File I/O</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/compute_tutorial.html">Arrow Compute</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/datasets_tutorial.html">Arrow Datasets</a></li>
</ul>
</li>
<li class="toctree-l1 current active has-children"><a class="reference internal" href="../user_guide.html">User Guide</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-2"><i class="fa-solid fa-chevron-down"></i></label><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../overview.html">High-Level Overview</a></li>
<li class="toctree-l2"><a class="reference internal" href="../memory.html">Memory Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="../arrays.html">Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="../datatypes.html">Data Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tables.html">Tabular Data</a></li>
<li class="toctree-l2"><a class="reference internal" href="../compute.html">Compute Functions</a></li>
<li class="toctree-l2 has-children"><a class="reference internal" href="../gandiva.html">The Gandiva Expression Compiler</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-3"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="../gandiva/expr_projector_filter.html">Gandiva Expression, Projector, and Filter</a></li>
<li class="toctree-l3"><a class="reference internal" href="../gandiva/external_func.html">Gandiva External Functions Development Guide</a></li>
</ul>
</li>
<li class="toctree-l2 current active has-children"><a class="reference internal" href="../streaming_execution.html">Acero: A C++ streaming execution engine</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-4"><i class="fa-solid fa-chevron-down"></i></label><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="overview.html">Acero Overview</a></li>
<li class="toctree-l3 current active"><a class="current reference internal" href="#">Acero User’s Guide</a></li>
<li class="toctree-l3"><a class="reference internal" href="substrait.html">Using Acero with Substrait</a></li>
<li class="toctree-l3"><a class="reference internal" href="developer_guide.html">Developer’s Guide</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../io.html">Input / output and filesystems</a></li>
<li class="toctree-l2"><a class="reference internal" href="../ipc.html">Reading and writing the Arrow IPC format</a></li>
<li class="toctree-l2"><a class="reference internal" href="../orc.html">Reading and Writing ORC files</a></li>
<li class="toctree-l2"><a class="reference internal" href="../parquet.html">Reading and writing Parquet files</a></li>
<li class="toctree-l2"><a class="reference internal" href="../csv.html">Reading and Writing CSV files</a></li>
<li class="toctree-l2"><a class="reference internal" href="../json.html">Reading JSON files</a></li>
<li class="toctree-l2"><a class="reference internal" href="../dataset.html">Tabular Datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="../flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="../gdb.html">Debugging code using Arrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="../threading.html">Thread Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="../opentelemetry.html">OpenTelemetry</a></li>
<li class="toctree-l2"><a class="reference internal" href="../env_vars.html">Environment Variables</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../examples/index.html">Examples</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-5"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../examples/cmake_minimal_build.html">Minimal build using CMake</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/compute_and_write_example.html">Compute and Write CSV Example</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/dataset_documentation_example.html">Arrow Datasets example</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/dataset_skyhook_scan_example.html">Arrow Skyhook example</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/row_columnar_conversion.html">Row to columnar conversion</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/tuple_range_conversion.html">std::tuple-like ranges to Arrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="../examples/converting_recordbatch_to_tensor.html">Converting RecordBatch to Tensor</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../api.html">API Reference</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-6"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../api/support.html">Programming Support</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/memory.html">Memory (management)</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/thread.html">Thread (management)</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/datatype.html">Data Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/array.html">Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/scalar.html">Scalars</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/builder.html">Array Builders</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/table.html">Two-dimensional Datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/c_abi.html">C Interfaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/compute.html">Compute Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/acero.html">Streaming Execution (Acero)</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/gandiva.html">Gandiva Expression Compiler</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/tensor.html">Tensors</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/utilities.html">Utilities</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/async.html">Asynchronous programming</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/io.html">Input / output</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/ipc.html">Arrow IPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/formats.html">File Formats</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/cuda.html">CUDA support</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/flightsql.html">Arrow Flight SQL</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/filesystem.html">Filesystems</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/dataset.html">Dataset</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/cookbook/cpp/">C++ cookbook</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="../index.html" class="nav-link">C++ Implementation</a></li>
<li class="breadcrumb-item"><i class="fa-solid fa-ellipsis"></i></li>
<li class="breadcrumb-item"><a href="../streaming_execution.html" class="nav-link">Acero: A C++ streaming execution engine</a></li>
<li class="breadcrumb-item active" aria-current="page">Acero User’s Guide</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="acero-user-s-guide">
<h1>Acero User’s Guide<a class="headerlink" href="#acero-user-s-guide" title="Permalink to this heading">#</a></h1>
<p>This page describes how to use Acero. It’s recommended that you read the
overview first and familiarize yourself with the basic concepts.</p>
<section id="using-acero">
<h2>Using Acero<a class="headerlink" href="#using-acero" title="Permalink to this heading">#</a></h2>
<p>The basic workflow for Acero is this:</p>
<ol class="arabic simple">
<li><p>First, create a graph of <a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero11DeclarationE" title="arrow::acero::Declaration"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Declaration</span></code></a> objects describing the plan.</p></li>
<li><p>Call one of the DeclarationToXyz methods to execute the Declaration.</p>
<ol class="loweralpha simple">
<li><p>A new ExecPlan is created from the graph of Declarations. Each Declaration will correspond to one
ExecNode in the plan. In addition, a sink node will be added, depending on which DeclarationToXyz method
was used.</p></li>
<li><p>The ExecPlan is executed. Typically this happens as part of the DeclarationToXyz call but in
DeclarationToReader the reader is returned before the plan is finished executing.</p></li>
<li><p>Once the plan is finished, it is destroyed.</p></li>
</ol>
</li>
</ol>
</section>
<section id="creating-a-plan">
<h2>Creating a Plan<a class="headerlink" href="#creating-a-plan" title="Permalink to this heading">#</a></h2>
<section id="using-substrait">
<h3>Using Substrait<a class="headerlink" href="#using-substrait" title="Permalink to this heading">#</a></h3>
<p>Substrait is the preferred mechanism for creating a plan (graph of <a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero11DeclarationE" title="arrow::acero::Declaration"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Declaration</span></code></a>). There are a few
reasons for this:</p>
<ul class="simple">
<li><p>Substrait producers spend a lot of time and energy in creating user-friendly APIs for producing complex
execution plans in a simple way. For example, the <code class="docutils literal notranslate"><span class="pre">pivot_wider</span></code> operation can be achieved using a complex
series of <code class="docutils literal notranslate"><span class="pre">aggregate</span></code> nodes. Rather than making you create all of those <code class="docutils literal notranslate"><span class="pre">aggregate</span></code> nodes by hand, a producer will
give you a much simpler API.</p></li>
<li><p>If you are using Substrait then you can easily switch out to any other Substrait-consuming engine should you
at some point find that it serves your needs better than Acero.</p></li>
<li><p>We hope that tools will eventually emerge for Substrait-based optimizers and planners. By using Substrait
you will be making it much easier to use these tools in the future.</p></li>
</ul>
<p>You could create the Substrait plan yourself but you’ll probably have a much easier time finding an existing
Substrait producer. For example, you could use <a class="reference external" href="https://github.com/ibis-project/ibis-substrait">ibis-substrait</a>
to easily create Substrait plans from Python expressions. There are a few different tools that are able to create
Substrait plans from SQL. Eventually, we hope that C++ based Substrait producers will emerge. However, we
are not aware of any at this time.</p>
<p>Detailed instructions on creating an execution plan from Substrait can be found on
<a class="reference internal" href="substrait.html#acero-substrait"><span class="std std-ref">the Substrait page</span></a>.</p>
</section>
<section id="programmatic-plan-creation">
<h3>Programmatic Plan Creation<a class="headerlink" href="#programmatic-plan-creation" title="Permalink to this heading">#</a></h3>
<p>Creating an execution plan programmatically is simpler than creating a plan from Substrait, though it loses some of
the flexibility and future-proofing guarantees. The simplest way to create a Declaration is to instantiate
one. You will need the name of the declaration, a vector of inputs, and an options object. For example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">381</span><span class="c1">/// \brief An example showing a project node</span>
<span class="linenos">382</span><span class="c1">///</span>
<span class="linenos">383</span><span class="c1">/// Scan-Project-Table</span>
<span class="linenos">384</span><span class="c1">/// This example shows how a Scan operation can be used to load the data</span>
<span class="linenos">385</span><span class="c1">/// into the execution plan, how a project operation can be applied on the</span>
<span class="linenos">386</span><span class="c1">/// data stream and how the output is collected into a table</span>
<span class="linenos">387</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">ScanProjectSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">388</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">&gt;</span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span>
<span class="linenos">389</span>
<span class="linenos">390</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">&gt;</span><span class="p">();</span>
<span class="linenos">391</span><span class="w"> </span><span class="c1">// projection</span>
<span class="linenos">392</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">Expression</span><span class="w"> </span><span class="n">a_times_2</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;multiply&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">),</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">2</span><span class="p">)});</span>
<span class="linenos">393</span><span class="w"> </span><span class="n">options</span><span class="o">-&gt;</span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span>
<span class="linenos">394</span>
<span class="linenos">395</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span>
<span class="linenos">396</span>
<span class="linenos">397</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">&quot;scan&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span>
<span class="linenos">398</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">project</span><span class="p">{</span>
<span class="linenos">399</span><span class="w"> </span><span class="s">&quot;project&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">)},</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ProjectNodeOptions</span><span class="p">({</span><span class="n">a_times_2</span><span class="p">})};</span>
<span class="linenos">400</span>
<span class="linenos">401</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">project</span><span class="p">));</span>
<span class="linenos">402</span><span class="p">}</span>
</pre></div>
</div>
<p>The above code creates a scan declaration (which has no inputs) and a project declaration (using the scan as
input). This is simple enough but we can make it slightly easier. If you are creating a linear sequence of
declarations (like in the above example) then you can also use the <a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero11Declaration8SequenceENSt6vectorI11DeclarationEE" title="arrow::acero::Declaration::Sequence"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">Declaration::Sequence()</span></code></a> function.</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">420</span><span class="w"> </span><span class="c1">// Inputs do not have to be passed to the project node when using Sequence</span>
<span class="linenos">421</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">plan</span><span class="w"> </span><span class="o">=</span>
<span class="linenos">422</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="o">::</span><span class="n">Sequence</span><span class="p">({{</span><span class="s">&quot;scan&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)},</span>
<span class="linenos">423</span><span class="w"> </span><span class="p">{</span><span class="s">&quot;project&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ProjectNodeOptions</span><span class="p">({</span><span class="n">a_times_2</span><span class="p">})}});</span>
</pre></div>
</div>
<p>There are many more examples of programmatic plan creation later in this document.</p>
</section>
</section>
<section id="executing-a-plan">
<h2>Executing a Plan<a class="headerlink" href="#executing-a-plan" title="Permalink to this heading">#</a></h2>
<p>There are a number of different methods that can be used to execute a declaration. Each one provides the
data in a slightly different form. Since all of these methods start with <code class="docutils literal notranslate"><span class="pre">DeclarationTo...</span></code> this guide
will often refer to these methods as the <code class="docutils literal notranslate"><span class="pre">DeclarationToXyz</span></code> methods.</p>
<section id="declarationtotable">
<h3>DeclarationToTable<a class="headerlink" href="#declarationtotable" title="Permalink to this heading">#</a></h3>
<p>The <a class="reference internal" href="../api/acero.html#_CPPv418DeclarationToTable11DeclarationbP10MemoryPoolP16FunctionRegistry" title="DeclarationToTable"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">DeclarationToTable()</span></code></a> method will accumulate all of the results into a single <a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::Table</span></code></a>.
This is perhaps the simplest way to collect results from Acero. The main disadvantage to this approach is
that it requires accumulating all results into memory.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Acero processes large datasets in small chunks. This is described in more detail in the developer’s guide.
As a result, you may be surprised to find that a table collected with DeclarationToTable is chunked
differently than your input. For example, your input might be a large table with a single chunk with 2
million rows. Your output table might then have 64 chunks with 32Ki rows each. There is a current request
to specify the chunk size for the output in <a class="reference external" href="https://github.com/apache/arrow/issues/15155">GH-15155</a>.</p>
</div>
</section>
<section id="declarationtoreader">
<h3>DeclarationToReader<a class="headerlink" href="#declarationtoreader" title="Permalink to this heading">#</a></h3>
<p>The <a class="reference internal" href="../api/acero.html#_CPPv419DeclarationToReader11DeclarationbP10MemoryPoolP16FunctionRegistry" title="DeclarationToReader"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">DeclarationToReader()</span></code></a> method allows you to iteratively consume the results. It will create an
<a class="reference internal" href="../api/table.html#_CPPv4N5arrow17RecordBatchReaderE" title="arrow::RecordBatchReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::RecordBatchReader</span></code></a> which you can read from at your leisure. If you do not read from the
reader quickly enough then backpressure will be applied and the execution plan will pause. Closing the
reader will cancel the running execution plan. The reader’s destructor will wait for the execution plan
to finish whatever it is doing, so it may block.</p>
</section>
<section id="declarationtostatus">
<h3>DeclarationToStatus<a class="headerlink" href="#declarationtostatus" title="Permalink to this heading">#</a></h3>
<p>The <a class="reference internal" href="../api/acero.html#_CPPv419DeclarationToStatus11DeclarationbP10MemoryPoolP16FunctionRegistry" title="DeclarationToStatus"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">DeclarationToStatus()</span></code></a> method is useful if you want to run the plan but do not actually want to
consume the results. For example, this is useful when benchmarking or when the plan has side effects such
as a dataset write node. If the plan generates any results then they will be immediately discarded.</p>
</section>
<section id="running-a-plan-directly">
<h3>Running a Plan Directly<a class="headerlink" href="#running-a-plan-directly" title="Permalink to this heading">#</a></h3>
<p>If one of the <code class="docutils literal notranslate"><span class="pre">DeclarationToXyz</span></code> methods is not sufficient for some reason then it is possible to run a plan
directly. This should only be needed if you are doing something unique. For example, if you have created a
custom sink node or if you need a plan that has multiple outputs.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>In academic literature and many existing systems there is a general assumption that an execution plan has
at most one output. There are some things in Acero, such as the DeclarationToXyz methods, which will expect
this. However, there is nothing in the design that strictly prevents having multiple sink nodes.</p>
</div>
<p>Detailed instructions on how to do this are out of scope for this guide but the rough steps are:</p>
<ol class="arabic simple">
<li><p>Create a new <a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero8ExecPlanE" title="arrow::acero::ExecPlan"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecPlan</span></code></a> object.</p></li>
<li><p>Add sink nodes to your graph of <a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero11DeclarationE" title="arrow::acero::Declaration"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Declaration</span></code></a> objects (this is the only time you will need
to create declarations for sink nodes)</p></li>
<li><p>Use <a class="reference internal" href="../api/acero.html#_CPPv4NK5arrow5acero11Declaration9AddToPlanEP8ExecPlanP19ExecFactoryRegistry" title="arrow::acero::Declaration::AddToPlan"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">Declaration::AddToPlan()</span></code></a> to add your declaration to your plan (if you have more than one output
then you will not be able to use this method and will need to add your nodes one at a time)</p></li>
<li><p>Validate the plan with <a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero8ExecPlan8ValidateEv" title="arrow::acero::ExecPlan::Validate"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">ExecPlan::Validate()</span></code></a></p></li>
<li><p>Start the plan with <a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero8ExecPlan14StartProducingEv" title="arrow::acero::ExecPlan::StartProducing"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">ExecPlan::StartProducing()</span></code></a></p></li>
<li><p>Wait for the future returned by <a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero8ExecPlan8finishedEv" title="arrow::acero::ExecPlan::finished"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">ExecPlan::finished()</span></code></a> to complete.</p></li>
</ol>
</section>
</section>
<section id="providing-input">
<h2>Providing Input<a class="headerlink" href="#providing-input" title="Permalink to this heading">#</a></h2>
<p>Input data for an exec plan can come from a variety of sources. It is often read from files stored on some
kind of filesystem. It is also common for input to come from in-memory data. In-memory data is typical, for
example, in a pandas-like frontend. Input could also come from network streams like a Flight request. Acero
can support all of these cases and can even support unique and custom situations not mentioned here.</p>
<p>There are pre-defined source nodes that cover the most common input scenarios. These are listed below. However,
if your source data is unique then you will need to use the generic <code class="docutils literal notranslate"><span class="pre">source</span></code> node. This node expects you to
provide an asynchronous stream of batches and is covered in more detail <a class="reference internal" href="#stream-execution-source-docs"><span class="std std-ref">here</span></a>.</p>
</section>
<section id="available-execnode-implementations">
<span id="execnode-list"></span><h2>Available <code class="docutils literal notranslate"><span class="pre">ExecNode</span></code> Implementations<a class="headerlink" href="#available-execnode-implementations" title="Permalink to this heading">#</a></h2>
<p>The following tables quickly summarize the available operators.</p>
<section id="sources">
<h3>Sources<a class="headerlink" href="#sources" title="Permalink to this heading">#</a></h3>
<p>These nodes can be used as sources of data.</p>
<table class="table" id="id1">
<caption><span class="caption-text">Source Nodes</span><a class="headerlink" href="#id1" title="Permalink to this table">#</a></caption>
<colgroup>
<col style="width: 25.0%" />
<col style="width: 25.0%" />
<col style="width: 50.0%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Factory Name</p></th>
<th class="head"><p>Options</p></th>
<th class="head"><p>Brief Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">source</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero17SourceNodeOptionsE" title="arrow::acero::SourceNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">SourceNodeOptions</span></code></a></p></td>
<td><p>A generic source node that wraps an asynchronous stream of data (<a class="reference internal" href="#stream-execution-source-docs"><span class="std std-ref">example</span></a>)</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">table_source</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero22TableSourceNodeOptionsE" title="arrow::acero::TableSourceNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">TableSourceNodeOptions</span></code></a></p></td>
<td><p>Generates data from an <a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::Table</span></code></a> (<a class="reference internal" href="#stream-execution-table-source-docs"><span class="std std-ref">example</span></a>)</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">record_batch_source</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero28RecordBatchSourceNodeOptionsE" title="arrow::acero::RecordBatchSourceNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatchSourceNodeOptions</span></code></a></p></td>
<td><p>Generates data from an iterator of <a class="reference internal" href="../api/table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::RecordBatch</span></code></a></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">record_batch_reader_source</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero34RecordBatchReaderSourceNodeOptionsE" title="arrow::acero::RecordBatchReaderSourceNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatchReaderSourceNodeOptions</span></code></a></p></td>
<td><p>Generates data from an <a class="reference internal" href="../api/table.html#_CPPv4N5arrow17RecordBatchReaderE" title="arrow::RecordBatchReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::RecordBatchReader</span></code></a></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">exec_batch_source</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero26ExecBatchSourceNodeOptionsE" title="arrow::acero::ExecBatchSourceNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecBatchSourceNodeOptions</span></code></a></p></td>
<td><p>Generates data from an iterator of <a class="reference internal" href="../api/acero.html#_CPPv4N5arrow7compute9ExecBatchE" title="arrow::compute::ExecBatch"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::ExecBatch</span></code></a></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">array_vector_source</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero28ArrayVectorSourceNodeOptionsE" title="arrow::acero::ArrayVectorSourceNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ArrayVectorSourceNodeOptions</span></code></a></p></td>
<td><p>Generates data from an iterator of vectors of <a class="reference internal" href="../api/array.html#_CPPv4N5arrow5ArrayE" title="arrow::Array"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::Array</span></code></a></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">scan</span></code></p></td>
<td><p><a class="reference internal" href="../api/dataset.html#_CPPv4N5arrow7dataset15ScanNodeOptionsE" title="arrow::dataset::ScanNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::ScanNodeOptions</span></code></a></p></td>
<td><p>Generates data from an <a class="reference internal" href="../api/dataset.html#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::Dataset</span></code></a> (requires the datasets module)
(<a class="reference internal" href="#stream-execution-scan-docs"><span class="std std-ref">example</span></a>)</p></td>
</tr>
</tbody>
</table>
</section>
<section id="compute-nodes">
<h3>Compute Nodes<a class="headerlink" href="#compute-nodes" title="Permalink to this heading">#</a></h3>
<p>These nodes perform computations on data and may transform or reshape the data.</p>
<table class="table" id="id2">
<caption><span class="caption-text">Compute Nodes</span><a class="headerlink" href="#id2" title="Permalink to this table">#</a></caption>
<colgroup>
<col style="width: 25.0%" />
<col style="width: 25.0%" />
<col style="width: 50.0%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Factory Name</p></th>
<th class="head"><p>Options</p></th>
<th class="head"><p>Brief Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">filter</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero17FilterNodeOptionsE" title="arrow::acero::FilterNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">FilterNodeOptions</span></code></a></p></td>
<td><p>Removes rows that do not match a given filter expression
(<a class="reference internal" href="#stream-execution-filter-docs"><span class="std std-ref">example</span></a>)</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">project</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero18ProjectNodeOptionsE" title="arrow::acero::ProjectNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ProjectNodeOptions</span></code></a></p></td>
<td><p>Creates new columns by evaluating compute expressions. Can also drop and reorder columns
(<a class="reference internal" href="#stream-execution-project-docs"><span class="std std-ref">example</span></a>)</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">aggregate</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero20AggregateNodeOptionsE" title="arrow::acero::AggregateNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">AggregateNodeOptions</span></code></a></p></td>
<td><p>Calculates summary statistics across the entire input stream or on groups of data
(<a class="reference internal" href="#stream-execution-aggregate-docs"><span class="std std-ref">example</span></a>)</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">pivot_longer</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero22PivotLongerNodeOptionsE" title="arrow::acero::PivotLongerNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">PivotLongerNodeOptions</span></code></a></p></td>
<td><p>Reshapes data by converting some columns into additional rows</p></td>
</tr>
</tbody>
</table>
</section>
<section id="arrangement-nodes">
<h3>Arrangement Nodes<a class="headerlink" href="#arrangement-nodes" title="Permalink to this heading">#</a></h3>
<p>These nodes reorder, combine, or slice streams of data.</p>
<table class="table" id="id3">
<caption><span class="caption-text">Arrangement Nodes</span><a class="headerlink" href="#id3" title="Permalink to this table">#</a></caption>
<colgroup>
<col style="width: 25.0%" />
<col style="width: 25.0%" />
<col style="width: 50.0%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Factory Name</p></th>
<th class="head"><p>Options</p></th>
<th class="head"><p>Brief Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">hash_join</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero19HashJoinNodeOptionsE" title="arrow::acero::HashJoinNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">HashJoinNodeOptions</span></code></a></p></td>
<td><p>Joins two inputs based on common columns (<a class="reference internal" href="#stream-execution-hashjoin-docs"><span class="std std-ref">example</span></a>)</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">asofjoin</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero19AsofJoinNodeOptionsE" title="arrow::acero::AsofJoinNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">AsofJoinNodeOptions</span></code></a></p></td>
<td><p>Joins multiple inputs to the first input based on a common ordered column (often time)</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">union</span></code></p></td>
<td><p>N/A</p></td>
<td><p>Merges two inputs with identical schemas (<a class="reference internal" href="#stream-execution-union-docs"><span class="std std-ref">example</span></a>)</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">order_by</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero18OrderByNodeOptionsE" title="arrow::acero::OrderByNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">OrderByNodeOptions</span></code></a></p></td>
<td><p>Reorders a stream</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">fetch</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero16FetchNodeOptionsE" title="arrow::acero::FetchNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">FetchNodeOptions</span></code></a></p></td>
<td><p>Slices a range of rows from a stream</p></td>
</tr>
</tbody>
</table>
</section>
<section id="sink-nodes">
<h3>Sink Nodes<a class="headerlink" href="#sink-nodes" title="Permalink to this heading">#</a></h3>
<p>These nodes terminate a plan. Users do not typically create sink nodes as they are
selected based on the DeclarationToXyz method used to consume the plan. However, this
list may be useful for those developing new sink nodes or using Acero in advanced ways.</p>
<table class="table" id="id4">
<caption><span class="caption-text">Sink Nodes</span><a class="headerlink" href="#id4" title="Permalink to this table">#</a></caption>
<colgroup>
<col style="width: 25.0%" />
<col style="width: 25.0%" />
<col style="width: 50.0%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Factory Name</p></th>
<th class="head"><p>Options</p></th>
<th class="head"><p>Brief Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">sink</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero15SinkNodeOptionsE" title="arrow::acero::SinkNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">SinkNodeOptions</span></code></a></p></td>
<td><p>Collects batches into a FIFO queue with optional backpressure</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">write</span></code></p></td>
<td><p><a class="reference internal" href="../api/dataset.html#_CPPv4N5arrow7dataset16WriteNodeOptionsE" title="arrow::dataset::WriteNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::WriteNodeOptions</span></code></a></p></td>
<td><p>Writes batches to a filesystem (<a class="reference internal" href="#stream-execution-write-docs"><span class="std std-ref">example</span></a>)</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">consuming_sink</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero24ConsumingSinkNodeOptionsE" title="arrow::acero::ConsumingSinkNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ConsumingSinkNodeOptions</span></code></a></p></td>
<td><p>Consumes batches using a user provided callback function</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">table_sink</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero20TableSinkNodeOptionsE" title="arrow::acero::TableSinkNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">TableSinkNodeOptions</span></code></a></p></td>
<td><p>Collects batches into an <a class="reference internal" href="../api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::Table</span></code></a></p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">order_by_sink</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero22OrderBySinkNodeOptionsE" title="arrow::acero::OrderBySinkNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">OrderBySinkNodeOptions</span></code></a></p></td>
<td><p>Deprecated</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">select_k_sink</span></code></p></td>
<td><p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero22SelectKSinkNodeOptionsE" title="arrow::acero::SelectKSinkNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">SelectKSinkNodeOptions</span></code></a></p></td>
<td><p>Deprecated</p></td>
</tr>
</tbody>
</table>
</section>
</section>
<section id="examples">
<h2>Examples<a class="headerlink" href="#examples" title="Permalink to this heading">#</a></h2>
<p>The rest of this document contains example execution plans. Each example highlights the behavior
of a specific execution node.</p>
<section id="source">
<span id="stream-execution-source-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">source</span></code><a class="headerlink" href="#source" title="Permalink to this heading">#</a></h3>
<p>A <code class="docutils literal notranslate"><span class="pre">source</span></code> operation can be considered as an entry point to create a streaming execution plan.
<a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero17SourceNodeOptionsE" title="arrow::acero::SourceNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">SourceNodeOptions</span></code></a> are used to create the <code class="docutils literal notranslate"><span class="pre">source</span></code> operation. The
<code class="docutils literal notranslate"><span class="pre">source</span></code> operation is the most generic and flexible type of source currently available but it can
be quite tricky to configure. First you should review the other source node types to ensure there
isn’t a simpler choice.</p>
<p>The source node requires some kind of function that can be called to poll for more data. This
function should take no arguments and should return an
<code class="docutils literal notranslate"><span class="pre">arrow::Future&lt;std::optional&lt;arrow::ExecBatch&gt;&gt;</span></code>.
This function might be reading a file, iterating through an in-memory structure, or receiving data
from a network connection. The Arrow library refers to these functions as <code class="docutils literal notranslate"><span class="pre">arrow::AsyncGenerator</span></code>
and there are a number of utilities for working with these functions. For this example we use
a vector of record batches that we’ve already stored in memory.
In addition, the schema of the data must be known up front. Acero must know the schema of the data
at each stage of the execution graph before any processing has begun. This means we must supply the
schema for a source node separately from the data itself.</p>
<p>Here we define a struct to hold the data generator definition. This includes the in-memory batches, the schema,
and a function that serves as a data generator:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">156</span><span class="k">struct</span><span class="w"> </span><span class="nc">BatchesWithSchema</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">157</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;</span><span class="w"> </span><span class="n">batches</span><span class="p">;</span>
<span class="linenos">158</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Schema</span><span class="o">&gt;</span><span class="w"> </span><span class="n">schema</span><span class="p">;</span>
<span class="linenos">159</span><span class="w"> </span><span class="c1">// This method uses internal arrow utilities to</span>
<span class="linenos">160</span><span class="w"> </span><span class="c1">// convert a vector of record batches to an AsyncGenerator of optional batches</span>
<span class="linenos">161</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">gen</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">162</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">opt_batches</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">internal</span><span class="o">::</span><span class="n">MapVector</span><span class="p">(</span>
<span class="linenos">163</span><span class="w"> </span><span class="p">[](</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="w"> </span><span class="n">batch</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_optional</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">batch</span><span class="p">));</span><span class="w"> </span><span class="p">},</span>
<span class="linenos">164</span><span class="w"> </span><span class="n">batches</span><span class="p">);</span>
<span class="linenos">165</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">gen</span><span class="p">;</span>
<span class="linenos">166</span><span class="w"> </span><span class="n">gen</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">MakeVectorGenerator</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">opt_batches</span><span class="p">));</span>
<span class="linenos">167</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">gen</span><span class="p">;</span>
<span class="linenos">168</span><span class="w"> </span><span class="p">}</span>
<span class="linenos">169</span><span class="p">};</span>
</pre></div>
</div>
<p>Generating sample batches for computation:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">173</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o">&lt;</span><span class="n">BatchesWithSchema</span><span class="o">&gt;</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">174</span><span class="w"> </span><span class="n">BatchesWithSchema</span><span class="w"> </span><span class="n">out</span><span class="p">;</span>
<span class="linenos">175</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">field_vector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">()),</span>
<span class="linenos">176</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">boolean</span><span class="p">())};</span>
<span class="linenos">177</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">4</span><span class="p">}));</span>
<span class="linenos">178</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="mi">6</span><span class="p">,</span><span class="w"> </span><span class="mi">7</span><span class="p">}));</span>
<span class="linenos">179</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">8</span><span class="p">,</span><span class="w"> </span><span class="mi">9</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">}));</span>
<span class="linenos">180</span>
<span class="linenos">181</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1_bool</span><span class="p">,</span>
<span class="linenos">182</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">BooleanType</span><span class="o">&gt;</span><span class="p">({</span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">}));</span>
<span class="linenos">183</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2_bool</span><span class="p">,</span>
<span class="linenos">184</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">BooleanType</span><span class="o">&gt;</span><span class="p">({</span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">}));</span>
<span class="linenos">185</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3_bool</span><span class="p">,</span>
<span class="linenos">186</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">BooleanType</span><span class="o">&gt;</span><span class="p">({</span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">}));</span>
<span class="linenos">187</span>
<span class="linenos">188</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1</span><span class="p">,</span>
<span class="linenos">189</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">field_vector</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">b1_int</span><span class="p">,</span><span class="w"> </span><span class="n">b1_bool</span><span class="p">}));</span>
<span class="linenos">190</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2</span><span class="p">,</span>
<span class="linenos">191</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">field_vector</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">b2_int</span><span class="p">,</span><span class="w"> </span><span class="n">b2_bool</span><span class="p">}));</span>
<span class="linenos">192</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3</span><span class="p">,</span>
<span class="linenos">193</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">field_vector</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">b3_int</span><span class="p">,</span><span class="w"> </span><span class="n">b3_bool</span><span class="p">}));</span>
<span class="linenos">194</span>
<span class="linenos">195</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">batches</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="n">b1</span><span class="p">,</span><span class="w"> </span><span class="n">b2</span><span class="p">,</span><span class="w"> </span><span class="n">b3</span><span class="p">};</span>
<span class="linenos">196</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">(</span><span class="n">field_vector</span><span class="p">);</span>
<span class="linenos">197</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">out</span><span class="p">;</span>
<span class="linenos">198</span><span class="p">}</span>
</pre></div>
</div>
<p>Example of using <code class="docutils literal notranslate"><span class="pre">source</span></code> (usage of sink is explained in detail in <a class="reference internal" href="#stream-execution-sink-docs"><span class="std std-ref">sink</span></a>):</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">294</span><span class="c1">/// \brief An example demonstrating a source and sink node</span>
<span class="linenos">295</span><span class="c1">///</span>
<span class="linenos">296</span><span class="c1">/// Source-Table Example</span>
<span class="linenos">297</span><span class="c1">/// This example shows how a custom source can be used</span>
<span class="linenos">298</span><span class="c1">/// in an execution plan. This includes source node using pregenerated</span>
<span class="linenos">299</span><span class="c1">/// data and collecting it into a table.</span>
<span class="linenos">300</span><span class="c1">///</span>
<span class="linenos">301</span><span class="c1">/// This sort of custom source is often not needed. In most cases you can</span>
<span class="linenos">302</span><span class="c1">/// use a scan (for a dataset source) or a source like table_source, array_vector_source,</span>
<span class="linenos">303</span><span class="c1">/// exec_batch_source, or record_batch_source (for in-memory data)</span>
<span class="linenos">304</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">305</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span>
<span class="linenos">306</span>
<span class="linenos">307</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span>
<span class="linenos">308</span>
<span class="linenos">309</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span>
<span class="linenos">310</span>
<span class="linenos">311</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">));</span>
<span class="linenos">312</span><span class="p">}</span>
</pre></div>
</div>
</section>
<section id="table-source">
<h3><code class="docutils literal notranslate"><span class="pre">table_source</span></code><a class="headerlink" href="#table-source" title="Permalink to this heading">#</a></h3>
<p id="stream-execution-table-source-docs">In the previous example, <a class="reference internal" href="#stream-execution-source-docs"><span class="std std-ref">source node</span></a>, a source node
was used to input the data. But when developing an application, if the data is already in memory
as a table, it is much easier and more performant to use <a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero22TableSourceNodeOptionsE" title="arrow::acero::TableSourceNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">TableSourceNodeOptions</span></code></a>.
Here the input data can be passed as a <code class="docutils literal notranslate"><span class="pre">std::shared_ptr&lt;arrow::Table&gt;</span></code> along with a <code class="docutils literal notranslate"><span class="pre">max_batch_size</span></code>.
The <code class="docutils literal notranslate"><span class="pre">max_batch_size</span></code> is used to break up large record batches so that they can be processed in parallel.
It is important to note that the table batches will not get merged to form larger batches when the source
table has a smaller batch size.</p>
<p>Example of using <code class="docutils literal notranslate"><span class="pre">table_source</span></code>:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">317</span><span class="c1">/// \brief An example showing a table source node</span>
<span class="linenos">318</span><span class="c1">///</span>
<span class="linenos">319</span><span class="c1">/// TableSource-Table Example</span>
<span class="linenos">320</span><span class="c1">/// This example shows how a table_source can be used</span>
<span class="linenos">321</span><span class="c1">/// in an execution plan. This includes a table source node</span>
<span class="linenos">322</span><span class="c1">/// receiving data from a table. This plan simply collects the</span>
<span class="linenos">323</span><span class="c1">/// data back into a table but nodes could be added that modify</span>
<span class="linenos">324</span><span class="c1">/// or transform the data as well (as is shown in later examples)</span>
<span class="linenos">325</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">TableSourceSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">326</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">table</span><span class="p">,</span><span class="w"> </span><span class="n">GetTable</span><span class="p">());</span>
<span class="linenos">327</span>
<span class="linenos">328</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span>
<span class="linenos">329</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">max_batch_size</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">2</span><span class="p">;</span>
<span class="linenos">330</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">table_source_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">TableSourceNodeOptions</span><span class="p">{</span><span class="n">table</span><span class="p">,</span><span class="w"> </span><span class="n">max_batch_size</span><span class="p">};</span>
<span class="linenos">331</span>
<span class="linenos">332</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">&quot;table_source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">table_source_options</span><span class="p">)};</span>
<span class="linenos">333</span>
<span class="linenos">334</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">));</span>
<span class="linenos">335</span><span class="p">}</span>
</pre></div>
</div>
</section>
<section id="filter">
<span id="stream-execution-filter-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">filter</span></code><a class="headerlink" href="#filter" title="Permalink to this heading">#</a></h3>
<p>The <code class="docutils literal notranslate"><span class="pre">filter</span></code> operation, as the name suggests, provides an option to define data filtering
criteria. It selects rows where the given expression evaluates to true. Filters can be written using
<a class="reference internal" href="../api/compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::Expression</span></code></a>, and the expression should have a return type of boolean.
For example, if we wish to keep rows where the value
of column <code class="docutils literal notranslate"><span class="pre">a</span></code> is greater than 3, then we can use the following expression.</p>
<p>Filter example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">340</span><span class="c1">/// \brief An example showing a filter node</span>
<span class="linenos">341</span><span class="c1">///</span>
<span class="linenos">342</span><span class="c1">/// Source-Filter-Table</span>
<span class="linenos">343</span><span class="c1">/// This example shows how a filter can be used in an execution plan,</span>
<span class="linenos">344</span><span class="c1">/// to filter data from a source. The output from the execution plan</span>
<span class="linenos">345</span><span class="c1">/// is collected into a table.</span>
<span class="linenos">346</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">ScanFilterSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">347</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">&gt;</span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span>
<span class="linenos">348</span>
<span class="linenos">349</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">&gt;</span><span class="p">();</span>
<span class="linenos">350</span><span class="w"> </span><span class="c1">// specify the filter. This filter keeps only the rows where the</span>
<span class="linenos">351</span><span class="w"> </span><span class="c1">// value of the &quot;a&quot; column is greater than 3.</span>
<span class="linenos">352</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">Expression</span><span class="w"> </span><span class="n">filter_expr</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">greater</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">),</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">3</span><span class="p">));</span>
<span class="linenos">353</span><span class="w"> </span><span class="c1">// set filter for scanner : on-disk / push-down filtering.</span>
<span class="linenos">354</span><span class="w"> </span><span class="c1">// This step can be skipped if you are not reading from disk.</span>
<span class="linenos">355</span><span class="w"> </span><span class="n">options</span><span class="o">-&gt;</span><span class="n">filter</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">filter_expr</span><span class="p">;</span>
<span class="linenos">356</span><span class="w"> </span><span class="c1">// empty projection</span>
<span class="linenos">357</span><span class="w"> </span><span class="n">options</span><span class="o">-&gt;</span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span>
<span class="linenos">358</span>
<span class="linenos">359</span><span class="w"> </span><span class="c1">// construct the scan node</span>
<span class="linenos">360</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;Initialized Scanning Options&quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">361</span>
<span class="linenos">362</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span>
<span class="linenos">363</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;Scan node options created&quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">364</span>
<span class="linenos">365</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">&quot;scan&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span>
<span class="linenos">366</span>
<span class="linenos">367</span><span class="w"> </span><span class="c1">// pipe the scan node into the filter node</span>
<span class="linenos">368</span><span class="w"> </span><span class="c1">// Need to set the filter in scan node options and filter node options.</span>
<span class="linenos">369</span><span class="w"> </span><span class="c1">// At scan node it is used for on-disk / push-down filtering.</span>
<span class="linenos">370</span><span class="w"> </span><span class="c1">// At filter node it is used for in-memory filtering.</span>
<span class="linenos">371</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">filter</span><span class="p">{</span>
<span class="linenos">372</span><span class="w"> </span><span class="s">&quot;filter&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">)},</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">FilterNodeOptions</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">filter_expr</span><span class="p">))};</span>
<span class="linenos">373</span>
<span class="linenos">374</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">filter</span><span class="p">));</span>
<span class="linenos">375</span><span class="p">}</span>
</pre></div>
</div>
</section>
<section id="project">
<span id="stream-execution-project-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">project</span></code><a class="headerlink" href="#project" title="Permalink to this heading">#</a></h3>
<p>The <code class="docutils literal notranslate"><span class="pre">project</span></code> operation rearranges, deletes, transforms, and creates columns.
Each output column is computed by evaluating an expression
against the source record batch. These must be scalar expressions
(expressions consisting of scalar literals, field references and scalar
functions, i.e. elementwise functions that return one value for each input
row independent of the value of all other rows).
This is exposed via <a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero18ProjectNodeOptionsE" title="arrow::acero::ProjectNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ProjectNodeOptions</span></code></a> which requires,
an <a class="reference internal" href="../api/compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::Expression</span></code></a> and name for each of the output columns (if names are not
provided, the string representations of exprs will be used).</p>
<p>Project example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">381</span><span class="c1">/// \brief An example showing a project node</span>
<span class="linenos">382</span><span class="c1">///</span>
<span class="linenos">383</span><span class="c1">/// Scan-Project-Table</span>
<span class="linenos">384</span><span class="c1">/// This example shows how a Scan operation can be used to load the data</span>
<span class="linenos">385</span><span class="c1">/// into the execution plan, how a project operation can be applied on the</span>
<span class="linenos">386</span><span class="c1">/// data stream and how the output is collected into a table</span>
<span class="linenos">387</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">ScanProjectSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">388</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">&gt;</span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span>
<span class="linenos">389</span>
<span class="linenos">390</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">&gt;</span><span class="p">();</span>
<span class="linenos">391</span><span class="w"> </span><span class="c1">// projection</span>
<span class="linenos">392</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">Expression</span><span class="w"> </span><span class="n">a_times_2</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;multiply&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">),</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">2</span><span class="p">)});</span>
<span class="linenos">393</span><span class="w"> </span><span class="n">options</span><span class="o">-&gt;</span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span>
<span class="linenos">394</span>
<span class="linenos">395</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span>
<span class="linenos">396</span>
<span class="linenos">397</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">&quot;scan&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span>
<span class="linenos">398</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">project</span><span class="p">{</span>
<span class="linenos">399</span><span class="w"> </span><span class="s">&quot;project&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">)},</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ProjectNodeOptions</span><span class="p">({</span><span class="n">a_times_2</span><span class="p">})};</span>
<span class="linenos">400</span>
<span class="linenos">401</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">project</span><span class="p">));</span>
<span class="linenos">402</span><span class="p">}</span>
</pre></div>
</div>
</section>
<section id="aggregate">
<span id="stream-execution-aggregate-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">aggregate</span></code><a class="headerlink" href="#aggregate" title="Permalink to this heading">#</a></h3>
<p>The <code class="docutils literal notranslate"><span class="pre">aggregate</span></code> node computes various types of aggregates over data.</p>
<p>Arrow supports two types of aggregates: “scalar” aggregates, and
“hash” aggregates. Scalar aggregates reduce an array or scalar input
to a single scalar output (e.g. computing the mean of a column). Hash
aggregates act like <code class="docutils literal notranslate"><span class="pre">GROUP</span> <span class="pre">BY</span></code> in SQL and first partition data based
on one or more key columns, then reduce the data in each
partition. The <code class="docutils literal notranslate"><span class="pre">aggregate</span></code> node supports both types of computation,
and can compute any number of aggregations at once.</p>
<p><a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero20AggregateNodeOptionsE" title="arrow::acero::AggregateNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">AggregateNodeOptions</span></code></a> is used to define the
aggregation criteria. It takes a list of aggregation functions and
their options; a list of target fields to aggregate, one per function;
and a list of names for the output fields, one per function.
Optionally, it takes a list of columns that are used to partition the
data, in the case of a hash aggregation. The aggregation functions
can be selected from <a class="reference internal" href="../compute.html#aggregation-option-list"><span class="std std-ref">this list of aggregation functions</span></a>.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>This node is a “pipeline breaker” and will fully materialize
the dataset in memory. In the future, spillover mechanisms
will be added which should alleviate this constraint.</p>
</div>
<p>The aggregation can provide results as a group or scalar. For instance,
an operation like <cite>hash_count</cite> provides the counts for each unique record
as a grouped result, while an operation like <cite>sum</cite> provides a single record.</p>
<p>Scalar Aggregation example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">430</span><span class="c1">/// \brief An example showing an aggregation node to aggregate an entire table</span>
<span class="linenos">431</span><span class="c1">///</span>
<span class="linenos">432</span><span class="c1">/// Source-Aggregation-Table</span>
<span class="linenos">433</span><span class="c1">/// This example shows how an aggregation operation can be applied on a</span>
<span class="linenos">434</span><span class="c1">/// execution plan resulting in a scalar output. The source node loads the</span>
<span class="linenos">435</span><span class="c1">/// data and the aggregation (summing the values in column &#39;a&#39;)</span>
<span class="linenos">436</span><span class="c1">/// is applied on this data. The output is collected into a table (that will</span>
<span class="linenos">437</span><span class="c1">/// have exactly one row)</span>
<span class="linenos">438</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceScalarAggregateSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">439</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span>
<span class="linenos">440</span>
<span class="linenos">441</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span>
<span class="linenos">442</span>
<span class="linenos">443</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span>
<span class="linenos">444</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">aggregate_options</span><span class="w"> </span><span class="o">=</span>
<span class="linenos">445</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">AggregateNodeOptions</span><span class="p">{</span><span class="cm">/*aggregates=*/</span><span class="p">{{</span><span class="s">&quot;sum&quot;</span><span class="p">,</span><span class="w"> </span><span class="k">nullptr</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;a&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;sum(a)&quot;</span><span class="p">}}};</span>
<span class="linenos">446</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">aggregate</span><span class="p">{</span>
<span class="linenos">447</span><span class="w"> </span><span class="s">&quot;aggregate&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">)},</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">aggregate_options</span><span class="p">)};</span>
<span class="linenos">448</span>
<span class="linenos">449</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">aggregate</span><span class="p">));</span>
<span class="linenos">450</span><span class="p">}</span>
</pre></div>
</div>
<p>Group Aggregation example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">455</span><span class="c1">/// \brief An example showing an aggregation node to perform a group-by operation</span>
<span class="linenos">456</span><span class="c1">///</span>
<span class="linenos">457</span><span class="c1">/// Source-Aggregation-Table</span>
<span class="linenos">458</span><span class="c1">/// This example shows how an aggregation operation can be applied on a</span>
<span class="linenos">459</span><span class="c1">/// execution plan resulting in grouped output. The source node loads the</span>
<span class="linenos">460</span><span class="c1">/// data and the aggregation (counting unique types in column &#39;a&#39;) is</span>
<span class="linenos">461</span><span class="c1">/// applied on this data. The output is collected into a table that will contain</span>
<span class="linenos">462</span><span class="c1">/// one row for each unique combination of group keys.</span>
<span class="linenos">463</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceGroupAggregateSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">464</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span>
<span class="linenos">465</span>
<span class="linenos">466</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span>
<span class="linenos">467</span>
<span class="linenos">468</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span>
<span class="linenos">469</span>
<span class="linenos">470</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span>
<span class="linenos">471</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">CountOptions</span><span class="o">&gt;</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">CountOptions</span><span class="o">::</span><span class="n">ONLY_VALID</span><span class="p">);</span>
<span class="linenos">472</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">aggregate_options</span><span class="w"> </span><span class="o">=</span>
<span class="linenos">473</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">AggregateNodeOptions</span><span class="p">{</span><span class="cm">/*aggregates=*/</span><span class="p">{{</span><span class="s">&quot;hash_count&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;a&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;count(a)&quot;</span><span class="p">}},</span>
<span class="linenos">474</span><span class="w"> </span><span class="cm">/*keys=*/</span><span class="p">{</span><span class="s">&quot;b&quot;</span><span class="p">}};</span>
<span class="linenos">475</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">aggregate</span><span class="p">{</span>
<span class="linenos">476</span><span class="w"> </span><span class="s">&quot;aggregate&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">)},</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">aggregate_options</span><span class="p">)};</span>
<span class="linenos">477</span>
<span class="linenos">478</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">aggregate</span><span class="p">));</span>
<span class="linenos">479</span><span class="p">}</span>
</pre></div>
</div>
</section>
<section id="sink">
<span id="stream-execution-sink-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">sink</span></code><a class="headerlink" href="#sink" title="Permalink to this heading">#</a></h3>
<p>The <code class="docutils literal notranslate"><span class="pre">sink</span></code> operation provides output and is the final node of a streaming
execution definition. The <a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero15SinkNodeOptionsE" title="arrow::acero::SinkNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">SinkNodeOptions</span></code></a> interface is used to pass
the required options. Similar to the source operator, the sink operator exposes the output
with a function that returns a record batch future each time it is called. It is expected that the
caller will repeatedly call this function until the generator function is exhausted (returns
<code class="docutils literal notranslate"><span class="pre">std::nullopt</span></code>). If this function is not called often enough then record batches
will accumulate in memory. An execution plan should only have one
“terminal” node (one sink node). An <a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero8ExecPlanE" title="arrow::acero::ExecPlan"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecPlan</span></code></a> can terminate early due to cancellation or
an error, before the output is fully consumed. However, the plan can be safely destroyed independently
of the sink, which will hold the unconsumed batches by <cite>exec_plan-&gt;finished()</cite>.</p>
<p>The Sink operation is also included as part of the Source Example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">294</span><span class="c1">/// \brief An example demonstrating a source and sink node</span>
<span class="linenos">295</span><span class="c1">///</span>
<span class="linenos">296</span><span class="c1">/// Source-Table Example</span>
<span class="linenos">297</span><span class="c1">/// This example shows how a custom source can be used</span>
<span class="linenos">298</span><span class="c1">/// in an execution plan. This includes source node using pregenerated</span>
<span class="linenos">299</span><span class="c1">/// data and collecting it into a table.</span>
<span class="linenos">300</span><span class="c1">///</span>
<span class="linenos">301</span><span class="c1">/// This sort of custom source is often not needed. In most cases you can</span>
<span class="linenos">302</span><span class="c1">/// use a scan (for a dataset source) or a source like table_source, array_vector_source,</span>
<span class="linenos">303</span><span class="c1">/// exec_batch_source, or record_batch_source (for in-memory data)</span>
<span class="linenos">304</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">305</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span>
<span class="linenos">306</span>
<span class="linenos">307</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span>
<span class="linenos">308</span>
<span class="linenos">309</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span>
<span class="linenos">310</span>
<span class="linenos">311</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">));</span>
<span class="linenos">312</span><span class="p">}</span>
</pre></div>
</div>
</section>
<section id="consuming-sink">
<span id="stream-execution-consuming-sink-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">consuming_sink</span></code><a class="headerlink" href="#consuming-sink" title="Permalink to this heading">#</a></h3>
<p><code class="docutils literal notranslate"><span class="pre">consuming_sink</span></code> operator is a sink operation containing consuming operation within the
execution plan (i.e. the exec plan should not complete until the consumption has completed).
Unlike the <code class="docutils literal notranslate"><span class="pre">sink</span></code> node this node takes in a callback function that is expected to consume the
batch. Once this callback has finished the execution plan will no longer hold any reference to
the batch.
The consuming function may be called before a previous invocation has completed. If the consuming
function does not run quickly enough then many concurrent executions could pile up, blocking the
CPU thread pool. The execution plan will not be marked finished until all consuming function callbacks
have been completed.
Once all batches have been delivered, the execution plan will wait for the <code class="docutils literal notranslate"><span class="pre">finish</span></code> future to complete
before marking the execution plan finished. This allows for workflows where the consumption function
converts batches into async tasks (this is currently done internally for the dataset write node).</p>
<p>Example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="c1">// define a Custom SinkNodeConsumer</span>
<span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o">&lt;</span><span class="kt">uint32_t</span><span class="o">&gt;</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">{</span><span class="mi">0</span><span class="p">};</span>
<span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;</span><span class="w"> </span><span class="n">finish</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;::</span><span class="n">Make</span><span class="p">();</span>
<span class="k">struct</span><span class="w"> </span><span class="nc">CustomSinkNodeConsumer</span><span class="w"> </span><span class="o">:</span><span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SinkNodeConsumer</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">CustomSinkNodeConsumer</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o">&lt;</span><span class="kt">uint32_t</span><span class="o">&gt;</span><span class="w"> </span><span class="o">*</span><span class="n">batches_seen</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;</span><span class="w"> </span><span class="n">finish</span><span class="p">)</span><span class="o">:</span>
<span class="w"> </span><span class="n">batches_seen</span><span class="p">(</span><span class="n">batches_seen</span><span class="p">),</span><span class="w"> </span><span class="n">finish</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">finish</span><span class="p">))</span><span class="w"> </span><span class="p">{}</span>
<span class="w"> </span><span class="c1">// Consumption logic can be written here</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">Consume</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="w"> </span><span class="n">batch</span><span class="p">)</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="c1">// data can be consumed in the expected way</span>
<span class="w"> </span><span class="c1">// transfer to another system or just do some work</span>
<span class="w"> </span><span class="c1">// and write to disk</span>
<span class="w"> </span><span class="p">(</span><span class="o">*</span><span class="n">batches_seen</span><span class="p">)</span><span class="o">++</span><span class="p">;</span>
<span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;</span><span class="w"> </span><span class="n">Finish</span><span class="p">()</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">finish</span><span class="p">;</span><span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o">&lt;</span><span class="kt">uint32_t</span><span class="o">&gt;</span><span class="w"> </span><span class="o">*</span><span class="n">batches_seen</span><span class="p">;</span>
<span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;</span><span class="w"> </span><span class="n">finish</span><span class="p">;</span>
<span class="p">};</span>
<span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">CustomSinkNodeConsumer</span><span class="o">&gt;</span><span class="w"> </span><span class="n">consumer</span><span class="w"> </span><span class="o">=</span>
<span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">CustomSinkNodeConsumer</span><span class="o">&gt;</span><span class="p">(</span><span class="o">&amp;</span><span class="n">batches_seen</span><span class="p">,</span><span class="w"> </span><span class="n">finish</span><span class="p">);</span>
<span class="n">arrow</span><span class="o">::</span><span class="n">acero</span><span class="o">::</span><span class="n">ExecNode</span><span class="w"> </span><span class="o">*</span><span class="n">consuming_sink</span><span class="p">;</span>
<span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">consuming_sink</span><span class="p">,</span><span class="w"> </span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">&quot;consuming_sink&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span>
<span class="w"> </span><span class="p">{</span><span class="n">source</span><span class="p">},</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ConsumingSinkNodeOptions</span><span class="p">(</span><span class="n">consumer</span><span class="p">)));</span>
</pre></div>
</div>
<p>Consuming-Sink example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">484</span><span class="c1">/// \brief An example showing a consuming sink node</span>
<span class="linenos">485</span><span class="c1">///</span>
<span class="linenos">486</span><span class="c1">/// Source-Consuming-Sink</span>
<span class="linenos">487</span><span class="c1">/// This example shows how the data can be consumed within the execution plan</span>
<span class="linenos">488</span><span class="c1">/// by using a ConsumingSink node. There is no data output from this execution plan.</span>
<span class="linenos">489</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceConsumingSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">490</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span>
<span class="linenos">491</span>
<span class="linenos">492</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span>
<span class="linenos">493</span>
<span class="linenos">494</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span>
<span class="linenos">495</span>
<span class="linenos">496</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o">&lt;</span><span class="kt">uint32_t</span><span class="o">&gt;</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">{</span><span class="mi">0</span><span class="p">};</span>
<span class="linenos">497</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;</span><span class="w"> </span><span class="n">finish</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;::</span><span class="n">Make</span><span class="p">();</span>
<span class="linenos">498</span><span class="w"> </span><span class="k">struct</span><span class="w"> </span><span class="nc">CustomSinkNodeConsumer</span><span class="w"> </span><span class="o">:</span><span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SinkNodeConsumer</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">499</span><span class="w"> </span><span class="n">CustomSinkNodeConsumer</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o">&lt;</span><span class="kt">uint32_t</span><span class="o">&gt;*</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;</span><span class="w"> </span><span class="n">finish</span><span class="p">)</span>
<span class="linenos">500</span><span class="w"> </span><span class="o">:</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">(</span><span class="n">batches_seen</span><span class="p">),</span><span class="w"> </span><span class="n">finish</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">finish</span><span class="p">))</span><span class="w"> </span><span class="p">{}</span>
<span class="linenos">501</span>
<span class="linenos">502</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">Init</span><span class="p">(</span><span class="k">const</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Schema</span><span class="o">&gt;&amp;</span><span class="w"> </span><span class="n">schema</span><span class="p">,</span>
<span class="linenos">503</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">BackpressureControl</span><span class="o">*</span><span class="w"> </span><span class="n">backpressure_control</span><span class="p">,</span>
<span class="linenos">504</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">*</span><span class="w"> </span><span class="n">plan</span><span class="p">)</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">505</span><span class="w"> </span><span class="c1">// This will be called as the plan is started (before the first call to Consume)</span>
<span class="linenos">506</span><span class="w"> </span><span class="c1">// and provides the schema of the data coming into the node, controls for pausing /</span>
<span class="linenos">507</span><span class="w"> </span><span class="c1">// resuming input, and a pointer to the plan itself which can be used to access</span>
<span class="linenos">508</span><span class="w"> </span><span class="c1">// other utilities such as the thread indexer or async task scheduler.</span>
<span class="linenos">509</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span>
<span class="linenos">510</span><span class="w"> </span><span class="p">}</span>
<span class="linenos">511</span>
<span class="linenos">512</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">Consume</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="w"> </span><span class="n">batch</span><span class="p">)</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">513</span><span class="w"> </span><span class="p">(</span><span class="o">*</span><span class="n">batches_seen</span><span class="p">)</span><span class="o">++</span><span class="p">;</span>
<span class="linenos">514</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span>
<span class="linenos">515</span><span class="w"> </span><span class="p">}</span>
<span class="linenos">516</span>
<span class="linenos">517</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;</span><span class="w"> </span><span class="n">Finish</span><span class="p">()</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">518</span><span class="w"> </span><span class="c1">// Here you can perform whatever (possibly async) cleanup is needed, e.g. closing</span>
<span class="linenos">519</span><span class="w"> </span><span class="c1">// output file handles and flushing remaining work</span>
<span class="linenos">520</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;::</span><span class="n">MakeFinished</span><span class="p">();</span>
<span class="linenos">521</span><span class="w"> </span><span class="p">}</span>
<span class="linenos">522</span>
<span class="linenos">523</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o">&lt;</span><span class="kt">uint32_t</span><span class="o">&gt;*</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">;</span>
<span class="linenos">524</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;</span><span class="w"> </span><span class="n">finish</span><span class="p">;</span>
<span class="linenos">525</span><span class="w"> </span><span class="p">};</span>
<span class="linenos">526</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">CustomSinkNodeConsumer</span><span class="o">&gt;</span><span class="w"> </span><span class="n">consumer</span><span class="w"> </span><span class="o">=</span>
<span class="linenos">527</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">CustomSinkNodeConsumer</span><span class="o">&gt;</span><span class="p">(</span><span class="o">&amp;</span><span class="n">batches_seen</span><span class="p">,</span><span class="w"> </span><span class="n">finish</span><span class="p">);</span>
<span class="linenos">528</span>
<span class="linenos">529</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">consuming_sink</span><span class="p">{</span><span class="s">&quot;consuming_sink&quot;</span><span class="p">,</span>
<span class="linenos">530</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">)},</span>
<span class="linenos">531</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ConsumingSinkNodeOptions</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">consumer</span><span class="p">))};</span>
<span class="linenos">532</span>
<span class="linenos">533</span><span class="w"> </span><span class="c1">// Since we are consuming the data within the plan there is no output and we simply</span>
<span class="linenos">534</span><span class="w"> </span><span class="c1">// run the plan to completion instead of collecting into a table.</span>
<span class="linenos">535</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">DeclarationToStatus</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">consuming_sink</span><span class="p">)));</span>
<span class="linenos">536</span>
<span class="linenos">537</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;The consuming sink node saw &quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">.</span><span class="n">load</span><span class="p">()</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot; batches&quot;</span>
<span class="linenos">538</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">539</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span>
<span class="linenos">540</span><span class="p">}</span>
</pre></div>
</div>
</section>
<section id="order-by-sink">
<span id="stream-execution-order-by-sink-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">order_by_sink</span></code><a class="headerlink" href="#order-by-sink" title="Permalink to this heading">#</a></h3>
<p>The <code class="docutils literal notranslate"><span class="pre">order_by_sink</span></code> operation is an extension to the <code class="docutils literal notranslate"><span class="pre">sink</span></code> operation.
This operation provides the ability to guarantee the ordering of the
stream by providing the <a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero22OrderBySinkNodeOptionsE" title="arrow::acero::OrderBySinkNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">OrderBySinkNodeOptions</span></code></a>.
Here the <a class="reference internal" href="../api/compute.html#_CPPv4N5arrow7compute11SortOptionsE" title="arrow::compute::SortOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::SortOptions</span></code></a> are provided to define which columns
are used for sorting and whether to sort by ascending or descending values.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>This node is a “pipeline breaker” and will fully materialize the dataset in memory.
In the future, spillover mechanisms will be added which should alleviate this
constraint.</p>
</div>
<p>Order-By-Sink example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">545</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">ExecutePlanAndCollectAsTableWithCustomSink</span><span class="p">(</span>
<span class="linenos">546</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">&gt;</span><span class="w"> </span><span class="n">plan</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Schema</span><span class="o">&gt;</span><span class="w"> </span><span class="n">schema</span><span class="p">,</span>
<span class="linenos">547</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">548</span><span class="w"> </span><span class="c1">// translate sink_gen (async) to sink_reader (sync)</span>
<span class="linenos">549</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">RecordBatchReader</span><span class="o">&gt;</span><span class="w"> </span><span class="n">sink_reader</span><span class="w"> </span><span class="o">=</span>
<span class="linenos">550</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeGeneratorReader</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">sink_gen</span><span class="p">),</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">());</span>
<span class="linenos">551</span>
<span class="linenos">552</span><span class="w"> </span><span class="c1">// validate the ExecPlan</span>
<span class="linenos">553</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">Validate</span><span class="p">());</span>
<span class="linenos">554</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;ExecPlan created : &quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">555</span><span class="w"> </span><span class="c1">// start the ExecPlan</span>
<span class="linenos">556</span><span class="w"> </span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">StartProducing</span><span class="p">();</span>
<span class="linenos">557</span>
<span class="linenos">558</span><span class="w"> </span><span class="c1">// collect sink_reader into a Table</span>
<span class="linenos">559</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span><span class="w"> </span><span class="n">response_table</span><span class="p">;</span>
<span class="linenos">560</span>
<span class="linenos">561</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">response_table</span><span class="p">,</span>
<span class="linenos">562</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">FromRecordBatchReader</span><span class="p">(</span><span class="n">sink_reader</span><span class="p">.</span><span class="n">get</span><span class="p">()));</span>
<span class="linenos">563</span>
<span class="linenos">564</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;Results : &quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">response_table</span><span class="o">-&gt;</span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">565</span>
<span class="linenos">566</span><span class="w"> </span><span class="c1">// stop producing</span>
<span class="linenos">567</span><span class="w"> </span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">StopProducing</span><span class="p">();</span>
<span class="linenos">568</span><span class="w"> </span><span class="c1">// plan mark finished</span>
<span class="linenos">569</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">future</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">finished</span><span class="p">();</span>
<span class="linenos">570</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">future</span><span class="p">.</span><span class="n">status</span><span class="p">();</span>
<span class="linenos">571</span><span class="p">}</span>
<span class="linenos">572</span>
<span class="linenos">573</span><span class="c1">/// \brief An example showing an order-by node</span>
<span class="linenos">574</span><span class="c1">///</span>
<span class="linenos">575</span><span class="c1">/// Source-OrderBy-Sink</span>
<span class="linenos">576</span><span class="c1">/// In this example, the data enters through the source node</span>
<span class="linenos">577</span><span class="c1">/// and the data is ordered in the sink node. The order can be</span>
<span class="linenos">578</span><span class="c1">/// ASCENDING or DESCENDING and it is configurable. The output</span>
<span class="linenos">579</span><span class="c1">/// is obtained as a table from the sink node.</span>
<span class="linenos">580</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceOrderBySinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">581</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">&gt;</span><span class="w"> </span><span class="n">plan</span><span class="p">,</span>
<span class="linenos">582</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="o">*</span><span class="n">cp</span><span class="o">::</span><span class="n">threaded_exec_context</span><span class="p">()));</span>
<span class="linenos">583</span>
<span class="linenos">584</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeSortTestBasicBatches</span><span class="p">());</span>
<span class="linenos">585</span>
<span class="linenos">586</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span>
<span class="linenos">587</span>
<span class="linenos">588</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span>
<span class="linenos">589</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecNode</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">source</span><span class="p">,</span>
<span class="linenos">590</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{},</span><span class="w"> </span><span class="n">source_node_options</span><span class="p">));</span>
<span class="linenos">591</span>
<span class="linenos">592</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span>
<span class="linenos">593</span><span class="w"> </span><span class="s">&quot;order_by_sink&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{</span><span class="n">source</span><span class="p">},</span>
<span class="linenos">594</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">OrderBySinkNodeOptions</span><span class="p">{</span>
<span class="linenos">595</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SortOptions</span><span class="p">{{</span><span class="n">cp</span><span class="o">::</span><span class="n">SortKey</span><span class="p">{</span><span class="s">&quot;a&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SortOrder</span><span class="o">::</span><span class="n">Descending</span><span class="p">}}},</span><span class="w"> </span><span class="o">&amp;</span><span class="n">sink_gen</span><span class="p">}));</span>
<span class="linenos">596</span>
<span class="linenos">597</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTableWithCustomSink</span><span class="p">(</span><span class="n">plan</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">);</span>
<span class="linenos">598</span><span class="p">}</span>
</pre></div>
</div>
</section>
<section id="select-k-sink">
<span id="stream-execution-select-k-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">select_k_sink</span></code><a class="headerlink" href="#select-k-sink" title="Permalink to this heading">#</a></h3>
<p>The <code class="docutils literal notranslate"><span class="pre">select_k_sink</span></code> option enables selecting the top/bottom K elements,
similar to a SQL <code class="docutils literal notranslate"><span class="pre">ORDER</span> <span class="pre">BY</span> <span class="pre">...</span> <span class="pre">LIMIT</span> <span class="pre">K</span></code> clause.
<code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">SelectKOptions</span></code> is used to configure the selection, and the node is defined
using the <code class="xref cpp cpp-struct docutils literal notranslate"><span class="pre">OrderBySinkNode</span></code> definition. This option returns a sink node that receives
inputs and then computes top_k/bottom_k.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>This node is a “pipeline breaker” and will fully materialize the input in memory.
In the future, spillover mechanisms will be added which should alleviate this
constraint.</p>
</div>
<p>SelectK example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">631</span><span class="c1">/// \brief An example showing a select-k node</span>
<span class="linenos">632</span><span class="c1">///</span>
<span class="linenos">633</span><span class="c1">/// Source-KSelect</span>
<span class="linenos">634</span><span class="c1">/// This example shows how K number of elements can be selected</span>
<span class="linenos">635</span><span class="c1">/// either from the top or bottom. The output node is a modified</span>
<span class="linenos">636</span><span class="c1">/// sink node where output can be obtained as a table.</span>
<span class="linenos">637</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceKSelectExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">638</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="n">MakeGroupableBatches</span><span class="p">());</span>
<span class="linenos">639</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">&gt;</span><span class="w"> </span><span class="n">plan</span><span class="p">,</span>
<span class="linenos">640</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="o">*</span><span class="n">cp</span><span class="o">::</span><span class="n">threaded_exec_context</span><span class="p">()));</span>
<span class="linenos">641</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span>
<span class="linenos">642</span>
<span class="linenos">643</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span>
<span class="linenos">644</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecNode</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">source</span><span class="p">,</span>
<span class="linenos">645</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{},</span>
<span class="linenos">646</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">input</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">input</span><span class="p">.</span><span class="n">gen</span><span class="p">()}));</span>
<span class="linenos">647</span>
<span class="linenos">648</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SelectKOptions</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SelectKOptions</span><span class="o">::</span><span class="n">TopKDefault</span><span class="p">(</span><span class="cm">/*k=*/</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="s">&quot;i32&quot;</span><span class="p">});</span>
<span class="linenos">649</span>
<span class="linenos">650</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">&quot;select_k_sink&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{</span><span class="n">source</span><span class="p">},</span>
<span class="linenos">651</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SelectKSinkNodeOptions</span><span class="p">{</span><span class="n">options</span><span class="p">,</span><span class="w"> </span><span class="o">&amp;</span><span class="n">sink_gen</span><span class="p">}));</span>
<span class="linenos">652</span>
<span class="linenos">653</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">(</span>
<span class="linenos">654</span><span class="w"> </span><span class="p">{</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;i32&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">()),</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;str&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())});</span>
<span class="linenos">655</span>
<span class="linenos">656</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTableWithCustomSink</span><span class="p">(</span><span class="n">plan</span><span class="p">,</span><span class="w"> </span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">);</span>
<span class="linenos">657</span><span class="p">}</span>
</pre></div>
</div>
</section>
<section id="table-sink">
<h3><code class="docutils literal notranslate"><span class="pre">table_sink</span></code><a class="headerlink" href="#table-sink" title="Permalink to this heading">#</a></h3>
<p id="stream-execution-table-sink-docs">The <code class="docutils literal notranslate"><span class="pre">table_sink</span></code> node provides the ability to receive the output as an in-memory table.
This is simpler to use than the other sink nodes provided by the streaming execution engine
but it only makes sense when the output fits comfortably in memory.
The node is created using <a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero20TableSinkNodeOptionsE" title="arrow::acero::TableSinkNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">TableSinkNodeOptions</span></code></a>.</p>
<p>Example of using <code class="docutils literal notranslate"><span class="pre">table_sink</span></code>:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">749</span><span class="c1">/// \brief An example showing a table sink node</span>
<span class="linenos">750</span><span class="c1">///</span>
<span class="linenos">751</span><span class="c1">/// TableSink Example</span>
<span class="linenos">752</span><span class="c1">/// This example shows how a table_sink can be used</span>
<span class="linenos">753</span><span class="c1">/// in an execution plan. This includes a source node</span>
<span class="linenos">754</span><span class="c1">/// receiving data as batches and the table sink node</span>
<span class="linenos">755</span><span class="c1">/// which emits the output as a table.</span>
<span class="linenos">756</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">TableSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">757</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">&gt;</span><span class="w"> </span><span class="n">plan</span><span class="p">,</span>
<span class="linenos">758</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="o">*</span><span class="n">cp</span><span class="o">::</span><span class="n">threaded_exec_context</span><span class="p">()));</span>
<span class="linenos">759</span>
<span class="linenos">760</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span>
<span class="linenos">761</span>
<span class="linenos">762</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span>
<span class="linenos">763</span>
<span class="linenos">764</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecNode</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">source</span><span class="p">,</span>
<span class="linenos">765</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{},</span><span class="w"> </span><span class="n">source_node_options</span><span class="p">));</span>
<span class="linenos">766</span>
<span class="linenos">767</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span><span class="w"> </span><span class="n">output_table</span><span class="p">;</span>
<span class="linenos">768</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">table_sink_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">TableSinkNodeOptions</span><span class="p">{</span><span class="o">&amp;</span><span class="n">output_table</span><span class="p">};</span>
<span class="linenos">769</span>
<span class="linenos">770</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span>
<span class="linenos">771</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">&quot;table_sink&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{</span><span class="n">source</span><span class="p">},</span><span class="w"> </span><span class="n">table_sink_options</span><span class="p">));</span>
<span class="linenos">772</span><span class="w"> </span><span class="c1">// validate the ExecPlan</span>
<span class="linenos">773</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">Validate</span><span class="p">());</span>
<span class="linenos">774</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;ExecPlan created : &quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">775</span><span class="w"> </span><span class="c1">// start the ExecPlan</span>
<span class="linenos">776</span><span class="w"> </span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">StartProducing</span><span class="p">();</span>
<span class="linenos">777</span>
<span class="linenos">778</span><span class="w"> </span><span class="c1">// Wait for the plan to finish</span>
<span class="linenos">779</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">finished</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">finished</span><span class="p">();</span>
<span class="linenos">780</span><span class="w"> </span><span class="n">RETURN_NOT_OK</span><span class="p">(</span><span class="n">finished</span><span class="p">.</span><span class="n">status</span><span class="p">());</span>
<span class="linenos">781</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;Results : &quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">output_table</span><span class="o">-&gt;</span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">782</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span>
<span class="linenos">783</span><span class="p">}</span>
</pre></div>
</div>
</section>
<section id="scan">
<span id="stream-execution-scan-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">scan</span></code><a class="headerlink" href="#scan" title="Permalink to this heading">#</a></h3>
<p><code class="docutils literal notranslate"><span class="pre">scan</span></code> is an operation used to load and process datasets. It should be preferred over the
more generic <code class="docutils literal notranslate"><span class="pre">source</span></code> node when your input is a dataset. The behavior is defined using
<a class="reference internal" href="../api/dataset.html#_CPPv4N5arrow7dataset15ScanNodeOptionsE" title="arrow::dataset::ScanNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::ScanNodeOptions</span></code></a>. More information on datasets and the various
scan options can be found in <a class="reference internal" href="../dataset.html"><span class="doc">Tabular Datasets</span></a>.</p>
<p>This node is capable of applying pushdown filters to the file readers, which reduces
the amount of data that needs to be read. This means you may supply the same
filter expression to the scan node that you also supply to the FilterNode because
the filtering is done in two different places.</p>
<p>Scan example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">271</span><span class="c1">/// \brief An example demonstrating a scan and sink node</span>
<span class="linenos">272</span><span class="c1">///</span>
<span class="linenos">273</span><span class="c1">/// Scan-Table</span>
<span class="linenos">274</span><span class="c1">/// This example shows how scan operation can be applied on a dataset.</span>
<span class="linenos">275</span><span class="c1">/// There are operations that can be applied on the scan (project, filter)</span>
<span class="linenos">276</span><span class="c1">/// and the input data can be processed. The output is obtained as a table</span>
<span class="linenos">277</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">ScanSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">278</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">&gt;</span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span>
<span class="linenos">279</span>
<span class="linenos">280</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">&gt;</span><span class="p">();</span>
<span class="linenos">281</span><span class="w"> </span><span class="n">options</span><span class="o">-&gt;</span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span><span class="w"> </span><span class="c1">// create empty projection</span>
<span class="linenos">282</span>
<span class="linenos">283</span><span class="w"> </span><span class="c1">// construct the scan node</span>
<span class="linenos">284</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span>
<span class="linenos">285</span>
<span class="linenos">286</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">&quot;scan&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span>
<span class="linenos">287</span>
<span class="linenos">288</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">));</span>
<span class="linenos">289</span><span class="p">}</span>
</pre></div>
</div>
</section>
<section id="write">
<span id="stream-execution-write-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">write</span></code><a class="headerlink" href="#write" title="Permalink to this heading">#</a></h3>
<p>The <code class="docutils literal notranslate"><span class="pre">write</span></code> node saves query results as a dataset of files in a
format like Parquet, Feather, CSV, etc. using the <a class="reference internal" href="../dataset.html"><span class="doc">Tabular Datasets</span></a>
functionality in Arrow. The write options are provided via
<a class="reference internal" href="../api/dataset.html#_CPPv4N5arrow7dataset16WriteNodeOptionsE" title="arrow::dataset::WriteNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::WriteNodeOptions</span></code></a>, which in turn contains
<a class="reference internal" href="../api/dataset.html#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptionsE" title="arrow::dataset::FileSystemDatasetWriteOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::FileSystemDatasetWriteOptions</span></code></a>.
<a class="reference internal" href="../api/dataset.html#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptionsE" title="arrow::dataset::FileSystemDatasetWriteOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::FileSystemDatasetWriteOptions</span></code></a> provides
control over the written dataset, including options like the output
directory, file naming scheme, and so on.</p>
<p>Write example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">663</span><span class="c1">/// \brief An example showing a write node</span>
<span class="linenos">664</span><span class="c1">/// \param file_path The destination to write to</span>
<span class="linenos">665</span><span class="c1">///</span>
<span class="linenos">666</span><span class="c1">/// Scan-Filter-Write</span>
<span class="linenos">667</span><span class="c1">/// This example shows how scan node can be used to load the data</span>
<span class="linenos">668</span><span class="c1">/// and after processing how it can be written to disk.</span>
<span class="linenos">669</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">ScanFilterWriteExample</span><span class="p">(</span><span class="k">const</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span><span class="w"> </span><span class="n">file_path</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">670</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">&gt;</span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span>
<span class="linenos">671</span>
<span class="linenos">672</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">&gt;</span><span class="p">();</span>
<span class="linenos">673</span><span class="w"> </span><span class="c1">// empty projection</span>
<span class="linenos">674</span><span class="w"> </span><span class="n">options</span><span class="o">-&gt;</span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span>
<span class="linenos">675</span>
<span class="linenos">676</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span>
<span class="linenos">677</span>
<span class="linenos">678</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">&quot;scan&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span>
<span class="linenos">679</span>
<span class="linenos">680</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span>
<span class="linenos">681</span>
<span class="linenos">682</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="w"> </span><span class="n">root_path</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">&quot;&quot;</span><span class="p">;</span>
<span class="linenos">683</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="w"> </span><span class="n">uri</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">&quot;file://&quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">file_path</span><span class="p">;</span>
<span class="linenos">684</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;</span><span class="w"> </span><span class="n">filesystem</span><span class="p">,</span>
<span class="linenos">685</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystemFromUri</span><span class="p">(</span><span class="n">uri</span><span class="p">,</span><span class="w"> </span><span class="o">&amp;</span><span class="n">root_path</span><span class="p">));</span>
<span class="linenos">686</span>
<span class="linenos">687</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">base_path</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">root_path</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="s">&quot;/parquet_dataset&quot;</span><span class="p">;</span>
<span class="linenos">688</span><span class="w"> </span><span class="c1">// Uncomment the following line, if run repeatedly</span>
<span class="linenos">689</span><span class="w"> </span><span class="c1">// ARROW_RETURN_NOT_OK(filesystem-&gt;DeleteDirContents(base_path));</span>
<span class="linenos">690</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span>
<span class="linenos">691</span>
<span class="linenos">692</span><span class="w"> </span><span class="c1">// The partition schema determines which fields are part of the partitioning.</span>
<span class="linenos">693</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">partition_schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">())});</span>
<span class="linenos">694</span><span class="w"> </span><span class="c1">// We&#39;ll use Hive-style partitioning,</span>
<span class="linenos">695</span><span class="w"> </span><span class="c1">// which creates directories with &quot;key=value&quot; pairs.</span>
<span class="linenos">696</span>
<span class="linenos">697</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">partitioning</span><span class="w"> </span><span class="o">=</span>
<span class="linenos">698</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">&gt;</span><span class="p">(</span><span class="n">partition_schema</span><span class="p">);</span>
<span class="linenos">699</span><span class="w"> </span><span class="c1">// We&#39;ll write Parquet files.</span>
<span class="linenos">700</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">format</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ParquetFileFormat</span><span class="o">&gt;</span><span class="p">();</span>
<span class="linenos">701</span>
<span class="linenos">702</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">FileSystemDatasetWriteOptions</span><span class="w"> </span><span class="n">write_options</span><span class="p">;</span>
<span class="linenos">703</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">file_write_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">format</span><span class="o">-&gt;</span><span class="n">DefaultWriteOptions</span><span class="p">();</span>
<span class="linenos">704</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">filesystem</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">filesystem</span><span class="p">;</span>
<span class="linenos">705</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">base_dir</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">base_path</span><span class="p">;</span>
<span class="linenos">706</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">partitioning</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">partitioning</span><span class="p">;</span>
<span class="linenos">707</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">basename_template</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">&quot;part{i}.parquet&quot;</span><span class="p">;</span>
<span class="linenos">708</span>
<span class="linenos">709</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">WriteNodeOptions</span><span class="w"> </span><span class="n">write_node_options</span><span class="p">{</span><span class="n">write_options</span><span class="p">};</span>
<span class="linenos">710</span>
<span class="linenos">711</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">write</span><span class="p">{</span><span class="s">&quot;write&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">)},</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">write_node_options</span><span class="p">)};</span>
<span class="linenos">712</span>
<span class="linenos">713</span><span class="w"> </span><span class="c1">// Since the write node has no output we simply run the plan to completion and the</span>
<span class="linenos">714</span><span class="w"> </span><span class="c1">// data should be written</span>
<span class="linenos">715</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">DeclarationToStatus</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">write</span><span class="p">)));</span>
<span class="linenos">716</span>
<span class="linenos">717</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;Dataset written to &quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">base_path</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">718</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span>
<span class="linenos">719</span><span class="p">}</span>
</pre></div>
</div>
</section>
<section id="union">
<span id="stream-execution-union-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">union</span></code><a class="headerlink" href="#union" title="Permalink to this heading">#</a></h3>
<p><code class="docutils literal notranslate"><span class="pre">union</span></code> merges multiple data streams with the same schema into one, similar to
a SQL <code class="docutils literal notranslate"><span class="pre">UNION</span> <span class="pre">ALL</span></code> clause.</p>
<p>The following example demonstrates how this can be achieved using
two data sources.</p>
<p>Union example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">725</span><span class="c1">/// \brief An example showing a union node</span>
<span class="linenos">726</span><span class="c1">///</span>
<span class="linenos">727</span><span class="c1">/// Source-Union-Table</span>
<span class="linenos">728</span><span class="c1">/// This example shows how a union operation can be applied on two</span>
<span class="linenos">729</span><span class="c1">/// data sources. The output is collected into a table.</span>
<span class="linenos">730</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceUnionSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">731</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span>
<span class="linenos">732</span>
<span class="linenos">733</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">lhs</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span>
<span class="linenos">734</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()}};</span>
<span class="linenos">735</span><span class="w"> </span><span class="n">lhs</span><span class="p">.</span><span class="n">label</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">&quot;lhs&quot;</span><span class="p">;</span>
<span class="linenos">736</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">rhs</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span>
<span class="linenos">737</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()}};</span>
<span class="linenos">738</span><span class="w"> </span><span class="n">rhs</span><span class="p">.</span><span class="n">label</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">&quot;rhs&quot;</span><span class="p">;</span>
<span class="linenos">739</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">union_plan</span><span class="p">{</span>
<span class="linenos">740</span><span class="w"> </span><span class="s">&quot;union&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">lhs</span><span class="p">),</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">rhs</span><span class="p">)},</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecNodeOptions</span><span class="p">{}};</span>
<span class="linenos">741</span>
<span class="linenos">742</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">union_plan</span><span class="p">));</span>
<span class="linenos">743</span><span class="p">}</span>
</pre></div>
</div>
</section>
<section id="hash-join">
<span id="stream-execution-hashjoin-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">hash_join</span></code><a class="headerlink" href="#hash-join" title="Permalink to this heading">#</a></h3>
<p>The <code class="docutils literal notranslate"><span class="pre">hash_join</span></code> operation provides the relational algebra join operation, implemented using a hash-based
algorithm. <a class="reference internal" href="../api/acero.html#_CPPv4N5arrow5acero19HashJoinNodeOptionsE" title="arrow::acero::HashJoinNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">HashJoinNodeOptions</span></code></a> contains the options required to
define a join. The hash_join operation supports
<a class="reference external" href="https://en.wikipedia.org/wiki/Join_(SQL)">left/right/full semi/anti/outer joins</a>.
The join keys (i.e. the column(s) to join on) and suffixes (i.e. a term like “_x”
that is appended to column names duplicated in both the left and right
relations) can also be set via the join options.
<a class="reference external" href="https://en.wikipedia.org/wiki/Hash_join">Read more on hash joins</a>.</p>
<p>Hash-Join example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">604</span><span class="c1">/// \brief An example showing a hash join node</span>
<span class="linenos">605</span><span class="c1">///</span>
<span class="linenos">606</span><span class="c1">/// Source-HashJoin-Table</span>
<span class="linenos">607</span><span class="c1">/// This example shows how source node gets the data and how a self-join</span>
<span class="linenos">608</span><span class="c1">/// is applied on the data. The join options are configurable. The output</span>
<span class="linenos">609</span><span class="c1">/// is collected into a table.</span>
<span class="linenos">610</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceHashJoinSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">611</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="n">MakeGroupableBatches</span><span class="p">());</span>
<span class="linenos">612</span>
<span class="linenos">613</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">left</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">input</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">input</span><span class="p">.</span><span class="n">gen</span><span class="p">()}};</span>
<span class="linenos">614</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">right</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">input</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">input</span><span class="p">.</span><span class="n">gen</span><span class="p">()}};</span>
<span class="linenos">615</span>
<span class="linenos">616</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">HashJoinNodeOptions</span><span class="w"> </span><span class="n">join_opts</span><span class="p">{</span>
<span class="linenos">617</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">JoinType</span><span class="o">::</span><span class="n">INNER</span><span class="p">,</span>
<span class="linenos">618</span><span class="w"> </span><span class="cm">/*left_keys=*/</span><span class="p">{</span><span class="s">&quot;str&quot;</span><span class="p">},</span>
<span class="linenos">619</span><span class="w"> </span><span class="cm">/*right_keys=*/</span><span class="p">{</span><span class="s">&quot;str&quot;</span><span class="p">},</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="nb">true</span><span class="p">),</span><span class="w"> </span><span class="s">&quot;l_&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;r_&quot;</span><span class="p">};</span>
<span class="linenos">620</span>
<span class="linenos">621</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">hashjoin</span><span class="p">{</span>
<span class="linenos">622</span><span class="w"> </span><span class="s">&quot;hashjoin&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">left</span><span class="p">),</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">right</span><span class="p">)},</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">join_opts</span><span class="p">)};</span>
<span class="linenos">623</span>
<span class="linenos">624</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">hashjoin</span><span class="p">));</span>
<span class="linenos">625</span><span class="p">}</span>
</pre></div>
</div>
</section>
</section>
<section id="summary">
<h2>Summary<a class="headerlink" href="#summary" title="Permalink to this heading">#</a></h2>
<p>Examples of these nodes can be found in
<code class="docutils literal notranslate"><span class="pre">cpp/examples/arrow/execution_plan_documentation_examples.cc</span></code> in the Arrow source.</p>
<p>Complete Example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos"> 19</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/array.h&gt;</span>
<span class="linenos"> 20</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/builder.h&gt;</span>
<span class="linenos"> 21</span>
<span class="linenos"> 22</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/acero/exec_plan.h&gt;</span>
<span class="linenos"> 23</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/compute/api.h&gt;</span>
<span class="linenos"> 24</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/compute/api_vector.h&gt;</span>
<span class="linenos"> 25</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/compute/cast.h&gt;</span>
<span class="linenos"> 26</span>
<span class="linenos"> 27</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/csv/api.h&gt;</span>
<span class="linenos"> 28</span>
<span class="linenos"> 29</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/dataset/dataset.h&gt;</span>
<span class="linenos"> 30</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/dataset/file_base.h&gt;</span>
<span class="linenos"> 31</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/dataset/file_parquet.h&gt;</span>
<span class="linenos"> 32</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/dataset/plan.h&gt;</span>
<span class="linenos"> 33</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/dataset/scanner.h&gt;</span>
<span class="linenos"> 34</span>
<span class="linenos"> 35</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/io/interfaces.h&gt;</span>
<span class="linenos"> 36</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/io/memory.h&gt;</span>
<span class="linenos"> 37</span>
<span class="linenos"> 38</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/result.h&gt;</span>
<span class="linenos"> 39</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/status.h&gt;</span>
<span class="linenos"> 40</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/table.h&gt;</span>
<span class="linenos"> 41</span>
<span class="linenos"> 42</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/ipc/api.h&gt;</span>
<span class="linenos"> 43</span>
<span class="linenos"> 44</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/util/future.h&gt;</span>
<span class="linenos"> 45</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/util/range.h&gt;</span>
<span class="linenos"> 46</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/util/thread_pool.h&gt;</span>
<span class="linenos"> 47</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;arrow/util/vector.h&gt;</span>
<span class="linenos"> 48</span>
<span class="linenos"> 49</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;iostream&gt;</span>
<span class="linenos"> 50</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;memory&gt;</span>
<span class="linenos"> 51</span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;utility&gt;</span>
<span class="linenos"> 52</span>
<span class="linenos"> 53</span><span class="c1">// Demonstrate various operators in Arrow Streaming Execution Engine</span>
<span class="linenos"> 54</span>
<span class="linenos"> 55</span><span class="k">namespace</span><span class="w"> </span><span class="nn">cp</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">::</span><span class="nn">arrow</span><span class="o">::</span><span class="nn">compute</span><span class="p">;</span>
<span class="linenos"> 56</span><span class="k">namespace</span><span class="w"> </span><span class="nn">ac</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">::</span><span class="nn">arrow</span><span class="o">::</span><span class="nn">acero</span><span class="p">;</span>
<span class="linenos"> 57</span>
<span class="linenos"> 58</span><span class="k">constexpr</span><span class="w"> </span><span class="kt">char</span><span class="w"> </span><span class="n">kSep</span><span class="p">[]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">&quot;******&quot;</span><span class="p">;</span>
<span class="linenos"> 59</span>
<span class="linenos"> 60</span><span class="kt">void</span><span class="w"> </span><span class="nf">PrintBlock</span><span class="p">(</span><span class="k">const</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span><span class="w"> </span><span class="n">msg</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos"> 61</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;</span><span class="se">\n\t</span><span class="s">&quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">kSep</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot; &quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">msg</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot; &quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">kSep</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;</span><span class="se">\n</span><span class="s">&quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos"> 62</span><span class="p">}</span>
<span class="linenos"> 63</span>
<span class="linenos"> 64</span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="k">typename</span><span class="w"> </span><span class="nc">TYPE</span><span class="p">,</span>
<span class="linenos"> 65</span><span class="w"> </span><span class="k">typename</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">typename</span><span class="w"> </span><span class="nc">std</span><span class="o">::</span><span class="n">enable_if</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">is_number_type</span><span class="o">&lt;</span><span class="n">TYPE</span><span class="o">&gt;::</span><span class="n">value</span><span class="w"> </span><span class="o">|</span>
<span class="linenos"> 66</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">is_boolean_type</span><span class="o">&lt;</span><span class="n">TYPE</span><span class="o">&gt;::</span><span class="n">value</span><span class="w"> </span><span class="o">|</span>
<span class="linenos"> 67</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">is_temporal_type</span><span class="o">&lt;</span><span class="n">TYPE</span><span class="o">&gt;::</span><span class="n">value</span><span class="o">&gt;::</span><span class="n">type</span><span class="o">&gt;</span>
<span class="linenos"> 68</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="p">(</span>
<span class="linenos"> 69</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="k">typename</span><span class="w"> </span><span class="nc">TYPE</span><span class="o">::</span><span class="n">c_type</span><span class="o">&gt;&amp;</span><span class="w"> </span><span class="n">values</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos"> 70</span><span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="n">ArrowBuilderType</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">typename</span><span class="w"> </span><span class="nc">arrow</span><span class="o">::</span><span class="n">TypeTraits</span><span class="o">&lt;</span><span class="n">TYPE</span><span class="o">&gt;::</span><span class="n">BuilderType</span><span class="p">;</span>
<span class="linenos"> 71</span><span class="w"> </span><span class="n">ArrowBuilderType</span><span class="w"> </span><span class="n">builder</span><span class="p">;</span>
<span class="linenos"> 72</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Reserve</span><span class="p">(</span><span class="n">values</span><span class="p">.</span><span class="n">size</span><span class="p">()));</span>
<span class="linenos"> 73</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">(</span><span class="n">values</span><span class="p">));</span>
<span class="linenos"> 74</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">();</span>
<span class="linenos"> 75</span><span class="p">}</span>
<span class="linenos"> 76</span>
<span class="linenos"> 77</span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="k">class</span><span class="w"> </span><span class="nc">TYPE</span><span class="o">&gt;</span>
<span class="linenos"> 78</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">GetBinaryArrayDataSample</span><span class="p">(</span>
<span class="linenos"> 79</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&gt;&amp;</span><span class="w"> </span><span class="n">values</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos"> 80</span><span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="n">ArrowBuilderType</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">typename</span><span class="w"> </span><span class="nc">arrow</span><span class="o">::</span><span class="n">TypeTraits</span><span class="o">&lt;</span><span class="n">TYPE</span><span class="o">&gt;::</span><span class="n">BuilderType</span><span class="p">;</span>
<span class="linenos"> 81</span><span class="w"> </span><span class="n">ArrowBuilderType</span><span class="w"> </span><span class="n">builder</span><span class="p">;</span>
<span class="linenos"> 82</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Reserve</span><span class="p">(</span><span class="n">values</span><span class="p">.</span><span class="n">size</span><span class="p">()));</span>
<span class="linenos"> 83</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">(</span><span class="n">values</span><span class="p">));</span>
<span class="linenos"> 84</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">();</span>
<span class="linenos"> 85</span><span class="p">}</span>
<span class="linenos"> 86</span>
<span class="linenos"> 87</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">RecordBatch</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">GetSampleRecordBatch</span><span class="p">(</span>
<span class="linenos"> 88</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">ArrayVector</span><span class="o">&amp;</span><span class="w"> </span><span class="n">array_vector</span><span class="p">,</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">FieldVector</span><span class="o">&amp;</span><span class="w"> </span><span class="n">field_vector</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos"> 89</span><span class="w"> </span><span class="c1">// Pack the columns into a StructArray, then convert it with the static FromStructArray.</span>
<span class="linenos"> 90</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">struct_result</span><span class="p">,</span>
<span class="linenos"> 91</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">StructArray</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">array_vector</span><span class="p">,</span><span class="w"> </span><span class="n">field_vector</span><span class="p">));</span>
<span class="linenos"> 92</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">RecordBatch</span><span class="o">::</span><span class="n">FromStructArray</span><span class="p">(</span><span class="n">struct_result</span><span class="p">);</span>
<span class="linenos"> 93</span><span class="p">}</span>
<span class="linenos"> 94</span>
<span class="linenos"> 95</span><span class="c1">/// \brief Create a sample table</span>
<span class="linenos"> 96</span><span class="c1">/// The table&#39;s contents will be:</span>
<span class="linenos"> 97</span><span class="c1">/// a,b</span>
<span class="linenos"> 98</span><span class="c1">/// 1,null</span>
<span class="linenos"> 99</span><span class="c1">/// 2,true</span>
<span class="linenos">100</span><span class="c1">/// null,true</span>
<span class="linenos">101</span><span class="c1">/// 3,false</span>
<span class="linenos">102</span><span class="c1">/// null,true</span>
<span class="linenos">103</span><span class="c1">/// 4,false</span>
<span class="linenos">104</span><span class="c1">/// 5,null</span>
<span class="linenos">105</span><span class="c1">/// 6,false</span>
<span class="linenos">106</span><span class="c1">/// 7,false</span>
<span class="linenos">107</span><span class="c1">/// 8,true</span>
<span class="linenos">108</span><span class="c1">/// \return The created table</span>
<span class="linenos">109</span>
<span class="linenos">110</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">GetTable</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">111</span><span class="w"> </span><span class="c1">// Integer types have no NaN, so nulls are expressed with a validity vector (0 is a placeholder).</span>
<span class="linenos">112</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Int64Builder</span><span class="w"> </span><span class="n">int64_builder</span><span class="p">;</span>
<span class="linenos">113</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;</span><span class="w"> </span><span class="n">int64_array</span><span class="p">;</span>
<span class="linenos">114</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="kt">int64_t</span><span class="o">&gt;</span><span class="w"> </span><span class="n">int64_values</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">4</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="mi">6</span><span class="p">,</span><span class="w"> </span><span class="mi">7</span><span class="p">,</span><span class="w"> </span><span class="mi">8</span><span class="p">};</span>
<span class="linenos">115</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="kt">bool</span><span class="o">&gt;</span><span class="w"> </span><span class="n">int64_is_valid</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span>
<span class="linenos">116</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">};</span>
<span class="linenos">117</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">int64_builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">(</span><span class="n">int64_values</span><span class="p">,</span><span class="w"> </span><span class="n">int64_is_valid</span><span class="p">));</span>
<span class="linenos">118</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">int64_builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">int64_array</span><span class="p">));</span>
<span class="linenos">119</span>
<span class="linenos">120</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">BooleanBuilder</span><span class="w"> </span><span class="n">boolean_builder</span><span class="p">;</span>
<span class="linenos">121</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">BooleanArray</span><span class="o">&gt;</span><span class="w"> </span><span class="n">bool_array</span><span class="p">;</span>
<span class="linenos">122</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="kt">uint8_t</span><span class="o">&gt;</span><span class="w"> </span><span class="n">bool_values</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span>
<span class="linenos">123</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">};</span>
<span class="linenos">124</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="kt">bool</span><span class="o">&gt;</span><span class="w"> </span><span class="n">is_valid</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span>
<span class="linenos">125</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">};</span>
<span class="linenos">126</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">boolean_builder</span><span class="p">.</span><span class="n">Reserve</span><span class="p">(</span><span class="mi">10</span><span class="p">));</span>
<span class="linenos">127</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">boolean_builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">(</span><span class="n">bool_values</span><span class="p">,</span><span class="w"> </span><span class="n">is_valid</span><span class="p">));</span>
<span class="linenos">128</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">boolean_builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">bool_array</span><span class="p">));</span>
<span class="linenos">129</span>
<span class="linenos">130</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">record_batch</span><span class="w"> </span><span class="o">=</span>
<span class="linenos">131</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">RecordBatch</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span>
<span class="linenos">132</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">boolean</span><span class="p">())}),</span>
<span class="linenos">133</span><span class="w"> </span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">int64_array</span><span class="p">,</span><span class="w"> </span><span class="n">bool_array</span><span class="p">});</span>
<span class="linenos">134</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">table</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">FromRecordBatches</span><span class="p">({</span><span class="n">record_batch</span><span class="p">}));</span>
<span class="linenos">135</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">table</span><span class="p">;</span>
<span class="linenos">136</span><span class="p">}</span>
<span class="linenos">137</span>
<span class="linenos">138</span><span class="c1">/// \brief Build an in-memory dataset over the sample table</span>
<span class="linenos">139</span><span class="c1">/// \return A dataset wrapping the table produced by GetTable()</span>
<span class="linenos">140</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">141</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">sample_table</span><span class="p">,</span><span class="w"> </span><span class="n">GetTable</span><span class="p">());</span>
<span class="linenos">142</span><span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">InMemoryDataset</span><span class="p">;</span>
<span class="linenos">143</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">InMemoryDataset</span><span class="o">&gt;</span><span class="p">(</span><span class="n">sample_table</span><span class="p">);</span>
<span class="linenos">144</span><span class="p">}</span>
<span class="linenos">145</span>
<span class="linenos">146</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span>
<span class="linenos">147</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">FieldVector</span><span class="o">&amp;</span><span class="w"> </span><span class="n">field_vector</span><span class="p">,</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">ArrayVector</span><span class="o">&amp;</span><span class="w"> </span><span class="n">array_vector</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">148</span><span class="w"> </span><span class="c1">// Assemble a record batch from the columns, then wrap it as an ExecBatch.</span>
<span class="linenos">149</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">res_batch</span><span class="p">,</span><span class="w"> </span><span class="n">GetSampleRecordBatch</span><span class="p">(</span><span class="n">array_vector</span><span class="p">,</span><span class="w"> </span><span class="n">field_vector</span><span class="p">));</span>
<span class="linenos">150</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="w"> </span><span class="n">batch</span><span class="p">{</span><span class="o">*</span><span class="n">res_batch</span><span class="p">};</span>
<span class="linenos">151</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">batch</span><span class="p">;</span>
<span class="linenos">152</span><span class="p">}</span>
<span class="linenos">153</span>
<span class="linenos">154</span><span class="c1">// (Doc section: BatchesWithSchema Definition)</span>
<span class="linenos">155</span><span class="k">struct</span><span class="w"> </span><span class="nc">BatchesWithSchema</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">156</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;</span><span class="w"> </span><span class="n">batches</span><span class="p">;</span>
<span class="linenos">157</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Schema</span><span class="o">&gt;</span><span class="w"> </span><span class="n">schema</span><span class="p">;</span>
<span class="linenos">158</span><span class="w"> </span><span class="c1">// Exposes the stored batches as an AsyncGenerator:</span>
<span class="linenos">159</span><span class="w"> </span><span class="c1">// each ExecBatch is wrapped in a std::optional (via the internal arrow</span>
<span class="linenos">160</span><span class="w"> </span><span class="c1">// MapVector utility) and the resulting vector backs the generator.</span>
<span class="linenos">161</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">gen</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">162</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">wrap</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">[](</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="w"> </span><span class="n">batch</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">163</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_optional</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">batch</span><span class="p">));</span>
<span class="linenos">164</span><span class="w"> </span><span class="p">};</span>
<span class="linenos">165</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">opt_batches</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">internal</span><span class="o">::</span><span class="n">MapVector</span><span class="p">(</span><span class="n">wrap</span><span class="p">,</span><span class="w"> </span><span class="n">batches</span><span class="p">);</span>
<span class="linenos">166</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">MakeVectorGenerator</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">opt_batches</span><span class="p">));</span>
<span class="linenos">167</span><span class="w"> </span><span class="p">}</span>
<span class="linenos">168</span><span class="p">};</span>
<span class="linenos">169</span><span class="c1">// (Doc section: BatchesWithSchema Definition)</span>
<span class="linenos">170</span>
<span class="linenos">171</span><span class="c1">// (Doc section: MakeBasicBatches Definition)</span>
<span class="linenos">172</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o">&lt;</span><span class="n">BatchesWithSchema</span><span class="o">&gt;</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">173</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">FieldVector</span><span class="w"> </span><span class="n">fields</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">()),</span>
<span class="linenos">174</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">boolean</span><span class="p">())};</span>
<span class="linenos">175</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">int_col1</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">4</span><span class="p">}));</span>
<span class="linenos">176</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">int_col2</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="mi">6</span><span class="p">,</span><span class="w"> </span><span class="mi">7</span><span class="p">}));</span>
<span class="linenos">177</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">int_col3</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">8</span><span class="p">,</span><span class="w"> </span><span class="mi">9</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">}));</span>
<span class="linenos">178</span><span class="w"> </span><span class="c1">// Column &quot;b&quot; for each of the three batches.</span>
<span class="linenos">179</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">bool_col1</span><span class="p">,</span>
<span class="linenos">180</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">BooleanType</span><span class="o">&gt;</span><span class="p">({</span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">}));</span>
<span class="linenos">181</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">bool_col2</span><span class="p">,</span>
<span class="linenos">182</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">BooleanType</span><span class="o">&gt;</span><span class="p">({</span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">}));</span>
<span class="linenos">183</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">bool_col3</span><span class="p">,</span>
<span class="linenos">184</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">BooleanType</span><span class="o">&gt;</span><span class="p">({</span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">}));</span>
<span class="linenos">185</span><span class="w"> </span><span class="c1">// Pair the columns up into exec batches.</span>
<span class="linenos">186</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">batch1</span><span class="p">,</span>
<span class="linenos">187</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">fields</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">int_col1</span><span class="p">,</span><span class="w"> </span><span class="n">bool_col1</span><span class="p">}));</span>
<span class="linenos">188</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">batch2</span><span class="p">,</span>
<span class="linenos">189</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">fields</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">int_col2</span><span class="p">,</span><span class="w"> </span><span class="n">bool_col2</span><span class="p">}));</span>
<span class="linenos">190</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">batch3</span><span class="p">,</span>
<span class="linenos">191</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">fields</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">int_col3</span><span class="p">,</span><span class="w"> </span><span class="n">bool_col3</span><span class="p">}));</span>
<span class="linenos">192</span><span class="w"> </span><span class="c1">// Bundle the batches with their schema.</span>
<span class="linenos">193</span><span class="w"> </span><span class="n">BatchesWithSchema</span><span class="w"> </span><span class="n">result</span><span class="p">;</span>
<span class="linenos">194</span><span class="w"> </span><span class="n">result</span><span class="p">.</span><span class="n">batches</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="n">batch1</span><span class="p">,</span><span class="w"> </span><span class="n">batch2</span><span class="p">,</span><span class="w"> </span><span class="n">batch3</span><span class="p">};</span>
<span class="linenos">195</span><span class="w"> </span><span class="n">result</span><span class="p">.</span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">(</span><span class="n">fields</span><span class="p">);</span>
<span class="linenos">196</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">result</span><span class="p">;</span>
<span class="linenos">197</span><span class="p">}</span>
<span class="linenos">198</span><span class="c1">// (Doc section: MakeBasicBatches Definition)</span>
<span class="linenos">199</span>
<span class="linenos">200</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o">&lt;</span><span class="n">BatchesWithSchema</span><span class="o">&gt;</span><span class="w"> </span><span class="n">MakeSortTestBasicBatches</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">201</span><span class="w"> </span><span class="n">BatchesWithSchema</span><span class="w"> </span><span class="n">out</span><span class="p">;</span>
<span class="linenos">202</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">field</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">());</span>
<span class="linenos">203</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="p">}));</span>
<span class="linenos">204</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2_int</span><span class="p">,</span>
<span class="linenos">205</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">121</span><span class="p">,</span><span class="w"> </span><span class="mi">101</span><span class="p">,</span><span class="w"> </span><span class="mi">120</span><span class="p">,</span><span class="w"> </span><span class="mi">12</span><span class="p">}));</span>
<span class="linenos">206</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3_int</span><span class="p">,</span>
<span class="linenos">207</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">110</span><span class="p">,</span><span class="w"> </span><span class="mi">210</span><span class="p">,</span><span class="w"> </span><span class="mi">121</span><span class="p">}));</span>
<span class="linenos">208</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b4_int</span><span class="p">,</span>
<span class="linenos">209</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">51</span><span class="p">,</span><span class="w"> </span><span class="mi">101</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">34</span><span class="p">}));</span>
<span class="linenos">210</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b5_int</span><span class="p">,</span>
<span class="linenos">211</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">11</span><span class="p">,</span><span class="w"> </span><span class="mi">31</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">12</span><span class="p">}));</span>
<span class="linenos">212</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b6_int</span><span class="p">,</span>
<span class="linenos">213</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">12</span><span class="p">,</span><span class="w"> </span><span class="mi">101</span><span class="p">,</span><span class="w"> </span><span class="mi">120</span><span class="p">,</span><span class="w"> </span><span class="mi">12</span><span class="p">}));</span>
<span class="linenos">214</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b7_int</span><span class="p">,</span>
<span class="linenos">215</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">110</span><span class="p">,</span><span class="w"> </span><span class="mi">210</span><span class="p">,</span><span class="w"> </span><span class="mi">11</span><span class="p">}));</span>
<span class="linenos">216</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b8_int</span><span class="p">,</span>
<span class="linenos">217</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">51</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">}));</span>
<span class="linenos">218</span>
<span class="linenos">219</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1</span><span class="p">,</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">({</span><span class="n">field</span><span class="p">},</span><span class="w"> </span><span class="p">{</span><span class="n">b1_int</span><span class="p">}));</span>
<span class="linenos">220</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2</span><span class="p">,</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">({</span><span class="n">field</span><span class="p">},</span><span class="w"> </span><span class="p">{</span><span class="n">b2_int</span><span class="p">}));</span>
<span class="linenos">221</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3</span><span class="p">,</span>
<span class="linenos">222</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">({</span><span class="n">field</span><span class="p">,</span><span class="w"> </span><span class="n">field</span><span class="p">},</span><span class="w"> </span><span class="p">{</span><span class="n">b3_int</span><span class="p">,</span><span class="w"> </span><span class="n">b8_int</span><span class="p">}));</span>
<span class="linenos">223</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b4</span><span class="p">,</span>
<span class="linenos">224</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">({</span><span class="n">field</span><span class="p">,</span><span class="w"> </span><span class="n">field</span><span class="p">,</span><span class="w"> </span><span class="n">field</span><span class="p">,</span><span class="w"> </span><span class="n">field</span><span class="p">},</span>
<span class="linenos">225</span><span class="w"> </span><span class="p">{</span><span class="n">b4_int</span><span class="p">,</span><span class="w"> </span><span class="n">b5_int</span><span class="p">,</span><span class="w"> </span><span class="n">b6_int</span><span class="p">,</span><span class="w"> </span><span class="n">b7_int</span><span class="p">}));</span>
<span class="linenos">226</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">batches</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="n">b1</span><span class="p">,</span><span class="w"> </span><span class="n">b2</span><span class="p">,</span><span class="w"> </span><span class="n">b3</span><span class="p">,</span><span class="w"> </span><span class="n">b4</span><span class="p">};</span>
<span class="linenos">227</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">field</span><span class="p">});</span>
<span class="linenos">228</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">out</span><span class="p">;</span>
<span class="linenos">229</span><span class="p">}</span>
<span class="linenos">230</span>
<span class="linenos">231</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o">&lt;</span><span class="n">BatchesWithSchema</span><span class="o">&gt;</span><span class="w"> </span><span class="n">MakeGroupableBatches</span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">multiplicity</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">1</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">232</span><span class="w"> </span><span class="n">BatchesWithSchema</span><span class="w"> </span><span class="n">out</span><span class="p">;</span>
<span class="linenos">233</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">fields</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;i32&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">()),</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;str&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())};</span>
<span class="linenos">234</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">12</span><span class="p">,</span><span class="w"> </span><span class="mi">7</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">}));</span>
<span class="linenos">235</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">-2</span><span class="p">,</span><span class="w"> </span><span class="mi">-1</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">}));</span>
<span class="linenos">236</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">&gt;</span><span class="p">({</span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">,</span><span class="w"> </span><span class="mi">-8</span><span class="p">}));</span>
<span class="linenos">237</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1_str</span><span class="p">,</span><span class="w"> </span><span class="n">GetBinaryArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">StringType</span><span class="o">&gt;</span><span class="p">(</span>
<span class="linenos">238</span><span class="w"> </span><span class="p">{</span><span class="s">&quot;alpha&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;beta&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;alpha&quot;</span><span class="p">}));</span>
<span class="linenos">239</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2_str</span><span class="p">,</span><span class="w"> </span><span class="n">GetBinaryArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">StringType</span><span class="o">&gt;</span><span class="p">(</span>
<span class="linenos">240</span><span class="w"> </span><span class="p">{</span><span class="s">&quot;alpha&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;gamma&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;alpha&quot;</span><span class="p">}));</span>
<span class="linenos">241</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3_str</span><span class="p">,</span><span class="w"> </span><span class="n">GetBinaryArrayDataSample</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">StringType</span><span class="o">&gt;</span><span class="p">(</span>
<span class="linenos">242</span><span class="w"> </span><span class="p">{</span><span class="s">&quot;gamma&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;beta&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;alpha&quot;</span><span class="p">}));</span>
<span class="linenos">243</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1</span><span class="p">,</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">fields</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">b1_int</span><span class="p">,</span><span class="w"> </span><span class="n">b1_str</span><span class="p">}));</span>
<span class="linenos">244</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2</span><span class="p">,</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">fields</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">b2_int</span><span class="p">,</span><span class="w"> </span><span class="n">b2_str</span><span class="p">}));</span>
<span class="linenos">245</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3</span><span class="p">,</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">fields</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">b3_int</span><span class="p">,</span><span class="w"> </span><span class="n">b3_str</span><span class="p">}));</span>
<span class="linenos">246</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">batches</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="n">b1</span><span class="p">,</span><span class="w"> </span><span class="n">b2</span><span class="p">,</span><span class="w"> </span><span class="n">b3</span><span class="p">};</span>
<span class="linenos">247</span>
<span class="linenos">248</span><span class="w"> </span><span class="kt">size_t</span><span class="w"> </span><span class="n">batch_count</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">batches</span><span class="p">.</span><span class="n">size</span><span class="p">();</span>
<span class="linenos">249</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">repeat</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">1</span><span class="p">;</span><span class="w"> </span><span class="n">repeat</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="n">multiplicity</span><span class="p">;</span><span class="w"> </span><span class="o">++</span><span class="n">repeat</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">250</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">size_t</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="n">batch_count</span><span class="p">;</span><span class="w"> </span><span class="o">++</span><span class="n">i</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">251</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">batches</span><span class="p">.</span><span class="n">push_back</span><span class="p">(</span><span class="n">out</span><span class="p">.</span><span class="n">batches</span><span class="p">[</span><span class="n">i</span><span class="p">]);</span>
<span class="linenos">252</span><span class="w"> </span><span class="p">}</span>
<span class="linenos">253</span><span class="w"> </span><span class="p">}</span>
<span class="linenos">254</span>
<span class="linenos">255</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">(</span><span class="n">fields</span><span class="p">);</span>
<span class="linenos">256</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">out</span><span class="p">;</span>
<span class="linenos">257</span><span class="p">}</span>
<span class="linenos">258</span>
<span class="linenos">259</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">plan</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">260</span><span class="w"> </span><span class="c1">// execute the plan and collect its output into a Table</span>
<span class="linenos">261</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span><span class="w"> </span><span class="n">response_table</span><span class="p">;</span>
<span class="linenos">262</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">response_table</span><span class="p">,</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">DeclarationToTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">plan</span><span class="p">)));</span>
<span class="linenos">263</span>
<span class="linenos">264</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;Results : &quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">response_table</span><span class="o">-&gt;</span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">265</span>
<span class="linenos">266</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span>
<span class="linenos">267</span><span class="p">}</span>
<span class="linenos">268</span>
<span class="linenos">269</span><span class="c1">// (Doc section: Scan Example)</span>
<span class="linenos">270</span>
<span class="linenos">271</span><span class="c1">/// \brief An example demonstrating a scan and sink node</span>
<span class="linenos">272</span><span class="c1">///</span>
<span class="linenos">273</span><span class="c1">/// Scan-Table</span>
<span class="linenos">274</span><span class="c1">/// This example shows how a scan operation can be applied to a dataset.</span>
<span class="linenos">275</span><span class="c1">/// There are operations that can be applied on the scan (project, filter)</span>
<span class="linenos">276</span><span class="c1">/// and the input data can be processed. The output is obtained as a table</span>
<span class="linenos">277</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">ScanSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">278</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">&gt;</span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span>
<span class="linenos">279</span>
<span class="linenos">280</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">&gt;</span><span class="p">();</span>
<span class="linenos">281</span><span class="w"> </span><span class="n">options</span><span class="o">-&gt;</span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span><span class="w"> </span><span class="c1">// create empty projection</span>
<span class="linenos">282</span>
<span class="linenos">283</span><span class="w"> </span><span class="c1">// construct the scan node</span>
<span class="linenos">284</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span>
<span class="linenos">285</span>
<span class="linenos">286</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">&quot;scan&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span>
<span class="linenos">287</span>
<span class="linenos">288</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">));</span>
<span class="linenos">289</span><span class="p">}</span>
<span class="linenos">290</span><span class="c1">// (Doc section: Scan Example)</span>
<span class="linenos">291</span>
<span class="linenos">292</span><span class="c1">// (Doc section: Source Example)</span>
<span class="linenos">293</span>
<span class="linenos">294</span><span class="c1">/// \brief An example demonstrating a source and sink node</span>
<span class="linenos">295</span><span class="c1">///</span>
<span class="linenos">296</span><span class="c1">/// Source-Table Example</span>
<span class="linenos">297</span><span class="c1">/// This example shows how a custom source can be used</span>
<span class="linenos">298</span><span class="c1">/// in an execution plan. This includes a source node using pregenerated</span>
<span class="linenos">299</span><span class="c1">/// data and collecting it into a table.</span>
<span class="linenos">300</span><span class="c1">///</span>
<span class="linenos">301</span><span class="c1">/// This sort of custom source is often not needed. In most cases you can</span>
<span class="linenos">302</span><span class="c1">/// use a scan (for a dataset source) or a source like table_source, array_vector_source,</span>
<span class="linenos">303</span><span class="c1">/// exec_batch_source, or record_batch_source (for in-memory data)</span>
<span class="linenos">304</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">SourceSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">305</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span>
<span class="linenos">306</span>
<span class="linenos">307</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span>
<span class="linenos">308</span>
<span class="linenos">309</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span>
<span class="linenos">310</span>
<span class="linenos">311</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">));</span>
<span class="linenos">312</span><span class="p">}</span>
<span class="linenos">313</span><span class="c1">// (Doc section: Source Example)</span>
<span class="linenos">314</span>
<span class="linenos">315</span><span class="c1">// (Doc section: Table Source Example)</span>
<span class="linenos">316</span>
<span class="linenos">317</span><span class="c1">/// \brief An example showing a table source node</span>
<span class="linenos">318</span><span class="c1">///</span>
<span class="linenos">319</span><span class="c1">/// TableSource-Table Example</span>
<span class="linenos">320</span><span class="c1">/// This example shows how a table_source can be used</span>
<span class="linenos">321</span><span class="c1">/// in an execution plan. This includes a table source node</span>
<span class="linenos">322</span><span class="c1">/// receiving data from a table. This plan simply collects the</span>
<span class="linenos">323</span><span class="c1">/// data back into a table but nodes could be added that modify</span>
<span class="linenos">324</span><span class="c1">/// or transform the data as well (as is shown in later examples)</span>
<span class="linenos">325</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">TableSourceSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">326</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">table</span><span class="p">,</span><span class="w"> </span><span class="n">GetTable</span><span class="p">());</span>
<span class="linenos">327</span>
<span class="linenos">328</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span>
<span class="linenos">329</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">max_batch_size</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">2</span><span class="p">;</span>
<span class="linenos">330</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">table_source_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">TableSourceNodeOptions</span><span class="p">{</span><span class="n">table</span><span class="p">,</span><span class="w"> </span><span class="n">max_batch_size</span><span class="p">};</span>
<span class="linenos">331</span>
<span class="linenos">332</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">&quot;table_source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">table_source_options</span><span class="p">)};</span>
<span class="linenos">333</span>
<span class="linenos">334</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">));</span>
<span class="linenos">335</span><span class="p">}</span>
<span class="linenos">336</span><span class="c1">// (Doc section: Table Source Example)</span>
<span class="linenos">337</span>
<span class="linenos">338</span><span class="c1">// (Doc section: Filter Example)</span>
<span class="linenos">339</span>
<span class="linenos">340</span><span class="c1">/// \brief An example showing a filter node</span>
<span class="linenos">341</span><span class="c1">///</span>
<span class="linenos">342</span><span class="c1">/// Scan-Filter-Table</span>
<span class="linenos">343</span><span class="c1">/// This example shows how a filter can be used in an execution plan,</span>
<span class="linenos">344</span><span class="c1">/// to filter data from a source. The output from the execution plan</span>
<span class="linenos">345</span><span class="c1">/// is collected into a table.</span>
<span class="linenos">346</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">ScanFilterSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">347</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">&gt;</span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span>
<span class="linenos">348</span>
<span class="linenos">349</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">&gt;</span><span class="p">();</span>
<span class="linenos">350</span><span class="w"> </span><span class="c1">// specify the filter. This filter keeps only the rows where the</span>
<span class="linenos">351</span><span class="w"> </span><span class="c1">// value of the &quot;a&quot; column is greater than 3.</span>
<span class="linenos">352</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">Expression</span><span class="w"> </span><span class="n">filter_expr</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">greater</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">),</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">3</span><span class="p">));</span>
<span class="linenos">353</span><span class="w"> </span><span class="c1">// set filter for scanner : on-disk / push-down filtering.</span>
<span class="linenos">354</span><span class="w"> </span><span class="c1">// This step can be skipped if you are not reading from disk.</span>
<span class="linenos">355</span><span class="w"> </span><span class="n">options</span><span class="o">-&gt;</span><span class="n">filter</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">filter_expr</span><span class="p">;</span>
<span class="linenos">356</span><span class="w"> </span><span class="c1">// empty projection</span>
<span class="linenos">357</span><span class="w"> </span><span class="n">options</span><span class="o">-&gt;</span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span>
<span class="linenos">358</span>
<span class="linenos">359</span><span class="w"> </span><span class="c1">// construct the scan node</span>
<span class="linenos">360</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;Initialized Scanning Options&quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">361</span>
<span class="linenos">362</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span>
<span class="linenos">363</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;Scan node options created&quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">364</span>
<span class="linenos">365</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">&quot;scan&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span>
<span class="linenos">366</span>
<span class="linenos">367</span><span class="w"> </span><span class="c1">// pipe the scan node into the filter node</span>
<span class="linenos">368</span><span class="w"> </span><span class="c1">// Need to set the filter in scan node options and filter node options.</span>
<span class="linenos">369</span><span class="w"> </span><span class="c1">// At scan node it is used for on-disk / push-down filtering.</span>
<span class="linenos">370</span><span class="w"> </span><span class="c1">// At filter node it is used for in-memory filtering.</span>
<span class="linenos">371</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">filter</span><span class="p">{</span>
<span class="linenos">372</span><span class="w"> </span><span class="s">&quot;filter&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">)},</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">FilterNodeOptions</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">filter_expr</span><span class="p">))};</span>
<span class="linenos">373</span>
<span class="linenos">374</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">filter</span><span class="p">));</span>
<span class="linenos">375</span><span class="p">}</span>
<span class="linenos">376</span>
<span class="linenos">377</span><span class="c1">// (Doc section: Filter Example)</span>
<span class="linenos">378</span>
<span class="linenos">379</span><span class="c1">// (Doc section: Project Example)</span>
<span class="linenos">380</span>
<span class="linenos">381</span><span class="c1">/// \brief An example showing a project node</span>
<span class="linenos">382</span><span class="c1">///</span>
<span class="linenos">383</span><span class="c1">/// Scan-Project-Table</span>
<span class="linenos">384</span><span class="c1">/// This example shows how a Scan operation can be used to load the data</span>
<span class="linenos">385</span><span class="c1">/// into the execution plan, how a project operation can be applied on the</span>
<span class="linenos">386</span><span class="c1">/// data stream and how the output is collected into a table</span>
<span class="linenos">387</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">ScanProjectSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">388</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">&gt;</span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span>
<span class="linenos">389</span>
<span class="linenos">390</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">&gt;</span><span class="p">();</span>
<span class="linenos">391</span><span class="w"> </span><span class="c1">// projection</span>
<span class="linenos">392</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">Expression</span><span class="w"> </span><span class="n">a_times_2</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;multiply&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">),</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">2</span><span class="p">)});</span>
<span class="linenos">393</span><span class="w"> </span><span class="n">options</span><span class="o">-&gt;</span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span>
<span class="linenos">394</span>
<span class="linenos">395</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span>
<span class="linenos">396</span>
<span class="linenos">397</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">&quot;scan&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span>
<span class="linenos">398</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">project</span><span class="p">{</span>
<span class="linenos">399</span><span class="w"> </span><span class="s">&quot;project&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">)},</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ProjectNodeOptions</span><span class="p">({</span><span class="n">a_times_2</span><span class="p">})};</span>
<span class="linenos">400</span>
<span class="linenos">401</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">project</span><span class="p">));</span>
<span class="linenos">402</span><span class="p">}</span>
<span class="linenos">403</span>
<span class="linenos">404</span><span class="c1">// (Doc section: Project Example)</span>
<span class="linenos">405</span>
<span class="linenos">406</span><span class="c1">// This is a variation of ScanProjectSinkExample introducing how to use the</span>
<span class="linenos">407</span><span class="c1">// Declaration::Sequence function</span>
<span class="linenos">408</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">ScanProjectSequenceSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">409</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">&gt;</span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span>
<span class="linenos">410</span>
<span class="linenos">411</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">&gt;</span><span class="p">();</span>
<span class="linenos">412</span><span class="w"> </span><span class="c1">// projection</span>
<span class="linenos">413</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">Expression</span><span class="w"> </span><span class="n">a_times_2</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;multiply&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">),</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">2</span><span class="p">)});</span>
<span class="linenos">414</span><span class="w"> </span><span class="n">options</span><span class="o">-&gt;</span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span>
<span class="linenos">415</span>
<span class="linenos">416</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span>
<span class="linenos">417</span>
<span class="linenos">418</span><span class="w"> </span><span class="c1">// (Doc section: Project Sequence Example)</span>
<span class="linenos">419</span><span class="w"> </span><span class="c1">// Inputs do not have to be passed to the project node when using Sequence</span>
<span class="linenos">420</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">plan</span><span class="w"> </span><span class="o">=</span>
<span class="linenos">421</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="o">::</span><span class="n">Sequence</span><span class="p">({{</span><span class="s">&quot;scan&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)},</span>
<span class="linenos">422</span><span class="w"> </span><span class="p">{</span><span class="s">&quot;project&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ProjectNodeOptions</span><span class="p">({</span><span class="n">a_times_2</span><span class="p">})}});</span>
<span class="linenos">423</span><span class="w"> </span><span class="c1">// (Doc section: Project Sequence Example)</span>
<span class="linenos">424</span>
<span class="linenos">425</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">plan</span><span class="p">));</span>
<span class="linenos">426</span><span class="p">}</span>
<span class="linenos">427</span>
<span class="linenos">428</span><span class="c1">// (Doc section: Scalar Aggregate Example)</span>
<span class="linenos">429</span>
<span class="linenos">430</span><span class="c1">/// \brief An example showing an aggregation node to aggregate an entire table</span>
<span class="linenos">431</span><span class="c1">///</span>
<span class="linenos">432</span><span class="c1">/// Source-Aggregation-Table</span>
<span class="linenos">433</span><span class="c1">/// This example shows how an aggregation operation can be applied on an</span>
<span class="linenos">434</span><span class="c1">/// execution plan resulting in a scalar output. The source node loads the</span>
<span class="linenos">435</span><span class="c1">/// data and the aggregation (summing the values in column &#39;a&#39;)</span>
<span class="linenos">436</span><span class="c1">/// is applied on this data. The output is collected into a table (that will</span>
<span class="linenos">437</span><span class="c1">/// have exactly one row)</span>
<span class="linenos">438</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">SourceScalarAggregateSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">439</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span>
<span class="linenos">440</span>
<span class="linenos">441</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span>
<span class="linenos">442</span>
<span class="linenos">443</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span>
<span class="linenos">444</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">aggregate_options</span><span class="w"> </span><span class="o">=</span>
<span class="linenos">445</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">AggregateNodeOptions</span><span class="p">{</span><span class="cm">/*aggregates=*/</span><span class="p">{{</span><span class="s">&quot;sum&quot;</span><span class="p">,</span><span class="w"> </span><span class="k">nullptr</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;a&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;sum(a)&quot;</span><span class="p">}}};</span>
<span class="linenos">446</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">aggregate</span><span class="p">{</span>
<span class="linenos">447</span><span class="w"> </span><span class="s">&quot;aggregate&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">)},</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">aggregate_options</span><span class="p">)};</span>
<span class="linenos">448</span>
<span class="linenos">449</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">aggregate</span><span class="p">));</span>
<span class="linenos">450</span><span class="p">}</span>
<span class="linenos">451</span><span class="c1">// (Doc section: Scalar Aggregate Example)</span>
<span class="linenos">452</span>
<span class="linenos">453</span><span class="c1">// (Doc section: Group Aggregate Example)</span>
<span class="linenos">454</span>
<span class="linenos">455</span><span class="c1">/// \brief An example showing an aggregation node to perform a group-by operation</span>
<span class="linenos">456</span><span class="c1">///</span>
<span class="linenos">457</span><span class="c1">/// Source-Aggregation-Table</span>
<span class="linenos">458</span><span class="c1">/// This example shows how an aggregation operation can be applied on an</span>
<span class="linenos">459</span><span class="c1">/// execution plan resulting in grouped output. The source node loads the</span>
<span class="linenos">460</span><span class="c1">/// data and the aggregation (counting non-null values in column &#39;a&#39;) is</span>
<span class="linenos">461</span><span class="c1">/// applied on this data. The output is collected into a table that will contain</span>
<span class="linenos">462</span><span class="c1">/// one row for each unique combination of group keys.</span>
<span class="linenos">463</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">SourceGroupAggregateSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">464</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span>
<span class="linenos">465</span>
<span class="linenos">466</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span>
<span class="linenos">467</span>
<span class="linenos">468</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span>
<span class="linenos">469</span>
<span class="linenos">470</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span>
<span class="linenos">471</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">CountOptions</span><span class="o">&gt;</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">CountOptions</span><span class="o">::</span><span class="n">ONLY_VALID</span><span class="p">);</span>
<span class="linenos">472</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">aggregate_options</span><span class="w"> </span><span class="o">=</span>
<span class="linenos">473</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">AggregateNodeOptions</span><span class="p">{</span><span class="cm">/*aggregates=*/</span><span class="p">{{</span><span class="s">&quot;hash_count&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;a&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;count(a)&quot;</span><span class="p">}},</span>
<span class="linenos">474</span><span class="w"> </span><span class="cm">/*keys=*/</span><span class="p">{</span><span class="s">&quot;b&quot;</span><span class="p">}};</span>
<span class="linenos">475</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">aggregate</span><span class="p">{</span>
<span class="linenos">476</span><span class="w"> </span><span class="s">&quot;aggregate&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">)},</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">aggregate_options</span><span class="p">)};</span>
<span class="linenos">477</span>
<span class="linenos">478</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">aggregate</span><span class="p">));</span>
<span class="linenos">479</span><span class="p">}</span>
<span class="linenos">480</span><span class="c1">// (Doc section: Group Aggregate Example)</span>
<span class="linenos">481</span>
<span class="linenos">482</span><span class="c1">// (Doc section: ConsumingSink Example)</span>
<span class="linenos">483</span>
<span class="linenos">484</span><span class="c1">/// \brief An example showing a consuming sink node</span>
<span class="linenos">485</span><span class="c1">///</span>
<span class="linenos">486</span><span class="c1">/// Source-Consuming-Sink</span>
<span class="linenos">487</span><span class="c1">/// This example shows how the data can be consumed within the execution plan</span>
<span class="linenos">488</span><span class="c1">/// by using a ConsumingSink node. There is no data output from this execution plan.</span>
<span class="linenos">489</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">SourceConsumingSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">490</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span>
<span class="linenos">491</span>
<span class="linenos">492</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span>
<span class="linenos">493</span>
<span class="linenos">494</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span>
<span class="linenos">495</span>
<span class="linenos">496</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o">&lt;</span><span class="kt">uint32_t</span><span class="o">&gt;</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">{</span><span class="mi">0</span><span class="p">};</span>
<span class="linenos">497</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;</span><span class="w"> </span><span class="n">finish</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;::</span><span class="n">Make</span><span class="p">();</span>
<span class="linenos">498</span><span class="w"> </span><span class="k">struct</span><span class="w"> </span><span class="nc">CustomSinkNodeConsumer</span><span class="w"> </span><span class="o">:</span><span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SinkNodeConsumer</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">499</span><span class="w"> </span><span class="n">CustomSinkNodeConsumer</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o">&lt;</span><span class="kt">uint32_t</span><span class="o">&gt;*</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;</span><span class="w"> </span><span class="n">finish</span><span class="p">)</span>
<span class="linenos">500</span><span class="w"> </span><span class="o">:</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">(</span><span class="n">batches_seen</span><span class="p">),</span><span class="w"> </span><span class="n">finish</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">finish</span><span class="p">))</span><span class="w"> </span><span class="p">{}</span>
<span class="linenos">501</span>
<span class="linenos">502</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">Init</span><span class="p">(</span><span class="k">const</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Schema</span><span class="o">&gt;&amp;</span><span class="w"> </span><span class="n">schema</span><span class="p">,</span>
<span class="linenos">503</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">BackpressureControl</span><span class="o">*</span><span class="w"> </span><span class="n">backpressure_control</span><span class="p">,</span>
<span class="linenos">504</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">*</span><span class="w"> </span><span class="n">plan</span><span class="p">)</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">505</span><span class="w"> </span><span class="c1">// This will be called as the plan is started (before the first call to Consume)</span>
<span class="linenos">506</span><span class="w"> </span><span class="c1">// and provides the schema of the data coming into the node, controls for pausing /</span>
<span class="linenos">507</span><span class="w"> </span><span class="c1">// resuming input, and a pointer to the plan itself which can be used to access</span>
<span class="linenos">508</span><span class="w"> </span><span class="c1">// other utilities such as the thread indexer or async task scheduler.</span>
<span class="linenos">509</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span>
<span class="linenos">510</span><span class="w"> </span><span class="p">}</span>
<span class="linenos">511</span>
<span class="linenos">512</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">Consume</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="w"> </span><span class="n">batch</span><span class="p">)</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">513</span><span class="w"> </span><span class="p">(</span><span class="o">*</span><span class="n">batches_seen</span><span class="p">)</span><span class="o">++</span><span class="p">;</span>
<span class="linenos">514</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span>
<span class="linenos">515</span><span class="w"> </span><span class="p">}</span>
<span class="linenos">516</span>
<span class="linenos">517</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;</span><span class="w"> </span><span class="n">Finish</span><span class="p">()</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">518</span><span class="w"> </span><span class="c1">// Here you can perform whatever (possibly async) cleanup is needed, e.g. closing</span>
<span class="linenos">519</span><span class="w"> </span><span class="c1">// output file handles and flushing remaining work</span>
<span class="linenos">520</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;::</span><span class="n">MakeFinished</span><span class="p">();</span>
<span class="linenos">521</span><span class="w"> </span><span class="p">}</span>
<span class="linenos">522</span>
<span class="linenos">523</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o">&lt;</span><span class="kt">uint32_t</span><span class="o">&gt;*</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">;</span>
<span class="linenos">524</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o">&lt;&gt;</span><span class="w"> </span><span class="n">finish</span><span class="p">;</span>
<span class="linenos">525</span><span class="w"> </span><span class="p">};</span>
<span class="linenos">526</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">CustomSinkNodeConsumer</span><span class="o">&gt;</span><span class="w"> </span><span class="n">consumer</span><span class="w"> </span><span class="o">=</span>
<span class="linenos">527</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">CustomSinkNodeConsumer</span><span class="o">&gt;</span><span class="p">(</span><span class="o">&amp;</span><span class="n">batches_seen</span><span class="p">,</span><span class="w"> </span><span class="n">finish</span><span class="p">);</span>
<span class="linenos">528</span>
<span class="linenos">529</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">consuming_sink</span><span class="p">{</span><span class="s">&quot;consuming_sink&quot;</span><span class="p">,</span>
<span class="linenos">530</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">)},</span>
<span class="linenos">531</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ConsumingSinkNodeOptions</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">consumer</span><span class="p">))};</span>
<span class="linenos">532</span>
<span class="linenos">533</span><span class="w"> </span><span class="c1">// Since we are consuming the data within the plan there is no output and we simply</span>
<span class="linenos">534</span><span class="w"> </span><span class="c1">// run the plan to completion instead of collecting into a table.</span>
<span class="linenos">535</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">DeclarationToStatus</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">consuming_sink</span><span class="p">)));</span>
<span class="linenos">536</span>
<span class="linenos">537</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;The consuming sink node saw &quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">.</span><span class="n">load</span><span class="p">()</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot; batches&quot;</span>
<span class="linenos">538</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">539</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span>
<span class="linenos">540</span><span class="p">}</span>
<span class="linenos">541</span><span class="c1">// (Doc section: ConsumingSink Example)</span>
<span class="linenos">542</span>
<span class="linenos">543</span><span class="c1">// (Doc section: OrderBySink Example)</span>
<span class="linenos">544</span>
<span class="linenos">545</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTableWithCustomSink</span><span class="p">(</span>
<span class="linenos">546</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">&gt;</span><span class="w"> </span><span class="n">plan</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Schema</span><span class="o">&gt;</span><span class="w"> </span><span class="n">schema</span><span class="p">,</span>
<span class="linenos">547</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">548</span><span class="w"> </span><span class="c1">// translate sink_gen (async) to sink_reader (sync)</span>
<span class="linenos">549</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">RecordBatchReader</span><span class="o">&gt;</span><span class="w"> </span><span class="n">sink_reader</span><span class="w"> </span><span class="o">=</span>
<span class="linenos">550</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeGeneratorReader</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">sink_gen</span><span class="p">),</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">());</span>
<span class="linenos">551</span>
<span class="linenos">552</span><span class="w"> </span><span class="c1">// validate the ExecPlan</span>
<span class="linenos">553</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">Validate</span><span class="p">());</span>
<span class="linenos">554</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;ExecPlan created : &quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">555</span><span class="w"> </span><span class="c1">// start the ExecPlan</span>
<span class="linenos">556</span><span class="w"> </span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">StartProducing</span><span class="p">();</span>
<span class="linenos">557</span>
<span class="linenos">558</span><span class="w"> </span><span class="c1">// collect sink_reader into a Table</span>
<span class="linenos">559</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span><span class="w"> </span><span class="n">response_table</span><span class="p">;</span>
<span class="linenos">560</span>
<span class="linenos">561</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">response_table</span><span class="p">,</span>
<span class="linenos">562</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">FromRecordBatchReader</span><span class="p">(</span><span class="n">sink_reader</span><span class="p">.</span><span class="n">get</span><span class="p">()));</span>
<span class="linenos">563</span>
<span class="linenos">564</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;Results : &quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">response_table</span><span class="o">-&gt;</span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">565</span>
<span class="linenos">566</span><span class="w"> </span><span class="c1">// stop producing</span>
<span class="linenos">567</span><span class="w"> </span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">StopProducing</span><span class="p">();</span>
<span class="linenos">568</span><span class="w"> </span><span class="c1">// wait for the plan to finish and return its final status</span>
<span class="linenos">569</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">future</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">finished</span><span class="p">();</span>
<span class="linenos">570</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">future</span><span class="p">.</span><span class="n">status</span><span class="p">();</span>
<span class="linenos">571</span><span class="p">}</span>
<span class="linenos">572</span>
<span class="linenos">573</span><span class="c1">/// \brief An example showing an order-by node</span>
<span class="linenos">574</span><span class="c1">///</span>
<span class="linenos">575</span><span class="c1">/// Source-OrderBy-Sink</span>
<span class="linenos">576</span><span class="c1">/// In this example, the data enters through the source node</span>
<span class="linenos">577</span><span class="c1">/// and the data is ordered in the sink node. The order can be</span>
<span class="linenos">578</span><span class="c1">/// ASCENDING or DESCENDING and it is configurable. The output</span>
<span class="linenos">579</span><span class="c1">/// is obtained as a table from the sink node.</span>
<span class="linenos">580</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">SourceOrderBySinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">581</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">&gt;</span><span class="w"> </span><span class="n">plan</span><span class="p">,</span>
<span class="linenos">582</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="o">*</span><span class="n">cp</span><span class="o">::</span><span class="n">threaded_exec_context</span><span class="p">()));</span>
<span class="linenos">583</span>
<span class="linenos">584</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeSortTestBasicBatches</span><span class="p">());</span>
<span class="linenos">585</span>
<span class="linenos">586</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span>
<span class="linenos">587</span>
<span class="linenos">588</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span>
<span class="linenos">589</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecNode</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">source</span><span class="p">,</span>
<span class="linenos">590</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{},</span><span class="w"> </span><span class="n">source_node_options</span><span class="p">));</span>
<span class="linenos">591</span>
<span class="linenos">592</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span>
<span class="linenos">593</span><span class="w"> </span><span class="s">&quot;order_by_sink&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{</span><span class="n">source</span><span class="p">},</span>
<span class="linenos">594</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">OrderBySinkNodeOptions</span><span class="p">{</span>
<span class="linenos">595</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SortOptions</span><span class="p">{{</span><span class="n">cp</span><span class="o">::</span><span class="n">SortKey</span><span class="p">{</span><span class="s">&quot;a&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SortOrder</span><span class="o">::</span><span class="n">Descending</span><span class="p">}}},</span><span class="w"> </span><span class="o">&amp;</span><span class="n">sink_gen</span><span class="p">}));</span>
<span class="linenos">596</span>
<span class="linenos">597</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTableWithCustomSink</span><span class="p">(</span><span class="n">plan</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">);</span>
<span class="linenos">598</span><span class="p">}</span>
<span class="linenos">599</span>
<span class="linenos">600</span><span class="c1">// (Doc section: OrderBySink Example)</span>
<span class="linenos">601</span>
<span class="linenos">602</span><span class="c1">// (Doc section: HashJoin Example)</span>
<span class="linenos">603</span>
<span class="linenos">604</span><span class="c1">/// \brief An example showing a hash join node</span>
<span class="linenos">605</span><span class="c1">///</span>
<span class="linenos">606</span><span class="c1">/// Source-HashJoin-Table</span>
<span class="linenos">607</span><span class="c1">/// This example shows how the source node gets the data and how a self-join</span>
<span class="linenos">608</span><span class="c1">/// is applied on the data. The join options are configurable. The output</span>
<span class="linenos">609</span><span class="c1">/// is collected into a table.</span>
<span class="linenos">610</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">SourceHashJoinSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">611</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="n">MakeGroupableBatches</span><span class="p">());</span>
<span class="linenos">612</span>
<span class="linenos">613</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">left</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">input</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">input</span><span class="p">.</span><span class="n">gen</span><span class="p">()}};</span>
<span class="linenos">614</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">right</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">input</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">input</span><span class="p">.</span><span class="n">gen</span><span class="p">()}};</span>
<span class="linenos">615</span>
<span class="linenos">616</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">HashJoinNodeOptions</span><span class="w"> </span><span class="n">join_opts</span><span class="p">{</span>
<span class="linenos">617</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">JoinType</span><span class="o">::</span><span class="n">INNER</span><span class="p">,</span>
<span class="linenos">618</span><span class="w"> </span><span class="cm">/*left_keys=*/</span><span class="p">{</span><span class="s">&quot;str&quot;</span><span class="p">},</span>
<span class="linenos">619</span><span class="w"> </span><span class="cm">/*right_keys=*/</span><span class="p">{</span><span class="s">&quot;str&quot;</span><span class="p">},</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="nb">true</span><span class="p">),</span><span class="w"> </span><span class="s">&quot;l_&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;r_&quot;</span><span class="p">};</span>
<span class="linenos">620</span>
<span class="linenos">621</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">hashjoin</span><span class="p">{</span>
<span class="linenos">622</span><span class="w"> </span><span class="s">&quot;hashjoin&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">left</span><span class="p">),</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">right</span><span class="p">)},</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">join_opts</span><span class="p">)};</span>
<span class="linenos">623</span>
<span class="linenos">624</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">hashjoin</span><span class="p">));</span>
<span class="linenos">625</span><span class="p">}</span>
<span class="linenos">626</span>
<span class="linenos">627</span><span class="c1">// (Doc section: HashJoin Example)</span>
<span class="linenos">628</span>
<span class="linenos">629</span><span class="c1">// (Doc section: KSelect Example)</span>
<span class="linenos">630</span>
<span class="linenos">631</span><span class="c1">/// \brief An example showing a select-k node</span>
<span class="linenos">632</span><span class="c1">///</span>
<span class="linenos">633</span><span class="c1">/// Source-KSelect</span>
<span class="linenos">634</span><span class="c1">/// This example shows how K elements can be selected</span>
<span class="linenos">635</span><span class="c1">/// either from the top or bottom. The output node is a modified</span>
<span class="linenos">636</span><span class="c1">/// sink node where output can be obtained as a table.</span>
<span class="linenos">637</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">SourceKSelectExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">638</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="n">MakeGroupableBatches</span><span class="p">());</span>
<span class="linenos">639</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">&gt;</span><span class="w"> </span><span class="n">plan</span><span class="p">,</span>
<span class="linenos">640</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="o">*</span><span class="n">cp</span><span class="o">::</span><span class="n">threaded_exec_context</span><span class="p">()));</span>
<span class="linenos">641</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span>
<span class="linenos">642</span>
<span class="linenos">643</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span>
<span class="linenos">644</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecNode</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">source</span><span class="p">,</span>
<span class="linenos">645</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{},</span>
<span class="linenos">646</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">input</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">input</span><span class="p">.</span><span class="n">gen</span><span class="p">()}));</span>
<span class="linenos">647</span>
<span class="linenos">648</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SelectKOptions</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SelectKOptions</span><span class="o">::</span><span class="n">TopKDefault</span><span class="p">(</span><span class="cm">/*k=*/</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="s">&quot;i32&quot;</span><span class="p">});</span>
<span class="linenos">649</span>
<span class="linenos">650</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">&quot;select_k_sink&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{</span><span class="n">source</span><span class="p">},</span>
<span class="linenos">651</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SelectKSinkNodeOptions</span><span class="p">{</span><span class="n">options</span><span class="p">,</span><span class="w"> </span><span class="o">&amp;</span><span class="n">sink_gen</span><span class="p">}));</span>
<span class="linenos">652</span>
<span class="linenos">653</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">(</span>
<span class="linenos">654</span><span class="w"> </span><span class="p">{</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;i32&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">()),</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;str&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())});</span>
<span class="linenos">655</span>
<span class="linenos">656</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTableWithCustomSink</span><span class="p">(</span><span class="n">plan</span><span class="p">,</span><span class="w"> </span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">);</span>
<span class="linenos">657</span><span class="p">}</span>
<span class="linenos">658</span>
<span class="linenos">659</span><span class="c1">// (Doc section: KSelect Example)</span>
<span class="linenos">660</span>
<span class="linenos">661</span><span class="c1">// (Doc section: Write Example)</span>
<span class="linenos">662</span>
<span class="linenos">663</span><span class="c1">/// \brief An example showing a write node</span>
<span class="linenos">664</span><span class="c1">/// \param file_path The destination to write to</span>
<span class="linenos">665</span><span class="c1">///</span>
<span class="linenos">666</span><span class="c1">/// Scan-Filter-Write</span>
<span class="linenos">667</span><span class="c1">/// This example shows how a scan node can be used to load the data</span>
<span class="linenos">668</span><span class="c1">/// and how, after processing, it can be written to disk.</span>
<span class="linenos">669</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">ScanFilterWriteExample</span><span class="p">(</span><span class="k">const</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span><span class="w"> </span><span class="n">file_path</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">670</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">&gt;</span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span>
<span class="linenos">671</span>
<span class="linenos">672</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">&gt;</span><span class="p">();</span>
<span class="linenos">673</span><span class="w"> </span><span class="c1">// empty projection</span>
<span class="linenos">674</span><span class="w"> </span><span class="n">options</span><span class="o">-&gt;</span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span>
<span class="linenos">675</span>
<span class="linenos">676</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span>
<span class="linenos">677</span>
<span class="linenos">678</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">&quot;scan&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span>
<span class="linenos">679</span>
<span class="linenos">680</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span>
<span class="linenos">681</span>
<span class="linenos">682</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="w"> </span><span class="n">root_path</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">&quot;&quot;</span><span class="p">;</span>
<span class="linenos">683</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="w"> </span><span class="n">uri</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">&quot;file://&quot;</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">file_path</span><span class="p">;</span>
<span class="linenos">684</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;</span><span class="w"> </span><span class="n">filesystem</span><span class="p">,</span>
<span class="linenos">685</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystemFromUri</span><span class="p">(</span><span class="n">uri</span><span class="p">,</span><span class="w"> </span><span class="o">&amp;</span><span class="n">root_path</span><span class="p">));</span>
<span class="linenos">686</span>
<span class="linenos">687</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">base_path</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">root_path</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="s">&quot;/parquet_dataset&quot;</span><span class="p">;</span>
<span class="linenos">688</span><span class="w"> </span><span class="c1">// Uncomment the following line if this example is run repeatedly</span>
<span class="linenos">689</span><span class="w"> </span><span class="c1">// ARROW_RETURN_NOT_OK(filesystem-&gt;DeleteDirContents(base_path));</span>
<span class="linenos">690</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span>
<span class="linenos">691</span>
<span class="linenos">692</span><span class="w"> </span><span class="c1">// The partition schema determines which fields are part of the partitioning.</span>
<span class="linenos">693</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">partition_schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">())});</span>
<span class="linenos">694</span><span class="w"> </span><span class="c1">// We&#39;ll use Hive-style partitioning,</span>
<span class="linenos">695</span><span class="w"> </span><span class="c1">// which creates directories with &quot;key=value&quot; pairs.</span>
<span class="linenos">696</span>
<span class="linenos">697</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">partitioning</span><span class="w"> </span><span class="o">=</span>
<span class="linenos">698</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">&gt;</span><span class="p">(</span><span class="n">partition_schema</span><span class="p">);</span>
<span class="linenos">699</span><span class="w"> </span><span class="c1">// We&#39;ll write Parquet files.</span>
<span class="linenos">700</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">format</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ParquetFileFormat</span><span class="o">&gt;</span><span class="p">();</span>
<span class="linenos">701</span>
<span class="linenos">702</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">FileSystemDatasetWriteOptions</span><span class="w"> </span><span class="n">write_options</span><span class="p">;</span>
<span class="linenos">703</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">file_write_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">format</span><span class="o">-&gt;</span><span class="n">DefaultWriteOptions</span><span class="p">();</span>
<span class="linenos">704</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">filesystem</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">filesystem</span><span class="p">;</span>
<span class="linenos">705</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">base_dir</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">base_path</span><span class="p">;</span>
<span class="linenos">706</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">partitioning</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">partitioning</span><span class="p">;</span>
<span class="linenos">707</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">basename_template</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">&quot;part{i}.parquet&quot;</span><span class="p">;</span>
<span class="linenos">708</span>
<span class="linenos">709</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">WriteNodeOptions</span><span class="w"> </span><span class="n">write_node_options</span><span class="p">{</span><span class="n">write_options</span><span class="p">};</span>
<span class="linenos">710</span>
<span class="linenos">711</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">write</span><span class="p">{</span><span class="s">&quot;write&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">)},</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">write_node_options</span><span class="p">)};</span>
<span class="linenos">712</span>
<span class="linenos">713</span><span class="w"> </span><span class="c1">// Since the write node has no output we simply run the plan to completion and the</span>
<span class="linenos">714</span><span class="w"> </span><span class="c1">// data should be written</span>
<span class="linenos">715</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">DeclarationToStatus</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">write</span><span class="p">)));</span>
<span class="linenos">716</span>
<span class="linenos">717</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;Dataset written to &quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">base_path</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">718</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span>
<span class="linenos">719</span><span class="p">}</span>
<span class="linenos">720</span>
<span class="linenos">721</span><span class="c1">// (Doc section: Write Example)</span>
<span class="linenos">722</span>
<span class="linenos">723</span><span class="c1">// (Doc section: Union Example)</span>
<span class="linenos">724</span>
<span class="linenos">725</span><span class="c1">/// \brief An example showing a union node</span>
<span class="linenos">726</span><span class="c1">///</span>
<span class="linenos">727</span><span class="c1">/// Source-Union-Table</span>
<span class="linenos">728</span><span class="c1">/// This example shows how a union operation can be applied on two</span>
<span class="linenos">729</span><span class="c1">/// data sources. The output is collected into a table.</span>
<span class="linenos">730</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">SourceUnionSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">731</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span>
<span class="linenos">732</span>
<span class="linenos">733</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">lhs</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span>
<span class="linenos">734</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()}};</span>
<span class="linenos">735</span><span class="w"> </span><span class="n">lhs</span><span class="p">.</span><span class="n">label</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">&quot;lhs&quot;</span><span class="p">;</span>
<span class="linenos">736</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">rhs</span><span class="p">{</span><span class="s">&quot;source&quot;</span><span class="p">,</span>
<span class="linenos">737</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()}};</span>
<span class="linenos">738</span><span class="w"> </span><span class="n">rhs</span><span class="p">.</span><span class="n">label</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">&quot;rhs&quot;</span><span class="p">;</span>
<span class="linenos">739</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">union_plan</span><span class="p">{</span>
<span class="linenos">740</span><span class="w"> </span><span class="s">&quot;union&quot;</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">lhs</span><span class="p">),</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">rhs</span><span class="p">)},</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecNodeOptions</span><span class="p">{}};</span>
<span class="linenos">741</span>
<span class="linenos">742</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">union_plan</span><span class="p">));</span>
<span class="linenos">743</span><span class="p">}</span>
<span class="linenos">744</span>
<span class="linenos">745</span><span class="c1">// (Doc section: Union Example)</span>
<span class="linenos">746</span>
<span class="linenos">747</span><span class="c1">// (Doc section: Table Sink Example)</span>
<span class="linenos">748</span>
<span class="linenos">749</span><span class="c1">/// \brief An example showing a table sink node</span>
<span class="linenos">750</span><span class="c1">///</span>
<span class="linenos">751</span><span class="c1">/// TableSink Example</span>
<span class="linenos">752</span><span class="c1">/// This example shows how a table_sink can be used</span>
<span class="linenos">753</span><span class="c1">/// in an execution plan. This includes a source node</span>
<span class="linenos">754</span><span class="c1">/// receiving data as batches and the table sink node</span>
<span class="linenos">755</span><span class="c1">/// which emits the output as a table.</span>
<span class="linenos">756</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">TableSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">757</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">&gt;</span><span class="w"> </span><span class="n">plan</span><span class="p">,</span>
<span class="linenos">758</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="o">*</span><span class="n">cp</span><span class="o">::</span><span class="n">threaded_exec_context</span><span class="p">()));</span>
<span class="linenos">759</span>
<span class="linenos">760</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span>
<span class="linenos">761</span>
<span class="linenos">762</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span>
<span class="linenos">763</span>
<span class="linenos">764</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecNode</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">source</span><span class="p">,</span>
<span class="linenos">765</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">&quot;source&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{},</span><span class="w"> </span><span class="n">source_node_options</span><span class="p">));</span>
<span class="linenos">766</span>
<span class="linenos">767</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span><span class="w"> </span><span class="n">output_table</span><span class="p">;</span>
<span class="linenos">768</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">table_sink_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">TableSinkNodeOptions</span><span class="p">{</span><span class="o">&amp;</span><span class="n">output_table</span><span class="p">};</span>
<span class="linenos">769</span>
<span class="linenos">770</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span>
<span class="linenos">771</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">&quot;table_sink&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{</span><span class="n">source</span><span class="p">},</span><span class="w"> </span><span class="n">table_sink_options</span><span class="p">));</span>
<span class="linenos">772</span><span class="w"> </span><span class="c1">// validate the ExecPlan</span>
<span class="linenos">773</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">Validate</span><span class="p">());</span>
<span class="linenos">774</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;ExecPlan created : &quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">775</span><span class="w"> </span><span class="c1">// start the ExecPlan</span>
<span class="linenos">776</span><span class="w"> </span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">StartProducing</span><span class="p">();</span>
<span class="linenos">777</span>
<span class="linenos">778</span><span class="w"> </span><span class="c1">// Wait for the plan to finish</span>
<span class="linenos">779</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">finished</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">plan</span><span class="o">-&gt;</span><span class="n">finished</span><span class="p">();</span>
<span class="linenos">780</span><span class="w"> </span><span class="n">RETURN_NOT_OK</span><span class="p">(</span><span class="n">finished</span><span class="p">.</span><span class="n">status</span><span class="p">());</span>
<span class="linenos">781</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;Results : &quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">output_table</span><span class="o">-&gt;</span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">782</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span>
<span class="linenos">783</span><span class="p">}</span>
<span class="linenos">784</span>
<span class="linenos">785</span><span class="c1">// (Doc section: Table Sink Example)</span>
<span class="linenos">786</span>
<span class="linenos">787</span><span class="c1">// (Doc section: RecordBatchReaderSource Example)</span>
<span class="linenos">788</span>
<span class="linenos">789</span><span class="c1">/// \brief An example showing the usage of a RecordBatchReader as the data source.</span>
<span class="linenos">790</span><span class="c1">///</span>
<span class="linenos">791</span><span class="c1">/// RecordBatchReaderSourceSink Example</span>
<span class="linenos">792</span><span class="c1">/// This example shows how a record_batch_reader_source can be used</span>
<span class="linenos">793</span><span class="c1">/// in an execution plan. This includes the source node</span>
<span class="linenos">794</span><span class="c1">/// receiving data from a TableBatchReader.</span>
<span class="linenos">795</span>
<span class="linenos">796</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">RecordBatchReaderSourceSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">797</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">table</span><span class="p">,</span><span class="w"> </span><span class="n">GetTable</span><span class="p">());</span>
<span class="linenos">798</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">RecordBatchReader</span><span class="o">&gt;</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span>
<span class="linenos">799</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">TableBatchReader</span><span class="o">&gt;</span><span class="p">(</span><span class="n">table</span><span class="p">);</span>
<span class="linenos">800</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">reader_source</span><span class="p">{</span><span class="s">&quot;record_batch_reader_source&quot;</span><span class="p">,</span>
<span class="linenos">801</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">RecordBatchReaderSourceNodeOptions</span><span class="p">{</span><span class="n">reader</span><span class="p">}};</span>
<span class="linenos">802</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">reader_source</span><span class="p">));</span>
<span class="linenos">803</span><span class="p">}</span>
<span class="linenos">804</span>
<span class="linenos">805</span><span class="c1">// (Doc section: RecordBatchReaderSource Example)</span>
<span class="linenos">806</span>
<span class="linenos">807</span><span class="k">enum</span><span class="w"> </span><span class="nc">ExampleMode</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">808</span><span class="w"> </span><span class="n">SOURCE_SINK</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span>
<span class="linenos">809</span><span class="w"> </span><span class="n">TABLE_SOURCE_SINK</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span>
<span class="linenos">810</span><span class="w"> </span><span class="n">SCAN</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span>
<span class="linenos">811</span><span class="w"> </span><span class="n">FILTER</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">3</span><span class="p">,</span>
<span class="linenos">812</span><span class="w"> </span><span class="n">PROJECT</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">4</span><span class="p">,</span>
<span class="linenos">813</span><span class="w"> </span><span class="n">SCALAR_AGGREGATION</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">5</span><span class="p">,</span>
<span class="linenos">814</span><span class="w"> </span><span class="n">GROUP_AGGREGATION</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">6</span><span class="p">,</span>
<span class="linenos">815</span><span class="w"> </span><span class="n">CONSUMING_SINK</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">7</span><span class="p">,</span>
<span class="linenos">816</span><span class="w"> </span><span class="n">ORDER_BY_SINK</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">8</span><span class="p">,</span>
<span class="linenos">817</span><span class="w"> </span><span class="n">HASHJOIN</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">9</span><span class="p">,</span>
<span class="linenos">818</span><span class="w"> </span><span class="n">KSELECT</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">10</span><span class="p">,</span>
<span class="linenos">819</span><span class="w"> </span><span class="n">WRITE</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">11</span><span class="p">,</span>
<span class="linenos">820</span><span class="w"> </span><span class="n">UNION</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">12</span><span class="p">,</span>
<span class="linenos">821</span><span class="w"> </span><span class="n">TABLE_SOURCE_TABLE_SINK</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">13</span><span class="p">,</span>
<span class="linenos">822</span><span class="w"> </span><span class="n">RECORD_BATCH_READER_SOURCE</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">14</span><span class="p">,</span>
<span class="linenos">823</span><span class="w"> </span><span class="n">PROJECT_SEQUENCE</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">15</span>
<span class="linenos">824</span><span class="p">};</span>
<span class="linenos">825</span>
<span class="linenos">826</span><span class="kt">int</span><span class="w"> </span><span class="nf">main</span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">argc</span><span class="p">,</span><span class="w"> </span><span class="kt">char</span><span class="o">**</span><span class="w"> </span><span class="n">argv</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">827</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">argc</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="mi">3</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">828</span><span class="w"> </span><span class="c1">// Fake success for CI purposes.</span>
<span class="linenos">829</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">EXIT_SUCCESS</span><span class="p">;</span>
<span class="linenos">830</span><span class="w"> </span><span class="p">}</span>
<span class="linenos">831</span>
<span class="linenos">832</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="w"> </span><span class="n">base_save_path</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">];</span>
<span class="linenos">833</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">mode</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">atoi</span><span class="p">(</span><span class="n">argv</span><span class="p">[</span><span class="mi">2</span><span class="p">]);</span>
<span class="linenos">834</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">status</span><span class="p">;</span>
<span class="linenos">835</span><span class="w"> </span><span class="c1">// ensure arrow::dataset node factories are in the registry</span>
<span class="linenos">836</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">internal</span><span class="o">::</span><span class="n">Initialize</span><span class="p">();</span>
<span class="linenos">837</span><span class="w"> </span><span class="k">switch</span><span class="w"> </span><span class="p">(</span><span class="n">mode</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">838</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">SOURCE_SINK</span><span class="p">:</span>
<span class="linenos">839</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">&quot;Source Sink Example&quot;</span><span class="p">);</span>
<span class="linenos">840</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SourceSinkExample</span><span class="p">();</span>
<span class="linenos">841</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">842</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">TABLE_SOURCE_SINK</span><span class="p">:</span>
<span class="linenos">843</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">&quot;Table Source Sink Example&quot;</span><span class="p">);</span>
<span class="linenos">844</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">TableSourceSinkExample</span><span class="p">();</span>
<span class="linenos">845</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">846</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">SCAN</span><span class="p">:</span>
<span class="linenos">847</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">&quot;Scan Example&quot;</span><span class="p">);</span>
<span class="linenos">848</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ScanSinkExample</span><span class="p">();</span>
<span class="linenos">849</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">850</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">FILTER</span><span class="p">:</span>
<span class="linenos">851</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">&quot;Filter Example&quot;</span><span class="p">);</span>
<span class="linenos">852</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ScanFilterSinkExample</span><span class="p">();</span>
<span class="linenos">853</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">854</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">PROJECT</span><span class="p">:</span>
<span class="linenos">855</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">&quot;Project Example&quot;</span><span class="p">);</span>
<span class="linenos">856</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ScanProjectSinkExample</span><span class="p">();</span>
<span class="linenos">857</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">858</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">PROJECT_SEQUENCE</span><span class="p">:</span>
<span class="linenos">859</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">&quot;Project Example (using Declaration::Sequence)&quot;</span><span class="p">);</span>
<span class="linenos">860</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ScanProjectSequenceSinkExample</span><span class="p">();</span>
<span class="linenos">861</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">862</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">GROUP_AGGREGATION</span><span class="p">:</span>
<span class="linenos">863</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">&quot;Aggregate Example&quot;</span><span class="p">);</span>
<span class="linenos">864</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SourceGroupAggregateSinkExample</span><span class="p">();</span>
<span class="linenos">865</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">866</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">SCALAR_AGGREGATION</span><span class="p">:</span>
<span class="linenos">867</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">&quot;Aggregate Example&quot;</span><span class="p">);</span>
<span class="linenos">868</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SourceScalarAggregateSinkExample</span><span class="p">();</span>
<span class="linenos">869</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">870</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">CONSUMING_SINK</span><span class="p">:</span>
<span class="linenos">871</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">&quot;Consuming-Sink Example&quot;</span><span class="p">);</span>
<span class="linenos">872</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SourceConsumingSinkExample</span><span class="p">();</span>
<span class="linenos">873</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">874</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">ORDER_BY_SINK</span><span class="p">:</span>
<span class="linenos">875</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">&quot;OrderBy Example&quot;</span><span class="p">);</span>
<span class="linenos">876</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SourceOrderBySinkExample</span><span class="p">();</span>
<span class="linenos">877</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">878</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">HASHJOIN</span><span class="p">:</span>
<span class="linenos">879</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">&quot;HashJoin Example&quot;</span><span class="p">);</span>
<span class="linenos">880</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SourceHashJoinSinkExample</span><span class="p">();</span>
<span class="linenos">881</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">882</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">KSELECT</span><span class="p">:</span>
<span class="linenos">883</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">&quot;KSelect Example&quot;</span><span class="p">);</span>
<span class="linenos">884</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SourceKSelectExample</span><span class="p">();</span>
<span class="linenos">885</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">886</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">WRITE</span><span class="p">:</span>
<span class="linenos">887</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">&quot;Write Example&quot;</span><span class="p">);</span>
<span class="linenos">888</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ScanFilterWriteExample</span><span class="p">(</span><span class="n">base_save_path</span><span class="p">);</span>
<span class="linenos">889</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">890</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">UNION</span><span class="p">:</span>
<span class="linenos">891</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">&quot;Union Example&quot;</span><span class="p">);</span>
<span class="linenos">892</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SourceUnionSinkExample</span><span class="p">();</span>
<span class="linenos">893</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">894</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">TABLE_SOURCE_TABLE_SINK</span><span class="p">:</span>
<span class="linenos">895</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">&quot;TableSink Example&quot;</span><span class="p">);</span>
<span class="linenos">896</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">TableSinkExample</span><span class="p">();</span>
<span class="linenos">897</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">898</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">RECORD_BATCH_READER_SOURCE</span><span class="p">:</span>
<span class="linenos">899</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">&quot;RecordBatchReaderSource Example&quot;</span><span class="p">);</span>
<span class="linenos">900</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">RecordBatchReaderSourceSinkExample</span><span class="p">();</span>
<span class="linenos">901</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">902</span><span class="w"> </span><span class="k">default</span><span class="o">:</span>
<span class="linenos">903</span><span class="w"> </span><span class="k">break</span><span class="p">;</span>
<span class="linenos">904</span><span class="w"> </span><span class="p">}</span>
<span class="linenos">905</span>
<span class="linenos">906</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">status</span><span class="p">.</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">907</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">EXIT_SUCCESS</span><span class="p">;</span>
<span class="linenos">908</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="k">else</span><span class="w"> </span><span class="p">{</span>
<span class="linenos">909</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">&quot;Error occurred: &quot;</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">status</span><span class="p">.</span><span class="n">message</span><span class="p">()</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="linenos">910</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">EXIT_FAILURE</span><span class="p">;</span>
<span class="linenos">911</span><span class="w"> </span><span class="p">}</span>
<span class="linenos">912</span><span class="p">}</span>
</pre></div>
</div>
</section>
</section>
</article>
<footer class="prev-next-footer">
  <!-- Previous/next page links. The chevron icons are purely decorative, so they are
       hidden from assistive technology; the visible subtitle/title text names each link. -->
  <div class="prev-next-area">
    <a class="left-prev"
       href="overview.html"
       title="previous page">
      <i class="fa-solid fa-angle-left" aria-hidden="true"></i>
      <div class="prev-next-info">
        <p class="prev-next-subtitle">previous</p>
        <p class="prev-next-title">Acero Overview</p>
      </div>
    </a>
    <a class="right-next"
       href="substrait.html"
       title="next page">
      <div class="prev-next-info">
        <p class="prev-next-subtitle">next</p>
        <p class="prev-next-title">Using Acero with Substrait</p>
      </div>
      <i class="fa-solid fa-angle-right" aria-hidden="true"></i>
    </a>
  </div>
</footer>
</div>
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
  id="pst-page-navigation-heading-2"
  class="page-toc tocsection onthispage">
  <!-- This element labels the page TOC <nav> via aria-labelledby; the decorative icon
       is hidden so only the text "On this page" contributes to the accessible name. -->
  <i class="fa-solid fa-list" aria-hidden="true"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#using-acero">Using Acero</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#creating-a-plan">Creating a Plan</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#using-substrait">Using Substrait</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#programmatic-plan-creation">Programmatic Plan Creation</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#executing-a-plan">Executing a Plan</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#declarationtotable">DeclarationToTable</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#declarationtoreader">DeclarationToReader</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#declarationtostatus">DeclarationToStatus</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#running-a-plan-directly">Running a Plan Directly</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#providing-input">Providing Input</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#available-execnode-implementations">Available <code class="docutils literal notranslate"><span class="pre">ExecNode</span></code> Implementations</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#sources">Sources</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#compute-nodes">Compute Nodes</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#arrangement-nodes">Arrangement Nodes</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#sink-nodes">Sink Nodes</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#examples">Examples</a><ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#source"><code class="docutils literal notranslate"><span class="pre">source</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#table-source"><code class="docutils literal notranslate"><span class="pre">table_source</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#filter"><code class="docutils literal notranslate"><span class="pre">filter</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#project"><code class="docutils literal notranslate"><span class="pre">project</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#aggregate"><code class="docutils literal notranslate"><span class="pre">aggregate</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#sink"><code class="docutils literal notranslate"><span class="pre">sink</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#consuming-sink"><code class="docutils literal notranslate"><span class="pre">consuming_sink</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#order-by-sink"><code class="docutils literal notranslate"><span class="pre">order_by_sink</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#select-k-sink"><code class="docutils literal notranslate"><span class="pre">select_k_sink</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#table-sink"><code class="docutils literal notranslate"><span class="pre">table_sink</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#scan"><code class="docutils literal notranslate"><span class="pre">scan</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#write"><code class="docutils literal notranslate"><span class="pre">write</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#union"><code class="docutils literal notranslate"><span class="pre">union</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#hash-join"><code class="docutils literal notranslate"><span class="pre">hash_join</span></code></a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#summary">Summary</a></li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
  <!-- Link to edit this page's reStructuredText source on GitHub. The pencil icon is
       decorative and hidden from assistive technology; the link text names the action. -->
  <div class="tocsection editthispage">
    <a href="https://github.com/apache/arrow/edit/main/docs/source/cpp/acero/user_guide.rst">
      <i class="fa-solid fa-pencil" aria-hidden="true"></i>
      Edit on GitHub
    </a>
  </div>
</div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts are loaded late in <body>, after the main content, so they do not block parsing of that content -->
<script src="../../_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
<footer class="bd-footer">
  <!-- Site-wide footer: copyright/trademark notice on the start side, build tool
       versions alongside it, and theme attribution on the end side. Spacing between
       items is left to the stylesheet rather than trailing line breaks. -->
  <div class="bd-footer__inner bd-page-width">
    <div class="footer-items__start">
      <div class="footer-item">
        <p class="copyright">
          © Copyright 2016-2024 Apache Software Foundation.
          Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
        </p>
      </div>
      <div class="footer-item">
        <p class="sphinx-version">
          Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 6.2.0.
        </p>
      </div>
    </div>
    <div class="footer-items__end">
      <div class="footer-item">
        <p class="theme-version">
          Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
        </p>
      </div>
    </div>
  </div>
</footer>
</body>
</html>