| |
| <!DOCTYPE html> |
| |
| <html lang="en"> |
| <head> |
| <meta charset="utf-8" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> |
| |
| <title>Acero: A C++ streaming execution engine — Apache Arrow v12.0.1</title> |
| |
| <link href="../_static/styles/theme.css?digest=1999514e3f237ded88cf" rel="stylesheet"> |
| <link href="../_static/styles/pydata-sphinx-theme.css?digest=1999514e3f237ded88cf" rel="stylesheet"> |
| |
| |
| <link rel="stylesheet" |
| href="../_static/vendor/fontawesome/5.13.0/css/all.min.css"> |
| <link rel="preload" as="font" type="font/woff2" crossorigin |
| href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2"> |
| <link rel="preload" as="font" type="font/woff2" crossorigin |
| href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2"> |
| |
| |
| |
| |
| |
| <link rel="stylesheet" type="text/css" href="../_static/pygments.css" /> |
| <link rel="stylesheet" type="text/css" href="../_static/styles/pydata-sphinx-theme.css" /> |
| <link rel="stylesheet" type="text/css" href="../_static/copybutton.css" /> |
| <link rel="stylesheet" type="text/css" href="../_static/design-style.1e8bd061cd6da7fc9cf755528e8ffc24.min.css" /> |
| <link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css" /> |
| |
| <link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"> |
| |
| <script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script> |
| <script src="../_static/jquery.js"></script> |
| <script src="../_static/underscore.js"></script> |
| <script src="../_static/_sphinx_javascript_frameworks_compat.js"></script> |
| <script src="../_static/doctools.js"></script> |
| <script src="../_static/sphinx_highlight.js"></script> |
| <script src="../_static/clipboard.min.js"></script> |
| <script src="../_static/copybutton.js"></script> |
| <script src="../_static/design-tabs.js"></script> |
| <link rel="canonical" href="https://arrow.apache.org/docs/cpp/streaming_execution.html" /> |
| <link rel="shortcut icon" href="../_static/favicon.ico"/> |
| <link rel="index" title="Index" href="../genindex.html" /> |
| <link rel="search" title="Search" href="../search.html" /> |
| <link rel="next" title="Input / output and filesystems" href="io.html" /> |
| <link rel="prev" title="The Gandiva Expression Compiler" href="gandiva.html" /> |
| |
| <!-- viewport is declared once, right after the charset declaration at the top of head --> |
| <meta name="docsearch:language" content="en"> |
| |
| |
| <!-- Google Analytics --> |
| |
| |
| <!-- Matomo --> |
| <script> |
| /* Matomo tracking bootstrap: commands are queued on the shared window._paq array */ |
| /* (an already-defined window._paq is reused rather than replaced). */ |
| var _paq = window._paq = window._paq || []; |
| /* tracker methods like "setCustomDimension" should be called before "trackPageView" */ |
| /* We explicitly disable cookie tracking to avoid privacy issues */ |
| _paq.push(['disableCookies']); |
| _paq.push(['trackPageView']); |
| _paq.push(['enableLinkTracking']); |
| (function() { |
| /* Queue the tracker endpoint (matomo.php on analytics.apache.org) and the site id '20'. */ |
| var u="https://analytics.apache.org/"; |
| _paq.push(['setTrackerUrl', u+'matomo.php']); |
| _paq.push(['setSiteId', '20']); |
| /* Create an async script element for matomo.js and insert it before the first script element */ |
| /* of the document. Presumably matomo.js processes the queued commands once loaded; that */ |
| /* behavior lives in Matomo and is not visible here. */ |
| var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0]; |
| g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s); |
| })(); |
| </script> |
| <!-- End Matomo Code --> |
| |
| </head> |
| <body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80"> |
| |
| <div class="container-fluid" id="banner"></div> |
| |
| |
| |
| |
| <div class="container-xl"> |
| <div class="row"> |
| |
| |
| <!-- Only show if we have sidebars configured, else just a small margin --> |
| <div class="col-12 col-md-3 bd-sidebar"> |
| <div class="sidebar-start-items"> |
| <!-- Logo links to the documentation index; the alt text is the link's accessible name --> |
| <a class="navbar-brand" href="../index.html"> |
| <img src="../_static/arrow.png" class="logo" alt="Apache Arrow documentation home"> |
| </a> |
| |
| <div id="version-search-wrapper"> |
| |
| <!-- Version switcher: dropdown toggle showing the current version plus an initially empty menu. --> |
| <!-- aria-haspopup / aria-expanded expose the popup and its collapsed initial state to assistive technology. --> |
| <div id="version-button" class="dropdown"> |
| <button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false"> |
| 12.0.1 |
| <span class="caret"></span> |
| </button> |
| <div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button"> |
| <!-- dropdown will be populated by javascript on page load --> |
| </div> |
| </div> |
| |
| <script type="text/javascript"> |
| // Function to construct the target URL from the JSON components |
| function buildURL(entry) { |
| var template = "https://arrow.apache.org/docs/{version}"; // supplied by jinja |
| template = template.replace("{version}", entry.version); |
| return template; |
| } |
| |
| // Function to check if corresponding page path exists in other version of docs |
| // and, if so, go there instead of the homepage of the other docs version |
| function checkPageExistsAndRedirect(event) { |
| const currentFilePath = "cpp/streaming_execution.html", |
| otherDocsHomepage = event.target.getAttribute("href"); |
| let tryUrl = `${otherDocsHomepage}${currentFilePath}`; |
| $.ajax({ |
| type: 'HEAD', |
| url: tryUrl, |
| // if the page exists, go there |
| success: function() { |
| location.href = tryUrl; |
| } |
| }).fail(function() { |
| location.href = otherDocsHomepage; |
| }); |
| return false; |
| } |
| |
| // Function to populate the version switcher |
| (function () { |
| // get JSON config |
| $.getJSON("/docs/_static/versions.json", function(data, textStatus, jqXHR) { |
| // create the nodes first (before AJAX calls) to ensure the order is |
| // correct (for now, links will go to doc version homepage) |
| $.each(data, function(index, entry) { |
| // if no custom name specified (e.g., "latest"), use version string |
| if (!("name" in entry)) { |
| entry.name = entry.version; |
| } |
| // construct the appropriate URL, and add it to the dropdown |
| entry.url = buildURL(entry); |
| const node = document.createElement("a"); |
| node.setAttribute("class", "list-group-item list-group-item-action py-1"); |
| node.setAttribute("href", `${entry.url}`); |
| node.textContent = `${entry.name}`; |
| node.onclick = checkPageExistsAndRedirect; |
| $("#version_switcher").append(node); |
| }); |
| }); |
| })(); |
| </script> |
| |
| <!-- Docs search form: the magnifier icon is decorative, the input's aria-label is its accessible name --> |
| <form id="search-box" class="bd-search d-flex align-items-center" action="../search.html" method="get"> |
| <i class="icon fas fa-search" aria-hidden="true"></i> |
| <input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off"> |
| </form> |
| |
| </div> |
| |
| <nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation"> |
| <div class="bd-toc-item active"> |
| |
| <p aria-level="2" class="caption" role="heading"> |
| <span class="caption-text"> |
| Supported Environments |
| </span> |
| </p> |
| <ul class="current nav bd-sidenav"> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../c_glib/index.html"> |
| C/GLib |
| </a> |
| </li> |
| <li class="toctree-l1 current active has-children"> |
| <a class="reference internal" href="index.html"> |
| C++ |
| </a> |
| <input checked="" class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/> |
| <label for="toctree-checkbox-1"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul class="current"> |
| <li class="toctree-l2 has-children"> |
| <a class="reference internal" href="getting_started.html"> |
| Getting Started |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/> |
| <label for="toctree-checkbox-2"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="build_system.html"> |
| Using Arrow C++ in your own project |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="conventions.html"> |
| Conventions |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="tutorials/basic_arrow.html"> |
| Basic Arrow Data Structures |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="tutorials/io_tutorial.html"> |
| Arrow File I/O |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="tutorials/compute_tutorial.html"> |
| Arrow Compute |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="tutorials/datasets_tutorial.html"> |
| Arrow Datasets |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2 current active has-children"> |
| <a class="reference internal" href="user_guide.html"> |
| User Guide |
| </a> |
| <input checked="" class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/> |
| <label for="toctree-checkbox-3"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul class="current"> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="overview.html"> |
| High-Level Overview |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="memory.html"> |
| Memory Management |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="arrays.html"> |
| Arrays |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="datatypes.html"> |
| Data Types |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="tables.html"> |
| Tabular Data |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="compute.html"> |
| Compute Functions |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="gandiva.html"> |
| The Gandiva Expression Compiler |
| </a> |
| </li> |
| <li class="toctree-l3 current active"> |
| <a class="current reference internal" href="#"> |
| Acero: A C++ streaming execution engine |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="io.html"> |
| Input / output and filesystems |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="ipc.html"> |
| Reading and writing the Arrow IPC format |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="orc.html"> |
| Reading and Writing ORC files |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="parquet.html"> |
| Reading and writing Parquet files |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="csv.html"> |
| Reading and Writing CSV files |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="json.html"> |
| Reading JSON files |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="dataset.html"> |
| Tabular Datasets |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="flight.html"> |
| Arrow Flight RPC |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="gdb.html"> |
| Debugging code using Arrow |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="threading.html"> |
| Thread Management |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="opentelemetry.html"> |
| OpenTelemetry |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="env_vars.html"> |
| Environment Variables |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2 has-children"> |
| <a class="reference internal" href="examples/index.html"> |
| Examples |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/> |
| <label for="toctree-checkbox-4"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="examples/cmake_minimal_build.html"> |
| Minimal build using CMake |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="examples/compute_and_write_example.html"> |
| Compute and Write CSV Example |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="examples/dataset_documentation_example.html"> |
| Arrow Datasets example |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="examples/row_columnar_conversion.html"> |
| Row to columnar conversion |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="examples/tuple_range_conversion.html"> |
| std::tuple-like ranges to Arrow |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2 has-children"> |
| <a class="reference internal" href="api.html"> |
| API Reference |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/> |
| <label for="toctree-checkbox-5"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/support.html"> |
| Programming Support |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/memory.html"> |
| Memory (management) |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/thread.html"> |
| Thread (management) |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/datatype.html"> |
| Data Types |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/array.html"> |
| Arrays |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/scalar.html"> |
| Scalars |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/builder.html"> |
| Array Builders |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/table.html"> |
| Two-dimensional Datasets |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/c_abi.html"> |
| C Interfaces |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/compute.html"> |
| Compute Functions |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/gandiva.html"> |
| Gandiva Expression Compiler |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/tensor.html"> |
| Tensors |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/utilities.html"> |
| Utilities |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/async.html"> |
| Asynchronous programming |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/io.html"> |
| Input / output |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/ipc.html"> |
| Arrow IPC |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/formats.html"> |
| File Formats |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/cuda.html"> |
| CUDA support |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/flight.html"> |
| Arrow Flight RPC |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/flightsql.html"> |
| Arrow Flight SQL |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/filesystem.html"> |
| Filesystems |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="api/dataset.html"> |
| Dataset |
| </a> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://github.com/apache/arrow/blob/main/csharp/README.md"> |
| C# |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://pkg.go.dev/github.com/apache/arrow/go"> |
| Go |
| </a> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../java/index.html"> |
| Java |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/> |
| <label for="toctree-checkbox-6"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/quickstartguide.html"> |
| Quick Start Guide |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/overview.html"> |
| High-Level Overview |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/install.html"> |
| Installing Java Modules |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/memory.html"> |
| Memory Management |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/vector.html"> |
| ValueVector |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/vector_schema_root.html"> |
| Tabular Data |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/table.html"> |
| Table |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/ipc.html"> |
| Reading/Writing IPC formats |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/algorithm.html"> |
| Java Algorithms |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/flight.html"> |
| Arrow Flight RPC |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/flight_sql.html"> |
| Arrow Flight SQL |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/flight_sql_jdbc_driver.html"> |
| Arrow Flight SQL JDBC Driver |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/dataset.html"> |
| Dataset |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/cdata.html"> |
| C Data Interface |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/jdbc.html"> |
| Arrow JDBC Adapter |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../java/reference/index.html"> |
| Reference (javadoc) |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../js/index.html"> |
| JavaScript |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://github.com/apache/arrow-julia/blob/main/README.md"> |
| Julia |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://github.com/apache/arrow/blob/main/matlab/README.md"> |
| MATLAB |
| </a> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../python/index.html"> |
| Python |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" type="checkbox"/> |
| <label for="toctree-checkbox-7"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/install.html"> |
| Installing PyArrow |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/getstarted.html"> |
| Getting Started |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/data.html"> |
| Data Types and In-Memory Data Model |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/compute.html"> |
| Compute Functions |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/memory.html"> |
| Memory and IO Interfaces |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/ipc.html"> |
| Streaming, Serialization, and IPC |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/filesystems.html"> |
| Filesystem Interface |
| </a> |
| </li> |
| <li class="toctree-l2 has-children"> |
| <a class="reference internal" href="../python/filesystems_deprecated.html"> |
| Filesystem Interface (legacy) |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" type="checkbox"/> |
| <label for="toctree-checkbox-8"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.hdfs.connect.html"> |
| pyarrow.hdfs.connect |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.cat.html"> |
| pyarrow.HadoopFileSystem.cat |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.chmod.html"> |
| pyarrow.HadoopFileSystem.chmod |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.chown.html"> |
| pyarrow.HadoopFileSystem.chown |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.delete.html"> |
| pyarrow.HadoopFileSystem.delete |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.df.html"> |
| pyarrow.HadoopFileSystem.df |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.disk_usage.html"> |
| pyarrow.HadoopFileSystem.disk_usage |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.download.html"> |
| pyarrow.HadoopFileSystem.download |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.exists.html"> |
| pyarrow.HadoopFileSystem.exists |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.get_capacity.html"> |
| pyarrow.HadoopFileSystem.get_capacity |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.get_space_used.html"> |
| pyarrow.HadoopFileSystem.get_space_used |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.info.html"> |
| pyarrow.HadoopFileSystem.info |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.ls.html"> |
| pyarrow.HadoopFileSystem.ls |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.mkdir.html"> |
| pyarrow.HadoopFileSystem.mkdir |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.open.html"> |
| pyarrow.HadoopFileSystem.open |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.rename.html"> |
| pyarrow.HadoopFileSystem.rename |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.rm.html"> |
| pyarrow.HadoopFileSystem.rm |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.upload.html"> |
| pyarrow.HadoopFileSystem.upload |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/generated/pyarrow.HdfsFile.html"> |
| pyarrow.HdfsFile |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/numpy.html"> |
| NumPy Integration |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/pandas.html"> |
| Pandas Integration |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/timestamps.html"> |
| Timestamps |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/orc.html"> |
| Reading and Writing the Apache ORC Format |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/csv.html"> |
| Reading and Writing CSV files |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/feather.html"> |
| Feather File Format |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/json.html"> |
| Reading JSON files |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/parquet.html"> |
| Reading and Writing the Apache Parquet Format |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/dataset.html"> |
| Tabular Datasets |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/flight.html"> |
| Arrow Flight RPC |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/extending_types.html"> |
| Extending pyarrow |
| </a> |
| </li> |
| <li class="toctree-l2 has-children"> |
| <a class="reference internal" href="../python/integration.html"> |
| PyArrow Integrations |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" type="checkbox"/> |
| <label for="toctree-checkbox-9"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/integration/python_r.html"> |
| Integrating PyArrow with R |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/integration/python_java.html"> |
| Integrating PyArrow with Java |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/integration/extending.html"> |
| Using pyarrow from C++ and Cython Code |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../python/integration/cuda.html"> |
| CUDA Integration |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/env_vars.html"> |
| Environment Variables |
| </a> |
| </li> |
| <li class="toctree-l2 has-children"> |
| <a class="reference internal" href="../python/api.html"> |
| API Reference |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-10" name="toctree-checkbox-10" type="checkbox"/> |
| <label for="toctree-checkbox-10"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/datatypes.html"> |
| Data Types and Schemas |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-11" name="toctree-checkbox-11" type="checkbox"/> |
| <label for="toctree-checkbox-11"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.null.html"> |
| pyarrow.null |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.bool_.html"> |
| pyarrow.bool_ |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.int8.html"> |
| pyarrow.int8 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.int16.html"> |
| pyarrow.int16 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.int32.html"> |
| pyarrow.int32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.int64.html"> |
| pyarrow.int64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.uint8.html"> |
| pyarrow.uint8 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.uint16.html"> |
| pyarrow.uint16 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.uint32.html"> |
| pyarrow.uint32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.uint64.html"> |
| pyarrow.uint64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.float16.html"> |
| pyarrow.float16 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.float32.html"> |
| pyarrow.float32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.float64.html"> |
| pyarrow.float64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.time32.html"> |
| pyarrow.time32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.time64.html"> |
| pyarrow.time64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.timestamp.html"> |
| pyarrow.timestamp |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.date32.html"> |
| pyarrow.date32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.date64.html"> |
| pyarrow.date64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.duration.html"> |
| pyarrow.duration |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.month_day_nano_interval.html"> |
| pyarrow.month_day_nano_interval |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.binary.html"> |
| pyarrow.binary |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.string.html"> |
| pyarrow.string |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.utf8.html"> |
| pyarrow.utf8 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.large_binary.html"> |
| pyarrow.large_binary |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.large_string.html"> |
| pyarrow.large_string |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.large_utf8.html"> |
| pyarrow.large_utf8 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.decimal128.html"> |
| pyarrow.decimal128 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.list_.html"> |
| pyarrow.list_ |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.large_list.html"> |
| pyarrow.large_list |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.map_.html"> |
| pyarrow.map_ |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.struct.html"> |
| pyarrow.struct |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dictionary.html"> |
| pyarrow.dictionary |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.run_end_encoded.html"> |
| pyarrow.run_end_encoded |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.field.html"> |
| pyarrow.field |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.schema.html"> |
| pyarrow.schema |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.from_numpy_dtype.html"> |
| pyarrow.from_numpy_dtype |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.unify_schemas.html"> |
| pyarrow.unify_schemas |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.DataType.html"> |
| pyarrow.DataType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.DictionaryType.html"> |
| pyarrow.DictionaryType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ListType.html"> |
| pyarrow.ListType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.MapType.html"> |
| pyarrow.MapType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.StructType.html"> |
| pyarrow.StructType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UnionType.html"> |
| pyarrow.UnionType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.TimestampType.html"> |
| pyarrow.TimestampType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Time32Type.html"> |
| pyarrow.Time32Type |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Time64Type.html"> |
| pyarrow.Time64Type |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.FixedSizeBinaryType.html"> |
| pyarrow.FixedSizeBinaryType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Decimal128Type.html"> |
| pyarrow.Decimal128Type |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Field.html"> |
| pyarrow.Field |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Schema.html"> |
| pyarrow.Schema |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.RunEndEncodedType.html"> |
| pyarrow.RunEndEncodedType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ExtensionType.html"> |
| pyarrow.ExtensionType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.PyExtensionType.html"> |
| pyarrow.PyExtensionType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.register_extension_type.html"> |
| pyarrow.register_extension_type |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.unregister_extension_type.html"> |
| pyarrow.unregister_extension_type |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_boolean.html"> |
| pyarrow.types.is_boolean |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_integer.html"> |
| pyarrow.types.is_integer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_signed_integer.html"> |
| pyarrow.types.is_signed_integer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_unsigned_integer.html"> |
| pyarrow.types.is_unsigned_integer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_int8.html"> |
| pyarrow.types.is_int8 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_int16.html"> |
| pyarrow.types.is_int16 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_int32.html"> |
| pyarrow.types.is_int32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_int64.html"> |
| pyarrow.types.is_int64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_uint8.html"> |
| pyarrow.types.is_uint8 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_uint16.html"> |
| pyarrow.types.is_uint16 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_uint32.html"> |
| pyarrow.types.is_uint32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_uint64.html"> |
| pyarrow.types.is_uint64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_floating.html"> |
| pyarrow.types.is_floating |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_float16.html"> |
| pyarrow.types.is_float16 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_float32.html"> |
| pyarrow.types.is_float32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_float64.html"> |
| pyarrow.types.is_float64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_decimal.html"> |
| pyarrow.types.is_decimal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_list.html"> |
| pyarrow.types.is_list |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_large_list.html"> |
| pyarrow.types.is_large_list |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_struct.html"> |
| pyarrow.types.is_struct |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_union.html"> |
| pyarrow.types.is_union |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_nested.html"> |
| pyarrow.types.is_nested |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_run_end_encoded.html"> |
| pyarrow.types.is_run_end_encoded |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_temporal.html"> |
| pyarrow.types.is_temporal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_timestamp.html"> |
| pyarrow.types.is_timestamp |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_date.html"> |
| pyarrow.types.is_date |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_date32.html"> |
| pyarrow.types.is_date32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_date64.html"> |
| pyarrow.types.is_date64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_time.html"> |
| pyarrow.types.is_time |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_time32.html"> |
| pyarrow.types.is_time32 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_time64.html"> |
| pyarrow.types.is_time64 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_null.html"> |
| pyarrow.types.is_null |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_binary.html"> |
| pyarrow.types.is_binary |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_unicode.html"> |
| pyarrow.types.is_unicode |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_string.html"> |
| pyarrow.types.is_string |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_large_binary.html"> |
| pyarrow.types.is_large_binary |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_large_unicode.html"> |
| pyarrow.types.is_large_unicode |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_large_string.html"> |
| pyarrow.types.is_large_string |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_fixed_size_binary.html"> |
| pyarrow.types.is_fixed_size_binary |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_map.html"> |
| pyarrow.types.is_map |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.types.is_dictionary.html"> |
| pyarrow.types.is_dictionary |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/arrays.html"> |
| Arrays and Scalars |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-12" name="toctree-checkbox-12" type="checkbox"/> |
| <label for="toctree-checkbox-12"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.array.html"> |
| pyarrow.array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.nulls.html"> |
| pyarrow.nulls |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Array.html"> |
| pyarrow.Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.BooleanArray.html"> |
| pyarrow.BooleanArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.FloatingPointArray.html"> |
| pyarrow.FloatingPointArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.IntegerArray.html"> |
| pyarrow.IntegerArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Int8Array.html"> |
| pyarrow.Int8Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Int16Array.html"> |
| pyarrow.Int16Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Int32Array.html"> |
| pyarrow.Int32Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Int64Array.html"> |
| pyarrow.Int64Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.NullArray.html"> |
| pyarrow.NullArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.NumericArray.html"> |
| pyarrow.NumericArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UInt8Array.html"> |
| pyarrow.UInt8Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UInt16Array.html"> |
| pyarrow.UInt16Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UInt32Array.html"> |
| pyarrow.UInt32Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UInt64Array.html"> |
| pyarrow.UInt64Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.BinaryArray.html"> |
| pyarrow.BinaryArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.StringArray.html"> |
| pyarrow.StringArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.FixedSizeBinaryArray.html"> |
| pyarrow.FixedSizeBinaryArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.LargeBinaryArray.html"> |
| pyarrow.LargeBinaryArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.LargeStringArray.html"> |
| pyarrow.LargeStringArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Time32Array.html"> |
| pyarrow.Time32Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Time64Array.html"> |
| pyarrow.Time64Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Date32Array.html"> |
| pyarrow.Date32Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Date64Array.html"> |
| pyarrow.Date64Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.TimestampArray.html"> |
| pyarrow.TimestampArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.DurationArray.html"> |
| pyarrow.DurationArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.MonthDayNanoIntervalArray.html"> |
| pyarrow.MonthDayNanoIntervalArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Decimal128Array.html"> |
| pyarrow.Decimal128Array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.DictionaryArray.html"> |
| pyarrow.DictionaryArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ListArray.html"> |
| pyarrow.ListArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.FixedSizeListArray.html"> |
| pyarrow.FixedSizeListArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.LargeListArray.html"> |
| pyarrow.LargeListArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.MapArray.html"> |
| pyarrow.MapArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.RunEndEncodedArray.html"> |
| pyarrow.RunEndEncodedArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.StructArray.html"> |
| pyarrow.StructArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UnionArray.html"> |
| pyarrow.UnionArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ExtensionArray.html"> |
| pyarrow.ExtensionArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.scalar.html"> |
| pyarrow.scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.NA.html"> |
| pyarrow.NA |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Scalar.html"> |
| pyarrow.Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.BooleanScalar.html"> |
| pyarrow.BooleanScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Int8Scalar.html"> |
| pyarrow.Int8Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Int16Scalar.html"> |
| pyarrow.Int16Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Int32Scalar.html"> |
| pyarrow.Int32Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Int64Scalar.html"> |
| pyarrow.Int64Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UInt8Scalar.html"> |
| pyarrow.UInt8Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UInt16Scalar.html"> |
| pyarrow.UInt16Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UInt32Scalar.html"> |
| pyarrow.UInt32Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UInt64Scalar.html"> |
| pyarrow.UInt64Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.FloatScalar.html"> |
| pyarrow.FloatScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.DoubleScalar.html"> |
| pyarrow.DoubleScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.BinaryScalar.html"> |
| pyarrow.BinaryScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.StringScalar.html"> |
| pyarrow.StringScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.FixedSizeBinaryScalar.html"> |
| pyarrow.FixedSizeBinaryScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.LargeBinaryScalar.html"> |
| pyarrow.LargeBinaryScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.LargeStringScalar.html"> |
| pyarrow.LargeStringScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Time32Scalar.html"> |
| pyarrow.Time32Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Time64Scalar.html"> |
| pyarrow.Time64Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Date32Scalar.html"> |
| pyarrow.Date32Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Date64Scalar.html"> |
| pyarrow.Date64Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.TimestampScalar.html"> |
| pyarrow.TimestampScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.DurationScalar.html"> |
| pyarrow.DurationScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.MonthDayNanoIntervalScalar.html"> |
| pyarrow.MonthDayNanoIntervalScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Decimal128Scalar.html"> |
| pyarrow.Decimal128Scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.DictionaryScalar.html"> |
| pyarrow.DictionaryScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.RunEndEncodedScalar.html"> |
| pyarrow.RunEndEncodedScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ListScalar.html"> |
| pyarrow.ListScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.LargeListScalar.html"> |
| pyarrow.LargeListScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.MapScalar.html"> |
| pyarrow.MapScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.StructScalar.html"> |
| pyarrow.StructScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.UnionScalar.html"> |
| pyarrow.UnionScalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ExtensionScalar.html"> |
| pyarrow.ExtensionScalar |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/memory.html"> |
| Buffers and Memory |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-13" name="toctree-checkbox-13" type="checkbox"/> |
| <label for="toctree-checkbox-13"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.allocate_buffer.html"> |
| pyarrow.allocate_buffer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.py_buffer.html"> |
| pyarrow.py_buffer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.foreign_buffer.html"> |
| pyarrow.foreign_buffer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Buffer.html"> |
| pyarrow.Buffer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ResizableBuffer.html"> |
| pyarrow.ResizableBuffer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Codec.html"> |
| pyarrow.Codec |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compress.html"> |
| pyarrow.compress |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.decompress.html"> |
| pyarrow.decompress |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.MemoryPool.html"> |
| pyarrow.MemoryPool |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.default_memory_pool.html"> |
| pyarrow.default_memory_pool |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.jemalloc_memory_pool.html"> |
| pyarrow.jemalloc_memory_pool |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.mimalloc_memory_pool.html"> |
| pyarrow.mimalloc_memory_pool |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.system_memory_pool.html"> |
| pyarrow.system_memory_pool |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.jemalloc_set_decay_ms.html"> |
| pyarrow.jemalloc_set_decay_ms |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.set_memory_pool.html"> |
| pyarrow.set_memory_pool |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.log_memory_allocations.html"> |
| pyarrow.log_memory_allocations |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.total_allocated_bytes.html"> |
| pyarrow.total_allocated_bytes |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/tables.html"> |
| Tables and Tensors |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-14" name="toctree-checkbox-14" type="checkbox"/> |
| <label for="toctree-checkbox-14"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.chunked_array.html"> |
| pyarrow.chunked_array |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.concat_arrays.html"> |
| pyarrow.concat_arrays |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.concat_tables.html"> |
| pyarrow.concat_tables |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.record_batch.html"> |
| pyarrow.record_batch |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.table.html"> |
| pyarrow.table |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ChunkedArray.html"> |
| pyarrow.ChunkedArray |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.RecordBatch.html"> |
| pyarrow.RecordBatch |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Table.html"> |
| pyarrow.Table |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.TableGroupBy.html"> |
| pyarrow.TableGroupBy |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.RecordBatchReader.html"> |
| pyarrow.RecordBatchReader |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.Tensor.html"> |
| pyarrow.Tensor |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/compute.html"> |
| Compute Functions |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-15" name="toctree-checkbox-15" type="checkbox"/> |
| <label for="toctree-checkbox-15"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.all.html"> |
| pyarrow.compute.all |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.any.html"> |
| pyarrow.compute.any |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.approximate_median.html"> |
| pyarrow.compute.approximate_median |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.count.html"> |
| pyarrow.compute.count |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.count_distinct.html"> |
| pyarrow.compute.count_distinct |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.index.html"> |
| pyarrow.compute.index |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.max.html"> |
| pyarrow.compute.max |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.mean.html"> |
| pyarrow.compute.mean |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.min.html"> |
| pyarrow.compute.min |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.min_max.html"> |
| pyarrow.compute.min_max |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.mode.html"> |
| pyarrow.compute.mode |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.product.html"> |
| pyarrow.compute.product |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.quantile.html"> |
| pyarrow.compute.quantile |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.stddev.html"> |
| pyarrow.compute.stddev |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.sum.html"> |
| pyarrow.compute.sum |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.tdigest.html"> |
| pyarrow.compute.tdigest |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.variance.html"> |
| pyarrow.compute.variance |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.cumulative_sum.html"> |
| pyarrow.compute.cumulative_sum |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.cumulative_sum_checked.html"> |
| pyarrow.compute.cumulative_sum_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.abs.html"> |
| pyarrow.compute.abs |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.abs_checked.html"> |
| pyarrow.compute.abs_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.add.html"> |
| pyarrow.compute.add |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.add_checked.html"> |
| pyarrow.compute.add_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.divide.html"> |
| pyarrow.compute.divide |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.divide_checked.html"> |
| pyarrow.compute.divide_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.multiply.html"> |
| pyarrow.compute.multiply |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.multiply_checked.html"> |
| pyarrow.compute.multiply_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.negate.html"> |
| pyarrow.compute.negate |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.negate_checked.html"> |
| pyarrow.compute.negate_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.power.html"> |
| pyarrow.compute.power |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.power_checked.html"> |
| pyarrow.compute.power_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.sign.html"> |
| pyarrow.compute.sign |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.sqrt.html"> |
| pyarrow.compute.sqrt |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.sqrt_checked.html"> |
| pyarrow.compute.sqrt_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.subtract.html"> |
| pyarrow.compute.subtract |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.subtract_checked.html"> |
| pyarrow.compute.subtract_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.bit_wise_and.html"> |
| pyarrow.compute.bit_wise_and |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.bit_wise_not.html"> |
| pyarrow.compute.bit_wise_not |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.bit_wise_or.html"> |
| pyarrow.compute.bit_wise_or |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.bit_wise_xor.html"> |
| pyarrow.compute.bit_wise_xor |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.shift_left.html"> |
| pyarrow.compute.shift_left |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.shift_left_checked.html"> |
| pyarrow.compute.shift_left_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.shift_right.html"> |
| pyarrow.compute.shift_right |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.shift_right_checked.html"> |
| pyarrow.compute.shift_right_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ceil.html"> |
| pyarrow.compute.ceil |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.floor.html"> |
| pyarrow.compute.floor |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.round.html"> |
| pyarrow.compute.round |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.round_to_multiple.html"> |
| pyarrow.compute.round_to_multiple |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.trunc.html"> |
| pyarrow.compute.trunc |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ln.html"> |
| pyarrow.compute.ln |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ln_checked.html"> |
| pyarrow.compute.ln_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.log10.html"> |
| pyarrow.compute.log10 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.log10_checked.html"> |
| pyarrow.compute.log10_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.log1p.html"> |
| pyarrow.compute.log1p |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.log1p_checked.html"> |
| pyarrow.compute.log1p_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.log2.html"> |
| pyarrow.compute.log2 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.log2_checked.html"> |
| pyarrow.compute.log2_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.logb.html"> |
| pyarrow.compute.logb |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.logb_checked.html"> |
| pyarrow.compute.logb_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.acos.html"> |
| pyarrow.compute.acos |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.acos_checked.html"> |
| pyarrow.compute.acos_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.asin.html"> |
| pyarrow.compute.asin |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.asin_checked.html"> |
| pyarrow.compute.asin_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.atan.html"> |
| pyarrow.compute.atan |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.atan2.html"> |
| pyarrow.compute.atan2 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.cos.html"> |
| pyarrow.compute.cos |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.cos_checked.html"> |
| pyarrow.compute.cos_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.sin.html"> |
| pyarrow.compute.sin |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.sin_checked.html"> |
| pyarrow.compute.sin_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.tan.html"> |
| pyarrow.compute.tan |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.tan_checked.html"> |
| pyarrow.compute.tan_checked |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.equal.html"> |
| pyarrow.compute.equal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.greater.html"> |
| pyarrow.compute.greater |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.greater_equal.html"> |
| pyarrow.compute.greater_equal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.less.html"> |
| pyarrow.compute.less |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.less_equal.html"> |
| pyarrow.compute.less_equal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.not_equal.html"> |
| pyarrow.compute.not_equal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.max_element_wise.html"> |
| pyarrow.compute.max_element_wise |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.min_element_wise.html"> |
| pyarrow.compute.min_element_wise |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.and_.html"> |
| pyarrow.compute.and_ |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.and_kleene.html"> |
| pyarrow.compute.and_kleene |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.and_not.html"> |
| pyarrow.compute.and_not |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.and_not_kleene.html"> |
| pyarrow.compute.and_not_kleene |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.invert.html"> |
| pyarrow.compute.invert |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.or_.html"> |
| pyarrow.compute.or_ |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.or_kleene.html"> |
| pyarrow.compute.or_kleene |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.xor.html"> |
| pyarrow.compute.xor |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_alnum.html"> |
| pyarrow.compute.ascii_is_alnum |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_alpha.html"> |
| pyarrow.compute.ascii_is_alpha |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_decimal.html"> |
| pyarrow.compute.ascii_is_decimal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_lower.html"> |
| pyarrow.compute.ascii_is_lower |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_printable.html"> |
| pyarrow.compute.ascii_is_printable |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_space.html"> |
| pyarrow.compute.ascii_is_space |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_upper.html"> |
| pyarrow.compute.ascii_is_upper |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_alnum.html"> |
| pyarrow.compute.utf8_is_alnum |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_alpha.html"> |
| pyarrow.compute.utf8_is_alpha |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_decimal.html"> |
| pyarrow.compute.utf8_is_decimal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_digit.html"> |
| pyarrow.compute.utf8_is_digit |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_lower.html"> |
| pyarrow.compute.utf8_is_lower |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_numeric.html"> |
| pyarrow.compute.utf8_is_numeric |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_printable.html"> |
| pyarrow.compute.utf8_is_printable |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_space.html"> |
| pyarrow.compute.utf8_is_space |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_upper.html"> |
| pyarrow.compute.utf8_is_upper |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_title.html"> |
| pyarrow.compute.ascii_is_title |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_title.html"> |
| pyarrow.compute.utf8_is_title |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.string_is_ascii.html"> |
| pyarrow.compute.string_is_ascii |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_capitalize.html"> |
| pyarrow.compute.ascii_capitalize |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_lower.html"> |
| pyarrow.compute.ascii_lower |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_reverse.html"> |
| pyarrow.compute.ascii_reverse |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_swapcase.html"> |
| pyarrow.compute.ascii_swapcase |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_title.html"> |
| pyarrow.compute.ascii_title |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_upper.html"> |
| pyarrow.compute.ascii_upper |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.binary_length.html"> |
| pyarrow.compute.binary_length |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.binary_repeat.html"> |
| pyarrow.compute.binary_repeat |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.binary_replace_slice.html"> |
| pyarrow.compute.binary_replace_slice |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.binary_reverse.html"> |
| pyarrow.compute.binary_reverse |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.replace_substring.html"> |
| pyarrow.compute.replace_substring |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.replace_substring_regex.html"> |
| pyarrow.compute.replace_substring_regex |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_capitalize.html"> |
| pyarrow.compute.utf8_capitalize |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_length.html"> |
| pyarrow.compute.utf8_length |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_lower.html"> |
| pyarrow.compute.utf8_lower |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_replace_slice.html"> |
| pyarrow.compute.utf8_replace_slice |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_reverse.html"> |
| pyarrow.compute.utf8_reverse |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_swapcase.html"> |
| pyarrow.compute.utf8_swapcase |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_title.html"> |
| pyarrow.compute.utf8_title |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_upper.html"> |
| pyarrow.compute.utf8_upper |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_center.html"> |
| pyarrow.compute.ascii_center |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_lpad.html"> |
| pyarrow.compute.ascii_lpad |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_rpad.html"> |
| pyarrow.compute.ascii_rpad |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_center.html"> |
| pyarrow.compute.utf8_center |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_lpad.html"> |
| pyarrow.compute.utf8_lpad |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_rpad.html"> |
| pyarrow.compute.utf8_rpad |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_ltrim.html"> |
| pyarrow.compute.ascii_ltrim |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_ltrim_whitespace.html"> |
| pyarrow.compute.ascii_ltrim_whitespace |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_rtrim.html"> |
| pyarrow.compute.ascii_rtrim |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_rtrim_whitespace.html"> |
| pyarrow.compute.ascii_rtrim_whitespace |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_trim.html"> |
| pyarrow.compute.ascii_trim |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_trim_whitespace.html"> |
| pyarrow.compute.ascii_trim_whitespace |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_ltrim.html"> |
| pyarrow.compute.utf8_ltrim |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_ltrim_whitespace.html"> |
| pyarrow.compute.utf8_ltrim_whitespace |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_rtrim.html"> |
| pyarrow.compute.utf8_rtrim |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_rtrim_whitespace.html"> |
| pyarrow.compute.utf8_rtrim_whitespace |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_trim.html"> |
| pyarrow.compute.utf8_trim |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_trim_whitespace.html"> |
| pyarrow.compute.utf8_trim_whitespace |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ascii_split_whitespace.html"> |
| pyarrow.compute.ascii_split_whitespace |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.split_pattern.html"> |
| pyarrow.compute.split_pattern |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.split_pattern_regex.html"> |
| pyarrow.compute.split_pattern_regex |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_split_whitespace.html"> |
| pyarrow.compute.utf8_split_whitespace |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.extract_regex.html"> |
| pyarrow.compute.extract_regex |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.binary_join.html"> |
| pyarrow.compute.binary_join |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.binary_join_element_wise.html"> |
| pyarrow.compute.binary_join_element_wise |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.binary_slice.html"> |
| pyarrow.compute.binary_slice |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.utf8_slice_codeunits.html"> |
| pyarrow.compute.utf8_slice_codeunits |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.count_substring.html"> |
| pyarrow.compute.count_substring |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.count_substring_regex.html"> |
| pyarrow.compute.count_substring_regex |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ends_with.html"> |
| pyarrow.compute.ends_with |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.find_substring.html"> |
| pyarrow.compute.find_substring |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.find_substring_regex.html"> |
| pyarrow.compute.find_substring_regex |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.index_in.html"> |
| pyarrow.compute.index_in |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.is_in.html"> |
| pyarrow.compute.is_in |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.match_like.html"> |
| pyarrow.compute.match_like |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.match_substring.html"> |
| pyarrow.compute.match_substring |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.match_substring_regex.html"> |
| pyarrow.compute.match_substring_regex |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.starts_with.html"> |
| pyarrow.compute.starts_with |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.indices_nonzero.html"> |
| pyarrow.compute.indices_nonzero |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.is_finite.html"> |
| pyarrow.compute.is_finite |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.is_inf.html"> |
| pyarrow.compute.is_inf |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.is_nan.html"> |
| pyarrow.compute.is_nan |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.is_null.html"> |
| pyarrow.compute.is_null |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.is_valid.html"> |
| pyarrow.compute.is_valid |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.true_unless_null.html"> |
| pyarrow.compute.true_unless_null |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.case_when.html"> |
| pyarrow.compute.case_when |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.choose.html"> |
| pyarrow.compute.choose |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.coalesce.html"> |
| pyarrow.compute.coalesce |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.if_else.html"> |
| pyarrow.compute.if_else |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.cast.html"> |
| pyarrow.compute.cast |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ceil_temporal.html"> |
| pyarrow.compute.ceil_temporal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.floor_temporal.html"> |
| pyarrow.compute.floor_temporal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.round_temporal.html"> |
| pyarrow.compute.round_temporal |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.run_end_decode.html"> |
| pyarrow.compute.run_end_decode |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.run_end_encode.html"> |
| pyarrow.compute.run_end_encode |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.strftime.html"> |
| pyarrow.compute.strftime |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.strptime.html"> |
| pyarrow.compute.strptime |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.day.html"> |
| pyarrow.compute.day |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.day_of_week.html"> |
| pyarrow.compute.day_of_week |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.day_of_year.html"> |
| pyarrow.compute.day_of_year |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.hour.html"> |
| pyarrow.compute.hour |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.iso_week.html"> |
| pyarrow.compute.iso_week |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.iso_year.html"> |
| pyarrow.compute.iso_year |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.iso_calendar.html"> |
| pyarrow.compute.iso_calendar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.is_leap_year.html"> |
| pyarrow.compute.is_leap_year |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.microsecond.html"> |
| pyarrow.compute.microsecond |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.millisecond.html"> |
| pyarrow.compute.millisecond |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.minute.html"> |
| pyarrow.compute.minute |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.month.html"> |
| pyarrow.compute.month |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.nanosecond.html"> |
| pyarrow.compute.nanosecond |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.quarter.html"> |
| pyarrow.compute.quarter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.second.html"> |
| pyarrow.compute.second |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.subsecond.html"> |
| pyarrow.compute.subsecond |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.us_week.html"> |
| pyarrow.compute.us_week |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.us_year.html"> |
| pyarrow.compute.us_year |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.week.html"> |
| pyarrow.compute.week |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.year.html"> |
| pyarrow.compute.year |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.year_month_day.html"> |
| pyarrow.compute.year_month_day |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.day_time_interval_between.html"> |
| pyarrow.compute.day_time_interval_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.days_between.html"> |
| pyarrow.compute.days_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.hours_between.html"> |
| pyarrow.compute.hours_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.microseconds_between.html"> |
| pyarrow.compute.microseconds_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.milliseconds_between.html"> |
| pyarrow.compute.milliseconds_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.minutes_between.html"> |
| pyarrow.compute.minutes_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.month_day_nano_interval_between.html"> |
| pyarrow.compute.month_day_nano_interval_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.month_interval_between.html"> |
| pyarrow.compute.month_interval_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.nanoseconds_between.html"> |
| pyarrow.compute.nanoseconds_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.quarters_between.html"> |
| pyarrow.compute.quarters_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.seconds_between.html"> |
| pyarrow.compute.seconds_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.weeks_between.html"> |
| pyarrow.compute.weeks_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.years_between.html"> |
| pyarrow.compute.years_between |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.assume_timezone.html"> |
| pyarrow.compute.assume_timezone |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.dictionary_encode.html"> |
| pyarrow.compute.dictionary_encode |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.unique.html"> |
| pyarrow.compute.unique |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.value_counts.html"> |
| pyarrow.compute.value_counts |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.array_filter.html"> |
| pyarrow.compute.array_filter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.array_take.html"> |
| pyarrow.compute.array_take |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.drop_null.html"> |
| pyarrow.compute.drop_null |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.filter.html"> |
| pyarrow.compute.filter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.take.html"> |
| pyarrow.compute.take |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.array_sort_indices.html"> |
| pyarrow.compute.array_sort_indices |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.partition_nth_indices.html"> |
| pyarrow.compute.partition_nth_indices |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.select_k_unstable.html"> |
| pyarrow.compute.select_k_unstable |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.sort_indices.html"> |
| pyarrow.compute.sort_indices |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.fill_null.html"> |
| pyarrow.compute.fill_null |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.fill_null_backward.html"> |
| pyarrow.compute.fill_null_backward |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.fill_null_forward.html"> |
| pyarrow.compute.fill_null_forward |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.list_element.html"> |
| pyarrow.compute.list_element |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.list_flatten.html"> |
| pyarrow.compute.list_flatten |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.list_parent_indices.html"> |
| pyarrow.compute.list_parent_indices |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.list_slice.html"> |
| pyarrow.compute.list_slice |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.list_value_length.html"> |
| pyarrow.compute.list_value_length |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.make_struct.html"> |
| pyarrow.compute.make_struct |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.map_lookup.html"> |
| pyarrow.compute.map_lookup |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.replace_with_mask.html"> |
| pyarrow.compute.replace_with_mask |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.struct_field.html"> |
| pyarrow.compute.struct_field |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ArraySortOptions.html"> |
| pyarrow.compute.ArraySortOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.AssumeTimezoneOptions.html"> |
| pyarrow.compute.AssumeTimezoneOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.CastOptions.html"> |
| pyarrow.compute.CastOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.CountOptions.html"> |
| pyarrow.compute.CountOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.CumulativeSumOptions.html"> |
| pyarrow.compute.CumulativeSumOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.DayOfWeekOptions.html"> |
| pyarrow.compute.DayOfWeekOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.DictionaryEncodeOptions.html"> |
| pyarrow.compute.DictionaryEncodeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ElementWiseAggregateOptions.html"> |
| pyarrow.compute.ElementWiseAggregateOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ExtractRegexOptions.html"> |
| pyarrow.compute.ExtractRegexOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.FilterOptions.html"> |
| pyarrow.compute.FilterOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.IndexOptions.html"> |
| pyarrow.compute.IndexOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.JoinOptions.html"> |
| pyarrow.compute.JoinOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ListSliceOptions.html"> |
| pyarrow.compute.ListSliceOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.MakeStructOptions.html"> |
| pyarrow.compute.MakeStructOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.MapLookupOptions.html"> |
| pyarrow.compute.MapLookupOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.MatchSubstringOptions.html"> |
| pyarrow.compute.MatchSubstringOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ModeOptions.html"> |
| pyarrow.compute.ModeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.NullOptions.html"> |
| pyarrow.compute.NullOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.PadOptions.html"> |
| pyarrow.compute.PadOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.PartitionNthOptions.html"> |
| pyarrow.compute.PartitionNthOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.QuantileOptions.html"> |
| pyarrow.compute.QuantileOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ReplaceSliceOptions.html"> |
| pyarrow.compute.ReplaceSliceOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ReplaceSubstringOptions.html"> |
| pyarrow.compute.ReplaceSubstringOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.RoundOptions.html"> |
| pyarrow.compute.RoundOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.RoundTemporalOptions.html"> |
| pyarrow.compute.RoundTemporalOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.RoundToMultipleOptions.html"> |
| pyarrow.compute.RoundToMultipleOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.RunEndEncodeOptions.html"> |
| pyarrow.compute.RunEndEncodeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ScalarAggregateOptions.html"> |
| pyarrow.compute.ScalarAggregateOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.SelectKOptions.html"> |
| pyarrow.compute.SelectKOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.SetLookupOptions.html"> |
| pyarrow.compute.SetLookupOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.SliceOptions.html"> |
| pyarrow.compute.SliceOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.SortOptions.html"> |
| pyarrow.compute.SortOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.SplitOptions.html"> |
| pyarrow.compute.SplitOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.SplitPatternOptions.html"> |
| pyarrow.compute.SplitPatternOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.StrftimeOptions.html"> |
| pyarrow.compute.StrftimeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.StrptimeOptions.html"> |
| pyarrow.compute.StrptimeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.StructFieldOptions.html"> |
| pyarrow.compute.StructFieldOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.TakeOptions.html"> |
| pyarrow.compute.TakeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.TDigestOptions.html"> |
| pyarrow.compute.TDigestOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.TrimOptions.html"> |
| pyarrow.compute.TrimOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.VarianceOptions.html"> |
| pyarrow.compute.VarianceOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.WeekOptions.html"> |
| pyarrow.compute.WeekOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.register_scalar_function.html"> |
| pyarrow.compute.register_scalar_function |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.compute.ScalarUdfContext.html"> |
| pyarrow.compute.ScalarUdfContext |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/acero.html"> |
| Acero - Streaming Execution Engine |
| </a> |
| <!-- Checkbox plus icon-only label placed before this entry's nested list (presumably the theme's CSS expand/collapse control; confirm). The label has no text, so the accessible name is set on the checkbox. --> |
| <input class="toctree-checkbox" id="toctree-checkbox-16" name="toctree-checkbox-16" type="checkbox" aria-label="Toggle navigation of Acero - Streaming Execution Engine"/> |
| <label for="toctree-checkbox-16"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.acero.Declaration.html"> |
| pyarrow.acero.Declaration |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.acero.ExecNodeOptions.html"> |
| pyarrow.acero.ExecNodeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.acero.TableSourceNodeOptions.html"> |
| pyarrow.acero.TableSourceNodeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.acero.ScanNodeOptions.html"> |
| pyarrow.acero.ScanNodeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.acero.FilterNodeOptions.html"> |
| pyarrow.acero.FilterNodeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.acero.ProjectNodeOptions.html"> |
| pyarrow.acero.ProjectNodeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.acero.AggregateNodeOptions.html"> |
| pyarrow.acero.AggregateNodeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.acero.OrderByNodeOptions.html"> |
| pyarrow.acero.OrderByNodeOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.acero.HashJoinNodeOptions.html"> |
| pyarrow.acero.HashJoinNodeOptions |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/substrait.html"> |
| Substrait |
| </a> |
| <!-- Checkbox plus icon-only label placed before this entry's nested list (presumably the theme's CSS expand/collapse control; confirm). The label has no text, so the accessible name is set on the checkbox. --> |
| <input class="toctree-checkbox" id="toctree-checkbox-17" name="toctree-checkbox-17" type="checkbox" aria-label="Toggle navigation of Substrait"/> |
| <label for="toctree-checkbox-17"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.substrait.run_query.html"> |
| pyarrow.substrait.run_query |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.substrait.get_supported_functions.html"> |
| pyarrow.substrait.get_supported_functions |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/files.html"> |
| Streams and File Access |
| </a> |
| <!-- Checkbox plus icon-only label placed before this entry's nested list (presumably the theme's CSS expand/collapse control; confirm). The label has no text, so the accessible name is set on the checkbox. --> |
| <input class="toctree-checkbox" id="toctree-checkbox-18" name="toctree-checkbox-18" type="checkbox" aria-label="Toggle navigation of Streams and File Access"/> |
| <label for="toctree-checkbox-18"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.input_stream.html"> |
| pyarrow.input_stream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.output_stream.html"> |
| pyarrow.output_stream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.memory_map.html"> |
| pyarrow.memory_map |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.create_memory_map.html"> |
| pyarrow.create_memory_map |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.NativeFile.html"> |
| pyarrow.NativeFile |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.OSFile.html"> |
| pyarrow.OSFile |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.PythonFile.html"> |
| pyarrow.PythonFile |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.BufferReader.html"> |
| pyarrow.BufferReader |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.BufferOutputStream.html"> |
| pyarrow.BufferOutputStream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.FixedSizeBufferWriter.html"> |
| pyarrow.FixedSizeBufferWriter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.MemoryMappedFile.html"> |
| pyarrow.MemoryMappedFile |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.CompressedInputStream.html"> |
| pyarrow.CompressedInputStream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.CompressedOutputStream.html"> |
| pyarrow.CompressedOutputStream |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/ipc.html"> |
| Serialization and IPC |
| </a> |
| <!-- Checkbox plus icon-only label placed before this entry's nested list (presumably the theme's CSS expand/collapse control; confirm). The label has no text, so the accessible name is set on the checkbox. --> |
| <input class="toctree-checkbox" id="toctree-checkbox-19" name="toctree-checkbox-19" type="checkbox" aria-label="Toggle navigation of Serialization and IPC"/> |
| <label for="toctree-checkbox-19"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.new_file.html"> |
| pyarrow.ipc.new_file |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.open_file.html"> |
| pyarrow.ipc.open_file |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.new_stream.html"> |
| pyarrow.ipc.new_stream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.open_stream.html"> |
| pyarrow.ipc.open_stream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.read_message.html"> |
| pyarrow.ipc.read_message |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.read_record_batch.html"> |
| pyarrow.ipc.read_record_batch |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.get_record_batch_size.html"> |
| pyarrow.ipc.get_record_batch_size |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.read_tensor.html"> |
| pyarrow.ipc.read_tensor |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.write_tensor.html"> |
| pyarrow.ipc.write_tensor |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.get_tensor_size.html"> |
| pyarrow.ipc.get_tensor_size |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.IpcReadOptions.html"> |
| pyarrow.ipc.IpcReadOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.IpcWriteOptions.html"> |
| pyarrow.ipc.IpcWriteOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.Message.html"> |
| pyarrow.ipc.Message |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.MessageReader.html"> |
| pyarrow.ipc.MessageReader |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.RecordBatchFileReader.html"> |
| pyarrow.ipc.RecordBatchFileReader |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.RecordBatchFileWriter.html"> |
| pyarrow.ipc.RecordBatchFileWriter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.RecordBatchStreamReader.html"> |
| pyarrow.ipc.RecordBatchStreamReader |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.ipc.RecordBatchStreamWriter.html"> |
| pyarrow.ipc.RecordBatchStreamWriter |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/flight.html"> |
| Arrow Flight |
| </a> |
| <!-- Checkbox plus icon-only label placed before this entry's nested list (presumably the theme's CSS expand/collapse control; confirm). The label has no text, so the accessible name is set on the checkbox. --> |
| <input class="toctree-checkbox" id="toctree-checkbox-20" name="toctree-checkbox-20" type="checkbox" aria-label="Toggle navigation of Arrow Flight"/> |
| <label for="toctree-checkbox-20"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.Action.html"> |
| pyarrow.flight.Action |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.ActionType.html"> |
| pyarrow.flight.ActionType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.DescriptorType.html"> |
| pyarrow.flight.DescriptorType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightDescriptor.html"> |
| pyarrow.flight.FlightDescriptor |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightEndpoint.html"> |
| pyarrow.flight.FlightEndpoint |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightInfo.html"> |
| pyarrow.flight.FlightInfo |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.Location.html"> |
| pyarrow.flight.Location |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.MetadataRecordBatchReader.html"> |
| pyarrow.flight.MetadataRecordBatchReader |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.MetadataRecordBatchWriter.html"> |
| pyarrow.flight.MetadataRecordBatchWriter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.Ticket.html"> |
| pyarrow.flight.Ticket |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.Result.html"> |
| pyarrow.flight.Result |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.connect.html"> |
| pyarrow.flight.connect |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightCallOptions.html"> |
| pyarrow.flight.FlightCallOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightClient.html"> |
| pyarrow.flight.FlightClient |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightStreamReader.html"> |
| pyarrow.flight.FlightStreamReader |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightStreamWriter.html"> |
| pyarrow.flight.FlightStreamWriter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.ClientMiddlewareFactory.html"> |
| pyarrow.flight.ClientMiddlewareFactory |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.ClientMiddleware.html"> |
| pyarrow.flight.ClientMiddleware |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightDataStream.html"> |
| pyarrow.flight.FlightDataStream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightMetadataWriter.html"> |
| pyarrow.flight.FlightMetadataWriter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightServerBase.html"> |
| pyarrow.flight.FlightServerBase |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.GeneratorStream.html"> |
| pyarrow.flight.GeneratorStream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.RecordBatchStream.html"> |
| pyarrow.flight.RecordBatchStream |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.ServerCallContext.html"> |
| pyarrow.flight.ServerCallContext |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.ServerMiddlewareFactory.html"> |
| pyarrow.flight.ServerMiddlewareFactory |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.ServerMiddleware.html"> |
| pyarrow.flight.ServerMiddleware |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.ClientAuthHandler.html"> |
| pyarrow.flight.ClientAuthHandler |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.ServerAuthHandler.html"> |
| pyarrow.flight.ServerAuthHandler |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightError.html"> |
| pyarrow.flight.FlightError |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightCancelledError.html"> |
| pyarrow.flight.FlightCancelledError |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightInternalError.html"> |
| pyarrow.flight.FlightInternalError |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightServerError.html"> |
| pyarrow.flight.FlightServerError |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightTimedOutError.html"> |
| pyarrow.flight.FlightTimedOutError |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightUnauthenticatedError.html"> |
| pyarrow.flight.FlightUnauthenticatedError |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightUnauthorizedError.html"> |
| pyarrow.flight.FlightUnauthorizedError |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightUnavailableError.html"> |
| pyarrow.flight.FlightUnavailableError |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightWriteSizeExceededError.html"> |
| pyarrow.flight.FlightWriteSizeExceededError |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.FlightMethod.html"> |
| pyarrow.flight.FlightMethod |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.flight.CallInfo.html"> |
| pyarrow.flight.CallInfo |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/formats.html"> |
| Tabular File Formats |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-21" name="toctree-checkbox-21" type="checkbox"/> |
| <label for="toctree-checkbox-21"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.ConvertOptions.html"> |
| pyarrow.csv.ConvertOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.CSVStreamingReader.html"> |
| pyarrow.csv.CSVStreamingReader |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.CSVWriter.html"> |
| pyarrow.csv.CSVWriter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.ISO8601.html"> |
| pyarrow.csv.ISO8601 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.ParseOptions.html"> |
| pyarrow.csv.ParseOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.ReadOptions.html"> |
| pyarrow.csv.ReadOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.WriteOptions.html"> |
| pyarrow.csv.WriteOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.open_csv.html"> |
| pyarrow.csv.open_csv |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.read_csv.html"> |
| pyarrow.csv.read_csv |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.write_csv.html"> |
| pyarrow.csv.write_csv |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.csv.InvalidRow.html"> |
| pyarrow.csv.InvalidRow |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.feather.read_feather.html"> |
| pyarrow.feather.read_feather |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.feather.read_table.html"> |
| pyarrow.feather.read_table |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.feather.write_feather.html"> |
| pyarrow.feather.write_feather |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.json.ReadOptions.html"> |
| pyarrow.json.ReadOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.json.ParseOptions.html"> |
| pyarrow.json.ParseOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.json.read_json.html"> |
| pyarrow.json.read_json |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.ParquetDataset.html"> |
| pyarrow.parquet.ParquetDataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.ParquetFile.html"> |
| pyarrow.parquet.ParquetFile |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.ParquetWriter.html"> |
| pyarrow.parquet.ParquetWriter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.read_table.html"> |
| pyarrow.parquet.read_table |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.read_metadata.html"> |
| pyarrow.parquet.read_metadata |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.read_pandas.html"> |
| pyarrow.parquet.read_pandas |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.read_schema.html"> |
| pyarrow.parquet.read_schema |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.write_metadata.html"> |
| pyarrow.parquet.write_metadata |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.write_table.html"> |
| pyarrow.parquet.write_table |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.write_to_dataset.html"> |
| pyarrow.parquet.write_to_dataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.FileMetaData.html"> |
| pyarrow.parquet.FileMetaData |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.RowGroupMetaData.html"> |
| pyarrow.parquet.RowGroupMetaData |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.ColumnChunkMetaData.html"> |
| pyarrow.parquet.ColumnChunkMetaData |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.Statistics.html"> |
| pyarrow.parquet.Statistics |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.ParquetSchema.html"> |
| pyarrow.parquet.ParquetSchema |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.ColumnSchema.html"> |
| pyarrow.parquet.ColumnSchema |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.ParquetLogicalType.html"> |
| pyarrow.parquet.ParquetLogicalType |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.encryption.CryptoFactory.html"> |
| pyarrow.parquet.encryption.CryptoFactory |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.encryption.KmsClient.html"> |
| pyarrow.parquet.encryption.KmsClient |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.encryption.KmsConnectionConfig.html"> |
| pyarrow.parquet.encryption.KmsConnectionConfig |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.encryption.EncryptionConfiguration.html"> |
| pyarrow.parquet.encryption.EncryptionConfiguration |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.parquet.encryption.DecryptionConfiguration.html"> |
| pyarrow.parquet.encryption.DecryptionConfiguration |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.orc.ORCFile.html"> |
| pyarrow.orc.ORCFile |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.orc.ORCWriter.html"> |
| pyarrow.orc.ORCWriter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.orc.read_table.html"> |
| pyarrow.orc.read_table |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.orc.write_table.html"> |
| pyarrow.orc.write_table |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/filesystems.html"> |
| Filesystems |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-22" name="toctree-checkbox-22" type="checkbox"/> |
| <label for="toctree-checkbox-22"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.FileInfo.html"> |
| pyarrow.fs.FileInfo |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.FileSelector.html"> |
| pyarrow.fs.FileSelector |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.FileSystem.html"> |
| pyarrow.fs.FileSystem |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.LocalFileSystem.html"> |
| pyarrow.fs.LocalFileSystem |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.S3FileSystem.html"> |
| pyarrow.fs.S3FileSystem |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.GcsFileSystem.html"> |
| pyarrow.fs.GcsFileSystem |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.HadoopFileSystem.html"> |
| pyarrow.fs.HadoopFileSystem |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.SubTreeFileSystem.html"> |
| pyarrow.fs.SubTreeFileSystem |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.PyFileSystem.html"> |
| pyarrow.fs.PyFileSystem |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.FileSystemHandler.html"> |
| pyarrow.fs.FileSystemHandler |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.FSSpecHandler.html"> |
| pyarrow.fs.FSSpecHandler |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.copy_files.html"> |
| pyarrow.fs.copy_files |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.initialize_s3.html"> |
| pyarrow.fs.initialize_s3 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.finalize_s3.html"> |
| pyarrow.fs.finalize_s3 |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.resolve_s3_region.html"> |
| pyarrow.fs.resolve_s3_region |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.fs.S3LogLevel.html"> |
| pyarrow.fs.S3LogLevel |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/dataset.html"> |
| Dataset |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-23" name="toctree-checkbox-23" type="checkbox"/> |
| <label for="toctree-checkbox-23"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.dataset.html"> |
| pyarrow.dataset.dataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.parquet_dataset.html"> |
| pyarrow.dataset.parquet_dataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.partitioning.html"> |
| pyarrow.dataset.partitioning |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.field.html"> |
| pyarrow.dataset.field |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.scalar.html"> |
| pyarrow.dataset.scalar |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.write_dataset.html"> |
| pyarrow.dataset.write_dataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.FileFormat.html"> |
| pyarrow.dataset.FileFormat |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.CsvFileFormat.html"> |
| pyarrow.dataset.CsvFileFormat |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.CsvFragmentScanOptions.html"> |
| pyarrow.dataset.CsvFragmentScanOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.IpcFileFormat.html"> |
| pyarrow.dataset.IpcFileFormat |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.ParquetFileFormat.html"> |
| pyarrow.dataset.ParquetFileFormat |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.ParquetReadOptions.html"> |
| pyarrow.dataset.ParquetReadOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.ParquetFragmentScanOptions.html"> |
| pyarrow.dataset.ParquetFragmentScanOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.OrcFileFormat.html"> |
| pyarrow.dataset.OrcFileFormat |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.Partitioning.html"> |
| pyarrow.dataset.Partitioning |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.PartitioningFactory.html"> |
| pyarrow.dataset.PartitioningFactory |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.DirectoryPartitioning.html"> |
| pyarrow.dataset.DirectoryPartitioning |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.HivePartitioning.html"> |
| pyarrow.dataset.HivePartitioning |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.FilenamePartitioning.html"> |
| pyarrow.dataset.FilenamePartitioning |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.Dataset.html"> |
| pyarrow.dataset.Dataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.FileSystemDataset.html"> |
| pyarrow.dataset.FileSystemDataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.FileSystemFactoryOptions.html"> |
| pyarrow.dataset.FileSystemFactoryOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.FileSystemDatasetFactory.html"> |
| pyarrow.dataset.FileSystemDatasetFactory |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.UnionDataset.html"> |
| pyarrow.dataset.UnionDataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.Fragment.html"> |
| pyarrow.dataset.Fragment |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.FragmentScanOptions.html"> |
| pyarrow.dataset.FragmentScanOptions |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.TaggedRecordBatch.html"> |
| pyarrow.dataset.TaggedRecordBatch |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.Scanner.html"> |
| pyarrow.dataset.Scanner |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.Expression.html"> |
| pyarrow.dataset.Expression |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.InMemoryDataset.html"> |
| pyarrow.dataset.InMemoryDataset |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.WrittenFile.html"> |
| pyarrow.dataset.WrittenFile |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.dataset.get_partition_keys.html"> |
| pyarrow.dataset.get_partition_keys |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/cuda.html"> |
| CUDA Integration |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-24" name="toctree-checkbox-24" type="checkbox"/> |
| <label for="toctree-checkbox-24"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.Context.html"> |
| pyarrow.cuda.Context |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.CudaBuffer.html"> |
| pyarrow.cuda.CudaBuffer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.new_host_buffer.html"> |
| pyarrow.cuda.new_host_buffer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.HostBuffer.html"> |
| pyarrow.cuda.HostBuffer |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.BufferReader.html"> |
| pyarrow.cuda.BufferReader |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.BufferWriter.html"> |
| pyarrow.cuda.BufferWriter |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.serialize_record_batch.html"> |
| pyarrow.cuda.serialize_record_batch |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.read_record_batch.html"> |
| pyarrow.cuda.read_record_batch |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.read_message.html"> |
| pyarrow.cuda.read_message |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cuda.IpcMemHandle.html"> |
| pyarrow.cuda.IpcMemHandle |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../python/api/misc.html"> |
| Miscellaneous |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-25" name="toctree-checkbox-25" type="checkbox"/> |
| <label for="toctree-checkbox-25"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.cpu_count.html"> |
| pyarrow.cpu_count |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.set_cpu_count.html"> |
| pyarrow.set_cpu_count |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.io_thread_count.html"> |
| pyarrow.io_thread_count |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.set_io_thread_count.html"> |
| pyarrow.set_io_thread_count |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.get_include.html"> |
| pyarrow.get_include |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.get_libraries.html"> |
| pyarrow.get_libraries |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../python/generated/pyarrow.get_library_dirs.html"> |
| pyarrow.get_library_dirs |
| </a> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/getting_involved.html"> |
| Getting Involved |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../python/benchmarks.html"> |
| Benchmarks |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../r/index.html"> |
| R |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://github.com/apache/arrow/blob/main/ruby/README.md"> |
| Ruby |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://docs.rs/crate/arrow/"> |
| Rust |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../status.html"> |
| Implementation Status |
| </a> |
| </li> |
| </ul> |
| <p aria-level="2" class="caption" role="heading"> |
| <span class="caption-text"> |
| Cookbooks |
| </span> |
| </p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://arrow.apache.org/cookbook/cpp/"> |
| C++ |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://arrow.apache.org/cookbook/java/"> |
| Java |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://arrow.apache.org/cookbook/py/"> |
| Python |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://arrow.apache.org/cookbook/r/"> |
| R |
| </a> |
| </li> |
| </ul> |
| <p aria-level="2" class="caption" role="heading"> |
| <span class="caption-text"> |
| Specifications and Protocols |
| </span> |
| </p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/Versioning.html"> |
| Format Versioning and Stability |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/Columnar.html"> |
| Arrow Columnar Format |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/CanonicalExtensions.html"> |
| Canonical Extension Types |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/Flight.html"> |
| Arrow Flight RPC |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/FlightSql.html"> |
| Arrow Flight SQL |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/Integration.html"> |
| Integration Testing |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/CDataInterface.html"> |
| The Arrow C data interface |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/CStreamInterface.html"> |
| The Arrow C stream interface |
| </a> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../format/ADBC.html"> |
| ADBC: Arrow Database Connectivity |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-26" name="toctree-checkbox-26" type="checkbox"/> |
| <label for="toctree-checkbox-26"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../format/ADBC/C.html"> |
| ADBC C API Specification |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../format/ADBC/Go.html"> |
| ADBC Go API Specification |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../format/ADBC/Java.html"> |
| ADBC Java API Specification |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/Other.html"> |
| Other Data Structures |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../format/Glossary.html"> |
| Glossary |
| </a> |
| </li> |
| </ul> |
| <p aria-level="2" class="caption" role="heading"> |
| <span class="caption-text"> |
| Development |
| </span> |
| </p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../developers/contributing.html"> |
| Contributing to Apache Arrow |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../developers/bug_reports.html"> |
| Bug reports and feature requests |
| </a> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../developers/guide/index.html"> |
| New Contributor’s Guide |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-27" name="toctree-checkbox-27" type="checkbox"/> |
| <label for="toctree-checkbox-27"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/guide/architectural_overview.html"> |
| Architectural Overview |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/guide/communication.html"> |
| Communication |
| </a> |
| </li> |
| <li class="toctree-l2 has-children"> |
| <a class="reference internal" href="../developers/guide/step_by_step/index.html"> |
| Steps in making your first PR |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-28" name="toctree-checkbox-28" type="checkbox"/> |
| <label for="toctree-checkbox-28"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/step_by_step/set_up.html"> |
| Set up |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/step_by_step/building.html"> |
| Building the Arrow libraries 🏋🏿‍♀️ |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/step_by_step/finding_issues.html"> |
| Finding good first issues 🔎 |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/step_by_step/arrow_codebase.html"> |
| Working on the Arrow codebase 🧐 |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/step_by_step/testing.html"> |
| Testing 🧪 |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/step_by_step/styling.html"> |
| Styling 😎 |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/step_by_step/pr_lifecycle.html"> |
| Lifecycle of a pull request |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/guide/documentation.html"> |
| Helping with documentation |
| </a> |
| </li> |
| <li class="toctree-l2 has-children"> |
| <a class="reference internal" href="../developers/guide/tutorials/index.html"> |
| Tutorials |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-29" name="toctree-checkbox-29" type="checkbox"/> |
| <label for="toctree-checkbox-29"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/tutorials/python_tutorial.html"> |
| Python tutorial |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../developers/guide/tutorials/r_tutorial.html"> |
| R tutorials |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/guide/resources.html"> |
| Additional information and resources |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../developers/overview.html"> |
| Contributing Overview |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../developers/reviewing.html"> |
| Reviewing contributions |
| </a> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../developers/cpp/index.html"> |
| C++ Development |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-30" name="toctree-checkbox-30" type="checkbox"/> |
| <label for="toctree-checkbox-30"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/cpp/building.html"> |
| Building Arrow C++ |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/cpp/development.html"> |
| Development Guidelines |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/cpp/windows.html"> |
| Developing on Windows |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/cpp/conventions.html"> |
| Conventions |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/cpp/fuzzing.html"> |
| Fuzzing Arrow C++ |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../developers/java/index.html"> |
| Java Development |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-31" name="toctree-checkbox-31" type="checkbox"/> |
| <label for="toctree-checkbox-31"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/java/building.html"> |
| Building Arrow Java |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/java/development.html"> |
| Development Guidelines |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../developers/python.html"> |
| Python Development |
| </a> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../developers/continuous_integration/index.html"> |
| Continuous Integration |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-32" name="toctree-checkbox-32" type="checkbox"/> |
| <label for="toctree-checkbox-32"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/continuous_integration/overview.html"> |
| Continuous Integration |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/continuous_integration/docker.html"> |
| Running Docker Builds |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/continuous_integration/archery.html"> |
| Daily Development using Archery |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../developers/continuous_integration/crossbow.html"> |
| Packaging and Testing with Crossbow |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../developers/benchmarks.html"> |
| Benchmarks |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../developers/documentation.html"> |
| Building the Documentation |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../developers/release.html"> |
| Release Management Guide |
| </a> |
| </li> |
| </ul> |
| |
| |
| </div> |
| </nav> |
| </div> |
| <div class="sidebar-end-items"> |
| </div> |
| </div> |
| |
| |
| |
| |
| <div class="d-none d-xl-block col-xl-2 bd-toc"> |
| |
| |
| <div class="toc-item"> |
| |
| <div class="tocsection onthispage pt-5 pb-3"> |
| <i class="fas fa-list"></i> On this page |
| </div> |
| |
| <nav id="bd-toc-nav"> |
| <ul class="visible nav section-nav flex-column"> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#motivation"> |
| Motivation |
| </a> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#substrait"> |
| Substrait |
| </a> |
| <ul class="visible nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#substrait-conformance"> |
| Substrait Conformance |
| </a> |
| <ul class="nav section-nav flex-column"> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#plans"> |
| Plans |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#extensions"> |
| Extensions |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#relations-in-general"> |
| Relations (in general) |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#read-relations"> |
| Read Relations |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#filter-relations"> |
| Filter Relations |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#project-relations"> |
| Project Relations |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#join-relations"> |
| Join Relations |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#aggregate-relations"> |
| Aggregate Relations |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#expressions-general"> |
| Expressions (general) |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#literals"> |
| Literals |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#types"> |
| Types |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#functions"> |
| Functions |
| </a> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#architecture-overview"> |
| Architecture Overview |
| </a> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#constructing-execplan-objects"> |
| Constructing |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ExecPlan |
| </span> |
| </code> |
| objects |
| </a> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#constructing-execnode-using-options"> |
| Constructing |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ExecNode |
| </span> |
| </code> |
| using Options |
| </a> |
| <ul class="visible nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#source"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| source |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#table-source"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| table_source |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#filter"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| filter |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#project"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| project |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#aggregate"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| aggregate |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#sink"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| sink |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#consuming-sink"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| consuming_sink |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#order-by-sink"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| order_by_sink |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#select-k-sink"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| select_k_sink |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#table-sink"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| table_sink |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#scan"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| scan |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#write"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| write |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#union"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| union |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#hash-join"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| hash_join |
| </span> |
| </code> |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#summary"> |
| Summary |
| </a> |
| </li> |
| </ul> |
| |
| </nav> |
| </div> |
| |
| <div class="toc-item"> |
| |
| |
| <div class="tocsection editthispage"> |
| <a href="https://github.com/apache/arrow/edit/main/docs/source/cpp/streaming_execution.rst"> |
| <i class="fas fa-pencil-alt"></i> Edit this page |
| </a> |
| </div> |
| |
| </div> |
| |
| |
| </div> |
| |
| |
| |
| |
| |
| |
| <main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main"> |
| |
| <div> |
| |
| <section id="acero-a-c-streaming-execution-engine"> |
| <h1>Acero: A C++ streaming execution engine<a class="headerlink" href="#acero-a-c-streaming-execution-engine" title="Permalink to this heading">¶</a></h1> |
| <div class="admonition warning"> |
| <p class="admonition-title">Warning</p> |
| <p>Acero is experimental and a stable API is not yet guaranteed.</p> |
| </div> |
| <section id="motivation"> |
| <h2>Motivation<a class="headerlink" href="#motivation" title="Permalink to this heading">¶</a></h2> |
| <p>For many complex computations, successive direct <a class="reference internal" href="compute.html#invoking-compute-functions"><span class="std std-ref">invocation of |
| compute functions</span></a> is not feasible |
| in either memory or computation time. Doing so causes all intermediate |
| data to be fully materialized. To facilitate arbitrarily large inputs |
| and more efficient resource usage, the Arrow C++ implementation also |
| provides Acero, a streaming query engine with which computations can |
| be formulated and executed.</p> |
| <img alt="An example graph of a streaming execution workflow." src="../_images/simple_graph.svg" /><p>Acero allows computation to be expressed as an “execution plan” |
| (<code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecPlan</span></code>) which is a directed graph of operators. Each operator |
| (<code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecNode</span></code>) provides, transforms, or consumes the data passing |
| through it. Batches of data (<a class="reference internal" href="api/compute.html#_CPPv4N5arrow7compute9ExecBatchE" title="arrow::compute::ExecBatch"><code class="xref cpp cpp-struct docutils literal notranslate"><span class="pre">ExecBatch</span></code></a>) flow along edges of |
| the graph from node to node. Structuring the API around streams of batches |
| allows the working set for each node to be tuned for optimal performance |
| independent of any other nodes in the graph. Each <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecNode</span></code> |
| processes batches as they are pushed to it along an edge of the graph by |
| upstream nodes (its inputs), and pushes batches along an edge of the graph |
| to downstream nodes (its outputs) as they are finalized.</p> |
| <div class="admonition seealso"> |
| <p class="admonition-title">See also</p> |
| <p><a class="reference external" href="https://doi.org/10.1017/s0956796818000102">SHAIKHHA, A., DASHTI, M., &amp; KOCH, C. |
| (2018). Push versus pull-based loop fusion in query engines. |
| Journal of Functional Programming, 28.</a></p> |
| </div> |
| </section> |
| <section id="substrait"> |
| <h2>Substrait<a class="headerlink" href="#substrait" title="Permalink to this heading">¶</a></h2> |
| <p>In order to use Acero you will need to create an execution plan. This is the |
| model that describes the computation you want to apply to your data. Acero has |
| its own internal representation for execution plans but most users should not |
| interact with this directly as it will couple their code to Acero.</p> |
| <p><a class="reference external" href="https://substrait.io">Substrait</a> is an open standard for execution plans. |
| Acero implements the Substrait “consumer” interface. This means that Acero can |
| accept a Substrait plan and fulfill the plan, loading the requested data and |
| applying the desired computation. By using Substrait plans users can easily |
| switch out to a different execution engine at a later time.</p> |
| <section id="substrait-conformance"> |
| <h3>Substrait Conformance<a class="headerlink" href="#substrait-conformance" title="Permalink to this heading">¶</a></h3> |
| <p>Substrait defines a broad set of operators and functions for many different |
| situations and it is unlikely that Acero will ever completely satisfy all |
| defined Substrait operators and functions. To help understand what features |
| are available the following sections define which features have been currently |
| implemented in Acero and any caveats that apply.</p> |
| <section id="plans"> |
| <h4>Plans<a class="headerlink" href="#plans" title="Permalink to this heading">¶</a></h4> |
| <blockquote> |
| <div><ul class="simple"> |
| <li><p>A plan should have a single top-level relation.</p></li> |
| <li><p>The consumer is currently based on version 0.20.0 of Substrait. |
| Features added in later versions of Substrait are not supported.</p></li> |
| <li><p>Due to a breaking change in 0.20.0 any Substrait plan older than 0.20.0 |
| will be rejected.</p></li> |
| </ul> |
| </div></blockquote> |
| </section> |
| <section id="extensions"> |
| <h4>Extensions<a class="headerlink" href="#extensions" title="Permalink to this heading">¶</a></h4> |
| <blockquote> |
| <div><ul class="simple"> |
| <li><p>If a plan contains any extension type variations it will be rejected.</p></li> |
| <li><p>Advanced extensions can be provided by supplying a custom implementation of |
| <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::engine::ExtensionProvider</span></code>.</p></li> |
| </ul> |
| </div></blockquote> |
| </section> |
| <section id="relations-in-general"> |
| <h4>Relations (in general)<a class="headerlink" href="#relations-in-general" title="Permalink to this heading">¶</a></h4> |
| <blockquote> |
| <div><ul class="simple"> |
| <li><p>Any relation not explicitly listed below will not be supported |
| and will cause the plan to be rejected.</p></li> |
| </ul> |
| </div></blockquote> |
| </section> |
| <section id="read-relations"> |
| <h4>Read Relations<a class="headerlink" href="#read-relations" title="Permalink to this heading">¶</a></h4> |
| <blockquote> |
| <div><ul class="simple"> |
| <li><p>The <code class="docutils literal notranslate"><span class="pre">projection</span></code> property is not supported and plans containing this |
| property will be rejected.</p></li> |
| <li><p>The <code class="docutils literal notranslate"><span class="pre">VirtualTable</span></code> and <code class="docutils literal notranslate"><span class="pre">ExtensionTable</span></code> read types are not supported. |
| Plans containing these types will be rejected.</p></li> |
| <li><p>Only the parquet and arrow file formats are currently supported.</p></li> |
| <li><p>All URIs must use the <code class="docutils literal notranslate"><span class="pre">file</span></code> scheme.</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">partition_index</span></code>, <code class="docutils literal notranslate"><span class="pre">start</span></code>, and <code class="docutils literal notranslate"><span class="pre">length</span></code> are not supported. Plans containing |
| non-default values for these properties will be rejected.</p></li> |
| <li><p>The Substrait spec requires that a <code class="docutils literal notranslate"><span class="pre">filter</span></code> be completely satisfied by a read |
| relation. However, Acero only uses a read filter for pushdown projection and |
| it may not be fully satisfied. Users should generally attach an additional |
| filter relation with the same filter expression after the read relation.</p></li> |
| </ul> |
| </div></blockquote> |
| </section> |
| <section id="filter-relations"> |
| <h4>Filter Relations<a class="headerlink" href="#filter-relations" title="Permalink to this heading">¶</a></h4> |
| <blockquote> |
| <div><ul class="simple"> |
| <li><p>No known caveats</p></li> |
| </ul> |
| </div></blockquote> |
| </section> |
| <section id="project-relations"> |
| <h4>Project Relations<a class="headerlink" href="#project-relations" title="Permalink to this heading">¶</a></h4> |
| <blockquote> |
| <div><ul class="simple"> |
| <li><p>No known caveats</p></li> |
| </ul> |
| </div></blockquote> |
| </section> |
| <section id="join-relations"> |
| <h4>Join Relations<a class="headerlink" href="#join-relations" title="Permalink to this heading">¶</a></h4> |
| <blockquote> |
| <div><ul class="simple"> |
| <li><p>The join type <code class="docutils literal notranslate"><span class="pre">JOIN_TYPE_SINGLE</span></code> is not supported and plans containing this |
| will be rejected.</p></li> |
| <li><p>The join expression must be a call to either the <code class="docutils literal notranslate"><span class="pre">equal</span></code> or <code class="docutils literal notranslate"><span class="pre">is_not_distinct_from</span></code> |
| functions. Both arguments to the call must be direct references. Only a single |
| join key is supported.</p></li> |
| <li><p>The <code class="docutils literal notranslate"><span class="pre">post_join_filter</span></code> property is not supported and will be ignored.</p></li> |
| </ul> |
| </div></blockquote> |
| </section> |
| <section id="aggregate-relations"> |
| <h4>Aggregate Relations<a class="headerlink" href="#aggregate-relations" title="Permalink to this heading">¶</a></h4> |
| <blockquote> |
| <div><ul class="simple"> |
| <li><p>At most one grouping set is supported.</p></li> |
| <li><p>Each grouping expression must be a direct reference.</p></li> |
| <li><p>Each measure’s arguments must be direct references.</p></li> |
| <li><p>A measure may not have a filter.</p></li> |
| <li><p>A measure may not have sorts.</p></li> |
| <li><p>A measure’s invocation must be AGGREGATION_INVOCATION_ALL or |
| AGGREGATION_INVOCATION_UNSPECIFIED.</p></li> |
| <li><p>A measure’s phase must be AGGREGATION_PHASE_INITIAL_TO_RESULT.</p></li> |
| </ul> |
| </div></blockquote> |
| </section> |
| <section id="expressions-general"> |
| <h4>Expressions (general)<a class="headerlink" href="#expressions-general" title="Permalink to this heading">¶</a></h4> |
| <blockquote> |
| <div><ul class="simple"> |
| <li><p>Various places in the Substrait spec allow for expressions to be used outside |
| of a filter or project relation. For example, a join expression or an aggregate |
| grouping set. Acero typically expects these expressions to be direct references. |
| Planners should extract the implicit projection into a formal project relation |
| before delivering the plan to Acero.</p></li> |
| </ul> |
| </div></blockquote> |
| </section> |
| <section id="literals"> |
| <h4>Literals<a class="headerlink" href="#literals" title="Permalink to this heading">¶</a></h4> |
| <blockquote> |
| <div><ul class="simple"> |
| <li><p>A literal with non-default nullability will cause a plan to be rejected.</p></li> |
| </ul> |
| </div></blockquote> |
| </section> |
| <section id="types"> |
| <h4>Types<a class="headerlink" href="#types" title="Permalink to this heading">¶</a></h4> |
| <blockquote> |
| <div><ul class="simple"> |
| <li><p>Acero does not have full support for non-nullable types and may allow input |
| to have nulls without rejecting it.</p></li> |
| <li><p>The table below shows the mapping between Arrow types and Substrait type |
| classes that are currently supported.</p></li> |
| </ul> |
| </div></blockquote> |
| <table class="table" id="id4"> |
| <caption><span class="caption-text">Substrait / Arrow Type Mapping</span><a class="headerlink" href="#id4" title="Permalink to this table">¶</a></caption> |
| <colgroup> |
| <col style="width: 25.0%" /> |
| <col style="width: 25.0%" /> |
| <col style="width: 50.0%" /> |
| </colgroup> |
| <thead> |
| <tr class="row-odd"><th class="head"><p>Substrait Type</p></th> |
| <th class="head"><p>Arrow Type</p></th> |
| <th class="head"><p>Caveat</p></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="row-even"><td><p>boolean</p></td> |
| <td><p>boolean</p></td> |
| <td></td> |
| </tr> |
| <tr class="row-odd"><td><p>i8</p></td> |
| <td><p>int8</p></td> |
| <td></td> |
| </tr> |
| <tr class="row-even"><td><p>i16</p></td> |
| <td><p>int16</p></td> |
| <td></td> |
| </tr> |
| <tr class="row-odd"><td><p>i32</p></td> |
| <td><p>int32</p></td> |
| <td></td> |
| </tr> |
| <tr class="row-even"><td><p>i64</p></td> |
| <td><p>int64</p></td> |
| <td></td> |
| </tr> |
| <tr class="row-odd"><td><p>fp32</p></td> |
| <td><p>float32</p></td> |
| <td></td> |
| </tr> |
| <tr class="row-even"><td><p>fp64</p></td> |
| <td><p>float64</p></td> |
| <td></td> |
| </tr> |
| <tr class="row-odd"><td><p>string</p></td> |
| <td><p>string</p></td> |
| <td></td> |
| </tr> |
| <tr class="row-even"><td><p>binary</p></td> |
| <td><p>binary</p></td> |
| <td></td> |
| </tr> |
| <tr class="row-odd"><td><p>timestamp</p></td> |
| <td><p>timestamp&lt;MICRO,""&gt;</p></td> |
| <td></td> |
| </tr> |
| <tr class="row-even"><td><p>timestamp_tz</p></td> |
| <td><p>timestamp&lt;MICRO,"UTC"&gt;</p></td> |
| <td></td> |
| </tr> |
| <tr class="row-odd"><td><p>date</p></td> |
| <td><p>date32&lt;DAY&gt;</p></td> |
| <td></td> |
| </tr> |
| <tr class="row-even"><td><p>time</p></td> |
| <td><p>time64&lt;MICRO&gt;</p></td> |
| <td></td> |
| </tr> |
| <tr class="row-odd"><td><p>interval_year</p></td> |
| <td></td> |
| <td><p>Not currently supported</p></td> |
| </tr> |
| <tr class="row-even"><td><p>interval_day</p></td> |
| <td></td> |
| <td><p>Not currently supported</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>uuid</p></td> |
| <td></td> |
| <td><p>Not currently supported</p></td> |
| </tr> |
| <tr class="row-even"><td><p>FIXEDCHAR&lt;L&gt;</p></td> |
| <td></td> |
| <td><p>Not currently supported</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>VARCHAR&lt;L&gt;</p></td> |
| <td></td> |
| <td><p>Not currently supported</p></td> |
| </tr> |
| <tr class="row-even"><td><p>FIXEDBINARY&lt;L&gt;</p></td> |
| <td><p>fixed_size_binary&lt;L&gt;</p></td> |
| <td></td> |
| </tr> |
| <tr class="row-odd"><td><p>DECIMAL&lt;P,S&gt;</p></td> |
| <td><p>decimal128&lt;P,S&gt;</p></td> |
| <td></td> |
| </tr> |
| <tr class="row-even"><td><p>STRUCT&lt;T1…TN&gt;</p></td> |
| <td><p>struct&lt;T1…TN&gt;</p></td> |
| <td><p>Arrow struct fields will have no name (empty string)</p></td> |
| </tr> |
| <tr class="row-odd"><td><p>NSTRUCT&lt;N:T1…N:Tn&gt;</p></td> |
| <td></td> |
| <td><p>Not currently supported</p></td> |
| </tr> |
| <tr class="row-even"><td><p>LIST&lt;T&gt;</p></td> |
| <td><p>list&lt;T&gt;</p></td> |
| <td></td> |
| </tr> |
| <tr class="row-odd"><td><p>MAP&lt;K,V&gt;</p></td> |
| <td><p>map&lt;K,V&gt;</p></td> |
| <td><p>K must not be nullable</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </section> |
| <section id="functions"> |
| <h4>Functions<a class="headerlink" href="#functions" title="Permalink to this heading">¶</a></h4> |
| <blockquote> |
| <div><ul> |
| <li><p>The following functions have caveats or are not supported at all. Note that |
| this is not a comprehensive list. Functions are being added to Substrait at |
| a rapid pace and new functions may be missing.</p> |
| <ul class="simple"> |
| <li><p>Acero does not support the SATURATE option for overflow</p></li> |
| <li><p>Acero does not support kernels that take more than two arguments |
| for the functions <code class="docutils literal notranslate"><span class="pre">and</span></code>, <code class="docutils literal notranslate"><span class="pre">or</span></code>, <code class="docutils literal notranslate"><span class="pre">xor</span></code></p></li> |
| <li><p>Acero does not support temporal arithmetic</p></li> |
| <li><p>Acero does not support the following standard functions:</p> |
| <ul> |
| <li><p><code class="docutils literal notranslate"><span class="pre">is_not_distinct_from</span></code></p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">like</span></code></p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">substring</span></code></p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">starts_with</span></code></p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">ends_with</span></code></p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">contains</span></code></p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">count</span></code></p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">count_distinct</span></code></p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">approx_count_distinct</span></code></p></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li><p>The functions above should be referenced using the URI |
| <code class="docutils literal notranslate"><span class="pre">https://github.com/apache/arrow/blob/main/format/substrait/extension_types.yaml</span></code></p> |
| <blockquote> |
| <div><ul class="simple"> |
| <li><p>Alternatively, the URI can be left completely empty and Acero will match |
| based only on function name. This fallback mechanism is non-standard and should |
| be avoided if possible.</p></li> |
| </ul> |
| </div></blockquote> |
| </li> |
| </ul> |
| </div></blockquote> |
| </section> |
| </section> |
| </section> |
| <section id="architecture-overview"> |
| <h2>Architecture Overview<a class="headerlink" href="#architecture-overview" title="Permalink to this heading">¶</a></h2> |
| <dl class="simple"> |
| <dt><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecNode</span></code></dt><dd><p>Each node in the graph is an implementation of the <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecNode</span></code> interface.</p> |
| </dd> |
| <dt><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecPlan</span></code></dt><dd><p>A set of <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecNode</span></code> is contained and (to an extent) coordinated by an |
| <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecPlan</span></code>.</p> |
| </dd> |
| <dt><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecFactoryRegistry</span></code></dt><dd><p>Instances of <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecNode</span></code> are constructed by factory functions held |
| in an <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecFactoryRegistry</span></code>.</p> |
| </dd> |
| <dt><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecNodeOptions</span></code></dt><dd><p>Heterogeneous parameters for factories of <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecNode</span></code> are bundled in an |
| <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecNodeOptions</span></code>.</p> |
| </dd> |
| <dt><code class="xref cpp cpp-struct docutils literal notranslate"><span class="pre">Declaration</span></code></dt><dd><p><code class="docutils literal notranslate"><span class="pre">dplyr</span></code>-inspired helper for efficient construction of an <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecPlan</span></code>.</p> |
| </dd> |
| <dt><a class="reference internal" href="api/compute.html#_CPPv4N5arrow7compute9ExecBatchE" title="arrow::compute::ExecBatch"><code class="xref cpp cpp-struct docutils literal notranslate"><span class="pre">ExecBatch</span></code></a></dt><dd><p>A lightweight container for a single chunk of data in the Arrow format. In |
| contrast to <a class="reference internal" href="api/table.html#_CPPv4N5arrow11RecordBatchE" title="arrow::RecordBatch"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatch</span></code></a>, <a class="reference internal" href="api/compute.html#_CPPv4N5arrow7compute9ExecBatchE" title="arrow::compute::ExecBatch"><code class="xref cpp cpp-struct docutils literal notranslate"><span class="pre">ExecBatch</span></code></a> is intended for use |
| exclusively in a streaming execution context (for example, it doesn’t have a |
| corresponding Python binding). Furthermore, columns which happen to have a |
| constant value may be represented by a <a class="reference internal" href="api/scalar.html#_CPPv4N5arrow6ScalarE" title="arrow::Scalar"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Scalar</span></code></a> instead of an |
| <a class="reference internal" href="api/array.html#_CPPv4N5arrow5ArrayE" title="arrow::Array"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Array</span></code></a>. In addition, <a class="reference internal" href="api/compute.html#_CPPv4N5arrow7compute9ExecBatchE" title="arrow::compute::ExecBatch"><code class="xref cpp cpp-struct docutils literal notranslate"><span class="pre">ExecBatch</span></code></a> may carry |
| execution-relevant properties including a guaranteed-true-filter |
| for <a class="reference internal" href="api/compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Expression</span></code></a> simplification.</p> |
| </dd> |
| </dl> |
| <p>An example <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecNode</span></code> implementation which simply passes all input batches |
| through unchanged:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">PassthruNode</span><span class="w"> </span><span class="o">:</span><span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">ExecNode</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="k">public</span><span class="o">:</span> |
| <span class="w"> </span><span class="c1">// InputReceived is the main entry point for ExecNodes. It is invoked</span> |
| <span class="w"> </span><span class="c1">// by an input of this node to push a batch here for processing.</span> |
| <span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="n">InputReceived</span><span class="p">(</span><span class="n">ExecNode</span><span class="o">*</span><span class="w"> </span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="n">ExecBatch</span><span class="w"> </span><span class="n">batch</span><span class="p">)</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="c1">// Since this is a passthru node we simply push the batch to our</span> |
| <span class="w"> </span><span class="c1">// only output here.</span> |
| <span class="w"> </span><span class="n">outputs_</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-></span><span class="n">InputReceived</span><span class="p">(</span><span class="k">this</span><span class="p">,</span><span class="w"> </span><span class="n">batch</span><span class="p">);</span> |
| <span class="w"> </span><span class="p">}</span> |
| |
| <span class="w"> </span><span class="c1">// ErrorReceived is called by an input of this node to report an error.</span> |
| <span class="w"> </span><span class="c1">// ExecNodes should always forward errors to their outputs unless they</span> |
| <span class="w"> </span><span class="c1">// are able to fully handle the error (this is rare).</span> |
| <span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="n">ErrorReceived</span><span class="p">(</span><span class="n">ExecNode</span><span class="o">*</span><span class="w"> </span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="n">Status</span><span class="w"> </span><span class="n">error</span><span class="p">)</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="n">outputs_</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-></span><span class="n">ErrorReceived</span><span class="p">(</span><span class="k">this</span><span class="p">,</span><span class="w"> </span><span class="n">error</span><span class="p">);</span> |
| <span class="w"> </span><span class="p">}</span> |
| |
| <span class="w"> </span><span class="c1">// InputFinished is used to signal how many batches will ultimately arrive.</span> |
| <span class="w"> </span><span class="c1">// It may be called with any ordering relative to InputReceived/ErrorReceived.</span> |
| <span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="n">InputFinished</span><span class="p">(</span><span class="n">ExecNode</span><span class="o">*</span><span class="w"> </span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">total_batches</span><span class="p">)</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="n">outputs_</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-></span><span class="n">InputFinished</span><span class="p">(</span><span class="k">this</span><span class="p">,</span><span class="w"> </span><span class="n">total_batches</span><span class="p">);</span> |
| <span class="w"> </span><span class="p">}</span> |
| |
| <span class="w"> </span><span class="c1">// ExecNodes may request that their inputs throttle production of batches</span> |
| <span class="w"> </span><span class="c1">// until they are ready for more, or stop production if no further batches</span> |
| <span class="w"> </span><span class="c1">// are required. These signals should typically be forwarded to the inputs</span> |
| <span class="w"> </span><span class="c1">// of the ExecNode.</span> |
| <span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="n">ResumeProducing</span><span class="p">(</span><span class="n">ExecNode</span><span class="o">*</span><span class="w"> </span><span class="n">output</span><span class="p">)</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">inputs_</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-></span><span class="n">ResumeProducing</span><span class="p">(</span><span class="k">this</span><span class="p">);</span><span class="w"> </span><span class="p">}</span> |
| <span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="n">PauseProducing</span><span class="p">(</span><span class="n">ExecNode</span><span class="o">*</span><span class="w"> </span><span class="n">output</span><span class="p">)</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">inputs_</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-></span><span class="n">PauseProducing</span><span class="p">(</span><span class="k">this</span><span class="p">);</span><span class="w"> </span><span class="p">}</span> |
| <span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="n">StopProducing</span><span class="p">(</span><span class="n">ExecNode</span><span class="o">*</span><span class="w"> </span><span class="n">output</span><span class="p">)</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">inputs_</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-></span><span class="n">StopProducing</span><span class="p">(</span><span class="k">this</span><span class="p">);</span><span class="w"> </span><span class="p">}</span> |
| |
| <span class="w"> </span><span class="c1">// An ExecNode has a single output schema to which all its batches conform.</span> |
| <span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="n">ExecNode</span><span class="o">::</span><span class="n">output_schema</span><span class="p">;</span> |
| |
| <span class="w"> </span><span class="c1">// ExecNodes carry basic introspection for debugging purposes</span> |
| <span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="kt">char</span><span class="o">*</span><span class="w"> </span><span class="nf">kind_name</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="s">"PassthruNode"</span><span class="p">;</span><span class="w"> </span><span class="p">}</span> |
| <span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="n">ExecNode</span><span class="o">::</span><span class="n">label</span><span class="p">;</span> |
| <span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="n">ExecNode</span><span class="o">::</span><span class="n">SetLabel</span><span class="p">;</span> |
| <span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="n">ExecNode</span><span class="o">::</span><span class="n">ToString</span><span class="p">;</span> |
| |
| <span class="w"> </span><span class="c1">// An ExecNode holds references to its inputs and outputs, so it is possible</span> |
| <span class="w"> </span><span class="c1">// to walk the graph of execution if necessary.</span> |
| <span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="n">ExecNode</span><span class="o">::</span><span class="n">inputs</span><span class="p">;</span> |
| <span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="n">ExecNode</span><span class="o">::</span><span class="n">outputs</span><span class="p">;</span> |
| |
| <span class="w"> </span><span class="c1">// StartProducing() and StopProducing() are invoked by an ExecPlan to</span> |
| <span class="w"> </span><span class="c1">// coordinate the graph-wide execution state. These do not need to be</span> |
| <span class="w"> </span><span class="c1">// forwarded to inputs or outputs.</span> |
| <span class="w"> </span><span class="n">Status</span><span class="w"> </span><span class="nf">StartProducing</span><span class="p">()</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span><span class="w"> </span><span class="p">}</span> |
| <span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="nf">StopProducing</span><span class="p">()</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{}</span> |
| <span class="w"> </span><span class="n">Future</span><span class="o"><></span><span class="w"> </span><span class="n">finished</span><span class="p">()</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">inputs_</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-></span><span class="n">finished</span><span class="p">();</span><span class="w"> </span><span class="p">}</span> |
| <span class="p">};</span> |
| </pre></div> |
| </div> |
| <p>Note that each method which is associated with an edge of the graph must be invoked |
| with an <code class="docutils literal notranslate"><span class="pre">ExecNode*</span></code> to identify the node which invoked it. For example, in an |
| <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecNode</span></code> which implements <code class="docutils literal notranslate"><span class="pre">JOIN</span></code> this tagging might be used to differentiate |
| between batches from the left or right inputs. |
| <code class="docutils literal notranslate"><span class="pre">InputReceived</span></code>, <code class="docutils literal notranslate"><span class="pre">ErrorReceived</span></code>, <code class="docutils literal notranslate"><span class="pre">InputFinished</span></code> may only be invoked by |
| the inputs of a node, while <code class="docutils literal notranslate"><span class="pre">ResumeProducing</span></code>, <code class="docutils literal notranslate"><span class="pre">PauseProducing</span></code>, <code class="docutils literal notranslate"><span class="pre">StopProducing</span></code> |
| may only be invoked by outputs of a node.</p> |
| <p><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecPlan</span></code> contains the associated instances of <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecNode</span></code> |
| and is used to start and stop execution of all nodes and for querying/awaiting |
| their completion:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="c1">// construct an ExecPlan first to hold your nodes</span> |
| <span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">plan</span><span class="p">,</span><span class="w"> </span><span class="n">ExecPlan</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">default_exec_context</span><span class="p">()));</span> |
| |
| <span class="c1">// ... add nodes to your ExecPlan</span> |
| |
| <span class="c1">// start all nodes in the graph</span> |
| <span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">plan</span><span class="o">-></span><span class="n">StartProducing</span><span class="p">());</span> |
| |
| <span class="n">SetUserCancellationCallback</span><span class="p">([</span><span class="n">plan</span><span class="p">]</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="c1">// stop all nodes in the graph</span> |
| <span class="w"> </span><span class="n">plan</span><span class="o">-></span><span class="n">StopProducing</span><span class="p">();</span> |
| <span class="p">});</span> |
| |
| <span class="c1">// Complete will be marked finished when all nodes have run to completion</span> |
| <span class="c1">// or acknowledged a StopProducing() signal. The ExecPlan should be kept</span> |
| <span class="c1">// alive until this future is marked finished.</span> |
| <span class="n">Future</span><span class="o"><></span><span class="w"> </span><span class="n">complete</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">plan</span><span class="o">-></span><span class="n">finished</span><span class="p">();</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="constructing-execplan-objects"> |
| <h2>Constructing <code class="docutils literal notranslate"><span class="pre">ExecPlan</span></code> objects<a class="headerlink" href="#constructing-execplan-objects" title="Permalink to this heading">¶</a></h2> |
| <p>None of the concrete implementations of <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecNode</span></code> are exposed |
| in headers, so they can’t be constructed directly outside the |
| translation unit where they are defined. Instead, factories to |
| create them are provided in an extensible registry. This structure |
| provides a number of benefits:</p> |
| <ul class="simple"> |
| <li><p>This enforces consistent construction.</p></li> |
| <li><p>It decouples implementations from consumers of the interface |
| (for example: we have two classes for scalar and grouped aggregate, and |
| we can choose which to construct within the single factory by |
| checking whether grouping keys are provided).</p></li> |
| <li><p>This expedites integration with out-of-library extensions. For example, |
| “scan” nodes are implemented in the separate <code class="docutils literal notranslate"><span class="pre">libarrow_dataset.so</span></code> library.</p></li> |
| <li><p>Since the class is not referenceable outside the translation unit in which it |
| is defined, compilers can optimize more aggressively.</p></li> |
| </ul> |
| <p>Factories of <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecNode</span></code> can be retrieved by name from the registry. |
| The default registry is available through |
| <code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">arrow::compute::default_exec_factory_registry()</span></code> |
| and can be queried for the built-in factories:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="c1">// get the factory for "filter" nodes:</span> |
| <span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">make_filter</span><span class="p">,</span> |
| <span class="w"> </span><span class="n">default_exec_factory_registry</span><span class="p">()</span><span class="o">-></span><span class="n">GetFactory</span><span class="p">(</span><span class="s">"filter"</span><span class="p">));</span> |
| |
| <span class="c1">// factories take three arguments:</span> |
| <span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">ExecNode</span><span class="o">*</span><span class="w"> </span><span class="n">filter_node</span><span class="p">,</span><span class="w"> </span><span class="n">make_filter</span><span class="p">(</span> |
| <span class="w"> </span><span class="c1">// the ExecPlan which should own this node</span> |
| <span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span> |
| |
| <span class="w"> </span><span class="c1">// nodes which will send batches to this node (inputs)</span> |
| <span class="w"> </span><span class="p">{</span><span class="n">scan_node</span><span class="p">},</span> |
| |
| <span class="w"> </span><span class="c1">// parameters unique to "filter" nodes</span> |
| <span class="w"> </span><span class="n">FilterNodeOptions</span><span class="p">{</span><span class="n">filter_expression</span><span class="p">}));</span> |
| |
| <span class="c1">// alternative shorthand:</span> |
| <span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">filter_node</span><span class="p">,</span><span class="w"> </span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">"filter"</span><span class="p">,</span> |
| <span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{</span><span class="n">scan_node</span><span class="p">},</span><span class="w"> </span><span class="n">FilterNodeOptions</span><span class="p">{</span><span class="n">filter_expression</span><span class="p">}));</span> |
| </pre></div> |
| </div> |
| <p>Factories can also be added to the default registry as long as they are |
| convertible to <code class="docutils literal notranslate"><span class="pre">std::function<Result<ExecNode*>(</span> |
| <span class="pre">ExecPlan*,</span> <span class="pre">std::vector<ExecNode*>,</span> <span class="pre">const</span> <span class="pre">ExecNodeOptions&)></span></code>.</p> |
| <p>To build an <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecPlan</span></code> representing a simple pipeline which |
| reads from a <a class="reference internal" href="api/table.html#_CPPv4N5arrow17RecordBatchReaderE" title="arrow::RecordBatchReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatchReader</span></code></a> then filters, projects, and |
| writes to disk:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">RecordBatchReader</span><span class="o">></span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">GetStreamOfBatches</span><span class="p">();</span> |
| <span class="n">ExecNode</span><span class="o">*</span><span class="w"> </span><span class="n">source_node</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">*</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{},</span> |
| <span class="w"> </span><span class="n">SourceNodeOptions</span><span class="o">::</span><span class="n">FromReader</span><span class="p">(</span> |
| <span class="w"> </span><span class="n">reader</span><span class="p">,</span> |
| <span class="w"> </span><span class="n">GetCpuThreadPool</span><span class="p">()));</span> |
| |
| <span class="n">ExecNode</span><span class="o">*</span><span class="w"> </span><span class="n">filter_node</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">*</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">"filter"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{</span><span class="n">source_node</span><span class="p">},</span> |
| <span class="w"> </span><span class="n">FilterNodeOptions</span><span class="p">{</span> |
| <span class="w"> </span><span class="n">greater</span><span class="p">(</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"score"</span><span class="p">),</span><span class="w"> </span><span class="n">literal</span><span class="p">(</span><span class="mi">3</span><span class="p">))</span> |
| <span class="w"> </span><span class="p">});</span> |
| |
| <span class="n">ExecNode</span><span class="o">*</span><span class="w"> </span><span class="n">project_node</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">*</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">"project"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{</span><span class="n">filter_node</span><span class="p">},</span> |
| <span class="w"> </span><span class="n">ProjectNodeOptions</span><span class="p">{</span> |
| <span class="w"> </span><span class="p">{</span><span class="n">add</span><span class="p">(</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"score"</span><span class="p">),</span><span class="w"> </span><span class="n">literal</span><span class="p">(</span><span class="mi">1</span><span class="p">))},</span> |
| <span class="w"> </span><span class="p">{</span><span class="s">"score + 1"</span><span class="p">}</span> |
| <span class="w"> </span><span class="p">});</span> |
| |
| <span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">internal</span><span class="o">::</span><span class="n">Initialize</span><span class="p">();</span> |
| <span class="n">MakeExecNode</span><span class="p">(</span><span class="s">"write"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{</span><span class="n">project_node</span><span class="p">},</span> |
| <span class="w"> </span><span class="n">WriteNodeOptions</span><span class="p">{</span><span class="cm">/*base_dir=*/</span><span class="s">"/dat"</span><span class="p">,</span><span class="w"> </span><span class="cm">/*...*/</span><span class="p">});</span> |
| </pre></div> |
| </div> |
| <p><code class="xref cpp cpp-struct docutils literal notranslate"><span class="pre">Declaration</span></code> is a <a class="reference external" href="https://dplyr.tidyverse.org">dplyr</a>-inspired |
| helper which further decreases the boilerplate associated with populating |
| an <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecPlan</span></code> from C++:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">internal</span><span class="o">::</span><span class="n">Initialize</span><span class="p">();</span> |
| |
| <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">RecordBatchReader</span><span class="o">></span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">GetStreamOfBatches</span><span class="p">();</span> |
| <span class="n">ASSERT_OK</span><span class="p">(</span><span class="n">Declaration</span><span class="o">::</span><span class="n">Sequence</span><span class="p">(</span> |
| <span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">SourceNodeOptions</span><span class="o">::</span><span class="n">FromReader</span><span class="p">(</span> |
| <span class="w"> </span><span class="n">reader</span><span class="p">,</span> |
| <span class="w"> </span><span class="n">GetCpuThreadPool</span><span class="p">())},</span> |
| <span class="w"> </span><span class="p">{</span><span class="s">"filter"</span><span class="p">,</span><span class="w"> </span><span class="n">FilterNodeOptions</span><span class="p">{</span> |
| <span class="w"> </span><span class="n">greater</span><span class="p">(</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"score"</span><span class="p">),</span><span class="w"> </span><span class="n">literal</span><span class="p">(</span><span class="mi">3</span><span class="p">))}},</span> |
| <span class="w"> </span><span class="p">{</span><span class="s">"project"</span><span class="p">,</span><span class="w"> </span><span class="n">ProjectNodeOptions</span><span class="p">{</span> |
| <span class="w"> </span><span class="p">{</span><span class="n">add</span><span class="p">(</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"score"</span><span class="p">),</span><span class="w"> </span><span class="n">literal</span><span class="p">(</span><span class="mi">1</span><span class="p">))},</span> |
| <span class="w"> </span><span class="p">{</span><span class="s">"score + 1"</span><span class="p">}}},</span> |
| <span class="w"> </span><span class="p">{</span><span class="s">"write"</span><span class="p">,</span><span class="w"> </span><span class="n">WriteNodeOptions</span><span class="p">{</span><span class="cm">/*base_dir=*/</span><span class="s">"/dat"</span><span class="p">,</span><span class="w"> </span><span class="cm">/*...*/</span><span class="p">}},</span> |
| <span class="w"> </span><span class="p">})</span> |
| <span class="w"> </span><span class="p">.</span><span class="n">AddToPlan</span><span class="p">(</span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">()));</span> |
| </pre></div> |
| </div> |
| <p>Note that a source node can wrap anything which resembles a stream of batches. |
| For example, <a class="reference external" href="https://github.com/apache/arrow/pull/11032">PR#11032</a> adds |
| support for use of a <a class="reference external" href="https://duckdb.org">DuckDB</a> query as a source node. |
| Similarly, a sink node can wrap anything which absorbs a stream of batches. |
| In the example above we’re writing completed |
| batches to disk. However, we can also collect these in memory into a <a class="reference internal" href="api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">Table</span></code></a> |
| or forward them to a <a class="reference internal" href="api/table.html#_CPPv4N5arrow17RecordBatchReaderE" title="arrow::RecordBatchReader"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">RecordBatchReader</span></code></a> as an out-of-graph stream. |
| This flexibility allows an <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecPlan</span></code> to be used as streaming middleware |
| between any endpoints which support Arrow formatted batches.</p> |
| <p>An <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::Dataset</span></code></a> can also be wrapped as a source node which |
| pushes all the dataset’s batches into an <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecPlan</span></code>. This factory is added |
| to the default registry with the name <code class="docutils literal notranslate"><span class="pre">"scan"</span></code> by calling |
| <code class="docutils literal notranslate"><span class="pre">arrow::dataset::internal::Initialize()</span></code>:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">internal</span><span class="o">::</span><span class="n">Initialize</span><span class="p">();</span> |
| |
| <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">Dataset</span><span class="o">></span><span class="w"> </span><span class="n">dataset</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">();</span> |
| |
| <span class="n">ASSERT_OK</span><span class="p">(</span><span class="n">Declaration</span><span class="o">::</span><span class="n">Sequence</span><span class="p">(</span> |
| <span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="p">{</span><span class="s">"scan"</span><span class="p">,</span><span class="w"> </span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span> |
| <span class="w"> </span><span class="cm">/* push down predicate, projection, ... */</span><span class="p">}},</span> |
| <span class="w"> </span><span class="p">{</span><span class="s">"filter"</span><span class="p">,</span><span class="w"> </span><span class="n">FilterNodeOptions</span><span class="p">{</span><span class="cm">/* ... */</span><span class="p">}},</span> |
| <span class="w"> </span><span class="c1">// ...</span> |
| <span class="w"> </span><span class="p">})</span> |
| <span class="w"> </span><span class="p">.</span><span class="n">AddToPlan</span><span class="p">(</span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">()));</span> |
| </pre></div> |
| </div> |
| <p>Datasets may be scanned multiple times; just make multiple scan |
| nodes from that dataset. (Useful for a self-join, for example.) |
| Note that producing two scan nodes like this will perform all |
| reads and decodes twice.</p> |
| </section> |
| <section id="constructing-execnode-using-options"> |
| <h2>Constructing <code class="docutils literal notranslate"><span class="pre">ExecNode</span></code> using Options<a class="headerlink" href="#constructing-execnode-using-options" title="Permalink to this heading">¶</a></h2> |
| <p><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecNode</span></code> is the component we use as a building block |
| of an execution plan; the built-in nodes provide operations with various functionalities.</p> |
| <p>This is the list of operations associated with the execution plan:</p> |
| <table class="table" id="id5"> |
| <caption><span class="caption-text">Operations and Options</span><a class="headerlink" href="#id5" title="Permalink to this table">¶</a></caption> |
| <colgroup> |
| <col style="width: 50.0%" /> |
| <col style="width: 50.0%" /> |
| </colgroup> |
| <thead> |
| <tr class="row-odd"><th class="head"><p>Operation</p></th> |
| <th class="head"><p>Options</p></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">source</span></code></p></td> |
| <td><p><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::SourceNodeOptions</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">table_source</span></code></p></td> |
| <td><p><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::TableSourceNodeOptions</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">filter</span></code></p></td> |
| <td><p><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::FilterNodeOptions</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">project</span></code></p></td> |
| <td><p><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::ProjectNodeOptions</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">aggregate</span></code></p></td> |
| <td><p><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::AggregateNodeOptions</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">sink</span></code></p></td> |
| <td><p><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::SinkNodeOptions</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">consuming_sink</span></code></p></td> |
| <td><p><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::ConsumingSinkNodeOptions</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">order_by_sink</span></code></p></td> |
| <td><p><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::OrderBySinkNodeOptions</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">select_k_sink</span></code></p></td> |
| <td><p><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::SelectKSinkNodeOptions</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">scan</span></code></p></td> |
| <td><p><a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset15ScanNodeOptionsE" title="arrow::dataset::ScanNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::ScanNodeOptions</span></code></a></p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">hash_join</span></code></p></td> |
| <td><p><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::HashJoinNodeOptions</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">write</span></code></p></td> |
| <td><p><a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset16WriteNodeOptionsE" title="arrow::dataset::WriteNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::WriteNodeOptions</span></code></a></p></td> |
| </tr> |
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">union</span></code></p></td> |
| <td><p>N/A</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">table_sink</span></code></p></td> |
| <td><p><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::TableSinkNodeOptions</span></code></p></td> |
| </tr> |
| </tbody> |
| </table> |
| <section id="source"> |
| <span id="stream-execution-source-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">source</span></code><a class="headerlink" href="#source" title="Permalink to this heading">¶</a></h3> |
| <p>A <code class="docutils literal notranslate"><span class="pre">source</span></code> operation can be considered as an entry point to create a streaming execution plan. |
| <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::SourceNodeOptions</span></code> are used to create the <code class="docutils literal notranslate"><span class="pre">source</span></code> operation. The |
| <code class="docutils literal notranslate"><span class="pre">source</span></code> operation is the most generic and flexible type of source currently available but it can |
| be quite tricky to configure. To process data from files the scan operation is likely a simpler choice.</p> |
| <p>The source node requires some kind of function that can be called to poll for more data. This |
| function should take no arguments and should return an |
| <code class="docutils literal notranslate"><span class="pre">arrow::Future<std::optional<arrow::ExecBatch>></span></code>. |
| This function might be reading a file, iterating through an in memory structure, or receiving data |
| from a network connection. The arrow library refers to these functions as <code class="docutils literal notranslate"><span class="pre">arrow::AsyncGenerator</span></code> |
| and there are a number of utilities for working with these functions. For this example we use |
| a vector of record batches that we’ve already stored in memory. |
| In addition, the schema of the data must be known up front. Acero must know the schema of the data |
| at each stage of the execution graph before any processing has begun. This means we must supply the |
| schema for a source node separately from the data itself.</p> |
| <p>Here we define a struct to hold the data generator definition. This includes in-memory batches, schema |
| and a function that serves as a data generator:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">156</span><span class="k">struct</span><span class="w"> </span><span class="nc">BatchesWithSchema</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">157</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">></span><span class="w"> </span><span class="n">batches</span><span class="p">;</span> |
| <span class="linenos">158</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Schema</span><span class="o">></span><span class="w"> </span><span class="n">schema</span><span class="p">;</span> |
| <span class="linenos">159</span><span class="w"> </span><span class="c1">// This method uses internal arrow utilities to</span> |
| <span class="linenos">160</span><span class="w"> </span><span class="c1">// convert a vector of record batches to an AsyncGenerator of optional batches</span> |
| <span class="linenos">161</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">>></span><span class="w"> </span><span class="n">gen</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">162</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">opt_batches</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">internal</span><span class="o">::</span><span class="n">MapVector</span><span class="p">(</span> |
| <span class="linenos">163</span><span class="w"> </span><span class="p">[](</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="w"> </span><span class="n">batch</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_optional</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">batch</span><span class="p">));</span><span class="w"> </span><span class="p">},</span> |
| <span class="linenos">164</span><span class="w"> </span><span class="n">batches</span><span class="p">);</span> |
| <span class="linenos">165</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">>></span><span class="w"> </span><span class="n">gen</span><span class="p">;</span> |
| <span class="linenos">166</span><span class="w"> </span><span class="n">gen</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">MakeVectorGenerator</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">opt_batches</span><span class="p">));</span> |
| <span class="linenos">167</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">gen</span><span class="p">;</span> |
| <span class="linenos">168</span><span class="w"> </span><span class="p">}</span> |
| <span class="linenos">169</span><span class="p">};</span> |
| </pre></div> |
| </div> |
| <p>Generating sample batches for computation:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">173</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o"><</span><span class="n">BatchesWithSchema</span><span class="o">></span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">174</span><span class="w"> </span><span class="n">BatchesWithSchema</span><span class="w"> </span><span class="n">out</span><span class="p">;</span> |
| <span class="linenos">175</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">field_vector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"a"</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">()),</span> |
| <span class="linenos">176</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"b"</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">boolean</span><span class="p">())};</span> |
| <span class="linenos">177</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">4</span><span class="p">}));</span> |
| <span class="linenos">178</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="mi">6</span><span class="p">,</span><span class="w"> </span><span class="mi">7</span><span class="p">}));</span> |
| <span class="linenos">179</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">8</span><span class="p">,</span><span class="w"> </span><span class="mi">9</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">}));</span> |
| <span class="linenos">180</span> |
| <span class="linenos">181</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1_bool</span><span class="p">,</span> |
| <span class="linenos">182</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">BooleanType</span><span class="o">></span><span class="p">({</span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">}));</span> |
| <span class="linenos">183</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2_bool</span><span class="p">,</span> |
| <span class="linenos">184</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">BooleanType</span><span class="o">></span><span class="p">({</span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">}));</span> |
| <span class="linenos">185</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3_bool</span><span class="p">,</span> |
| <span class="linenos">186</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">BooleanType</span><span class="o">></span><span class="p">({</span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">}));</span> |
| <span class="linenos">187</span> |
| <span class="linenos">188</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1</span><span class="p">,</span> |
| <span class="linenos">189</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">field_vector</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">b1_int</span><span class="p">,</span><span class="w"> </span><span class="n">b1_bool</span><span class="p">}));</span> |
| <span class="linenos">190</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2</span><span class="p">,</span> |
| <span class="linenos">191</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">field_vector</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">b2_int</span><span class="p">,</span><span class="w"> </span><span class="n">b2_bool</span><span class="p">}));</span> |
| <span class="linenos">192</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3</span><span class="p">,</span> |
| <span class="linenos">193</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">field_vector</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">b3_int</span><span class="p">,</span><span class="w"> </span><span class="n">b3_bool</span><span class="p">}));</span> |
| <span class="linenos">194</span> |
| <span class="linenos">195</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">batches</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="n">b1</span><span class="p">,</span><span class="w"> </span><span class="n">b2</span><span class="p">,</span><span class="w"> </span><span class="n">b3</span><span class="p">};</span> |
| <span class="linenos">196</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">(</span><span class="n">field_vector</span><span class="p">);</span> |
| <span class="linenos">197</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">out</span><span class="p">;</span> |
| <span class="linenos">198</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>Example of using <code class="docutils literal notranslate"><span class="pre">source</span></code> (usage of sink is explained in detail in <a class="reference internal" href="#stream-execution-sink-docs"><span class="std std-ref">sink</span></a>):</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">294</span><span class="c1">/// \brief An example demonstrating a source and sink node</span> |
| <span class="linenos">295</span><span class="c1">///</span> |
| <span class="linenos">296</span><span class="c1">/// Source-Table Example</span> |
| <span class="linenos">297</span><span class="c1">/// This example shows how a custom source can be used</span> |
| <span class="linenos">298</span><span class="c1">/// in an execution plan. This includes source node using pregenerated</span> |
| <span class="linenos">299</span><span class="c1">/// data and collecting it into a table.</span> |
| <span class="linenos">300</span><span class="c1">///</span> |
| <span class="linenos">301</span><span class="c1">/// This sort of custom source is often not needed. In most cases you can</span> |
| <span class="linenos">302</span><span class="c1">/// use a scan (for a dataset source) or a source like table_source, array_vector_source,</span> |
| <span class="linenos">303</span><span class="c1">/// exec_batch_source, or record_batch_source (for in-memory data)</span> |
| <span class="linenos">304</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">305</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span> |
| <span class="linenos">306</span> |
| <span class="linenos">307</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span> |
| <span class="linenos">308</span> |
| <span class="linenos">309</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span> |
| <span class="linenos">310</span> |
| <span class="linenos">311</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">));</span> |
| <span class="linenos">312</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="table-source"> |
| <h3><code class="docutils literal notranslate"><span class="pre">table_source</span></code><a class="headerlink" href="#table-source" title="Permalink to this heading">¶</a></h3> |
| <p id="stream-execution-table-source-docs">In the previous example, <a class="reference internal" href="#stream-execution-source-docs"><span class="std std-ref">source node</span></a>, a source node |
| was used to input the data. But when developing an application, if the data is already in memory |
| as a table, it is much easier and more performant to use <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::TableSourceNodeOptions</span></code>. |
| Here the input data can be passed as a <code class="docutils literal notranslate"><span class="pre">std::shared_ptr<arrow::Table></span></code> along with a <code class="docutils literal notranslate"><span class="pre">max_batch_size</span></code>. |
| The <code class="docutils literal notranslate"><span class="pre">max_batch_size</span></code> is used to break up large record batches so that they can be processed in parallel. |
| It is important to note that the table batches will not get merged to form larger batches when the source |
| table has a smaller batch size.</p> |
| <p>Example of using <code class="docutils literal notranslate"><span class="pre">table_source</span></code></p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">317</span><span class="c1">/// \brief An example showing a table source node</span> |
| <span class="linenos">318</span><span class="c1">///</span> |
| <span class="linenos">319</span><span class="c1">/// TableSource-Table Example</span> |
| <span class="linenos">320</span><span class="c1">/// This example shows how a table_source can be used</span> |
| <span class="linenos">321</span><span class="c1">/// in an execution plan. This includes a table source node</span> |
| <span class="linenos">322</span><span class="c1">/// receiving data from a table. This plan simply collects the</span> |
| <span class="linenos">323</span><span class="c1">/// data back into a table but nodes could be added that modify</span> |
| <span class="linenos">324</span><span class="c1">/// or transform the data as well (as is shown in later examples)</span> |
| <span class="linenos">325</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">TableSourceSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">326</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">table</span><span class="p">,</span><span class="w"> </span><span class="n">GetTable</span><span class="p">());</span> |
| <span class="linenos">327</span> |
| <span class="linenos">328</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">>></span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span> |
| <span class="linenos">329</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">max_batch_size</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">2</span><span class="p">;</span> |
| <span class="linenos">330</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">table_source_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">TableSourceNodeOptions</span><span class="p">{</span><span class="n">table</span><span class="p">,</span><span class="w"> </span><span class="n">max_batch_size</span><span class="p">};</span> |
| <span class="linenos">331</span> |
| <span class="linenos">332</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">"table_source"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">table_source_options</span><span class="p">)};</span> |
| <span class="linenos">333</span> |
| <span class="linenos">334</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">));</span> |
| <span class="linenos">335</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="filter"> |
| <span id="stream-execution-filter-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">filter</span></code><a class="headerlink" href="#filter" title="Permalink to this heading">¶</a></h3> |
| <p><code class="docutils literal notranslate"><span class="pre">filter</span></code> operation, as the name suggests, provides an option to define data filtering |
| criteria. It selects rows where the given expression evaluates to true. Filters can be written using |
| <a class="reference internal" href="api/compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::Expression</span></code></a>, and the expression should have a return type of boolean. |
| For example, if we wish to keep rows where the value |
| of column <code class="docutils literal notranslate"><span class="pre">a</span></code> is greater than 3, then we can use the following expression.</p> |
| <p>Filter example:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">340</span><span class="c1">/// \brief An example showing a filter node</span> |
| <span class="linenos">341</span><span class="c1">///</span> |
| <span class="linenos">342</span><span class="c1">/// Source-Filter-Table</span> |
| <span class="linenos">343</span><span class="c1">/// This example shows how a filter can be used in an execution plan,</span> |
| <span class="linenos">344</span><span class="c1">/// to filter data from a source. The output from the execution plan</span> |
| <span class="linenos">345</span><span class="c1">/// is collected into a table.</span> |
| <span class="linenos">346</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">ScanFilterSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">347</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">></span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span> |
| <span class="linenos">348</span> |
| <span class="linenos">349</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">></span><span class="p">();</span> |
| <span class="linenos">350</span><span class="w"> </span><span class="c1">// specify the filter. This filter keeps only the rows where the</span> |
| <span class="linenos">351</span><span class="w"> </span><span class="c1">// value of the "a" column is greater than 3.</span> |
| <span class="linenos">352</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">Expression</span><span class="w"> </span><span class="n">filter_expr</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">greater</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"a"</span><span class="p">),</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">3</span><span class="p">));</span> |
| <span class="linenos">353</span><span class="w"> </span><span class="c1">// set filter for scanner : on-disk / push-down filtering.</span> |
| <span class="linenos">354</span><span class="w"> </span><span class="c1">// This step can be skipped if you are not reading from disk.</span> |
| <span class="linenos">355</span><span class="w"> </span><span class="n">options</span><span class="o">-></span><span class="n">filter</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">filter_expr</span><span class="p">;</span> |
| <span class="linenos">356</span><span class="w"> </span><span class="c1">// empty projection</span> |
| <span class="linenos">357</span><span class="w"> </span><span class="n">options</span><span class="o">-></span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span> |
| <span class="linenos">358</span> |
| <span class="linenos">359</span><span class="w"> </span><span class="c1">// construct the scan node</span> |
| <span class="linenos">360</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"Initialized Scanning Options"</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">361</span> |
| <span class="linenos">362</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span> |
| <span class="linenos">363</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"Scan node options created"</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">364</span> |
| <span class="linenos">365</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">"scan"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span> |
| <span class="linenos">366</span> |
| <span class="linenos">367</span><span class="w"> </span><span class="c1">// pipe the scan node into the filter node</span> |
| <span class="linenos">368</span><span class="w"> </span><span class="c1">// Need to set the filter in scan node options and filter node options.</span> |
| <span class="linenos">369</span><span class="w"> </span><span class="c1">// At scan node it is used for on-disk / push-down filtering.</span> |
| <span class="linenos">370</span><span class="w"> </span><span class="c1">// At filter node it is used for in-memory filtering.</span> |
| <span class="linenos">371</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">filter</span><span class="p">{</span> |
| <span class="linenos">372</span><span class="w"> </span><span class="s">"filter"</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">)},</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">FilterNodeOptions</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">filter_expr</span><span class="p">))};</span> |
| <span class="linenos">373</span> |
| <span class="linenos">374</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">filter</span><span class="p">));</span> |
| <span class="linenos">375</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="project"> |
| <span id="stream-execution-project-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">project</span></code><a class="headerlink" href="#project" title="Permalink to this heading">¶</a></h3> |
| <p>The <code class="docutils literal notranslate"><span class="pre">project</span></code> operation rearranges, deletes, transforms, and creates columns. |
| Each output column is computed by evaluating an expression |
| against the source record batch. These must be scalar expressions |
| (expressions consisting of scalar literals, field references and scalar |
| functions, i.e. elementwise functions that return one value for each input |
| row independent of the value of all other rows). |
| This is exposed via <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::ProjectNodeOptions</span></code>, which requires |
| an <a class="reference internal" href="api/compute.html#_CPPv4N5arrow7compute10ExpressionE" title="arrow::compute::Expression"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::Expression</span></code></a> and a name for each of the output columns (if names are not |
| provided, the string representations of the expressions will be used).</p> |
| <p>Project example:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">381</span><span class="c1">/// \brief An example showing a project node</span> |
| <span class="linenos">382</span><span class="c1">///</span> |
| <span class="linenos">383</span><span class="c1">/// Scan-Project-Table</span> |
| <span class="linenos">384</span><span class="c1">/// This example shows how a Scan operation can be used to load the data</span> |
| <span class="linenos">385</span><span class="c1">/// into the execution plan, how a project operation can be applied on the</span> |
| <span class="linenos">386</span><span class="c1">/// data stream and how the output is collected into a table</span> |
| <span class="linenos">387</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">ScanProjectSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">388</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">></span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span> |
| <span class="linenos">389</span> |
| <span class="linenos">390</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">></span><span class="p">();</span> |
| <span class="linenos">391</span><span class="w"> </span><span class="c1">// projection</span> |
| <span class="linenos">392</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">Expression</span><span class="w"> </span><span class="n">a_times_2</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">call</span><span class="p">(</span><span class="s">"multiply"</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"a"</span><span class="p">),</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">2</span><span class="p">)});</span> |
| <span class="linenos">393</span><span class="w"> </span><span class="n">options</span><span class="o">-></span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span> |
| <span class="linenos">394</span> |
| <span class="linenos">395</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span> |
| <span class="linenos">396</span> |
| <span class="linenos">397</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">"scan"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span> |
| <span class="linenos">398</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">project</span><span class="p">{</span> |
| <span class="linenos">399</span><span class="w"> </span><span class="s">"project"</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">)},</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ProjectNodeOptions</span><span class="p">({</span><span class="n">a_times_2</span><span class="p">})};</span> |
| <span class="linenos">400</span> |
| <span class="linenos">401</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">project</span><span class="p">));</span> |
| <span class="linenos">402</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="aggregate"> |
| <span id="stream-execution-aggregate-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">aggregate</span></code><a class="headerlink" href="#aggregate" title="Permalink to this heading">¶</a></h3> |
| <p>The <code class="docutils literal notranslate"><span class="pre">aggregate</span></code> node computes various types of aggregates over data.</p> |
| <p>Arrow supports two types of aggregates: “scalar” aggregates, and |
| “hash” aggregates. Scalar aggregates reduce an array or scalar input |
| to a single scalar output (e.g. computing the mean of a column). Hash |
| aggregates act like <code class="docutils literal notranslate"><span class="pre">GROUP</span> <span class="pre">BY</span></code> in SQL and first partition data based |
| on one or more key columns, then reduce the data in each |
| partition. The <code class="docutils literal notranslate"><span class="pre">aggregate</span></code> node supports both types of computation, |
| and can compute any number of aggregations at once.</p> |
| <p><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::AggregateNodeOptions</span></code> is used to define the |
| aggregation criteria. It takes a list of aggregation functions and |
| their options; a list of target fields to aggregate, one per function; |
| and a list of names for the output fields, one per function. |
| Optionally, it takes a list of columns that are used to partition the |
| data, in the case of a hash aggregation. The aggregation functions |
| can be selected from <a class="reference internal" href="compute.html#aggregation-option-list"><span class="std std-ref">this list of aggregation functions</span></a>.</p> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>This node is a “pipeline breaker” and will fully materialize |
| the dataset in memory. In the future, spillover mechanisms |
| will be added which should alleviate this constraint.</p> |
| </div> |
| <p>The aggregation can provide results as a group or scalar. For instance, |
| an operation like <cite>hash_count</cite> provides the counts per each unique record |
| as a grouped result while an operation like <cite>sum</cite> provides a single record.</p> |
| <p>Scalar Aggregation example:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">408</span><span class="c1">/// \brief An example showing an aggregation node to aggregate an entire table</span> |
| <span class="linenos">409</span><span class="c1">///</span> |
| <span class="linenos">410</span><span class="c1">/// Source-Aggregation-Table</span> |
| <span class="linenos">411</span><span class="c1">/// This example shows how an aggregation operation can be applied on a</span> |
| <span class="linenos">412</span><span class="c1">/// execution plan resulting in a scalar output. The source node loads the</span> |
<span class="linenos">413</span><span class="c1">/// data and the aggregation (summing the values in column 'a')</span> |
| <span class="linenos">414</span><span class="c1">/// is applied on this data. The output is collected into a table (that will</span> |
| <span class="linenos">415</span><span class="c1">/// have exactly one row)</span> |
| <span class="linenos">416</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceScalarAggregateSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">417</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span> |
| <span class="linenos">418</span> |
| <span class="linenos">419</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span> |
| <span class="linenos">420</span> |
| <span class="linenos">421</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span> |
| <span class="linenos">422</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">aggregate_options</span><span class="w"> </span><span class="o">=</span> |
| <span class="linenos">423</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">AggregateNodeOptions</span><span class="p">{</span><span class="cm">/*aggregates=*/</span><span class="p">{{</span><span class="s">"sum"</span><span class="p">,</span><span class="w"> </span><span class="k">nullptr</span><span class="p">,</span><span class="w"> </span><span class="s">"a"</span><span class="p">,</span><span class="w"> </span><span class="s">"sum(a)"</span><span class="p">}}};</span> |
| <span class="linenos">424</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">aggregate</span><span class="p">{</span> |
| <span class="linenos">425</span><span class="w"> </span><span class="s">"aggregate"</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">)},</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">aggregate_options</span><span class="p">)};</span> |
| <span class="linenos">426</span> |
| <span class="linenos">427</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">aggregate</span><span class="p">));</span> |
| <span class="linenos">428</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>Group Aggregation example:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">433</span><span class="c1">/// \brief An example showing an aggregation node to perform a group-by operation</span> |
| <span class="linenos">434</span><span class="c1">///</span> |
| <span class="linenos">435</span><span class="c1">/// Source-Aggregation-Table</span> |
| <span class="linenos">436</span><span class="c1">/// This example shows how an aggregation operation can be applied on a</span> |
| <span class="linenos">437</span><span class="c1">/// execution plan resulting in grouped output. The source node loads the</span> |
<span class="linenos">438</span><span class="c1">/// data and the aggregation (counting the valid values in column 'a' for each key in column 'b') is</span> |
| <span class="linenos">439</span><span class="c1">/// applied on this data. The output is collected into a table that will contain</span> |
| <span class="linenos">440</span><span class="c1">/// one row for each unique combination of group keys.</span> |
| <span class="linenos">441</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceGroupAggregateSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">442</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span> |
| <span class="linenos">443</span> |
| <span class="linenos">444</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">>></span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span> |
| <span class="linenos">445</span> |
| <span class="linenos">446</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span> |
| <span class="linenos">447</span> |
| <span class="linenos">448</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span> |
| <span class="linenos">449</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">CountOptions</span><span class="o">></span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">CountOptions</span><span class="o">::</span><span class="n">ONLY_VALID</span><span class="p">);</span> |
| <span class="linenos">450</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">aggregate_options</span><span class="w"> </span><span class="o">=</span> |
| <span class="linenos">451</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">AggregateNodeOptions</span><span class="p">{</span><span class="cm">/*aggregates=*/</span><span class="p">{{</span><span class="s">"hash_count"</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">,</span><span class="w"> </span><span class="s">"a"</span><span class="p">,</span><span class="w"> </span><span class="s">"count(a)"</span><span class="p">}},</span> |
| <span class="linenos">452</span><span class="w"> </span><span class="cm">/*keys=*/</span><span class="p">{</span><span class="s">"b"</span><span class="p">}};</span> |
| <span class="linenos">453</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">aggregate</span><span class="p">{</span> |
| <span class="linenos">454</span><span class="w"> </span><span class="s">"aggregate"</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">)},</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">aggregate_options</span><span class="p">)};</span> |
| <span class="linenos">455</span> |
| <span class="linenos">456</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">aggregate</span><span class="p">));</span> |
| <span class="linenos">457</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="sink"> |
| <span id="stream-execution-sink-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">sink</span></code><a class="headerlink" href="#sink" title="Permalink to this heading">¶</a></h3> |
| <p>The <code class="docutils literal notranslate"><span class="pre">sink</span></code> operation provides output and is the final node of a streaming |
| execution definition. The <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::SinkNodeOptions</span></code> interface is used to pass |
| the required options. Similar to the source operator, the sink operator exposes the output |
| with a function that returns a record batch future each time it is called. It is expected the |
| caller will repeatedly call this function until the generator function is exhausted (returns |
| <code class="docutils literal notranslate"><span class="pre">std::nullopt</span></code>). If this function is not called often enough then record batches |
| will accumulate in memory. An execution plan should only have one |
| “terminal” node (one sink node). An <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">ExecPlan</span></code> can terminate early due to cancellation or |
| an error, before the output is fully consumed. However, the plan can be safely destroyed independently |
| of the sink, which will hold the unconsumed batches by <cite>exec_plan->finished()</cite>.</p> |
| <p>As a part of the Source Example, the Sink operation is also included:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">294</span><span class="c1">/// \brief An example demonstrating a source and sink node</span> |
| <span class="linenos">295</span><span class="c1">///</span> |
| <span class="linenos">296</span><span class="c1">/// Source-Table Example</span> |
| <span class="linenos">297</span><span class="c1">/// This example shows how a custom source can be used</span> |
| <span class="linenos">298</span><span class="c1">/// in an execution plan. This includes source node using pregenerated</span> |
| <span class="linenos">299</span><span class="c1">/// data and collecting it into a table.</span> |
| <span class="linenos">300</span><span class="c1">///</span> |
<span class="linenos">301</span><span class="c1">/// This sort of custom source is often not needed. In most cases you can</span> |
| <span class="linenos">302</span><span class="c1">/// use a scan (for a dataset source) or a source like table_source, array_vector_source,</span> |
| <span class="linenos">303</span><span class="c1">/// exec_batch_source, or record_batch_source (for in-memory data)</span> |
| <span class="linenos">304</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">305</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span> |
| <span class="linenos">306</span> |
| <span class="linenos">307</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span> |
| <span class="linenos">308</span> |
| <span class="linenos">309</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span> |
| <span class="linenos">310</span> |
| <span class="linenos">311</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">));</span> |
| <span class="linenos">312</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="consuming-sink"> |
| <span id="stream-execution-consuming-sink-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">consuming_sink</span></code><a class="headerlink" href="#consuming-sink" title="Permalink to this heading">¶</a></h3> |
| <p>The <code class="docutils literal notranslate"><span class="pre">consuming_sink</span></code> operator is a sink operation that contains a consuming operation within the |
| execution plan (i.e. the exec plan should not complete until the consumption has completed). |
| Unlike the <code class="docutils literal notranslate"><span class="pre">sink</span></code> node, this node takes in a callback function that is expected to consume the |
| batch. Once this callback has finished the execution plan will no longer hold any reference to |
| the batch. |
| The consuming function may be called before a previous invocation has completed. If the consuming |
| function does not run quickly enough then many concurrent executions could pile up, blocking the |
| CPU thread pool. The execution plan will not be marked finished until all consuming function callbacks |
| have been completed. |
| Once all batches have been delivered the execution plan will wait for the <cite>finish</cite> future to complete |
| before marking the execution plan finished. This allows for workflows where the consumption function |
| converts batches into async tasks (this is currently done internally for the dataset write node).</p> |
| <p>Example:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="c1">// define a Custom SinkNodeConsumer</span> |
| <span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o"><</span><span class="kt">uint32_t</span><span class="o">></span><span class="w"> </span><span class="n">batches_seen</span><span class="p">{</span><span class="mi">0</span><span class="p">};</span> |
| <span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><></span><span class="w"> </span><span class="n">finish</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><>::</span><span class="n">Make</span><span class="p">();</span> |
| <span class="k">struct</span><span class="w"> </span><span class="nc">CustomSinkNodeConsumer</span><span class="w"> </span><span class="o">:</span><span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SinkNodeConsumer</span><span class="w"> </span><span class="p">{</span> |
| |
| <span class="w"> </span><span class="n">CustomSinkNodeConsumer</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o"><</span><span class="kt">uint32_t</span><span class="o">></span><span class="w"> </span><span class="o">*</span><span class="n">batches_seen</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><></span><span class="n">finish</span><span class="p">)</span><span class="o">:</span> |
| <span class="w"> </span><span class="n">batches_seen</span><span class="p">(</span><span class="n">batches_seen</span><span class="p">),</span><span class="w"> </span><span class="n">finish</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">finish</span><span class="p">))</span><span class="w"> </span><span class="p">{}</span> |
| <span class="w"> </span><span class="c1">// Consumption logic can be written here</span> |
| <span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">Consume</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="w"> </span><span class="n">batch</span><span class="p">)</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="c1">// data can be consumed in the expected way</span> |
| <span class="w"> </span><span class="c1">// transfer to another system or just do some work</span> |
| <span class="w"> </span><span class="c1">// and write to disk</span> |
| <span class="w"> </span><span class="p">(</span><span class="o">*</span><span class="n">batches_seen</span><span class="p">)</span><span class="o">++</span><span class="p">;</span> |
| <span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span> |
| <span class="w"> </span><span class="p">}</span> |
| |
| <span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><></span><span class="w"> </span><span class="n">Finish</span><span class="p">()</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">finish</span><span class="p">;</span><span class="w"> </span><span class="p">}</span> |
| |
| <span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o"><</span><span class="kt">uint32_t</span><span class="o">></span><span class="w"> </span><span class="o">*</span><span class="n">batches_seen</span><span class="p">;</span> |
| <span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><></span><span class="w"> </span><span class="n">finish</span><span class="p">;</span> |
| |
| <span class="p">};</span> |
| |
| <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">CustomSinkNodeConsumer</span><span class="o">></span><span class="w"> </span><span class="n">consumer</span><span class="w"> </span><span class="o">=</span> |
| <span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">CustomSinkNodeConsumer</span><span class="o">></span><span class="p">(</span><span class="o">&</span><span class="n">batches_seen</span><span class="p">,</span><span class="w"> </span><span class="n">finish</span><span class="p">);</span> |
| |
| <span class="n">arrow</span><span class="o">::</span><span class="n">compute</span><span class="o">::</span><span class="n">ExecNode</span><span class="w"> </span><span class="o">*</span><span class="n">consuming_sink</span><span class="p">;</span> |
| |
| <span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">consuming_sink</span><span class="p">,</span><span class="w"> </span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">"consuming_sink"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span> |
| <span class="w"> </span><span class="p">{</span><span class="n">source</span><span class="p">},</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">ConsumingSinkNodeOptions</span><span class="p">(</span><span class="n">consumer</span><span class="p">)));</span> |
| </pre></div> |
| </div> |
| <p>Consuming-Sink example:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">462</span><span class="c1">/// \brief An example showing a consuming sink node</span> |
| <span class="linenos">463</span><span class="c1">///</span> |
| <span class="linenos">464</span><span class="c1">/// Source-Consuming-Sink</span> |
| <span class="linenos">465</span><span class="c1">/// This example shows how the data can be consumed within the execution plan</span> |
| <span class="linenos">466</span><span class="c1">/// by using a ConsumingSink node. There is no data output from this execution plan.</span> |
| <span class="linenos">467</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceConsumingSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">468</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span> |
| <span class="linenos">469</span> |
| <span class="linenos">470</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span> |
| <span class="linenos">471</span> |
| <span class="linenos">472</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span> |
| <span class="linenos">473</span> |
| <span class="linenos">474</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o"><</span><span class="kt">uint32_t</span><span class="o">></span><span class="w"> </span><span class="n">batches_seen</span><span class="p">{</span><span class="mi">0</span><span class="p">};</span> |
| <span class="linenos">475</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><></span><span class="w"> </span><span class="n">finish</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><>::</span><span class="n">Make</span><span class="p">();</span> |
| <span class="linenos">476</span><span class="w"> </span><span class="k">struct</span><span class="w"> </span><span class="nc">CustomSinkNodeConsumer</span><span class="w"> </span><span class="o">:</span><span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SinkNodeConsumer</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">477</span><span class="w"> </span><span class="n">CustomSinkNodeConsumer</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o"><</span><span class="kt">uint32_t</span><span class="o">>*</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><></span><span class="w"> </span><span class="n">finish</span><span class="p">)</span> |
| <span class="linenos">478</span><span class="w"> </span><span class="o">:</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">(</span><span class="n">batches_seen</span><span class="p">),</span><span class="w"> </span><span class="n">finish</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">finish</span><span class="p">))</span><span class="w"> </span><span class="p">{}</span> |
| <span class="linenos">479</span> |
| <span class="linenos">480</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">Init</span><span class="p">(</span><span class="k">const</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Schema</span><span class="o">>&</span><span class="w"> </span><span class="n">schema</span><span class="p">,</span> |
| <span class="linenos">481</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">BackpressureControl</span><span class="o">*</span><span class="w"> </span><span class="n">backpressure_control</span><span class="p">,</span> |
| <span class="linenos">482</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">*</span><span class="w"> </span><span class="n">plan</span><span class="p">)</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">483</span><span class="w"> </span><span class="c1">// This will be called as the plan is started (before the first call to Consume)</span> |
| <span class="linenos">484</span><span class="w"> </span><span class="c1">// and provides the schema of the data coming into the node, controls for pausing /</span> |
| <span class="linenos">485</span><span class="w"> </span><span class="c1">// resuming input, and a pointer to the plan itself which can be used to access</span> |
| <span class="linenos">486</span><span class="w"> </span><span class="c1">// other utilities such as the thread indexer or async task scheduler.</span> |
| <span class="linenos">487</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span> |
| <span class="linenos">488</span><span class="w"> </span><span class="p">}</span> |
| <span class="linenos">489</span> |
| <span class="linenos">490</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">Consume</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="w"> </span><span class="n">batch</span><span class="p">)</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">491</span><span class="w"> </span><span class="p">(</span><span class="o">*</span><span class="n">batches_seen</span><span class="p">)</span><span class="o">++</span><span class="p">;</span> |
| <span class="linenos">492</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span> |
| <span class="linenos">493</span><span class="w"> </span><span class="p">}</span> |
| <span class="linenos">494</span> |
| <span class="linenos">495</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><></span><span class="w"> </span><span class="n">Finish</span><span class="p">()</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">496</span><span class="w"> </span><span class="c1">// Here you can perform whatever (possibly async) cleanup is needed, e.g. closing</span> |
| <span class="linenos">497</span><span class="w"> </span><span class="c1">// output file handles and flushing remaining work</span> |
| <span class="linenos">498</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><>::</span><span class="n">MakeFinished</span><span class="p">();</span> |
| <span class="linenos">499</span><span class="w"> </span><span class="p">}</span> |
| <span class="linenos">500</span> |
| <span class="linenos">501</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o"><</span><span class="kt">uint32_t</span><span class="o">>*</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">;</span> |
| <span class="linenos">502</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><></span><span class="w"> </span><span class="n">finish</span><span class="p">;</span> |
| <span class="linenos">503</span><span class="w"> </span><span class="p">};</span> |
| <span class="linenos">504</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">CustomSinkNodeConsumer</span><span class="o">></span><span class="w"> </span><span class="n">consumer</span><span class="w"> </span><span class="o">=</span> |
| <span class="linenos">505</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">CustomSinkNodeConsumer</span><span class="o">></span><span class="p">(</span><span class="o">&</span><span class="n">batches_seen</span><span class="p">,</span><span class="w"> </span><span class="n">finish</span><span class="p">);</span> |
| <span class="linenos">506</span> |
| <span class="linenos">507</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">consuming_sink</span><span class="p">{</span><span class="s">"consuming_sink"</span><span class="p">,</span> |
| <span class="linenos">508</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">)},</span> |
| <span class="linenos">509</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ConsumingSinkNodeOptions</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">consumer</span><span class="p">))};</span> |
| <span class="linenos">510</span> |
| <span class="linenos">511</span><span class="w"> </span><span class="c1">// Since we are consuming the data within the plan there is no output and we simply</span> |
| <span class="linenos">512</span><span class="w"> </span><span class="c1">// run the plan to completion instead of collecting into a table.</span> |
| <span class="linenos">513</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">DeclarationToStatus</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">consuming_sink</span><span class="p">)));</span> |
| <span class="linenos">514</span> |
| <span class="linenos">515</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"The consuming sink node saw "</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">.</span><span class="n">load</span><span class="p">()</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">" batches"</span> |
| <span class="linenos">516</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">517</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span> |
| <span class="linenos">518</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="order-by-sink"> |
| <span id="stream-execution-order-by-sink-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">order_by_sink</span></code><a class="headerlink" href="#order-by-sink" title="Permalink to this heading">¶</a></h3> |
| <p>The <code class="docutils literal notranslate"><span class="pre">order_by_sink</span></code> operation is an extension to the <code class="docutils literal notranslate"><span class="pre">sink</span></code> operation. |
| It guarantees the ordering of the stream according to the |
| supplied <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::OrderBySinkNodeOptions</span></code>. |
| There, the <a class="reference internal" href="api/compute.html#_CPPv4N5arrow7compute11SortOptionsE" title="arrow::compute::SortOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::SortOptions</span></code></a> define which columns |
| are used for sorting and whether to sort by ascending or descending values.</p> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>This node is a “pipeline breaker” and will fully materialize the dataset in memory. |
| In the future, spillover mechanisms will be added which should alleviate this |
| constraint.</p> |
| </div> |
| <p>Order-By-Sink example:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">523</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">ExecutePlanAndCollectAsTableWithCustomSink</span><span class="p">(</span> |
| <span class="linenos">524</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">></span><span class="w"> </span><span class="n">plan</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Schema</span><span class="o">></span><span class="w"> </span><span class="n">schema</span><span class="p">,</span> |
| <span class="linenos">525</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">>></span><span class="w"> </span><span class="n">sink_gen</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">526</span><span class="w"> </span><span class="c1">// translate sink_gen (async) to sink_reader (sync)</span> |
| <span class="linenos">527</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">RecordBatchReader</span><span class="o">></span><span class="w"> </span><span class="n">sink_reader</span><span class="w"> </span><span class="o">=</span> |
| <span class="linenos">528</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeGeneratorReader</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">sink_gen</span><span class="p">),</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">());</span> |
| <span class="linenos">529</span> |
| <span class="linenos">530</span><span class="w"> </span><span class="c1">// validate the ExecPlan</span> |
| <span class="linenos">531</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">plan</span><span class="o">-></span><span class="n">Validate</span><span class="p">());</span> |
| <span class="linenos">532</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"ExecPlan created : "</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">plan</span><span class="o">-></span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">533</span><span class="w"> </span><span class="c1">// start the ExecPlan</span> |
| <span class="linenos">534</span><span class="w"> </span><span class="n">plan</span><span class="o">-></span><span class="n">StartProducing</span><span class="p">();</span> |
| <span class="linenos">535</span> |
| <span class="linenos">536</span><span class="w"> </span><span class="c1">// collect sink_reader into a Table</span> |
| <span class="linenos">537</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span><span class="w"> </span><span class="n">response_table</span><span class="p">;</span> |
| <span class="linenos">538</span> |
| <span class="linenos">539</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">response_table</span><span class="p">,</span> |
| <span class="linenos">540</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">FromRecordBatchReader</span><span class="p">(</span><span class="n">sink_reader</span><span class="p">.</span><span class="n">get</span><span class="p">()));</span> |
| <span class="linenos">541</span> |
| <span class="linenos">542</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"Results : "</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">response_table</span><span class="o">-></span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">543</span> |
| <span class="linenos">544</span><span class="w"> </span><span class="c1">// stop producing</span> |
| <span class="linenos">545</span><span class="w"> </span><span class="n">plan</span><span class="o">-></span><span class="n">StopProducing</span><span class="p">();</span> |
| <span class="linenos">546</span><span class="w"> </span><span class="c1">// plan mark finished</span> |
| <span class="linenos">547</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">future</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">plan</span><span class="o">-></span><span class="n">finished</span><span class="p">();</span> |
| <span class="linenos">548</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">future</span><span class="p">.</span><span class="n">status</span><span class="p">();</span> |
| <span class="linenos">549</span><span class="p">}</span> |
| <span class="linenos">550</span> |
| <span class="linenos">551</span><span class="c1">/// \brief An example showing an order-by node</span> |
| <span class="linenos">552</span><span class="c1">///</span> |
| <span class="linenos">553</span><span class="c1">/// Source-OrderBy-Sink</span> |
| <span class="linenos">554</span><span class="c1">/// In this example, the data enters through the source node</span> |
| <span class="linenos">555</span><span class="c1">/// and the data is ordered in the sink node. The order can be</span> |
| <span class="linenos">556</span><span class="c1">/// ASCENDING or DESCENDING and it is configurable. The output</span> |
| <span class="linenos">557</span><span class="c1">/// is obtained as a table from the sink node.</span> |
| <span class="linenos">558</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceOrderBySinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">559</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">></span><span class="w"> </span><span class="n">plan</span><span class="p">,</span> |
| <span class="linenos">560</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="o">*</span><span class="n">cp</span><span class="o">::</span><span class="n">threaded_exec_context</span><span class="p">()));</span> |
| <span class="linenos">561</span> |
| <span class="linenos">562</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeSortTestBasicBatches</span><span class="p">());</span> |
| <span class="linenos">563</span> |
| <span class="linenos">564</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">>></span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span> |
| <span class="linenos">565</span> |
| <span class="linenos">566</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span> |
| <span class="linenos">567</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecNode</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">source</span><span class="p">,</span> |
| <span class="linenos">568</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{},</span><span class="w"> </span><span class="n">source_node_options</span><span class="p">));</span> |
| <span class="linenos">569</span> |
| <span class="linenos">570</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span> |
| <span class="linenos">571</span><span class="w"> </span><span class="s">"order_by_sink"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{</span><span class="n">source</span><span class="p">},</span> |
| <span class="linenos">572</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">OrderBySinkNodeOptions</span><span class="p">{</span> |
| <span class="linenos">573</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SortOptions</span><span class="p">{{</span><span class="n">cp</span><span class="o">::</span><span class="n">SortKey</span><span class="p">{</span><span class="s">"a"</span><span class="p">,</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SortOrder</span><span class="o">::</span><span class="n">Descending</span><span class="p">}}},</span><span class="w"> </span><span class="o">&</span><span class="n">sink_gen</span><span class="p">}));</span> |
| <span class="linenos">574</span> |
| <span class="linenos">575</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTableWithCustomSink</span><span class="p">(</span><span class="n">plan</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">);</span> |
| <span class="linenos">576</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="select-k-sink"> |
| <span id="stream-execution-select-k-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">select_k_sink</span></code><a class="headerlink" href="#select-k-sink" title="Permalink to this heading">¶</a></h3> |
| <p>The <code class="docutils literal notranslate"><span class="pre">select_k_sink</span></code> option enables selecting the top/bottom K elements, |
| similar to a SQL <code class="docutils literal notranslate"><span class="pre">ORDER</span> <span class="pre">BY</span> <span class="pre">...</span> <span class="pre">LIMIT</span> <span class="pre">K</span></code> clause. |
| The selection is specified with <a class="reference internal" href="api/compute.html#_CPPv4N5arrow7compute14SelectKOptionsE" title="arrow::compute::SelectKOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::compute::SelectKOptions</span></code></a>, and the node is defined |
| using the <code class="xref cpp cpp-struct docutils literal notranslate"><span class="pre">OrderBySinkNode</span></code> definition. This option returns a sink node that receives |
| inputs and then computes top_k/bottom_k.</p> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>This node is a “pipeline breaker” and will fully materialize the input in memory. |
| In the future, spillover mechanisms will be added which should alleviate this |
| constraint.</p> |
| </div> |
| <p>SelectK example:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">609</span><span class="c1">/// \brief An example showing a select-k node</span> |
| <span class="linenos">610</span><span class="c1">///</span> |
| <span class="linenos">611</span><span class="c1">/// Source-KSelect</span> |
| <span class="linenos">612</span><span class="c1">/// This example shows how K number of elements can be selected</span> |
| <span class="linenos">613</span><span class="c1">/// either from the top or bottom. The output node is a modified</span> |
| <span class="linenos">614</span><span class="c1">/// sink node where output can be obtained as a table.</span> |
| <span class="linenos">615</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceKSelectExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">616</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="n">MakeGroupableBatches</span><span class="p">());</span> |
| <span class="linenos">617</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">></span><span class="w"> </span><span class="n">plan</span><span class="p">,</span> |
| <span class="linenos">618</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="o">*</span><span class="n">cp</span><span class="o">::</span><span class="n">threaded_exec_context</span><span class="p">()));</span> |
| <span class="linenos">619</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">>></span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span> |
| <span class="linenos">620</span> |
| <span class="linenos">621</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span> |
| <span class="linenos">622</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecNode</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">source</span><span class="p">,</span> |
| <span class="linenos">623</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{},</span> |
| <span class="linenos">624</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">input</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">input</span><span class="p">.</span><span class="n">gen</span><span class="p">()}));</span> |
| <span class="linenos">625</span> |
| <span class="linenos">626</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SelectKOptions</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SelectKOptions</span><span class="o">::</span><span class="n">TopKDefault</span><span class="p">(</span><span class="cm">/*k=*/</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="s">"i32"</span><span class="p">});</span> |
| <span class="linenos">627</span> |
| <span class="linenos">628</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">"select_k_sink"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{</span><span class="n">source</span><span class="p">},</span> |
| <span class="linenos">629</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SelectKSinkNodeOptions</span><span class="p">{</span><span class="n">options</span><span class="p">,</span><span class="w"> </span><span class="o">&</span><span class="n">sink_gen</span><span class="p">}));</span> |
| <span class="linenos">630</span> |
| <span class="linenos">631</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">(</span> |
| <span class="linenos">632</span><span class="w"> </span><span class="p">{</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"i32"</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">()),</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"str"</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())});</span> |
| <span class="linenos">633</span> |
| <span class="linenos">634</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTableWithCustomSink</span><span class="p">(</span><span class="n">plan</span><span class="p">,</span><span class="w"> </span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">);</span> |
| <span class="linenos">635</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="table-sink"> |
| <span id="stream-execution-scan-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">table_sink</span></code><a class="headerlink" href="#table-sink" title="Permalink to this heading">¶</a></h3> |
| <p id="stream-execution-table-sink-docs">The <code class="docutils literal notranslate"><span class="pre">table_sink</span></code> node provides the ability to receive the output as an in-memory table. |
| This is simpler to use than the other sink nodes provided by the streaming execution engine |
| but it only makes sense when the output fits comfortably in memory. |
| The node is created using <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::acero::TableSinkNodeOptions</span></code>.</p> |
| <p>Example of using <code class="docutils literal notranslate"><span class="pre">table_sink</span></code>:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">727</span><span class="c1">/// \brief An example showing a table sink node</span> |
| <span class="linenos">728</span><span class="c1">///</span> |
| <span class="linenos">729</span><span class="c1">/// TableSink Example</span> |
| <span class="linenos">730</span><span class="c1">/// This example shows how a table_sink can be used</span> |
| <span class="linenos">731</span><span class="c1">/// in an execution plan. This includes a source node</span> |
| <span class="linenos">732</span><span class="c1">/// receiving data as batches and the table sink node</span> |
| <span class="linenos">733</span><span class="c1">/// which emits the output as a table.</span> |
| <span class="linenos">734</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">TableSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">735</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">></span><span class="w"> </span><span class="n">plan</span><span class="p">,</span> |
| <span class="linenos">736</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="o">*</span><span class="n">cp</span><span class="o">::</span><span class="n">threaded_exec_context</span><span class="p">()));</span> |
| <span class="linenos">737</span> |
| <span class="linenos">738</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span> |
| <span class="linenos">739</span> |
| <span class="linenos">740</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span> |
| <span class="linenos">741</span> |
| <span class="linenos">742</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecNode</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">source</span><span class="p">,</span> |
| <span class="linenos">743</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{},</span><span class="w"> </span><span class="n">source_node_options</span><span class="p">));</span> |
| <span class="linenos">744</span> |
| <span class="linenos">745</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span><span class="w"> </span><span class="n">output_table</span><span class="p">;</span> |
| <span class="linenos">746</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">table_sink_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">TableSinkNodeOptions</span><span class="p">{</span><span class="o">&</span><span class="n">output_table</span><span class="p">};</span> |
| <span class="linenos">747</span> |
| <span class="linenos">748</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span> |
| <span class="linenos">749</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">"table_sink"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{</span><span class="n">source</span><span class="p">},</span><span class="w"> </span><span class="n">table_sink_options</span><span class="p">));</span> |
| <span class="linenos">750</span><span class="w"> </span><span class="c1">// validate the ExecPlan</span> |
| <span class="linenos">751</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">plan</span><span class="o">-></span><span class="n">Validate</span><span class="p">());</span> |
| <span class="linenos">752</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"ExecPlan created : "</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">plan</span><span class="o">-></span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">753</span><span class="w"> </span><span class="c1">// start the ExecPlan</span> |
| <span class="linenos">754</span><span class="w"> </span><span class="n">plan</span><span class="o">-></span><span class="n">StartProducing</span><span class="p">();</span> |
| <span class="linenos">755</span> |
| <span class="linenos">756</span><span class="w"> </span><span class="c1">// Wait for the plan to finish</span> |
| <span class="linenos">757</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">finished</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">plan</span><span class="o">-></span><span class="n">finished</span><span class="p">();</span> |
| <span class="linenos">758</span><span class="w"> </span><span class="n">RETURN_NOT_OK</span><span class="p">(</span><span class="n">finished</span><span class="p">.</span><span class="n">status</span><span class="p">());</span> |
| <span class="linenos">759</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"Results : "</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">output_table</span><span class="o">-></span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">760</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span> |
| <span class="linenos">761</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="scan"> |
| <h3><code class="docutils literal notranslate"><span class="pre">scan</span></code><a class="headerlink" href="#scan" title="Permalink to this heading">¶</a></h3> |
| <p><code class="docutils literal notranslate"><span class="pre">scan</span></code> is an operation used to load and process datasets. It should be preferred over the |
| more generic <code class="docutils literal notranslate"><span class="pre">source</span></code> node when your input is a dataset. The behavior is defined using |
| <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset15ScanNodeOptionsE" title="arrow::dataset::ScanNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::ScanNodeOptions</span></code></a>. More information on datasets and the various |
| scan options can be found in <a class="reference internal" href="dataset.html"><span class="doc">Tabular Datasets</span></a>.</p> |
| <p>This node is capable of applying pushdown filters to the file readers which reduce |
| the amount of data that needs to be read. This means you may supply the same |
| filter expression to the scan node that you also supply to the FilterNode because |
| the filtering is done in two different places.</p> |
| <p>Scan example:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">271</span><span class="c1">/// \brief An example demonstrating a scan and sink node</span> |
| <span class="linenos">272</span><span class="c1">///</span> |
| <span class="linenos">273</span><span class="c1">/// Scan-Table</span> |
| <span class="linenos">274</span><span class="c1">/// This example shows how scan operation can be applied on a dataset.</span> |
| <span class="linenos">275</span><span class="c1">/// There are operations that can be applied on the scan (project, filter)</span> |
| <span class="linenos">276</span><span class="c1">/// and the input data can be processed. The output is obtained as a table</span> |
| <span class="linenos">277</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">ScanSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">278</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">></span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span> |
| <span class="linenos">279</span> |
| <span class="linenos">280</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">></span><span class="p">();</span> |
| <span class="linenos">281</span><span class="w"> </span><span class="n">options</span><span class="o">-></span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span><span class="w"> </span><span class="c1">// create empty projection</span> |
| <span class="linenos">282</span> |
| <span class="linenos">283</span><span class="w"> </span><span class="c1">// construct the scan node</span> |
| <span class="linenos">284</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span> |
| <span class="linenos">285</span> |
| <span class="linenos">286</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">"scan"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span> |
| <span class="linenos">287</span> |
| <span class="linenos">288</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">));</span> |
| <span class="linenos">289</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="write"> |
| <h3><code class="docutils literal notranslate"><span class="pre">write</span></code><a class="headerlink" href="#write" title="Permalink to this heading">¶</a></h3> |
| <p>The <code class="docutils literal notranslate"><span class="pre">write</span></code> node saves query results as a dataset of files in a |
| format like Parquet, Feather, CSV, etc. using the <a class="reference internal" href="dataset.html"><span class="doc">Tabular Datasets</span></a> |
| functionality in Arrow. The write options are provided via the |
| <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset16WriteNodeOptionsE" title="arrow::dataset::WriteNodeOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::WriteNodeOptions</span></code></a> which in turn contains |
| <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptionsE" title="arrow::dataset::FileSystemDatasetWriteOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::FileSystemDatasetWriteOptions</span></code></a>. |
| <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset29FileSystemDatasetWriteOptionsE" title="arrow::dataset::FileSystemDatasetWriteOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::FileSystemDatasetWriteOptions</span></code></a> provides |
| control over the written dataset, including options like the output |
| directory, file naming scheme, and so on.</p> |
| <p>Write example:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">641</span><span class="c1">/// \brief An example showing a write node</span> |
| <span class="linenos">642</span><span class="c1">/// \param file_path The destination to write to</span> |
| <span class="linenos">643</span><span class="c1">///</span> |
| <span class="linenos">644</span><span class="c1">/// Scan-Filter-Write</span> |
| <span class="linenos">645</span><span class="c1">/// This example shows how scan node can be used to load the data</span> |
| <span class="linenos">646</span><span class="c1">/// and after processing how it can be written to disk.</span> |
| <span class="linenos">647</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">ScanFilterWriteExample</span><span class="p">(</span><span class="k">const</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span><span class="w"> </span><span class="n">file_path</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">648</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">></span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span> |
| <span class="linenos">649</span> |
| <span class="linenos">650</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">></span><span class="p">();</span> |
| <span class="linenos">651</span><span class="w"> </span><span class="c1">// empty projection</span> |
| <span class="linenos">652</span><span class="w"> </span><span class="n">options</span><span class="o">-></span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span> |
| <span class="linenos">653</span> |
| <span class="linenos">654</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span> |
| <span class="linenos">655</span> |
| <span class="linenos">656</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">"scan"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span> |
| <span class="linenos">657</span> |
| <span class="linenos">658</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">>></span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span> |
| <span class="linenos">659</span> |
| <span class="linenos">660</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="w"> </span><span class="n">root_path</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">""</span><span class="p">;</span> |
| <span class="linenos">661</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="w"> </span><span class="n">uri</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">"file://"</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">file_path</span><span class="p">;</span> |
| <span class="linenos">662</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">></span><span class="w"> </span><span class="n">filesystem</span><span class="p">,</span> |
| <span class="linenos">663</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystemFromUri</span><span class="p">(</span><span class="n">uri</span><span class="p">,</span><span class="w"> </span><span class="o">&</span><span class="n">root_path</span><span class="p">));</span> |
| <span class="linenos">664</span> |
| <span class="linenos">665</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">base_path</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">root_path</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="s">"/parquet_dataset"</span><span class="p">;</span> |
| <span class="linenos">666</span><span class="w"> </span><span class="c1">// Uncomment the following line, if run repeatedly</span> |
| <span class="linenos">667</span><span class="w"> </span><span class="c1">// ARROW_RETURN_NOT_OK(filesystem->DeleteDirContents(base_path));</span> |
| <span class="linenos">668</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-></span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span> |
| <span class="linenos">669</span> |
| <span class="linenos">670</span><span class="w"> </span><span class="c1">// The partition schema determines which fields are part of the partitioning.</span> |
| <span class="linenos">671</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">partition_schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"a"</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">())});</span> |
| <span class="linenos">672</span><span class="w"> </span><span class="c1">// We'll use Hive-style partitioning,</span> |
| <span class="linenos">673</span><span class="w"> </span><span class="c1">// which creates directories with "key=value" pairs.</span> |
| <span class="linenos">674</span> |
| <span class="linenos">675</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">partitioning</span><span class="w"> </span><span class="o">=</span> |
| <span class="linenos">676</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">></span><span class="p">(</span><span class="n">partition_schema</span><span class="p">);</span> |
| <span class="linenos">677</span><span class="w"> </span><span class="c1">// We'll write Parquet files.</span> |
| <span class="linenos">678</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">format</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ParquetFileFormat</span><span class="o">></span><span class="p">();</span> |
| <span class="linenos">679</span> |
| <span class="linenos">680</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">FileSystemDatasetWriteOptions</span><span class="w"> </span><span class="n">write_options</span><span class="p">;</span> |
| <span class="linenos">681</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">file_write_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">format</span><span class="o">-></span><span class="n">DefaultWriteOptions</span><span class="p">();</span> |
| <span class="linenos">682</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">filesystem</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">filesystem</span><span class="p">;</span> |
| <span class="linenos">683</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">base_dir</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">base_path</span><span class="p">;</span> |
| <span class="linenos">684</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">partitioning</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">partitioning</span><span class="p">;</span> |
| <span class="linenos">685</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">basename_template</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">"part{i}.parquet"</span><span class="p">;</span> |
| <span class="linenos">686</span> |
| <span class="linenos">687</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">WriteNodeOptions</span><span class="w"> </span><span class="n">write_node_options</span><span class="p">{</span><span class="n">write_options</span><span class="p">};</span> |
| <span class="linenos">688</span> |
| <span class="linenos">689</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">write</span><span class="p">{</span><span class="s">"write"</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">)},</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">write_node_options</span><span class="p">)};</span> |
| <span class="linenos">690</span> |
| <span class="linenos">691</span><span class="w"> </span><span class="c1">// Since the write node has no output we simply run the plan to completion and the</span> |
| <span class="linenos">692</span><span class="w"> </span><span class="c1">// data should be written</span> |
| <span class="linenos">693</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">DeclarationToStatus</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">write</span><span class="p">)));</span> |
| <span class="linenos">694</span> |
| <span class="linenos">695</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"Dataset written to "</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">base_path</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">696</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span> |
| <span class="linenos">697</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="union"> |
| <span id="stream-execution-union-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">union</span></code><a class="headerlink" href="#union" title="Permalink to this heading">¶</a></h3> |
| <p><code class="docutils literal notranslate"><span class="pre">union</span></code> merges multiple data streams with the same schema into one, similar to |
| a SQL <code class="docutils literal notranslate"><span class="pre">UNION</span> <span class="pre">ALL</span></code> clause.</p> |
| <p>The following example demonstrates how this can be achieved using |
| two data sources.</p> |
| <p>Union example:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">703</span><span class="c1">/// \brief An example showing a union node</span> |
| <span class="linenos">704</span><span class="c1">///</span> |
| <span class="linenos">705</span><span class="c1">/// Source-Union-Table</span> |
| <span class="linenos">706</span><span class="c1">/// This example shows how a union operation can be applied on two</span> |
| <span class="linenos">707</span><span class="c1">/// data sources. The output is collected into a table.</span> |
| <span class="linenos">708</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceUnionSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">709</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span> |
| <span class="linenos">710</span> |
| <span class="linenos">711</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">lhs</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span> |
| <span class="linenos">712</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()}};</span> |
| <span class="linenos">713</span><span class="w"> </span><span class="n">lhs</span><span class="p">.</span><span class="n">label</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">"lhs"</span><span class="p">;</span> |
| <span class="linenos">714</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">rhs</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span> |
| <span class="linenos">715</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()}};</span> |
| <span class="linenos">716</span><span class="w"> </span><span class="n">rhs</span><span class="p">.</span><span class="n">label</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">"rhs"</span><span class="p">;</span> |
| <span class="linenos">717</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">union_plan</span><span class="p">{</span> |
| <span class="linenos">718</span><span class="w"> </span><span class="s">"union"</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">lhs</span><span class="p">),</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">rhs</span><span class="p">)},</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecNodeOptions</span><span class="p">{}};</span> |
| <span class="linenos">719</span> |
| <span class="linenos">720</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">union_plan</span><span class="p">));</span> |
| <span class="linenos">721</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="hash-join"> |
| <span id="stream-execution-hashjoin-docs"></span><h3><code class="docutils literal notranslate"><span class="pre">hash_join</span></code><a class="headerlink" href="#hash-join" title="Permalink to this heading">¶</a></h3> |
| <p>The <code class="docutils literal notranslate"><span class="pre">hash_join</span></code> operation provides the relational algebra join operation, implemented with a hash-based |
| algorithm. <code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::acero::HashJoinNodeOptions</span></code> contains the options required to |
| define a join. The <code class="docutils literal notranslate"><span class="pre">hash_join</span></code> node supports |
| <a class="reference external" href="https://en.wikipedia.org/wiki/Join_(SQL)">left/right/full semi/anti/outer joins</a>. |
| The join keys (i.e. the column(s) to join on) and the suffixes (i.e. terms like “_x” |
| that are appended to column names duplicated in both the left and right |
| relations) can also be set via the join options. |
| <a class="reference external" href="https://en.wikipedia.org/wiki/Hash_join">Read more on hash-joins</a>.</p> |
| <p>Hash-Join example:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos">582</span><span class="c1">/// \brief An example showing a hash join node</span> |
| <span class="linenos">583</span><span class="c1">///</span> |
| <span class="linenos">584</span><span class="c1">/// Source-HashJoin-Table</span> |
| <span class="linenos">585</span><span class="c1">/// This example shows how source node gets the data and how a self-join</span> |
| <span class="linenos">586</span><span class="c1">/// is applied on the data. The join options are configurable. The output</span> |
| <span class="linenos">587</span><span class="c1">/// is collected into a table.</span> |
| <span class="linenos">588</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="nf">SourceHashJoinSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">589</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="n">MakeGroupableBatches</span><span class="p">());</span> |
| <span class="linenos">590</span> |
| <span class="linenos">591</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">left</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">input</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">input</span><span class="p">.</span><span class="n">gen</span><span class="p">()}};</span> |
| <span class="linenos">592</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">right</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">input</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">input</span><span class="p">.</span><span class="n">gen</span><span class="p">()}};</span> |
| <span class="linenos">593</span> |
| <span class="linenos">594</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">HashJoinNodeOptions</span><span class="w"> </span><span class="n">join_opts</span><span class="p">{</span> |
| <span class="linenos">595</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">JoinType</span><span class="o">::</span><span class="n">INNER</span><span class="p">,</span> |
| <span class="linenos">596</span><span class="w"> </span><span class="cm">/*left_keys=*/</span><span class="p">{</span><span class="s">"str"</span><span class="p">},</span> |
| <span class="linenos">597</span><span class="w"> </span><span class="cm">/*right_keys=*/</span><span class="p">{</span><span class="s">"str"</span><span class="p">},</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="nb">true</span><span class="p">),</span><span class="w"> </span><span class="s">"l_"</span><span class="p">,</span><span class="w"> </span><span class="s">"r_"</span><span class="p">};</span> |
| <span class="linenos">598</span> |
| <span class="linenos">599</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">hashjoin</span><span class="p">{</span> |
| <span class="linenos">600</span><span class="w"> </span><span class="s">"hashjoin"</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">left</span><span class="p">),</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">right</span><span class="p">)},</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">join_opts</span><span class="p">)};</span> |
| <span class="linenos">601</span> |
| <span class="linenos">602</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">hashjoin</span><span class="p">));</span> |
| <span class="linenos">603</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| </section> |
| <section id="summary"> |
| <span id="stream-execution-write-docs"></span><h2>Summary<a class="headerlink" href="#summary" title="Permalink to this heading">¶</a></h2> |
| <p>Examples of these nodes can be found in |
| <code class="docutils literal notranslate"><span class="pre">cpp/examples/arrow/execution_plan_documentation_examples.cc</span></code> in the Arrow source.</p> |
| <p>Complete Example:</p> |
| <div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="linenos"> 19</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/array.h></span> |
| <span class="linenos"> 20</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/builder.h></span> |
| <span class="linenos"> 21</span> |
| <span class="linenos"> 22</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/acero/exec_plan.h></span> |
| <span class="linenos"> 23</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/compute/api.h></span> |
| <span class="linenos"> 24</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/compute/api_vector.h></span> |
| <span class="linenos"> 25</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/compute/cast.h></span> |
| <span class="linenos"> 26</span> |
| <span class="linenos"> 27</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/csv/api.h></span> |
| <span class="linenos"> 28</span> |
| <span class="linenos"> 29</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/dataset/dataset.h></span> |
| <span class="linenos"> 30</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/dataset/file_base.h></span> |
| <span class="linenos"> 31</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/dataset/file_parquet.h></span> |
| <span class="linenos"> 32</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/dataset/plan.h></span> |
| <span class="linenos"> 33</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/dataset/scanner.h></span> |
| <span class="linenos"> 34</span> |
| <span class="linenos"> 35</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/io/interfaces.h></span> |
| <span class="linenos"> 36</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/io/memory.h></span> |
| <span class="linenos"> 37</span> |
| <span class="linenos"> 38</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/result.h></span> |
| <span class="linenos"> 39</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/status.h></span> |
| <span class="linenos"> 40</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/table.h></span> |
| <span class="linenos"> 41</span> |
| <span class="linenos"> 42</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/ipc/api.h></span> |
| <span class="linenos"> 43</span> |
| <span class="linenos"> 44</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/util/future.h></span> |
| <span class="linenos"> 45</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/util/range.h></span> |
| <span class="linenos"> 46</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/util/thread_pool.h></span> |
| <span class="linenos"> 47</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><arrow/util/vector.h></span> |
| <span class="linenos"> 48</span> |
| <span class="linenos"> 49</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><iostream></span> |
| <span class="linenos"> 50</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><memory></span> |
| <span class="linenos"> 51</span><span class="cp">#include</span><span class="w"> </span><span class="cpf"><utility></span> |
| <span class="linenos"> 52</span> |
| <span class="linenos"> 53</span><span class="c1">// Demonstrate various operators in Arrow Streaming Execution Engine</span> |
| <span class="linenos"> 54</span> |
| <span class="linenos"> 55</span><span class="k">namespace</span><span class="w"> </span><span class="nn">cp</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">::</span><span class="nn">arrow</span><span class="o">::</span><span class="nn">compute</span><span class="p">;</span> |
| <span class="linenos"> 56</span><span class="k">namespace</span><span class="w"> </span><span class="nn">ac</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">::</span><span class="nn">arrow</span><span class="o">::</span><span class="nn">acero</span><span class="p">;</span> |
| <span class="linenos"> 57</span> |
| <span class="linenos"> 58</span><span class="k">constexpr</span><span class="w"> </span><span class="kt">char</span><span class="w"> </span><span class="n">kSep</span><span class="p">[]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">"******"</span><span class="p">;</span> |
| <span class="linenos"> 59</span> |
| <span class="linenos"> 60</span><span class="kt">void</span><span class="w"> </span><span class="nf">PrintBlock</span><span class="p">(</span><span class="k">const</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span><span class="w"> </span><span class="n">msg</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos"> 61</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"</span><span class="se">\n\t</span><span class="s">"</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">kSep</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">" "</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">msg</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">" "</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">kSep</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"</span><span class="se">\n</span><span class="s">"</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos"> 62</span><span class="p">}</span> |
| <span class="linenos"> 63</span> |
| <span class="linenos"> 64</span><span class="k">template</span><span class="w"> </span><span class="o"><</span><span class="k">typename</span><span class="w"> </span><span class="nc">TYPE</span><span class="p">,</span> |
| <span class="linenos"> 65</span><span class="w"> </span><span class="k">typename</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">typename</span><span class="w"> </span><span class="nc">std</span><span class="o">::</span><span class="n">enable_if</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">is_number_type</span><span class="o"><</span><span class="n">TYPE</span><span class="o">>::</span><span class="n">value</span><span class="w"> </span><span class="o">|</span> |
| <span class="linenos"> 66</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">is_boolean_type</span><span class="o"><</span><span class="n">TYPE</span><span class="o">>::</span><span class="n">value</span><span class="w"> </span><span class="o">|</span> |
| <span class="linenos"> 67</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">is_temporal_type</span><span class="o"><</span><span class="n">TYPE</span><span class="o">>::</span><span class="n">value</span><span class="o">>::</span><span class="n">type</span><span class="o">></span> |
| <span class="linenos"> 68</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">>></span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="p">(</span> |
| <span class="linenos"> 69</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o"><</span><span class="k">typename</span><span class="w"> </span><span class="nc">TYPE</span><span class="o">::</span><span class="n">c_type</span><span class="o">>&</span><span class="w"> </span><span class="n">values</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos"> 70</span><span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="n">ArrowBuilderType</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">typename</span><span class="w"> </span><span class="nc">arrow</span><span class="o">::</span><span class="n">TypeTraits</span><span class="o"><</span><span class="n">TYPE</span><span class="o">>::</span><span class="n">BuilderType</span><span class="p">;</span> |
| <span class="linenos"> 71</span><span class="w"> </span><span class="n">ArrowBuilderType</span><span class="w"> </span><span class="n">builder</span><span class="p">;</span> |
| <span class="linenos"> 72</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Reserve</span><span class="p">(</span><span class="n">values</span><span class="p">.</span><span class="n">size</span><span class="p">()));</span> |
| <span class="linenos"> 73</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">(</span><span class="n">values</span><span class="p">));</span> |
| <span class="linenos"> 74</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">();</span> |
| <span class="linenos"> 75</span><span class="p">}</span> |
| <span class="linenos"> 76</span> |
| <span class="linenos"> 77</span><span class="k">template</span><span class="w"> </span><span class="o"><</span><span class="k">class</span><span class="w"> </span><span class="nc">TYPE</span><span class="o">></span> |
| <span class="linenos"> 78</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">>></span><span class="w"> </span><span class="n">GetBinaryArrayDataSample</span><span class="p">(</span> |
| <span class="linenos"> 79</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">>&</span><span class="w"> </span><span class="n">values</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos"> 80</span><span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="n">ArrowBuilderType</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">typename</span><span class="w"> </span><span class="nc">arrow</span><span class="o">::</span><span class="n">TypeTraits</span><span class="o"><</span><span class="n">TYPE</span><span class="o">>::</span><span class="n">BuilderType</span><span class="p">;</span> |
| <span class="linenos"> 81</span><span class="w"> </span><span class="n">ArrowBuilderType</span><span class="w"> </span><span class="n">builder</span><span class="p">;</span> |
| <span class="linenos"> 82</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Reserve</span><span class="p">(</span><span class="n">values</span><span class="p">.</span><span class="n">size</span><span class="p">()));</span> |
| <span class="linenos"> 83</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">(</span><span class="n">values</span><span class="p">));</span> |
| <span class="linenos"> 84</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">();</span> |
| <span class="linenos"> 85</span><span class="p">}</span> |
| <span class="linenos"> 86</span> |
| <span class="linenos"> 87</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">RecordBatch</span><span class="o">>></span><span class="w"> </span><span class="n">GetSampleRecordBatch</span><span class="p">(</span> |
| <span class="linenos"> 88</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">ArrayVector</span><span class="w"> </span><span class="n">array_vector</span><span class="p">,</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">FieldVector</span><span class="o">&</span><span class="w"> </span><span class="n">field_vector</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos"> 89</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">RecordBatch</span><span class="o">></span><span class="w"> </span><span class="n">record_batch</span><span class="p">;</span> |
| <span class="linenos"> 90</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">struct_result</span><span class="p">,</span> |
| <span class="linenos"> 91</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">StructArray</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">array_vector</span><span class="p">,</span><span class="w"> </span><span class="n">field_vector</span><span class="p">));</span> |
| <span class="linenos"> 92</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">record_batch</span><span class="o">-></span><span class="n">FromStructArray</span><span class="p">(</span><span class="n">struct_result</span><span class="p">);</span> |
| <span class="linenos"> 93</span><span class="p">}</span> |
| <span class="linenos"> 94</span> |
| <span class="linenos"> 95</span><span class="c1">/// \brief Create a sample table</span> |
| <span class="linenos"> 96</span><span class="c1">/// The table's contents will be:</span> |
| <span class="linenos"> 97</span><span class="c1">/// a,b</span> |
| <span class="linenos"> 98</span><span class="c1">/// 1,null</span> |
| <span class="linenos"> 99</span><span class="c1">/// 2,true</span> |
| <span class="linenos">100</span><span class="c1">/// null,true</span> |
| <span class="linenos">101</span><span class="c1">/// 3,false</span> |
| <span class="linenos">102</span><span class="c1">/// null,true</span> |
| <span class="linenos">103</span><span class="c1">/// 4,false</span> |
| <span class="linenos">104</span><span class="c1">/// 5,null</span> |
| <span class="linenos">105</span><span class="c1">/// 6,false</span> |
| <span class="linenos">106</span><span class="c1">/// 7,false</span> |
| <span class="linenos">107</span><span class="c1">/// 8,true</span> |
| <span class="linenos">108</span><span class="c1">/// \return The created table</span> |
| <span class="linenos">109</span> |
| <span class="linenos">110</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">>></span><span class="w"> </span><span class="n">GetTable</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">111</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">null_long</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">numeric_limits</span><span class="o"><</span><span class="kt">int64_t</span><span class="o">>::</span><span class="n">quiet_NaN</span><span class="p">();</span> |
| <span class="linenos">112</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">int64_array</span><span class="p">,</span> |
| <span class="linenos">113</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int64Type</span><span class="o">></span><span class="p">(</span> |
| <span class="linenos">114</span><span class="w"> </span><span class="p">{</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="n">null_long</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">,</span><span class="w"> </span><span class="n">null_long</span><span class="p">,</span><span class="w"> </span><span class="mi">4</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="mi">6</span><span class="p">,</span><span class="w"> </span><span class="mi">7</span><span class="p">,</span><span class="w"> </span><span class="mi">8</span><span class="p">}));</span> |
| <span class="linenos">115</span> |
| <span class="linenos">116</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">BooleanBuilder</span><span class="w"> </span><span class="n">boolean_builder</span><span class="p">;</span> |
| <span class="linenos">117</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">BooleanArray</span><span class="o">></span><span class="w"> </span><span class="n">bool_array</span><span class="p">;</span> |
| <span class="linenos">118</span> |
| <span class="linenos">119</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o"><</span><span class="kt">uint8_t</span><span class="o">></span><span class="w"> </span><span class="n">bool_values</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span> |
| <span class="linenos">120</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">};</span> |
| <span class="linenos">121</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o"><</span><span class="kt">bool</span><span class="o">></span><span class="w"> </span><span class="n">is_valid</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span> |
| <span class="linenos">122</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">};</span> |
| <span class="linenos">123</span> |
| <span class="linenos">124</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">boolean_builder</span><span class="p">.</span><span class="n">Reserve</span><span class="p">(</span><span class="mi">10</span><span class="p">));</span> |
| <span class="linenos">125</span> |
| <span class="linenos">126</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">boolean_builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">(</span><span class="n">bool_values</span><span class="p">,</span><span class="w"> </span><span class="n">is_valid</span><span class="p">));</span> |
| <span class="linenos">127</span> |
| <span class="linenos">128</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">boolean_builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&</span><span class="n">bool_array</span><span class="p">));</span> |
| <span class="linenos">129</span> |
| <span class="linenos">130</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">record_batch</span><span class="w"> </span><span class="o">=</span> |
| <span class="linenos">131</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">RecordBatch</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"a"</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> |
| <span class="linenos">132</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"b"</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">boolean</span><span class="p">())}),</span> |
| <span class="linenos">133</span><span class="w"> </span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">int64_array</span><span class="p">,</span><span class="w"> </span><span class="n">bool_array</span><span class="p">});</span> |
| <span class="linenos">134</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">table</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">FromRecordBatches</span><span class="p">({</span><span class="n">record_batch</span><span class="p">}));</span> |
| <span class="linenos">135</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">table</span><span class="p">;</span> |
| <span class="linenos">136</span><span class="p">}</span> |
| <span class="linenos">137</span> |
| <span class="linenos">138</span><span class="c1">/// \brief Create a sample dataset</span> |
| <span class="linenos">139</span><span class="c1">/// \return An in-memory dataset based on GetTable()</span> |
| <span class="linenos">140</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">>></span><span class="w"> </span><span class="n">GetDataset</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">141</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">table</span><span class="p">,</span><span class="w"> </span><span class="n">GetTable</span><span class="p">());</span> |
| <span class="linenos">142</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">ds</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">InMemoryDataset</span><span class="o">></span><span class="p">(</span><span class="n">table</span><span class="p">);</span> |
| <span class="linenos">143</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ds</span><span class="p">;</span> |
| <span class="linenos">144</span><span class="p">}</span> |
| <span class="linenos">145</span> |
| <span class="linenos">146</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">></span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span> |
| <span class="linenos">147</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">FieldVector</span><span class="o">&</span><span class="w"> </span><span class="n">field_vector</span><span class="p">,</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">ArrayVector</span><span class="o">&</span><span class="w"> </span><span class="n">array_vector</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">148</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">RecordBatch</span><span class="o">></span><span class="w"> </span><span class="n">record_batch</span><span class="p">;</span> |
| <span class="linenos">149</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">res_batch</span><span class="p">,</span><span class="w"> </span><span class="n">GetSampleRecordBatch</span><span class="p">(</span><span class="n">array_vector</span><span class="p">,</span><span class="w"> </span><span class="n">field_vector</span><span class="p">));</span> |
| <span class="linenos">150</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="w"> </span><span class="n">batch</span><span class="p">{</span><span class="o">*</span><span class="n">res_batch</span><span class="p">};</span> |
| <span class="linenos">151</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">batch</span><span class="p">;</span> |
| <span class="linenos">152</span><span class="p">}</span> |
| <span class="linenos">153</span> |
| <span class="linenos">154</span><span class="c1">// (Doc section: BatchesWithSchema Definition)</span> |
| <span class="linenos">155</span><span class="k">struct</span><span class="w"> </span><span class="nc">BatchesWithSchema</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">156</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">></span><span class="w"> </span><span class="n">batches</span><span class="p">;</span> |
| <span class="linenos">157</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Schema</span><span class="o">></span><span class="w"> </span><span class="n">schema</span><span class="p">;</span> |
| <span class="linenos">158</span><span class="w"> </span><span class="c1">// This method uses internal arrow utilities to</span> |
| <span class="linenos">159</span><span class="w"> </span><span class="c1">// convert the vector of ExecBatch into an AsyncGenerator of optional ExecBatch</span> |
| <span class="linenos">160</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">>></span><span class="w"> </span><span class="n">gen</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">161</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">opt_batches</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">internal</span><span class="o">::</span><span class="n">MapVector</span><span class="p">(</span> |
| <span class="linenos">162</span><span class="w"> </span><span class="p">[](</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="w"> </span><span class="n">batch</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_optional</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">batch</span><span class="p">));</span><span class="w"> </span><span class="p">},</span> |
| <span class="linenos">163</span><span class="w"> </span><span class="n">batches</span><span class="p">);</span> |
| <span class="linenos">164</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">>></span><span class="w"> </span><span class="n">gen</span><span class="p">;</span> |
| <span class="linenos">165</span><span class="w"> </span><span class="n">gen</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">MakeVectorGenerator</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">opt_batches</span><span class="p">));</span> |
| <span class="linenos">166</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">gen</span><span class="p">;</span> |
| <span class="linenos">167</span><span class="w"> </span><span class="p">}</span> |
| <span class="linenos">168</span><span class="p">};</span> |
| <span class="linenos">169</span><span class="c1">// (Doc section: BatchesWithSchema Definition)</span> |
| <span class="linenos">170</span> |
| <span class="linenos">171</span><span class="c1">// (Doc section: MakeBasicBatches Definition)</span> |
| <span class="linenos">172</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o"><</span><span class="n">BatchesWithSchema</span><span class="o">></span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">173</span><span class="w"> </span><span class="n">BatchesWithSchema</span><span class="w"> </span><span class="n">out</span><span class="p">;</span> |
| <span class="linenos">174</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">field_vector</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"a"</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">()),</span> |
| <span class="linenos">175</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"b"</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">boolean</span><span class="p">())};</span> |
| <span class="linenos">176</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">4</span><span class="p">}));</span> |
| <span class="linenos">177</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="mi">6</span><span class="p">,</span><span class="w"> </span><span class="mi">7</span><span class="p">}));</span> |
| <span class="linenos">178</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">8</span><span class="p">,</span><span class="w"> </span><span class="mi">9</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">}));</span> |
| <span class="linenos">179</span> |
| <span class="linenos">180</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1_bool</span><span class="p">,</span> |
| <span class="linenos">181</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">BooleanType</span><span class="o">></span><span class="p">({</span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">}));</span> |
| <span class="linenos">182</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2_bool</span><span class="p">,</span> |
| <span class="linenos">183</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">BooleanType</span><span class="o">></span><span class="p">({</span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">}));</span> |
| <span class="linenos">184</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3_bool</span><span class="p">,</span> |
| <span class="linenos">185</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">BooleanType</span><span class="o">></span><span class="p">({</span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">}));</span> |
| <span class="linenos">186</span> |
| <span class="linenos">187</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1</span><span class="p">,</span> |
| <span class="linenos">188</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">field_vector</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">b1_int</span><span class="p">,</span><span class="w"> </span><span class="n">b1_bool</span><span class="p">}));</span> |
| <span class="linenos">189</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2</span><span class="p">,</span> |
| <span class="linenos">190</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">field_vector</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">b2_int</span><span class="p">,</span><span class="w"> </span><span class="n">b2_bool</span><span class="p">}));</span> |
| <span class="linenos">191</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3</span><span class="p">,</span> |
| <span class="linenos">192</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">field_vector</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">b3_int</span><span class="p">,</span><span class="w"> </span><span class="n">b3_bool</span><span class="p">}));</span> |
| <span class="linenos">193</span> |
| <span class="linenos">194</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">batches</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="n">b1</span><span class="p">,</span><span class="w"> </span><span class="n">b2</span><span class="p">,</span><span class="w"> </span><span class="n">b3</span><span class="p">};</span> |
| <span class="linenos">195</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">(</span><span class="n">field_vector</span><span class="p">);</span> |
| <span class="linenos">196</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">out</span><span class="p">;</span> |
| <span class="linenos">197</span><span class="p">}</span> |
| <span class="linenos">198</span><span class="c1">// (Doc section: MakeBasicBatches Definition)</span> |
| <span class="linenos">199</span> |
| <span class="linenos">200</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o"><</span><span class="n">BatchesWithSchema</span><span class="o">></span><span class="w"> </span><span class="n">MakeSortTestBasicBatches</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">201</span><span class="w"> </span><span class="n">BatchesWithSchema</span><span class="w"> </span><span class="n">out</span><span class="p">;</span> |
| <span class="linenos">202</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">field</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"a"</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">());</span> |
| <span class="linenos">203</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="p">}));</span> |
| <span class="linenos">204</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2_int</span><span class="p">,</span> |
| <span class="linenos">205</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">121</span><span class="p">,</span><span class="w"> </span><span class="mi">101</span><span class="p">,</span><span class="w"> </span><span class="mi">120</span><span class="p">,</span><span class="w"> </span><span class="mi">12</span><span class="p">}));</span> |
| <span class="linenos">206</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3_int</span><span class="p">,</span> |
| <span class="linenos">207</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">110</span><span class="p">,</span><span class="w"> </span><span class="mi">210</span><span class="p">,</span><span class="w"> </span><span class="mi">121</span><span class="p">}));</span> |
| <span class="linenos">208</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b4_int</span><span class="p">,</span> |
| <span class="linenos">209</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">51</span><span class="p">,</span><span class="w"> </span><span class="mi">101</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">34</span><span class="p">}));</span> |
| <span class="linenos">210</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b5_int</span><span class="p">,</span> |
| <span class="linenos">211</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">11</span><span class="p">,</span><span class="w"> </span><span class="mi">31</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">12</span><span class="p">}));</span> |
| <span class="linenos">212</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b6_int</span><span class="p">,</span> |
| <span class="linenos">213</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">12</span><span class="p">,</span><span class="w"> </span><span class="mi">101</span><span class="p">,</span><span class="w"> </span><span class="mi">120</span><span class="p">,</span><span class="w"> </span><span class="mi">12</span><span class="p">}));</span> |
| <span class="linenos">214</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b7_int</span><span class="p">,</span> |
| <span class="linenos">215</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">110</span><span class="p">,</span><span class="w"> </span><span class="mi">210</span><span class="p">,</span><span class="w"> </span><span class="mi">11</span><span class="p">}));</span> |
| <span class="linenos">216</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b8_int</span><span class="p">,</span> |
| <span class="linenos">217</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">51</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">}));</span> |
| <span class="linenos">218</span> |
| <span class="linenos">219</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1</span><span class="p">,</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">({</span><span class="n">field</span><span class="p">},</span><span class="w"> </span><span class="p">{</span><span class="n">b1_int</span><span class="p">}));</span> |
| <span class="linenos">220</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2</span><span class="p">,</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">({</span><span class="n">field</span><span class="p">},</span><span class="w"> </span><span class="p">{</span><span class="n">b2_int</span><span class="p">}));</span> |
| <span class="linenos">221</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3</span><span class="p">,</span> |
| <span class="linenos">222</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">({</span><span class="n">field</span><span class="p">,</span><span class="w"> </span><span class="n">field</span><span class="p">},</span><span class="w"> </span><span class="p">{</span><span class="n">b3_int</span><span class="p">,</span><span class="w"> </span><span class="n">b8_int</span><span class="p">}));</span> |
| <span class="linenos">223</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b4</span><span class="p">,</span> |
| <span class="linenos">224</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">({</span><span class="n">field</span><span class="p">,</span><span class="w"> </span><span class="n">field</span><span class="p">,</span><span class="w"> </span><span class="n">field</span><span class="p">,</span><span class="w"> </span><span class="n">field</span><span class="p">},</span> |
| <span class="linenos">225</span><span class="w"> </span><span class="p">{</span><span class="n">b4_int</span><span class="p">,</span><span class="w"> </span><span class="n">b5_int</span><span class="p">,</span><span class="w"> </span><span class="n">b6_int</span><span class="p">,</span><span class="w"> </span><span class="n">b7_int</span><span class="p">}));</span> |
| <span class="linenos">226</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">batches</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="n">b1</span><span class="p">,</span><span class="w"> </span><span class="n">b2</span><span class="p">,</span><span class="w"> </span><span class="n">b3</span><span class="p">,</span><span class="w"> </span><span class="n">b4</span><span class="p">};</span> |
| <span class="linenos">227</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">field</span><span class="p">});</span> |
| <span class="linenos">228</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">out</span><span class="p">;</span> |
| <span class="linenos">229</span><span class="p">}</span> |
| <span class="linenos">230</span> |
| <span class="linenos">231</span><span class="n">arrow</span><span class="o">::</span><span class="n">Result</span><span class="o"><</span><span class="n">BatchesWithSchema</span><span class="o">></span><span class="w"> </span><span class="n">MakeGroupableBatches</span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">multiplicity</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">1</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">232</span><span class="w"> </span><span class="n">BatchesWithSchema</span><span class="w"> </span><span class="n">out</span><span class="p">;</span> |
| <span class="linenos">233</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">fields</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"i32"</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">()),</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"str"</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())};</span> |
| <span class="linenos">234</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">12</span><span class="p">,</span><span class="w"> </span><span class="mi">7</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">}));</span> |
| <span class="linenos">235</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">-2</span><span class="p">,</span><span class="w"> </span><span class="mi">-1</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">}));</span> |
| <span class="linenos">236</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3_int</span><span class="p">,</span><span class="w"> </span><span class="n">GetArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int32Type</span><span class="o">></span><span class="p">({</span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">,</span><span class="w"> </span><span class="mi">-8</span><span class="p">}));</span> |
| <span class="linenos">237</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1_str</span><span class="p">,</span><span class="w"> </span><span class="n">GetBinaryArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">StringType</span><span class="o">></span><span class="p">(</span> |
| <span class="linenos">238</span><span class="w"> </span><span class="p">{</span><span class="s">"alpha"</span><span class="p">,</span><span class="w"> </span><span class="s">"beta"</span><span class="p">,</span><span class="w"> </span><span class="s">"alpha"</span><span class="p">}));</span> |
| <span class="linenos">239</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2_str</span><span class="p">,</span><span class="w"> </span><span class="n">GetBinaryArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">StringType</span><span class="o">></span><span class="p">(</span> |
| <span class="linenos">240</span><span class="w"> </span><span class="p">{</span><span class="s">"alpha"</span><span class="p">,</span><span class="w"> </span><span class="s">"gamma"</span><span class="p">,</span><span class="w"> </span><span class="s">"alpha"</span><span class="p">}));</span> |
| <span class="linenos">241</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3_str</span><span class="p">,</span><span class="w"> </span><span class="n">GetBinaryArrayDataSample</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">StringType</span><span class="o">></span><span class="p">(</span> |
| <span class="linenos">242</span><span class="w"> </span><span class="p">{</span><span class="s">"gamma"</span><span class="p">,</span><span class="w"> </span><span class="s">"beta"</span><span class="p">,</span><span class="w"> </span><span class="s">"alpha"</span><span class="p">}));</span> |
| <span class="linenos">243</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b1</span><span class="p">,</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">fields</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">b1_int</span><span class="p">,</span><span class="w"> </span><span class="n">b1_str</span><span class="p">}));</span> |
| <span class="linenos">244</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b2</span><span class="p">,</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">fields</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">b2_int</span><span class="p">,</span><span class="w"> </span><span class="n">b2_str</span><span class="p">}));</span> |
| <span class="linenos">245</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">b3</span><span class="p">,</span><span class="w"> </span><span class="n">GetExecBatchFromVectors</span><span class="p">(</span><span class="n">fields</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">b3_int</span><span class="p">,</span><span class="w"> </span><span class="n">b3_str</span><span class="p">}));</span> |
| <span class="linenos">246</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">batches</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="n">b1</span><span class="p">,</span><span class="w"> </span><span class="n">b2</span><span class="p">,</span><span class="w"> </span><span class="n">b3</span><span class="p">};</span> |
| <span class="linenos">247</span> |
| <span class="linenos">248</span><span class="w"> </span><span class="kt">size_t</span><span class="w"> </span><span class="n">batch_count</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">batches</span><span class="p">.</span><span class="n">size</span><span class="p">();</span> |
| <span class="linenos">249</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">repeat</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">1</span><span class="p">;</span><span class="w"> </span><span class="n">repeat</span><span class="w"> </span><span class="o"><</span><span class="w"> </span><span class="n">multiplicity</span><span class="p">;</span><span class="w"> </span><span class="o">++</span><span class="n">repeat</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">250</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">size_t</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o"><</span><span class="w"> </span><span class="n">batch_count</span><span class="p">;</span><span class="w"> </span><span class="o">++</span><span class="n">i</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">251</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">batches</span><span class="p">.</span><span class="n">push_back</span><span class="p">(</span><span class="n">out</span><span class="p">.</span><span class="n">batches</span><span class="p">[</span><span class="n">i</span><span class="p">]);</span> |
| <span class="linenos">252</span><span class="w"> </span><span class="p">}</span> |
| <span class="linenos">253</span><span class="w"> </span><span class="p">}</span> |
| <span class="linenos">254</span> |
| <span class="linenos">255</span><span class="w"> </span><span class="n">out</span><span class="p">.</span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">(</span><span class="n">fields</span><span class="p">);</span> |
| <span class="linenos">256</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">out</span><span class="p">;</span> |
| <span class="linenos">257</span><span class="p">}</span> |
| <span class="linenos">258</span> |
| <span class="linenos">259</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">plan</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">260</span><span class="w"> </span><span class="c1">// execute the plan and collect the results into a Table</span> |
| <span class="linenos">261</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span><span class="w"> </span><span class="n">response_table</span><span class="p">;</span> |
| <span class="linenos">262</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">response_table</span><span class="p">,</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">DeclarationToTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">plan</span><span class="p">)));</span> |
| <span class="linenos">263</span> |
| <span class="linenos">264</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"Results : "</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">response_table</span><span class="o">-></span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">265</span> |
| <span class="linenos">266</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span> |
| <span class="linenos">267</span><span class="p">}</span> |
| <span class="linenos">268</span> |
| <span class="linenos">269</span><span class="c1">// (Doc section: Scan Example)</span> |
| <span class="linenos">270</span> |
| <span class="linenos">271</span><span class="c1">/// \brief An example demonstrating a scan and sink node</span> |
| <span class="linenos">272</span><span class="c1">///</span> |
| <span class="linenos">273</span><span class="c1">/// Scan-Table</span> |
| <span class="linenos">274</span><span class="c1">/// This example shows how a scan operation can be applied on a dataset.</span> |
| <span class="linenos">275</span><span class="c1">/// There are operations that can be applied on the scan (project, filter)</span> |
| <span class="linenos">276</span><span class="c1">/// and the input data can be processed. The output is obtained as a table</span> |
| <span class="linenos">277</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">ScanSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">278</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">></span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span> |
| <span class="linenos">279</span> |
| <span class="linenos">280</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">></span><span class="p">();</span> |
| <span class="linenos">281</span><span class="w"> </span><span class="n">options</span><span class="o">-></span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span><span class="w"> </span><span class="c1">// create empty projection</span> |
| <span class="linenos">282</span> |
| <span class="linenos">283</span><span class="w"> </span><span class="c1">// construct the scan node</span> |
| <span class="linenos">284</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span> |
| <span class="linenos">285</span> |
| <span class="linenos">286</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">"scan"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span> |
| <span class="linenos">287</span> |
| <span class="linenos">288</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">));</span> |
| <span class="linenos">289</span><span class="p">}</span> |
| <span class="linenos">290</span><span class="c1">// (Doc section: Scan Example)</span> |
| <span class="linenos">291</span> |
| <span class="linenos">292</span><span class="c1">// (Doc section: Source Example)</span> |
| <span class="linenos">293</span> |
| <span class="linenos">294</span><span class="c1">/// \brief An example demonstrating a source and sink node</span> |
| <span class="linenos">295</span><span class="c1">///</span> |
| <span class="linenos">296</span><span class="c1">/// Source-Table Example</span> |
| <span class="linenos">297</span><span class="c1">/// This example shows how a custom source can be used</span> |
| <span class="linenos">298</span><span class="c1">/// in an execution plan. This includes source node using pregenerated</span> |
| <span class="linenos">299</span><span class="c1">/// data and collecting it into a table.</span> |
| <span class="linenos">300</span><span class="c1">///</span> |
| <span class="linenos">301</span><span class="c1">/// This sort of custom source is often not needed. In most cases you can</span> |
| <span class="linenos">302</span><span class="c1">/// use a scan (for a dataset source) or a source like table_source, array_vector_source,</span> |
| <span class="linenos">303</span><span class="c1">/// exec_batch_source, or record_batch_source (for in-memory data)</span> |
| <span class="linenos">304</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">SourceSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">305</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span> |
| <span class="linenos">306</span> |
| <span class="linenos">307</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span> |
| <span class="linenos">308</span> |
| <span class="linenos">309</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span> |
| <span class="linenos">310</span> |
| <span class="linenos">311</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">));</span> |
| <span class="linenos">312</span><span class="p">}</span> |
| <span class="linenos">313</span><span class="c1">// (Doc section: Source Example)</span> |
| <span class="linenos">314</span> |
| <span class="linenos">315</span><span class="c1">// (Doc section: Table Source Example)</span> |
| <span class="linenos">316</span> |
| <span class="linenos">317</span><span class="c1">/// \brief An example showing a table source node</span> |
| <span class="linenos">318</span><span class="c1">///</span> |
| <span class="linenos">319</span><span class="c1">/// TableSource-Table Example</span> |
| <span class="linenos">320</span><span class="c1">/// This example shows how a table_source can be used</span> |
| <span class="linenos">321</span><span class="c1">/// in an execution plan. This includes a table source node</span> |
| <span class="linenos">322</span><span class="c1">/// receiving data from a table. This plan simply collects the</span> |
| <span class="linenos">323</span><span class="c1">/// data back into a table but nodes could be added that modify</span> |
| <span class="linenos">324</span><span class="c1">/// or transform the data as well (as is shown in later examples)</span> |
| <span class="linenos">325</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">TableSourceSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">326</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">table</span><span class="p">,</span><span class="w"> </span><span class="n">GetTable</span><span class="p">());</span> |
| <span class="linenos">327</span> |
| <span class="linenos">328</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">>></span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span> |
| <span class="linenos">329</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">max_batch_size</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">2</span><span class="p">;</span> |
| <span class="linenos">330</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">table_source_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">TableSourceNodeOptions</span><span class="p">{</span><span class="n">table</span><span class="p">,</span><span class="w"> </span><span class="n">max_batch_size</span><span class="p">};</span> |
| <span class="linenos">331</span> |
| <span class="linenos">332</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">"table_source"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">table_source_options</span><span class="p">)};</span> |
| <span class="linenos">333</span> |
| <span class="linenos">334</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">));</span> |
| <span class="linenos">335</span><span class="p">}</span> |
| <span class="linenos">336</span><span class="c1">// (Doc section: Table Source Example)</span> |
| <span class="linenos">337</span> |
| <span class="linenos">338</span><span class="c1">// (Doc section: Filter Example)</span> |
| <span class="linenos">339</span> |
| <span class="linenos">340</span><span class="c1">/// \brief An example showing a filter node</span> |
| <span class="linenos">341</span><span class="c1">///</span> |
| <span class="linenos">342</span><span class="c1">/// Source-Filter-Table</span> |
| <span class="linenos">343</span><span class="c1">/// This example shows how a filter can be used in an execution plan,</span> |
| <span class="linenos">344</span><span class="c1">/// to filter data from a source. The output from the execution plan</span> |
| <span class="linenos">345</span><span class="c1">/// is collected into a table.</span> |
| <span class="linenos">346</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">ScanFilterSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">347</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">></span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span> |
| <span class="linenos">348</span> |
| <span class="linenos">349</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">></span><span class="p">();</span> |
| <span class="linenos">350</span><span class="w"> </span><span class="c1">// specify the filter. This filter keeps only the rows where the</span> |
| <span class="linenos">351</span><span class="w"> </span><span class="c1">// value of the "a" column is greater than 3.</span> |
| <span class="linenos">352</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">Expression</span><span class="w"> </span><span class="n">filter_expr</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">greater</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"a"</span><span class="p">),</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">3</span><span class="p">));</span> |
| <span class="linenos">353</span><span class="w"> </span><span class="c1">// set filter for scanner : on-disk / push-down filtering.</span> |
| <span class="linenos">354</span><span class="w"> </span><span class="c1">// This step can be skipped if you are not reading from disk.</span> |
| <span class="linenos">355</span><span class="w"> </span><span class="n">options</span><span class="o">-></span><span class="n">filter</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">filter_expr</span><span class="p">;</span> |
| <span class="linenos">356</span><span class="w"> </span><span class="c1">// empty projection</span> |
| <span class="linenos">357</span><span class="w"> </span><span class="n">options</span><span class="o">-></span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span> |
| <span class="linenos">358</span> |
| <span class="linenos">359</span><span class="w"> </span><span class="c1">// construct the scan node</span> |
| <span class="linenos">360</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"Initialized Scanning Options"</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">361</span> |
| <span class="linenos">362</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span> |
| <span class="linenos">363</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"Scan node options created"</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">364</span> |
| <span class="linenos">365</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">"scan"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span> |
| <span class="linenos">366</span> |
| <span class="linenos">367</span><span class="w"> </span><span class="c1">// pipe the scan node into the filter node</span> |
| <span class="linenos">368</span><span class="w"> </span><span class="c1">// Need to set the filter in scan node options and filter node options.</span> |
| <span class="linenos">369</span><span class="w"> </span><span class="c1">// At scan node it is used for on-disk / push-down filtering.</span> |
| <span class="linenos">370</span><span class="w"> </span><span class="c1">// At filter node it is used for in-memory filtering.</span> |
| <span class="linenos">371</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">filter</span><span class="p">{</span> |
| <span class="linenos">372</span><span class="w"> </span><span class="s">"filter"</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">)},</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">FilterNodeOptions</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">filter_expr</span><span class="p">))};</span> |
| <span class="linenos">373</span> |
| <span class="linenos">374</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">filter</span><span class="p">));</span> |
| <span class="linenos">375</span><span class="p">}</span> |
| <span class="linenos">376</span> |
| <span class="linenos">377</span><span class="c1">// (Doc section: Filter Example)</span> |
| <span class="linenos">378</span> |
| <span class="linenos">379</span><span class="c1">// (Doc section: Project Example)</span> |
| <span class="linenos">380</span> |
| <span class="linenos">381</span><span class="c1">/// \brief An example showing a project node</span> |
| <span class="linenos">382</span><span class="c1">///</span> |
| <span class="linenos">383</span><span class="c1">/// Scan-Project-Table</span> |
| <span class="linenos">384</span><span class="c1">/// This example shows how a Scan operation can be used to load the data</span> |
| <span class="linenos">385</span><span class="c1">/// into the execution plan, how a project operation can be applied on the</span> |
| <span class="linenos">386</span><span class="c1">/// data stream and how the output is collected into a table</span> |
| <span class="linenos">387</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">ScanProjectSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">388</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">></span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span> |
| <span class="linenos">389</span> |
| <span class="linenos">390</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">></span><span class="p">();</span> |
| <span class="linenos">391</span><span class="w"> </span><span class="c1">// projection</span> |
| <span class="linenos">392</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">Expression</span><span class="w"> </span><span class="n">a_times_2</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">call</span><span class="p">(</span><span class="s">"multiply"</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">"a"</span><span class="p">),</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">2</span><span class="p">)});</span> |
| <span class="linenos">393</span><span class="w"> </span><span class="n">options</span><span class="o">-></span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span> |
| <span class="linenos">394</span> |
| <span class="linenos">395</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span> |
| <span class="linenos">396</span> |
| <span class="linenos">397</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">"scan"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span> |
| <span class="linenos">398</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">project</span><span class="p">{</span> |
| <span class="linenos">399</span><span class="w"> </span><span class="s">"project"</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">)},</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ProjectNodeOptions</span><span class="p">({</span><span class="n">a_times_2</span><span class="p">})};</span> |
| <span class="linenos">400</span> |
| <span class="linenos">401</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">project</span><span class="p">));</span> |
| <span class="linenos">402</span><span class="p">}</span> |
| <span class="linenos">403</span> |
| <span class="linenos">404</span><span class="c1">// (Doc section: Project Example)</span> |
| <span class="linenos">405</span> |
| <span class="linenos">406</span><span class="c1">// (Doc section: Scalar Aggregate Example)</span> |
| <span class="linenos">407</span> |
| <span class="linenos">408</span><span class="c1">/// \brief An example showing an aggregation node to aggregate an entire table</span> |
| <span class="linenos">409</span><span class="c1">///</span> |
| <span class="linenos">410</span><span class="c1">/// Source-Aggregation-Table</span> |
| <span class="linenos">411</span><span class="c1">/// This example shows how an aggregation operation can be applied on an</span> |
| <span class="linenos">412</span><span class="c1">/// execution plan resulting in a scalar output. The source node loads the</span> |
| <span class="linenos">413</span><span class="c1">/// data and the aggregation (summing the values in column 'a')</span> |
| <span class="linenos">414</span><span class="c1">/// is applied on this data. The output is collected into a table (that will</span> |
| <span class="linenos">415</span><span class="c1">/// have exactly one row)</span> |
| <span class="linenos">416</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">SourceScalarAggregateSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">417</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span> |
| <span class="linenos">418</span> |
| <span class="linenos">419</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span> |
| <span class="linenos">420</span> |
| <span class="linenos">421</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span> |
| <span class="linenos">422</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">aggregate_options</span><span class="w"> </span><span class="o">=</span> |
| <span class="linenos">423</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">AggregateNodeOptions</span><span class="p">{</span><span class="cm">/*aggregates=*/</span><span class="p">{{</span><span class="s">"sum"</span><span class="p">,</span><span class="w"> </span><span class="k">nullptr</span><span class="p">,</span><span class="w"> </span><span class="s">"a"</span><span class="p">,</span><span class="w"> </span><span class="s">"sum(a)"</span><span class="p">}}};</span> |
| <span class="linenos">424</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">aggregate</span><span class="p">{</span> |
| <span class="linenos">425</span><span class="w"> </span><span class="s">"aggregate"</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">)},</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">aggregate_options</span><span class="p">)};</span> |
| <span class="linenos">426</span> |
| <span class="linenos">427</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">aggregate</span><span class="p">));</span> |
| <span class="linenos">428</span><span class="p">}</span> |
| <span class="linenos">429</span><span class="c1">// (Doc section: Scalar Aggregate Example)</span> |
| <span class="linenos">430</span> |
| <span class="linenos">431</span><span class="c1">// (Doc section: Group Aggregate Example)</span> |
| <span class="linenos">432</span> |
| <span class="linenos">433</span><span class="c1">/// \brief An example showing an aggregation node to perform a group-by operation</span> |
| <span class="linenos">434</span><span class="c1">///</span> |
| <span class="linenos">435</span><span class="c1">/// Source-Aggregation-Table</span> |
| <span class="linenos">436</span><span class="c1">/// This example shows how an aggregation operation can be applied on an</span> |
| <span class="linenos">437</span><span class="c1">/// execution plan resulting in grouped output. The source node loads the</span> |
| <span class="linenos">438</span><span class="c1">/// data and the aggregation (counting non-null values of column 'a' per group key 'b') is</span> |
| <span class="linenos">439</span><span class="c1">/// applied on this data. The output is collected into a table that will contain</span> |
| <span class="linenos">440</span><span class="c1">/// one row for each unique combination of group keys.</span> |
| <span class="linenos">441</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">SourceGroupAggregateSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">442</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span> |
| <span class="linenos">443</span> |
| <span class="linenos">444</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">>></span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span> |
| <span class="linenos">445</span> |
| <span class="linenos">446</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span> |
| <span class="linenos">447</span> |
| <span class="linenos">448</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span> |
| <span class="linenos">449</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">CountOptions</span><span class="o">></span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">CountOptions</span><span class="o">::</span><span class="n">ONLY_VALID</span><span class="p">);</span> |
| <span class="linenos">450</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">aggregate_options</span><span class="w"> </span><span class="o">=</span> |
| <span class="linenos">451</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">AggregateNodeOptions</span><span class="p">{</span><span class="cm">/*aggregates=*/</span><span class="p">{{</span><span class="s">"hash_count"</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">,</span><span class="w"> </span><span class="s">"a"</span><span class="p">,</span><span class="w"> </span><span class="s">"count(a)"</span><span class="p">}},</span> |
| <span class="linenos">452</span><span class="w"> </span><span class="cm">/*keys=*/</span><span class="p">{</span><span class="s">"b"</span><span class="p">}};</span> |
| <span class="linenos">453</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">aggregate</span><span class="p">{</span> |
| <span class="linenos">454</span><span class="w"> </span><span class="s">"aggregate"</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">)},</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">aggregate_options</span><span class="p">)};</span> |
| <span class="linenos">455</span> |
| <span class="linenos">456</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">aggregate</span><span class="p">));</span> |
| <span class="linenos">457</span><span class="p">}</span> |
| <span class="linenos">458</span><span class="c1">// (Doc section: Group Aggregate Example)</span> |
| <span class="linenos">459</span> |
| <span class="linenos">460</span><span class="c1">// (Doc section: ConsumingSink Example)</span> |
| <span class="linenos">461</span> |
| <span class="linenos">462</span><span class="c1">/// \brief An example showing a consuming sink node</span> |
| <span class="linenos">463</span><span class="c1">///</span> |
| <span class="linenos">464</span><span class="c1">/// Source-Consuming-Sink</span> |
| <span class="linenos">465</span><span class="c1">/// This example shows how the data can be consumed within the execution plan</span> |
| <span class="linenos">466</span><span class="c1">/// by using a ConsumingSink node. There is no data output from this execution plan.</span> |
| <span class="linenos">467</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">SourceConsumingSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">468</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span> |
| <span class="linenos">469</span> |
| <span class="linenos">470</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span> |
| <span class="linenos">471</span> |
| <span class="linenos">472</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">source</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source_node_options</span><span class="p">)};</span> |
| <span class="linenos">473</span> |
| <span class="linenos">474</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o"><</span><span class="kt">uint32_t</span><span class="o">></span><span class="w"> </span><span class="n">batches_seen</span><span class="p">{</span><span class="mi">0</span><span class="p">};</span> |
| <span class="linenos">475</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><></span><span class="w"> </span><span class="n">finish</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><>::</span><span class="n">Make</span><span class="p">();</span> |
| <span class="linenos">476</span><span class="w"> </span><span class="k">struct</span><span class="w"> </span><span class="nc">CustomSinkNodeConsumer</span><span class="w"> </span><span class="o">:</span><span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SinkNodeConsumer</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">477</span><span class="w"> </span><span class="n">CustomSinkNodeConsumer</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o"><</span><span class="kt">uint32_t</span><span class="o">>*</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><></span><span class="w"> </span><span class="n">finish</span><span class="p">)</span> |
| <span class="linenos">478</span><span class="w"> </span><span class="o">:</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">(</span><span class="n">batches_seen</span><span class="p">),</span><span class="w"> </span><span class="n">finish</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">finish</span><span class="p">))</span><span class="w"> </span><span class="p">{}</span> |
| <span class="linenos">479</span> |
| <span class="linenos">480</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">Init</span><span class="p">(</span><span class="k">const</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Schema</span><span class="o">>&</span><span class="w"> </span><span class="n">schema</span><span class="p">,</span> |
| <span class="linenos">481</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">BackpressureControl</span><span class="o">*</span><span class="w"> </span><span class="n">backpressure_control</span><span class="p">,</span> |
| <span class="linenos">482</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">*</span><span class="w"> </span><span class="n">plan</span><span class="p">)</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">483</span><span class="w"> </span><span class="c1">// This will be called as the plan is started (before the first call to Consume)</span> |
| <span class="linenos">484</span><span class="w"> </span><span class="c1">// and provides the schema of the data coming into the node, controls for pausing /</span> |
| <span class="linenos">485</span><span class="w"> </span><span class="c1">// resuming input, and a pointer to the plan itself which can be used to access</span> |
| <span class="linenos">486</span><span class="w"> </span><span class="c1">// other utilities such as the thread indexer or async task scheduler.</span> |
| <span class="linenos">487</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span> |
| <span class="linenos">488</span><span class="w"> </span><span class="p">}</span> |
| <span class="linenos">489</span> |
| <span class="linenos">490</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">Consume</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="w"> </span><span class="n">batch</span><span class="p">)</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">491</span><span class="w"> </span><span class="p">(</span><span class="o">*</span><span class="n">batches_seen</span><span class="p">)</span><span class="o">++</span><span class="p">;</span> |
| <span class="linenos">492</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span> |
| <span class="linenos">493</span><span class="w"> </span><span class="p">}</span> |
| <span class="linenos">494</span> |
| <span class="linenos">495</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><></span><span class="w"> </span><span class="n">Finish</span><span class="p">()</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">496</span><span class="w"> </span><span class="c1">// Here you can perform whatever (possibly async) cleanup is needed, e.g. closing</span> |
| <span class="linenos">497</span><span class="w"> </span><span class="c1">// output file handles and flushing remaining work</span> |
| <span class="linenos">498</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><>::</span><span class="n">MakeFinished</span><span class="p">();</span> |
| <span class="linenos">499</span><span class="w"> </span><span class="p">}</span> |
| <span class="linenos">500</span> |
| <span class="linenos">501</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">atomic</span><span class="o"><</span><span class="kt">uint32_t</span><span class="o">>*</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">;</span> |
| <span class="linenos">502</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Future</span><span class="o"><></span><span class="w"> </span><span class="n">finish</span><span class="p">;</span> |
| <span class="linenos">503</span><span class="w"> </span><span class="p">};</span> |
| <span class="linenos">504</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">CustomSinkNodeConsumer</span><span class="o">></span><span class="w"> </span><span class="n">consumer</span><span class="w"> </span><span class="o">=</span> |
| <span class="linenos">505</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">CustomSinkNodeConsumer</span><span class="o">></span><span class="p">(</span><span class="o">&</span><span class="n">batches_seen</span><span class="p">,</span><span class="w"> </span><span class="n">finish</span><span class="p">);</span> |
| <span class="linenos">506</span> |
| <span class="linenos">507</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">consuming_sink</span><span class="p">{</span><span class="s">"consuming_sink"</span><span class="p">,</span> |
| <span class="linenos">508</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">source</span><span class="p">)},</span> |
| <span class="linenos">509</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ConsumingSinkNodeOptions</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">consumer</span><span class="p">))};</span> |
| <span class="linenos">510</span> |
| <span class="linenos">511</span><span class="w"> </span><span class="c1">// Since we are consuming the data within the plan there is no output and we simply</span> |
| <span class="linenos">512</span><span class="w"> </span><span class="c1">// run the plan to completion instead of collecting into a table.</span> |
| <span class="linenos">513</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">DeclarationToStatus</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">consuming_sink</span><span class="p">)));</span> |
| <span class="linenos">514</span> |
| <span class="linenos">515</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"The consuming sink node saw "</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">batches_seen</span><span class="p">.</span><span class="n">load</span><span class="p">()</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">" batches"</span> |
| <span class="linenos">516</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">517</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span> |
| <span class="linenos">518</span><span class="p">}</span> |
| <span class="linenos">519</span><span class="c1">// (Doc section: ConsumingSink Example)</span> |
| <span class="linenos">520</span> |
| <span class="linenos">521</span><span class="c1">// (Doc section: OrderBySink Example)</span> |
| <span class="linenos">522</span> |
| <span class="linenos">523</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTableWithCustomSink</span><span class="p">(</span> |
| <span class="linenos">524</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">></span><span class="w"> </span><span class="n">plan</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Schema</span><span class="o">></span><span class="w"> </span><span class="n">schema</span><span class="p">,</span> |
| <span class="linenos">525</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">>></span><span class="w"> </span><span class="n">sink_gen</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">526</span><span class="w"> </span><span class="c1">// translate sink_gen (async) to sink_reader (sync)</span> |
| <span class="linenos">527</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">RecordBatchReader</span><span class="o">></span><span class="w"> </span><span class="n">sink_reader</span><span class="w"> </span><span class="o">=</span> |
| <span class="linenos">528</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeGeneratorReader</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">sink_gen</span><span class="p">),</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">());</span> |
| <span class="linenos">529</span> |
| <span class="linenos">530</span><span class="w"> </span><span class="c1">// validate the ExecPlan</span> |
| <span class="linenos">531</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">plan</span><span class="o">-></span><span class="n">Validate</span><span class="p">());</span> |
| <span class="linenos">532</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"ExecPlan created : "</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">plan</span><span class="o">-></span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">533</span><span class="w"> </span><span class="c1">// start the ExecPlan</span> |
| <span class="linenos">534</span><span class="w"> </span><span class="n">plan</span><span class="o">-></span><span class="n">StartProducing</span><span class="p">();</span> |
| <span class="linenos">535</span> |
| <span class="linenos">536</span><span class="w"> </span><span class="c1">// collect sink_reader into a Table</span> |
| <span class="linenos">537</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span><span class="w"> </span><span class="n">response_table</span><span class="p">;</span> |
| <span class="linenos">538</span> |
| <span class="linenos">539</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">response_table</span><span class="p">,</span> |
| <span class="linenos">540</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">FromRecordBatchReader</span><span class="p">(</span><span class="n">sink_reader</span><span class="p">.</span><span class="n">get</span><span class="p">()));</span> |
| <span class="linenos">541</span> |
| <span class="linenos">542</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"Results : "</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">response_table</span><span class="o">-></span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">543</span> |
| <span class="linenos">544</span><span class="w"> </span><span class="c1">// stop producing</span> |
| <span class="linenos">545</span><span class="w"> </span><span class="n">plan</span><span class="o">-></span><span class="n">StopProducing</span><span class="p">();</span> |
| <span class="linenos">546</span><span class="w"> </span><span class="c1">// retrieve the plan's finished future and return its status</span> |
| <span class="linenos">547</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">future</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">plan</span><span class="o">-></span><span class="n">finished</span><span class="p">();</span> |
| <span class="linenos">548</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">future</span><span class="p">.</span><span class="n">status</span><span class="p">();</span> |
| <span class="linenos">549</span><span class="p">}</span> |
| <span class="linenos">550</span> |
| <span class="linenos">551</span><span class="c1">/// \brief An example showing an order-by node</span> |
| <span class="linenos">552</span><span class="c1">///</span> |
| <span class="linenos">553</span><span class="c1">/// Source-OrderBy-Sink</span> |
| <span class="linenos">554</span><span class="c1">/// In this example, the data enters through the source node</span> |
| <span class="linenos">555</span><span class="c1">/// and the data is ordered in the sink node. The order can be</span> |
| <span class="linenos">556</span><span class="c1">/// ASCENDING or DESCENDING and it is configurable. The output</span> |
| <span class="linenos">557</span><span class="c1">/// is obtained as a table from the sink node.</span> |
| <span class="linenos">558</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">SourceOrderBySinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">559</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">></span><span class="w"> </span><span class="n">plan</span><span class="p">,</span> |
| <span class="linenos">560</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="o">*</span><span class="n">cp</span><span class="o">::</span><span class="n">threaded_exec_context</span><span class="p">()));</span> |
| <span class="linenos">561</span> |
| <span class="linenos">562</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeSortTestBasicBatches</span><span class="p">());</span> |
| <span class="linenos">563</span> |
| <span class="linenos">564</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">>></span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span> |
| <span class="linenos">565</span> |
| <span class="linenos">566</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span> |
| <span class="linenos">567</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecNode</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">source</span><span class="p">,</span> |
| <span class="linenos">568</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{},</span><span class="w"> </span><span class="n">source_node_options</span><span class="p">));</span> |
| <span class="linenos">569</span> |
| <span class="linenos">570</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span> |
| <span class="linenos">571</span><span class="w"> </span><span class="s">"order_by_sink"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{</span><span class="n">source</span><span class="p">},</span> |
| <span class="linenos">572</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">OrderBySinkNodeOptions</span><span class="p">{</span> |
| <span class="linenos">573</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SortOptions</span><span class="p">{{</span><span class="n">cp</span><span class="o">::</span><span class="n">SortKey</span><span class="p">{</span><span class="s">"a"</span><span class="p">,</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SortOrder</span><span class="o">::</span><span class="n">Descending</span><span class="p">}}},</span><span class="w"> </span><span class="o">&</span><span class="n">sink_gen</span><span class="p">}));</span> |
| <span class="linenos">574</span> |
| <span class="linenos">575</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTableWithCustomSink</span><span class="p">(</span><span class="n">plan</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">);</span> |
| <span class="linenos">576</span><span class="p">}</span> |
| <span class="linenos">577</span> |
| <span class="linenos">578</span><span class="c1">// (Doc section: OrderBySink Example)</span> |
| <span class="linenos">579</span> |
| <span class="linenos">580</span><span class="c1">// (Doc section: HashJoin Example)</span> |
| <span class="linenos">581</span> |
| <span class="linenos">582</span><span class="c1">/// \brief An example showing a hash join node</span> |
| <span class="linenos">583</span><span class="c1">///</span> |
| <span class="linenos">584</span><span class="c1">/// Source-HashJoin-Table</span> |
| <span class="linenos">585</span><span class="c1">/// This example shows how a source node gets the data and how a self-join</span> |
| <span class="linenos">586</span><span class="c1">/// is applied on the data. The join options are configurable. The output</span> |
| <span class="linenos">587</span><span class="c1">/// is collected into a table.</span> |
| <span class="linenos">588</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">SourceHashJoinSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">589</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="n">MakeGroupableBatches</span><span class="p">());</span> |
| <span class="linenos">590</span> |
| <span class="linenos">591</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">left</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">input</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">input</span><span class="p">.</span><span class="n">gen</span><span class="p">()}};</span> |
| <span class="linenos">592</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">right</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">input</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">input</span><span class="p">.</span><span class="n">gen</span><span class="p">()}};</span> |
| <span class="linenos">593</span> |
| <span class="linenos">594</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">HashJoinNodeOptions</span><span class="w"> </span><span class="n">join_opts</span><span class="p">{</span> |
| <span class="linenos">595</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">JoinType</span><span class="o">::</span><span class="n">INNER</span><span class="p">,</span> |
| <span class="linenos">596</span><span class="w"> </span><span class="cm">/*left_keys=*/</span><span class="p">{</span><span class="s">"str"</span><span class="p">},</span> |
| <span class="linenos">597</span><span class="w"> </span><span class="cm">/*right_keys=*/</span><span class="p">{</span><span class="s">"str"</span><span class="p">},</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="nb">true</span><span class="p">),</span><span class="w"> </span><span class="s">"l_"</span><span class="p">,</span><span class="w"> </span><span class="s">"r_"</span><span class="p">};</span> |
| <span class="linenos">598</span> |
| <span class="linenos">599</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">hashjoin</span><span class="p">{</span> |
| <span class="linenos">600</span><span class="w"> </span><span class="s">"hashjoin"</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">left</span><span class="p">),</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">right</span><span class="p">)},</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">join_opts</span><span class="p">)};</span> |
| <span class="linenos">601</span> |
| <span class="linenos">602</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">hashjoin</span><span class="p">));</span> |
| <span class="linenos">603</span><span class="p">}</span> |
| <span class="linenos">604</span> |
| <span class="linenos">605</span><span class="c1">// (Doc section: HashJoin Example)</span> |
| <span class="linenos">606</span> |
| <span class="linenos">607</span><span class="c1">// (Doc section: KSelect Example)</span> |
| <span class="linenos">608</span> |
| <span class="linenos">609</span><span class="c1">/// \brief An example showing a select-k node</span> |
| <span class="linenos">610</span><span class="c1">///</span> |
| <span class="linenos">611</span><span class="c1">/// Source-KSelect</span> |
| <span class="linenos">612</span><span class="c1">/// This example shows how K elements can be selected</span> |
| <span class="linenos">613</span><span class="c1">/// either from the top or bottom. The output node is a modified</span> |
| <span class="linenos">614</span><span class="c1">/// sink node where output can be obtained as a table.</span> |
| <span class="linenos">615</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">SourceKSelectExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">616</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="n">MakeGroupableBatches</span><span class="p">());</span> |
| <span class="linenos">617</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">></span><span class="w"> </span><span class="n">plan</span><span class="p">,</span> |
| <span class="linenos">618</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="o">*</span><span class="n">cp</span><span class="o">::</span><span class="n">threaded_exec_context</span><span class="p">()));</span> |
| <span class="linenos">619</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">>></span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span> |
| <span class="linenos">620</span> |
| <span class="linenos">621</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span> |
| <span class="linenos">622</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecNode</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">source</span><span class="p">,</span> |
| <span class="linenos">623</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{},</span> |
| <span class="linenos">624</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">input</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">input</span><span class="p">.</span><span class="n">gen</span><span class="p">()}));</span> |
| <span class="linenos">625</span> |
| <span class="linenos">626</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SelectKOptions</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">SelectKOptions</span><span class="o">::</span><span class="n">TopKDefault</span><span class="p">(</span><span class="cm">/*k=*/</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="s">"i32"</span><span class="p">});</span> |
| <span class="linenos">627</span> |
| <span class="linenos">628</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">"select_k_sink"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{</span><span class="n">source</span><span class="p">},</span> |
| <span class="linenos">629</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SelectKSinkNodeOptions</span><span class="p">{</span><span class="n">options</span><span class="p">,</span><span class="w"> </span><span class="o">&</span><span class="n">sink_gen</span><span class="p">}));</span> |
| <span class="linenos">630</span> |
| <span class="linenos">631</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">(</span> |
| <span class="linenos">632</span><span class="w"> </span><span class="p">{</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"i32"</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">()),</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"str"</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())});</span> |
| <span class="linenos">633</span> |
| <span class="linenos">634</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTableWithCustomSink</span><span class="p">(</span><span class="n">plan</span><span class="p">,</span><span class="w"> </span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">sink_gen</span><span class="p">);</span> |
| <span class="linenos">635</span><span class="p">}</span> |
| <span class="linenos">636</span> |
| <span class="linenos">637</span><span class="c1">// (Doc section: KSelect Example)</span> |
| <span class="linenos">638</span> |
| <span class="linenos">639</span><span class="c1">// (Doc section: Write Example)</span> |
| <span class="linenos">640</span> |
| <span class="linenos">641</span><span class="c1">/// \brief An example showing a write node</span> |
| <span class="linenos">642</span><span class="c1">/// \param file_path The destination to write to</span> |
| <span class="linenos">643</span><span class="c1">///</span> |
| <span class="linenos">644</span><span class="c1">/// Scan-Filter-Write</span> |
| <span class="linenos">645</span><span class="c1">/// This example shows how a scan node can be used to load the data</span> |
| <span class="linenos">646</span><span class="c1">/// and how, after processing, it can be written to disk.</span> |
| <span class="linenos">647</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">ScanFilterWriteExample</span><span class="p">(</span><span class="k">const</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&</span><span class="w"> </span><span class="n">file_path</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">648</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">Dataset</span><span class="o">></span><span class="w"> </span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">GetDataset</span><span class="p">());</span> |
| <span class="linenos">649</span> |
| <span class="linenos">650</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanOptions</span><span class="o">></span><span class="p">();</span> |
| <span class="linenos">651</span><span class="w"> </span><span class="c1">// empty projection</span> |
| <span class="linenos">652</span><span class="w"> </span><span class="n">options</span><span class="o">-></span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cp</span><span class="o">::</span><span class="n">project</span><span class="p">({},</span><span class="w"> </span><span class="p">{});</span> |
| <span class="linenos">653</span> |
| <span class="linenos">654</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">scan_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ScanNodeOptions</span><span class="p">{</span><span class="n">dataset</span><span class="p">,</span><span class="w"> </span><span class="n">options</span><span class="p">};</span> |
| <span class="linenos">655</span> |
| <span class="linenos">656</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">scan</span><span class="p">{</span><span class="s">"scan"</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan_node_options</span><span class="p">)};</span> |
| <span class="linenos">657</span> |
| <span class="linenos">658</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">AsyncGenerator</span><span class="o"><</span><span class="n">std</span><span class="o">::</span><span class="n">optional</span><span class="o"><</span><span class="n">cp</span><span class="o">::</span><span class="n">ExecBatch</span><span class="o">>></span><span class="w"> </span><span class="n">sink_gen</span><span class="p">;</span> |
| <span class="linenos">659</span> |
| <span class="linenos">660</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="w"> </span><span class="n">root_path</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">""</span><span class="p">;</span> |
| <span class="linenos">661</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="w"> </span><span class="n">uri</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">"file://"</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">file_path</span><span class="p">;</span> |
| <span class="linenos">662</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">></span><span class="w"> </span><span class="n">filesystem</span><span class="p">,</span> |
| <span class="linenos">663</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystemFromUri</span><span class="p">(</span><span class="n">uri</span><span class="p">,</span><span class="w"> </span><span class="o">&</span><span class="n">root_path</span><span class="p">));</span> |
| <span class="linenos">664</span> |
| <span class="linenos">665</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">base_path</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">root_path</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="s">"/parquet_dataset"</span><span class="p">;</span> |
| <span class="linenos">666</span><span class="w"> </span><span class="c1">// Uncomment the following line if this example is run repeatedly</span> |
| <span class="linenos">667</span><span class="w"> </span><span class="c1">// ARROW_RETURN_NOT_OK(filesystem->DeleteDirContents(base_path));</span> |
| <span class="linenos">668</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-></span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span> |
| <span class="linenos">669</span> |
| <span class="linenos">670</span><span class="w"> </span><span class="c1">// The partition schema determines which fields are part of the partitioning.</span> |
| <span class="linenos">671</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">partition_schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">"a"</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">())});</span> |
| <span class="linenos">672</span><span class="w"> </span><span class="c1">// We'll use Hive-style partitioning,</span> |
| <span class="linenos">673</span><span class="w"> </span><span class="c1">// which creates directories with "key=value" pairs.</span> |
| <span class="linenos">674</span> |
| <span class="linenos">675</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">partitioning</span><span class="w"> </span><span class="o">=</span> |
| <span class="linenos">676</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">></span><span class="p">(</span><span class="n">partition_schema</span><span class="p">);</span> |
| <span class="linenos">677</span><span class="w"> </span><span class="c1">// We'll write Parquet files.</span> |
| <span class="linenos">678</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">format</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">ParquetFileFormat</span><span class="o">></span><span class="p">();</span> |
| <span class="linenos">679</span> |
| <span class="linenos">680</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">FileSystemDatasetWriteOptions</span><span class="w"> </span><span class="n">write_options</span><span class="p">;</span> |
| <span class="linenos">681</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">file_write_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">format</span><span class="o">-></span><span class="n">DefaultWriteOptions</span><span class="p">();</span> |
| <span class="linenos">682</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">filesystem</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">filesystem</span><span class="p">;</span> |
| <span class="linenos">683</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">base_dir</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">base_path</span><span class="p">;</span> |
| <span class="linenos">684</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">partitioning</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">partitioning</span><span class="p">;</span> |
| <span class="linenos">685</span><span class="w"> </span><span class="n">write_options</span><span class="p">.</span><span class="n">basename_template</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">"part{i}.parquet"</span><span class="p">;</span> |
| <span class="linenos">686</span> |
| <span class="linenos">687</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">WriteNodeOptions</span><span class="w"> </span><span class="n">write_node_options</span><span class="p">{</span><span class="n">write_options</span><span class="p">};</span> |
| <span class="linenos">688</span> |
| <span class="linenos">689</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">write</span><span class="p">{</span><span class="s">"write"</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">scan</span><span class="p">)},</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">write_node_options</span><span class="p">)};</span> |
| <span class="linenos">690</span> |
| <span class="linenos">691</span><span class="w"> </span><span class="c1">// Since the write node has no output we simply run the plan to completion and the</span> |
| <span class="linenos">692</span><span class="w"> </span><span class="c1">// data should be written</span> |
| <span class="linenos">693</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">DeclarationToStatus</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">write</span><span class="p">)));</span> |
| <span class="linenos">694</span> |
| <span class="linenos">695</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"Dataset written to "</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">base_path</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">696</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span> |
| <span class="linenos">697</span><span class="p">}</span> |
| <span class="linenos">698</span> |
| <span class="linenos">699</span><span class="c1">// (Doc section: Write Example)</span> |
| <span class="linenos">700</span> |
| <span class="linenos">701</span><span class="c1">// (Doc section: Union Example)</span> |
| <span class="linenos">702</span> |
| <span class="linenos">703</span><span class="c1">/// \brief An example showing a union node</span> |
| <span class="linenos">704</span><span class="c1">///</span> |
| <span class="linenos">705</span><span class="c1">/// Source-Union-Table</span> |
| <span class="linenos">706</span><span class="c1">/// This example shows how a union operation can be applied on two</span> |
| <span class="linenos">707</span><span class="c1">/// data sources. The output is collected into a table.</span> |
| <span class="linenos">708</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">SourceUnionSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">709</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span> |
| <span class="linenos">710</span> |
| <span class="linenos">711</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">lhs</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span> |
| <span class="linenos">712</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()}};</span> |
| <span class="linenos">713</span><span class="w"> </span><span class="n">lhs</span><span class="p">.</span><span class="n">label</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">"lhs"</span><span class="p">;</span> |
| <span class="linenos">714</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">rhs</span><span class="p">{</span><span class="s">"source"</span><span class="p">,</span> |
| <span class="linenos">715</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()}};</span> |
| <span class="linenos">716</span><span class="w"> </span><span class="n">rhs</span><span class="p">.</span><span class="n">label</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">"rhs"</span><span class="p">;</span> |
| <span class="linenos">717</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">union_plan</span><span class="p">{</span> |
| <span class="linenos">718</span><span class="w"> </span><span class="s">"union"</span><span class="p">,</span><span class="w"> </span><span class="p">{</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">lhs</span><span class="p">),</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">rhs</span><span class="p">)},</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecNodeOptions</span><span class="p">{}};</span> |
| <span class="linenos">719</span> |
| <span class="linenos">720</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">union_plan</span><span class="p">));</span> |
| <span class="linenos">721</span><span class="p">}</span> |
| <span class="linenos">722</span> |
| <span class="linenos">723</span><span class="c1">// (Doc section: Union Example)</span> |
| <span class="linenos">724</span> |
| <span class="linenos">725</span><span class="c1">// (Doc section: Table Sink Example)</span> |
| <span class="linenos">726</span> |
| <span class="linenos">727</span><span class="c1">/// \brief An example showing a table sink node</span> |
| <span class="linenos">728</span><span class="c1">///</span> |
| <span class="linenos">729</span><span class="c1">/// TableSink Example</span> |
| <span class="linenos">730</span><span class="c1">/// This example shows how a table_sink can be used</span> |
| <span class="linenos">731</span><span class="c1">/// in an execution plan. This includes a source node</span> |
| <span class="linenos">732</span><span class="c1">/// receiving data as batches and the table sink node</span> |
| <span class="linenos">733</span><span class="c1">/// which emits the output as a table.</span> |
| <span class="linenos">734</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">TableSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">735</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">></span><span class="w"> </span><span class="n">plan</span><span class="p">,</span> |
| <span class="linenos">736</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">ExecPlan</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="o">*</span><span class="n">cp</span><span class="o">::</span><span class="n">threaded_exec_context</span><span class="p">()));</span> |
| <span class="linenos">737</span> |
| <span class="linenos">738</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">basic_data</span><span class="p">,</span><span class="w"> </span><span class="n">MakeBasicBatches</span><span class="p">());</span> |
| <span class="linenos">739</span> |
| <span class="linenos">740</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">source_node_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">SourceNodeOptions</span><span class="p">{</span><span class="n">basic_data</span><span class="p">.</span><span class="n">schema</span><span class="p">,</span><span class="w"> </span><span class="n">basic_data</span><span class="p">.</span><span class="n">gen</span><span class="p">()};</span> |
| <span class="linenos">741</span> |
| <span class="linenos">742</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="n">ac</span><span class="o">::</span><span class="n">ExecNode</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">source</span><span class="p">,</span> |
| <span class="linenos">743</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">"source"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{},</span><span class="w"> </span><span class="n">source_node_options</span><span class="p">));</span> |
| <span class="linenos">744</span> |
| <span class="linenos">745</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">></span><span class="w"> </span><span class="n">output_table</span><span class="p">;</span> |
| <span class="linenos">746</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">table_sink_options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">TableSinkNodeOptions</span><span class="p">{</span><span class="o">&</span><span class="n">output_table</span><span class="p">};</span> |
| <span class="linenos">747</span> |
| <span class="linenos">748</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span> |
| <span class="linenos">749</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">MakeExecNode</span><span class="p">(</span><span class="s">"table_sink"</span><span class="p">,</span><span class="w"> </span><span class="n">plan</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span><span class="w"> </span><span class="p">{</span><span class="n">source</span><span class="p">},</span><span class="w"> </span><span class="n">table_sink_options</span><span class="p">));</span> |
| <span class="linenos">750</span><span class="w"> </span><span class="c1">// validate the ExecPlan</span> |
| <span class="linenos">751</span><span class="w"> </span><span class="n">ARROW_RETURN_NOT_OK</span><span class="p">(</span><span class="n">plan</span><span class="o">-></span><span class="n">Validate</span><span class="p">());</span> |
| <span class="linenos">752</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"ExecPlan created : "</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">plan</span><span class="o">-></span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">753</span><span class="w"> </span><span class="c1">// start the ExecPlan</span> |
| <span class="linenos">754</span><span class="w"> </span><span class="n">plan</span><span class="o">-></span><span class="n">StartProducing</span><span class="p">();</span> |
| <span class="linenos">755</span> |
| <span class="linenos">756</span><span class="w"> </span><span class="c1">// Wait for the plan to finish</span> |
| <span class="linenos">757</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">finished</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">plan</span><span class="o">-></span><span class="n">finished</span><span class="p">();</span> |
| <span class="linenos">758</span><span class="w"> </span><span class="n">RETURN_NOT_OK</span><span class="p">(</span><span class="n">finished</span><span class="p">.</span><span class="n">status</span><span class="p">());</span> |
| <span class="linenos">759</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="s">"Results : "</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">output_table</span><span class="o">-></span><span class="n">ToString</span><span class="p">()</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">760</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="o">::</span><span class="n">OK</span><span class="p">();</span> |
| <span class="linenos">761</span><span class="p">}</span> |
| <span class="linenos">762</span> |
| <span class="linenos">763</span><span class="c1">// (Doc section: Table Sink Example)</span> |
| <span class="linenos">764</span> |
| <span class="linenos">765</span><span class="c1">// (Doc section: RecordBatchReaderSource Example)</span> |
| <span class="linenos">766</span> |
| <span class="linenos">767</span><span class="c1">/// \brief An example showing the usage of a RecordBatchReader as the data source.</span> |
| <span class="linenos">768</span><span class="c1">///</span> |
| <span class="linenos">769</span><span class="c1">/// RecordBatchReaderSourceSink Example</span> |
| <span class="linenos">770</span><span class="c1">/// This example shows how a record_batch_reader_source can be used</span> |
| <span class="linenos">771</span><span class="c1">/// in an execution plan. This includes the source node</span> |
| <span class="linenos">772</span><span class="c1">/// receiving data from a TableBatchReader.</span> |
| <span class="linenos">773</span> |
| <span class="linenos">774</span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">RecordBatchReaderSourceSinkExample</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">775</span><span class="w"> </span><span class="n">ARROW_ASSIGN_OR_RAISE</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">table</span><span class="p">,</span><span class="w"> </span><span class="n">GetTable</span><span class="p">());</span> |
| <span class="linenos">776</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">RecordBatchReader</span><span class="o">></span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span> |
| <span class="linenos">777</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o"><</span><span class="n">arrow</span><span class="o">::</span><span class="n">TableBatchReader</span><span class="o">></span><span class="p">(</span><span class="n">table</span><span class="p">);</span> |
| <span class="linenos">778</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">Declaration</span><span class="w"> </span><span class="n">reader_source</span><span class="p">{</span><span class="s">"record_batch_reader_source"</span><span class="p">,</span> |
| <span class="linenos">779</span><span class="w"> </span><span class="n">ac</span><span class="o">::</span><span class="n">RecordBatchReaderSourceNodeOptions</span><span class="p">{</span><span class="n">reader</span><span class="p">}};</span> |
| <span class="linenos">780</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">ExecutePlanAndCollectAsTable</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">reader_source</span><span class="p">));</span> |
| <span class="linenos">781</span><span class="p">}</span> |
| <span class="linenos">782</span> |
| <span class="linenos">783</span><span class="c1">// (Doc section: RecordBatchReaderSource Example)</span> |
| <span class="linenos">784</span> |
| <span class="linenos">785</span><span class="k">enum</span><span class="w"> </span><span class="nc">ExampleMode</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">786</span><span class="w"> </span><span class="n">SOURCE_SINK</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span> |
| <span class="linenos">787</span><span class="w"> </span><span class="n">TABLE_SOURCE_SINK</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span> |
| <span class="linenos">788</span><span class="w"> </span><span class="n">SCAN</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span> |
| <span class="linenos">789</span><span class="w"> </span><span class="n">FILTER</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">3</span><span class="p">,</span> |
| <span class="linenos">790</span><span class="w"> </span><span class="n">PROJECT</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">4</span><span class="p">,</span> |
| <span class="linenos">791</span><span class="w"> </span><span class="n">SCALAR_AGGREGATION</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">5</span><span class="p">,</span> |
| <span class="linenos">792</span><span class="w"> </span><span class="n">GROUP_AGGREGATION</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">6</span><span class="p">,</span> |
| <span class="linenos">793</span><span class="w"> </span><span class="n">CONSUMING_SINK</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">7</span><span class="p">,</span> |
| <span class="linenos">794</span><span class="w"> </span><span class="n">ORDER_BY_SINK</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">8</span><span class="p">,</span> |
| <span class="linenos">795</span><span class="w"> </span><span class="n">HASHJOIN</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">9</span><span class="p">,</span> |
| <span class="linenos">796</span><span class="w"> </span><span class="n">KSELECT</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">10</span><span class="p">,</span> |
| <span class="linenos">797</span><span class="w"> </span><span class="n">WRITE</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">11</span><span class="p">,</span> |
| <span class="linenos">798</span><span class="w"> </span><span class="n">UNION</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">12</span><span class="p">,</span> |
| <span class="linenos">799</span><span class="w"> </span><span class="n">TABLE_SOURCE_TABLE_SINK</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">13</span><span class="p">,</span> |
| <span class="linenos">800</span><span class="w"> </span><span class="n">RECORD_BATCH_READER_SOURCE</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">14</span> |
| <span class="linenos">801</span><span class="p">};</span> |
| <span class="linenos">802</span> |
| <span class="linenos">803</span><span class="kt">int</span><span class="w"> </span><span class="nf">main</span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">argc</span><span class="p">,</span><span class="w"> </span><span class="kt">char</span><span class="o">**</span><span class="w"> </span><span class="n">argv</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">804</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">argc</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="mi">3</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">805</span><span class="w"> </span><span class="c1">// Fake success for CI purposes.</span> |
| <span class="linenos">806</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">EXIT_SUCCESS</span><span class="p">;</span> |
| <span class="linenos">807</span><span class="w"> </span><span class="p">}</span> |
| <span class="linenos">808</span> |
| <span class="linenos">809</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="w"> </span><span class="n">base_save_path</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">];</span> |
| <span class="linenos">810</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">mode</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">atoi</span><span class="p">(</span><span class="n">argv</span><span class="p">[</span><span class="mi">2</span><span class="p">]);</span> |
| <span class="linenos">811</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">Status</span><span class="w"> </span><span class="n">status</span><span class="p">;</span> |
| <span class="linenos">812</span><span class="w"> </span><span class="c1">// ensure arrow::dataset node factories are in the registry</span> |
| <span class="linenos">813</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">internal</span><span class="o">::</span><span class="n">Initialize</span><span class="p">();</span> |
| <span class="linenos">814</span><span class="w"> </span><span class="k">switch</span><span class="w"> </span><span class="p">(</span><span class="n">mode</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">815</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">SOURCE_SINK</span><span class="p">:</span> |
| <span class="linenos">816</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">"Source Sink Example"</span><span class="p">);</span> |
| <span class="linenos">817</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SourceSinkExample</span><span class="p">();</span> |
| <span class="linenos">818</span><span class="w"> </span><span class="k">break</span><span class="p">;</span> |
| <span class="linenos">819</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">TABLE_SOURCE_SINK</span><span class="p">:</span> |
| <span class="linenos">820</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">"Table Source Sink Example"</span><span class="p">);</span> |
| <span class="linenos">821</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">TableSourceSinkExample</span><span class="p">();</span> |
| <span class="linenos">822</span><span class="w"> </span><span class="k">break</span><span class="p">;</span> |
| <span class="linenos">823</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">SCAN</span><span class="p">:</span> |
| <span class="linenos">824</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">"Scan Example"</span><span class="p">);</span> |
| <span class="linenos">825</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ScanSinkExample</span><span class="p">();</span> |
| <span class="linenos">826</span><span class="w"> </span><span class="k">break</span><span class="p">;</span> |
| <span class="linenos">827</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">FILTER</span><span class="p">:</span> |
| <span class="linenos">828</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">"Filter Example"</span><span class="p">);</span> |
| <span class="linenos">829</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ScanFilterSinkExample</span><span class="p">();</span> |
| <span class="linenos">830</span><span class="w"> </span><span class="k">break</span><span class="p">;</span> |
| <span class="linenos">831</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">PROJECT</span><span class="p">:</span> |
| <span class="linenos">832</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">"Project Example"</span><span class="p">);</span> |
| <span class="linenos">833</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ScanProjectSinkExample</span><span class="p">();</span> |
| <span class="linenos">834</span><span class="w"> </span><span class="k">break</span><span class="p">;</span> |
| <span class="linenos">835</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">GROUP_AGGREGATION</span><span class="p">:</span> |
| <span class="linenos">836</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">"Aggregate Example"</span><span class="p">);</span> |
| <span class="linenos">837</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SourceGroupAggregateSinkExample</span><span class="p">();</span> |
| <span class="linenos">838</span><span class="w"> </span><span class="k">break</span><span class="p">;</span> |
| <span class="linenos">839</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">SCALAR_AGGREGATION</span><span class="p">:</span> |
| <span class="linenos">840</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">"Aggregate Example"</span><span class="p">);</span> |
| <span class="linenos">841</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SourceScalarAggregateSinkExample</span><span class="p">();</span> |
| <span class="linenos">842</span><span class="w"> </span><span class="k">break</span><span class="p">;</span> |
| <span class="linenos">843</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">CONSUMING_SINK</span><span class="p">:</span> |
| <span class="linenos">844</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">"Consuming-Sink Example"</span><span class="p">);</span> |
| <span class="linenos">845</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SourceConsumingSinkExample</span><span class="p">();</span> |
| <span class="linenos">846</span><span class="w"> </span><span class="k">break</span><span class="p">;</span> |
| <span class="linenos">847</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">ORDER_BY_SINK</span><span class="p">:</span> |
| <span class="linenos">848</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">"OrderBy Example"</span><span class="p">);</span> |
| <span class="linenos">849</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SourceOrderBySinkExample</span><span class="p">();</span> |
| <span class="linenos">850</span><span class="w"> </span><span class="k">break</span><span class="p">;</span> |
| <span class="linenos">851</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">HASHJOIN</span><span class="p">:</span> |
| <span class="linenos">852</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">"HashJoin Example"</span><span class="p">);</span> |
| <span class="linenos">853</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SourceHashJoinSinkExample</span><span class="p">();</span> |
| <span class="linenos">854</span><span class="w"> </span><span class="k">break</span><span class="p">;</span> |
| <span class="linenos">855</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">KSELECT</span><span class="p">:</span> |
| <span class="linenos">856</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">"KSelect Example"</span><span class="p">);</span> |
| <span class="linenos">857</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SourceKSelectExample</span><span class="p">();</span> |
| <span class="linenos">858</span><span class="w"> </span><span class="k">break</span><span class="p">;</span> |
| <span class="linenos">859</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">WRITE</span><span class="p">:</span> |
| <span class="linenos">860</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">"Write Example"</span><span class="p">);</span> |
| <span class="linenos">861</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ScanFilterWriteExample</span><span class="p">(</span><span class="n">base_save_path</span><span class="p">);</span> |
| <span class="linenos">862</span><span class="w"> </span><span class="k">break</span><span class="p">;</span> |
| <span class="linenos">863</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">UNION</span><span class="p">:</span> |
| <span class="linenos">864</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">"Union Example"</span><span class="p">);</span> |
| <span class="linenos">865</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">SourceUnionSinkExample</span><span class="p">();</span> |
| <span class="linenos">866</span><span class="w"> </span><span class="k">break</span><span class="p">;</span> |
| <span class="linenos">867</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">TABLE_SOURCE_TABLE_SINK</span><span class="p">:</span> |
| <span class="linenos">868</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">"TableSink Example"</span><span class="p">);</span> |
| <span class="linenos">869</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">TableSinkExample</span><span class="p">();</span> |
| <span class="linenos">870</span><span class="w"> </span><span class="k">break</span><span class="p">;</span> |
| <span class="linenos">871</span><span class="w"> </span><span class="k">case</span><span class="w"> </span><span class="no">RECORD_BATCH_READER_SOURCE</span><span class="p">:</span> |
| <span class="linenos">872</span><span class="w"> </span><span class="n">PrintBlock</span><span class="p">(</span><span class="s">"RecordBatchReaderSource Example"</span><span class="p">);</span> |
| <span class="linenos">873</span><span class="w"> </span><span class="n">status</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">RecordBatchReaderSourceSinkExample</span><span class="p">();</span> |
| <span class="linenos">874</span><span class="w"> </span><span class="k">break</span><span class="p">;</span> |
| <span class="linenos">875</span><span class="w"> </span><span class="k">default</span><span class="o">:</span> |
| <span class="linenos">876</span><span class="w"> </span><span class="k">break</span><span class="p">;</span> |
| <span class="linenos">877</span><span class="w"> </span><span class="p">}</span> |
| <span class="linenos">878</span> |
| <span class="linenos">879</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">status</span><span class="p">.</span><span class="n">ok</span><span class="p">())</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">880</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">EXIT_SUCCESS</span><span class="p">;</span> |
| <span class="linenos">881</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="k">else</span><span class="w"> </span><span class="p">{</span> |
| <span class="linenos">882</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">"Error occurred: "</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">status</span><span class="p">.</span><span class="n">message</span><span class="p">()</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span> |
| <span class="linenos">883</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">EXIT_FAILURE</span><span class="p">;</span> |
| <span class="linenos">884</span><span class="w"> </span><span class="p">}</span> |
| <span class="linenos">885</span><span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| </section> |
| |
| |
| </div> |
| |
| |
| <!-- Previous / next buttons: sequential links to the neighbouring pages. The angle icons are purely decorative and hidden from assistive technology; the visible subtitle and title text name each link. --> |
| <div class="prev-next-area"> |
| <a class="left-prev" id="prev-link" href="gandiva.html" title="previous page"> |
| <i class="fas fa-angle-left" aria-hidden="true"></i> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">previous</p> |
| <p class="prev-next-title">The Gandiva Expression Compiler</p> |
| </div> |
| </a> |
| <a class="right-next" id="next-link" href="io.html" title="next page"> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">next</p> |
| <p class="prev-next-title">Input / output and filesystems</p> |
| </div> |
| <i class="fas fa-angle-right" aria-hidden="true"></i> |
| </a> |
| </div> |
| |
| </main> |
| |
| |
| </div> |
| </div> |
| |
| <!-- Main theme script; the same URL (including the digest query) is preloaded in <head>. --> |
| <script src="../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"></script> |
| |
| <!-- Page footer: copyright notice and the Sphinx version that generated this page. --> |
| <footer class="footer mt-5 mt-md-0"> |
| <div class="container"> |
| |
| <div class="footer-item"> |
| <p class="copyright"> |
| © Copyright 2016-2023 Apache Software Foundation. |
| </p> |
| </div> |
| |
| <div class="footer-item"> |
| <p class="sphinx-version"> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 5.3.0. |
| </p> |
| </div> |
| |
| </div> |
| </footer> |
| <!-- Version-warning script, loaded from the site-absolute /docs/_static/ path rather than the page-relative ../_static/ used by the other assets. NOTE(review): presumably shows a notice on outdated documentation versions; confirm against the script itself. --> |
| <script src="/docs/_static/versionwarning.js"></script> |
| |
| </body> |
| </html> |