blob: b54304e1ba14db16d9319c8a7970e971c2fd5380 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Tabular Datasets &#8212; Apache Arrow v7.0.0</title>
<link href="../_static/styles/theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link href="../_static/styles/pydata-sphinx-theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link rel="stylesheet"
href="../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet" type="text/css" href="../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../_static/styles/pydata-sphinx-theme.css" />
<link rel="stylesheet" type="text/css" href="../_static/tabs.css" />
<link rel="stylesheet" type="text/css" href="../_static/theme_overrides.css" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf">
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/jquery.js"></script>
<script src="../_static/underscore.js"></script>
<script src="../_static/doctools.js"></script>
<link rel="canonical" href="https://arrow.apache.org/docs/cpp/dataset.html" />
<link rel="shortcut icon" href="../_static/favicon.ico"/>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Arrow Flight RPC" href="flight.html" />
<link rel="prev" title="Reading JSON files" href="json.html" />
<!-- Viewport is already declared once near the top of <head>; only the search-language tag remains here. The page content is English. -->
<meta name="docsearch:language" content="en">
<!-- Google Analytics -->
<!-- Matomo -->
<script>
// Matomo bootstrap: commands are queued on the global `_paq` array, reusing
// an existing queue if one is already present on `window`.
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
// Configure the tracker endpoint and site id, then inject the tracker script.
(function() {
// Base URL of the analytics host; both the endpoint and the script live under it.
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '20']);
// d = document, g = new <script> element, s = first <script> already in the page.
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
// Mark the tracker script async and insert it before the first existing script.
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<div class="container-fluid" id="banner"></div>
<div class="container-xl">
<div class="row">
<!-- Only show if we have sidebars configured, else just a small margin -->
<div class="col-12 col-md-3 bd-sidebar">
<div class="sidebar-start-items">
<!-- Logo links to the docs index, so the alt text names the destination rather than the picture. -->
<a class="navbar-brand" href="../index.html">
<img src="../_static/arrow.png" class="logo" alt="Apache Arrow documentation home">
</a>
<div id="version-search-wrapper">
<div id="version-button" class="dropdown">
<!-- Dropdown toggle: aria-haspopup / aria-expanded expose the menu and its open state to assistive technology. -->
<button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
7.0.0
<span class="caret"></span>
</button>
<div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
<script type="text/javascript">
// Build the docs root URL for one versions.json entry by substituting the
// entry's `version` field into the URL template.
function buildURL(entry) {
  // the template string is supplied by jinja
  const urlTemplate = "https://arrow.apache.org/docs/{version}";
  return urlTemplate.replace("{version}", entry.version);
}
// Click handler for the version-switcher links.
// Sends a HEAD request to see whether the current page's path also exists
// under the clicked docs version. If it does, navigate there; otherwise go to
// that version's homepage. Returns false so the link's own navigation does
// not happen before the probe completes.
function checkPageExistsAndRedirect(event) {
  const currentFilePath = "cpp/dataset.html";
  // currentTarget is the element the handler is bound to (the <a>), so the
  // lookup keeps working if the link ever gains child markup. Fall back to
  // target for callers that pass a minimal event-like object.
  const link = event.currentTarget || event.target;
  const otherDocsHomepage = link.getAttribute("href");
  // Guarantee a "/" between the version root and the page path. Without it,
  // a root lacking a trailing slash would yield ".../docs/6.0cpp/dataset.html"
  // and the probe would always fail over to the homepage.
  const separator = otherDocsHomepage.endsWith("/") ? "" : "/";
  const tryUrl = `${otherDocsHomepage}${separator}${currentFilePath}`;
  $.ajax({
    type: 'HEAD',
    url: tryUrl,
    // if the page exists, go there
    success: function() {
      location.href = tryUrl;
    }
  }).fail(function() {
    // page is missing in the other version: land on its homepage instead
    location.href = otherDocsHomepage;
  });
  return false;
}
// Fill the version switcher dropdown from the versions.json config.
(function () {
  // Append one dropdown link for a versions.json entry. Links are created
  // synchronously, in the order the entries are iterated, and initially
  // point at the docs version homepage; the click handler decides the
  // final destination.
  function addVersionLink(_index, entry) {
    // entries without a custom name (e.g., "latest") are labelled by version
    const hasCustomName = "name" in entry;
    if (!hasCustomName) {
      entry.name = entry.version;
    }
    // remember the resolved URL on the entry and use it as the link target
    entry.url = buildURL(entry);
    const link = document.createElement("a");
    link.className = "list-group-item list-group-item-action py-1";
    link.setAttribute("href", String(entry.url));
    link.textContent = String(entry.name);
    link.onclick = checkPageExistsAndRedirect;
    $("#version_switcher").append(link);
  }

  // fetch the JSON config, then build one link per entry
  $.getJSON("/docs/_static/versions.json", function (data) {
    $.each(data, addVersionLink);
  });
})();
</script>
<!-- Docs search form: exposed as a search landmark; the magnifier icon is decorative and hidden from assistive technology. -->
<form id="search-box" class="bd-search d-flex align-items-center" action="../search.html" method="get" role="search">
<i class="icon fas fa-search" aria-hidden="true"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form>
</div>
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<p aria-level="2" class="caption" role="heading">
<span class="caption-text">
Supported Environments
</span>
</p>
<ul class="current nav bd-sidenav">
<li class="toctree-l1">
<a class="reference internal" href="../c_glib/index.html">
C/GLib
</a>
</li>
<li class="toctree-l1 current active has-children">
<a class="reference internal" href="index.html">
C++
</a>
<input checked="" class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/>
<label for="toctree-checkbox-1">
<i class="fas fa-chevron-down">
</i>
</label>
<ul class="current">
<li class="toctree-l2 current active has-children">
<a class="reference internal" href="getting_started.html">
User Guide
</a>
<input checked="" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/>
<label for="toctree-checkbox-2">
<i class="fas fa-chevron-down">
</i>
</label>
<ul class="current">
<li class="toctree-l3">
<a class="reference internal" href="overview.html">
High-Level Overview
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="conventions.html">
Conventions
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="build_system.html">
Using Arrow C++ in your own project
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="memory.html">
Memory Management
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="arrays.html">
Arrays
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="datatypes.html">
Data Types
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="tables.html">
Tabular Data
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="compute.html">
Compute Functions
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="streaming_execution.html">
Streaming execution engine
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="io.html">
Input / output and filesystems
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="ipc.html">
Reading and writing the Arrow IPC format
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="parquet.html">
Reading and writing Parquet files
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="csv.html">
Reading and Writing CSV files
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="json.html">
Reading JSON files
</a>
</li>
<li class="toctree-l3 current active">
<a class="current reference internal" href="#">
Tabular Datasets
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="flight.html">
Arrow Flight RPC
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="gdb.html">
Debugging code using Arrow
</a>
</li>
</ul>
</li>
<li class="toctree-l2 has-children">
<a class="reference internal" href="examples/index.html">
Examples
</a>
<input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/>
<label for="toctree-checkbox-3">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l3">
<a class="reference internal" href="examples/cmake_minimal_build.html">
Minimal build using CMake
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="examples/compute_and_write_example.html">
Compute and Write CSV Example
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="examples/dataset_documentation_example.html">
Arrow Datasets example
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="examples/row_columnar_conversion.html">
Row to columnar conversion
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="examples/tuple_range_conversion.html">
std::tuple-like ranges to Arrow
</a>
</li>
</ul>
</li>
<li class="toctree-l2 has-children">
<a class="reference internal" href="api.html">
API Reference
</a>
<input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/>
<label for="toctree-checkbox-4">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l3">
<a class="reference internal" href="api/support.html">
Programming Support
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/memory.html">
Memory (management)
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/datatype.html">
Data Types
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/array.html">
Arrays
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/scalar.html">
Scalars
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/builder.html">
Array Builders
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/table.html">
Two-dimensional Datasets
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/c_abi.html">
C Interfaces
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/compute.html">
Compute Functions
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/tensor.html">
Tensors
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/utilities.html">
Utilities
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/async.html">
Asynchronous programming
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/io.html">
Input / output
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/ipc.html">
Arrow IPC
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/formats.html">
File Formats
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/cuda.html">
CUDA support
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/flight.html">
Arrow Flight RPC
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/filesystem.html">
Filesystems
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="api/dataset.html">
Dataset
</a>
</li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/arrow/blob/master/csharp/README.md">
C#
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://pkg.go.dev/github.com/apache/arrow/go">
Go
</a>
</li>
<li class="toctree-l1 has-children">
<a class="reference internal" href="../java/index.html">
Java
</a>
<input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/>
<label for="toctree-checkbox-5">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2">
<a class="reference internal" href="../java/vector.html">
ValueVector
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../java/vector_schema_root.html">
VectorSchemaRoot
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../java/ipc.html">
Reading/Writing IPC formats
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../java/algorithm.html">
Java Algorithms
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../java/dataset.html">
Dataset
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../java/reference/index.html">
Reference (javadoc)
</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../js/index.html">
JavaScript
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/arrow/blob/master/julia/Arrow/README.md">
Julia
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/arrow/blob/master/matlab/README.md">
MATLAB
</a>
</li>
<li class="toctree-l1 has-children">
<a class="reference internal" href="../python/index.html">
Python
</a>
<input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/>
<label for="toctree-checkbox-6">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2">
<a class="reference internal" href="../python/install.html">
Installing PyArrow
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/getstarted.html">
Getting Started
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/data.html">
Data Types and In-Memory Data Model
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/compute.html">
Compute Functions
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/memory.html">
Memory and IO Interfaces
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/ipc.html">
Streaming, Serialization, and IPC
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/filesystems.html">
Filesystem Interface
</a>
</li>
<li class="toctree-l2 has-children">
<a class="reference internal" href="../python/filesystems_deprecated.html">
Filesystem Interface (legacy)
</a>
<input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" type="checkbox"/>
<label for="toctree-checkbox-7">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.hdfs.connect.html">
pyarrow.hdfs.connect
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.cat.html">
pyarrow.HadoopFileSystem.cat
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.chmod.html">
pyarrow.HadoopFileSystem.chmod
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.chown.html">
pyarrow.HadoopFileSystem.chown
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.delete.html">
pyarrow.HadoopFileSystem.delete
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.df.html">
pyarrow.HadoopFileSystem.df
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.disk_usage.html">
pyarrow.HadoopFileSystem.disk_usage
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.download.html">
pyarrow.HadoopFileSystem.download
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.exists.html">
pyarrow.HadoopFileSystem.exists
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.get_capacity.html">
pyarrow.HadoopFileSystem.get_capacity
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.get_space_used.html">
pyarrow.HadoopFileSystem.get_space_used
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.info.html">
pyarrow.HadoopFileSystem.info
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.ls.html">
pyarrow.HadoopFileSystem.ls
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.mkdir.html">
pyarrow.HadoopFileSystem.mkdir
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.open.html">
pyarrow.HadoopFileSystem.open
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.rename.html">
pyarrow.HadoopFileSystem.rename
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.rm.html">
pyarrow.HadoopFileSystem.rm
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HadoopFileSystem.upload.html">
pyarrow.HadoopFileSystem.upload
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/generated/pyarrow.HdfsFile.html">
pyarrow.HdfsFile
</a>
</li>
</ul>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/plasma.html">
The Plasma In-Memory Object Store
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/numpy.html">
NumPy Integration
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/pandas.html">
Pandas Integration
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/timestamps.html">
Timestamps
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/csv.html">
Reading and Writing CSV files
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/feather.html">
Feather File Format
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/json.html">
Reading JSON files
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/parquet.html">
Reading and Writing the Apache Parquet Format
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/dataset.html">
Tabular Datasets
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/extending_types.html">
Extending pyarrow
</a>
</li>
<li class="toctree-l2 has-children">
<a class="reference internal" href="../python/integration.html">
PyArrow Integrations
</a>
<input class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" type="checkbox"/>
<label for="toctree-checkbox-8">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l3">
<a class="reference internal" href="../python/integration/python_r.html">
Integrating PyArrow with R
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/integration/extending.html">
Using pyarrow from C++ and Cython Code
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../python/integration/cuda.html">
CUDA Integration
</a>
</li>
</ul>
</li>
<li class="toctree-l2 has-children">
<a class="reference internal" href="../python/api.html">
API Reference
</a>
<input class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" type="checkbox"/>
<label for="toctree-checkbox-9">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../python/api/datatypes.html">
Data Types and Schemas
</a>
<input class="toctree-checkbox" id="toctree-checkbox-10" name="toctree-checkbox-10" type="checkbox"/>
<label for="toctree-checkbox-10">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.null.html">
pyarrow.null
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.bool_.html">
pyarrow.bool_
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.int8.html">
pyarrow.int8
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.int16.html">
pyarrow.int16
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.int32.html">
pyarrow.int32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.int64.html">
pyarrow.int64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.uint8.html">
pyarrow.uint8
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.uint16.html">
pyarrow.uint16
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.uint32.html">
pyarrow.uint32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.uint64.html">
pyarrow.uint64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.float16.html">
pyarrow.float16
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.float32.html">
pyarrow.float32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.float64.html">
pyarrow.float64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.time32.html">
pyarrow.time32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.time64.html">
pyarrow.time64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.timestamp.html">
pyarrow.timestamp
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.date32.html">
pyarrow.date32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.date64.html">
pyarrow.date64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.duration.html">
pyarrow.duration
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.month_day_nano_interval.html">
pyarrow.month_day_nano_interval
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.binary.html">
pyarrow.binary
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.string.html">
pyarrow.string
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.utf8.html">
pyarrow.utf8
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.large_binary.html">
pyarrow.large_binary
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.large_string.html">
pyarrow.large_string
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.large_utf8.html">
pyarrow.large_utf8
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.decimal128.html">
pyarrow.decimal128
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.list_.html">
pyarrow.list_
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.large_list.html">
pyarrow.large_list
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.map_.html">
pyarrow.map_
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.struct.html">
pyarrow.struct
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dictionary.html">
pyarrow.dictionary
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.field.html">
pyarrow.field
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.schema.html">
pyarrow.schema
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.from_numpy_dtype.html">
pyarrow.from_numpy_dtype
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.unify_schemas.html">
pyarrow.unify_schemas
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.DataType.html">
pyarrow.DataType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.DictionaryType.html">
pyarrow.DictionaryType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ListType.html">
pyarrow.ListType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.MapType.html">
pyarrow.MapType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.StructType.html">
pyarrow.StructType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.UnionType.html">
pyarrow.UnionType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.TimestampType.html">
pyarrow.TimestampType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Time32Type.html">
pyarrow.Time32Type
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Time64Type.html">
pyarrow.Time64Type
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.FixedSizeBinaryType.html">
pyarrow.FixedSizeBinaryType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Decimal128Type.html">
pyarrow.Decimal128Type
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Field.html">
pyarrow.Field
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Schema.html">
pyarrow.Schema
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ExtensionType.html">
pyarrow.ExtensionType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.PyExtensionType.html">
pyarrow.PyExtensionType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.register_extension_type.html">
pyarrow.register_extension_type
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.unregister_extension_type.html">
pyarrow.unregister_extension_type
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_boolean.html">
pyarrow.types.is_boolean
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_integer.html">
pyarrow.types.is_integer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_signed_integer.html">
pyarrow.types.is_signed_integer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_unsigned_integer.html">
pyarrow.types.is_unsigned_integer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_int8.html">
pyarrow.types.is_int8
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_int16.html">
pyarrow.types.is_int16
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_int32.html">
pyarrow.types.is_int32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_int64.html">
pyarrow.types.is_int64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_uint8.html">
pyarrow.types.is_uint8
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_uint16.html">
pyarrow.types.is_uint16
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_uint32.html">
pyarrow.types.is_uint32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_uint64.html">
pyarrow.types.is_uint64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_floating.html">
pyarrow.types.is_floating
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_float16.html">
pyarrow.types.is_float16
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_float32.html">
pyarrow.types.is_float32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_float64.html">
pyarrow.types.is_float64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_decimal.html">
pyarrow.types.is_decimal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_list.html">
pyarrow.types.is_list
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_large_list.html">
pyarrow.types.is_large_list
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_struct.html">
pyarrow.types.is_struct
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_union.html">
pyarrow.types.is_union
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_nested.html">
pyarrow.types.is_nested
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_temporal.html">
pyarrow.types.is_temporal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_timestamp.html">
pyarrow.types.is_timestamp
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_date.html">
pyarrow.types.is_date
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_date32.html">
pyarrow.types.is_date32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_date64.html">
pyarrow.types.is_date64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_time.html">
pyarrow.types.is_time
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_time32.html">
pyarrow.types.is_time32
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_time64.html">
pyarrow.types.is_time64
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_null.html">
pyarrow.types.is_null
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_binary.html">
pyarrow.types.is_binary
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_unicode.html">
pyarrow.types.is_unicode
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_string.html">
pyarrow.types.is_string
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_large_binary.html">
pyarrow.types.is_large_binary
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_large_unicode.html">
pyarrow.types.is_large_unicode
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_large_string.html">
pyarrow.types.is_large_string
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_fixed_size_binary.html">
pyarrow.types.is_fixed_size_binary
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_map.html">
pyarrow.types.is_map
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.types.is_dictionary.html">
pyarrow.types.is_dictionary
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../python/api/arrays.html">
Arrays and Scalars
</a>
<input class="toctree-checkbox" id="toctree-checkbox-11" name="toctree-checkbox-11" type="checkbox" aria-label="Expand or collapse the Arrays and Scalars section"/>
<label for="toctree-checkbox-11">
<i class="fas fa-chevron-down" aria-hidden="true">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.array.html">
pyarrow.array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.nulls.html">
pyarrow.nulls
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Array.html">
pyarrow.Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.BooleanArray.html">
pyarrow.BooleanArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.FloatingPointArray.html">
pyarrow.FloatingPointArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.IntegerArray.html">
pyarrow.IntegerArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Int8Array.html">
pyarrow.Int8Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Int16Array.html">
pyarrow.Int16Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Int32Array.html">
pyarrow.Int32Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Int64Array.html">
pyarrow.Int64Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.NullArray.html">
pyarrow.NullArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.NumericArray.html">
pyarrow.NumericArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.UInt8Array.html">
pyarrow.UInt8Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.UInt16Array.html">
pyarrow.UInt16Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.UInt32Array.html">
pyarrow.UInt32Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.UInt64Array.html">
pyarrow.UInt64Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.BinaryArray.html">
pyarrow.BinaryArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.StringArray.html">
pyarrow.StringArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.FixedSizeBinaryArray.html">
pyarrow.FixedSizeBinaryArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.LargeBinaryArray.html">
pyarrow.LargeBinaryArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.LargeStringArray.html">
pyarrow.LargeStringArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Time32Array.html">
pyarrow.Time32Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Time64Array.html">
pyarrow.Time64Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Date32Array.html">
pyarrow.Date32Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Date64Array.html">
pyarrow.Date64Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.TimestampArray.html">
pyarrow.TimestampArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.DurationArray.html">
pyarrow.DurationArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.MonthDayNanoIntervalArray.html">
pyarrow.MonthDayNanoIntervalArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Decimal128Array.html">
pyarrow.Decimal128Array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.DictionaryArray.html">
pyarrow.DictionaryArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ListArray.html">
pyarrow.ListArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.FixedSizeListArray.html">
pyarrow.FixedSizeListArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.LargeListArray.html">
pyarrow.LargeListArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.MapArray.html">
pyarrow.MapArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.StructArray.html">
pyarrow.StructArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.UnionArray.html">
pyarrow.UnionArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ExtensionArray.html">
pyarrow.ExtensionArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.scalar.html">
pyarrow.scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.NA.html">
pyarrow.NA
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Scalar.html">
pyarrow.Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.BooleanScalar.html">
pyarrow.BooleanScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Int8Scalar.html">
pyarrow.Int8Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Int16Scalar.html">
pyarrow.Int16Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Int32Scalar.html">
pyarrow.Int32Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Int64Scalar.html">
pyarrow.Int64Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.UInt8Scalar.html">
pyarrow.UInt8Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.UInt16Scalar.html">
pyarrow.UInt16Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.UInt32Scalar.html">
pyarrow.UInt32Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.UInt64Scalar.html">
pyarrow.UInt64Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.FloatScalar.html">
pyarrow.FloatScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.DoubleScalar.html">
pyarrow.DoubleScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.BinaryScalar.html">
pyarrow.BinaryScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.StringScalar.html">
pyarrow.StringScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.FixedSizeBinaryScalar.html">
pyarrow.FixedSizeBinaryScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.LargeBinaryScalar.html">
pyarrow.LargeBinaryScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.LargeStringScalar.html">
pyarrow.LargeStringScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Time32Scalar.html">
pyarrow.Time32Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Time64Scalar.html">
pyarrow.Time64Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Date32Scalar.html">
pyarrow.Date32Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Date64Scalar.html">
pyarrow.Date64Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.TimestampScalar.html">
pyarrow.TimestampScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.DurationScalar.html">
pyarrow.DurationScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.MonthDayNanoIntervalScalar.html">
pyarrow.MonthDayNanoIntervalScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Decimal128Scalar.html">
pyarrow.Decimal128Scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.DictionaryScalar.html">
pyarrow.DictionaryScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ListScalar.html">
pyarrow.ListScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.LargeListScalar.html">
pyarrow.LargeListScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.MapScalar.html">
pyarrow.MapScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.StructScalar.html">
pyarrow.StructScalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.UnionScalar.html">
pyarrow.UnionScalar
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../python/api/memory.html">
Buffers and Memory
</a>
<input class="toctree-checkbox" id="toctree-checkbox-12" name="toctree-checkbox-12" type="checkbox" aria-label="Expand or collapse the Buffers and Memory section"/>
<label for="toctree-checkbox-12">
<i class="fas fa-chevron-down" aria-hidden="true">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.allocate_buffer.html">
pyarrow.allocate_buffer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.py_buffer.html">
pyarrow.py_buffer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.foreign_buffer.html">
pyarrow.foreign_buffer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Buffer.html">
pyarrow.Buffer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ResizableBuffer.html">
pyarrow.ResizableBuffer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Codec.html">
pyarrow.Codec
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compress.html">
pyarrow.compress
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.decompress.html">
pyarrow.decompress
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.MemoryPool.html">
pyarrow.MemoryPool
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.default_memory_pool.html">
pyarrow.default_memory_pool
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.jemalloc_memory_pool.html">
pyarrow.jemalloc_memory_pool
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.mimalloc_memory_pool.html">
pyarrow.mimalloc_memory_pool
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.system_memory_pool.html">
pyarrow.system_memory_pool
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.jemalloc_set_decay_ms.html">
pyarrow.jemalloc_set_decay_ms
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.set_memory_pool.html">
pyarrow.set_memory_pool
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.log_memory_allocations.html">
pyarrow.log_memory_allocations
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.total_allocated_bytes.html">
pyarrow.total_allocated_bytes
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../python/api/compute.html">
Compute Functions
</a>
<input class="toctree-checkbox" id="toctree-checkbox-13" name="toctree-checkbox-13" type="checkbox" aria-label="Expand or collapse the Compute Functions section"/>
<label for="toctree-checkbox-13">
<i class="fas fa-chevron-down" aria-hidden="true">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.all.html">
pyarrow.compute.all
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.any.html">
pyarrow.compute.any
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.approximate_median.html">
pyarrow.compute.approximate_median
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.count.html">
pyarrow.compute.count
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.count_distinct.html">
pyarrow.compute.count_distinct
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.index.html">
pyarrow.compute.index
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.max.html">
pyarrow.compute.max
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.mean.html">
pyarrow.compute.mean
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.min.html">
pyarrow.compute.min
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.min_max.html">
pyarrow.compute.min_max
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.mode.html">
pyarrow.compute.mode
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.product.html">
pyarrow.compute.product
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.quantile.html">
pyarrow.compute.quantile
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.stddev.html">
pyarrow.compute.stddev
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.sum.html">
pyarrow.compute.sum
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.tdigest.html">
pyarrow.compute.tdigest
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.variance.html">
pyarrow.compute.variance
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.abs.html">
pyarrow.compute.abs
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.abs_checked.html">
pyarrow.compute.abs_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.add.html">
pyarrow.compute.add
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.add_checked.html">
pyarrow.compute.add_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.divide.html">
pyarrow.compute.divide
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.divide_checked.html">
pyarrow.compute.divide_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.multiply.html">
pyarrow.compute.multiply
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.multiply_checked.html">
pyarrow.compute.multiply_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.negate.html">
pyarrow.compute.negate
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.negate_checked.html">
pyarrow.compute.negate_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.power.html">
pyarrow.compute.power
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.power_checked.html">
pyarrow.compute.power_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.sign.html">
pyarrow.compute.sign
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.subtract.html">
pyarrow.compute.subtract
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.subtract_checked.html">
pyarrow.compute.subtract_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.bit_wise_and.html">
pyarrow.compute.bit_wise_and
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.bit_wise_not.html">
pyarrow.compute.bit_wise_not
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.bit_wise_or.html">
pyarrow.compute.bit_wise_or
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.bit_wise_xor.html">
pyarrow.compute.bit_wise_xor
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.shift_left.html">
pyarrow.compute.shift_left
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.shift_left_checked.html">
pyarrow.compute.shift_left_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.shift_right.html">
pyarrow.compute.shift_right
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.shift_right_checked.html">
pyarrow.compute.shift_right_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ceil.html">
pyarrow.compute.ceil
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.floor.html">
pyarrow.compute.floor
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.round.html">
pyarrow.compute.round
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.round_to_multiple.html">
pyarrow.compute.round_to_multiple
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.trunc.html">
pyarrow.compute.trunc
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ln.html">
pyarrow.compute.ln
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ln_checked.html">
pyarrow.compute.ln_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.log10.html">
pyarrow.compute.log10
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.log10_checked.html">
pyarrow.compute.log10_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.log1p.html">
pyarrow.compute.log1p
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.log1p_checked.html">
pyarrow.compute.log1p_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.log2.html">
pyarrow.compute.log2
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.log2_checked.html">
pyarrow.compute.log2_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.logb.html">
pyarrow.compute.logb
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.logb_checked.html">
pyarrow.compute.logb_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.acos.html">
pyarrow.compute.acos
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.acos_checked.html">
pyarrow.compute.acos_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.asin.html">
pyarrow.compute.asin
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.asin_checked.html">
pyarrow.compute.asin_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.atan.html">
pyarrow.compute.atan
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.atan2.html">
pyarrow.compute.atan2
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.cos.html">
pyarrow.compute.cos
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.cos_checked.html">
pyarrow.compute.cos_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.sin.html">
pyarrow.compute.sin
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.sin_checked.html">
pyarrow.compute.sin_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.tan.html">
pyarrow.compute.tan
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.tan_checked.html">
pyarrow.compute.tan_checked
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.equal.html">
pyarrow.compute.equal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.greater.html">
pyarrow.compute.greater
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.greater_equal.html">
pyarrow.compute.greater_equal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.less.html">
pyarrow.compute.less
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.less_equal.html">
pyarrow.compute.less_equal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.not_equal.html">
pyarrow.compute.not_equal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.max_element_wise.html">
pyarrow.compute.max_element_wise
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.min_element_wise.html">
pyarrow.compute.min_element_wise
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.and_.html">
pyarrow.compute.and_
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.and_kleene.html">
pyarrow.compute.and_kleene
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.and_not.html">
pyarrow.compute.and_not
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.and_not_kleene.html">
pyarrow.compute.and_not_kleene
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.invert.html">
pyarrow.compute.invert
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.or_.html">
pyarrow.compute.or_
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.or_kleene.html">
pyarrow.compute.or_kleene
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.xor.html">
pyarrow.compute.xor
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_alnum.html">
pyarrow.compute.ascii_is_alnum
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_alpha.html">
pyarrow.compute.ascii_is_alpha
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_decimal.html">
pyarrow.compute.ascii_is_decimal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_lower.html">
pyarrow.compute.ascii_is_lower
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_printable.html">
pyarrow.compute.ascii_is_printable
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_space.html">
pyarrow.compute.ascii_is_space
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_upper.html">
pyarrow.compute.ascii_is_upper
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_alnum.html">
pyarrow.compute.utf8_is_alnum
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_alpha.html">
pyarrow.compute.utf8_is_alpha
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_decimal.html">
pyarrow.compute.utf8_is_decimal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_digit.html">
pyarrow.compute.utf8_is_digit
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_lower.html">
pyarrow.compute.utf8_is_lower
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_numeric.html">
pyarrow.compute.utf8_is_numeric
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_printable.html">
pyarrow.compute.utf8_is_printable
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_space.html">
pyarrow.compute.utf8_is_space
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_upper.html">
pyarrow.compute.utf8_is_upper
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_is_title.html">
pyarrow.compute.ascii_is_title
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_is_title.html">
pyarrow.compute.utf8_is_title
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.string_is_ascii.html">
pyarrow.compute.string_is_ascii
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_capitalize.html">
pyarrow.compute.ascii_capitalize
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_lower.html">
pyarrow.compute.ascii_lower
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_reverse.html">
pyarrow.compute.ascii_reverse
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_swapcase.html">
pyarrow.compute.ascii_swapcase
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_title.html">
pyarrow.compute.ascii_title
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_upper.html">
pyarrow.compute.ascii_upper
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.binary_length.html">
pyarrow.compute.binary_length
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.binary_repeat.html">
pyarrow.compute.binary_repeat
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.binary_replace_slice.html">
pyarrow.compute.binary_replace_slice
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.binary_reverse.html">
pyarrow.compute.binary_reverse
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.replace_substring.html">
pyarrow.compute.replace_substring
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.replace_substring_regex.html">
pyarrow.compute.replace_substring_regex
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_capitalize.html">
pyarrow.compute.utf8_capitalize
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_length.html">
pyarrow.compute.utf8_length
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_lower.html">
pyarrow.compute.utf8_lower
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_replace_slice.html">
pyarrow.compute.utf8_replace_slice
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_reverse.html">
pyarrow.compute.utf8_reverse
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_swapcase.html">
pyarrow.compute.utf8_swapcase
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_title.html">
pyarrow.compute.utf8_title
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_upper.html">
pyarrow.compute.utf8_upper
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_center.html">
pyarrow.compute.ascii_center
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_lpad.html">
pyarrow.compute.ascii_lpad
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_rpad.html">
pyarrow.compute.ascii_rpad
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_center.html">
pyarrow.compute.utf8_center
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_lpad.html">
pyarrow.compute.utf8_lpad
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_rpad.html">
pyarrow.compute.utf8_rpad
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_ltrim.html">
pyarrow.compute.ascii_ltrim
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_ltrim_whitespace.html">
pyarrow.compute.ascii_ltrim_whitespace
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_rtrim.html">
pyarrow.compute.ascii_rtrim
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_rtrim_whitespace.html">
pyarrow.compute.ascii_rtrim_whitespace
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_trim.html">
pyarrow.compute.ascii_trim
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_trim_whitespace.html">
pyarrow.compute.ascii_trim_whitespace
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_ltrim.html">
pyarrow.compute.utf8_ltrim
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_ltrim_whitespace.html">
pyarrow.compute.utf8_ltrim_whitespace
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_rtrim.html">
pyarrow.compute.utf8_rtrim
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_rtrim_whitespace.html">
pyarrow.compute.utf8_rtrim_whitespace
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_trim.html">
pyarrow.compute.utf8_trim
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_trim_whitespace.html">
pyarrow.compute.utf8_trim_whitespace
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ascii_split_whitespace.html">
pyarrow.compute.ascii_split_whitespace
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.split_pattern.html">
pyarrow.compute.split_pattern
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.split_pattern_regex.html">
pyarrow.compute.split_pattern_regex
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_split_whitespace.html">
pyarrow.compute.utf8_split_whitespace
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.extract_regex.html">
pyarrow.compute.extract_regex
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.binary_join.html">
pyarrow.compute.binary_join
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.binary_join_element_wise.html">
pyarrow.compute.binary_join_element_wise
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.utf8_slice_codeunits.html">
pyarrow.compute.utf8_slice_codeunits
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.count_substring.html">
pyarrow.compute.count_substring
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.count_substring_regex.html">
pyarrow.compute.count_substring_regex
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ends_with.html">
pyarrow.compute.ends_with
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.find_substring.html">
pyarrow.compute.find_substring
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.find_substring_regex.html">
pyarrow.compute.find_substring_regex
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.index_in.html">
pyarrow.compute.index_in
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.is_in.html">
pyarrow.compute.is_in
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.match_like.html">
pyarrow.compute.match_like
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.match_substring.html">
pyarrow.compute.match_substring
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.match_substring_regex.html">
pyarrow.compute.match_substring_regex
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.starts_with.html">
pyarrow.compute.starts_with
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.indices_nonzero.html">
pyarrow.compute.indices_nonzero
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.is_finite.html">
pyarrow.compute.is_finite
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.is_inf.html">
pyarrow.compute.is_inf
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.is_nan.html">
pyarrow.compute.is_nan
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.is_null.html">
pyarrow.compute.is_null
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.is_valid.html">
pyarrow.compute.is_valid
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.case_when.html">
pyarrow.compute.case_when
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.choose.html">
pyarrow.compute.choose
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.coalesce.html">
pyarrow.compute.coalesce
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.if_else.html">
pyarrow.compute.if_else
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.cast.html">
pyarrow.compute.cast
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ceil_temporal.html">
pyarrow.compute.ceil_temporal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.floor_temporal.html">
pyarrow.compute.floor_temporal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.round_temporal.html">
pyarrow.compute.round_temporal
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.strftime.html">
pyarrow.compute.strftime
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.strptime.html">
pyarrow.compute.strptime
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.day.html">
pyarrow.compute.day
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.day_of_week.html">
pyarrow.compute.day_of_week
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.day_of_year.html">
pyarrow.compute.day_of_year
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.hour.html">
pyarrow.compute.hour
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.iso_week.html">
pyarrow.compute.iso_week
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.iso_year.html">
pyarrow.compute.iso_year
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.iso_calendar.html">
pyarrow.compute.iso_calendar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.microsecond.html">
pyarrow.compute.microsecond
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.millisecond.html">
pyarrow.compute.millisecond
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.minute.html">
pyarrow.compute.minute
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.month.html">
pyarrow.compute.month
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.nanosecond.html">
pyarrow.compute.nanosecond
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.quarter.html">
pyarrow.compute.quarter
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.second.html">
pyarrow.compute.second
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.subsecond.html">
pyarrow.compute.subsecond
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.us_week.html">
pyarrow.compute.us_week
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.week.html">
pyarrow.compute.week
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.year.html">
pyarrow.compute.year
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.year_month_day.html">
pyarrow.compute.year_month_day
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.day_time_interval_between.html">
pyarrow.compute.day_time_interval_between
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.days_between.html">
pyarrow.compute.days_between
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.hours_between.html">
pyarrow.compute.hours_between
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.microseconds_between.html">
pyarrow.compute.microseconds_between
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.milliseconds_between.html">
pyarrow.compute.milliseconds_between
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.minutes_between.html">
pyarrow.compute.minutes_between
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.month_day_nano_interval_between.html">
pyarrow.compute.month_day_nano_interval_between
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.month_interval_between.html">
pyarrow.compute.month_interval_between
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.nanoseconds_between.html">
pyarrow.compute.nanoseconds_between
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.quarters_between.html">
pyarrow.compute.quarters_between
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.seconds_between.html">
pyarrow.compute.seconds_between
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.weeks_between.html">
pyarrow.compute.weeks_between
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.years_between.html">
pyarrow.compute.years_between
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.assume_timezone.html">
pyarrow.compute.assume_timezone
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.dictionary_encode.html">
pyarrow.compute.dictionary_encode
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.unique.html">
pyarrow.compute.unique
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.value_counts.html">
pyarrow.compute.value_counts
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.array_filter.html">
pyarrow.compute.array_filter
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.array_take.html">
pyarrow.compute.array_take
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.drop_null.html">
pyarrow.compute.drop_null
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.filter.html">
pyarrow.compute.filter
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.take.html">
pyarrow.compute.take
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.array_sort_indices.html">
pyarrow.compute.array_sort_indices
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.partition_nth_indices.html">
pyarrow.compute.partition_nth_indices
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.select_k_unstable.html">
pyarrow.compute.select_k_unstable
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.sort_indices.html">
pyarrow.compute.sort_indices
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.fill_null_backward.html">
pyarrow.compute.fill_null_backward
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.fill_null_forward.html">
pyarrow.compute.fill_null_forward
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.list_element.html">
pyarrow.compute.list_element
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.list_flatten.html">
pyarrow.compute.list_flatten
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.list_parent_indices.html">
pyarrow.compute.list_parent_indices
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.list_value_length.html">
pyarrow.compute.list_value_length
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.make_struct.html">
pyarrow.compute.make_struct
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.replace_with_mask.html">
pyarrow.compute.replace_with_mask
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.struct_field.html">
pyarrow.compute.struct_field
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ArraySortOptions.html">
pyarrow.compute.ArraySortOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.AssumeTimezoneOptions.html">
pyarrow.compute.AssumeTimezoneOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.CastOptions.html">
pyarrow.compute.CastOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.CountOptions.html">
pyarrow.compute.CountOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.DayOfWeekOptions.html">
pyarrow.compute.DayOfWeekOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.DictionaryEncodeOptions.html">
pyarrow.compute.DictionaryEncodeOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ElementWiseAggregateOptions.html">
pyarrow.compute.ElementWiseAggregateOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ExtractRegexOptions.html">
pyarrow.compute.ExtractRegexOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.FilterOptions.html">
pyarrow.compute.FilterOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.IndexOptions.html">
pyarrow.compute.IndexOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.JoinOptions.html">
pyarrow.compute.JoinOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.MakeStructOptions.html">
pyarrow.compute.MakeStructOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.MatchSubstringOptions.html">
pyarrow.compute.MatchSubstringOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ModeOptions.html">
pyarrow.compute.ModeOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.NullOptions.html">
pyarrow.compute.NullOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.PadOptions.html">
pyarrow.compute.PadOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.PartitionNthOptions.html">
pyarrow.compute.PartitionNthOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.QuantileOptions.html">
pyarrow.compute.QuantileOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ReplaceSliceOptions.html">
pyarrow.compute.ReplaceSliceOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ReplaceSubstringOptions.html">
pyarrow.compute.ReplaceSubstringOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.RoundOptions.html">
pyarrow.compute.RoundOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.RoundTemporalOptions.html">
pyarrow.compute.RoundTemporalOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.RoundToMultipleOptions.html">
pyarrow.compute.RoundToMultipleOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.ScalarAggregateOptions.html">
pyarrow.compute.ScalarAggregateOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.SelectKOptions.html">
pyarrow.compute.SelectKOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.SetLookupOptions.html">
pyarrow.compute.SetLookupOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.SliceOptions.html">
pyarrow.compute.SliceOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.SortOptions.html">
pyarrow.compute.SortOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.SplitOptions.html">
pyarrow.compute.SplitOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.SplitPatternOptions.html">
pyarrow.compute.SplitPatternOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.StrftimeOptions.html">
pyarrow.compute.StrftimeOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.StrptimeOptions.html">
pyarrow.compute.StrptimeOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.StructFieldOptions.html">
pyarrow.compute.StructFieldOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.TakeOptions.html">
pyarrow.compute.TakeOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.TDigestOptions.html">
pyarrow.compute.TDigestOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.TrimOptions.html">
pyarrow.compute.TrimOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.VarianceOptions.html">
pyarrow.compute.VarianceOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.compute.WeekOptions.html">
pyarrow.compute.WeekOptions
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../python/api/files.html">
Streams and File Access
</a>
<input class="toctree-checkbox" id="toctree-checkbox-14" name="toctree-checkbox-14" type="checkbox" aria-label="Toggle navigation of Streams and File Access"/>
<label for="toctree-checkbox-14">
<i class="fas fa-chevron-down" aria-hidden="true">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.input_stream.html">
pyarrow.input_stream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.output_stream.html">
pyarrow.output_stream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.memory_map.html">
pyarrow.memory_map
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.create_memory_map.html">
pyarrow.create_memory_map
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.NativeFile.html">
pyarrow.NativeFile
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.OSFile.html">
pyarrow.OSFile
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.PythonFile.html">
pyarrow.PythonFile
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.BufferReader.html">
pyarrow.BufferReader
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.BufferOutputStream.html">
pyarrow.BufferOutputStream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.FixedSizeBufferWriter.html">
pyarrow.FixedSizeBufferWriter
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.MemoryMappedFile.html">
pyarrow.MemoryMappedFile
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.CompressedInputStream.html">
pyarrow.CompressedInputStream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.CompressedOutputStream.html">
pyarrow.CompressedOutputStream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.hdfs.connect.html">
pyarrow.hdfs.connect
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.LocalFileSystem.html">
pyarrow.LocalFileSystem
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../python/api/tables.html">
Tables and Tensors
</a>
<input class="toctree-checkbox" id="toctree-checkbox-15" name="toctree-checkbox-15" type="checkbox" aria-label="Toggle navigation of Tables and Tensors"/>
<label for="toctree-checkbox-15">
<i class="fas fa-chevron-down" aria-hidden="true">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.chunked_array.html">
pyarrow.chunked_array
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.concat_arrays.html">
pyarrow.concat_arrays
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.concat_tables.html">
pyarrow.concat_tables
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.record_batch.html">
pyarrow.record_batch
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.table.html">
pyarrow.table
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ChunkedArray.html">
pyarrow.ChunkedArray
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.RecordBatch.html">
pyarrow.RecordBatch
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Table.html">
pyarrow.Table
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.TableGroupBy.html">
pyarrow.TableGroupBy
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.Tensor.html">
pyarrow.Tensor
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../python/api/ipc.html">
Serialization and IPC
</a>
<input class="toctree-checkbox" id="toctree-checkbox-16" name="toctree-checkbox-16" type="checkbox" aria-label="Toggle navigation of Serialization and IPC"/>
<label for="toctree-checkbox-16">
<i class="fas fa-chevron-down" aria-hidden="true">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.new_file.html">
pyarrow.ipc.new_file
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.open_file.html">
pyarrow.ipc.open_file
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.new_stream.html">
pyarrow.ipc.new_stream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.open_stream.html">
pyarrow.ipc.open_stream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.read_message.html">
pyarrow.ipc.read_message
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.read_record_batch.html">
pyarrow.ipc.read_record_batch
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.get_record_batch_size.html">
pyarrow.ipc.get_record_batch_size
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.read_tensor.html">
pyarrow.ipc.read_tensor
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.write_tensor.html">
pyarrow.ipc.write_tensor
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.get_tensor_size.html">
pyarrow.ipc.get_tensor_size
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.IpcWriteOptions.html">
pyarrow.ipc.IpcWriteOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.Message.html">
pyarrow.ipc.Message
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.MessageReader.html">
pyarrow.ipc.MessageReader
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.RecordBatchFileReader.html">
pyarrow.ipc.RecordBatchFileReader
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.RecordBatchFileWriter.html">
pyarrow.ipc.RecordBatchFileWriter
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.RecordBatchStreamReader.html">
pyarrow.ipc.RecordBatchStreamReader
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.ipc.RecordBatchStreamWriter.html">
pyarrow.ipc.RecordBatchStreamWriter
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.serialize.html">
pyarrow.serialize
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.serialize_to.html">
pyarrow.serialize_to
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.deserialize.html">
pyarrow.deserialize
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.deserialize_components.html">
pyarrow.deserialize_components
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.deserialize_from.html">
pyarrow.deserialize_from
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.read_serialized.html">
pyarrow.read_serialized
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.SerializedPyObject.html">
pyarrow.SerializedPyObject
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.SerializationContext.html">
pyarrow.SerializationContext
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../python/api/flight.html">
Arrow Flight
</a>
<input class="toctree-checkbox" id="toctree-checkbox-17" name="toctree-checkbox-17" type="checkbox"/>
<label for="toctree-checkbox-17">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.Action.html">
pyarrow.flight.Action
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.ActionType.html">
pyarrow.flight.ActionType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.DescriptorType.html">
pyarrow.flight.DescriptorType
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.FlightDescriptor.html">
pyarrow.flight.FlightDescriptor
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.FlightEndpoint.html">
pyarrow.flight.FlightEndpoint
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.FlightInfo.html">
pyarrow.flight.FlightInfo
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.Location.html">
pyarrow.flight.Location
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.Ticket.html">
pyarrow.flight.Ticket
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.Result.html">
pyarrow.flight.Result
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.FlightCallOptions.html">
pyarrow.flight.FlightCallOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.FlightClient.html">
pyarrow.flight.FlightClient
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.ClientMiddlewareFactory.html">
pyarrow.flight.ClientMiddlewareFactory
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.ClientMiddleware.html">
pyarrow.flight.ClientMiddleware
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.FlightServerBase.html">
pyarrow.flight.FlightServerBase
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.GeneratorStream.html">
pyarrow.flight.GeneratorStream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.RecordBatchStream.html">
pyarrow.flight.RecordBatchStream
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.ServerMiddlewareFactory.html">
pyarrow.flight.ServerMiddlewareFactory
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.ServerMiddleware.html">
pyarrow.flight.ServerMiddleware
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.ClientAuthHandler.html">
pyarrow.flight.ClientAuthHandler
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.ServerAuthHandler.html">
pyarrow.flight.ServerAuthHandler
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.FlightMethod.html">
pyarrow.flight.FlightMethod
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.flight.CallInfo.html">
pyarrow.flight.CallInfo
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../python/api/formats.html">
Tabular File Formats
</a>
<input class="toctree-checkbox" id="toctree-checkbox-18" name="toctree-checkbox-18" type="checkbox"/>
<label for="toctree-checkbox-18">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.csv.ConvertOptions.html">
pyarrow.csv.ConvertOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.csv.CSVStreamingReader.html">
pyarrow.csv.CSVStreamingReader
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.csv.CSVWriter.html">
pyarrow.csv.CSVWriter
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.csv.ISO8601.html">
pyarrow.csv.ISO8601
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.csv.ParseOptions.html">
pyarrow.csv.ParseOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.csv.ReadOptions.html">
pyarrow.csv.ReadOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.csv.WriteOptions.html">
pyarrow.csv.WriteOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.csv.open_csv.html">
pyarrow.csv.open_csv
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.csv.read_csv.html">
pyarrow.csv.read_csv
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.csv.write_csv.html">
pyarrow.csv.write_csv
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.csv.InvalidRow.html">
pyarrow.csv.InvalidRow
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.feather.read_feather.html">
pyarrow.feather.read_feather
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.feather.read_table.html">
pyarrow.feather.read_table
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.feather.write_feather.html">
pyarrow.feather.write_feather
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.json.ReadOptions.html">
pyarrow.json.ReadOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.json.ParseOptions.html">
pyarrow.json.ParseOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.json.read_json.html">
pyarrow.json.read_json
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.parquet.ParquetDataset.html">
pyarrow.parquet.ParquetDataset
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.parquet.ParquetFile.html">
pyarrow.parquet.ParquetFile
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.parquet.ParquetWriter.html">
pyarrow.parquet.ParquetWriter
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.parquet.read_table.html">
pyarrow.parquet.read_table
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.parquet.read_metadata.html">
pyarrow.parquet.read_metadata
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.parquet.read_pandas.html">
pyarrow.parquet.read_pandas
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.parquet.read_schema.html">
pyarrow.parquet.read_schema
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.parquet.write_metadata.html">
pyarrow.parquet.write_metadata
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.parquet.write_table.html">
pyarrow.parquet.write_table
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.parquet.write_to_dataset.html">
pyarrow.parquet.write_to_dataset
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.orc.ORCFile.html">
pyarrow.orc.ORCFile
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.orc.ORCWriter.html">
pyarrow.orc.ORCWriter
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.orc.read_table.html">
pyarrow.orc.read_table
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.orc.write_table.html">
pyarrow.orc.write_table
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../python/api/filesystems.html">
Filesystems
</a>
<input class="toctree-checkbox" id="toctree-checkbox-19" name="toctree-checkbox-19" type="checkbox"/>
<label for="toctree-checkbox-19">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.fs.FileInfo.html">
pyarrow.fs.FileInfo
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.fs.FileSelector.html">
pyarrow.fs.FileSelector
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.fs.FileSystem.html">
pyarrow.fs.FileSystem
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.fs.LocalFileSystem.html">
pyarrow.fs.LocalFileSystem
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.fs.S3FileSystem.html">
pyarrow.fs.S3FileSystem
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.fs.HadoopFileSystem.html">
pyarrow.fs.HadoopFileSystem
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.fs.SubTreeFileSystem.html">
pyarrow.fs.SubTreeFileSystem
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.fs.PyFileSystem.html">
pyarrow.fs.PyFileSystem
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.fs.FileSystemHandler.html">
pyarrow.fs.FileSystemHandler
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.fs.FSSpecHandler.html">
pyarrow.fs.FSSpecHandler
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.fs.copy_files.html">
pyarrow.fs.copy_files
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.fs.initialize_s3.html">
pyarrow.fs.initialize_s3
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.fs.finalize_s3.html">
pyarrow.fs.finalize_s3
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.fs.resolve_s3_region.html">
pyarrow.fs.resolve_s3_region
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.fs.S3LogLevel.html">
pyarrow.fs.S3LogLevel
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../python/api/dataset.html">
Dataset
</a>
<input class="toctree-checkbox" id="toctree-checkbox-20" name="toctree-checkbox-20" type="checkbox"/>
<label for="toctree-checkbox-20">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.dataset.html">
pyarrow.dataset.dataset
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.parquet_dataset.html">
pyarrow.dataset.parquet_dataset
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.partitioning.html">
pyarrow.dataset.partitioning
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.field.html">
pyarrow.dataset.field
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.scalar.html">
pyarrow.dataset.scalar
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.write_dataset.html">
pyarrow.dataset.write_dataset
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.FileFormat.html">
pyarrow.dataset.FileFormat
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.CsvFileFormat.html">
pyarrow.dataset.CsvFileFormat
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.CsvFragmentScanOptions.html">
pyarrow.dataset.CsvFragmentScanOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.IpcFileFormat.html">
pyarrow.dataset.IpcFileFormat
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.ParquetFileFormat.html">
pyarrow.dataset.ParquetFileFormat
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.ParquetReadOptions.html">
pyarrow.dataset.ParquetReadOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.ParquetFragmentScanOptions.html">
pyarrow.dataset.ParquetFragmentScanOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.Partitioning.html">
pyarrow.dataset.Partitioning
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.PartitioningFactory.html">
pyarrow.dataset.PartitioningFactory
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.DirectoryPartitioning.html">
pyarrow.dataset.DirectoryPartitioning
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.HivePartitioning.html">
pyarrow.dataset.HivePartitioning
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.Dataset.html">
pyarrow.dataset.Dataset
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.FileSystemDataset.html">
pyarrow.dataset.FileSystemDataset
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.FileSystemFactoryOptions.html">
pyarrow.dataset.FileSystemFactoryOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.FileSystemDatasetFactory.html">
pyarrow.dataset.FileSystemDatasetFactory
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.UnionDataset.html">
pyarrow.dataset.UnionDataset
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.Fragment.html">
pyarrow.dataset.Fragment
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.FragmentScanOptions.html">
pyarrow.dataset.FragmentScanOptions
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.Scanner.html">
pyarrow.dataset.Scanner
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.dataset.Expression.html">
pyarrow.dataset.Expression
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../python/api/plasma.html">
Plasma In-Memory Object Store
</a>
<input class="toctree-checkbox" id="toctree-checkbox-21" name="toctree-checkbox-21" type="checkbox"/>
<label for="toctree-checkbox-21">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.plasma.ObjectID.html">
pyarrow.plasma.ObjectID
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.plasma.PlasmaClient.html">
pyarrow.plasma.PlasmaClient
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.plasma.PlasmaBuffer.html">
pyarrow.plasma.PlasmaBuffer
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../python/api/cuda.html">
CUDA Integration
</a>
<input class="toctree-checkbox" id="toctree-checkbox-22" name="toctree-checkbox-22" type="checkbox"/>
<label for="toctree-checkbox-22">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.cuda.Context.html">
pyarrow.cuda.Context
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.cuda.CudaBuffer.html">
pyarrow.cuda.CudaBuffer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.cuda.new_host_buffer.html">
pyarrow.cuda.new_host_buffer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.cuda.HostBuffer.html">
pyarrow.cuda.HostBuffer
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.cuda.BufferReader.html">
pyarrow.cuda.BufferReader
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.cuda.BufferWriter.html">
pyarrow.cuda.BufferWriter
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.cuda.serialize_record_batch.html">
pyarrow.cuda.serialize_record_batch
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.cuda.read_record_batch.html">
pyarrow.cuda.read_record_batch
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.cuda.read_message.html">
pyarrow.cuda.read_message
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.cuda.IpcMemHandle.html">
pyarrow.cuda.IpcMemHandle
</a>
</li>
</ul>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../python/api/misc.html">
Miscellaneous
</a>
<input class="toctree-checkbox" id="toctree-checkbox-23" name="toctree-checkbox-23" type="checkbox"/>
<label for="toctree-checkbox-23">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.cpu_count.html">
pyarrow.cpu_count
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.set_cpu_count.html">
pyarrow.set_cpu_count
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.get_include.html">
pyarrow.get_include
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.get_libraries.html">
pyarrow.get_libraries
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../python/generated/pyarrow.get_library_dirs.html">
pyarrow.get_library_dirs
</a>
</li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/getting_involved.html">
Getting Involved
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../python/benchmarks.html">
Benchmarks
</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../r/index.html">
R
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/arrow/blob/master/ruby/README.md">
Ruby
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://docs.rs/crate/arrow/">
Rust
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../status.html">
Implementation Status
</a>
</li>
</ul>
<p aria-level="2" class="caption" role="heading">
<span class="caption-text">
Cookbooks
</span>
</p>
<ul class="nav bd-sidenav">
<li class="toctree-l1">
<a class="reference external" href="https://arrow.apache.org/cookbook/cpp/">
C++
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://arrow.apache.org/cookbook/py/">
Python
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://arrow.apache.org/cookbook/r/">
R
</a>
</li>
</ul>
<p aria-level="2" class="caption" role="heading">
<span class="caption-text">
Specifications and Protocols
</span>
</p>
<ul class="nav bd-sidenav">
<li class="toctree-l1">
<a class="reference internal" href="../format/Versioning.html">
Format Versioning and Stability
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../format/Columnar.html">
Arrow Columnar Format
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../format/Flight.html">
Arrow Flight RPC
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../format/Integration.html">
Integration Testing
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../format/CDataInterface.html">
The Arrow C data interface
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../format/CStreamInterface.html">
The Arrow C stream interface
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../format/Other.html">
Other Data Structures
</a>
</li>
</ul>
<p aria-level="2" class="caption" role="heading">
<span class="caption-text">
Development
</span>
</p>
<ul class="nav bd-sidenav">
<li class="toctree-l1">
<a class="reference internal" href="../developers/contributing.html">
Contributing to Apache Arrow
</a>
</li>
<li class="toctree-l1 has-children">
<a class="reference internal" href="../developers/guide/index.html">
New Contributor’s Guide
</a>
<input class="toctree-checkbox" id="toctree-checkbox-24" name="toctree-checkbox-24" type="checkbox"/>
<label for="toctree-checkbox-24">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2">
<a class="reference internal" href="../developers/guide/architectural_overview.html">
Architectural Overview
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../developers/guide/communication.html">
Communication
</a>
</li>
<li class="toctree-l2 has-children">
<a class="reference internal" href="../developers/guide/step_by_step/index.html">
Steps in making your first PR
</a>
<input class="toctree-checkbox" id="toctree-checkbox-25" name="toctree-checkbox-25" type="checkbox"/>
<label for="toctree-checkbox-25">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l3">
<a class="reference internal" href="../developers/guide/step_by_step/set_up.html">
Set up
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../developers/guide/step_by_step/building.html">
Building the Arrow libraries 🏋🏿‍♀️
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../developers/guide/step_by_step/finding_issues.html">
Finding good first issues 🔎
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../developers/guide/step_by_step/arrow_codebase.html">
Working on the Arrow codebase 🧐
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../developers/guide/step_by_step/testing.html">
Testing 🧪
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../developers/guide/step_by_step/pr_and_github.html">
Lifecycle of a pull request
</a>
</li>
</ul>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../developers/guide/documentation.html">
Helping with documentation
</a>
</li>
<li class="toctree-l2 has-children">
<a class="reference internal" href="../developers/guide/tutorials/index.html">
Tutorials
</a>
<input class="toctree-checkbox" id="toctree-checkbox-26" name="toctree-checkbox-26" type="checkbox"/>
<label for="toctree-checkbox-26">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l3">
<a class="reference internal" href="../developers/guide/tutorials/python_tutorial.html">
Python tutorial
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../developers/guide/tutorials/r_tutorial.html">
R tutorial
</a>
</li>
</ul>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../developers/guide/resources.html">
Additional information and resources
</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../developers/reviewing.html">
Reviewing contributions
</a>
</li>
<li class="toctree-l1 has-children">
<a class="reference internal" href="../developers/cpp/index.html">
C++ Development
</a>
<input class="toctree-checkbox" id="toctree-checkbox-27" name="toctree-checkbox-27" type="checkbox"/>
<label for="toctree-checkbox-27">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2">
<a class="reference internal" href="../developers/cpp/building.html">
Building Arrow C++
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../developers/cpp/development.html">
Development Guidelines
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../developers/cpp/windows.html">
Developing on Windows
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../developers/cpp/conventions.html">
Conventions
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../developers/cpp/fuzzing.html">
Fuzzing Arrow C++
</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../developers/python.html">
Python Development
</a>
</li>
<li class="toctree-l1 has-children">
<a class="reference internal" href="../developers/continuous_integration/index.html">
Continuous Integration
</a>
<input class="toctree-checkbox" id="toctree-checkbox-28" name="toctree-checkbox-28" type="checkbox"/>
<label for="toctree-checkbox-28">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2">
<a class="reference internal" href="../developers/continuous_integration/overview.html">
Continuous Integration
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../developers/continuous_integration/docker.html">
Running Docker Builds
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../developers/continuous_integration/archery.html">
Daily Development using Archery
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../developers/continuous_integration/crossbow.html">
Packaging and Testing with Crossbow
</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../developers/benchmarks.html">
Benchmarks
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../developers/documentation.html">
Building the Documentation
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../developers/computeir.html">
Arrow Compute IR (Intermediate Representation)
</a>
</li>
</ul>
</div>
</nav>
</div>
<div class="sidebar-end-items">
</div>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<div class="toc-item">
<div class="tocsection onthispage pt-5 pb-3">
<i class="fas fa-list" aria-hidden="true"></i> On this page
</div>
<nav id="bd-toc-nav" aria-label="On this page">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#reading-datasets">
Reading Datasets
</a>
<ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#dataset-discovery">
Dataset discovery
</a>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#reading-different-file-formats">
Reading different file formats
</a>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#customizing-file-formats">
Customizing file formats
</a>
</li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#filtering-data">
Filtering data
</a>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#projecting-columns">
Projecting columns
</a>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#reading-and-writing-partitioned-data">
Reading and writing partitioned data
</a>
<ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#different-partitioning-schemes">
Different partitioning schemes
</a>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#partitioning-performance-considerations">
Partitioning performance considerations
</a>
</li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#reading-from-other-data-sources">
Reading from other data sources
</a>
<ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#reading-in-memory-data">
Reading in-memory data
</a>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#reading-from-cloud-storage">
Reading from cloud storage
</a>
</li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#a-note-on-transactions-acid-guarantees">
A note on transactions &amp; ACID guarantees
</a>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#full-example">
Full Example
</a>
</li>
</ul>
</nav>
</div>
<div class="toc-item">
<div class="tocsection editthispage">
<a href="https://github.com/apache/arrow/edit/master/docs/source/cpp/dataset.rst">
<i class="fas fa-pencil-alt" aria-hidden="true"></i> Edit this page
</a>
</div>
</div>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<div class="section" id="tabular-datasets">
<h1>Tabular Datasets<a class="headerlink" href="#tabular-datasets" title="Permalink to this headline"></a></h1>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<p><a class="reference internal" href="api/dataset.html"><span class="doc">Dataset API reference</span></a></p>
</div>
<div class="admonition warning">
<p class="admonition-title">Warning</p>
<p>The <code class="docutils literal notranslate"><span class="pre">arrow::dataset</span></code> namespace is experimental, and a stable API
is not yet guaranteed.</p>
</div>
<p>The Arrow Datasets library provides functionality to efficiently work with
tabular, potentially larger-than-memory, and multi-file datasets. This includes:</p>
<ul class="simple">
<li><p>A unified interface that supports different sources and file formats
(currently, Parquet, ORC, Feather / Arrow IPC, and CSV files) and different
file systems (local, cloud).</p></li>
<li><p>Discovery of sources (crawling directories, handling partitioned datasets with
various partitioning schemes, basic schema normalization, …)</p></li>
<li><p>Optimized reading with predicate pushdown (filtering rows), projection
(selecting and deriving columns), and optionally parallel reading.</p></li>
</ul>
<p>The goal is to expand support to other file formats and data sources
(e.g. database connections) in the future.</p>
<div class="section" id="reading-datasets">
<span id="cpp-dataset-reading"></span><h2>Reading Datasets<a class="headerlink" href="#reading-datasets" title="Permalink to this headline"></a></h2>
<p>For the examples below, let’s create a small dataset consisting
of a directory with two Parquet files:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">52 </span><span class="c1">// Generate some data for the rest of this example.</span>
<span class="lineno">53 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">CreateTable</span><span class="p">()</span> <span class="p">{</span>
<span class="lineno">54 </span> <span class="k">auto</span> <span class="n">schema</span> <span class="o">=</span>
<span class="lineno">55 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span>
<span class="lineno">56 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;c&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">())});</span>
<span class="lineno">57 </span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;</span> <span class="n">array_a</span><span class="p">;</span>
<span class="lineno">58 </span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;</span> <span class="n">array_b</span><span class="p">;</span>
<span class="lineno">59 </span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;</span> <span class="n">array_c</span><span class="p">;</span>
<span class="lineno">60 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">NumericBuilder</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int64Type</span><span class="o">&gt;</span> <span class="n">builder</span><span class="p">;</span>
<span class="lineno">61 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">}));</span>
<span class="lineno">62 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">array_a</span><span class="p">));</span>
<span class="lineno">63 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span>
<span class="lineno">64 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">9</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">}));</span>
<span class="lineno">65 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">array_b</span><span class="p">));</span>
<span class="lineno">66 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span>
<span class="lineno">67 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">}));</span>
<span class="lineno">68 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">array_c</span><span class="p">));</span>
<span class="lineno">69 </span> <span class="k">return</span> <span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="p">{</span><span class="n">array_a</span><span class="p">,</span> <span class="n">array_b</span><span class="p">,</span> <span class="n">array_c</span><span class="p">});</span>
<span class="lineno">70 </span><span class="p">}</span>
<span class="lineno">71 </span>
<span class="lineno">72 </span><span class="c1">// Set up a dataset by writing two Parquet files.</span>
<span class="lineno">73 </span><span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">CreateExampleParquetDataset</span><span class="p">(</span><span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno">74 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">root_path</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">75 </span> <span class="k">auto</span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">root_path</span> <span class="o">+</span> <span class="s">&quot;/parquet_dataset&quot;</span><span class="p">;</span>
<span class="lineno">76 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span>
<span class="lineno">77 </span> <span class="c1">// Create an Arrow Table</span>
<span class="lineno">78 </span> <span class="k">auto</span> <span class="n">table</span> <span class="o">=</span> <span class="n">CreateTable</span><span class="p">();</span>
<span class="lineno">79 </span> <span class="c1">// Write it into two Parquet files</span>
<span class="lineno">80 </span> <span class="k">auto</span> <span class="n">output</span> <span class="o">=</span> <span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">OpenOutputStream</span><span class="p">(</span><span class="n">base_path</span> <span class="o">+</span> <span class="s">&quot;/data1.parquet&quot;</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">81 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">parquet</span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">WriteTable</span><span class="p">(</span>
<span class="lineno">82 </span> <span class="o">*</span><span class="n">table</span><span class="o">-&gt;</span><span class="n">Slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">5</span><span class="p">),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">(),</span> <span class="n">output</span><span class="p">,</span> <span class="cm">/*chunk_size=*/</span><span class="mi">2048</span><span class="p">));</span>
<span class="lineno">83 </span> <span class="n">output</span> <span class="o">=</span> <span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">OpenOutputStream</span><span class="p">(</span><span class="n">base_path</span> <span class="o">+</span> <span class="s">&quot;/data2.parquet&quot;</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">84 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">parquet</span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">WriteTable</span><span class="p">(</span>
<span class="lineno">85 </span> <span class="o">*</span><span class="n">table</span><span class="o">-&gt;</span><span class="n">Slice</span><span class="p">(</span><span class="mi">5</span><span class="p">),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">(),</span> <span class="n">output</span><span class="p">,</span> <span class="cm">/*chunk_size=*/</span><span class="mi">2048</span><span class="p">));</span>
<span class="lineno">86 </span> <span class="k">return</span> <span class="n">base_path</span><span class="p">;</span>
<span class="lineno">87 </span><span class="p">}</span>
</pre></div>
</div>
<p>(See the full example at bottom: <a class="reference internal" href="#cpp-dataset-full-example"><span class="std std-ref">Full Example</span></a>.)</p>
<div class="section" id="dataset-discovery">
<h3>Dataset discovery<a class="headerlink" href="#dataset-discovery" title="Permalink to this headline"></a></h3>
<p>A <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::Dataset</span></code></a> object can be created using the various
<a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset14DatasetFactoryE" title="arrow::dataset::DatasetFactory"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::DatasetFactory</span></code></a> objects. Here, we’ll use the
<a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset24FileSystemDatasetFactoryE" title="arrow::dataset::FileSystemDatasetFactory"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::FileSystemDatasetFactory</span></code></a>, which can create a dataset
given a base directory path:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">159 </span><span class="c1">// Read the whole dataset with the given format, without partitioning.</span>
<span class="lineno">160 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">ScanWholeDataset</span><span class="p">(</span>
<span class="lineno">161 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno">162 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">&gt;&amp;</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">163 </span> <span class="c1">// Create a dataset by scanning the filesystem for files</span>
<span class="lineno">164 </span><span class="hll"> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span>
</span><span class="lineno">165 </span><span class="hll"> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span>
</span><span class="lineno">166 </span><span class="hll"> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span>
</span><span class="lineno">167 </span><span class="hll"> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span>
</span><span class="lineno">168 </span><span class="hll"> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span>
</span><span class="lineno">169 </span><span class="hll"> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
</span><span class="lineno">170 </span> <span class="c1">// Print out the fragments</span>
<span class="lineno">171 </span> <span class="k">for</span> <span class="p">(</span><span class="k">const</span> <span class="k">auto</span><span class="o">&amp;</span> <span class="nl">fragment</span> <span class="p">:</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">GetFragments</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">())</span> <span class="p">{</span>
<span class="lineno">172 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o">&lt;&lt;</span> <span class="s">&quot;Found fragment: &quot;</span> <span class="o">&lt;&lt;</span> <span class="p">(</span><span class="o">*</span><span class="n">fragment</span><span class="p">)</span><span class="o">-&gt;</span><span class="n">ToString</span><span class="p">()</span> <span class="o">&lt;&lt;</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="lineno">173 </span> <span class="p">}</span>
<span class="lineno">174 </span> <span class="c1">// Read the entire dataset as a Table</span>
<span class="lineno">175 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">176 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">177 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-&gt;</span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">178 </span><span class="p">}</span>
</pre></div>
</div>
<p>We’re also passing the filesystem to use and the file format to use for reading.
This lets us choose between (for example) reading local files or files in Amazon
S3, or between Parquet and CSV.</p>
<p>In addition to searching a base directory, we can list file paths manually.</p>
<p>Creating a <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset7DatasetE" title="arrow::dataset::Dataset"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::Dataset</span></code></a> does not begin reading the data
itself. It only crawls the directory to find all the files (if needed), which can
be retrieved with <a class="reference internal" href="api/dataset.html#_CPPv4NK5arrow7dataset17FileSystemDataset5filesEv" title="arrow::dataset::FileSystemDataset::files"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">arrow::dataset::FileSystemDataset::files()</span></code></a>:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="c1">// Print out the files crawled (only for FileSystemDataset)</span>
<span class="k">for</span> <span class="p">(</span><span class="k">const</span> <span class="k">auto</span><span class="o">&amp;</span> <span class="nl">filename</span> <span class="p">:</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">files</span><span class="p">())</span> <span class="p">{</span>
<span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o">&lt;&lt;</span> <span class="n">filename</span> <span class="o">&lt;&lt;</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="p">}</span>
</pre></div>
</div>
<p>…and infers the dataset’s schema (by default from the first file):</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o">&lt;&lt;</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">schema</span><span class="p">()</span><span class="o">-&gt;</span><span class="n">ToString</span><span class="p">()</span> <span class="o">&lt;&lt;</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
</pre></div>
</div>
<p>Using the <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset7Dataset7NewScanEv" title="arrow::dataset::Dataset::NewScan"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">arrow::dataset::Dataset::NewScan()</span></code></a> method, we can build a
<a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset7ScannerE" title="arrow::dataset::Scanner"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::Scanner</span></code></a> and read the dataset (or a portion of it) into
a <a class="reference internal" href="api/table.html#_CPPv4N5arrow5TableE" title="arrow::Table"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::Table</span></code></a> with the <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset7Scanner7ToTableEv" title="arrow::dataset::Scanner::ToTable"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">arrow::dataset::Scanner::ToTable()</span></code></a>
method:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">159 </span><span class="c1">// Read the whole dataset with the given format, without partitioning.</span>
<span class="lineno">160 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">ScanWholeDataset</span><span class="p">(</span>
<span class="lineno">161 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno">162 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">&gt;&amp;</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">163 </span> <span class="c1">// Create a dataset by scanning the filesystem for files</span>
<span class="lineno">164 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span>
<span class="lineno">165 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span>
<span class="lineno">166 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span>
<span class="lineno">167 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span>
<span class="lineno">168 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">169 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">170 </span> <span class="c1">// Print out the fragments</span>
<span class="lineno">171 </span> <span class="k">for</span> <span class="p">(</span><span class="k">const</span> <span class="k">auto</span><span class="o">&amp;</span> <span class="nl">fragment</span> <span class="p">:</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">GetFragments</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">())</span> <span class="p">{</span>
<span class="lineno">172 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o">&lt;&lt;</span> <span class="s">&quot;Found fragment: &quot;</span> <span class="o">&lt;&lt;</span> <span class="p">(</span><span class="o">*</span><span class="n">fragment</span><span class="p">)</span><span class="o">-&gt;</span><span class="n">ToString</span><span class="p">()</span> <span class="o">&lt;&lt;</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="lineno">173 </span> <span class="p">}</span>
<span class="lineno">174 </span><span class="hll"> <span class="c1">// Read the entire dataset as a Table</span>
</span><span class="lineno">175 </span><span class="hll"> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
</span><span class="lineno">176 </span><span class="hll"> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
</span><span class="lineno">177 </span><span class="hll"> <span class="k">return</span> <span class="n">scanner</span><span class="o">-&gt;</span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
</span><span class="lineno">178 </span><span class="p">}</span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Depending on the size of your dataset, this can require a lot of
memory; see <a class="reference internal" href="#cpp-dataset-filtering-data"><span class="std std-ref">Filtering data</span></a> below on
filtering/projecting.</p>
</div>
</div>
<div class="section" id="reading-different-file-formats">
<h3>Reading different file formats<a class="headerlink" href="#reading-different-file-formats" title="Permalink to this headline"></a></h3>
<p>The above examples use Parquet files on local disk, but the Dataset API
provides a consistent interface across multiple file formats and filesystems.
(See <a class="reference internal" href="#cpp-dataset-cloud-storage"><span class="std std-ref">Reading from cloud storage</span></a> for more information on the latter.)
Currently, Parquet, ORC, Feather / Arrow IPC, and CSV file formats are
supported; more formats are planned in the future.</p>
<p>If we save the table as Feather files instead of Parquet files:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno"> 91 </span><span class="c1">// Set up a dataset by writing two Feather files.</span>
<span class="lineno"> 92 </span><span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">CreateExampleFeatherDataset</span><span class="p">(</span><span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno"> 93 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">root_path</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno"> 94 </span> <span class="k">auto</span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">root_path</span> <span class="o">+</span> <span class="s">&quot;/feather_dataset&quot;</span><span class="p">;</span>
<span class="lineno"> 95 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span>
<span class="lineno"> 96 </span> <span class="c1">// Create an Arrow Table</span>
<span class="lineno"> 97 </span> <span class="k">auto</span> <span class="n">table</span> <span class="o">=</span> <span class="n">CreateTable</span><span class="p">();</span>
<span class="lineno"> 98 </span> <span class="c1">// Write it into two Feather files</span>
<span class="lineno"> 99 </span> <span class="k">auto</span> <span class="n">output</span> <span class="o">=</span> <span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">OpenOutputStream</span><span class="p">(</span><span class="n">base_path</span> <span class="o">+</span> <span class="s">&quot;/data1.feather&quot;</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">100 </span> <span class="k">auto</span> <span class="n">writer</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">MakeFileWriter</span><span class="p">(</span><span class="n">output</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span> <span class="n">table</span><span class="o">-&gt;</span><span class="n">schema</span><span class="p">()).</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">101 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">writer</span><span class="o">-&gt;</span><span class="n">WriteTable</span><span class="p">(</span><span class="o">*</span><span class="n">table</span><span class="o">-&gt;</span><span class="n">Slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">5</span><span class="p">)));</span>
<span class="lineno">102 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">writer</span><span class="o">-&gt;</span><span class="n">Close</span><span class="p">());</span>
<span class="lineno">103 </span> <span class="n">output</span> <span class="o">=</span> <span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">OpenOutputStream</span><span class="p">(</span><span class="n">base_path</span> <span class="o">+</span> <span class="s">&quot;/data2.feather&quot;</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">104 </span> <span class="n">writer</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">MakeFileWriter</span><span class="p">(</span><span class="n">output</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span> <span class="n">table</span><span class="o">-&gt;</span><span class="n">schema</span><span class="p">()).</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">105 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">writer</span><span class="o">-&gt;</span><span class="n">WriteTable</span><span class="p">(</span><span class="o">*</span><span class="n">table</span><span class="o">-&gt;</span><span class="n">Slice</span><span class="p">(</span><span class="mi">5</span><span class="p">)));</span>
<span class="lineno">106 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">writer</span><span class="o">-&gt;</span><span class="n">Close</span><span class="p">());</span>
<span class="lineno">107 </span> <span class="k">return</span> <span class="n">base_path</span><span class="p">;</span>
<span class="lineno">108 </span><span class="p">}</span>
</pre></div>
</div>
<p>…then we can read the Feather file by passing an <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset13IpcFileFormatE" title="arrow::dataset::IpcFileFormat"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::IpcFileFormat</span></code></a>:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span> <span class="n">format</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">IpcFileFormat</span><span class="o">&gt;</span><span class="p">();</span>
<span class="c1">// ...</span>
<span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">options</span><span class="p">)</span>
<span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span>
</pre></div>
</div>
</div>
<div class="section" id="customizing-file-formats">
<h3>Customizing file formats<a class="headerlink" href="#customizing-file-formats" title="Permalink to this headline"></a></h3>
<p><a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset10FileFormatE" title="arrow::dataset::FileFormat"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::FileFormat</span></code></a> objects have properties that control how
files are read. For example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span> <span class="n">format</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">ParquetFileFormat</span><span class="o">&gt;</span><span class="p">();</span>
<span class="n">format</span><span class="o">-&gt;</span><span class="n">reader_options</span><span class="p">.</span><span class="n">dict_columns</span><span class="p">.</span><span class="n">insert</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">);</span>
</pre></div>
</div>
<p>This will configure column <code class="docutils literal notranslate"><span class="pre">&quot;a&quot;</span></code> to be dictionary-encoded when read. Similarly,
setting <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset13CsvFileFormat13parse_optionsE" title="arrow::dataset::CsvFileFormat::parse_options"><code class="xref cpp cpp-member docutils literal notranslate"><span class="pre">arrow::dataset::CsvFileFormat::parse_options</span></code></a> lets us change
things like whether we read comma-separated or tab-separated data.</p>
<p>Additionally, passing an <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset19FragmentScanOptionsE" title="arrow::dataset::FragmentScanOptions"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::FragmentScanOptions</span></code></a> to
<a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset14ScannerBuilder19FragmentScanOptionsENSt10shared_ptrI19FragmentScanOptionsEE" title="arrow::dataset::ScannerBuilder::FragmentScanOptions"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">arrow::dataset::ScannerBuilder::FragmentScanOptions()</span></code></a> offers fine-grained
control over data scanning. For example, for CSV files, we can change what values
are converted into Boolean true and false at scan time.</p>
</div>
</div>
<div class="section" id="filtering-data">
<span id="cpp-dataset-filtering-data"></span><h2>Filtering data<a class="headerlink" href="#filtering-data" title="Permalink to this headline"></a></h2>
<p>So far, we’ve been reading the entire dataset, but if we need only a subset of the
data, this can waste time or memory reading data we don’t need. The
<a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset7ScannerE" title="arrow::dataset::Scanner"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::Scanner</span></code></a> offers control over what data to read.</p>
<p>In this snippet, we use <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorINSt6stringEEE" title="arrow::dataset::ScannerBuilder::Project"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">arrow::dataset::ScannerBuilder::Project()</span></code></a> to select
which columns to read:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">182 </span><span class="c1">// Read a dataset, but select only column &quot;b&quot; and only rows where b &lt; 4.</span>
<span class="lineno">183 </span><span class="c1">//</span>
<span class="lineno">184 </span><span class="c1">// This is useful when you only want a few columns from a dataset. Where possible,</span>
<span class="lineno">185 </span><span class="c1">// Datasets will push down the column selection such that less work is done.</span>
<span class="lineno">186 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">FilterAndSelectDataset</span><span class="p">(</span>
<span class="lineno">187 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno">188 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">&gt;&amp;</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">189 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span>
<span class="lineno">190 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span>
<span class="lineno">191 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span>
<span class="lineno">192 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span>
<span class="lineno">193 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">194 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">195 </span> <span class="c1">// Read specified columns with a row filter</span>
<span class="lineno">196 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">197 </span><span class="hll"> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Project</span><span class="p">({</span><span class="s">&quot;b&quot;</span><span class="p">}));</span>
</span><span class="lineno">198 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Filter</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">less</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">4</span><span class="p">))));</span>
<span class="lineno">199 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">200 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-&gt;</span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">201 </span><span class="p">}</span>
</pre></div>
</div>
<p>Some formats, such as Parquet, can reduce I/O costs here by reading only the
specified columns from the filesystem.</p>
<p>A filter can be provided with <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset14ScannerBuilder6FilterERKN7compute10ExpressionE" title="arrow::dataset::ScannerBuilder::Filter"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">arrow::dataset::ScannerBuilder::Filter()</span></code></a>, so
that rows which do not match the filter predicate will not be included in the
returned table. Again, some formats, such as Parquet, can use this filter to
reduce the amount of I/O needed.</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">182 </span><span class="c1">// Read a dataset, but select only column &quot;b&quot; and only rows where b &lt; 4.</span>
<span class="lineno">183 </span><span class="c1">//</span>
<span class="lineno">184 </span><span class="c1">// This is useful when you only want a few columns from a dataset. Where possible,</span>
<span class="lineno">185 </span><span class="c1">// Datasets will push down the column selection such that less work is done.</span>
<span class="lineno">186 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">FilterAndSelectDataset</span><span class="p">(</span>
<span class="lineno">187 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno">188 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">&gt;&amp;</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">189 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span>
<span class="lineno">190 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span>
<span class="lineno">191 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span>
<span class="lineno">192 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span>
<span class="lineno">193 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">194 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">195 </span> <span class="c1">// Read specified columns with a row filter</span>
<span class="lineno">196 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">197 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Project</span><span class="p">({</span><span class="s">&quot;b&quot;</span><span class="p">}));</span>
<span class="lineno">198 </span><span class="hll"> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Filter</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">less</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">4</span><span class="p">))));</span>
</span><span class="lineno">199 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">200 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-&gt;</span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">201 </span><span class="p">}</span>
</pre></div>
</div>
</div>
<div class="section" id="projecting-columns">
<h2>Projecting columns<a class="headerlink" href="#projecting-columns" title="Permalink to this headline"></a></h2>
<p>In addition to selecting columns, <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset14ScannerBuilder7ProjectENSt6vectorINSt6stringEEE" title="arrow::dataset::ScannerBuilder::Project"><code class="xref cpp cpp-func docutils literal notranslate"><span class="pre">arrow::dataset::ScannerBuilder::Project()</span></code></a>
can also be used for more complex projections, such as renaming columns, casting
them to other types, and even deriving new columns based on evaluating
expressions.</p>
<p>In this case, we pass a vector of expressions used to construct column values
and a vector of names for the columns:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">205 </span><span class="c1">// Read a dataset, but with column projection.</span>
<span class="lineno">206 </span><span class="c1">//</span>
<span class="lineno">207 </span><span class="c1">// This is useful to derive new columns from existing data. For example, here we</span>
<span class="lineno">208 </span><span class="c1">// demonstrate casting a column to a different type, and turning a numeric column into a</span>
<span class="lineno">209 </span><span class="c1">// boolean column based on a predicate. You could also rename columns or perform</span>
<span class="lineno">210 </span><span class="c1">// computations involving multiple columns.</span>
<span class="lineno">211 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">ProjectDataset</span><span class="p">(</span>
<span class="lineno">212 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno">213 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">&gt;&amp;</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">214 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span>
<span class="lineno">215 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span>
<span class="lineno">216 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span>
<span class="lineno">217 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span>
<span class="lineno">218 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">219 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">220 </span> <span class="c1">// Read the dataset, projecting columns with expressions</span>
<span class="lineno">221 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">222 </span><span class="hll"> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Project</span><span class="p">(</span>
</span><span class="lineno">223 </span><span class="hll"> <span class="p">{</span>
</span><span class="lineno">224 </span><span class="hll"> <span class="c1">// Leave column &quot;a&quot; as-is.</span>
</span><span class="lineno">225 </span><span class="hll"> <span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">),</span>
</span><span class="lineno">226 </span><span class="hll"> <span class="c1">// Cast column &quot;b&quot; to float32.</span>
</span><span class="lineno">227 </span><span class="hll"> <span class="n">cp</span><span class="o">::</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;cast&quot;</span><span class="p">,</span> <span class="p">{</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">)},</span>
</span><span class="lineno">228 </span><span class="hll"> <span class="n">arrow</span><span class="o">::</span><span class="n">compute</span><span class="o">::</span><span class="n">CastOptions</span><span class="o">::</span><span class="n">Safe</span><span class="p">(</span><span class="n">arrow</span><span class="o">::</span><span class="n">float32</span><span class="p">())),</span>
</span><span class="lineno">229 </span><span class="hll"> <span class="c1">// Derive a boolean column from &quot;c&quot;.</span>
</span><span class="lineno">230 </span><span class="hll"> <span class="n">cp</span><span class="o">::</span><span class="n">equal</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;c&quot;</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">1</span><span class="p">)),</span>
</span><span class="lineno">231 </span><span class="hll"> <span class="p">},</span>
</span><span class="lineno">232 </span><span class="hll"> <span class="p">{</span><span class="s">&quot;a_renamed&quot;</span><span class="p">,</span> <span class="s">&quot;b_as_float32&quot;</span><span class="p">,</span> <span class="s">&quot;c_1&quot;</span><span class="p">}));</span>
</span><span class="lineno">233 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">234 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-&gt;</span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">235 </span><span class="p">}</span>
</pre></div>
</div>
<p>This also determines the column selection; only the given columns will be
present in the resulting table. If you want to include a derived column in
<em>addition</em> to the existing columns, you can build up the expressions from the
dataset schema:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">239 </span><span class="c1">// Read a dataset, but with column projection.</span>
<span class="lineno">240 </span><span class="c1">//</span>
<span class="lineno">241 </span><span class="c1">// This time, we read all original columns plus one derived column. This simply combines</span>
<span class="lineno">242 </span><span class="c1">// the previous two examples: selecting a subset of columns by name, and deriving new</span>
<span class="lineno">243 </span><span class="c1">// columns with an expression.</span>
<span class="lineno">244 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">SelectAndProjectDataset</span><span class="p">(</span>
<span class="lineno">245 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno">246 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">&gt;&amp;</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">247 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span>
<span class="lineno">248 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span>
<span class="lineno">249 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span>
<span class="lineno">250 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span>
<span class="lineno">251 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">252 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">253 </span> <span class="c1">// Build a projection of all original columns plus a derived one</span>
<span class="lineno">254 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">255 </span><span class="hll"> <span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&gt;</span> <span class="n">names</span><span class="p">;</span>
</span><span class="lineno">256 </span><span class="hll"> <span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">Expression</span><span class="o">&gt;</span> <span class="n">exprs</span><span class="p">;</span>
</span><span class="lineno">257 </span><span class="hll"> <span class="c1">// Read all the original columns.</span>
</span><span class="lineno">258 </span><span class="hll"> <span class="k">for</span> <span class="p">(</span><span class="k">const</span> <span class="k">auto</span><span class="o">&amp;</span> <span class="nl">field</span> <span class="p">:</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">schema</span><span class="p">()</span><span class="o">-&gt;</span><span class="n">fields</span><span class="p">())</span> <span class="p">{</span>
</span><span class="lineno">259 </span><span class="hll"> <span class="n">names</span><span class="p">.</span><span class="n">push_back</span><span class="p">(</span><span class="n">field</span><span class="o">-&gt;</span><span class="n">name</span><span class="p">());</span>
</span><span class="lineno">260 </span><span class="hll"> <span class="n">exprs</span><span class="p">.</span><span class="n">push_back</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="n">field</span><span class="o">-&gt;</span><span class="n">name</span><span class="p">()));</span>
</span><span class="lineno">261 </span><span class="hll"> <span class="p">}</span>
</span><span class="lineno">262 </span><span class="hll"> <span class="c1">// Also derive a new column.</span>
</span><span class="lineno">263 </span><span class="hll"> <span class="n">names</span><span class="p">.</span><span class="n">emplace_back</span><span class="p">(</span><span class="s">&quot;b_large&quot;</span><span class="p">);</span>
</span><span class="lineno">264 </span><span class="hll"> <span class="n">exprs</span><span class="p">.</span><span class="n">push_back</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">greater</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">1</span><span class="p">)));</span>
</span><span class="lineno">265 </span><span class="hll"> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Project</span><span class="p">(</span><span class="n">exprs</span><span class="p">,</span> <span class="n">names</span><span class="p">));</span>
</span><span class="lineno">266 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">267 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-&gt;</span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">268 </span><span class="p">}</span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>When combining filters and projections, Arrow will determine all
necessary columns to read. For instance, if you filter on a column that
isn’t ultimately selected, Arrow will still read the column to evaluate
the filter.</p>
</div>
</div>
<div class="section" id="reading-and-writing-partitioned-data">
<h2>Reading and writing partitioned data<a class="headerlink" href="#reading-and-writing-partitioned-data" title="Permalink to this headline"></a></h2>
<p>So far, we’ve been working with datasets consisting of flat directories with
files. Oftentimes, a dataset will have one or more columns that are frequently
filtered on. Instead of having to read and then filter the data, by organizing the
files into a nested directory structure, we can define a partitioned dataset,
where sub-directory names hold information about which subset of the data is
stored in that directory. Then, we can more efficiently filter data by using that
information to avoid loading files that don’t match the filter.</p>
<p>For example, a dataset partitioned by year and month may have the following layout:</p>
<div class="highlight-text notranslate"><div class="highlight"><pre><span></span>dataset_name/
year=2007/
month=01/
data0.parquet
data1.parquet
...
month=02/
data0.parquet
data1.parquet
...
month=03/
...
year=2008/
month=01/
...
...
</pre></div>
</div>
<p>The above partitioning scheme is using “/key=value/” directory names, as found in
Apache Hive. Under this convention, the file at
<code class="docutils literal notranslate"><span class="pre">dataset_name/year=2007/month=01/data0.parquet</span></code> contains only data for which
<code class="docutils literal notranslate"><span class="pre">year</span> <span class="pre">==</span> <span class="pre">2007</span></code> and <code class="docutils literal notranslate"><span class="pre">month</span> <span class="pre">==</span> <span class="pre">01</span></code>.</p>
<p>Let’s create a small partitioned dataset. For this, we’ll use Arrow’s dataset
writing functionality.</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">112 </span><span class="c1">// Set up a dataset by writing files with partitioning</span>
<span class="lineno">113 </span><span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">CreateExampleParquetHivePartitionedDataset</span><span class="p">(</span>
<span class="lineno">114 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">root_path</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">115 </span> <span class="k">auto</span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">root_path</span> <span class="o">+</span> <span class="s">&quot;/parquet_dataset&quot;</span><span class="p">;</span>
<span class="lineno">116 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span>
<span class="lineno">117 </span> <span class="c1">// Create an Arrow Table</span>
<span class="lineno">118 </span> <span class="k">auto</span> <span class="n">schema</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">(</span>
<span class="lineno">119 </span> <span class="p">{</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span>
<span class="lineno">120 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;c&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;part&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())});</span>
<span class="lineno">121 </span> <span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;&gt;</span> <span class="n">arrays</span><span class="p">(</span><span class="mi">4</span><span class="p">);</span>
<span class="lineno">122 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">NumericBuilder</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int64Type</span><span class="o">&gt;</span> <span class="n">builder</span><span class="p">;</span>
<span class="lineno">123 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">}));</span>
<span class="lineno">124 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">arrays</span><span class="p">[</span><span class="mi">0</span><span class="p">]));</span>
<span class="lineno">125 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span>
<span class="lineno">126 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">9</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">}));</span>
<span class="lineno">127 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">arrays</span><span class="p">[</span><span class="mi">1</span><span class="p">]));</span>
<span class="lineno">128 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span>
<span class="lineno">129 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">}));</span>
<span class="lineno">130 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">arrays</span><span class="p">[</span><span class="mi">2</span><span class="p">]));</span>
<span class="lineno">131 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">StringBuilder</span> <span class="n">string_builder</span><span class="p">;</span>
<span class="lineno">132 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span>
<span class="lineno">133 </span> <span class="n">string_builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="s">&quot;b&quot;</span><span class="p">}));</span>
<span class="lineno">134 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">string_builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">arrays</span><span class="p">[</span><span class="mi">3</span><span class="p">]));</span>
<span class="lineno">135 </span> <span class="k">auto</span> <span class="n">table</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">arrays</span><span class="p">);</span>
<span class="lineno">136 </span><span class="hll"> <span class="c1">// Write it using Datasets</span>
</span><span class="lineno">137 </span><span class="hll"> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">InMemoryDataset</span><span class="o">&gt;</span><span class="p">(</span><span class="n">table</span><span class="p">);</span>
</span><span class="lineno">138 </span><span class="hll"> <span class="k">auto</span> <span class="n">scanner_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
</span><span class="lineno">139 </span><span class="hll"> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scanner_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
</span><span class="lineno">140 </span><span class="hll">
</span><span class="lineno">141 </span><span class="hll"> <span class="c1">// The partition schema determines which fields are part of the partitioning.</span>
</span><span class="lineno">142 </span><span class="hll"> <span class="k">auto</span> <span class="n">partition_schema</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;part&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())});</span>
</span><span class="lineno">143 </span><span class="hll"> <span class="c1">// We&#39;ll use Hive-style partitioning, which creates directories with &quot;key=value&quot; pairs.</span>
</span><span class="lineno">144 </span><span class="hll"> <span class="k">auto</span> <span class="n">partitioning</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">&gt;</span><span class="p">(</span><span class="n">partition_schema</span><span class="p">);</span>
</span><span class="lineno">145 </span><span class="hll"> <span class="c1">// We&#39;ll write Parquet files.</span>
</span><span class="lineno">146 </span><span class="hll"> <span class="k">auto</span> <span class="n">format</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">ParquetFileFormat</span><span class="o">&gt;</span><span class="p">();</span>
</span><span class="lineno">147 </span><span class="hll"> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetWriteOptions</span> <span class="n">write_options</span><span class="p">;</span>
</span><span class="lineno">148 </span><span class="hll"> <span class="n">write_options</span><span class="p">.</span><span class="n">file_write_options</span> <span class="o">=</span> <span class="n">format</span><span class="o">-&gt;</span><span class="n">DefaultWriteOptions</span><span class="p">();</span>
</span><span class="lineno">149 </span><span class="hll"> <span class="n">write_options</span><span class="p">.</span><span class="n">filesystem</span> <span class="o">=</span> <span class="n">filesystem</span><span class="p">;</span>
</span><span class="lineno">150 </span><span class="hll"> <span class="n">write_options</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_path</span><span class="p">;</span>
</span><span class="lineno">151 </span><span class="hll"> <span class="n">write_options</span><span class="p">.</span><span class="n">partitioning</span> <span class="o">=</span> <span class="n">partitioning</span><span class="p">;</span>
</span><span class="lineno">152 </span><span class="hll"> <span class="n">write_options</span><span class="p">.</span><span class="n">basename_template</span> <span class="o">=</span> <span class="s">&quot;part{i}.parquet&quot;</span><span class="p">;</span>
</span><span class="lineno">153 </span><span class="hll"> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDataset</span><span class="o">::</span><span class="n">Write</span><span class="p">(</span><span class="n">write_options</span><span class="p">,</span> <span class="n">scanner</span><span class="p">));</span>
</span><span class="lineno">154 </span> <span class="k">return</span> <span class="n">base_path</span><span class="p">;</span>
<span class="lineno">155 </span><span class="p">}</span>
</pre></div>
</div>
<p>The above created a directory with two subdirectories (“part=a” and “part=b”),
and the Parquet files written in those directories no longer include the “part”
column.</p>
<p>Reading this dataset, we now specify that the dataset should use a Hive-like
partitioning scheme:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">272 </span><span class="c1">// Read an entire dataset, but with partitioning information.</span>
<span class="lineno">273 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">ScanPartitionedDataset</span><span class="p">(</span>
<span class="lineno">274 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno">275 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">&gt;&amp;</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">276 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span>
<span class="lineno">277 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span>
<span class="lineno">278 </span><span class="hll"> <span class="n">selector</span><span class="p">.</span><span class="n">recursive</span> <span class="o">=</span> <span class="nb">true</span><span class="p">;</span> <span class="c1">// Make sure to search subdirectories</span>
</span><span class="lineno">279 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span> <span class="n">options</span><span class="p">;</span>
<span class="lineno">280 </span><span class="hll"> <span class="c1">// We&#39;ll use Hive-style partitioning. We&#39;ll let Arrow Datasets infer the partition</span>
</span><span class="lineno">281 </span><span class="hll"> <span class="c1">// schema.</span>
</span><span class="lineno">282 </span><span class="hll"> <span class="n">options</span><span class="p">.</span><span class="n">partitioning</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">::</span><span class="n">MakeFactory</span><span class="p">();</span>
</span><span class="lineno">283 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">options</span><span class="p">)</span>
<span class="lineno">284 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">285 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">286 </span> <span class="c1">// Print out the fragments</span>
<span class="lineno">287 </span> <span class="k">for</span> <span class="p">(</span><span class="k">const</span> <span class="k">auto</span><span class="o">&amp;</span> <span class="nl">fragment</span> <span class="p">:</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">GetFragments</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">())</span> <span class="p">{</span>
<span class="lineno">288 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o">&lt;&lt;</span> <span class="s">&quot;Found fragment: &quot;</span> <span class="o">&lt;&lt;</span> <span class="p">(</span><span class="o">*</span><span class="n">fragment</span><span class="p">)</span><span class="o">-&gt;</span><span class="n">ToString</span><span class="p">()</span> <span class="o">&lt;&lt;</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="lineno">289 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o">&lt;&lt;</span> <span class="s">&quot;Partition expression: &quot;</span>
<span class="lineno">290 </span> <span class="o">&lt;&lt;</span> <span class="p">(</span><span class="o">*</span><span class="n">fragment</span><span class="p">)</span><span class="o">-&gt;</span><span class="n">partition_expression</span><span class="p">().</span><span class="n">ToString</span><span class="p">()</span> <span class="o">&lt;&lt;</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="lineno">291 </span> <span class="p">}</span>
<span class="lineno">292 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">293 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">294 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-&gt;</span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">295 </span><span class="p">}</span>
</pre></div>
</div>
<p>Although the partition fields are not included in the actual Parquet files,
they will be added back to the resulting table when scanning this dataset:</p>
<div class="highlight-text notranslate"><div class="highlight"><pre><span></span>$ ./debug/dataset_documentation_example file:///tmp parquet_hive partitioned
Found fragment: /tmp/parquet_dataset/part=a/part0.parquet
Partition expression: (part == &quot;a&quot;)
Found fragment: /tmp/parquet_dataset/part=b/part1.parquet
Partition expression: (part == &quot;b&quot;)
Read 20 rows
a: int64
-- field metadata --
PARQUET:field_id: &#39;1&#39;
b: double
-- field metadata --
PARQUET:field_id: &#39;2&#39;
c: int64
-- field metadata --
PARQUET:field_id: &#39;3&#39;
part: string
----
# snip...
</pre></div>
</div>
<p>We can now filter on the partition keys, which avoids loading files
altogether if they do not match the filter:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">299 </span><span class="c1">// Read an entire dataset, but with partitioning information. Also, filter the dataset on</span>
<span class="lineno">300 </span><span class="c1">// the partition values.</span>
<span class="lineno">301 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">FilterPartitionedDataset</span><span class="p">(</span>
<span class="lineno">302 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno">303 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">&gt;&amp;</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">304 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span>
<span class="lineno">305 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span>
<span class="lineno">306 </span> <span class="n">selector</span><span class="p">.</span><span class="n">recursive</span> <span class="o">=</span> <span class="nb">true</span><span class="p">;</span>
<span class="lineno">307 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span> <span class="n">options</span><span class="p">;</span>
<span class="lineno">308 </span> <span class="n">options</span><span class="p">.</span><span class="n">partitioning</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">::</span><span class="n">MakeFactory</span><span class="p">();</span>
<span class="lineno">309 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">options</span><span class="p">)</span>
<span class="lineno">310 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">311 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">312 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">313 </span><span class="hll"> <span class="c1">// Filter based on the partition values. This will mean that we won&#39;t even read the</span>
</span><span class="lineno">314 </span><span class="hll"> <span class="c1">// files whose partition expressions don&#39;t match the filter.</span>
</span><span class="lineno">315 </span><span class="hll"> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span>
</span><span class="lineno">316 </span><span class="hll"> <span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Filter</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">equal</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;part&quot;</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">))));</span>
</span><span class="lineno">317 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">318 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-&gt;</span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">319 </span><span class="p">}</span>
</pre></div>
</div>
<div class="section" id="different-partitioning-schemes">
<h3>Different partitioning schemes<a class="headerlink" href="#different-partitioning-schemes" title="Permalink to this headline"></a></h3>
<p>The above example uses a Hive-like directory scheme, such as “/year=2009/month=11/day=15”.
We specified this by passing the Hive partitioning factory. In this case, the types of
the partition keys are inferred from the file paths.</p>
<p>It is also possible to directly construct the partitioning and explicitly define
the schema of the partition keys. For example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span> <span class="n">part</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">&gt;</span><span class="p">(</span><span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span>
<span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;year&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int16</span><span class="p">()),</span>
<span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;month&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int8</span><span class="p">()),</span>
<span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;day&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int32</span><span class="p">())</span>
<span class="p">}));</span>
</pre></div>
</div>
<p>Arrow supports another partitioning scheme, “directory partitioning”, where the
segments in the file path represent the values of the partition keys without
including the name (the field names are implicit in the segment’s index). For
example, given field names “year”, “month”, and “day”, one path might be
“/2019/11/15”.</p>
<p>Since the names are not included in the file paths, these must be specified
when constructing a directory partitioning:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span> <span class="n">part</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">DirectoryPartitioning</span><span class="o">::</span><span class="n">MakeFactory</span><span class="p">({</span><span class="s">&quot;year&quot;</span><span class="p">,</span> <span class="s">&quot;month&quot;</span><span class="p">,</span> <span class="s">&quot;day&quot;</span><span class="p">});</span>
</pre></div>
</div>
<p>Directory partitioning also supports providing a full schema rather than inferring
types from file paths.</p>
</div>
<div class="section" id="partitioning-performance-considerations">
<h3>Partitioning performance considerations<a class="headerlink" href="#partitioning-performance-considerations" title="Permalink to this headline"></a></h3>
<p>Partitioning datasets has two aspects that affect performance: it increases the number of
files and it creates a directory structure around the files. Both of these have benefits
as well as costs. Depending on the configuration and the size of your dataset, the costs
can outweigh the benefits.</p>
<p>Because partitions split up the dataset into multiple files, partitioned datasets can be
read and written with parallelism. However, each additional file adds a little overhead in
processing for filesystem interaction. It also increases the overall dataset size since
each file has some shared metadata. For example, each Parquet file contains the schema and
group-level statistics. The number of partitions is a floor for the number of files. If
you partition a dataset by date with a year of data, you will have at least 365 files. If
you further partition by another dimension with 1,000 unique values, you will have up to
365,000 files. Partitioning this finely often leads to small files that mostly consist of
metadata.</p>
<p>Partitioned datasets create nested folder structures, and those allow us to prune which
files are loaded in a scan. However, this adds overhead to discovering files in the dataset,
as we’ll need to recursively “list directory” to find the data files. Too fine
partitions can cause problems here: Partitioning a dataset by date for a year’s worth
of data will require 365 list calls to find all the files; adding another column with
cardinality 1,000 will make that 365,365 calls.</p>
<p>The optimal partitioning layout will depend on your data, access patterns, and which
systems will be reading the data. Most systems, including Arrow, should work across a
range of file sizes and partitioning layouts, but there are extremes you should avoid. These
guidelines can help avoid some known worst cases:</p>
<ul class="simple">
<li><p>Avoid files smaller than 20MB and larger than 2GB.</p></li>
<li><p>Avoid partitioning layouts with more than 10,000 distinct partitions.</p></li>
</ul>
<p>For file formats that have a notion of groups within a file, such as Parquet, similar
guidelines apply. Row groups can provide parallelism when reading and allow data skipping
based on statistics, but very small groups can cause metadata to be a significant portion
of file size. Arrow’s file writer provides sensible defaults for group sizing in most cases.</p>
</div>
</div>
<div class="section" id="reading-from-other-data-sources">
<h2>Reading from other data sources<a class="headerlink" href="#reading-from-other-data-sources" title="Permalink to this headline"></a></h2>
<div class="section" id="reading-in-memory-data">
<h3>Reading in-memory data<a class="headerlink" href="#reading-in-memory-data" title="Permalink to this headline"></a></h3>
<p>If you already have data in memory that you’d like to use with the Datasets API
(e.g. to filter/project data, or to write it out to a filesystem), you can wrap it
in an <a class="reference internal" href="api/dataset.html#_CPPv4N5arrow7dataset15InMemoryDatasetE" title="arrow::dataset::InMemoryDataset"><code class="xref cpp cpp-class docutils literal notranslate"><span class="pre">arrow::dataset::InMemoryDataset</span></code></a>:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span> <span class="n">table</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">FromRecordBatches</span><span class="p">(...);</span>
<span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="o">::</span><span class="n">InMemoryDataset</span><span class="o">&gt;</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">move</span><span class="p">(</span><span class="n">table</span><span class="p">));</span>
<span class="c1">// Scan the dataset, filter it, etc.</span>
<span class="k">auto</span> <span class="n">scanner_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">();</span>
</pre></div>
</div>
<p>In the example, we used the InMemoryDataset to write our example data to local
disk which was used in the rest of the example:</p>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno">112 </span><span class="c1">// Set up a dataset by writing files with partitioning</span>
<span class="lineno">113 </span><span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">CreateExampleParquetHivePartitionedDataset</span><span class="p">(</span>
<span class="lineno">114 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">root_path</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">115 </span> <span class="k">auto</span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">root_path</span> <span class="o">+</span> <span class="s">&quot;/parquet_dataset&quot;</span><span class="p">;</span>
<span class="lineno">116 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span>
<span class="lineno">117 </span> <span class="c1">// Create an Arrow Table</span>
<span class="lineno">118 </span> <span class="k">auto</span> <span class="n">schema</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">(</span>
<span class="lineno">119 </span> <span class="p">{</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span>
<span class="lineno">120 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;c&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;part&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())});</span>
<span class="lineno">121 </span> <span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;&gt;</span> <span class="n">arrays</span><span class="p">(</span><span class="mi">4</span><span class="p">);</span>
<span class="lineno">122 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">NumericBuilder</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int64Type</span><span class="o">&gt;</span> <span class="n">builder</span><span class="p">;</span>
<span class="lineno">123 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">}));</span>
<span class="lineno">124 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">arrays</span><span class="p">[</span><span class="mi">0</span><span class="p">]));</span>
<span class="lineno">125 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span>
<span class="lineno">126 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">9</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">}));</span>
<span class="lineno">127 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">arrays</span><span class="p">[</span><span class="mi">1</span><span class="p">]));</span>
<span class="lineno">128 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span>
<span class="lineno">129 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">}));</span>
<span class="lineno">130 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">arrays</span><span class="p">[</span><span class="mi">2</span><span class="p">]));</span>
<span class="lineno">131 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">StringBuilder</span> <span class="n">string_builder</span><span class="p">;</span>
<span class="lineno">132 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span>
<span class="lineno">133 </span> <span class="n">string_builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="s">&quot;b&quot;</span><span class="p">}));</span>
<span class="lineno">134 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">string_builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">arrays</span><span class="p">[</span><span class="mi">3</span><span class="p">]));</span>
<span class="lineno">135 </span><span class="hll"> <span class="k">auto</span> <span class="n">table</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">arrays</span><span class="p">);</span>
</span><span class="lineno">136 </span><span class="hll"> <span class="c1">// Write it using Datasets</span>
</span><span class="lineno">137 </span><span class="hll"> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">InMemoryDataset</span><span class="o">&gt;</span><span class="p">(</span><span class="n">table</span><span class="p">);</span>
</span><span class="lineno">138 </span><span class="hll"> <span class="k">auto</span> <span class="n">scanner_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
</span><span class="lineno">139 </span><span class="hll"> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scanner_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
</span><span class="lineno">140 </span>
<span class="lineno">141 </span> <span class="c1">// The partition schema determines which fields are part of the partitioning.</span>
<span class="lineno">142 </span> <span class="k">auto</span> <span class="n">partition_schema</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;part&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())});</span>
<span class="lineno">143 </span> <span class="c1">// We&#39;ll use Hive-style partitioning, which creates directories with &quot;key=value&quot; pairs.</span>
<span class="lineno">144 </span> <span class="k">auto</span> <span class="n">partitioning</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">&gt;</span><span class="p">(</span><span class="n">partition_schema</span><span class="p">);</span>
<span class="lineno">145 </span> <span class="c1">// We&#39;ll write Parquet files.</span>
<span class="lineno">146 </span> <span class="k">auto</span> <span class="n">format</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">ParquetFileFormat</span><span class="o">&gt;</span><span class="p">();</span>
<span class="lineno">147 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetWriteOptions</span> <span class="n">write_options</span><span class="p">;</span>
<span class="lineno">148 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">file_write_options</span> <span class="o">=</span> <span class="n">format</span><span class="o">-&gt;</span><span class="n">DefaultWriteOptions</span><span class="p">();</span>
<span class="lineno">149 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">filesystem</span> <span class="o">=</span> <span class="n">filesystem</span><span class="p">;</span>
<span class="lineno">150 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_path</span><span class="p">;</span>
<span class="lineno">151 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">partitioning</span> <span class="o">=</span> <span class="n">partitioning</span><span class="p">;</span>
<span class="lineno">152 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">basename_template</span> <span class="o">=</span> <span class="s">&quot;part{i}.parquet&quot;</span><span class="p">;</span>
<span class="lineno">153 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDataset</span><span class="o">::</span><span class="n">Write</span><span class="p">(</span><span class="n">write_options</span><span class="p">,</span> <span class="n">scanner</span><span class="p">));</span>
<span class="lineno">154 </span> <span class="k">return</span> <span class="n">base_path</span><span class="p">;</span>
<span class="lineno">155 </span><span class="p">}</span>
</pre></div>
</div>
</div>
<div class="section" id="reading-from-cloud-storage">
<span id="cpp-dataset-cloud-storage"></span><h3>Reading from cloud storage<a class="headerlink" href="#reading-from-cloud-storage" title="Permalink to this headline"></a></h3>
<p>In addition to local files, Arrow Datasets also support reading from cloud
storage systems, such as Amazon S3, by passing a different filesystem.</p>
<p>See the <a class="reference internal" href="io.html#cpp-filesystems"><span class="std std-ref">filesystem</span></a> docs for more details on the available
filesystems.</p>
</div>
</div>
<div class="section" id="a-note-on-transactions-acid-guarantees">
<span id="cpp-dataset-full-example"></span><h2>A note on transactions &amp; ACID guarantees<a class="headerlink" href="#a-note-on-transactions-acid-guarantees" title="Permalink to this headline"></a></h2>
<p>The dataset API offers no transaction support or any ACID guarantees. This affects
both reading and writing. Concurrent reads are fine. Concurrent writes or writes
concurrent with reads may have unexpected behavior. Various approaches can be used
to avoid operating on the same files such as using a unique basename template for
each writer, a temporary directory for new files, or separate storage of the file
list instead of relying on directory discovery.</p>
<p>Unexpectedly killing the process while a write is in progress can leave the system
in an inconsistent state. Write calls generally return as soon as the bytes to be
written have been completely delivered to the OS page cache. Even though a write
operation has been completed it is possible for part of the file to be lost if
there is a sudden power loss immediately after the write call.</p>
<p>Most file formats have magic numbers which are written at the end. This means a
partial file write can safely be detected and discarded. The CSV file format does
not have any such concept and a partially written CSV file may be detected as valid.</p>
</div>
<div class="section" id="full-example">
<h2>Full Example<a class="headerlink" href="#full-example" title="Permalink to this headline"></a></h2>
<div class="highlight-cpp notranslate"><div class="highlight"><pre><span></span><span class="lineno"> 1 </span><span class="c1">// Licensed to the Apache Software Foundation (ASF) under one</span>
<span class="lineno"> 2 </span><span class="c1">// or more contributor license agreements. See the NOTICE file</span>
<span class="lineno"> 3 </span><span class="c1">// distributed with this work for additional information</span>
<span class="lineno"> 4 </span><span class="c1">// regarding copyright ownership. The ASF licenses this file</span>
<span class="lineno"> 5 </span><span class="c1">// to you under the Apache License, Version 2.0 (the</span>
<span class="lineno"> 6 </span><span class="c1">// &quot;License&quot;); you may not use this file except in compliance</span>
<span class="lineno"> 7 </span><span class="c1">// with the License. You may obtain a copy of the License at</span>
<span class="lineno"> 8 </span><span class="c1">//</span>
<span class="lineno"> 9 </span><span class="c1">// http://www.apache.org/licenses/LICENSE-2.0</span>
<span class="lineno"> 10 </span><span class="c1">//</span>
<span class="lineno"> 11 </span><span class="c1">// Unless required by applicable law or agreed to in writing,</span>
<span class="lineno"> 12 </span><span class="c1">// software distributed under the License is distributed on an</span>
<span class="lineno"> 13 </span><span class="c1">// &quot;AS IS&quot; BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY</span>
<span class="lineno"> 14 </span><span class="c1">// KIND, either express or implied. See the License for the</span>
<span class="lineno"> 15 </span><span class="c1">// specific language governing permissions and limitations</span>
<span class="lineno"> 16 </span><span class="c1">// under the License.</span>
<span class="lineno"> 17 </span>
<span class="lineno"> 18 </span><span class="c1">// This example showcases various ways to work with Datasets. It&#39;s</span>
<span class="lineno"> 19 </span><span class="c1">// intended to be paired with the documentation.</span>
<span class="lineno"> 20 </span>
<span class="lineno"> 21 </span><span class="cp">#include</span> <span class="cpf">&lt;arrow/api.h&gt;</span><span class="cp"></span>
<span class="lineno"> 22 </span><span class="cp">#include</span> <span class="cpf">&lt;arrow/compute/cast.h&gt;</span><span class="cp"></span>
<span class="lineno"> 23 </span><span class="cp">#include</span> <span class="cpf">&lt;arrow/compute/exec/expression.h&gt;</span><span class="cp"></span>
<span class="lineno"> 24 </span><span class="cp">#include</span> <span class="cpf">&lt;arrow/dataset/dataset.h&gt;</span><span class="cp"></span>
<span class="lineno"> 25 </span><span class="cp">#include</span> <span class="cpf">&lt;arrow/dataset/discovery.h&gt;</span><span class="cp"></span>
<span class="lineno"> 26 </span><span class="cp">#include</span> <span class="cpf">&lt;arrow/dataset/file_base.h&gt;</span><span class="cp"></span>
<span class="lineno"> 27 </span><span class="cp">#include</span> <span class="cpf">&lt;arrow/dataset/file_ipc.h&gt;</span><span class="cp"></span>
<span class="lineno"> 28 </span><span class="cp">#include</span> <span class="cpf">&lt;arrow/dataset/file_parquet.h&gt;</span><span class="cp"></span>
<span class="lineno"> 29 </span><span class="cp">#include</span> <span class="cpf">&lt;arrow/dataset/scanner.h&gt;</span><span class="cp"></span>
<span class="lineno"> 30 </span><span class="cp">#include</span> <span class="cpf">&lt;arrow/filesystem/filesystem.h&gt;</span><span class="cp"></span>
<span class="lineno"> 31 </span><span class="cp">#include</span> <span class="cpf">&lt;arrow/ipc/writer.h&gt;</span><span class="cp"></span>
<span class="lineno"> 32 </span><span class="cp">#include</span> <span class="cpf">&lt;arrow/util/iterator.h&gt;</span><span class="cp"></span>
<span class="lineno"> 33 </span><span class="cp">#include</span> <span class="cpf">&lt;parquet/arrow/writer.h&gt;</span><span class="cp"></span>
<span class="lineno"> 34 </span>
<span class="lineno"> 35 </span><span class="cp">#include</span> <span class="cpf">&lt;iostream&gt;</span><span class="cp"></span>
<span class="lineno"> 36 </span><span class="cp">#include</span> <span class="cpf">&lt;vector&gt;</span><span class="cp"></span>
<span class="lineno"> 37 </span>
<span class="lineno"> 38 </span><span class="k">namespace</span> <span class="n">ds</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">dataset</span><span class="p">;</span>
<span class="lineno"> 39 </span><span class="k">namespace</span> <span class="n">fs</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">fs</span><span class="p">;</span>
<span class="lineno"> 40 </span><span class="k">namespace</span> <span class="n">cp</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">compute</span><span class="p">;</span>
<span class="lineno"> 41 </span>
<span class="lineno"> 42 </span><span class="cp">#define ABORT_ON_FAILURE(expr) \</span>
<span class="lineno"> 43 </span><span class="cp"> do { \</span>
<span class="lineno"> 44 </span><span class="cp"> arrow::Status status_ = (expr); \</span>
<span class="lineno"> 45 </span><span class="cp"> if (!status_.ok()) { \</span>
<span class="lineno"> 46 </span><span class="cp"> std::cerr &lt;&lt; status_.message() &lt;&lt; std::endl; \</span>
<span class="lineno"> 47 </span><span class="cp"> abort(); \</span>
<span class="lineno"> 48 </span><span class="cp"> } \</span>
<span class="lineno"> 49 </span><span class="cp"> } while (0);</span>
<span class="lineno"> 50 </span>
<span class="lineno"> 51 </span><span class="c1">// (Doc section: Reading Datasets)</span>
<span class="lineno"> 52 </span><span class="c1">// Generate some data for the rest of this example.</span>
<span class="lineno"> 53 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">CreateTable</span><span class="p">()</span> <span class="p">{</span>
<span class="lineno"> 54 </span> <span class="k">auto</span> <span class="n">schema</span> <span class="o">=</span>
<span class="lineno"> 55 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span>
<span class="lineno"> 56 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;c&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">())});</span>
<span class="lineno"> 57 </span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;</span> <span class="n">array_a</span><span class="p">;</span>
<span class="lineno"> 58 </span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;</span> <span class="n">array_b</span><span class="p">;</span>
<span class="lineno"> 59 </span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;</span> <span class="n">array_c</span><span class="p">;</span>
<span class="lineno"> 60 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">NumericBuilder</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int64Type</span><span class="o">&gt;</span> <span class="n">builder</span><span class="p">;</span>
<span class="lineno"> 61 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">}));</span>
<span class="lineno"> 62 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">array_a</span><span class="p">));</span>
<span class="lineno"> 63 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span>
<span class="lineno"> 64 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">9</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">}));</span>
<span class="lineno"> 65 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">array_b</span><span class="p">));</span>
<span class="lineno"> 66 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span>
<span class="lineno"> 67 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">}));</span>
<span class="lineno"> 68 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">array_c</span><span class="p">));</span>
<span class="lineno"> 69 </span> <span class="k">return</span> <span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="p">{</span><span class="n">array_a</span><span class="p">,</span> <span class="n">array_b</span><span class="p">,</span> <span class="n">array_c</span><span class="p">});</span>
<span class="lineno"> 70 </span><span class="p">}</span>
<span class="lineno"> 71 </span>
<span class="lineno"> 72 </span><span class="c1">// Set up a dataset by writing two Parquet files.</span>
<span class="lineno"> 73 </span><span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">CreateExampleParquetDataset</span><span class="p">(</span><span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno"> 74 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">root_path</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno"> 75 </span> <span class="k">auto</span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">root_path</span> <span class="o">+</span> <span class="s">&quot;/parquet_dataset&quot;</span><span class="p">;</span>
<span class="lineno"> 76 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span>
<span class="lineno"> 77 </span> <span class="c1">// Create an Arrow Table</span>
<span class="lineno"> 78 </span> <span class="k">auto</span> <span class="n">table</span> <span class="o">=</span> <span class="n">CreateTable</span><span class="p">();</span>
<span class="lineno"> 79 </span> <span class="c1">// Write it into two Parquet files</span>
<span class="lineno"> 80 </span> <span class="k">auto</span> <span class="n">output</span> <span class="o">=</span> <span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">OpenOutputStream</span><span class="p">(</span><span class="n">base_path</span> <span class="o">+</span> <span class="s">&quot;/data1.parquet&quot;</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno"> 81 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">parquet</span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">WriteTable</span><span class="p">(</span>
<span class="lineno"> 82 </span> <span class="o">*</span><span class="n">table</span><span class="o">-&gt;</span><span class="n">Slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">5</span><span class="p">),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">(),</span> <span class="n">output</span><span class="p">,</span> <span class="cm">/*chunk_size=*/</span><span class="mi">2048</span><span class="p">));</span>
<span class="lineno"> 83 </span> <span class="n">output</span> <span class="o">=</span> <span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">OpenOutputStream</span><span class="p">(</span><span class="n">base_path</span> <span class="o">+</span> <span class="s">&quot;/data2.parquet&quot;</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno"> 84 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">parquet</span><span class="o">::</span><span class="n">arrow</span><span class="o">::</span><span class="n">WriteTable</span><span class="p">(</span>
<span class="lineno"> 85 </span> <span class="o">*</span><span class="n">table</span><span class="o">-&gt;</span><span class="n">Slice</span><span class="p">(</span><span class="mi">5</span><span class="p">),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">default_memory_pool</span><span class="p">(),</span> <span class="n">output</span><span class="p">,</span> <span class="cm">/*chunk_size=*/</span><span class="mi">2048</span><span class="p">));</span>
<span class="lineno"> 86 </span> <span class="k">return</span> <span class="n">base_path</span><span class="p">;</span>
<span class="lineno"> 87 </span><span class="p">}</span>
<span class="lineno"> 88 </span><span class="c1">// (Doc section: Reading Datasets)</span>
<span class="lineno"> 89 </span>
<span class="lineno"> 90 </span><span class="c1">// (Doc section: Reading different file formats)</span>
<span class="lineno"> 91 </span><span class="c1">// Set up a dataset by writing two Feather files.</span>
<span class="lineno"> 92 </span><span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">CreateExampleFeatherDataset</span><span class="p">(</span><span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno"> 93 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">root_path</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno"> 94 </span> <span class="k">auto</span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">root_path</span> <span class="o">+</span> <span class="s">&quot;/feather_dataset&quot;</span><span class="p">;</span>
<span class="lineno"> 95 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span>
<span class="lineno"> 96 </span> <span class="c1">// Create an Arrow Table</span>
<span class="lineno"> 97 </span> <span class="k">auto</span> <span class="n">table</span> <span class="o">=</span> <span class="n">CreateTable</span><span class="p">();</span>
<span class="lineno"> 98 </span> <span class="c1">// Write it into two Feather files</span>
<span class="lineno"> 99 </span> <span class="k">auto</span> <span class="n">output</span> <span class="o">=</span> <span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">OpenOutputStream</span><span class="p">(</span><span class="n">base_path</span> <span class="o">+</span> <span class="s">&quot;/data1.feather&quot;</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">100 </span> <span class="k">auto</span> <span class="n">writer</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">MakeFileWriter</span><span class="p">(</span><span class="n">output</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span> <span class="n">table</span><span class="o">-&gt;</span><span class="n">schema</span><span class="p">()).</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">101 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">writer</span><span class="o">-&gt;</span><span class="n">WriteTable</span><span class="p">(</span><span class="o">*</span><span class="n">table</span><span class="o">-&gt;</span><span class="n">Slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">5</span><span class="p">)));</span>
<span class="lineno">102 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">writer</span><span class="o">-&gt;</span><span class="n">Close</span><span class="p">());</span>
<span class="lineno">103 </span> <span class="n">output</span> <span class="o">=</span> <span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">OpenOutputStream</span><span class="p">(</span><span class="n">base_path</span> <span class="o">+</span> <span class="s">&quot;/data2.feather&quot;</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">104 </span> <span class="n">writer</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">ipc</span><span class="o">::</span><span class="n">MakeFileWriter</span><span class="p">(</span><span class="n">output</span><span class="p">.</span><span class="n">get</span><span class="p">(),</span> <span class="n">table</span><span class="o">-&gt;</span><span class="n">schema</span><span class="p">()).</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">105 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">writer</span><span class="o">-&gt;</span><span class="n">WriteTable</span><span class="p">(</span><span class="o">*</span><span class="n">table</span><span class="o">-&gt;</span><span class="n">Slice</span><span class="p">(</span><span class="mi">5</span><span class="p">)));</span>
<span class="lineno">106 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">writer</span><span class="o">-&gt;</span><span class="n">Close</span><span class="p">());</span>
<span class="lineno">107 </span> <span class="k">return</span> <span class="n">base_path</span><span class="p">;</span>
<span class="lineno">108 </span><span class="p">}</span>
<span class="lineno">109 </span><span class="c1">// (Doc section: Reading different file formats)</span>
<span class="lineno">110 </span>
<span class="lineno">111 </span><span class="c1">// (Doc section: Reading and writing partitioned data)</span>
<span class="lineno">112 </span><span class="c1">// Set up a dataset by writing Hive-partitioned Parquet files.</span>
<span class="lineno">113 </span><span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">CreateExampleParquetHivePartitionedDataset</span><span class="p">(</span>
<span class="lineno">114 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">root_path</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">115 </span> <span class="k">auto</span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">root_path</span> <span class="o">+</span> <span class="s">&quot;/parquet_dataset&quot;</span><span class="p">;</span>
<span class="lineno">116 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">filesystem</span><span class="o">-&gt;</span><span class="n">CreateDir</span><span class="p">(</span><span class="n">base_path</span><span class="p">));</span>
<span class="lineno">117 </span> <span class="c1">// Create an Arrow Table</span>
<span class="lineno">118 </span> <span class="k">auto</span> <span class="n">schema</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">(</span>
<span class="lineno">119 </span> <span class="p">{</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span>
<span class="lineno">120 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;c&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">int64</span><span class="p">()),</span> <span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;part&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())});</span>
<span class="lineno">121 </span> <span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Array</span><span class="o">&gt;&gt;</span> <span class="n">arrays</span><span class="p">(</span><span class="mi">4</span><span class="p">);</span>
<span class="lineno">122 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">NumericBuilder</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Int64Type</span><span class="o">&gt;</span> <span class="n">builder</span><span class="p">;</span>
<span class="lineno">123 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">}));</span>
<span class="lineno">124 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">arrays</span><span class="p">[</span><span class="mi">0</span><span class="p">]));</span>
<span class="lineno">125 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span>
<span class="lineno">126 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">9</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">}));</span>
<span class="lineno">127 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">arrays</span><span class="p">[</span><span class="mi">1</span><span class="p">]));</span>
<span class="lineno">128 </span> <span class="n">builder</span><span class="p">.</span><span class="n">Reset</span><span class="p">();</span>
<span class="lineno">129 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">}));</span>
<span class="lineno">130 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">arrays</span><span class="p">[</span><span class="mi">2</span><span class="p">]));</span>
<span class="lineno">131 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">StringBuilder</span> <span class="n">string_builder</span><span class="p">;</span>
<span class="lineno">132 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span>
<span class="lineno">133 </span> <span class="n">string_builder</span><span class="p">.</span><span class="n">AppendValues</span><span class="p">({</span><span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;a&quot;</span><span class="p">,</span> <span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="s">&quot;b&quot;</span><span class="p">,</span> <span class="s">&quot;b&quot;</span><span class="p">}));</span>
<span class="lineno">134 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">string_builder</span><span class="p">.</span><span class="n">Finish</span><span class="p">(</span><span class="o">&amp;</span><span class="n">arrays</span><span class="p">[</span><span class="mi">3</span><span class="p">]));</span>
<span class="lineno">135 </span> <span class="k">auto</span> <span class="n">table</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">arrays</span><span class="p">);</span>
<span class="lineno">136 </span> <span class="c1">// Write it using Datasets</span>
<span class="lineno">137 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">InMemoryDataset</span><span class="o">&gt;</span><span class="p">(</span><span class="n">table</span><span class="p">);</span>
<span class="lineno">138 </span> <span class="k">auto</span> <span class="n">scanner_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">139 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scanner_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">140 </span>
<span class="lineno">141 </span> <span class="c1">// The partition schema determines which fields are part of the partitioning.</span>
<span class="lineno">142 </span> <span class="k">auto</span> <span class="n">partition_schema</span> <span class="o">=</span> <span class="n">arrow</span><span class="o">::</span><span class="n">schema</span><span class="p">({</span><span class="n">arrow</span><span class="o">::</span><span class="n">field</span><span class="p">(</span><span class="s">&quot;part&quot;</span><span class="p">,</span> <span class="n">arrow</span><span class="o">::</span><span class="n">utf8</span><span class="p">())});</span>
<span class="lineno">143 </span> <span class="c1">// We&#39;ll use Hive-style partitioning, which creates directories with &quot;key=value&quot; pairs.</span>
<span class="lineno">144 </span> <span class="k">auto</span> <span class="n">partitioning</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">&gt;</span><span class="p">(</span><span class="n">partition_schema</span><span class="p">);</span>
<span class="lineno">145 </span> <span class="c1">// We&#39;ll write Parquet files.</span>
<span class="lineno">146 </span> <span class="k">auto</span> <span class="n">format</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">ParquetFileFormat</span><span class="o">&gt;</span><span class="p">();</span>
<span class="lineno">147 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetWriteOptions</span> <span class="n">write_options</span><span class="p">;</span>
<span class="lineno">148 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">file_write_options</span> <span class="o">=</span> <span class="n">format</span><span class="o">-&gt;</span><span class="n">DefaultWriteOptions</span><span class="p">();</span>
<span class="lineno">149 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">filesystem</span> <span class="o">=</span> <span class="n">filesystem</span><span class="p">;</span>
<span class="lineno">150 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_path</span><span class="p">;</span>
<span class="lineno">151 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">partitioning</span> <span class="o">=</span> <span class="n">partitioning</span><span class="p">;</span>
<span class="lineno">152 </span> <span class="n">write_options</span><span class="p">.</span><span class="n">basename_template</span> <span class="o">=</span> <span class="s">&quot;part{i}.parquet&quot;</span><span class="p">;</span>
<span class="lineno">153 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDataset</span><span class="o">::</span><span class="n">Write</span><span class="p">(</span><span class="n">write_options</span><span class="p">,</span> <span class="n">scanner</span><span class="p">));</span>
<span class="lineno">154 </span> <span class="k">return</span> <span class="n">base_path</span><span class="p">;</span>
<span class="lineno">155 </span><span class="p">}</span>
<span class="lineno">156 </span><span class="c1">// (Doc section: Reading and writing partitioned data)</span>
<span class="lineno">157 </span>
<span class="lineno">158 </span><span class="c1">// (Doc section: Dataset discovery)</span>
<span class="lineno">159 </span><span class="c1">// Read the whole dataset with the given format, without partitioning.</span>
<span class="lineno">160 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">ScanWholeDataset</span><span class="p">(</span>
<span class="lineno">161 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno">162 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">&gt;&amp;</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">163 </span> <span class="c1">// Create a dataset by scanning the filesystem for files</span>
<span class="lineno">164 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span>
<span class="lineno">165 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span>
<span class="lineno">166 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span>
<span class="lineno">167 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span>
<span class="lineno">168 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">169 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">170 </span> <span class="c1">// Print out the fragments</span>
<span class="lineno">171 </span> <span class="k">for</span> <span class="p">(</span><span class="k">const</span> <span class="k">auto</span><span class="o">&amp;</span> <span class="nl">fragment</span> <span class="p">:</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">GetFragments</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">())</span> <span class="p">{</span>
<span class="lineno">172 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o">&lt;&lt;</span> <span class="s">&quot;Found fragment: &quot;</span> <span class="o">&lt;&lt;</span> <span class="p">(</span><span class="o">*</span><span class="n">fragment</span><span class="p">)</span><span class="o">-&gt;</span><span class="n">ToString</span><span class="p">()</span> <span class="o">&lt;&lt;</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="lineno">173 </span> <span class="p">}</span>
<span class="lineno">174 </span> <span class="c1">// Read the entire dataset as a Table</span>
<span class="lineno">175 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">176 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">177 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-&gt;</span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">178 </span><span class="p">}</span>
<span class="lineno">179 </span><span class="c1">// (Doc section: Dataset discovery)</span>
<span class="lineno">180 </span>
<span class="lineno">181 </span><span class="c1">// (Doc section: Filtering data)</span>
<span class="lineno">182 </span><span class="c1">// Read a dataset, but select only column &quot;b&quot; and only rows where b &lt; 4.</span>
<span class="lineno">183 </span><span class="c1">//</span>
<span class="lineno">184 </span><span class="c1">// This is useful when you only want a few columns or rows from a dataset. Where</span>
<span class="lineno">185 </span><span class="c1">// possible, Datasets will push down the column selection and row filter so less work is done.</span>
<span class="lineno">186 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">FilterAndSelectDataset</span><span class="p">(</span>
<span class="lineno">187 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno">188 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">&gt;&amp;</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">189 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span>
<span class="lineno">190 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span>
<span class="lineno">191 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span>
<span class="lineno">192 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span>
<span class="lineno">193 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">194 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">195 </span> <span class="c1">// Read specified columns with a row filter</span>
<span class="lineno">196 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">197 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Project</span><span class="p">({</span><span class="s">&quot;b&quot;</span><span class="p">}));</span>
<span class="lineno">198 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Filter</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">less</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">4</span><span class="p">))));</span>
<span class="lineno">199 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">200 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-&gt;</span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">201 </span><span class="p">}</span>
<span class="lineno">202 </span><span class="c1">// (Doc section: Filtering data)</span>
<span class="lineno">203 </span>
<span class="lineno">204 </span><span class="c1">// (Doc section: Projecting columns)</span>
<span class="lineno">205 </span><span class="c1">// Read a dataset, but with column projection.</span>
<span class="lineno">206 </span><span class="c1">//</span>
<span class="lineno">207 </span><span class="c1">// This is useful to derive new columns from existing data. For example, here we</span>
<span class="lineno">208 </span><span class="c1">// demonstrate casting a column to a different type, and turning a numeric column into a</span>
<span class="lineno">209 </span><span class="c1">// boolean column based on a predicate. You could also rename columns or perform</span>
<span class="lineno">210 </span><span class="c1">// computations involving multiple columns.</span>
<span class="lineno">211 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">ProjectDataset</span><span class="p">(</span>
<span class="lineno">212 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno">213 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">&gt;&amp;</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">214 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span>
<span class="lineno">215 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span>
<span class="lineno">216 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span>
<span class="lineno">217 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span>
<span class="lineno">218 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">219 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">220 </span> <span class="c1">// Project columns through expressions (no row filter is applied here)</span>
<span class="lineno">221 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">222 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Project</span><span class="p">(</span>
<span class="lineno">223 </span> <span class="p">{</span>
<span class="lineno">224 </span> <span class="c1">// Keep the values of column &quot;a&quot; as-is; only its output name changes.</span>
<span class="lineno">225 </span> <span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;a&quot;</span><span class="p">),</span>
<span class="lineno">226 </span> <span class="c1">// Cast column &quot;b&quot; to float32.</span>
<span class="lineno">227 </span> <span class="n">cp</span><span class="o">::</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;cast&quot;</span><span class="p">,</span> <span class="p">{</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">)},</span>
<span class="lineno">228 </span> <span class="n">arrow</span><span class="o">::</span><span class="n">compute</span><span class="o">::</span><span class="n">CastOptions</span><span class="o">::</span><span class="n">Safe</span><span class="p">(</span><span class="n">arrow</span><span class="o">::</span><span class="n">float32</span><span class="p">())),</span>
<span class="lineno">229 </span> <span class="c1">// Derive a boolean column from &quot;c&quot;.</span>
<span class="lineno">230 </span> <span class="n">cp</span><span class="o">::</span><span class="n">equal</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;c&quot;</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">1</span><span class="p">)),</span>
<span class="lineno">231 </span> <span class="p">},</span>
<span class="lineno">232 </span> <span class="p">{</span><span class="s">&quot;a_renamed&quot;</span><span class="p">,</span> <span class="s">&quot;b_as_float32&quot;</span><span class="p">,</span> <span class="s">&quot;c_1&quot;</span><span class="p">}));</span>
<span class="lineno">233 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">234 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-&gt;</span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">235 </span><span class="p">}</span>
<span class="lineno">236 </span><span class="c1">// (Doc section: Projecting columns)</span>
<span class="lineno">237 </span>
<span class="lineno">238 </span><span class="c1">// (Doc section: Projecting columns #2)</span>
<span class="lineno">239 </span><span class="c1">// Read a dataset, but with column projection.</span>
<span class="lineno">240 </span><span class="c1">//</span>
<span class="lineno">241 </span><span class="c1">// This time, we read all original columns plus one derived column. This simply combines</span>
<span class="lineno">242 </span><span class="c1">// the previous two examples: selecting a subset of columns by name, and deriving new</span>
<span class="lineno">243 </span><span class="c1">// columns with an expression.</span>
<span class="lineno">244 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">SelectAndProjectDataset</span><span class="p">(</span>
<span class="lineno">245 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno">246 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">&gt;&amp;</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">247 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span>
<span class="lineno">248 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span>
<span class="lineno">249 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span>
<span class="lineno">250 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span><span class="p">())</span>
<span class="lineno">251 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">252 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">253 </span> <span class="c1">// Build a projection of every original column plus one derived column</span>
<span class="lineno">254 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">255 </span> <span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&gt;</span> <span class="n">names</span><span class="p">;</span>
<span class="lineno">256 </span> <span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="n">cp</span><span class="o">::</span><span class="n">Expression</span><span class="o">&gt;</span> <span class="n">exprs</span><span class="p">;</span>
<span class="lineno">257 </span> <span class="c1">// Read all the original columns.</span>
<span class="lineno">258 </span> <span class="k">for</span> <span class="p">(</span><span class="k">const</span> <span class="k">auto</span><span class="o">&amp;</span> <span class="nl">field</span> <span class="p">:</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">schema</span><span class="p">()</span><span class="o">-&gt;</span><span class="n">fields</span><span class="p">())</span> <span class="p">{</span>
<span class="lineno">259 </span> <span class="n">names</span><span class="p">.</span><span class="n">push_back</span><span class="p">(</span><span class="n">field</span><span class="o">-&gt;</span><span class="n">name</span><span class="p">());</span>
<span class="lineno">260 </span> <span class="n">exprs</span><span class="p">.</span><span class="n">push_back</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="n">field</span><span class="o">-&gt;</span><span class="n">name</span><span class="p">()));</span>
<span class="lineno">261 </span> <span class="p">}</span>
<span class="lineno">262 </span> <span class="c1">// Also derive a new column.</span>
<span class="lineno">263 </span> <span class="n">names</span><span class="p">.</span><span class="n">emplace_back</span><span class="p">(</span><span class="s">&quot;b_large&quot;</span><span class="p">);</span>
<span class="lineno">264 </span> <span class="n">exprs</span><span class="p">.</span><span class="n">push_back</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">greater</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="mi">1</span><span class="p">)));</span>
<span class="lineno">265 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span><span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Project</span><span class="p">(</span><span class="n">exprs</span><span class="p">,</span> <span class="n">names</span><span class="p">));</span>
<span class="lineno">266 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">267 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-&gt;</span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">268 </span><span class="p">}</span>
<span class="lineno">269 </span><span class="c1">// (Doc section: Projecting columns #2)</span>
<span class="lineno">270 </span>
<span class="lineno">271 </span><span class="c1">// (Doc section: Reading and writing partitioned data #2)</span>
<span class="lineno">272 </span><span class="c1">// Read an entire dataset, but with partitioning information.</span>
<span class="lineno">273 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">ScanPartitionedDataset</span><span class="p">(</span>
<span class="lineno">274 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno">275 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">&gt;&amp;</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">276 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span>
<span class="lineno">277 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span>
<span class="lineno">278 </span> <span class="n">selector</span><span class="p">.</span><span class="n">recursive</span> <span class="o">=</span> <span class="nb">true</span><span class="p">;</span> <span class="c1">// Make sure to search subdirectories</span>
<span class="lineno">279 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span> <span class="n">options</span><span class="p">;</span>
<span class="lineno">280 </span> <span class="c1">// We&#39;ll use Hive-style partitioning. We&#39;ll let Arrow Datasets infer the partition</span>
<span class="lineno">281 </span> <span class="c1">// schema.</span>
<span class="lineno">282 </span> <span class="n">options</span><span class="p">.</span><span class="n">partitioning</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">::</span><span class="n">MakeFactory</span><span class="p">();</span>
<span class="lineno">283 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">options</span><span class="p">)</span>
<span class="lineno">284 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">285 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">286 </span> <span class="c1">// Print out the fragments</span>
<span class="lineno">287 </span> <span class="k">for</span> <span class="p">(</span><span class="k">const</span> <span class="k">auto</span><span class="o">&amp;</span> <span class="nl">fragment</span> <span class="p">:</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">GetFragments</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">())</span> <span class="p">{</span>
<span class="lineno">288 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o">&lt;&lt;</span> <span class="s">&quot;Found fragment: &quot;</span> <span class="o">&lt;&lt;</span> <span class="p">(</span><span class="o">*</span><span class="n">fragment</span><span class="p">)</span><span class="o">-&gt;</span><span class="n">ToString</span><span class="p">()</span> <span class="o">&lt;&lt;</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="lineno">289 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o">&lt;&lt;</span> <span class="s">&quot;Partition expression: &quot;</span>
<span class="lineno">290 </span> <span class="o">&lt;&lt;</span> <span class="p">(</span><span class="o">*</span><span class="n">fragment</span><span class="p">)</span><span class="o">-&gt;</span><span class="n">partition_expression</span><span class="p">().</span><span class="n">ToString</span><span class="p">()</span> <span class="o">&lt;&lt;</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="lineno">291 </span> <span class="p">}</span>
<span class="lineno">292 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">293 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">294 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-&gt;</span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">295 </span><span class="p">}</span>
<span class="lineno">296 </span><span class="c1">// (Doc section: Reading and writing partitioned data #2)</span>
<span class="lineno">297 </span>
<span class="lineno">298 </span><span class="c1">// (Doc section: Reading and writing partitioned data #3)</span>
<span class="lineno">299 </span><span class="c1">// Read an entire dataset, but with partitioning information. Also, filter the dataset on</span>
<span class="lineno">300 </span><span class="c1">// the partition values.</span>
<span class="lineno">301 </span><span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">FilterPartitionedDataset</span><span class="p">(</span>
<span class="lineno">302 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">fs</span><span class="o">::</span><span class="n">FileSystem</span><span class="o">&gt;&amp;</span> <span class="n">filesystem</span><span class="p">,</span>
<span class="lineno">303 </span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">&gt;&amp;</span> <span class="n">format</span><span class="p">,</span> <span class="k">const</span> <span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&amp;</span> <span class="n">base_dir</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">304 </span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSelector</span> <span class="n">selector</span><span class="p">;</span>
<span class="lineno">305 </span> <span class="n">selector</span><span class="p">.</span><span class="n">base_dir</span> <span class="o">=</span> <span class="n">base_dir</span><span class="p">;</span>
<span class="lineno">306 </span> <span class="n">selector</span><span class="p">.</span><span class="n">recursive</span> <span class="o">=</span> <span class="nb">true</span><span class="p">;</span>
<span class="lineno">307 </span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemFactoryOptions</span> <span class="n">options</span><span class="p">;</span>
<span class="lineno">308 </span> <span class="n">options</span><span class="p">.</span><span class="n">partitioning</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">HivePartitioning</span><span class="o">::</span><span class="n">MakeFactory</span><span class="p">();</span>
<span class="lineno">309 </span> <span class="k">auto</span> <span class="n">factory</span> <span class="o">=</span> <span class="n">ds</span><span class="o">::</span><span class="n">FileSystemDatasetFactory</span><span class="o">::</span><span class="n">Make</span><span class="p">(</span><span class="n">filesystem</span><span class="p">,</span> <span class="n">selector</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">options</span><span class="p">)</span>
<span class="lineno">310 </span> <span class="p">.</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">311 </span> <span class="k">auto</span> <span class="n">dataset</span> <span class="o">=</span> <span class="n">factory</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">312 </span> <span class="k">auto</span> <span class="n">scan_builder</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">-&gt;</span><span class="n">NewScan</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">313 </span> <span class="c1">// Filter based on the partition values. This will mean that we won&#39;t even read the</span>
<span class="lineno">314 </span> <span class="c1">// files whose partition expressions don&#39;t match the filter.</span>
<span class="lineno">315 </span> <span class="n">ABORT_ON_FAILURE</span><span class="p">(</span>
<span class="lineno">316 </span> <span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Filter</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">equal</span><span class="p">(</span><span class="n">cp</span><span class="o">::</span><span class="n">field_ref</span><span class="p">(</span><span class="s">&quot;part&quot;</span><span class="p">),</span> <span class="n">cp</span><span class="o">::</span><span class="n">literal</span><span class="p">(</span><span class="s">&quot;b&quot;</span><span class="p">))));</span>
<span class="lineno">317 </span> <span class="k">auto</span> <span class="n">scanner</span> <span class="o">=</span> <span class="n">scan_builder</span><span class="o">-&gt;</span><span class="n">Finish</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">318 </span> <span class="k">return</span> <span class="n">scanner</span><span class="o">-&gt;</span><span class="n">ToTable</span><span class="p">().</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">319 </span><span class="p">}</span>
<span class="lineno">320 </span><span class="c1">// (Doc section: Reading and writing partitioned data #3)</span>
<span class="lineno">321 </span>
<span class="lineno">322 </span><span class="kt">int</span> <span class="n">main</span><span class="p">(</span><span class="kt">int</span> <span class="n">argc</span><span class="p">,</span> <span class="kt">char</span><span class="o">**</span> <span class="n">argv</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">323 </span> <span class="k">if</span> <span class="p">(</span><span class="n">argc</span> <span class="o">&lt;</span> <span class="mi">3</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">324 </span> <span class="c1">// Fake success for CI purposes.</span>
<span class="lineno">325 </span> <span class="k">return</span> <span class="n">EXIT_SUCCESS</span><span class="p">;</span>
<span class="lineno">326 </span> <span class="p">}</span>
<span class="lineno">327 </span>
<span class="lineno">328 </span> <span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">uri</span> <span class="o">=</span> <span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">];</span>
<span class="lineno">329 </span> <span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">format_name</span> <span class="o">=</span> <span class="n">argv</span><span class="p">[</span><span class="mi">2</span><span class="p">];</span>
<span class="lineno">330 </span> <span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">mode</span> <span class="o">=</span> <span class="n">argc</span> <span class="o">&gt;</span> <span class="mi">3</span> <span class="o">?</span> <span class="n">argv</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span> <span class="o">:</span> <span class="s">&quot;no_filter&quot;</span><span class="p">;</span>
<span class="lineno">331 </span> <span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">root_path</span><span class="p">;</span>
<span class="lineno">332 </span> <span class="k">auto</span> <span class="n">fs</span> <span class="o">=</span> <span class="n">fs</span><span class="o">::</span><span class="n">FileSystemFromUri</span><span class="p">(</span><span class="n">uri</span><span class="p">,</span> <span class="o">&amp;</span><span class="n">root_path</span><span class="p">).</span><span class="n">ValueOrDie</span><span class="p">();</span>
<span class="lineno">333 </span>
<span class="lineno">334 </span> <span class="n">std</span><span class="o">::</span><span class="n">string</span> <span class="n">base_path</span><span class="p">;</span>
<span class="lineno">335 </span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">FileFormat</span><span class="o">&gt;</span> <span class="n">format</span><span class="p">;</span>
<span class="lineno">336 </span> <span class="k">if</span> <span class="p">(</span><span class="n">format_name</span> <span class="o">==</span> <span class="s">&quot;feather&quot;</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">337 </span> <span class="n">format</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">IpcFileFormat</span><span class="o">&gt;</span><span class="p">();</span>
<span class="lineno">338 </span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">CreateExampleFeatherDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">root_path</span><span class="p">);</span>
<span class="lineno">339 </span> <span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="n">format_name</span> <span class="o">==</span> <span class="s">&quot;parquet&quot;</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">340 </span> <span class="n">format</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">ParquetFileFormat</span><span class="o">&gt;</span><span class="p">();</span>
<span class="lineno">341 </span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">CreateExampleParquetDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">root_path</span><span class="p">);</span>
<span class="lineno">342 </span> <span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="n">format_name</span> <span class="o">==</span> <span class="s">&quot;parquet_hive&quot;</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">343 </span> <span class="n">format</span> <span class="o">=</span> <span class="n">std</span><span class="o">::</span><span class="n">make_shared</span><span class="o">&lt;</span><span class="n">ds</span><span class="o">::</span><span class="n">ParquetFileFormat</span><span class="o">&gt;</span><span class="p">();</span>
<span class="lineno">344 </span> <span class="n">base_path</span> <span class="o">=</span> <span class="n">CreateExampleParquetHivePartitionedDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">root_path</span><span class="p">);</span>
<span class="lineno">345 </span> <span class="p">}</span> <span class="k">else</span> <span class="p">{</span>
<span class="lineno">346 </span> <span class="n">std</span><span class="o">::</span><span class="n">cerr</span> <span class="o">&lt;&lt;</span> <span class="s">&quot;Unknown format: &quot;</span> <span class="o">&lt;&lt;</span> <span class="n">format_name</span> <span class="o">&lt;&lt;</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="lineno">347 </span> <span class="n">std</span><span class="o">::</span><span class="n">cerr</span> <span class="o">&lt;&lt;</span> <span class="s">&quot;Supported formats: feather, parquet, parquet_hive&quot;</span> <span class="o">&lt;&lt;</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="lineno">348 </span> <span class="k">return</span> <span class="n">EXIT_FAILURE</span><span class="p">;</span>
<span class="lineno">349 </span> <span class="p">}</span>
<span class="lineno">350 </span>
<span class="lineno">351 </span> <span class="n">std</span><span class="o">::</span><span class="n">shared_ptr</span><span class="o">&lt;</span><span class="n">arrow</span><span class="o">::</span><span class="n">Table</span><span class="o">&gt;</span> <span class="n">table</span><span class="p">;</span>
<span class="lineno">352 </span> <span class="k">if</span> <span class="p">(</span><span class="n">mode</span> <span class="o">==</span> <span class="s">&quot;no_filter&quot;</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">353 </span> <span class="n">table</span> <span class="o">=</span> <span class="n">ScanWholeDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">base_path</span><span class="p">);</span>
<span class="lineno">354 </span> <span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="n">mode</span> <span class="o">==</span> <span class="s">&quot;filter&quot;</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">355 </span> <span class="n">table</span> <span class="o">=</span> <span class="n">FilterAndSelectDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">base_path</span><span class="p">);</span>
<span class="lineno">356 </span> <span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="n">mode</span> <span class="o">==</span> <span class="s">&quot;project&quot;</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">357 </span> <span class="n">table</span> <span class="o">=</span> <span class="n">ProjectDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">base_path</span><span class="p">);</span>
<span class="lineno">358 </span> <span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="n">mode</span> <span class="o">==</span> <span class="s">&quot;select_project&quot;</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">359 </span> <span class="n">table</span> <span class="o">=</span> <span class="n">SelectAndProjectDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">base_path</span><span class="p">);</span>
<span class="lineno">360 </span> <span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="n">mode</span> <span class="o">==</span> <span class="s">&quot;partitioned&quot;</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">361 </span> <span class="n">table</span> <span class="o">=</span> <span class="n">ScanPartitionedDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">base_path</span><span class="p">);</span>
<span class="lineno">362 </span> <span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="n">mode</span> <span class="o">==</span> <span class="s">&quot;filter_partitioned&quot;</span><span class="p">)</span> <span class="p">{</span>
<span class="lineno">363 </span> <span class="n">table</span> <span class="o">=</span> <span class="n">FilterPartitionedDataset</span><span class="p">(</span><span class="n">fs</span><span class="p">,</span> <span class="n">format</span><span class="p">,</span> <span class="n">base_path</span><span class="p">);</span>
<span class="lineno">364 </span> <span class="p">}</span> <span class="k">else</span> <span class="p">{</span>
<span class="lineno">365 </span> <span class="n">std</span><span class="o">::</span><span class="n">cerr</span> <span class="o">&lt;&lt;</span> <span class="s">&quot;Unknown mode: &quot;</span> <span class="o">&lt;&lt;</span> <span class="n">mode</span> <span class="o">&lt;&lt;</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="lineno">366 </span> <span class="n">std</span><span class="o">::</span><span class="n">cerr</span>
<span class="lineno">367 </span> <span class="o">&lt;&lt;</span> <span class="s">&quot;Supported modes: no_filter, filter, project, select_project, partitioned&quot;</span>
<span class="lineno">368 </span> <span class="o">&lt;&lt;</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="lineno">369 </span> <span class="k">return</span> <span class="n">EXIT_FAILURE</span><span class="p">;</span>
<span class="lineno">370 </span> <span class="p">}</span>
<span class="lineno">371 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o">&lt;&lt;</span> <span class="s">&quot;Read &quot;</span> <span class="o">&lt;&lt;</span> <span class="n">table</span><span class="o">-&gt;</span><span class="n">num_rows</span><span class="p">()</span> <span class="o">&lt;&lt;</span> <span class="s">&quot; rows&quot;</span> <span class="o">&lt;&lt;</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="lineno">372 </span> <span class="n">std</span><span class="o">::</span><span class="n">cout</span> <span class="o">&lt;&lt;</span> <span class="n">table</span><span class="o">-&gt;</span><span class="n">ToString</span><span class="p">()</span> <span class="o">&lt;&lt;</span> <span class="n">std</span><span class="o">::</span><span class="n">endl</span><span class="p">;</span>
<span class="lineno">373 </span> <span class="k">return</span> <span class="n">EXIT_SUCCESS</span><span class="p">;</span>
<span class="lineno">374 </span><span class="p">}</span>
</pre></div>
</div>
</div>
</div>
</div>
<!-- Previous / next buttons -->
<div class="prev-next-area">
  <!-- Link to the preceding page; target matches <link rel="prev"> in the head. -->
  <a class="left-prev" id="prev-link" href="json.html" rel="prev" title="previous page">
    <!-- Decorative chevron: the adjacent text already names the link, so hide the icon from assistive technology. -->
    <i class="fas fa-angle-left" aria-hidden="true"></i>
    <div class="prev-next-info">
      <p class="prev-next-subtitle">previous</p>
      <p class="prev-next-title">Reading JSON files</p>
    </div>
  </a>
  <!-- Link to the following page; target matches <link rel="next"> in the head. -->
  <a class="right-next" id="next-link" href="flight.html" rel="next" title="next page">
    <div class="prev-next-info">
      <p class="prev-next-subtitle">next</p>
      <p class="prev-next-title">Arrow Flight RPC</p>
    </div>
    <!-- Decorative chevron, hidden from assistive technology for the same reason as above. -->
    <i class="fas fa-angle-right" aria-hidden="true"></i>
  </a>
</div>
</main>
</div>
</div>
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"></script>
<!-- Page footer: copyright notice and the Sphinx version named as the page generator. -->
<footer class="footer mt-5 mt-md-0">
  <div class="container">
    <div class="footer-item">
      <p class="copyright">
        &copy; Copyright 2016-2022 Apache Software Foundation.
      </p>
    </div>
    <div class="footer-item">
      <p class="sphinx-version">
        <!-- Use the HTTPS URL so the outbound link does not go over plain HTTP. -->
        Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 4.4.0.
      </p>
    </div>
  </div>
</footer>
<script type="text/javascript" src="/docs/_static/versionwarning.js"></script> </body>
</html>